1 source("RemembR/R/RemembeR.R")
3 change.remember.file("ordinal.quality.model.RDS")
7 remember(weights, "sample.weights")
10 remember(n.holdout,"n.holdout")
11 holdout <- df[sample(.N,n.holdout)]
12 saveRDS(holdout,'data/holdout_quality_labels.RDS')
13 df <- df[!(revid %in% holdout$revid)]
14 saveRDS(df,'data/training_quality_labels.RDS')
17 df <- df[sample(.N,2000)]
20 ## So it turns out that the 6 predictors we have are highly correlated creating problems for sampling so use QR decomposition
21 df <- df[!is.na(wp10)]
23 df[, start.p.stub := Start + Stub]
25 saveRDS(df,"data/training_quality_labels.RDS")
27 ## So it turns out that the 6 predictors we have are highly correlated creating problems for sampling so use QR decomposition
28 df <- df[!is.na(wp10)]
30 df[, start.p.stub := Start + Stub]
32 fam.cloglog <- sratio(link='cloglog', threshold='flexible')
33 #formula.1 <- brmsformula(wp10 | weights(weight) ~ 1,decomp='QR',center=TRUE)
35 fam <- sratio(link='logit', threshold='flexible')
36 fam.cumulative <- sratio(link='logit', threshold='flexible')
38 ## It turns out that the matrix is singular if we include all the predictors.
39 ## C is the most correlated with the other variables so for now let's remove it.
41 ## it turns out that we don't need to do model selection at all since we don't care about the coefficients.
42 ## we can just do the csv!
43 x <- df[,.(Stub,Start,C,B,GA,FA)]
45 wpca <- function(x, weight){
48 means <- unlist(lapply(1:dim(x)[2], function(i) weighted.mean(x[,i], weight)))
50 centered <- as.matrix(t(t(x) - means))
51 weightmat <- diag(weight)
52 covmat <- t(centered) %*% weightmat %*% centered / (sum(weight) - 1)
54 factors <- eigen(covmat)
55 basis <- factors$vectors
56 result <- centered %*% basis
57 # return a list with the info we need to do the transformation
58 return(list(means=means, basis=basis, x=result))
61 #unweighted.pca <- wpca(df[,.(Stub,Start,C,B,GA,FA)],rep(1,nrow(df)))
62 upca <- prcomp(df[,.(Stub,Start,C,B,GA,FA)])
63 unweighted.pca <- list(means = upca$center, basis=upca$rotation, x=upca$x)
64 saveRDS(unweighted.pca,"data/ores_pca_features.noweights.RDS")
66 weighted.pca <- wpca(df[,.(Stub,Start,C,B,GA,FA)],df$article_weight)
67 saveRDS(weighted.pca, "data/ores_pca_features.RDS")
69 revision.pca <- wpca(df[,.(Stub,Start,C,B,GA,FA)],df$revision_weight)
70 saveRDS(revision.pca, "data/ores_pca_features_revisions.RDS")
72 df <- df[,":="(pca1 = weighted.pca$x[,1],
73 pca2 = weighted.pca$x[,2],
74 pca3 = weighted.pca$x[,3],
75 pca4 = weighted.pca$x[,4],
76 pca5 = weighted.pca$x[,5],
77 pca6 = weighted.pca$x[,6])]
79 df <- df[,":="(pca1.revision = revision.pca$x[,1],
80 pca2.revision = revision.pca$x[,2],
81 pca3.revision = revision.pca$x[,3],
82 pca4.revision = revision.pca$x[,4],
83 pca5.revision = revision.pca$x[,5],
84 pca6.revision = revision.pca$x[,6])]
86 df <- df[,":="(pca1.noweights = unweighted.pca$x[,1],
87 pca2.noweights = unweighted.pca$x[,2],
88 pca3.noweights = unweighted.pca$x[,3],
89 pca4.noweights = unweighted.pca$x[,4],
90 pca5.noweights = unweighted.pca$x[,5],
91 pca6.noweights = unweighted.pca$x[,6])]
93 qformula.main.pca.cs <- brmsformula(wp10 | weights(article_weight) ~ cs(pca1) + cs(pca2) + cs(pca3) + cs(pca4) + cs(pca5))
94 formula.main.pca.noweights.cs <- brmsformula(wp10 ~ cs(pca1.noweights) + cs(pca2.noweights) + cs(pca3.noweights) + cs(pca4.noweights) + cs(pca5.noweights))
95 formula.revision.pca.cs <- brmsformula(wp10 | weights(revision_weight) ~ cs(pca1.revision) + cs(pca2.revision) + cs(pca3.revision) + cs(pca4.revision) + cs(pca5.revision))
96 formula.qe6.cs <- brmsformula(wp10 | weights(article_weight) ~ cs(quality.even6))
97 formula.qe6.revision.cs <- brmsformula(wp10 | weights(revision_weight) ~ cs(quality.even6))
98 formula.qe6.noweights.cs <- brmsformula(wp10 ~ cs(quality.even6))
100 formula.main.pca <- brmsformula(wp10 | weights(article_weight) ~ pca1 + pca2 + pca3 + pca4 + pca5)
101 formula.main.pca.noweights <- brmsformula(wp10 ~ pca1.noweights + pca2.noweights + pca3.noweights + pca4.noweights + pca5.noweights)
102 formula.revision.pca <- brmsformula(wp10 | weights(revision_weight) ~ pca1.revision + pca2.revision + pca3.revision + pca4.revision + pca5.revision)
103 formula.qe6 <- brmsformula(wp10 | weights(article_weight) ~ quality.even6)
104 formula.qe6.revision <- brmsformula(wp10 | weights(revision_weight) ~ quality.even6)
105 formula.qe6.noweights <- brmsformula(wp10 ~ quality.even6)
107 formula.scores.noweights <- brmsformula(wp10 ~ Start + Stub + GA + FA + B)
112 options(mc.cores = parallel::detectCores())
114 plan(lapply(1:7,function(x) tweak(multisession, workers=4)))
116 model.main.pca %<-% brm(formula=formula.main.pca, data=df, family=fam, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE))
117 model.qe6 %<-% brm(formula.qe6, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
119 model.main.revision %<-% brm(formula=formula.revision.pca, data=df, family=fam, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE))
121 model.qe6.revision %<-% brm(formula.qe6.revision, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
122 model.qe6.noweights %<-% brm(formula.qe6.noweights, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
123 model.main.pca.noweights %<-% brm(formula=formula.main.pca.noweights, data=df, family=fam, control=list(max_treedepth=15), future=TRUE,save_pars=save_pars(all=TRUE))
125 ## model.main.pca.cs %<-% brm(formula=formula.main.pca.cs, data=df, family=fam, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE))
126 ## model.qe6.cs %<-% brm(formula.qe6.cs, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
128 ## model.main.revision.cs %<-% brm(formula=formula.revision.pca.cs, data=df, family=fam, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE))
130 ## model.qe6.revision.cs %<-% brm(formula.qe6.revision.cs, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
131 ## model.qe6.noweights.cs %<-% brm(formula.qe6.noweights.cs, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
132 ## model.main.pca.noweights.cs %<-% brm(formula=formula.main.pca.noweights.cs, data=df, family=fam, control=list(max_treedepth=15), future=TRUE,save_pars=save_pars(all=TRUE))
134 model.main.pca.cumulative %<-% brm(formula=formula.main.pca, data=df, family=fam.cumulative, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE))
135 model.qe6.cumulative %<-% brm(formula.qe6, data=df, family=fam.cumulative, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
137 model.main.revision.cumulative %<-% brm(formula=formula.revision.pca, data=df, family=fam.cumulative, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE))
139 model.qe6.revision.cumulative %<-% brm(formula.qe6.revision, data=df, family=fam.cumulative, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
140 model.qe6.noweights.cumulative %<-% brm(formula.qe6.noweights, data=df, family=fam.cumulative, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE))
141 model.main.pca.noweights.cumulative %<-% brm(formula=formula.main.pca.noweights, data=df, family=fam.cumulative, control=list(max_treedepth=15), future=TRUE,save_pars=save_pars(all=TRUE))
144 #model.scores.noweights <- brm(formula=formula.scores.noweights, data=df, family=fam, control=list(max_treedepth=15), future=TRUE,save_pars=save_pars(all=TRUE))
146 models <- resolve(globalenv(),result=F)
149 model.main.revision <- add_criterion(model.main.revision,'loo',moment_match=T)
150 model.main.pca <- add_criterion(model.main.pca,'loo',moment_match=T)
151 model.qe6.revision <- add_criterion(model.qe6.revision,'loo')
152 model.qe6 <- add_criterion(model.qe6,'loo')
153 model.main.pca.noweights <- add_criterion(model.main.pca.noweights,'loo',moment_match=T)
154 model.qe6.noweights <- add_criterion(model.qe6.noweights,'loo')
156 model.main.revision.cumulative <- add_criterion(model.main.revision.cumulative,'loo',moment_match=T)
157 model.main.pca.cumulative <- add_criterion(model.main.pca.cumulative,'loo',moment_match=T)
158 model.qe6.revision.cumulative <- add_criterion(model.qe6.revision.cumulative,'loo')
159 model.qe6.cumulative <- add_criterion(model.qe6.cumulative,'loo')
160 model.main.pca.noweights.cumulative <- add_criterion(model.main.pca.noweights.cumulative,'loo',moment_match=T)
161 model.qe6.noweights.cumulative <- add_criterion(model.qe6.noweights.cumulative,'loo')
164 ## model.main.revision.cs <- add_criterion(model.main.revision.cs,'loo',moment_match=T)
165 ## model.main.pca.cs <- add_criterion(model.main.pca.cs,'loo',moment_match=T)
166 ## model.qe6.revision.cs <- add_criterion(model.qe6.revision.cs,'loo')
167 ## model.qe6.cs <- add_criterion(model.qe6.cs,'loo')
168 ## model.main.pca.noweights.cs <- add_criterion(model.main.pca.noweights.cs,'loo',moment_match=T)
169 ## model.qe6.noweights.cs <- add_criterion(model.qe6.noweights.cs,'loo')
171 saveRDS(model.qe6.revision,"models/ordinal_quality_qe6_revision.RDS")
172 saveRDS(model.qe6,"models/ordinal_quality_qe6.RDS")
173 saveRDS(model.main.pca.noweights,"models/ordinal_quality_pca.noweights.RDS")
174 saveRDS(model.qe6.noweights,"models/ordinal_quality_qe6.noweights.RDS")
175 saveRDS(model.main.pca,"models/ordinal_quality_pca.RDS")
176 saveRDS(model.main.revision,"models/ordinal_quality_pca_revision.RDS")
178 saveRDS(model.qe6.revision.cumulative,"models/ordinal_quality_qe6_revision.cumulative.RDS")
179 saveRDS(model.qe6.cumulative,"models/ordinal_quality_qe6.cumulative.RDS")
180 saveRDS(model.main.pca.noweights.cumulative,"models/ordinal_quality_pca.noweights.cumulative.RDS")
181 saveRDS(model.qe6.noweights.cumulative,"models/ordinal_quality_qe6.noweights.cumulative.RDS")
182 saveRDS(model.main.pca.cumulative,"models/ordinal_quality_pca.cumulative.RDS")
183 saveRDS(model.main.revision.cumulative,"models/ordinal_quality_pca_revision.cumulative.RDS")
185 ## saveRDS(model.qe6.revision.cs,"models/ordinal_quality_qe6_revision.RDS")
186 ## saveRDS(model.qe6.cs,"models/ordinal_quality_qe6.RDS")
187 ## saveRDS(model.main.pca.noweights.cs,"models/ordinal_quality_pca.noweights.RDS")
188 ## saveRDS(model.qe6.noweights.cs,"models/ordinal_quality_qe6.noweights.RDS")
189 ## saveRDS(model.main.pca.cs,"models/ordinal_quality_pca.RDS")
190 ## saveRDS(model.main.revision.cs,"models/ordinal_quality_pca_revision.RDS")
194 models <- resolve(globalenv(),result=F)