source("RemembR/R/RemembeR.R") source("load_data.R") change.remember.file("ordinal.quality.model.RDS") test <- F remember(weights, "sample.weights") n.holdout <- 4000 remember(n.holdout,"n.holdout") holdout <- df[sample(.N,n.holdout)] saveRDS(holdout,'data/holdout_quality_labels.RDS') df <- df[!(revid %in% holdout$revid)] saveRDS(df,'data/training_quality_labels.RDS') if(test == TRUE){ df <- df[sample(.N,2000)] } ## So it turns out that the 6 predictors we have are highly correlated creating problems for sampling so use QR decomposition df <- df[!is.na(wp10)] df[, start.p.stub := Start + Stub] saveRDS(df,"data/training_quality_labels.RDS") ## So it turns out that the 6 predictors we have are highly correlated creating problems for sampling so use QR decomposition df <- df[!is.na(wp10)] df[, start.p.stub := Start + Stub] fam.cloglog <- sratio(link='cloglog', threshold='flexible') #formula.1 <- brmsformula(wp10 | weights(weight) ~ 1,decomp='QR',center=TRUE) fam <- sratio(link='logit', threshold='flexible') fam.cumulative <- sratio(link='logit', threshold='flexible') ## It turns out that the matrix is singular if we include all the predictors. ## C is the most correlated with the other variables so for now let's remove it. ## it turns out that we don't need to do model selection at all since we don't care about the coefficients. ## we can just do the csv! x <- df[,.(Stub,Start,C,B,GA,FA)] wpca <- function(x, weight){ name <- names(x) x <- as.matrix(x) means <- unlist(lapply(1:dim(x)[2], function(i) weighted.mean(x[,i], weight))) names(means) <- name centered <- as.matrix(t(t(x) - means)) weightmat <- diag(weight) covmat <- t(centered) %*% weightmat %*% centered / (sum(weight) - 1) factors <- eigen(covmat) basis <- factors$vectors result <- centered %*% basis # return a list with the info we need to do the transformation return(list(means=means, basis=basis, x=result)) } #unweighted.pca <- wpca(df[,.(Stub,Start,C,B,GA,FA)],rep(1,nrow(df))) upca <- prcomp(df[,.(Stub,Start,C,B,GA,FA)]) unweighted.pca <- list(means = upca$center, basis=upca$rotation, x=upca$x) saveRDS(unweighted.pca,"data/ores_pca_features.noweights.RDS") weighted.pca <- wpca(df[,.(Stub,Start,C,B,GA,FA)],df$article_weight) saveRDS(weighted.pca, "data/ores_pca_features.RDS") revision.pca <- wpca(df[,.(Stub,Start,C,B,GA,FA)],df$revision_weight) saveRDS(revision.pca, "data/ores_pca_features_revisions.RDS") df <- df[,":="(pca1 = weighted.pca$x[,1], pca2 = weighted.pca$x[,2], pca3 = weighted.pca$x[,3], pca4 = weighted.pca$x[,4], pca5 = weighted.pca$x[,5], pca6 = weighted.pca$x[,6])] df <- df[,":="(pca1.revision = revision.pca$x[,1], pca2.revision = revision.pca$x[,2], pca3.revision = revision.pca$x[,3], pca4.revision = revision.pca$x[,4], pca5.revision = revision.pca$x[,5], pca6.revision = revision.pca$x[,6])] df <- df[,":="(pca1.noweights = unweighted.pca$x[,1], pca2.noweights = unweighted.pca$x[,2], pca3.noweights = unweighted.pca$x[,3], pca4.noweights = unweighted.pca$x[,4], pca5.noweights = unweighted.pca$x[,5], pca6.noweights = unweighted.pca$x[,6])] qformula.main.pca.cs <- brmsformula(wp10 | weights(article_weight) ~ cs(pca1) + cs(pca2) + cs(pca3) + cs(pca4) + cs(pca5)) formula.main.pca.noweights.cs <- brmsformula(wp10 ~ cs(pca1.noweights) + cs(pca2.noweights) + cs(pca3.noweights) + cs(pca4.noweights) + cs(pca5.noweights)) formula.revision.pca.cs <- brmsformula(wp10 | weights(revision_weight) ~ cs(pca1.revision) + cs(pca2.revision) + cs(pca3.revision) + cs(pca4.revision) + cs(pca5.revision)) formula.qe6.cs <- brmsformula(wp10 | weights(article_weight) ~ cs(quality.even6)) formula.qe6.revision.cs <- brmsformula(wp10 | weights(revision_weight) ~ cs(quality.even6)) formula.qe6.noweights.cs <- brmsformula(wp10 ~ cs(quality.even6)) formula.main.pca <- brmsformula(wp10 | weights(article_weight) ~ pca1 + pca2 + pca3 + pca4 + pca5) formula.main.pca.noweights <- brmsformula(wp10 ~ pca1.noweights + pca2.noweights + pca3.noweights + pca4.noweights + pca5.noweights) formula.revision.pca <- brmsformula(wp10 | weights(revision_weight) ~ pca1.revision + pca2.revision + pca3.revision + pca4.revision + pca5.revision) formula.qe6 <- brmsformula(wp10 | weights(article_weight) ~ quality.even6) formula.qe6.revision <- brmsformula(wp10 | weights(revision_weight) ~ quality.even6) formula.qe6.noweights <- brmsformula(wp10 ~ quality.even6) formula.scores.noweights <- brmsformula(wp10 ~ Start + Stub + GA + FA + B) library(future) library(parallel) options(mc.cores = parallel::detectCores()) plan(lapply(1:7,function(x) tweak(multisession, workers=4))) model.main.pca %<-% brm(formula=formula.main.pca, data=df, family=fam, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE)) model.qe6 %<-% brm(formula.qe6, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE)) model.main.revision %<-% brm(formula=formula.revision.pca, data=df, family=fam, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE)) model.qe6.revision %<-% brm(formula.qe6.revision, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE)) model.qe6.noweights %<-% brm(formula.qe6.noweights, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE)) model.main.pca.noweights %<-% brm(formula=formula.main.pca.noweights, data=df, family=fam, control=list(max_treedepth=15), future=TRUE,save_pars=save_pars(all=TRUE)) ## model.main.pca.cs %<-% brm(formula=formula.main.pca.cs, data=df, family=fam, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE)) ## model.qe6.cs %<-% brm(formula.qe6.cs, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE)) ## model.main.revision.cs %<-% brm(formula=formula.revision.pca.cs, data=df, family=fam, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE)) ## model.qe6.revision.cs %<-% brm(formula.qe6.revision.cs, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE)) ## model.qe6.noweights.cs %<-% brm(formula.qe6.noweights.cs, data=df, family=fam, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE)) ## model.main.pca.noweights.cs %<-% brm(formula=formula.main.pca.noweights.cs, data=df, family=fam, control=list(max_treedepth=15), future=TRUE,save_pars=save_pars(all=TRUE)) model.main.pca.cumulative %<-% brm(formula=formula.main.pca, data=df, family=fam.cumulative, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE)) model.qe6.cumulative %<-% brm(formula.qe6, data=df, family=fam.cumulative, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE)) model.main.revision.cumulative %<-% brm(formula=formula.revision.pca, data=df, family=fam.cumulative, control=list(max_treedepth=15), future=TRUE, save_pars=save_pars(all=TRUE)) model.qe6.revision.cumulative %<-% brm(formula.qe6.revision, data=df, family=fam.cumulative, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE)) model.qe6.noweights.cumulative %<-% brm(formula.qe6.noweights, data=df, family=fam.cumulative, control=list(max_treedepth=15),future=TRUE,save_pars=save_pars(all=TRUE)) model.main.pca.noweights.cumulative %<-% brm(formula=formula.main.pca.noweights, data=df, family=fam.cumulative, control=list(max_treedepth=15), future=TRUE,save_pars=save_pars(all=TRUE)) #model.scores.noweights <- brm(formula=formula.scores.noweights, data=df, family=fam, control=list(max_treedepth=15), future=TRUE,save_pars=save_pars(all=TRUE)) models <- resolve(globalenv(),result=F) print("adding loo") model.main.revision <- add_criterion(model.main.revision,'loo',moment_match=T) model.main.pca <- add_criterion(model.main.pca,'loo',moment_match=T) model.qe6.revision <- add_criterion(model.qe6.revision,'loo') model.qe6 <- add_criterion(model.qe6,'loo') model.main.pca.noweights <- add_criterion(model.main.pca.noweights,'loo',moment_match=T) model.qe6.noweights <- add_criterion(model.qe6.noweights,'loo') model.main.revision.cumulative <- add_criterion(model.main.revision.cumulative,'loo',moment_match=T) model.main.pca.cumulative <- add_criterion(model.main.pca.cumulative,'loo',moment_match=T) model.qe6.revision.cumulative <- add_criterion(model.qe6.revision.cumulative,'loo') model.qe6.cumulative <- add_criterion(model.qe6.cumulative,'loo') model.main.pca.noweights.cumulative <- add_criterion(model.main.pca.noweights.cumulative,'loo',moment_match=T) model.qe6.noweights.cumulative <- add_criterion(model.qe6.noweights.cumulative,'loo') ## model.main.revision.cs <- add_criterion(model.main.revision.cs,'loo',moment_match=T) ## model.main.pca.cs <- add_criterion(model.main.pca.cs,'loo',moment_match=T) ## model.qe6.revision.cs <- add_criterion(model.qe6.revision.cs,'loo') ## model.qe6.cs <- add_criterion(model.qe6.cs,'loo') ## model.main.pca.noweights.cs <- add_criterion(model.main.pca.noweights.cs,'loo',moment_match=T) ## model.qe6.noweights.cs <- add_criterion(model.qe6.noweights.cs,'loo') saveRDS(model.qe6.revision,"models/ordinal_quality_qe6_revision.RDS") saveRDS(model.qe6,"models/ordinal_quality_qe6.RDS") saveRDS(model.main.pca.noweights,"models/ordinal_quality_pca.noweights.RDS") saveRDS(model.qe6.noweights,"models/ordinal_quality_qe6.noweights.RDS") saveRDS(model.main.pca,"models/ordinal_quality_pca.RDS") saveRDS(model.main.revision,"models/ordinal_quality_pca_revision.RDS") saveRDS(model.qe6.revision.cumulative,"models/ordinal_quality_qe6_revision.cumulative.RDS") saveRDS(model.qe6.cumulative,"models/ordinal_quality_qe6.cumulative.RDS") saveRDS(model.main.pca.noweights.cumulative,"models/ordinal_quality_pca.noweights.cumulative.RDS") saveRDS(model.qe6.noweights.cumulative,"models/ordinal_quality_qe6.noweights.cumulative.RDS") saveRDS(model.main.pca.cumulative,"models/ordinal_quality_pca.cumulative.RDS") saveRDS(model.main.revision.cumulative,"models/ordinal_quality_pca_revision.cumulative.RDS") ## saveRDS(model.qe6.revision.cs,"models/ordinal_quality_qe6_revision.RDS") ## saveRDS(model.qe6.cs,"models/ordinal_quality_qe6.RDS") ## saveRDS(model.main.pca.noweights.cs,"models/ordinal_quality_pca.noweights.RDS") ## saveRDS(model.qe6.noweights.cs,"models/ordinal_quality_qe6.noweights.RDS") ## saveRDS(model.main.pca.cs,"models/ordinal_quality_pca.RDS") ## saveRDS(model.main.revision.cs,"models/ordinal_quality_pca_revision.RDS") models <- resolve(globalenv(),result=F)