library(MASS) library(brms) options(mc.cores=28) library(ggplot2) library(data.table) library(arrow) library(wCorr) source("RemembR/R/RemembeR.R") change.remember.file("ordinal.quality.analysis.noweights.RDS") #model.1 <- readRDS("models/ordinal_quality_intercept.RDS") model.main.pca <- readRDS("models/ordinal_quality_pca.noweights.RDS") model.main.pca.cumulative <- readRDS("models/ordinal_quality_pca.noweights.cumulative.RDS") model.qe6 <- readRDS("models/ordinal_quality_qe6.noweights.RDS") df <- readRDS("data/training_quality_labels.RDS") # then compare them with loo comparison.loo <- loo_compare(model.main.pca,model.qe6,model.main.pca.cumulative) #comparison.waic <- loo_compare(model.main.noC,model.main.noB,model.main.noFa,model.main.noGa,model.main.noStart,model.main.noStub,criterion='waic') print(comparison.loo,simplify=F,digits=2) remember(comparison.loo,"comparison.loo") # LOO Chooses NoC best.model <- model.main.pca.cumulative pca_features_unweighted <- readRDS("data/ores_pca_features.noweights.RDS") test.df <- readRDS("data/holdout_quality_labels.RDS") wpca_transform <- function(wpca, x){ x <- as.matrix(x) centered <- as.matrix(t(t(x) - wpca$means)) return(centered %*% wpca$basis) } unweighted.pca <- wpca_transform(pca_features_unweighted, test.df[,.(Stub, Start, C, B, GA, FA)]) test.df <- test.df[,":="(pca1.noweights = unweighted.pca[,1], pca2.noweights = unweighted.pca[,2], pca3.noweights = unweighted.pca[,3], pca4.noweights = unweighted.pca[,4], pca5.noweights = unweighted.pca[,5], pca6.noweights = unweighted.pca[,6])] draws <- as.data.table(best.model) test.df <- test.df[,idx.max:=.(apply(test.df[,.(Stub,Start,C,B,GA,FA)],1,which.max))] test.df <- test.df[,MPQC:=.(apply(test.df[,.(idx.max)],1,function(idx) c("stub","start","c","b","ga","fa")[idx]))] top.preds <- test.df[,MPQC] #ordinal.fitted.1 <- fitted(best.model, test.df, scale='response') ordinal.fitted <- data.table(fitted(best.model, test.df, scale='linear')) ordinal.pred <- ordinal.fitted$Estimate remember(ordinal.fitted,'ordinal.fitted') quality.ordinal <- ordinal.pred quality.even6 <- apply(test.df[,.(Stub,Start,B,C,GA,FA)],1,function(r) r %*% c(0,1,2,3,4,5)) quality.even5 <- apply(test.df[,.(Stub,Start,B,GA,FA)],1,function(r) r %*% c(1,2,3,4,5)) test.df <- test.df[,quality.ordinal := quality.ordinal] test.df <- test.df[,quality.even6 := quality.even6] (spearcor <- cor(test.df$quality.ordinal, test.df$quality.even6, method='spearman')) remember(spearcor, 'spearman.corr') (pearsoncor <- cor(test.df$quality.ordinal, test.df$quality.even6, method='pearson')) remember(pearsoncor, 'pearson.corr') ordinal.preds <- data.table(predict(best.model, test.df, robust=T)) #names(ordinal.preds) <- c("Stub","Start","C","B","A","GA","FA") names(ordinal.preds) <- c("Stub","Start","C","B","GA","FA") ordinal.preds <- ordinal.preds[,idx.max:=.(apply(ordinal.preds[,.(Stub,Start,C,B,GA,FA)],1,which.max))] #ordinal.preds <- ordinal.preds[,predicted:=.(apply(ordinal.preds[,.(idx.max)],1,function(idx) c("stub","start","c","b",'a',"ga","fa")[idx]))] ordinal.preds <- ordinal.preds[,predicted:=.(apply(ordinal.preds[,.(idx.max)],1,function(idx) c("stub","start","c","b","ga","fa")[idx]))] pred.qe6 <- data.table(predict(model.qe6,test.df)) names(pred.qe6) <- c("Stub","Start","C","B","GA","FA") pred.qe6 <- pred.qe6[,idx.max:=.(apply(pred.qe6[,.(Stub,Start,C,B,GA,FA)],1,which.max))] #pred.qe6 <- pred.qe6[,predicted:=.(apply(pred.qe6[,.(idx.max)],1,function(idx) c("stub","start","c","b",'a',"ga","fa")[idx]))] pred.qe6 <- pred.qe6[,predicted:=.(apply(pred.qe6[,.(idx.max)],1,function(idx) c("stub","start","c","b","ga","fa")[idx]))] test.df <- test.df[,ordinal.pred := ordinal.preds$predicted] test.df <- test.df[,pred.qe6 := pred.qe6$predicted] test.df <- test.df[,idx.max:=.(apply(test.df[,.(Stub,Start,C,B,GA,FA)],1,which.max))] test.df <- test.df[,MPQC:=.(apply(test.df[,.(idx.max)],1,function(idx) c("stub","start","c","b","ga","fa")[idx]))] (top.pred.accuracy <- test.df[,mean(MPQC==wp10)]) remember(top.pred.accuracy, "top.pred.accuracy") (ordinal.pred.accuracy <- test.df[,mean(ordinal.pred == wp10)]) remember(ordinal.pred.accuracy, "ordinal.pred.accuracy") quality.even6 <- apply(df[,.(Stub,Start,B,C,GA,FA)],1,function(r) r %*% c(1,2,3,4,5,6)) (pred.qe6.accuracy <- mean(test.df[,.(pred.qe6)] == test.df[,.(wp10)])) remember(ordinal.pred.accuracy, "ordinal.pred.accuracy") remember(best.model, "best.model") (accuracy.macro <- test.df[,.(top.pred.accuracy = mean(MPQC==wp10), ordinal.pred.accuracy = mean(ordinal.pred==wp10), pred.qe6.accuracy = mean(pred.qe6==wp10)),by=.(wp10)]) accuracy.macro[,sapply(.SD,mean), .SDcols=c("top.pred.accuracy","ordinal.pred.accuracy","pred.qe6.accuracy")] remember(test.df,'test.df') ordinal.preds[,wp10:=test.df$wp10] ordinal.preds[,weight:=test.df$article_weight] total.weight <- sum(ordinal.preds$weight) library(modi) calibration.stats.1 <- ordinal.preds[,.(prob.predicted=apply(.SD,2,function(c) weighted.mean(c,weight)), var.predicted=apply(.SD,2,function(c) weighted.var(c,weight))),.SDcols=c("Stub","Start","C","B","GA","FA")] calibration.stats.1[,wp10:=c("stub","start","c","b","ga","fa")] calip.data = ordinal.preds[order(wp10),.(prob.data=sum(weight)/total.weight, var.data=var(weight)/total.weight),by=.(wp10)] calibration.stats.1 <- calibration.stats.1[calip.data,on=.(wp10)] calibration.stats.1$weighttype <- 'Article weight' ordinal.preds[,weight:=test.df$revision_weight] total.weight <- sum(ordinal.preds$weight) calibration.stats.2 <- ordinal.preds[,.(prob.predicted=apply(.SD,2,function(c) weighted.mean(c,weight)), var.predicted=apply(.SD,2,function(c) weighted.var(c,weight))),.SDcols=c("Stub","Start","C","B","GA","FA")] calibration.stats.2[,wp10:=c("stub","start","c","b","ga","fa")] calip.data = ordinal.preds[order(wp10),.(prob.data=sum(weight)/total.weight, var.data=var(weight)/total.weight),by=.(wp10)] calibration.stats.2 <- calibration.stats.2[calip.data,on=.(wp10)] calibration.stats.2$weighttype <- 'Revision weight' ordinal.preds[,weight:=rep(1,nrow(ordinal.preds))] total.weight <- sum(ordinal.preds$weight) calibration.stats.3 <- ordinal.preds[,.(prob.predicted=apply(.SD,2,function(c) weighted.mean(c,weight)), var.predicted=apply(.SD,2,function(c) weighted.var(c,weight))),.SDcols=c("Stub","Start","C","B","GA","FA")] calibration.stats.3[,wp10:=c("stub","start","c","b","ga","fa")] calip.data = ordinal.preds[order(wp10),.(prob.data=sum(weight)/total.weight, var.data=var(weight)/total.weight),by=.(wp10)] calibration.stats.3 <- calibration.stats.3[calip.data,on=.(wp10)] calibration.stats.3$weighttype <- 'No weight' calibration.stats <- rbind(calibration.stats.1,calibration.stats.2,calibration.stats.3) calibration.stats[,'calibration':=prob.data - prob.predicted] remember(calibration.stats, "calibration.stats") ## p <- ggplot(data.frame(quality.ordinal, quality.even6, quality.even5)) ## p <- p + geom_point(aes(x=quality.even6,y=quality.ordinal)) + geom_smooth(aes(x=quality.even6,y=quality.ordinal)) ## print(p) ## dev.off() ## post.pred <- posterior_predict(model.main) ## preds <- as.character(predict(polrmodel)) ## polrmodel.accuracy <- weighted.mean(preds==df$wp10,df$weight)