code.communitydata.science - ml_measurement_error_public.git/blobdiff - simulations/plot_irr_example.R
Update the real-data example code and rerun the project.
[ml_measurement_error_public.git] / simulations / plot_irr_example.R
index bf5e66193b749dd8a31b8765de45517d60727506..4ec79dce6c1b3175ad84767f947ea7508b12e125 100644 (file)
@@ -17,6 +17,10 @@ build_plot_dataset <- function(df){
 
     z.true <-  summarize.estimator(df, 'true','z')
 
+    x.naive <-  summarize.estimator(df, 'naive','x')
+
+    z.naive <-  summarize.estimator(df, 'naive','z')
+
     x.loa0.feasible <- summarize.estimator(df, 'loa0.feasible','x')
     
     z.loa0.feasible <- summarize.estimator(df,'loa0.feasible','z')
@@ -33,36 +37,55 @@ build_plot_dataset <- function(df){
 
     z.loco.mle <- summarize.estimator(df, 'loco.mle', 'z')
 
+    x.loco.mle <- summarize.estimator(df, 'loco.mle', 'x')
+
+    z.loco.amelia <- summarize.estimator(df, 'amelia.full', 'z')
+    x.loco.amelia <- summarize.estimator(df, 'amelia.full', 'x')
+
+    z.loco.zhang <- summarize.estimator(df, 'zhang', 'z')
+    x.loco.zhang <- summarize.estimator(df, 'zhang', 'x')
+
+
+    z.loco.gmm <- summarize.estimator(df, 'gmm', 'z')
+    x.loco.gmm <- summarize.estimator(df, 'gmm', 'x')
+
+    
+
+
     ## x.mle <- summarize.estimator(df, 'mle', 'x')
 
     ## z.mle <- summarize.estimator(df, 'mle', 'z')
 
     accuracy <- df[,mean(accuracy)]
-    plot.df <- rbindlist(list(x.true,z.true,x.loa0.feasible,z.loa0.feasible,x.loa0.mle,z.loa0.mle,x.loco.feasible, z.loco.feasible, x.loco.mle, z.loco.mle),use.names=T)
+    plot.df <- rbindlist(list(x.true,z.true,x.loa0.feasible,z.loa0.feasible,x.loa0.mle,z.loa0.mle,x.loco.feasible, z.loco.feasible, x.loco.mle, z.loco.mle, x.loco.amelia, z.loco.amelia,x.loco.zhang, z.loco.zhang,x.loco.gmm, z.loco.gmm,x.naive,z.naive),use.names=T)
     plot.df[,accuracy := accuracy]
     plot.df <- plot.df[,":="(sd.est=sqrt(var.est)/N.sims)]
     return(plot.df)
 }
 
 
-plot.df <- read_feather(args$infile)
-print(unique(plot.df$N))
+sims.df <- read_feather(args$infile)
+print(unique(sims.df$N))
 
 # df <- df[apply(df,1,function(x) !any(is.na(x)))]
 
-if(!('Bzx' %in% names(plot.df)))
-    plot.df[,Bzx:=NA]
+if(!('Bzx' %in% names(sims.df)))
+    sims.df[,Bzx:=NA]
 
-if(!('accuracy_imbalance_difference' %in% names(plot.df)))
-    plot.df[,accuracy_imbalance_difference:=NA]
+if(!('accuracy_imbalance_difference' %in% names(sims.df)))
+    sims.df[,accuracy_imbalance_difference:=NA]
 
-unique(plot.df[,'accuracy_imbalance_difference'])
+unique(sims.df[,'accuracy_imbalance_difference'])
 
 #plot.df <- build_plot_dataset(df[accuracy_imbalance_difference==0.1][N==700])
-plot.df <- build_plot_dataset(plot.df)
+plot.df <- build_plot_dataset(sims.df)
 change.remember.file("remember_irr.RDS",clear=TRUE)
 remember(plot.df,args$name)
 
+
+set.remember.prefix(gsub("plot.df.","",args$name))
+remember(median(sims.df$loco.accuracy),'med.loco.acc')
+
 #ggplot(df,aes(x=Bxy.est.mle)) + geom_histogram() + facet_grid(accuracy_imbalance_difference ~ Bzy)
 
 ## ## ## df[gmm.ER_pval<0.05]

Community Data Science Collective || Want to submit a patch?