Update the real-data example code and rerun the project.

[ml_measurement_error_public.git] / simulations / plot_irr_example.R
diff --git a/simulations/plot_irr_example.R b/simulations/plot_irr_example.R

index bf5e66193b749dd8a31b8765de45517d60727506..4ec79dce6c1b3175ad84767f947ea7508b12e125 100644 (file)
--- a/simulations/plot_irr_example.R
+++ b/simulations/plot_irr_example.R
@@ -17,6 +17,10 @@ build_plot_dataset <- function(df){
  
      z.true <-  summarize.estimator(df, 'true','z')
  
+    x.naive <-  summarize.estimator(df, 'naive','x')
+
+    z.naive <-  summarize.estimator(df, 'naive','z')
+
      x.loa0.feasible <- summarize.estimator(df, 'loa0.feasible','x')
      
      z.loa0.feasible <- summarize.estimator(df,'loa0.feasible','z')
@@ -33,36 +37,55 @@ build_plot_dataset <- function(df){
  
      z.loco.mle <- summarize.estimator(df, 'loco.mle', 'z')
  
+    x.loco.mle <- summarize.estimator(df, 'loco.mle', 'x')
+
+    z.loco.amelia <- summarize.estimator(df, 'amelia.full', 'z')
+    x.loco.amelia <- summarize.estimator(df, 'amelia.full', 'x')
+
+    z.loco.zhang <- summarize.estimator(df, 'zhang', 'z')
+    x.loco.zhang <- summarize.estimator(df, 'zhang', 'x')
+
+
+    z.loco.gmm <- summarize.estimator(df, 'gmm', 'z')
+    x.loco.gmm <- summarize.estimator(df, 'gmm', 'x')
+
+    
+
+
      ## x.mle <- summarize.estimator(df, 'mle', 'x')
  
      ## z.mle <- summarize.estimator(df, 'mle', 'z')
  
      accuracy <- df[,mean(accuracy)]
-    plot.df <- rbindlist(list(x.true,z.true,x.loa0.feasible,z.loa0.feasible,x.loa0.mle,z.loa0.mle,x.loco.feasible, z.loco.feasible, x.loco.mle, z.loco.mle),use.names=T)
+    plot.df <- rbindlist(list(x.true,z.true,x.loa0.feasible,z.loa0.feasible,x.loa0.mle,z.loa0.mle,x.loco.feasible, z.loco.feasible, x.loco.mle, z.loco.mle, x.loco.amelia, z.loco.amelia,x.loco.zhang, z.loco.zhang,x.loco.gmm, z.loco.gmm,x.naive,z.naive),use.names=T)
      plot.df[,accuracy := accuracy]
      plot.df <- plot.df[,":="(sd.est=sqrt(var.est)/N.sims)]
      return(plot.df)
  }
  
  
-plot.df <- read_feather(args$infile)
-print(unique(plot.df$N))
+sims.df <- read_feather(args$infile)
+print(unique(sims.df$N))
  
  # df <- df[apply(df,1,function(x) !any(is.na(x)))]
  
-if(!('Bzx' %in% names(plot.df)))
-    plot.df[,Bzx:=NA]
+if(!('Bzx' %in% names(sims.df)))
+    sims.df[,Bzx:=NA]
  
-if(!('accuracy_imbalance_difference' %in% names(plot.df)))
-    plot.df[,accuracy_imbalance_difference:=NA]
+if(!('accuracy_imbalance_difference' %in% names(sims.df)))
+    sims.df[,accuracy_imbalance_difference:=NA]
  
-unique(plot.df[,'accuracy_imbalance_difference'])
+unique(sims.df[,'accuracy_imbalance_difference'])
  
  #plot.df <- build_plot_dataset(df[accuracy_imbalance_difference==0.1][N==700])
-plot.df <- build_plot_dataset(plot.df)
+plot.df <- build_plot_dataset(sims.df)
  change.remember.file("remember_irr.RDS",clear=TRUE)
  remember(plot.df,args$name)
  
+
+set.remember.prefix(gsub("plot.df.","",args$name))
+remember(median(sims.df$loco.accuracy),'med.loco.acc')
+
  #ggplot(df,aes(x=Bxy.est.mle)) + geom_histogram() + facet_grid(accuracy_imbalance_difference ~ Bzy)
  
  ## ## ## df[gmm.ER_pval<0.05]