X-Git-Url: https://code.communitydata.science/ml_measurement_error_public.git/blobdiff_plain/979dc14b6861ae31f00d56392fd5b8cf69f17333..69948cae1e691191fc86e6abdaa485bc98f38f1f:/simulations/plot_irr_example.R diff --git a/simulations/plot_irr_example.R b/simulations/plot_irr_example.R index bf5e661..4ec79dc 100644 --- a/simulations/plot_irr_example.R +++ b/simulations/plot_irr_example.R @@ -17,6 +17,10 @@ build_plot_dataset <- function(df){ z.true <- summarize.estimator(df, 'true','z') + x.naive <- summarize.estimator(df, 'naive','x') + + z.naive <- summarize.estimator(df, 'naive','z') + x.loa0.feasible <- summarize.estimator(df, 'loa0.feasible','x') z.loa0.feasible <- summarize.estimator(df,'loa0.feasible','z') @@ -33,36 +37,55 @@ build_plot_dataset <- function(df){ z.loco.mle <- summarize.estimator(df, 'loco.mle', 'z') + x.loco.mle <- summarize.estimator(df, 'loco.mle', 'x') + + z.loco.amelia <- summarize.estimator(df, 'amelia.full', 'z') + x.loco.amelia <- summarize.estimator(df, 'amelia.full', 'x') + + z.loco.zhang <- summarize.estimator(df, 'zhang', 'z') + x.loco.zhang <- summarize.estimator(df, 'zhang', 'x') + + + z.loco.gmm <- summarize.estimator(df, 'gmm', 'z') + x.loco.gmm <- summarize.estimator(df, 'gmm', 'x') + + + + ## x.mle <- summarize.estimator(df, 'mle', 'x') ## z.mle <- summarize.estimator(df, 'mle', 'z') accuracy <- df[,mean(accuracy)] - plot.df <- rbindlist(list(x.true,z.true,x.loa0.feasible,z.loa0.feasible,x.loa0.mle,z.loa0.mle,x.loco.feasible, z.loco.feasible, x.loco.mle, z.loco.mle),use.names=T) + plot.df <- rbindlist(list(x.true,z.true,x.loa0.feasible,z.loa0.feasible,x.loa0.mle,z.loa0.mle,x.loco.feasible, z.loco.feasible, x.loco.mle, z.loco.mle, x.loco.amelia, z.loco.amelia,x.loco.zhang, z.loco.zhang,x.loco.gmm, z.loco.gmm,x.naive,z.naive),use.names=T) plot.df[,accuracy := accuracy] plot.df <- plot.df[,":="(sd.est=sqrt(var.est)/N.sims)] return(plot.df) } -plot.df <- read_feather(args$infile) -print(unique(plot.df$N)) +sims.df <- read_feather(args$infile) +print(unique(sims.df$N)) # df <- df[apply(df,1,function(x) !any(is.na(x)))] -if(!('Bzx' %in% names(plot.df))) - plot.df[,Bzx:=NA] +if(!('Bzx' %in% names(sims.df))) + sims.df[,Bzx:=NA] -if(!('accuracy_imbalance_difference' %in% names(plot.df))) - plot.df[,accuracy_imbalance_difference:=NA] +if(!('accuracy_imbalance_difference' %in% names(sims.df))) + sims.df[,accuracy_imbalance_difference:=NA] -unique(plot.df[,'accuracy_imbalance_difference']) +unique(sims.df[,'accuracy_imbalance_difference']) #plot.df <- build_plot_dataset(df[accuracy_imbalance_difference==0.1][N==700]) -plot.df <- build_plot_dataset(plot.df) +plot.df <- build_plot_dataset(sims.df) change.remember.file("remember_irr.RDS",clear=TRUE) remember(plot.df,args$name) + +set.remember.prefix(gsub("plot.df.","",args$name)) +remember(median(sims.df$loco.accuracy),'med.loco.acc') + #ggplot(df,aes(x=Bxy.est.mle)) + geom_histogram() + facet_grid(accuracy_imbalance_difference ~ Bzy) ## ## ## df[gmm.ER_pval<0.05]