]> code.communitydata.science - ml_measurement_error_public.git/blobdiff - simulations/summarize_estimator.R
update simulation base from hyak
[ml_measurement_error_public.git] / simulations / summarize_estimator.R
index 8199c0619496f81607dc22c5d7e5736f2ab106f6..1e1341d2514096e795fcf6e937de9ab521c39abb 100644 (file)
@@ -1,22 +1,27 @@
 
 summarize.estimator <- function(df, suffix='naive', coefname='x'){
 
 
 summarize.estimator <- function(df, suffix='naive', coefname='x'){
 
-    part <- df[,c('N',
-                  'm',
-                  'Bxy',
-                  paste0('B',coefname,'y.est.',suffix),
-                  paste0('B',coefname,'y.ci.lower.',suffix),
-                  paste0('B',coefname,'y.ci.upper.',suffix),
-                  'y_explained_variance',
-                  'Bzx',
-                  'Bzy',
-                  'accuracy_imbalance_difference'
-                  ),
-               with=FALSE]
+    reported_vars <- c(
+                       'Bxy',
+                       paste0('B',coefname,'y.est.',suffix),
+                       paste0('B',coefname,'y.ci.lower.',suffix),
+                       paste0('B',coefname,'y.ci.upper.',suffix)
+                       )
+
     
     
+    grouping_vars <- c('N','m','B0', 'Bxy', 'Bzy', 'Bzx', 'Px', 'Py','y_explained_variance', 'prediction_accuracy','outcome_formula','proxy_formula','truth_formula','z_bias','y_bias')
+
+    grouping_vars <- grouping_vars[grouping_vars %in% names(df)]
+
+    part <- df[,
+               c(reported_vars,
+                 grouping_vars),
+               with=FALSE]
+
+
     true.in.ci <- as.integer((part$Bxy >= part[[paste0('B',coefname,'y.ci.lower.',suffix)]]) & (part$Bxy <= part[[paste0('B',coefname,'y.ci.upper.',suffix)]]))
     zero.in.ci <- as.integer(0 >= part[[paste0('B',coefname,'y.ci.lower.',suffix)]]) & (0 <= part[[paste0('B',coefname,'y.ci.upper.',suffix)]])
     true.in.ci <- as.integer((part$Bxy >= part[[paste0('B',coefname,'y.ci.lower.',suffix)]]) & (part$Bxy <= part[[paste0('B',coefname,'y.ci.upper.',suffix)]]))
     zero.in.ci <- as.integer(0 >= part[[paste0('B',coefname,'y.ci.lower.',suffix)]]) & (0 <= part[[paste0('B',coefname,'y.ci.upper.',suffix)]])
-    bias <- part$Bxy - part[[paste0('B',coefname,'y.est.',suffix)]]
+    bias <- part[[paste0('B',coefname,'y')]] - part[[paste0('B',coefname,'y.est.',suffix)]]
     sign.correct <- as.integer(sign(part$Bxy) == sign(part[[paste0('B',coefname,'y.est.',suffix)]]))
 
     part <- part[,':='(true.in.ci = true.in.ci,
     sign.correct <- as.integer(sign(part$Bxy) == sign(part[[paste0('B',coefname,'y.est.',suffix)]]))
 
     part <- part[,':='(true.in.ci = true.in.ci,
@@ -26,16 +31,25 @@ summarize.estimator <- function(df, suffix='naive', coefname='x'){
 
     part.plot <- part[, .(p.true.in.ci = mean(true.in.ci),
                           mean.bias = mean(bias),
 
     part.plot <- part[, .(p.true.in.ci = mean(true.in.ci),
                           mean.bias = mean(bias),
-                          mean.est = mean(.SD[[paste0('B',coefname,'y.est.',suffix)]]),
-                          var.est = var(.SD[[paste0('B',coefname,'y.est.',suffix)]]),
-                          est.upper.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.95,na.rm=T),
-                          est.lower.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.05,na.rm=T),
+                          mean.est = mean(.SD[[paste0('B',coefname,'y.est.',suffix)]],na.rm=T),
+                          var.est = var(.SD[[paste0('B',coefname,'y.est.',suffix)]],na.rm=T),
+                          est.upper.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.975,na.rm=T),
+                          est.lower.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.025,na.rm=T),
+                          mean.ci.upper = mean(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],na.rm=T),
+                          mean.ci.lower = mean(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],na.rm=T),
+                          median.ci.upper = median(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],na.rm=T),
+                          median.ci.lower = median(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],na.rm=T),
+                          ci.upper.975 = quantile(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],0.975,na.rm=T),
+                          ci.upper.025 = quantile(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],0.025,na.rm=T),
+                          ci.lower.975 = quantile(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],0.975,na.rm=T),
+                          ci.lower.025 = quantile(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],0.025,na.rm=T),
+                          N.ci.is.NA = sum(is.na(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]])),
                           N.sims = .N,
                           p.sign.correct = mean(as.integer(sign.correct & (! zero.in.ci))),
                           variable=coefname,
                           method=suffix
                           ),
                           N.sims = .N,
                           p.sign.correct = mean(as.integer(sign.correct & (! zero.in.ci))),
                           variable=coefname,
                           method=suffix
                           ),
-                      by=c("N","m",'y_explained_variance','Bzx', 'Bzy', 'accuracy_imbalance_difference')
+                      by=grouping_vars,
                       ]
     
     return(part.plot)
                       ]
     
     return(part.plot)

Community Data Science Collective || Want to submit a patch?