code.communitydata.science - ml_measurement_error_public.git/blobdiff - simulations/summarize_estimator.R
real-data example on raw perspective scores
[ml_measurement_error_public.git] / simulations / summarize_estimator.R
index 3e4209f42f4c3486a89c1499e221c297b424b643..1e1341d2514096e795fcf6e937de9ab521c39abb 100644 (file)
@@ -1,17 +1,21 @@
 
 summarize.estimator <- function(df, suffix='naive', coefname='x'){
 
-    part <- df[,c('N',
-                  'm',
-                  'Bxy',
-                  paste0('B',coefname,'y.est.',suffix),
-                  paste0('B',coefname,'y.ci.lower.',suffix),
-                  paste0('B',coefname,'y.ci.upper.',suffix),
-                  'y_explained_variance',
-                  'Bzx',
-                  'Bzy',
-                  'accuracy_imbalance_difference'
-                  ),
+    reported_vars <- c(
+                       'Bxy',
+                       paste0('B',coefname,'y.est.',suffix),
+                       paste0('B',coefname,'y.ci.lower.',suffix),
+                       paste0('B',coefname,'y.ci.upper.',suffix)
+                       )
+
+    
+    grouping_vars <- c('N','m','B0', 'Bxy', 'Bzy', 'Bzx', 'Px', 'Py','y_explained_variance', 'prediction_accuracy','outcome_formula','proxy_formula','truth_formula','z_bias','y_bias')
+
+    grouping_vars <- grouping_vars[grouping_vars %in% names(df)]
+
+    part <- df[,
+               c(reported_vars,
+                 grouping_vars),
                with=FALSE]
 
 
@@ -27,12 +31,14 @@ summarize.estimator <- function(df, suffix='naive', coefname='x'){
 
     part.plot <- part[, .(p.true.in.ci = mean(true.in.ci),
                           mean.bias = mean(bias),
-                          mean.est = mean(.SD[[paste0('B',coefname,'y.est.',suffix)]]),
-                          var.est = var(.SD[[paste0('B',coefname,'y.est.',suffix)]]),
+                          mean.est = mean(.SD[[paste0('B',coefname,'y.est.',suffix)]],na.rm=T),
+                          var.est = var(.SD[[paste0('B',coefname,'y.est.',suffix)]],na.rm=T),
                           est.upper.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.975,na.rm=T),
                           est.lower.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.025,na.rm=T),
                           mean.ci.upper = mean(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],na.rm=T),
                           mean.ci.lower = mean(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],na.rm=T),
+                          median.ci.upper = median(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],na.rm=T),
+                          median.ci.lower = median(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],na.rm=T),
                           ci.upper.975 = quantile(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],0.975,na.rm=T),
                           ci.upper.025 = quantile(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],0.025,na.rm=T),
                           ci.lower.975 = quantile(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],0.975,na.rm=T),
@@ -43,7 +49,7 @@ summarize.estimator <- function(df, suffix='naive', coefname='x'){
                           variable=coefname,
                           method=suffix
                           ),
-                      by=c("N","m",'y_explained_variance','Bzx', 'Bzy', 'accuracy_imbalance_difference')
+                      by=grouping_vars,
                       ]
     
     return(part.plot)

Community Data Science Collective || Want to submit a patch?