summarize.estimator <- function(df, suffix='naive', coefname='x'){
- part <- df[,c('N',
- 'm',
- 'Bxy',
- paste0('B',coefname,'y.est.',suffix),
- paste0('B',coefname,'y.ci.lower.',suffix),
- paste0('B',coefname,'y.ci.upper.',suffix),
- 'y_explained_variance',
- 'Bzx',
- 'Bzy',
- 'accuracy_imbalance_difference'
- ),
- with=FALSE]
+ reported_vars <- c(
+ 'Bxy',
+ paste0('B',coefname,'y.est.',suffix),
+ paste0('B',coefname,'y.ci.lower.',suffix),
+ paste0('B',coefname,'y.ci.upper.',suffix)
+ )
+
+ grouping_vars <- c('N','m','B0', 'Bxy', 'Bzy', 'Bzx', 'Px', 'Py','y_explained_variance', 'prediction_accuracy','outcome_formula','proxy_formula','truth_formula','z_bias','y_bias')
+
+ grouping_vars <- grouping_vars[grouping_vars %in% names(df)]
+
+ part <- df[,
+ c(reported_vars,
+ grouping_vars),
+ with=FALSE]
+
+
true.in.ci <- as.integer((part$Bxy >= part[[paste0('B',coefname,'y.ci.lower.',suffix)]]) & (part$Bxy <= part[[paste0('B',coefname,'y.ci.upper.',suffix)]]))
zero.in.ci <- as.integer(0 >= part[[paste0('B',coefname,'y.ci.lower.',suffix)]]) & (0 <= part[[paste0('B',coefname,'y.ci.upper.',suffix)]])
- bias <- part$Bxy - part[[paste0('B',coefname,'y.est.',suffix)]]
+ bias <- part[[paste0('B',coefname,'y')]] - part[[paste0('B',coefname,'y.est.',suffix)]]
sign.correct <- as.integer(sign(part$Bxy) == sign(part[[paste0('B',coefname,'y.est.',suffix)]]))
part <- part[,':='(true.in.ci = true.in.ci,
part.plot <- part[, .(p.true.in.ci = mean(true.in.ci),
mean.bias = mean(bias),
- mean.est = mean(.SD[[paste0('B',coefname,'y.est.',suffix)]]),
- var.est = var(.SD[[paste0('B',coefname,'y.est.',suffix)]]),
- est.upper.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.95,na.rm=T),
- est.lower.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.05,na.rm=T),
+ mean.est = mean(.SD[[paste0('B',coefname,'y.est.',suffix)]],na.rm=T),
+ var.est = var(.SD[[paste0('B',coefname,'y.est.',suffix)]],na.rm=T),
+ est.upper.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.975,na.rm=T),
+ est.lower.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.025,na.rm=T),
+ mean.ci.upper = mean(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],na.rm=T),
+ mean.ci.lower = mean(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],na.rm=T),
+ median.ci.upper = median(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],na.rm=T),
+ median.ci.lower = median(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],na.rm=T),
+ ci.upper.975 = quantile(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],0.975,na.rm=T),
+ ci.upper.025 = quantile(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],0.025,na.rm=T),
+ ci.lower.975 = quantile(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],0.975,na.rm=T),
+ ci.lower.025 = quantile(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],0.025,na.rm=T),
+ N.ci.is.NA = sum(is.na(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]])),
N.sims = .N,
p.sign.correct = mean(as.integer(sign.correct & (! zero.in.ci))),
variable=coefname,
method=suffix
),
- by=c("N","m",'y_explained_variance','Bzx', 'Bzy', 'accuracy_imbalance_difference')
+ by=grouping_vars,
]
return(part.plot)