X-Git-Url: https://code.communitydata.science/ml_measurement_error_public.git/blobdiff_plain/979dc14b6861ae31f00d56392fd5b8cf69f17333..bb6f5e4731c603b336afb1a900bc9083d1b175bf:/simulations/summarize_estimator.R?ds=sidebyside diff --git a/simulations/summarize_estimator.R b/simulations/summarize_estimator.R index 8199c06..1e1341d 100644 --- a/simulations/summarize_estimator.R +++ b/simulations/summarize_estimator.R @@ -1,22 +1,27 @@ summarize.estimator <- function(df, suffix='naive', coefname='x'){ - part <- df[,c('N', - 'm', - 'Bxy', - paste0('B',coefname,'y.est.',suffix), - paste0('B',coefname,'y.ci.lower.',suffix), - paste0('B',coefname,'y.ci.upper.',suffix), - 'y_explained_variance', - 'Bzx', - 'Bzy', - 'accuracy_imbalance_difference' - ), - with=FALSE] + reported_vars <- c( + 'Bxy', + paste0('B',coefname,'y.est.',suffix), + paste0('B',coefname,'y.ci.lower.',suffix), + paste0('B',coefname,'y.ci.upper.',suffix) + ) + + grouping_vars <- c('N','m','B0', 'Bxy', 'Bzy', 'Bzx', 'Px', 'Py','y_explained_variance', 'prediction_accuracy','outcome_formula','proxy_formula','truth_formula','z_bias','y_bias') + + grouping_vars <- grouping_vars[grouping_vars %in% names(df)] + + part <- df[, + c(reported_vars, + grouping_vars), + with=FALSE] + + true.in.ci <- as.integer((part$Bxy >= part[[paste0('B',coefname,'y.ci.lower.',suffix)]]) & (part$Bxy <= part[[paste0('B',coefname,'y.ci.upper.',suffix)]])) zero.in.ci <- as.integer(0 >= part[[paste0('B',coefname,'y.ci.lower.',suffix)]]) & (0 <= part[[paste0('B',coefname,'y.ci.upper.',suffix)]]) - bias <- part$Bxy - part[[paste0('B',coefname,'y.est.',suffix)]] + bias <- part[[paste0('B',coefname,'y')]] - part[[paste0('B',coefname,'y.est.',suffix)]] sign.correct <- as.integer(sign(part$Bxy) == sign(part[[paste0('B',coefname,'y.est.',suffix)]])) part <- part[,':='(true.in.ci = true.in.ci, @@ -26,16 +31,25 @@ summarize.estimator <- function(df, suffix='naive', coefname='x'){ part.plot <- part[, .(p.true.in.ci = mean(true.in.ci), mean.bias = mean(bias), - mean.est = mean(.SD[[paste0('B',coefname,'y.est.',suffix)]]), - var.est = var(.SD[[paste0('B',coefname,'y.est.',suffix)]]), - est.upper.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.95,na.rm=T), - est.lower.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.05,na.rm=T), + mean.est = mean(.SD[[paste0('B',coefname,'y.est.',suffix)]],na.rm=T), + var.est = var(.SD[[paste0('B',coefname,'y.est.',suffix)]],na.rm=T), + est.upper.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.975,na.rm=T), + est.lower.95 = quantile(.SD[[paste0('B',coefname,'y.est.',suffix)]],0.025,na.rm=T), + mean.ci.upper = mean(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],na.rm=T), + mean.ci.lower = mean(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],na.rm=T), + median.ci.upper = median(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],na.rm=T), + median.ci.lower = median(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],na.rm=T), + ci.upper.975 = quantile(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],0.975,na.rm=T), + ci.upper.025 = quantile(.SD[[paste0('B',coefname,'y.ci.upper.',suffix)]],0.025,na.rm=T), + ci.lower.975 = quantile(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],0.975,na.rm=T), + ci.lower.025 = quantile(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]],0.025,na.rm=T), + N.ci.is.NA = sum(is.na(.SD[[paste0('B',coefname,'y.ci.lower.',suffix)]])), N.sims = .N, p.sign.correct = mean(as.integer(sign.correct & (! zero.in.ci))), variable=coefname, method=suffix ), - by=c("N","m",'y_explained_variance','Bzx', 'Bzy', 'accuracy_imbalance_difference') + by=grouping_vars, ] return(part.plot)