code.communitydata.science - ml_measurement_error_public.git/blob - simulations/summarize_estimator.R
real-data example on raw perspective scores
[ml_measurement_error_public.git] / simulations / summarize_estimator.R
1
#' Summarize simulation results for one estimator of one coefficient.
#'
#' Column names are built from `coefname` and `suffix`: the true coefficient
#' is read from `B<coefname>y`, the estimate from `B<coefname>y.est.<suffix>`,
#' and the confidence bounds from `B<coefname>y.ci.lower.<suffix>` and
#' `B<coefname>y.ci.upper.<suffix>`.
#'
#' @param df A data.table holding the columns named above, plus any of the
#'   known grouping columns (simulation parameters such as `N`, `m`, `Bxy`).
#'   Grouping columns absent from `df` are silently skipped.
#' @param suffix Estimator label used as the column-name suffix; also written
#'   to the `method` column of the result.
#' @param coefname Coefficient label (e.g. 'x' or 'z'); also written to the
#'   `variable` column of the result.
#' @return A data.table with one row per combination of the grouping columns,
#'   containing coverage (`p.true.in.ci`), mean bias, mean/variance/quantiles
#'   of the estimate, summaries of the confidence bounds, the number of
#'   missing lower bounds (`N.ci.is.NA`), the group size (`N.sims`), and the
#'   share of runs whose estimate has the correct sign with a confidence
#'   interval that excludes zero (`p.sign.correct`).
summarize.estimator <- function(df, suffix='naive', coefname='x'){

    # Derive every column name once instead of re-pasting it at each use.
    truth_col <- paste0('B', coefname, 'y')
    est_col <- paste0(truth_col, '.est.', suffix)
    lower_col <- paste0(truth_col, '.ci.lower.', suffix)
    upper_col <- paste0(truth_col, '.ci.upper.', suffix)

    grouping_vars <- c('N','m','B0', 'Bxy', 'Bzy', 'Bzx', 'Px', 'Py','y_explained_variance', 'prediction_accuracy','outcome_formula','proxy_formula','truth_formula','z_bias','y_bias')

    grouping_vars <- grouping_vars[grouping_vars %in% names(df)]

    # unique() keeps the true-coefficient column from being selected twice
    # when it is also a grouping column, which would duplicate column names.
    keep_cols <- unique(c(truth_col, est_col, lower_col, upper_col, grouping_vars))
    part <- df[, keep_cols, with=FALSE]

    truth_vals <- part[[truth_col]]
    est_vals <- part[[est_col]]
    lower_vals <- part[[lower_col]]
    upper_vals <- part[[upper_col]]

    # All per-run indicators are measured against the true value of the
    # coefficient being summarized (B<coefname>y), not always Bxy.
    covers_truth <- as.integer((truth_vals >= lower_vals) & (truth_vals <= upper_vals))
    covers_zero <- as.integer((0 >= lower_vals) & (0 <= upper_vals))
    est_bias <- truth_vals - est_vals
    sign_ok <- as.integer(sign(truth_vals) == sign(est_vals))

    # `part` is a fresh subset of `df`, so adding columns by reference here
    # does not touch the caller's table.
    part[, ':='(true.in.ci = covers_truth,
                zero.in.ci = covers_zero,
                bias = est_bias,
                sign.correct = sign_ok)]

    # NOTE: p.true.in.ci, mean.bias and p.sign.correct are computed without
    # na.rm, so a group containing a missing estimate or bound yields NA for
    # them; N.ci.is.NA reports how many lower bounds are missing.
    part.plot <- part[, .(p.true.in.ci = mean(true.in.ci),
                          mean.bias = mean(bias),
                          mean.est = mean(.SD[[est_col]], na.rm=TRUE),
                          var.est = var(.SD[[est_col]], na.rm=TRUE),
                          est.upper.95 = quantile(.SD[[est_col]], 0.975, na.rm=TRUE),
                          est.lower.95 = quantile(.SD[[est_col]], 0.025, na.rm=TRUE),
                          mean.ci.upper = mean(.SD[[upper_col]], na.rm=TRUE),
                          mean.ci.lower = mean(.SD[[lower_col]], na.rm=TRUE),
                          median.ci.upper = median(.SD[[upper_col]], na.rm=TRUE),
                          median.ci.lower = median(.SD[[lower_col]], na.rm=TRUE),
                          ci.upper.975 = quantile(.SD[[upper_col]], 0.975, na.rm=TRUE),
                          ci.upper.025 = quantile(.SD[[upper_col]], 0.025, na.rm=TRUE),
                          ci.lower.975 = quantile(.SD[[lower_col]], 0.975, na.rm=TRUE),
                          ci.lower.025 = quantile(.SD[[lower_col]], 0.025, na.rm=TRUE),
                          N.ci.is.NA = sum(is.na(.SD[[lower_col]])),
                          N.sims = .N,
                          # correct sign AND an interval that excludes zero
                          p.sign.correct = mean(as.integer(sign.correct & (! zero.in.ci))),
                          variable=coefname,
                          method=suffix
                          ),
                      by=grouping_vars
                      ]

    return(part.plot)
}

Community Data Science Collective || Want to submit a patch?