]> code.communitydata.science - ml_measurement_error_public.git/blob - simulations/simulation_base.R
add missing simulation code
[ml_measurement_error_public.git] / simulations / simulation_base.R
1 library(predictionError)
2 library(mecor)
3 options(amelia.parallel="no",
4         amelia.ncpus=1)
5 library(Amelia)
6 library(Zelig)
7
8 logistic <- function(x) {1/(1+exp(-1*x))}
9
10 run_simulation <-  function(df, result){
11
12     accuracy <- df[,mean(w_pred==x)]
13     result <- append(result, list(accuracy=accuracy))
14
15     (model.true <- lm(y ~ x + g, data=df))
16     true.ci.Bxy <- confint(model.true)['x',]
17     true.ci.Bgy <- confint(model.true)['g',]
18
19     result <- append(result, list(Bxy.est.true=coef(model.true)['x'],
20                                   Bgy.est.true=coef(model.true)['g'],
21                                   Bxy.ci.upper.true = true.ci.Bxy[2],
22                                   Bxy.ci.lower.true = true.ci.Bxy[1],
23                                   Bgy.ci.upper.true = true.ci.Bgy[2],
24                                   Bgy.ci.lower.true = true.ci.Bgy[1]))
25                                   
26     (model.feasible <- lm(y~x.obs+g,data=df))
27
28     feasible.ci.Bxy <- confint(model.feasible)['x.obs',]
29     result <- append(result, list(Bxy.est.feasible=coef(model.feasible)['x.obs'],
30                                   Bxy.ci.upper.feasible = feasible.ci.Bxy[2],
31                                   Bxy.ci.lower.feasible = feasible.ci.Bxy[1]))
32
33     feasible.ci.Bgy <- confint(model.feasible)['g',]
34     result <- append(result, list(Bgy.est.feasible=coef(model.feasible)['g'],
35                                   Bgy.ci.upper.feasible = feasible.ci.Bgy[2],
36                                   Bgy.ci.lower.feasible = feasible.ci.Bgy[1]))
37
38     (model.naive <- lm(y~w+g, data=df))
39     
40     naive.ci.Bxy <- confint(model.naive)['w',]
41     naive.ci.Bgy <- confint(model.naive)['g',]
42
43     result <- append(result, list(Bxy.est.naive=coef(model.naive)['w'],
44                                   Bgy.est.naive=coef(model.naive)['g'],
45                                   Bxy.ci.upper.naive = naive.ci.Bxy[2],
46                                   Bxy.ci.lower.naive = naive.ci.Bxy[1],
47                                   Bgy.ci.upper.naive = naive.ci.Bgy[2],
48                                   Bgy.ci.lower.naive = naive.ci.Bgy[1]))
49                                   
50
51     ## multiple imputation when k is observed
52     ## amelia does great at this one.
53     noms <- c()
54     if(length(unique(df$x.obs)) <=2){
55         noms <- c(noms, 'x.obs')
56     }
57
58     if(length(unique(df$g)) <=2){
59         noms <- c(noms, 'g')
60     }
61
62
63     amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('x','w_pred'),noms=noms)
64     mod.amelia.k <- zelig(y~x.obs+g, model='ls', data=amelia.out.k$imputations, cite=FALSE)
65     (coefse <- combine_coef_se(mod.amelia.k, messages=FALSE))
66
67     est.x.mi <- coefse['x.obs','Estimate']
68     est.x.se <- coefse['x.obs','Std.Error']
69     result <- append(result,
70                      list(Bxy.est.amelia.full = est.x.mi,
71                           Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se,
72                           Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se
73                           ))
74
75     est.g.mi <- coefse['g','Estimate']
76     est.g.se <- coefse['g','Std.Error']
77
78     result <- append(result,
79                      list(Bgy.est.amelia.full = est.g.mi,
80                           Bgy.ci.upper.amelia.full = est.g.mi + 1.96 * est.g.se,
81                           Bgy.ci.lower.amelia.full = est.g.mi - 1.96 * est.g.se
82                           ))
83
84     ## What if we can't observe k -- most realistic scenario. We can't include all the ML features in a model.
85     amelia.out.nok <- amelia(df, m=200, p2s=0, idvars=c("x","k","w_pred"), noms=noms)
86     mod.amelia.nok <- zelig(y~x.obs+g, model='ls', data=amelia.out.nok$imputations, cite=FALSE)
87     (coefse <- combine_coef_se(mod.amelia.nok, messages=FALSE))
88
89     est.x.mi <- coefse['x.obs','Estimate']
90     est.x.se <- coefse['x.obs','Std.Error']
91     result <- append(result,
92                      list(Bxy.est.amelia.nok = est.x.mi,
93                           Bxy.ci.upper.amelia.nok = est.x.mi + 1.96 * est.x.se,
94                           Bxy.ci.lower.amelia.nok = est.x.mi - 1.96 * est.x.se
95                           ))
96
97     est.g.mi <- coefse['g','Estimate']
98     est.g.se <- coefse['g','Std.Error']
99
100     result <- append(result,
101                      list(Bgy.est.amelia.nok = est.g.mi,
102                           Bgy.ci.upper.amelia.nok = est.g.mi + 1.96 * est.g.se,
103                           Bgy.ci.lower.amelia.nok = est.g.mi - 1.96 * est.g.se
104                           ))
105
106     N <- nrow(df)
107     m <- nrow(df[!is.na(x.obs)])
108     p <- v <- train <- rep(0,N)
109     M <- m
110     p[(M+1):(N)] <- 1
111     v[1:(M)] <- 1
112     df <- df[order(x.obs)]
113     y <- df[,y]
114     x <- df[,x.obs]
115     g <- df[,g]
116     w <- df[,w]
117     # gmm gets pretty close
118     (gmm.res <- predicted_covariates(y, x, g, w, v, train, p, max_iter=100, verbose=TRUE))
119
120     result <- append(result,
121                      list(Bxy.est.gmm = gmm.res$beta[1,1],
122                           Bxy.ci.upper.gmm = gmm.res$confint[1,2],
123                           Bxy.ci.lower.gmm = gmm.res$confint[1,1],
124                           gmm.ER_pval = gmm.res$ER_pval
125                           ))
126
127     result <- append(result,
128                      list(Bgy.est.gmm = gmm.res$beta[2,1],
129                           Bgy.ci.upper.gmm = gmm.res$confint[2,2],
130                           Bgy.ci.lower.gmm = gmm.res$confint[2,1]))
131
132
133     mod.calibrated.mle <- mecor(y ~ MeasError(w, reference = x.obs) + g, df, B=400, method='efficient')
134     (mod.calibrated.mle)
135     (mecor.ci <- summary(mod.calibrated.mle)$c$ci['x.obs',])
136     result <- append(result, list(
137                                  Bxy.est.mecor = mecor.ci['Estimate'],
138                                  Bxy.upper.mecor = mecor.ci['UCI'],
139                                  Bxy.lower.mecor = mecor.ci['LCI'])
140                      )
141
142     (mecor.ci <- summary(mod.calibrated.mle)$c$ci['g',])
143
144     result <- append(result, list(
145                                  Bgy.est.mecor = mecor.ci['Estimate'],
146                                  Bgy.upper.mecor = mecor.ci['UCI'],
147                                  Bgy.lower.mecor = mecor.ci['LCI'])
148                      )
149
150 ##    clean up memory
151     rm(list=c("df","y","x","g","w","v","train","p","amelia.out.k","amelia.out.nok", "mod.calibrated.mle","gmm.res","mod.amelia.k","mod.amelia.nok", "model.true","model.naive","model.feasible"))
152     
153     gc()
154     return(result)
155 }

Community Data Science Collective || Want to submit a patch?