simulations/simulation_base.R

   1 library(predictionError)
   2 library(mecor)
   3 options(amelia.parallel="no",
   4         amelia.ncpus=1)
   5 library(Amelia)
   6 library(Zelig)
   7 library(bbmle)
   8 library(matrixStats) # for numerically stable logsumexps
   9
  10 source("measerr_methods.R") ## for my more generic function.
  11
  12 ## This uses the pseudolikelihood approach from Carroll page 349.
  13 ## assumes MAR
  14 ## assumes differential error, but that only depends on Y
  15 ## inefficient, because pseudolikelihood
  16
  17 ## This uses the pseudo-likelihood approach from Carroll page 346.
  18 my.pseudo.mle <- function(df){
  19     p1.est <- mean(df[w_pred==1]$y.obs==1,na.rm=T)
  20     p0.est <- mean(df[w_pred==0]$y.obs==0,na.rm=T)
  21
  22     nll <- function(B0, Bxy, Bzy){
  23
  24         pw <- vector(mode='numeric',length=nrow(df))
  25         dfw1 <- df[w_pred==1]
  26         dfw0 <- df[w_pred==0]
  27         pw[df$w_pred==1] <- plogis(B0 + Bxy * dfw1$x + Bzy * dfw1$z, log=T)
  28         pw[df$w_pred==0] <- plogis(B0 + Bxy * dfw0$x + Bzy * dfw0$z, lower.tail=FALSE, log=T)
  29
  30         probs <- colLogSumExps(rbind(log(1 - p0.est), log(p1.est + p0.est - 1) + pw))
  31         return(-1*sum(probs))
  32     }
  33
  34     mlefit <- mle2(minuslogl = nll, start = list(B0=0.0, Bxy=0.0, Bzy=0.0), control=list(maxit=1e6),method='L-BFGS-B')
  35     return(mlefit)
  36
  37 }
  38
  39
  40 ## model from Zhang's arxiv paper, with predictions for y
  41 ## Zhang got this model from Hausman 1998
### I think this is actually equivalent to the pseudo.mle method
  43 zhang.mle.iv <- function(df){
  44     df.obs <- df[!is.na(x.obs)]
  45     df.unobs <- df[is.na(x.obs)]
  46
  47     tn <- df.obs[(w_pred == 0) & (x.obs == w_pred),.N]
  48     pn <- df.obs[(w_pred==0), .N]
  49     npv <- tn / pn
  50
  51     tp <- df.obs[(w_pred==1) & (x.obs == w_pred),.N]
  52     pp <- df.obs[(w_pred==1),.N]
  53     ppv <- tp / pp
  54
  55     nll <- function(B0=0, Bxy=0, Bzy=0, sigma_y=0.1){
  56
  57     ## fpr = 1 - TNR
  58     ### Problem: accounting for uncertainty in ppv / npv
  59
  60     ## fnr = 1 - TPR
  61     ll.y.obs <- with(df.obs, dnorm(y, B0 + Bxy * x + Bzy * z, sd=sigma_y,log=T))
  62     ll <- sum(ll.y.obs)
  63
  64     # unobserved case; integrate out x
  65     ll.x.1 <- with(df.unobs, dnorm(y, B0 + Bxy + Bzy * z, sd = sigma_y, log=T))
  66     ll.x.0 <- with(df.unobs, dnorm(y, B0 + Bzy * z, sd = sigma_y,log=T))
  67
  68     ## case x == 1
  69     lls.x.1 <- colLogSumExps(rbind(log(ppv) + ll.x.1, log(1-ppv) + ll.x.0))
  70
  71     ## case x == 0
  72     lls.x.0 <- colLogSumExps(rbind(log(1-npv) + ll.x.1, log(npv) + ll.x.0))
  73
  74     lls <- colLogSumExps(rbind(df.unobs$w_pred * lls.x.1, (1-df.unobs$w_pred) * lls.x.0))
  75     ll <- ll + sum(lls)
  76     return(-ll)
  77     }
  78     mlefit <- mle2(minuslogl = nll, control=list(maxit=1e6), lower=list(sigma_y=0.0001, B0=-Inf, Bxy=-Inf, Bzy=-Inf),
  79                    upper=list(sigma_y=Inf, B0=Inf, Bxy=Inf, Bzy=Inf),method='L-BFGS-B')
  80     return(mlefit)
  81 }
  82
## this is equivalent to the pseudo-likelihood model from Carroll
  84 ## zhang.mle.dv <- function(df){
  85
  86 ##     nll <- function(B0=0, Bxy=0, Bzy=0, ppv=0.9, npv=0.9){
  87 ##     df.obs <- df[!is.na(y.obs)]
  88
  89 ##     ## fpr = 1 - TNR
  90 ##     ll.w0y0 <- with(df.obs[y.obs==0],dbinom(1-w_pred,1,npv,log=TRUE))
  91 ##     ll.w1y1 <- with(df.obs[y.obs==1],dbinom(w_pred,1,ppv,log=TRUE))
  92
  93 ##     # observed case
  94 ##     ll.y.obs <- vector(mode='numeric', length=nrow(df.obs))
  95 ##     ll.y.obs[df.obs$y.obs==1] <- with(df.obs[y.obs==1], plogis(B0 + Bxy * x + Bzy * z,log=T))
  96 ##     ll.y.obs[df.obs$y.obs==0] <- with(df.obs[y.obs==0], plogis(B0 + Bxy * x + Bzy * z,log=T,lower.tail=FALSE))
  97
  98 ##     ll <- sum(ll.y.obs) + sum(ll.w0y0) + sum(ll.w1y1)
  99
 100 ##     # unobserved case; integrate out y
 101 ##     ## case y = 1
 102 ##     ll.y.1 <- vector(mode='numeric', length=nrow(df))
 103 ##     pi.y.1 <- with(df,plogis(B0 + Bxy * x + Bzy*z, log=T))
 104 ##     ## P(w=1| y=1)P(y=1) + P(w=0|y=1)P(y=1) = P(w=1,y=1) + P(w=0,y=1)
 105 ##     lls.y.1 <- colLogSumExps(rbind(log(ppv) + pi.y.1, log(1-ppv) + pi.y.1))
 106
 107 ##     ## case y = 0
 108 ##     ll.y.0 <- vector(mode='numeric', length=nrow(df))
 109 ##     pi.y.0 <- with(df,plogis(B0 + Bxy * x + Bzy*z, log=T,lower.tail=FALSE))
 110
 111 ##     ## P(w=1 | y=0)P(y=0) + P(w=0|y=0)P(y=0) = P(w=1,y=0) + P(w=0,y=0)
 112 ##     lls.y.0 <- colLogSumExps(rbind(log(npv) + pi.y.0, log(1-npv) + pi.y.0))
 113
 114 ##     lls <- colLogSumExps(rbind(lls.y.1, lls.y.0))
 115 ##     ll <- ll + sum(lls)
 116 ##     return(-ll)
 117 ##     }
 118 ##     mlefit <- mle2(minuslogl = nll, control=list(maxit=1e6),method='L-BFGS-B',lower=list(B0=-Inf, Bxy=-Inf, Bzy=-Inf, ppv=0.001,npv=0.001),
 119 ##                    upper=list(B0=Inf, Bxy=Inf, Bzy=Inf,ppv=0.999,npv=0.999))
 120 ##     return(mlefit)
 121 ## }
 122
 123 zhang.mle.dv <- function(df){
 124     df.obs <- df[!is.na(y.obs)]
 125     df.unobs <- df[is.na(y.obs)]
 126
 127     fp <- df.obs[(w_pred==1) & (y.obs != w_pred),.N]
 128     p <- df.obs[(w_pred==1),.N]
 129     fpr <- fp / p
 130     fn <- df.obs[(w_pred==0) & (y.obs != w_pred), .N]
 131     n <- df.obs[(w_pred==0),.N]
 132     fnr <- fn / n
 133
 134     nll <- function(B0=0, Bxy=0, Bzy=0){
 135
 136
 137         ## observed case
 138         ll.y.obs <- vector(mode='numeric', length=nrow(df.obs))
 139         ll.y.obs[df.obs$y.obs==1] <- with(df.obs[y.obs==1], plogis(B0 + Bxy * x + Bzy * z,log=T))
 140         ll.y.obs[df.obs$y.obs==0] <- with(df.obs[y.obs==0], plogis(B0 + Bxy * x + Bzy * z,log=T,lower.tail=FALSE))
 141
 142         ll <- sum(ll.y.obs)
 143
 144         pi.y.1 <- with(df,plogis(B0 + Bxy * x + Bzy*z, log=T))
 145         pi.y.0 <- with(df,plogis(B0 + Bxy * x + Bzy*z, log=T,lower.tail=FALSE))
 146
 147         lls <- with(df.unobs, colLogSumExps(rbind(w_pred * colLogSumExps(rbind(log(fpr), log(1 - fnr - fpr)+pi.y.1)),
 148         (1-w_pred) * colLogSumExps(rbind(log(1-fpr), log(1 - fnr - fpr)+pi.y.0)))))
 149
 150         ll <- ll + sum(lls)
 151         return(-ll)
 152     }
 153     mlefit <- mle2(minuslogl = nll, control=list(maxit=1e6),method='L-BFGS-B',lower=c(B0=-Inf, Bxy=-Inf, Bzy=-Inf),
 154                    upper=c(B0=Inf, Bxy=Inf, Bzy=Inf))
 155     return(mlefit)
 156 }
 157
 158 ## This uses the likelihood approach from Carroll page 353.
 159 ## assumes that we have a good measurement error model
 160 my.mle <- function(df){
 161
 162     ## liklihood for observed responses
 163     nll <- function(B0, Bxy, Bzy, gamma0, gamma_y, gamma_z, gamma_yz){
 164         df.obs <- df[!is.na(y.obs)]
 165         yobs0 <- df.obs$y==0
 166         yobs1 <- df.obs$y==1
 167         p.y.obs <- vector(mode='numeric', length=nrow(df.obs))
 168
 169         p.y.obs[yobs1] <- plogis(B0 + Bxy * df.obs[yobs1]$x + Bzy*df.obs[yobs1]$z,log=T)
 170         p.y.obs[yobs0] <- plogis(B0 + Bxy * df.obs[yobs0]$x + Bzy*df.obs[yobs0]$z,lower.tail=FALSE,log=T)
 171
 172         wobs0 <- df.obs$w_pred==0
 173         wobs1 <- df.obs$w_pred==1
 174         p.w.obs <- vector(mode='numeric', length=nrow(df.obs))
 175
 176         p.w.obs[wobs1] <- plogis(gamma0 + gamma_y * df.obs[wobs1]$y + gamma_z*df.obs[wobs1]$z + df.obs[wobs1]$z*df.obs[wobs1]$y* gamma_yz, log=T)
 177         p.w.obs[wobs0] <- plogis(gamma0 + gamma_y * df.obs[wobs0]$y + gamma_z*df.obs[wobs0]$z + df.obs[wobs0]$z*df.obs[wobs0]$y* gamma_yz, lower.tail=FALSE, log=T)
 178
 179         p.obs <- p.w.obs + p.y.obs
 180
 181         df.unobs <- df[is.na(y.obs)]
 182
 183         p.unobs.0 <- vector(mode='numeric',length=nrow(df.unobs))
 184         p.unobs.1 <- vector(mode='numeric',length=nrow(df.unobs))
 185
 186         wunobs.0 <- df.unobs$w_pred == 0
 187         wunobs.1 <- df.unobs$w_pred == 1
 188
 189         p.unobs.0[wunobs.1] <- plogis(B0 + Bxy * df.unobs[wunobs.1]$x + Bzy*df.unobs[wunobs.1]$z, log=T) + plogis(gamma0 + gamma_y + gamma_z*df.unobs[wunobs.1]$z + df.unobs[wunobs.1]$z*gamma_yz, log=T)
 190
 191         p.unobs.0[wunobs.0] <- plogis(B0 + Bxy * df.unobs[wunobs.0]$x + Bzy*df.unobs[wunobs.0]$z, log=T) + plogis(gamma0 + gamma_y + gamma_z*df.unobs[wunobs.0]$z + df.unobs[wunobs.0]$z*gamma_yz, lower.tail=FALSE, log=T)
 192
 193         p.unobs.1[wunobs.1] <- plogis(B0 + Bxy * df.unobs[wunobs.1]$x + Bzy*df.unobs[wunobs.1]$z, log=T, lower.tail=FALSE) + plogis(gamma0 + gamma_z*df.unobs[wunobs.1]$z, log=T)
 194
 195         p.unobs.1[wunobs.0] <- plogis(B0 + Bxy * df.unobs[wunobs.0]$x + Bzy*df.unobs[wunobs.0]$z, log=T, lower.tail=FALSE) + plogis(gamma0 + gamma_z*df.unobs[wunobs.0]$z, lower.tail=FALSE, log=T)
 196
 197         p.unobs <- colLogSumExps(rbind(p.unobs.1, p.unobs.0))
 198
 199         p <- c(p.obs, p.unobs)
 200
 201         return(-1*(sum(p)))
 202     }
 203
 204     mlefit <- mle2(minuslogl = nll, start = list(B0=0, Bxy=0,Bzy=0, gamma0=0, gamma_y=0, gamma_z=0, gamma_yz=0), control=list(maxit=1e6),method='L-BFGS-B')
 205
 206     return(mlefit)
 207 }
 208
 209 run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formula=w_pred~y){
 210
 211     accuracy <- df[,mean(w_pred==y)]
 212     result <- append(result, list(accuracy=accuracy))
 213
 214     (model.true <- glm(y ~ x + z, data=df,family=binomial(link='logit')))
 215     true.ci.Bxy <- confint(model.true)['x',]
 216     true.ci.Bzy <- confint(model.true)['z',]
 217
 218     result <- append(result, list(Bxy.est.true=coef(model.true)['x'],
 219                                   Bzy.est.true=coef(model.true)['z'],
 220                                   Bxy.ci.upper.true = true.ci.Bxy[2],
 221                                   Bxy.ci.lower.true = true.ci.Bxy[1],
 222                                   Bzy.ci.upper.true = true.ci.Bzy[2],
 223                                   Bzy.ci.lower.true = true.ci.Bzy[1]))
 224
 225     (model.feasible <- glm(y.obs~x+z,data=df,family=binomial(link='logit')))
 226
 227     feasible.ci.Bxy <- confint(model.feasible)['x',]
 228     result <- append(result, list(Bxy.est.feasible=coef(model.feasible)['x'],
 229                                   Bxy.ci.upper.feasible = feasible.ci.Bxy[2],
 230                                   Bxy.ci.lower.feasible = feasible.ci.Bxy[1]))
 231
 232     feasible.ci.Bzy <- confint(model.feasible)['z',]
 233     result <- append(result, list(Bzy.est.feasible=coef(model.feasible)['z'],
 234                                   Bzy.ci.upper.feasible = feasible.ci.Bzy[2],
 235                                   Bzy.ci.lower.feasible = feasible.ci.Bzy[1]))
 236
 237     (model.naive <- glm(w_pred~x+z, data=df, family=binomial(link='logit')))
 238
 239     naive.ci.Bxy <- confint(model.naive)['x',]
 240     naive.ci.Bzy <- confint(model.naive)['z',]
 241
 242     result <- append(result, list(Bxy.est.naive=coef(model.naive)['x'],
 243                                   Bzy.est.naive=coef(model.naive)['z'],
 244                                   Bxy.ci.upper.naive = naive.ci.Bxy[2],
 245                                   Bxy.ci.lower.naive = naive.ci.Bxy[1],
 246                                   Bzy.ci.upper.naive = naive.ci.Bzy[2],
 247                                   Bzy.ci.lower.naive = naive.ci.Bzy[1]))
 248
 249
 250     (model.naive.cont <- lm(w~x+z, data=df))
 251     naivecont.ci.Bxy <- confint(model.naive.cont)['x',]
 252     naivecont.ci.Bzy <- confint(model.naive.cont)['z',]
 253
 254     ## my implementation of liklihood based correction
 255
 256     temp.df <- copy(df)
 257     temp.df[,y:=y.obs]
 258     mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula)
 259     fisher.info <- solve(mod.caroll.lik$hessian)
 260     coef <- mod.caroll.lik$par
 261     ci.upper <- coef + sqrt(diag(fisher.info)) * 1.96
 262     ci.lower <- coef - sqrt(diag(fisher.info)) * 1.96
 263     result <- append(result,
 264                      list(Bxy.est.mle = coef['x'],
 265                           Bxy.ci.upper.mle = ci.upper['x'],
 266                           Bxy.ci.lower.mle = ci.lower['x'],
 267                           Bzy.est.mle = coef['z'],
 268                           Bzy.ci.upper.mle = ci.upper['z'],
 269                           Bzy.ci.lower.mle = ci.lower['z']))
 270
 271
 272     ## my implementatoin of liklihood based correction
 273     mod.zhang <- zhang.mle.dv(df)
 274     coef <- coef(mod.zhang)
 275     ci <- confint(mod.zhang,method='quad')
 276
 277     result <- append(result,
 278                      list(Bxy.est.zhang = coef['Bxy'],
 279                           Bxy.ci.upper.zhang = ci['Bxy','97.5 %'],
 280                           Bxy.ci.lower.zhang = ci['Bxy','2.5 %'],
 281                           Bzy.est.zhang = coef['Bzy'],
 282                           Bzy.ci.upper.zhang = ci['Bzy','97.5 %'],
 283                           Bzy.ci.lower.zhang = ci['Bzy','2.5 %']))
 284
 285
 286
 287     # amelia says use normal distribution for binary variables.
 288     tryCatch({
 289         amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('y','ystar','w'))
 290         mod.amelia.k <- zelig(y.obs~x+z, model='ls', data=amelia.out.k$imputations, cite=FALSE)
 291         (coefse <- combine_coef_se(mod.amelia.k, messages=FALSE))
 292         est.x.mi <- coefse['x','Estimate']
 293         est.x.se <- coefse['x','Std.Error']
 294         result <- append(result,
 295                          list(Bxy.est.amelia.full = est.x.mi,
 296                               Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se,
 297                               Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se
 298                               ))
 299
 300         est.z.mi <- coefse['z','Estimate']
 301         est.z.se <- coefse['z','Std.Error']
 302
 303         result <- append(result,
 304                          list(Bzy.est.amelia.full = est.z.mi,
 305                               Bzy.ci.upper.amelia.full = est.z.mi + 1.96 * est.z.se,
 306                               Bzy.ci.lower.amelia.full = est.z.mi - 1.96 * est.z.se
 307                               ))
 308
 309     },
 310     error = function(e){
 311         message("An error occurred:\n",e)
 312         result$error <- paste0(result$error,'\n', e)
 313     })
 314
 315
 316     return(result)
 317
 318 }
 319
 320
 321 ## outcome_formula, proxy_formula, and truth_formula are passed to measerr_mle
 322 run_simulation <-  function(df, result, outcome_formula=y~x+z, proxy_formula=NULL, truth_formula=NULL){
 323
 324     accuracy <- df[,mean(w_pred==x)]
 325     result <- append(result, list(accuracy=accuracy))
 326
 327     (model.true <- lm(y ~ x + z, data=df))
 328     true.ci.Bxy <- confint(model.true)['x',]
 329     true.ci.Bzy <- confint(model.true)['z',]
 330
 331     result <- append(result, list(Bxy.est.true=coef(model.true)['x'],
 332                                   Bzy.est.true=coef(model.true)['z'],
 333                                   Bxy.ci.upper.true = true.ci.Bxy[2],
 334                                   Bxy.ci.lower.true = true.ci.Bxy[1],
 335                                   Bzy.ci.upper.true = true.ci.Bzy[2],
 336                                   Bzy.ci.lower.true = true.ci.Bzy[1]))
 337
 338     (model.feasible <- lm(y~x.obs+z,data=df))
 339
 340     feasible.ci.Bxy <- confint(model.feasible)['x.obs',]
 341     result <- append(result, list(Bxy.est.feasible=coef(model.feasible)['x.obs'],
 342                                   Bxy.ci.upper.feasible = feasible.ci.Bxy[2],
 343                                   Bxy.ci.lower.feasible = feasible.ci.Bxy[1]))
 344
 345     feasible.ci.Bzy <- confint(model.feasible)['z',]
 346     result <- append(result, list(Bzy.est.feasible=coef(model.feasible)['z'],
 347                                   Bzy.ci.upper.feasible = feasible.ci.Bzy[2],
 348                                   Bzy.ci.lower.feasible = feasible.ci.Bzy[1]))
 349
 350     (model.naive <- lm(y~w_pred+z, data=df))
 351
 352     naive.ci.Bxy <- confint(model.naive)['w_pred',]
 353     naive.ci.Bzy <- confint(model.naive)['z',]
 354
 355     result <- append(result, list(Bxy.est.naive=coef(model.naive)['w_pred'],
 356                                   Bzy.est.naive=coef(model.naive)['z'],
 357                                   Bxy.ci.upper.naive = naive.ci.Bxy[2],
 358                                   Bxy.ci.lower.naive = naive.ci.Bxy[1],
 359                                   Bzy.ci.upper.naive = naive.ci.Bzy[2],
 360                                   Bzy.ci.lower.naive = naive.ci.Bzy[1]))
 361
 362
 363     tryCatch({
 364     amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('x','w'))
 365     mod.amelia.k <- zelig(y~x.obs+z, model='ls', data=amelia.out.k$imputations, cite=FALSE)
 366     (coefse <- combine_coef_se(mod.amelia.k, messages=FALSE))
 367
 368     est.x.mi <- coefse['x.obs','Estimate']
 369     est.x.se <- coefse['x.obs','Std.Error']
 370     result <- append(result,
 371                      list(Bxy.est.amelia.full = est.x.mi,
 372                           Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se,
 373                           Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se
 374                           ))
 375
 376     est.z.mi <- coefse['z','Estimate']
 377     est.z.se <- coefse['z','Std.Error']
 378
 379     result <- append(result,
 380                      list(Bzy.est.amelia.full = est.z.mi,
 381                           Bzy.ci.upper.amelia.full = est.z.mi + 1.96 * est.z.se,
 382                           Bzy.ci.lower.amelia.full = est.z.mi - 1.96 * est.z.se
 383                           ))
 384
 385     },
 386     error = function(e){
 387         message("An error occurred:\n",e)
 388         result$error <-paste0(result$error,'\n', e)
 389     }
 390     )
 391
 392     tryCatch({
 393         temp.df <- copy(df)
 394         temp.df <- temp.df[,x:=x.obs]
 395         mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula)
 396         fisher.info <- solve(mod.caroll.lik$hessian)
 397         coef <- mod.caroll.lik$par
 398         ci.upper <- coef + sqrt(diag(fisher.info)) * 1.96
 399         ci.lower <- coef - sqrt(diag(fisher.info)) * 1.96
 400
 401
 402         result <- append(result,
 403                          list(Bxy.est.mle = coef['x'],
 404                               Bxy.ci.upper.mle = ci.upper['x'],
 405                               Bxy.ci.lower.mle = ci.lower['x'],
 406                               Bzy.est.mle = coef['z'],
 407                               Bzy.ci.upper.mle = ci.upper['z'],
 408                               Bzy.ci.lower.mle = ci.lower['z']))
 409     },
 410
 411     error = function(e){
 412         message("An error occurred:\n",e)
 413         result$error <- paste0(result$error,'\n', e)
 414     })
 415
 416     tryCatch({
 417
 418         mod.zhang.lik <- zhang.mle.iv(df)
 419         coef <- coef(mod.zhang.lik)
 420         ci <- confint(mod.zhang.lik,method='quad')
 421         result <- append(result,
 422                          list(Bxy.est.zhang = coef['Bxy'],
 423                               Bxy.ci.upper.zhang = ci['Bxy','97.5 %'],
 424                               Bxy.ci.lower.zhang = ci['Bxy','2.5 %'],
 425                               Bzy.est.zhang = coef['Bzy'],
 426                               Bzy.ci.upper.zhang = ci['Bzy','97.5 %'],
 427                               Bzy.ci.lower.zhang = ci['Bzy','2.5 %']))
 428     },
 429
 430     error = function(e){
 431         message("An error occurred:\n",e)
 432         result$error <- paste0(result$error,'\n', e)
 433     })
 434
 435     ## What if we can't observe k -- most realistic scenario. We can't include all the ML features in a model.
 436     ## amelia.out.nok <- amelia(df, m=200, p2s=0, idvars=c("x","w_pred"), noms=noms)
 437     ## mod.amelia.nok <- zelig(y~x.obs+g, model='ls', data=amelia.out.nok$imputations, cite=FALSE)
 438     ## (coefse <- combine_coef_se(mod.amelia.nok, messages=FALSE))
 439
 440     ## est.x.mi <- coefse['x.obs','Estimate']
 441     ## est.x.se <- coefse['x.obs','Std.Error']
 442     ## result <- append(result,
 443     ##                  list(Bxy.est.amelia.nok = est.x.mi,
 444     ##                       Bxy.ci.upper.amelia.nok = est.x.mi + 1.96 * est.x.se,
 445     ##                       Bxy.ci.lower.amelia.nok = est.x.mi - 1.96 * est.x.se
 446     ##                       ))
 447
 448     ## est.g.mi <- coefse['g','Estimate']
 449     ## est.g.se <- coefse['g','Std.Error']
 450
 451     ## result <- append(result,
 452     ##                  list(Bgy.est.amelia.nok = est.g.mi,
 453     ##                       Bgy.ci.upper.amelia.nok = est.g.mi + 1.96 * est.g.se,
 454     ##                       Bgy.ci.lower.amelia.nok = est.g.mi - 1.96 * est.g.se
 455     ##                       ))
 456
 457     N <- nrow(df)
 458     m <- nrow(df[!is.na(x.obs)])
 459     p <- v <- train <- rep(0,N)
 460     M <- m
 461     p[(M+1):(N)] <- 1
 462     v[1:(M)] <- 1
 463     df <- df[order(x.obs)]
 464     y <- df[,y]
 465     x <- df[,x.obs]
 466     z <- df[,z]
 467     w <- df[,w_pred]
 468     # gmm gets pretty close
 469     (gmm.res <- predicted_covariates(y, x, z, w, v, train, p, max_iter=100, verbose=TRUE))
 470
 471     result <- append(result,
 472                      list(Bxy.est.gmm = gmm.res$beta[1,1],
 473                           Bxy.ci.upper.gmm = gmm.res$confint[1,2],
 474                           Bxy.ci.lower.gmm = gmm.res$confint[1,1],
 475                           gmm.ER_pval = gmm.res$ER_pval
 476                           ))
 477
 478     result <- append(result,
 479                      list(Bzy.est.gmm = gmm.res$beta[2,1],
 480                           Bzy.ci.upper.gmm = gmm.res$confint[2,2],
 481                           Bzy.ci.lower.gmm = gmm.res$confint[2,1]))
 482
 483
 484     tryCatch({
 485     mod.calibrated.mle <- mecor(y ~ MeasError(w_pred, reference = x.obs) + z, df, B=400, method='efficient')
 486     (mod.calibrated.mle)
 487     (mecor.ci <- summary(mod.calibrated.mle)$c$ci['x.obs',])
 488     result <- append(result, list(
 489                                  Bxy.est.mecor = mecor.ci['Estimate'],
 490                                  Bxy.ci.upper.mecor = mecor.ci['UCI'],
 491                                  Bxy.ci.lower.mecor = mecor.ci['LCI'])
 492                      )
 493
 494     (mecor.ci <- summary(mod.calibrated.mle)$c$ci['z',])
 495
 496     result <- append(result, list(
 497                                  Bzy.est.mecor = mecor.ci['Estimate'],
 498                                  Bzy.ci.upper.mecor = mecor.ci['UCI'],
 499                                  Bzy.ci.lower.mecor = mecor.ci['LCI'])
 500                      )
 501     },
 502     error = function(e){
 503         message("An error occurred:\n",e)
 504         result$error <- paste0(result$error, '\n', e)
 505     }
 506     )
 507 ##    clean up memory
 508 ##    rm(list=c("df","y","x","g","w","v","train","p","amelia.out.k","amelia.out.nok", "mod.calibrated.mle","gmm.res","mod.amelia.k","mod.amelia.nok", "model.true","model.naive","model.feasible"))
 509
 510 ##    gc()
 511     return(result)
 512 }