simulations/simulation_base.R

   1 library(predictionError)
   2 library(mecor)
   3 options(amelia.parallel="no",
   4         amelia.ncpus=1)
   5 library(Amelia)
   6 library(Zelig)
   7 library(bbmle)
   8 library(matrixStats) # for numerically stable logsumexps
   9
  10 source("measerr_methods.R") ## for my more generic function.
  11
  12 ## This uses the pseudolikelihood approach from Carroll page 349.
  13 ## assumes MAR
  14 ## assumes differential error, but that only depends on Y
  15 ## inefficient, because pseudolikelihood
  16
  17 ## This uses the pseudo-likelihood approach from Carroll page 346.
  18 my.pseudo.mle <- function(df){
  19     p1.est <- mean(df[w_pred==1]$y.obs==1,na.rm=T)
  20     p0.est <- mean(df[w_pred==0]$y.obs==0,na.rm=T)
  21
  22     nll <- function(B0, Bxy, Bzy){
  23
  24         pw <- vector(mode='numeric',length=nrow(df))
  25         dfw1 <- df[w_pred==1]
  26         dfw0 <- df[w_pred==0]
  27         pw[df$w_pred==1] <- plogis(B0 + Bxy * dfw1$x + Bzy * dfw1$z, log=T)
  28         pw[df$w_pred==0] <- plogis(B0 + Bxy * dfw0$x + Bzy * dfw0$z, lower.tail=FALSE, log=T)
  29
  30         probs <- colLogSumExps(rbind(log(1 - p0.est), log(p1.est + p0.est - 1) + pw))
  31         return(-1*sum(probs))
  32     }
  33
  34     mlefit <- mle2(minuslogl = nll, start = list(B0=0.0, Bxy=0.0, Bzy=0.0), control=list(maxit=1e6),method='L-BFGS-B')
  35     return(mlefit)
  36
  37 }
  38
  39
  40 ## model from Zhang's arxiv paper, with predictions for y
  41 ## Zhang got this model from Hausman 1998
  42 ### I think this is actually eqivalent to the pseudo.mle method
  43 zhang.mle.iv <- function(df){
  44     df.obs <- df[!is.na(x.obs)]
  45     df.unobs <- df[is.na(x.obs)]
  46
  47     tn <- df.obs[(w_pred == 0) & (x.obs == w_pred),.N]
  48     pn <- df.obs[(w_pred==0), .N]
  49     npv <- tn / pn
  50
  51     tp <- df.obs[(w_pred==1) & (x.obs == w_pred),.N]
  52     pp <- df.obs[(w_pred==1),.N]
  53     ppv <- tp / pp
  54
  55     nll <- function(B0=0, Bxy=0, Bzy=0, sigma_y=0.1){
  56
  57     ## fpr = 1 - TNR
  58     ### Problem: accounting for uncertainty in ppv / npv
  59
  60     ## fnr = 1 - TPR
  61     ll.y.obs <- with(df.obs, dnorm(y, B0 + Bxy * x + Bzy * z, sd=sigma_y,log=T))
  62     ll <- sum(ll.y.obs)
  63
  64     # unobserved case; integrate out x
  65     ll.x.1 <- with(df.unobs, dnorm(y, B0 + Bxy + Bzy * z, sd = sigma_y, log=T))
  66     ll.x.0 <- with(df.unobs, dnorm(y, B0 + Bzy * z, sd = sigma_y,log=T))
  67
  68     ## case x == 1
  69     lls.x.1 <- colLogSumExps(rbind(log(ppv) + ll.x.1, log(1-ppv) + ll.x.0))
  70
  71     ## case x == 0
  72     lls.x.0 <- colLogSumExps(rbind(log(1-npv) + ll.x.1, log(npv) + ll.x.0))
  73
  74     lls <- colLogSumExps(rbind(df.unobs$w_pred * lls.x.1, (1-df.unobs$w_pred) * lls.x.0))
  75     ll <- ll + sum(lls)
  76     return(-ll)
  77     }
  78     mlefit <- mle2(minuslogl = nll, control=list(maxit=1e6), lower=list(sigma_y=0.0001, B0=-Inf, Bxy=-Inf, Bzy=-Inf),
  79                    upper=list(sigma_y=Inf, B0=Inf, Bxy=Inf, Bzy=Inf),method='L-BFGS-B')
  80     return(mlefit)
  81 }
  82
  83 ## this is equivalent to the pseudo-likelihood model from Carroll
  84 ## zhang.mle.dv <- function(df){
  85
  86 ##     nll <- function(B0=0, Bxy=0, Bzy=0, ppv=0.9, npv=0.9){
  87 ##     df.obs <- df[!is.na(y.obs)]
  88
  89 ##     ## fpr = 1 - TNR
  90 ##     ll.w0y0 <- with(df.obs[y.obs==0],dbinom(1-w_pred,1,npv,log=TRUE))
  91 ##     ll.w1y1 <- with(df.obs[y.obs==1],dbinom(w_pred,1,ppv,log=TRUE))
  92
  93 ##     # observed case
  94 ##     ll.y.obs <- vector(mode='numeric', length=nrow(df.obs))
  95 ##     ll.y.obs[df.obs$y.obs==1] <- with(df.obs[y.obs==1], plogis(B0 + Bxy * x + Bzy * z,log=T))
  96 ##     ll.y.obs[df.obs$y.obs==0] <- with(df.obs[y.obs==0], plogis(B0 + Bxy * x + Bzy * z,log=T,lower.tail=FALSE))
  97
  98 ##     ll <- sum(ll.y.obs) + sum(ll.w0y0) + sum(ll.w1y1)
  99
 100 ##     # unobserved case; integrate out y
 101 ##     ## case y = 1
 102 ##     ll.y.1 <- vector(mode='numeric', length=nrow(df))
 103 ##     pi.y.1 <- with(df,plogis(B0 + Bxy * x + Bzy*z, log=T))
 104 ##     ## P(w=1| y=1)P(y=1) + P(w=0|y=1)P(y=1) = P(w=1,y=1) + P(w=0,y=1)
 105 ##     lls.y.1 <- colLogSumExps(rbind(log(ppv) + pi.y.1, log(1-ppv) + pi.y.1))
 106
 107 ##     ## case y = 0
 108 ##     ll.y.0 <- vector(mode='numeric', length=nrow(df))
 109 ##     pi.y.0 <- with(df,plogis(B0 + Bxy * x + Bzy*z, log=T,lower.tail=FALSE))
 110
 111 ##     ## P(w=1 | y=0)P(y=0) + P(w=0|y=0)P(y=0) = P(w=1,y=0) + P(w=0,y=0)
 112 ##     lls.y.0 <- colLogSumExps(rbind(log(npv) + pi.y.0, log(1-npv) + pi.y.0))
 113
 114 ##     lls <- colLogSumExps(rbind(lls.y.1, lls.y.0))
 115 ##     ll <- ll + sum(lls)
 116 ##     return(-ll)
 117 ##     }
 118 ##     mlefit <- mle2(minuslogl = nll, control=list(maxit=1e6),method='L-BFGS-B',lower=list(B0=-Inf, Bxy=-Inf, Bzy=-Inf, ppv=0.001,npv=0.001),
 119 ##                    upper=list(B0=Inf, Bxy=Inf, Bzy=Inf,ppv=0.999,npv=0.999))
 120 ##     return(mlefit)
 121 ## }
 122
 123 zhang.mle.dv <- function(df){
 124     df.obs <- df[!is.na(y.obs)]
 125     df.unobs <- df[is.na(y.obs)]
 126
 127     fp <- df.obs[(w_pred==1) & (y.obs != w_pred),.N]
 128     p <- df.obs[(w_pred==1),.N]
 129     fpr <- fp / p
 130     fn <- df.obs[(w_pred==0) & (y.obs != w_pred), .N]
 131     n <- df.obs[(w_pred==0),.N]
 132     fnr <- fn / n
 133
 134     nll <- function(B0=0, Bxy=0, Bzy=0){
 135
 136
 137         ## observed case
 138         ll.y.obs <- vector(mode='numeric', length=nrow(df.obs))
 139         ll.y.obs[df.obs$y.obs==1] <- with(df.obs[y.obs==1], plogis(B0 + Bxy * x + Bzy * z,log=T))
 140         ll.y.obs[df.obs$y.obs==0] <- with(df.obs[y.obs==0], plogis(B0 + Bxy * x + Bzy * z,log=T,lower.tail=FALSE))
 141
 142         ll <- sum(ll.y.obs)
 143
 144         pi.y.1 <- with(df,plogis(B0 + Bxy * x + Bzy*z, log=T))
 145         pi.y.0 <- with(df,plogis(B0 + Bxy * x + Bzy*z, log=T,lower.tail=FALSE))
 146
 147         lls <- with(df.unobs, colLogSumExps(rbind(w_pred * colLogSumExps(rbind(log(fpr), log(1 - fnr - fpr)+pi.y.1)),
 148         (1-w_pred) * colLogSumExps(rbind(log(1-fpr), log(1 - fnr - fpr)+pi.y.0)))))
 149
 150         ll <- ll + sum(lls)
 151         return(-ll)
 152     }
 153     mlefit <- mle2(minuslogl = nll, control=list(maxit=1e6),method='L-BFGS-B',lower=c(B0=-Inf, Bxy=-Inf, Bzy=-Inf),
 154                    upper=c(B0=Inf, Bxy=Inf, Bzy=Inf))
 155     return(mlefit)
 156 }
 157
 158 ## This uses the likelihood approach from Carroll page 353.
 159 ## assumes that we have a good measurement error model
 160 my.mle <- function(df){
 161
 162     ## liklihood for observed responses
 163     nll <- function(B0, Bxy, Bzy, gamma0, gamma_y, gamma_z, gamma_yz){
 164         df.obs <- df[!is.na(y.obs)]
 165         yobs0 <- df.obs$y==0
 166         yobs1 <- df.obs$y==1
 167         p.y.obs <- vector(mode='numeric', length=nrow(df.obs))
 168
 169         p.y.obs[yobs1] <- plogis(B0 + Bxy * df.obs[yobs1]$x + Bzy*df.obs[yobs1]$z,log=T)
 170         p.y.obs[yobs0] <- plogis(B0 + Bxy * df.obs[yobs0]$x + Bzy*df.obs[yobs0]$z,lower.tail=FALSE,log=T)
 171
 172         wobs0 <- df.obs$w_pred==0
 173         wobs1 <- df.obs$w_pred==1
 174         p.w.obs <- vector(mode='numeric', length=nrow(df.obs))
 175
 176         p.w.obs[wobs1] <- plogis(gamma0 + gamma_y * df.obs[wobs1]$y + gamma_z*df.obs[wobs1]$z + df.obs[wobs1]$z*df.obs[wobs1]$y* gamma_yz, log=T)
 177         p.w.obs[wobs0] <- plogis(gamma0 + gamma_y * df.obs[wobs0]$y + gamma_z*df.obs[wobs0]$z + df.obs[wobs0]$z*df.obs[wobs0]$y* gamma_yz, lower.tail=FALSE, log=T)
 178
 179         p.obs <- p.w.obs + p.y.obs
 180
 181         df.unobs <- df[is.na(y.obs)]
 182
 183         p.unobs.0 <- vector(mode='numeric',length=nrow(df.unobs))
 184         p.unobs.1 <- vector(mode='numeric',length=nrow(df.unobs))
 185
 186         wunobs.0 <- df.unobs$w_pred == 0
 187         wunobs.1 <- df.unobs$w_pred == 1
 188
 189         p.unobs.0[wunobs.1] <- plogis(B0 + Bxy * df.unobs[wunobs.1]$x + Bzy*df.unobs[wunobs.1]$z, log=T) + plogis(gamma0 + gamma_y + gamma_z*df.unobs[wunobs.1]$z + df.unobs[wunobs.1]$z*gamma_yz, log=T)
 190
 191         p.unobs.0[wunobs.0] <- plogis(B0 + Bxy * df.unobs[wunobs.0]$x + Bzy*df.unobs[wunobs.0]$z, log=T) + plogis(gamma0 + gamma_y + gamma_z*df.unobs[wunobs.0]$z + df.unobs[wunobs.0]$z*gamma_yz, lower.tail=FALSE, log=T)
 192
 193         p.unobs.1[wunobs.1] <- plogis(B0 + Bxy * df.unobs[wunobs.1]$x + Bzy*df.unobs[wunobs.1]$z, log=T, lower.tail=FALSE) + plogis(gamma0 + gamma_z*df.unobs[wunobs.1]$z, log=T)
 194
 195         p.unobs.1[wunobs.0] <- plogis(B0 + Bxy * df.unobs[wunobs.0]$x + Bzy*df.unobs[wunobs.0]$z, log=T, lower.tail=FALSE) + plogis(gamma0 + gamma_z*df.unobs[wunobs.0]$z, lower.tail=FALSE, log=T)
 196
 197         p.unobs <- colLogSumExps(rbind(p.unobs.1, p.unobs.0))
 198
 199         p <- c(p.obs, p.unobs)
 200
 201         return(-1*(sum(p)))
 202     }
 203
 204     mlefit <- mle2(minuslogl = nll, start = list(B0=0, Bxy=0,Bzy=0, gamma0=0, gamma_y=0, gamma_z=0, gamma_yz=0), control=list(maxit=1e6),method='L-BFGS-B')
 205
 206     return(mlefit)
 207 }
 208
 209 run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formula=w_pred~y){
 210
 211     accuracy <- df[,mean(w_pred==y)]
 212     result <- append(result, list(accuracy=accuracy))
 213     error.cor.x <- cor(df$x, df$w - df$x)
 214     result <- append(result, list(error.cor.x = error.cor.x))
 215
 216     model.null <- glm(y~1, data=df,family=binomial(link='logit'))
 217     (model.true <- glm(y ~ x + z, data=df,family=binomial(link='logit')))
 218     (lik.ratio <- exp(logLik(model.true) - logLik(model.null)))
 219
 220     true.ci.Bxy <- confint(model.true)['x',]
 221     true.ci.Bzy <- confint(model.true)['z',]
 222
 223
 224     result <- append(result, list(lik.ratio=lik.ratio))
 225
 226     result <- append(result, list(Bxy.est.true=coef(model.true)['x'],
 227                                   Bzy.est.true=coef(model.true)['z'],
 228                                   Bxy.ci.upper.true = true.ci.Bxy[2],
 229                                   Bxy.ci.lower.true = true.ci.Bxy[1],
 230                                   Bzy.ci.upper.true = true.ci.Bzy[2],
 231                                   Bzy.ci.lower.true = true.ci.Bzy[1]))
 232
 233     (model.feasible <- glm(y.obs~x+z,data=df,family=binomial(link='logit')))
 234
 235     feasible.ci.Bxy <- confint(model.feasible)['x',]
 236     result <- append(result, list(Bxy.est.feasible=coef(model.feasible)['x'],
 237                                   Bxy.ci.upper.feasible = feasible.ci.Bxy[2],
 238                                   Bxy.ci.lower.feasible = feasible.ci.Bxy[1]))
 239
 240     feasible.ci.Bzy <- confint(model.feasible)['z',]
 241     result <- append(result, list(Bzy.est.feasible=coef(model.feasible)['z'],
 242                                   Bzy.ci.upper.feasible = feasible.ci.Bzy[2],
 243                                   Bzy.ci.lower.feasible = feasible.ci.Bzy[1]))
 244
 245     (model.naive <- glm(w_pred~x+z, data=df, family=binomial(link='logit')))
 246
 247     naive.ci.Bxy <- confint(model.naive)['x',]
 248     naive.ci.Bzy <- confint(model.naive)['z',]
 249
 250     result <- append(result, list(Bxy.est.naive=coef(model.naive)['x'],
 251                                   Bzy.est.naive=coef(model.naive)['z'],
 252                                   Bxy.ci.upper.naive = naive.ci.Bxy[2],
 253                                   Bxy.ci.lower.naive = naive.ci.Bxy[1],
 254                                   Bzy.ci.upper.naive = naive.ci.Bzy[2],
 255                                   Bzy.ci.lower.naive = naive.ci.Bzy[1]))
 256
 257
 258     (model.naive.cont <- lm(w~x+z, data=df))
 259     naivecont.ci.Bxy <- confint(model.naive.cont)['x',]
 260     naivecont.ci.Bzy <- confint(model.naive.cont)['z',]
 261
 262     ## my implementation of liklihood based correction
 263
 264     temp.df <- copy(df)
 265     temp.df[,y:=y.obs]
 266     mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula)
 267     fisher.info <- solve(mod.caroll.lik$hessian)
 268     coef <- mod.caroll.lik$par
 269     ci.upper <- coef + sqrt(diag(fisher.info)) * 1.96
 270     ci.lower <- coef - sqrt(diag(fisher.info)) * 1.96
 271     result <- append(result,
 272                      list(Bxy.est.mle = coef['x'],
 273                           Bxy.ci.upper.mle = ci.upper['x'],
 274                           Bxy.ci.lower.mle = ci.lower['x'],
 275                           Bzy.est.mle = coef['z'],
 276                           Bzy.ci.upper.mle = ci.upper['z'],
 277                           Bzy.ci.lower.mle = ci.lower['z']))
 278
 279
 280     ## my implementatoin of liklihood based correction
 281     mod.zhang <- zhang.mle.dv(df)
 282     coef <- coef(mod.zhang)
 283     ci <- confint(mod.zhang,method='quad')
 284
 285     result <- append(result,
 286                      list(Bxy.est.zhang = coef['Bxy'],
 287                           Bxy.ci.upper.zhang = ci['Bxy','97.5 %'],
 288                           Bxy.ci.lower.zhang = ci['Bxy','2.5 %'],
 289                           Bzy.est.zhang = coef['Bzy'],
 290                           Bzy.ci.upper.zhang = ci['Bzy','97.5 %'],
 291                           Bzy.ci.lower.zhang = ci['Bzy','2.5 %']))
 292
 293
 294
 295     # amelia says use normal distribution for binary variables.
 296     tryCatch({
 297         amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('y','ystar','w'))
 298         mod.amelia.k <- zelig(y.obs~x+z, model='ls', data=amelia.out.k$imputations, cite=FALSE)
 299         (coefse <- combine_coef_se(mod.amelia.k, messages=FALSE))
 300         est.x.mi <- coefse['x','Estimate']
 301         est.x.se <- coefse['x','Std.Error']
 302         result <- append(result,
 303                          list(Bxy.est.amelia.full = est.x.mi,
 304                               Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se,
 305                               Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se
 306                               ))
 307
 308         est.z.mi <- coefse['z','Estimate']
 309         est.z.se <- coefse['z','Std.Error']
 310
 311         result <- append(result,
 312                          list(Bzy.est.amelia.full = est.z.mi,
 313                               Bzy.ci.upper.amelia.full = est.z.mi + 1.96 * est.z.se,
 314                               Bzy.ci.lower.amelia.full = est.z.mi - 1.96 * est.z.se
 315                               ))
 316
 317     },
 318     error = function(e){
 319         message("An error occurred:\n",e)
 320         result$error <- paste0(result$error,'\n', e)
 321     })
 322
 323
 324     return(result)
 325
 326 }
 327
 328
 329 ## outcome_formula, proxy_formula, and truth_formula are passed to measerr_mle
 330 run_simulation <-  function(df, result, outcome_formula=y~x+z, proxy_formula=NULL, truth_formula=NULL){
 331
 332     accuracy <- df[,mean(w_pred==x)]
 333     accuracy.y0 <- df[y<=0,mean(w_pred==x)]
 334     accuracy.y1 <- df[y>=0,mean(w_pred==x)]
 335     cor.y.xi <- cor(df$x - df$w_pred, df$y)
 336
 337     fnr <- df[w_pred==0,mean(w_pred!=x)]
 338     fnr.y0 <- df[(w_pred==0) & (y<=0),mean(w_pred!=x)]
 339     fnr.y1 <- df[(w_pred==0) & (y>=0),mean(w_pred!=x)]
 340
 341     fpr <- df[w_pred==1,mean(w_pred!=x)]
 342     fpr.y0 <- df[(w_pred==1) & (y<=0),mean(w_pred!=x)]
 343     fpr.y1 <- df[(w_pred==1) & (y>=0),mean(w_pred!=x)]
 344     cor.resid.w_pred <- cor(resid(lm(y~x+z,df)),df$w_pred)
 345
 346     result <- append(result, list(accuracy=accuracy,
 347                                   accuracy.y0=accuracy.y0,
 348                                   accuracy.y1=accuracy.y1,
 349                                   cor.y.xi=cor.y.xi,
 350                                   fnr=fnr,
 351                                   fnr.y0=fnr.y0,
 352                                   fnr.y1=fnr.y1,
 353                                   fpr=fpr,
 354                                   fpr.y0=fpr.y0,
 355                                   fpr.y1=fpr.y1,
 356                                   cor.resid.w_pred=cor.resid.w_pred
 357                                   ))
 358
 359     result <- append(result, list(cor.xz=cor(df$x,df$z)))
 360     (model.true <- lm(y ~ x + z, data=df))
 361     true.ci.Bxy <- confint(model.true)['x',]
 362     true.ci.Bzy <- confint(model.true)['z',]
 363
 364     result <- append(result, list(Bxy.est.true=coef(model.true)['x'],
 365                                   Bzy.est.true=coef(model.true)['z'],
 366                                   Bxy.ci.upper.true = true.ci.Bxy[2],
 367                                   Bxy.ci.lower.true = true.ci.Bxy[1],
 368                                   Bzy.ci.upper.true = true.ci.Bzy[2],
 369                                   Bzy.ci.lower.true = true.ci.Bzy[1]))
 370
 371     (model.feasible <- lm(y~x.obs+z,data=df))
 372
 373     feasible.ci.Bxy <- confint(model.feasible)['x.obs',]
 374     result <- append(result, list(Bxy.est.feasible=coef(model.feasible)['x.obs'],
 375                                   Bxy.ci.upper.feasible = feasible.ci.Bxy[2],
 376                                   Bxy.ci.lower.feasible = feasible.ci.Bxy[1]))
 377
 378     feasible.ci.Bzy <- confint(model.feasible)['z',]
 379     result <- append(result, list(Bzy.est.feasible=coef(model.feasible)['z'],
 380                                   Bzy.ci.upper.feasible = feasible.ci.Bzy[2],
 381                                   Bzy.ci.lower.feasible = feasible.ci.Bzy[1]))
 382
 383     (model.naive <- lm(y~w_pred+z, data=df))
 384
 385     naive.ci.Bxy <- confint(model.naive)['w_pred',]
 386     naive.ci.Bzy <- confint(model.naive)['z',]
 387
 388     result <- append(result, list(Bxy.est.naive=coef(model.naive)['w_pred'],
 389                                   Bzy.est.naive=coef(model.naive)['z'],
 390                                   Bxy.ci.upper.naive = naive.ci.Bxy[2],
 391                                   Bxy.ci.lower.naive = naive.ci.Bxy[1],
 392                                   Bzy.ci.upper.naive = naive.ci.Bzy[2],
 393                                   Bzy.ci.lower.naive = naive.ci.Bzy[1]))
 394
 395
 396     tryCatch({
 397     amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('x','w'))
 398     mod.amelia.k <- zelig(y~x.obs+z, model='ls', data=amelia.out.k$imputations, cite=FALSE)
 399     (coefse <- combine_coef_se(mod.amelia.k, messages=FALSE))
 400
 401     est.x.mi <- coefse['x.obs','Estimate']
 402     est.x.se <- coefse['x.obs','Std.Error']
 403     result <- append(result,
 404                      list(Bxy.est.amelia.full = est.x.mi,
 405                           Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se,
 406                           Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se
 407                           ))
 408
 409     est.z.mi <- coefse['z','Estimate']
 410     est.z.se <- coefse['z','Std.Error']
 411
 412     result <- append(result,
 413                      list(Bzy.est.amelia.full = est.z.mi,
 414                           Bzy.ci.upper.amelia.full = est.z.mi + 1.96 * est.z.se,
 415                           Bzy.ci.lower.amelia.full = est.z.mi - 1.96 * est.z.se
 416                           ))
 417
 418     },
 419     error = function(e){
 420         message("An error occurred:\n",e)
 421         result$error <-paste0(result$error,'\n', e)
 422     }
 423     )
 424
 425     tryCatch({
 426         temp.df <- copy(df)
 427         temp.df <- temp.df[,x:=x.obs]
 428         mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula)
 429         fisher.info <- solve(mod.caroll.lik$hessian)
 430         coef <- mod.caroll.lik$par
 431         ci.upper <- coef + sqrt(diag(fisher.info)) * 1.96
 432         ci.lower <- coef - sqrt(diag(fisher.info)) * 1.96
 433
 434
 435         result <- append(result,
 436                          list(Bxy.est.mle = coef['x'],
 437                               Bxy.ci.upper.mle = ci.upper['x'],
 438                               Bxy.ci.lower.mle = ci.lower['x'],
 439                               Bzy.est.mle = coef['z'],
 440                               Bzy.ci.upper.mle = ci.upper['z'],
 441                               Bzy.ci.lower.mle = ci.lower['z']))
 442     },
 443
 444     error = function(e){
 445         message("An error occurred:\n",e)
 446         result$error <- paste0(result$error,'\n', e)
 447     })
 448
 449     tryCatch({
 450
 451         mod.zhang.lik <- zhang.mle.iv(df)
 452         coef <- coef(mod.zhang.lik)
 453         ci <- confint(mod.zhang.lik,method='quad')
 454         result <- append(result,
 455                          list(Bxy.est.zhang = coef['Bxy'],
 456                               Bxy.ci.upper.zhang = ci['Bxy','97.5 %'],
 457                               Bxy.ci.lower.zhang = ci['Bxy','2.5 %'],
 458                               Bzy.est.zhang = coef['Bzy'],
 459                               Bzy.ci.upper.zhang = ci['Bzy','97.5 %'],
 460                               Bzy.ci.lower.zhang = ci['Bzy','2.5 %']))
 461     },
 462
 463     error = function(e){
 464         message("An error occurred:\n",e)
 465         result$error <- paste0(result$error,'\n', e)
 466     })
 467
 468     ## What if we can't observe k -- most realistic scenario. We can't include all the ML features in a model.
 469     ## amelia.out.nok <- amelia(df, m=200, p2s=0, idvars=c("x","w_pred"), noms=noms)
 470     ## mod.amelia.nok <- zelig(y~x.obs+g, model='ls', data=amelia.out.nok$imputations, cite=FALSE)
 471     ## (coefse <- combine_coef_se(mod.amelia.nok, messages=FALSE))
 472
 473     ## est.x.mi <- coefse['x.obs','Estimate']
 474     ## est.x.se <- coefse['x.obs','Std.Error']
 475     ## result <- append(result,
 476     ##                  list(Bxy.est.amelia.nok = est.x.mi,
 477     ##                       Bxy.ci.upper.amelia.nok = est.x.mi + 1.96 * est.x.se,
 478     ##                       Bxy.ci.lower.amelia.nok = est.x.mi - 1.96 * est.x.se
 479     ##                       ))
 480
 481     ## est.g.mi <- coefse['g','Estimate']
 482     ## est.g.se <- coefse['g','Std.Error']
 483
 484     ## result <- append(result,
 485     ##                  list(Bgy.est.amelia.nok = est.g.mi,
 486     ##                       Bgy.ci.upper.amelia.nok = est.g.mi + 1.96 * est.g.se,
 487     ##                       Bgy.ci.lower.amelia.nok = est.g.mi - 1.96 * est.g.se
 488     ##                       ))
 489
 490     N <- nrow(df)
 491     m <- nrow(df[!is.na(x.obs)])
 492     p <- v <- train <- rep(0,N)
 493     M <- m
 494     p[(M+1):(N)] <- 1
 495     v[1:(M)] <- 1
 496     df <- df[order(x.obs)]
 497     y <- df[,y]
 498     x <- df[,x.obs]
 499     z <- df[,z]
 500     w <- df[,w_pred]
 501     # gmm gets pretty close
 502     (gmm.res <- predicted_covariates(y, x, z, w, v, train, p, max_iter=100, verbose=TRUE))
 503
 504     result <- append(result,
 505                      list(Bxy.est.gmm = gmm.res$beta[1,1],
 506                           Bxy.ci.upper.gmm = gmm.res$confint[1,2],
 507                           Bxy.ci.lower.gmm = gmm.res$confint[1,1],
 508                           gmm.ER_pval = gmm.res$ER_pval
 509                           ))
 510
 511     result <- append(result,
 512                      list(Bzy.est.gmm = gmm.res$beta[2,1],
 513                           Bzy.ci.upper.gmm = gmm.res$confint[2,2],
 514                           Bzy.ci.lower.gmm = gmm.res$confint[2,1]))
 515
 516
 517     tryCatch({
 518     mod.calibrated.mle <- mecor(y ~ MeasError(w_pred, reference = x.obs) + z, df, B=400, method='efficient')
 519     (mod.calibrated.mle)
 520     (mecor.ci <- summary(mod.calibrated.mle)$c$ci['x.obs',])
 521     result <- append(result, list(
 522                                  Bxy.est.mecor = mecor.ci['Estimate'],
 523                                  Bxy.ci.upper.mecor = mecor.ci['UCI'],
 524                                  Bxy.ci.lower.mecor = mecor.ci['LCI'])
 525                      )
 526
 527     (mecor.ci <- summary(mod.calibrated.mle)$c$ci['z',])
 528
 529     result <- append(result, list(
 530                                  Bzy.est.mecor = mecor.ci['Estimate'],
 531                                  Bzy.ci.upper.mecor = mecor.ci['UCI'],
 532                                  Bzy.ci.lower.mecor = mecor.ci['LCI'])
 533                      )
 534     },
 535     error = function(e){
 536         message("An error occurred:\n",e)
 537         result$error <- paste0(result$error, '\n', e)
 538     }
 539     )
 540 ##    clean up memory
 541 ##    rm(list=c("df","y","x","g","w","v","train","p","amelia.out.k","amelia.out.nok", "mod.calibrated.mle","gmm.res","mod.amelia.k","mod.amelia.nok", "model.true","model.naive","model.feasible"))
 542
 543 ##    gc()
 544     return(result)
 545 }