X-Git-Url: https://code.communitydata.science/ml_measurement_error_public.git/blobdiff_plain/588bdd7ed74cf8fe8fd0f15df58a6a40c26ebae5..d9d3e47a44ddead1cdf7a649bc0e9849c2219498:/simulations/simulation_base.R diff --git a/simulations/simulation_base.R b/simulations/simulation_base.R index a73ed79..bafd7d3 100644 --- a/simulations/simulation_base.R +++ b/simulations/simulation_base.R @@ -4,225 +4,380 @@ options(amelia.parallel="no", amelia.ncpus=1) library(Amelia) library(Zelig) -library(stats4) +library(bbmle) +library(matrixStats) # for numerically stable logsumexps +source("pl_methods.R") +source("measerr_methods.R") ## for my more generic function. ## This uses the pseudolikelihood approach from Carroll page 349. ## assumes MAR ## assumes differential error, but that only depends on Y ## inefficient, because pseudolikelihood -logistic.correction.pseudo <- function(df){ + +## This uses the pseudo-likelihood approach from Carroll page 346. +my.pseudo.mle <- function(df){ p1.est <- mean(df[w_pred==1]$y.obs==1,na.rm=T) p0.est <- mean(df[w_pred==0]$y.obs==0,na.rm=T) - nll <- function(B0, Bxy, Bgy){ - probs <- (1 - p0.est) + (p1.est + p0.est - 1)*plogis(B0 + Bxy * df$x + Bgy * df$g) + nll <- function(B0, Bxy, Bzy){ - part1 = sum(log(probs[df$w_pred == 1])) - part2 = sum(log(1-probs[df$w_pred == 0])) + pw <- vector(mode='numeric',length=nrow(df)) + dfw1 <- df[w_pred==1] + dfw0 <- df[w_pred==0] + pw[df$w_pred==1] <- plogis(B0 + Bxy * dfw1$x + Bzy * dfw1$z, log=T) + pw[df$w_pred==0] <- plogis(B0 + Bxy * dfw0$x + Bzy * dfw0$z, lower.tail=FALSE, log=T) - return(-1*(part1 + part2)) + probs <- colLogSumExps(rbind(log(1 - p0.est), log(p1.est + p0.est - 1) + pw)) + return(-1*sum(probs)) } - mlefit <- stats4::mle(minuslogl = nll, start = list(B0=0, Bxy=0.0, Bgy=0.0)) + mlefit <- mle2(minuslogl = nll, start = list(B0=0.0, Bxy=0.0, Bzy=0.0), control=list(maxit=1e6),method='L-BFGS-B') return(mlefit) } + ## This uses the likelihood approach from Carroll page 353. ## assumes that we have a good measurement error model -logistic.correction.liklihood <- function(df){ +my.mle <- function(df){ ## liklihood for observed responses - nll <- function(B0, Bxy, Bgy, gamma0, gamma_y, gamma_g){ + nll <- function(B0, Bxy, Bzy, gamma0, gamma_y, gamma_z, gamma_yz){ df.obs <- df[!is.na(y.obs)] - p.y.obs <- plogis(B0 + Bxy * df.obs$x + Bgy*df.obs$g) - p.y.obs[df.obs$y==0] <- 1-p.y.obs[df.obs$y==0] - p.s.obs <- plogis(gamma0 + gamma_y * df.obs$y + gamma_g*df.obs$g) - p.s.obs[df.obs$w_pred==0] <- 1 - p.s.obs[df.obs$w_pred==0] + yobs0 <- df.obs$y==0 + yobs1 <- df.obs$y==1 + p.y.obs <- vector(mode='numeric', length=nrow(df.obs)) + + p.y.obs[yobs1] <- plogis(B0 + Bxy * df.obs[yobs1]$x + Bzy*df.obs[yobs1]$z,log=T) + p.y.obs[yobs0] <- plogis(B0 + Bxy * df.obs[yobs0]$x + Bzy*df.obs[yobs0]$z,lower.tail=FALSE,log=T) + + wobs0 <- df.obs$w_pred==0 + wobs1 <- df.obs$w_pred==1 + p.w.obs <- vector(mode='numeric', length=nrow(df.obs)) + + p.w.obs[wobs1] <- plogis(gamma0 + gamma_y * df.obs[wobs1]$y + gamma_z*df.obs[wobs1]$z + df.obs[wobs1]$z*df.obs[wobs1]$y* gamma_yz, log=T) + p.w.obs[wobs0] <- plogis(gamma0 + gamma_y * df.obs[wobs0]$y + gamma_z*df.obs[wobs0]$z + df.obs[wobs0]$z*df.obs[wobs0]$y* gamma_yz, lower.tail=FALSE, log=T) - p.obs <- p.s.obs * p.y.obs + p.obs <- p.w.obs + p.y.obs df.unobs <- df[is.na(y.obs)] - p.unobs.1 <- plogis(B0 + Bxy * df.unobs$x + Bgy*df.unobs$g)*plogis(gamma0 + gamma_y + gamma_g*df.unobs$g) - p.unobs.0 <- (1-plogis(B0 + Bxy * df.unobs$x + Bgy*df.unobs$g))*plogis(gamma0 + gamma_g*df.unobs$g) - p.unobs <- p.unobs.1 + p.unobs.0 - p.unobs[df.unobs$w_pred==0] <- 1 - p.unobs[df.unobs$w_pred==0] + p.unobs.0 <- vector(mode='numeric',length=nrow(df.unobs)) + p.unobs.1 <- vector(mode='numeric',length=nrow(df.unobs)) + + wunobs.0 <- df.unobs$w_pred == 0 + wunobs.1 <- df.unobs$w_pred == 1 + + p.unobs.0[wunobs.1] <- plogis(B0 + Bxy * df.unobs[wunobs.1]$x + Bzy*df.unobs[wunobs.1]$z, log=T) + plogis(gamma0 + gamma_y + gamma_z*df.unobs[wunobs.1]$z + df.unobs[wunobs.1]$z*gamma_yz, log=T) + + p.unobs.0[wunobs.0] <- plogis(B0 + Bxy * df.unobs[wunobs.0]$x + Bzy*df.unobs[wunobs.0]$z, log=T) + plogis(gamma0 + gamma_y + gamma_z*df.unobs[wunobs.0]$z + df.unobs[wunobs.0]$z*gamma_yz, lower.tail=FALSE, log=T) + + p.unobs.1[wunobs.1] <- plogis(B0 + Bxy * df.unobs[wunobs.1]$x + Bzy*df.unobs[wunobs.1]$z, log=T, lower.tail=FALSE) + plogis(gamma0 + gamma_z*df.unobs[wunobs.1]$z, log=T) + + p.unobs.1[wunobs.0] <- plogis(B0 + Bxy * df.unobs[wunobs.0]$x + Bzy*df.unobs[wunobs.0]$z, log=T, lower.tail=FALSE) + plogis(gamma0 + gamma_z*df.unobs[wunobs.0]$z, lower.tail=FALSE, log=T) + + p.unobs <- colLogSumExps(rbind(p.unobs.1, p.unobs.0)) p <- c(p.obs, p.unobs) - return(-1*(sum(log(p)))) + return(-1*(sum(p))) } - mlefit <- stats4::mle(minuslogl = nll, start = list(B0=1, Bxy=0,Bgy=0, gamma0=5, gamma_y=0, gamma_g=0)) + mlefit <- mle2(minuslogl = nll, start = list(B0=0, Bxy=0,Bzy=0, gamma0=0, gamma_y=0, gamma_z=0, gamma_yz=0), control=list(maxit=1e6),method='L-BFGS-B') return(mlefit) } +run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formula=w_pred~y, confint_method='quad'){ -logistic <- function(x) {1/(1+exp(-1*x))} - -run_simulation_depvar <- function(df, result){ - - accuracy <- df[,mean(w_pred==y)] + (accuracy <- df[,mean(w_pred==y)]) result <- append(result, list(accuracy=accuracy)) + (error.cor.z <- cor(df$z, df$y - df$w_pred)) + (error.cor.x <- cor(df$x, df$y - df$w_pred)) + (error.cor.y <- cor(df$y, df$y - df$w_pred)) + result <- append(result, list(error.cor.x = error.cor.x, + error.cor.z = error.cor.z, + error.cor.y = error.cor.y)) + + model.null <- glm(y~1, data=df,family=binomial(link='logit')) + (model.true <- glm(y ~ x + z, data=df,family=binomial(link='logit'))) + (lik.ratio <- exp(logLik(model.true) - logLik(model.null))) - (model.true <- glm(y ~ x + g, data=df,family=binomial(link='logit'))) true.ci.Bxy <- confint(model.true)['x',] - true.ci.Bgy <- confint(model.true)['g',] + true.ci.Bzy <- confint(model.true)['z',] + + result <- append(result, list(cor.xz=cor(df$x,df$z))) + result <- append(result, list(lik.ratio=lik.ratio)) result <- append(result, list(Bxy.est.true=coef(model.true)['x'], - Bgy.est.true=coef(model.true)['g'], + Bzy.est.true=coef(model.true)['z'], Bxy.ci.upper.true = true.ci.Bxy[2], Bxy.ci.lower.true = true.ci.Bxy[1], - Bgy.ci.upper.true = true.ci.Bgy[2], - Bgy.ci.lower.true = true.ci.Bgy[1])) + Bzy.ci.upper.true = true.ci.Bzy[2], + Bzy.ci.lower.true = true.ci.Bzy[1])) - (model.feasible <- glm(y.obs~x+g,data=df,family=binomial(link='logit'))) + (model.feasible <- glm(y.obs~x+z,data=df,family=binomial(link='logit'))) feasible.ci.Bxy <- confint(model.feasible)['x',] result <- append(result, list(Bxy.est.feasible=coef(model.feasible)['x'], Bxy.ci.upper.feasible = feasible.ci.Bxy[2], Bxy.ci.lower.feasible = feasible.ci.Bxy[1])) - feasible.ci.Bgy <- confint(model.feasible)['g',] - result <- append(result, list(Bgy.est.feasible=coef(model.feasible)['g'], - Bgy.ci.upper.feasible = feasible.ci.Bgy[2], - Bgy.ci.lower.feasible = feasible.ci.Bgy[1])) + feasible.ci.Bzy <- confint(model.feasible)['z',] + result <- append(result, list(Bzy.est.feasible=coef(model.feasible)['z'], + Bzy.ci.upper.feasible = feasible.ci.Bzy[2], + Bzy.ci.lower.feasible = feasible.ci.Bzy[1])) - (model.naive <- glm(w_pred~x+g, data=df, family=binomial(link='logit'))) + (model.naive <- glm(w_pred~x+z, data=df, family=binomial(link='logit'))) naive.ci.Bxy <- confint(model.naive)['x',] - naive.ci.Bgy <- confint(model.naive)['g',] + naive.ci.Bzy <- confint(model.naive)['z',] result <- append(result, list(Bxy.est.naive=coef(model.naive)['x'], - Bgy.est.naive=coef(model.naive)['g'], + Bzy.est.naive=coef(model.naive)['z'], Bxy.ci.upper.naive = naive.ci.Bxy[2], Bxy.ci.lower.naive = naive.ci.Bxy[1], - Bgy.ci.upper.naive = naive.ci.Bgy[2], - Bgy.ci.lower.naive = naive.ci.Bgy[1])) + Bzy.ci.upper.naive = naive.ci.Bzy[2], + Bzy.ci.lower.naive = naive.ci.Bzy[1])) - (model.naive.cont <- lm(w~x+g, data=df)) + (model.naive.cont <- lm(w~x+z, data=df)) naivecont.ci.Bxy <- confint(model.naive.cont)['x',] - naivecont.ci.Bgy <- confint(model.naive.cont)['g',] + naivecont.ci.Bzy <- confint(model.naive.cont)['z',] - ## my implementatoin of liklihood based correction - mod.caroll.lik <- logistic.correction.liklihood(df) - coef <- coef(mod.caroll.lik) - ci <- confint(mod.caroll.lik) + ## my implementation of liklihood based correction - result <- append(result, - list(Bxy.est.mle = coef['Bxy'], - Bxy.ci.upper.mle = ci['Bxy','97.5 %'], - Bxy.ci.lower.mle = ci['Bxy','2.5 %'], - Bgy.est.mle = coef['Bgy'], - Bgy.ci.upper.mle = ci['Bgy','97.5 %'], - Bgy.ci.lower.mle = ci['Bgy','2.5 %'])) - + temp.df <- copy(df) + temp.df[,y:=y.obs] - ## my implementatoin of liklihood based correction - mod.caroll.pseudo <- logistic.correction.pseudo(df) - coef <- coef(mod.caroll.pseudo) - ci <- confint(mod.caroll.pseudo) + if(confint_method=='quad'){ + mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula) + fischer.info <- solve(mod.caroll.lik$hessian) + coef <- mod.caroll.lik$par + ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96 + ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96 + } + else{ ## confint_method is 'profile' + + mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, method='bbmle') + coef <- coef(mod.caroll.lik) + ci <- confint(mod.caroll.lik, method='spline') + ci.lower <- ci[,'2.5 %'] + ci.upper <- ci[,'97.5 %'] + } result <- append(result, - list(Bxy.est.pseudo = coef['Bxy'], - Bxy.ci.upper.pseudo = ci['Bxy','97.5 %'], - Bxy.ci.lower.pseudo = ci['Bxy','2.5 %'], - Bgy.est.pseudo = coef['Bgy'], - Bgy.ci.upper.pseudo = ci['Bgy','97.5 %'], - Bgy.ci.lower.pseudo = ci['Bgy','2.5 %'])) - + list(Bxy.est.mle = coef['x'], + Bxy.ci.upper.mle = ci.upper['x'], + Bxy.ci.lower.mle = ci.lower['x'], + Bzy.est.mle = coef['z'], + Bzy.ci.upper.mle = ci.upper['z'], + Bzy.ci.lower.mle = ci.lower['z'])) - # amelia says use normal distribution for binary variables. - amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('y','ystar','w_pred')) - mod.amelia.k <- zelig(y.obs~x+g, model='ls', data=amelia.out.k$imputations, cite=FALSE) - (coefse <- combine_coef_se(mod.amelia.k, messages=FALSE)) - est.x.mi <- coefse['x','Estimate'] - est.x.se <- coefse['x','Std.Error'] + ## my implementatoin of liklihood based correction + mod.zhang <- zhang.mle.dv(df) + coef <- coef(mod.zhang) + ci <- confint(mod.zhang,method='quad') + result <- append(result, - list(Bxy.est.amelia.full = est.x.mi, - Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se, - Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se - )) + list(Bxy.est.zhang = coef['Bxy'], + Bxy.ci.upper.zhang = ci['Bxy','97.5 %'], + Bxy.ci.lower.zhang = ci['Bxy','2.5 %'], + Bzy.est.zhang = coef['Bzy'], + Bzy.ci.upper.zhang = ci['Bzy','97.5 %'], + Bzy.ci.lower.zhang = ci['Bzy','2.5 %'])) - est.g.mi <- coefse['g','Estimate'] - est.g.se <- coefse['g','Std.Error'] + - result <- append(result, - list(Bgy.est.amelia.full = est.g.mi, - Bgy.ci.upper.amelia.full = est.g.mi + 1.96 * est.g.se, - Bgy.ci.lower.amelia.full = est.g.mi - 1.96 * est.g.se - )) + # amelia says use normal distribution for binary variables. + amelia_result <- list(Bxy.est.amelia.full = NA, + Bxy.ci.upper.amelia.full = NA, + Bxy.ci.lower.amelia.full = NA, + Bzy.est.amelia.full = NA, + Bzy.ci.upper.amelia.full = NA, + Bzy.ci.lower.amelia.full = NA + ) + + tryCatch({ + amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('y','ystar','w')) + mod.amelia.k <- zelig(y.obs~x+z, model='ls', data=amelia.out.k$imputations, cite=FALSE) + (coefse <- combine_coef_se(mod.amelia.k, messages=FALSE)) + est.x.mi <- coefse['x','Estimate'] + est.x.se <- coefse['x','Std.Error'] + + est.z.mi <- coefse['z','Estimate'] + est.z.se <- coefse['z','Std.Error'] + amelia_result <- list(Bxy.est.amelia.full = est.x.mi, + Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se, + Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se, + Bzy.est.amelia.full = est.z.mi, + Bzy.ci.upper.amelia.full = est.z.mi + 1.96 * est.z.se, + Bzy.ci.lower.amelia.full = est.z.mi - 1.96 * est.z.se + ) + }, + error = function(e){ + result[['error']] <- e} + ) + result <- append(result,amelia_result) return(result) } -run_simulation <- function(df, result){ - accuracy <- df[,mean(w_pred==x)] - result <- append(result, list(accuracy=accuracy)) +## outcome_formula, proxy_formula, and truth_formula are passed to measerr_mle +run_simulation <- function(df, result, outcome_formula=y~x+z, proxy_formula=NULL, truth_formula=NULL, confint_method='quad'){ - (model.true <- lm(y ~ x + g, data=df)) + accuracy <- df[,mean(w_pred==x)] + accuracy.y0 <- df[y<=0,mean(w_pred==x)] + accuracy.y1 <- df[y>=0,mean(w_pred==x)] + cor.y.xi <- cor(df$x - df$w_pred, df$y) + + fnr <- df[w_pred==0,mean(w_pred!=x)] + fnr.y0 <- df[(w_pred==0) & (y<=0),mean(w_pred!=x)] + fnr.y1 <- df[(w_pred==0) & (y>=0),mean(w_pred!=x)] + + fpr <- df[w_pred==1,mean(w_pred!=x)] + fpr.y0 <- df[(w_pred==1) & (y<=0),mean(w_pred!=x)] + fpr.y1 <- df[(w_pred==1) & (y>=0),mean(w_pred!=x)] + cor.resid.w_pred <- cor(resid(lm(y~x+z,df)),df$w_pred) + + result <- append(result, list(accuracy=accuracy, + accuracy.y0=accuracy.y0, + accuracy.y1=accuracy.y1, + cor.y.xi=cor.y.xi, + fnr=fnr, + fnr.y0=fnr.y0, + fnr.y1=fnr.y1, + fpr=fpr, + fpr.y0=fpr.y0, + fpr.y1=fpr.y1, + cor.resid.w_pred=cor.resid.w_pred + )) + + result <- append(result, list(cor.xz=cor(df$x,df$z))) + (model.true <- lm(y ~ x + z, data=df)) true.ci.Bxy <- confint(model.true)['x',] - true.ci.Bgy <- confint(model.true)['g',] + true.ci.Bzy <- confint(model.true)['z',] result <- append(result, list(Bxy.est.true=coef(model.true)['x'], - Bgy.est.true=coef(model.true)['g'], + Bzy.est.true=coef(model.true)['z'], Bxy.ci.upper.true = true.ci.Bxy[2], Bxy.ci.lower.true = true.ci.Bxy[1], - Bgy.ci.upper.true = true.ci.Bgy[2], - Bgy.ci.lower.true = true.ci.Bgy[1])) + Bzy.ci.upper.true = true.ci.Bzy[2], + Bzy.ci.lower.true = true.ci.Bzy[1])) - (model.feasible <- lm(y~x.obs+g,data=df)) + (model.feasible <- lm(y~x.obs+z,data=df)) feasible.ci.Bxy <- confint(model.feasible)['x.obs',] result <- append(result, list(Bxy.est.feasible=coef(model.feasible)['x.obs'], Bxy.ci.upper.feasible = feasible.ci.Bxy[2], Bxy.ci.lower.feasible = feasible.ci.Bxy[1])) - feasible.ci.Bgy <- confint(model.feasible)['g',] - result <- append(result, list(Bgy.est.feasible=coef(model.feasible)['g'], - Bgy.ci.upper.feasible = feasible.ci.Bgy[2], - Bgy.ci.lower.feasible = feasible.ci.Bgy[1])) + feasible.ci.Bzy <- confint(model.feasible)['z',] + result <- append(result, list(Bzy.est.feasible=coef(model.feasible)['z'], + Bzy.ci.upper.feasible = feasible.ci.Bzy[2], + Bzy.ci.lower.feasible = feasible.ci.Bzy[1])) - (model.naive <- lm(y~w+g, data=df)) + (model.naive <- lm(y~w_pred+z, data=df)) - naive.ci.Bxy <- confint(model.naive)['w',] - naive.ci.Bgy <- confint(model.naive)['g',] + naive.ci.Bxy <- confint(model.naive)['w_pred',] + naive.ci.Bzy <- confint(model.naive)['z',] - result <- append(result, list(Bxy.est.naive=coef(model.naive)['w'], - Bgy.est.naive=coef(model.naive)['g'], + result <- append(result, list(Bxy.est.naive=coef(model.naive)['w_pred'], + Bzy.est.naive=coef(model.naive)['z'], Bxy.ci.upper.naive = naive.ci.Bxy[2], Bxy.ci.lower.naive = naive.ci.Bxy[1], - Bgy.ci.upper.naive = naive.ci.Bgy[2], - Bgy.ci.lower.naive = naive.ci.Bgy[1])) - - - amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('x','w_pred')) - mod.amelia.k <- zelig(y~x.obs+g, model='ls', data=amelia.out.k$imputations, cite=FALSE) - (coefse <- combine_coef_se(mod.amelia.k, messages=FALSE)) + Bzy.ci.upper.naive = naive.ci.Bzy[2], + Bzy.ci.lower.naive = naive.ci.Bzy[1])) + + amelia_result <- list( + Bxy.est.amelia.full = NULL, + Bxy.ci.upper.amelia.full = NULL, + Bxy.ci.lower.amelia.full = NULL, + Bzy.est.amelia.full = NULL, + Bzy.ci.upper.amelia.full = NULL, + Bzy.ci.lower.amelia.full = NULL + ) + + tryCatch({ + amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('x','w')) + mod.amelia.k <- zelig(y~x.obs+z, model='ls', data=amelia.out.k$imputations, cite=FALSE) + (coefse <- combine_coef_se(mod.amelia.k)) + + est.x.mi <- coefse['x.obs','Estimate'] + est.x.se <- coefse['x.obs','Std.Error'] + est.z.mi <- coefse['z','Estimate'] + est.z.se <- coefse['z','Std.Error'] + + amelia_result <- list(Bxy.est.amelia.full = est.x.mi, + Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se, + Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se, + Bzy.est.amelia.full = est.z.mi, + Bzy.ci.upper.amelia.full = est.z.mi + 1.96 * est.z.se, + Bzy.ci.lower.amelia.full = est.z.mi - 1.96 * est.z.se + ) + + }, + + error = function(e){ + result[['error']] <- e} + ) + + + result <- append(result, amelia_result) + + + mle_result <- list(Bxy.est.mle = NULL, + Bxy.ci.upper.mle = NULL, + Bxy.ci.lower.mle = NULL, + Bzy.est.mle = NULL, + Bzy.ci.upper.mle = NULL, + Bzy.ci.lower.mle = NULL) + + tryCatch({ + temp.df <- copy(df) + temp.df <- temp.df[,x:=x.obs] + if(confint_method=='quad'){ + mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula, method='optim') + fischer.info <- solve(mod.caroll.lik$hessian) + coef <- mod.caroll.lik$par + ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96 + ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96 + } else { # confint_method == 'bbmle' + + mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula, method='bbmle') + coef <- coef(mod.caroll.lik) + ci <- confint(mod.caroll.lik, method='spline') + ci.lower <- ci[,'2.5 %'] + ci.upper <- ci[,'97.5 %'] + } + mle_result <- list(Bxy.est.mle = coef['x'], + Bxy.ci.upper.mle = ci.upper['x'], + Bxy.ci.lower.mle = ci.lower['x'], + Bzy.est.mle = coef['z'], + Bzy.ci.upper.mle = ci.upper['z'], + Bzy.ci.lower.mle = ci.lower['z']) + }, + + error=function(e) {result[['error']] <- as.character(e) + }) - est.x.mi <- coefse['x.obs','Estimate'] - est.x.se <- coefse['x.obs','Std.Error'] - result <- append(result, - list(Bxy.est.amelia.full = est.x.mi, - Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se, - Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se - )) - - est.g.mi <- coefse['g','Estimate'] - est.g.se <- coefse['g','Std.Error'] + + result <- append(result, mle_result) + mod.zhang.lik <- zhang.mle.iv(df) + coef <- coef(mod.zhang.lik) + ci <- confint(mod.zhang.lik,method='quad') result <- append(result, - list(Bgy.est.amelia.full = est.g.mi, - Bgy.ci.upper.amelia.full = est.g.mi + 1.96 * est.g.se, - Bgy.ci.lower.amelia.full = est.g.mi - 1.96 * est.g.se - )) + list(Bxy.est.zhang = coef['Bxy'], + Bxy.ci.upper.zhang = ci['Bxy','97.5 %'], + Bxy.ci.lower.zhang = ci['Bxy','2.5 %'], + Bzy.est.zhang = coef['Bzy'], + Bzy.ci.upper.zhang = ci['Bzy','97.5 %'], + Bzy.ci.lower.zhang = ci['Bzy','2.5 %'])) ## What if we can't observe k -- most realistic scenario. We can't include all the ML features in a model. ## amelia.out.nok <- amelia(df, m=200, p2s=0, idvars=c("x","w_pred"), noms=noms) @@ -255,10 +410,10 @@ run_simulation <- function(df, result){ df <- df[order(x.obs)] y <- df[,y] x <- df[,x.obs] - g <- df[,g] - w <- df[,w] + z <- df[,z] + w <- df[,w_pred] # gmm gets pretty close - (gmm.res <- predicted_covariates(y, x, g, w, v, train, p, max_iter=100, verbose=TRUE)) + (gmm.res <- predicted_covariates(y, x, z, w, v, train, p, max_iter=100, verbose=TRUE)) result <- append(result, list(Bxy.est.gmm = gmm.res$beta[1,1], @@ -268,28 +423,34 @@ run_simulation <- function(df, result){ )) result <- append(result, - list(Bgy.est.gmm = gmm.res$beta[2,1], - Bgy.ci.upper.gmm = gmm.res$confint[2,2], - Bgy.ci.lower.gmm = gmm.res$confint[2,1])) - - - mod.calibrated.mle <- mecor(y ~ MeasError(w, reference = x.obs) + g, df, B=400, method='efficient') - (mod.calibrated.mle) - (mecor.ci <- summary(mod.calibrated.mle)$c$ci['x.obs',]) - result <- append(result, list( - Bxy.est.mecor = mecor.ci['Estimate'], - Bxy.upper.mecor = mecor.ci['UCI'], - Bxy.lower.mecor = mecor.ci['LCI']) - ) - - (mecor.ci <- summary(mod.calibrated.mle)$c$ci['g',]) - - result <- append(result, list( - Bgy.est.mecor = mecor.ci['Estimate'], - Bgy.upper.mecor = mecor.ci['UCI'], - Bgy.lower.mecor = mecor.ci['LCI']) - ) - + list(Bzy.est.gmm = gmm.res$beta[2,1], + Bzy.ci.upper.gmm = gmm.res$confint[2,2], + Bzy.ci.lower.gmm = gmm.res$confint[2,1])) + + + ## tryCatch({ + ## mod.calibrated.mle <- mecor(y ~ MeasError(w_pred, reference = x.obs) + z, df, B=400, method='efficient') + ## (mod.calibrated.mle) + ## (mecor.ci <- summary(mod.calibrated.mle)$c$ci['x.obs',]) + ## result <- append(result, list( + ## Bxy.est.mecor = mecor.ci['Estimate'], + ## Bxy.ci.upper.mecor = mecor.ci['UCI'], + ## Bxy.ci.lower.mecor = mecor.ci['LCI']) + ## ) + + ## (mecor.ci <- summary(mod.calibrated.mle)$c$ci['z',]) + + ## result <- append(result, list( + ## Bzy.est.mecor = mecor.ci['Estimate'], + ## Bzy.ci.upper.mecor = mecor.ci['UCI'], + ## Bzy.ci.lower.mecor = mecor.ci['LCI']) + ## ) + ## }, + ## error = function(e){ + ## message("An error occurred:\n",e) + ## result$error <- paste0(result$error, '\n', e) + ## } + ## ) ## clean up memory ## rm(list=c("df","y","x","g","w","v","train","p","amelia.out.k","amelia.out.nok", "mod.calibrated.mle","gmm.res","mod.amelia.k","mod.amelia.nok", "model.true","model.naive","model.feasible"))