X-Git-Url: https://code.communitydata.science/ml_measurement_error_public.git/blobdiff_plain/fa05dbab6bd2c5db6ed4eccf38cff03bb4fd6683..d9d3e47a44ddead1cdf7a649bc0e9849c2219498:/simulations/simulation_base.R diff --git a/simulations/simulation_base.R b/simulations/simulation_base.R index e715edf..bafd7d3 100644 --- a/simulations/simulation_base.R +++ b/simulations/simulation_base.R @@ -89,7 +89,7 @@ my.mle <- function(df){ return(mlefit) } -run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formula=w_pred~y){ +run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formula=w_pred~y, confint_method='quad'){ (accuracy <- df[,mean(w_pred==y)]) result <- append(result, list(accuracy=accuracy)) @@ -150,11 +150,23 @@ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formu temp.df <- copy(df) temp.df[,y:=y.obs] - mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula) - fischer.info <- solve(mod.caroll.lik$hessian) - coef <- mod.caroll.lik$par - ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96 - ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96 + + if(confint_method=='quad'){ + mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula) + fischer.info <- solve(mod.caroll.lik$hessian) + coef <- mod.caroll.lik$par + ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96 + ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96 + } + else{ ## confint_method is 'profile' + + mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, method='bbmle') + coef <- coef(mod.caroll.lik) + ci <- confint(mod.caroll.lik, method='spline') + ci.lower <- ci[,'2.5 %'] + ci.upper <- ci[,'97.5 %'] + } + result <- append(result, list(Bxy.est.mle = coef['x'], Bxy.ci.upper.mle = ci.upper['x'], @@ -216,7 +228,7 @@ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formu ## outcome_formula, proxy_formula, and truth_formula are passed to measerr_mle -run_simulation <- function(df, result, outcome_formula=y~x+z, proxy_formula=NULL, truth_formula=NULL){ +run_simulation <- function(df, result, outcome_formula=y~x+z, proxy_formula=NULL, truth_formula=NULL, confint_method='quad'){ accuracy <- df[,mean(w_pred==x)] accuracy.y0 <- df[y<=0,mean(w_pred==x)] @@ -280,79 +292,92 @@ run_simulation <- function(df, result, outcome_formula=y~x+z, proxy_formula=NUL Bxy.ci.lower.naive = naive.ci.Bxy[1], Bzy.ci.upper.naive = naive.ci.Bzy[2], Bzy.ci.lower.naive = naive.ci.Bzy[1])) - - - amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('x','w')) - mod.amelia.k <- zelig(y~x.obs+z, model='ls', data=amelia.out.k$imputations, cite=FALSE) - (coefse <- combine_coef_se(mod.amelia.k, messages=FALSE)) + amelia_result <- list( + Bxy.est.amelia.full = NULL, + Bxy.ci.upper.amelia.full = NULL, + Bxy.ci.lower.amelia.full = NULL, + Bzy.est.amelia.full = NULL, + Bzy.ci.upper.amelia.full = NULL, + Bzy.ci.lower.amelia.full = NULL + ) - est.x.mi <- coefse['x.obs','Estimate'] - est.x.se <- coefse['x.obs','Std.Error'] - result <- append(result, - list(Bxy.est.amelia.full = est.x.mi, - Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se, - Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se - )) + tryCatch({ + amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('x','w')) + mod.amelia.k <- zelig(y~x.obs+z, model='ls', data=amelia.out.k$imputations, cite=FALSE) + (coefse <- combine_coef_se(mod.amelia.k)) - est.z.mi <- coefse['z','Estimate'] - est.z.se <- coefse['z','Std.Error'] + est.x.mi <- coefse['x.obs','Estimate'] + est.x.se <- coefse['x.obs','Std.Error'] + est.z.mi <- coefse['z','Estimate'] + est.z.se <- coefse['z','Std.Error'] - result <- append(result, - list(Bzy.est.amelia.full = est.z.mi, - Bzy.ci.upper.amelia.full = est.z.mi + 1.96 * est.z.se, - Bzy.ci.lower.amelia.full = est.z.mi - 1.96 * est.z.se - )) + amelia_result <- list(Bxy.est.amelia.full = est.x.mi, + Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se, + Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se, + Bzy.est.amelia.full = est.z.mi, + Bzy.ci.upper.amelia.full = est.z.mi + 1.96 * est.z.se, + Bzy.ci.lower.amelia.full = est.z.mi - 1.96 * est.z.se + ) + }, - temp.df <- copy(df) - temp.df <- temp.df[,x:=x.obs] - mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula) + error = function(e){ + result[['error']] <- e} + ) - ## tryCatch({ - ## mod.calibrated.mle <- mecor(y ~ MeasError(w_pred, reference = x.obs) + z, df, B=400, method='efficient') - ## (mod.calibrated.mle) - ## (mecor.ci <- summary(mod.calibrated.mle)$c$ci['x.obs',]) - ## result <- append(result, list( - ## Bxy.est.mecor = mecor.ci['Estimate'], - ## Bxy.ci.upper.mecor = mecor.ci['UCI'], - ## Bxy.ci.lower.mecor = mecor.ci['LCI']) - ## ) + result <- append(result, amelia_result) - fischer.info <- NA - ci.upper <- NA - ci.lower <- NA + mle_result <- list(Bxy.est.mle = NULL, + Bxy.ci.upper.mle = NULL, + Bxy.ci.lower.mle = NULL, + Bzy.est.mle = NULL, + Bzy.ci.upper.mle = NULL, + Bzy.ci.lower.mle = NULL) - tryCatch({fischer.info <- solve(mod.caroll.lik$hessian) - ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96 - ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96 + tryCatch({ + temp.df <- copy(df) + temp.df <- temp.df[,x:=x.obs] + if(confint_method=='quad'){ + mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula, method='optim') + fischer.info <- solve(mod.caroll.lik$hessian) + coef <- mod.caroll.lik$par + ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96 + ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96 + } else { # confint_method == 'bbmle' + + mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula, method='bbmle') + coef <- coef(mod.caroll.lik) + ci <- confint(mod.caroll.lik, method='spline') + ci.lower <- ci[,'2.5 %'] + ci.upper <- ci[,'97.5 %'] + } + mle_result <- list(Bxy.est.mle = coef['x'], + Bxy.ci.upper.mle = ci.upper['x'], + Bxy.ci.lower.mle = ci.lower['x'], + Bzy.est.mle = coef['z'], + Bzy.ci.upper.mle = ci.upper['z'], + Bzy.ci.lower.mle = ci.lower['z']) }, error=function(e) {result[['error']] <- as.character(e) }) - coef <- mod.caroll.lik$par - result <- append(result, - list(Bxy.est.mle = coef['x'], - Bxy.ci.upper.mle = ci.upper['x'], - Bxy.ci.lower.mle = ci.lower['x'], - Bzy.est.mle = coef['z'], - Bzy.ci.upper.mle = ci.upper['z'], - Bzy.ci.lower.mle = ci.lower['z'])) - - mod.zhang.lik <- zhang.mle.iv(df) - coef <- coef(mod.zhang.lik) - ci <- confint(mod.zhang.lik,method='quad') - result <- append(result, - list(Bxy.est.zhang = coef['Bxy'], - Bxy.ci.upper.zhang = ci['Bxy','97.5 %'], - Bxy.ci.lower.zhang = ci['Bxy','2.5 %'], - Bzy.est.zhang = coef['Bzy'], - Bzy.ci.upper.zhang = ci['Bzy','97.5 %'], - Bzy.ci.lower.zhang = ci['Bzy','2.5 %'])) + result <- append(result, mle_result) + + mod.zhang.lik <- zhang.mle.iv(df) + coef <- coef(mod.zhang.lik) + ci <- confint(mod.zhang.lik,method='quad') + result <- append(result, + list(Bxy.est.zhang = coef['Bxy'], + Bxy.ci.upper.zhang = ci['Bxy','97.5 %'], + Bxy.ci.lower.zhang = ci['Bxy','2.5 %'], + Bzy.est.zhang = coef['Bzy'], + Bzy.ci.upper.zhang = ci['Bzy','97.5 %'], + Bzy.ci.lower.zhang = ci['Bzy','2.5 %'])) ## What if we can't observe k -- most realistic scenario. We can't include all the ML features in a model. ## amelia.out.nok <- amelia(df, m=200, p2s=0, idvars=c("x","w_pred"), noms=noms)