X-Git-Url: https://code.communitydata.science/ml_measurement_error_public.git/blobdiff_plain/bb6f5e4731c603b336afb1a900bc9083d1b175bf..refs/heads/master:/simulations/simulation_base.R diff --git a/simulations/simulation_base.R b/simulations/simulation_base.R index bafd7d3..af03408 100644 --- a/simulations/simulation_base.R +++ b/simulations/simulation_base.R @@ -151,21 +151,11 @@ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formu temp.df <- copy(df) temp.df[,y:=y.obs] - if(confint_method=='quad'){ - mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula) - fischer.info <- solve(mod.caroll.lik$hessian) - coef <- mod.caroll.lik$par - ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96 - ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96 - } - else{ ## confint_method is 'profile' - - mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, method='bbmle') - coef <- coef(mod.caroll.lik) - ci <- confint(mod.caroll.lik, method='spline') - ci.lower <- ci[,'2.5 %'] - ci.upper <- ci[,'97.5 %'] - } + mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula) + fischer.info <- solve(mod.caroll.lik$hessian) + coef <- mod.caroll.lik$par + ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96 + ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96 result <- append(result, list(Bxy.est.mle = coef['x'], @@ -175,6 +165,19 @@ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formu Bzy.ci.upper.mle = ci.upper['z'], Bzy.ci.lower.mle = ci.lower['z'])) + mod.caroll.profile.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, method='bbmle') + coef <- coef(mod.caroll.profile.lik) + ci <- confint(mod.caroll.profile.lik, method='spline') + ci.lower <- ci[,'2.5 %'] + ci.upper <- ci[,'97.5 %'] + + result <- append(result, + list(Bxy.est.mle.profile = coef['x'], + Bxy.ci.upper.mle.profile = ci.upper['x'], + Bxy.ci.lower.mle.profile = ci.lower['x'], + Bzy.est.mle.profile = coef['z'], + Bzy.ci.upper.mle.profile = ci.upper['z'], + Bzy.ci.lower.mle.profile = ci.lower['z'])) ## my implementatoin of liklihood based correction mod.zhang <- zhang.mle.dv(df) @@ -201,8 +204,8 @@ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formu ) tryCatch({ - amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('y','ystar','w')) - mod.amelia.k <- zelig(y.obs~x+z, model='ls', data=amelia.out.k$imputations, cite=FALSE) + amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('y','ystar','w'),ords="y.obs") + mod.amelia.k <- zelig(y.obs~x+z, model='logit', data=amelia.out.k$imputations, cite=FALSE) (coefse <- combine_coef_se(mod.amelia.k, messages=FALSE)) est.x.mi <- coefse['x','Estimate'] est.x.se <- coefse['x','Std.Error'] @@ -340,44 +343,72 @@ run_simulation <- function(df, result, outcome_formula=y~x+z, proxy_formula=NUL tryCatch({ temp.df <- copy(df) temp.df <- temp.df[,x:=x.obs] - if(confint_method=='quad'){ - mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula, method='optim') - fischer.info <- solve(mod.caroll.lik$hessian) - coef <- mod.caroll.lik$par - ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96 - ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96 - } else { # confint_method == 'bbmle' - - mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula, method='bbmle') - coef <- coef(mod.caroll.lik) - ci <- confint(mod.caroll.lik, method='spline') - ci.lower <- ci[,'2.5 %'] - ci.upper <- ci[,'97.5 %'] - } + mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula, method='optim') + fischer.info <- solve(mod.caroll.lik$hessian) + coef <- mod.caroll.lik$par + ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96 + ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96 + mle_result <- list(Bxy.est.mle = coef['x'], Bxy.ci.upper.mle = ci.upper['x'], Bxy.ci.lower.mle = ci.lower['x'], Bzy.est.mle = coef['z'], Bzy.ci.upper.mle = ci.upper['z'], Bzy.ci.lower.mle = ci.lower['z']) + }, + error=function(e) {result[['error']] <- as.character(e) + }) + + result <- append(result, mle_result) + mle_result_proflik <- list(Bxy.est.mle.profile = NULL, + Bxy.ci.upper.mle.profile = NULL, + Bxy.ci.lower.mle.profile = NULL, + Bzy.est.mle.profile = NULL, + Bzy.ci.upper.mle.profile = NULL, + Bzy.ci.lower.mle.profile = NULL) + + tryCatch({ + ## confint_method == 'bbmle' + mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula, method='bbmle') + coef <- coef(mod.caroll.lik) + ci <- confint(mod.caroll.lik, method='spline') + ci.lower <- ci[,'2.5 %'] + ci.upper <- ci[,'97.5 %'] + + mle_result_proflik <- list(Bxy.est.mle.profile = coef['x'], + Bxy.ci.upper.mle.profile = ci.upper['x'], + Bxy.ci.lower.mle.profile = ci.lower['x'], + Bzy.est.mle.profile = coef['z'], + Bzy.ci.upper.mle.profile = ci.upper['z'], + Bzy.ci.lower.mle.profile = ci.lower['z']) }, error=function(e) {result[['error']] <- as.character(e) }) - - result <- append(result, mle_result) + result <- append(result, mle_result_proflik) + zhang_result <- list(Bxy.est.mle.zhang = NULL, + Bxy.ci.upper.mle.zhang = NULL, + Bxy.ci.lower.mle.zhang = NULL, + Bzy.est.mle.zhang = NULL, + Bzy.ci.upper.mle.zhang = NULL, + Bzy.ci.lower.mle.zhang = NULL) + + tryCatch({ mod.zhang.lik <- zhang.mle.iv(df) coef <- coef(mod.zhang.lik) ci <- confint(mod.zhang.lik,method='quad') - result <- append(result, - list(Bxy.est.zhang = coef['Bxy'], - Bxy.ci.upper.zhang = ci['Bxy','97.5 %'], - Bxy.ci.lower.zhang = ci['Bxy','2.5 %'], - Bzy.est.zhang = coef['Bzy'], - Bzy.ci.upper.zhang = ci['Bzy','97.5 %'], - Bzy.ci.lower.zhang = ci['Bzy','2.5 %'])) + zhang_result <- list(Bxy.est.zhang = coef['Bxy'], + Bxy.ci.upper.zhang = ci['Bxy','97.5 %'], + Bxy.ci.lower.zhang = ci['Bxy','2.5 %'], + Bzy.est.zhang = coef['Bzy'], + Bzy.ci.upper.zhang = ci['Bzy','97.5 %'], + Bzy.ci.lower.zhang = ci['Bzy','2.5 %']) + }, + error=function(e) {result[['error']] <- as.character(e) + }) + result <- append(result, zhang_result) ## What if we can't observe k -- most realistic scenario. We can't include all the ML features in a model. ## amelia.out.nok <- amelia(df, m=200, p2s=0, idvars=c("x","w_pred"), noms=noms)