X-Git-Url: https://code.communitydata.science/ml_measurement_error_public.git/blobdiff_plain/bb6f5e4731c603b336afb1a900bc9083d1b175bf..refs/heads/main:/simulations/simulation_base.R

diff --git a/simulations/simulation_base.R b/simulations/simulation_base.R
index bafd7d3..af03408 100644
--- a/simulations/simulation_base.R
+++ b/simulations/simulation_base.R
@@ -151,21 +151,11 @@ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formu
     temp.df <- copy(df)
     temp.df[,y:=y.obs]
 
-    if(confint_method=='quad'){
-        mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula)
-        fischer.info <- solve(mod.caroll.lik$hessian)
-        coef <- mod.caroll.lik$par
-        ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96
-        ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96
-    }
-    else{ ## confint_method is 'profile'
-
-        mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, method='bbmle')
-        coef <- coef(mod.caroll.lik)
-        ci <- confint(mod.caroll.lik, method='spline')
-        ci.lower <- ci[,'2.5 %']
-        ci.upper <- ci[,'97.5 %']
-    }
+    mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula)
+    fischer.info <- solve(mod.caroll.lik$hessian)
+    coef <- mod.caroll.lik$par
+    ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96
+    ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96
 
     result <- append(result,
                      list(Bxy.est.mle = coef['x'],
@@ -175,6 +165,19 @@ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formu
                           Bzy.ci.upper.mle = ci.upper['z'],
                           Bzy.ci.lower.mle = ci.lower['z']))
 
+    mod.caroll.profile.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, method='bbmle')
+    coef <- coef(mod.caroll.profile.lik)
+    ci <- confint(mod.caroll.profile.lik, method='spline')
+    ci.lower <- ci[,'2.5 %']
+    ci.upper <- ci[,'97.5 %']
+
+    result <- append(result,
+                     list(Bxy.est.mle.profile = coef['x'],
+                          Bxy.ci.upper.mle.profile = ci.upper['x'],
+                          Bxy.ci.lower.mle.profile = ci.lower['x'],
+                          Bzy.est.mle.profile = coef['z'],
+                          Bzy.ci.upper.mle.profile = ci.upper['z'],
+                          Bzy.ci.lower.mle.profile = ci.lower['z']))
 
     ## my implementatoin of liklihood based correction
     mod.zhang <- zhang.mle.dv(df)
@@ -201,8 +204,8 @@ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formu
                           )
 
     tryCatch({
-        amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('y','ystar','w'))
-        mod.amelia.k <- zelig(y.obs~x+z, model='ls', data=amelia.out.k$imputations, cite=FALSE)
+        amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('y','ystar','w'),ords="y.obs")
+        mod.amelia.k <- zelig(y.obs~x+z, model='logit', data=amelia.out.k$imputations, cite=FALSE)
         (coefse <- combine_coef_se(mod.amelia.k, messages=FALSE))
         est.x.mi <- coefse['x','Estimate']
         est.x.se <- coefse['x','Std.Error']
@@ -340,44 +343,72 @@ run_simulation <-  function(df, result, outcome_formula=y~x+z, proxy_formula=NUL
     tryCatch({
         temp.df <- copy(df)
         temp.df <- temp.df[,x:=x.obs]
-        if(confint_method=='quad'){
-            mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula, method='optim')
-            fischer.info <- solve(mod.caroll.lik$hessian)
-            coef <- mod.caroll.lik$par
-            ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96
-            ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96
-        } else { # confint_method == 'bbmle'
-
-            mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula, method='bbmle')
-            coef <- coef(mod.caroll.lik)
-            ci <- confint(mod.caroll.lik, method='spline')
-            ci.lower <- ci[,'2.5 %']
-            ci.upper <- ci[,'97.5 %']
-        }
+        mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula, method='optim')
+        fischer.info <- solve(mod.caroll.lik$hessian)
+        coef <- mod.caroll.lik$par
+        ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96
+        ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96
+
         mle_result <- list(Bxy.est.mle = coef['x'],
                            Bxy.ci.upper.mle = ci.upper['x'],
                            Bxy.ci.lower.mle = ci.lower['x'],
                            Bzy.est.mle = coef['z'],
                            Bzy.ci.upper.mle = ci.upper['z'],
                            Bzy.ci.lower.mle = ci.lower['z'])
+        },
+        error=function(e) {result[['error']] <- as.character(e)
+        })
+
+    result <- append(result, mle_result)
+    mle_result_proflik <- list(Bxy.est.mle.profile = NULL,
+                               Bxy.ci.upper.mle.profile = NULL,
+                               Bxy.ci.lower.mle.profile = NULL,
+                               Bzy.est.mle.profile = NULL,
+                               Bzy.ci.upper.mle.profile = NULL,
+                               Bzy.ci.lower.mle.profile = NULL)
+
+    tryCatch({
+        ## confint_method == 'bbmle'
+        mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula, method='bbmle')
+        coef <- coef(mod.caroll.lik)
+        ci <- confint(mod.caroll.lik, method='spline')
+        ci.lower <- ci[,'2.5 %']
+        ci.upper <- ci[,'97.5 %']
+
+        mle_result_proflik <- list(Bxy.est.mle.profile = coef['x'],
+                                   Bxy.ci.upper.mle.profile = ci.upper['x'],
+                                   Bxy.ci.lower.mle.profile = ci.lower['x'],
+                                   Bzy.est.mle.profile = coef['z'],
+                                   Bzy.ci.upper.mle.profile = ci.upper['z'],
+                                   Bzy.ci.lower.mle.profile = ci.lower['z'])
     },
 
     error=function(e) {result[['error']] <- as.character(e)
     })
-
         
-    result <- append(result, mle_result)
+    result <- append(result, mle_result_proflik)
 
+    zhang_result <- list(Bxy.est.mle.zhang = NULL,
+                       Bxy.ci.upper.mle.zhang = NULL,
+                       Bxy.ci.lower.mle.zhang = NULL,
+                       Bzy.est.mle.zhang = NULL,
+                       Bzy.ci.upper.mle.zhang = NULL,
+                       Bzy.ci.lower.mle.zhang = NULL)
+
+    tryCatch({
     mod.zhang.lik <- zhang.mle.iv(df)
     coef <- coef(mod.zhang.lik)
     ci <- confint(mod.zhang.lik,method='quad')
-    result <- append(result,
-                     list(Bxy.est.zhang = coef['Bxy'],
-                          Bxy.ci.upper.zhang = ci['Bxy','97.5 %'],
-                          Bxy.ci.lower.zhang = ci['Bxy','2.5 %'],
-                          Bzy.est.zhang = coef['Bzy'],
-                          Bzy.ci.upper.zhang = ci['Bzy','97.5 %'],
-                          Bzy.ci.lower.zhang = ci['Bzy','2.5 %']))
+    zhang_result <- list(Bxy.est.zhang = coef['Bxy'],
+                         Bxy.ci.upper.zhang = ci['Bxy','97.5 %'],
+                         Bxy.ci.lower.zhang = ci['Bxy','2.5 %'],
+                         Bzy.est.zhang = coef['Bzy'],
+                         Bzy.ci.upper.zhang = ci['Bzy','97.5 %'],
+                         Bzy.ci.lower.zhang = ci['Bzy','2.5 %'])
+    },
+    error=function(e) {result[['error']] <- as.character(e)
+    })
+    result <- append(result, zhang_result)
 
     ## What if we can't observe k -- most realistic scenario. We can't include all the ML features in a model.
     ## amelia.out.nok <- amelia(df, m=200, p2s=0, idvars=c("x","w_pred"), noms=noms)