]> code.communitydata.science - ml_measurement_error_public.git/blobdiff - simulations/simulation_base.R
real-data example on raw perspective scores
[ml_measurement_error_public.git] / simulations / simulation_base.R
index 08b11ec9595a49a34553b3b1eaaaa3fb1463e27f..bafd7d34bba08ccd43513e3d699f4e3090e8fa30 100644 (file)
@@ -89,7 +89,7 @@ my.mle <- function(df){
     return(mlefit)
 }
 
-run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formula=w_pred~y){
+run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formula=w_pred~y, confint_method='quad'){
 
     (accuracy <- df[,mean(w_pred==y)])
     result <- append(result, list(accuracy=accuracy))
@@ -150,11 +150,23 @@ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formu
 
     temp.df <- copy(df)
     temp.df[,y:=y.obs]
-    mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula)
-    fischer.info <- solve(mod.caroll.lik$hessian)
-    coef <- mod.caroll.lik$par
-    ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96
-    ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96
+
+    if(confint_method=='quad'){
+        mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula)
+        fischer.info <- solve(mod.caroll.lik$hessian)
+        coef <- mod.caroll.lik$par
+        ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96
+        ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96
+    }
+    else{ ## confint_method is 'profile'
+
+        mod.caroll.lik <- measerr_mle_dv(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, method='bbmle')
+        coef <- coef(mod.caroll.lik)
+        ci <- confint(mod.caroll.lik, method='spline')
+        ci.lower <- ci[,'2.5 %']
+        ci.upper <- ci[,'97.5 %']
+    }
+
     result <- append(result,
                      list(Bxy.est.mle = coef['x'],
                           Bxy.ci.upper.mle = ci.upper['x'],
@@ -180,26 +192,35 @@ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formu
     
 
     # amelia says use normal distribution for binary variables.
-
-    amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('y','ystar','w'))
-    mod.amelia.k <- zelig(y.obs~x+z, model='ls', data=amelia.out.k$imputations, cite=FALSE)
-    (coefse <- combine_coef_se(mod.amelia.k, messages=FALSE))
-    est.x.mi <- coefse['x','Estimate']
-    est.x.se <- coefse['x','Std.Error']
-    result <- append(result,
-                     list(Bxy.est.amelia.full = est.x.mi,
+    amelia_result <- list(Bxy.est.amelia.full = NA,
+                          Bxy.ci.upper.amelia.full = NA,
+                          Bxy.ci.lower.amelia.full = NA,
+                          Bzy.est.amelia.full = NA,
+                          Bzy.ci.upper.amelia.full = NA,
+                          Bzy.ci.lower.amelia.full = NA
+                          )
+
+    tryCatch({
+        amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('y','ystar','w'))
+        mod.amelia.k <- zelig(y.obs~x+z, model='ls', data=amelia.out.k$imputations, cite=FALSE)
+        (coefse <- combine_coef_se(mod.amelia.k, messages=FALSE))
+        est.x.mi <- coefse['x','Estimate']
+        est.x.se <- coefse['x','Std.Error']
+
+        est.z.mi <- coefse['z','Estimate']
+        est.z.se <- coefse['z','Std.Error']
+        amelia_result <- list(Bxy.est.amelia.full = est.x.mi,
                           Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se,
-                          Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se
-                          ))
-
-    est.z.mi <- coefse['z','Estimate']
-    est.z.se <- coefse['z','Std.Error']
-
-    result <- append(result,
-                     list(Bzy.est.amelia.full = est.z.mi,
+                          Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se,
+                          Bzy.est.amelia.full = est.z.mi,
                           Bzy.ci.upper.amelia.full = est.z.mi + 1.96 * est.z.se,
                           Bzy.ci.lower.amelia.full = est.z.mi - 1.96 * est.z.se
-                          ))
+                          )
+    },
+    error = function(e){
+    result[['error']] <- e}
+    )
+    result <- append(result,amelia_result)
 
     return(result)
 
@@ -207,7 +228,7 @@ run_simulation_depvar <- function(df, result, outcome_formula=y~x+z, proxy_formu
 
 
 ## outcome_formula, proxy_formula, and truth_formula are passed to measerr_mle 
-run_simulation <-  function(df, result, outcome_formula=y~x+z, proxy_formula=NULL, truth_formula=NULL){
+run_simulation <-  function(df, result, outcome_formula=y~x+z, proxy_formula=NULL, truth_formula=NULL, confint_method='quad'){
 
     accuracy <- df[,mean(w_pred==x)]
     accuracy.y0 <- df[y<=0,mean(w_pred==x)]
@@ -271,79 +292,92 @@ run_simulation <-  function(df, result, outcome_formula=y~x+z, proxy_formula=NUL
                                   Bxy.ci.lower.naive = naive.ci.Bxy[1],
                                   Bzy.ci.upper.naive = naive.ci.Bzy[2],
                                   Bzy.ci.lower.naive = naive.ci.Bzy[1]))
-                                  
 
+    amelia_result <- list(
+        Bxy.est.amelia.full = NULL,
+        Bxy.ci.upper.amelia.full = NULL,
+        Bxy.ci.lower.amelia.full = NULL,
+        Bzy.est.amelia.full = NULL,
+        Bzy.ci.upper.amelia.full = NULL,
+        Bzy.ci.lower.amelia.full = NULL
+        )
+
+    tryCatch({
+        amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('x','w'))
+        mod.amelia.k <- zelig(y~x.obs+z, model='ls', data=amelia.out.k$imputations, cite=FALSE)
+        (coefse <- combine_coef_se(mod.amelia.k))
+
+        est.x.mi <- coefse['x.obs','Estimate']
+        est.x.se <- coefse['x.obs','Std.Error']
+        est.z.mi <- coefse['z','Estimate']
+        est.z.se <- coefse['z','Std.Error']
+
+        amelia_result <- list(Bxy.est.amelia.full = est.x.mi,
+                              Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se,
+                              Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se,
+                              Bzy.est.amelia.full = est.z.mi,
+                              Bzy.ci.upper.amelia.full = est.z.mi + 1.96 * est.z.se,
+                              Bzy.ci.lower.amelia.full = est.z.mi - 1.96 * est.z.se
+                              )
 
-    amelia.out.k <- amelia(df, m=200, p2s=0, idvars=c('x','w'))
-    mod.amelia.k <- zelig(y~x.obs+z, model='ls', data=amelia.out.k$imputations, cite=FALSE)
-    (coefse <- combine_coef_se(mod.amelia.k, messages=FALSE))
+    },
 
-    est.x.mi <- coefse['x.obs','Estimate']
-    est.x.se <- coefse['x.obs','Std.Error']
-    result <- append(result,
-                     list(Bxy.est.amelia.full = est.x.mi,
-                          Bxy.ci.upper.amelia.full = est.x.mi + 1.96 * est.x.se,
-                          Bxy.ci.lower.amelia.full = est.x.mi - 1.96 * est.x.se
-                          ))
+    error = function(e){
+        result[['error']] <- e}
+    )
 
-    est.z.mi <- coefse['z','Estimate']
-    est.z.se <- coefse['z','Std.Error']
 
-    result <- append(result,
-                     list(Bzy.est.amelia.full = est.z.mi,
-                          Bzy.ci.upper.amelia.full = est.z.mi + 1.96 * est.z.se,
-                          Bzy.ci.lower.amelia.full = est.z.mi - 1.96 * est.z.se
-                          ))
+    result <- append(result, amelia_result)
+
 
+   mle_result <- list(Bxy.est.mle = NULL,
+                      Bxy.ci.upper.mle = NULL,
+                      Bxy.ci.lower.mle = NULL,
+                      Bzy.est.mle = NULL,
+                      Bzy.ci.upper.mle = NULL,
+                      Bzy.ci.lower.mle = NULL)
 
+    tryCatch({
         temp.df <- copy(df)
         temp.df <- temp.df[,x:=x.obs]
-        mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula)
-
-    ## tryCatch({
-    ## mod.calibrated.mle <- mecor(y ~ MeasError(w_pred, reference = x.obs) + z, df, B=400, method='efficient')
-    ## (mod.calibrated.mle)
-    ## (mecor.ci <- summary(mod.calibrated.mle)$c$ci['x.obs',])
-    ## result <- append(result, list(
-    ##                              Bxy.est.mecor = mecor.ci['Estimate'],
-    ##                              Bxy.ci.upper.mecor = mecor.ci['UCI'],
-    ##                              Bxy.ci.lower.mecor = mecor.ci['LCI'])
-    ##                  )
-
-
-
-    fischer.info <- NA
-    ci.upper <- NA
-    ci.lower <- NA
-
-    tryCatch({fischer.info <- solve(mod.caroll.lik$hessian)
-        ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96
-        ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96
+        if(confint_method=='quad'){
+            mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula, method='optim')
+            fischer.info <- solve(mod.caroll.lik$hessian)
+            coef <- mod.caroll.lik$par
+            ci.upper <- coef + sqrt(diag(fischer.info)) * 1.96
+            ci.lower <- coef - sqrt(diag(fischer.info)) * 1.96
+        } else { # confint_method == 'bbmle'
+
+            mod.caroll.lik <- measerr_mle(temp.df, outcome_formula=outcome_formula, proxy_formula=proxy_formula, truth_formula=truth_formula, method='bbmle')
+            coef <- coef(mod.caroll.lik)
+            ci <- confint(mod.caroll.lik, method='spline')
+            ci.lower <- ci[,'2.5 %']
+            ci.upper <- ci[,'97.5 %']
+        }
+        mle_result <- list(Bxy.est.mle = coef['x'],
+                           Bxy.ci.upper.mle = ci.upper['x'],
+                           Bxy.ci.lower.mle = ci.lower['x'],
+                           Bzy.est.mle = coef['z'],
+                           Bzy.ci.upper.mle = ci.upper['z'],
+                           Bzy.ci.lower.mle = ci.lower['z'])
     },
 
     error=function(e) {result[['error']] <- as.character(e)
     })
 
-    coef <- mod.caroll.lik$par
         
-        result <- append(result,
-                         list(Bxy.est.mle = coef['x'],
-                              Bxy.ci.upper.mle = ci.upper['x'],
-                              Bxy.ci.lower.mle = ci.lower['x'],
-                              Bzy.est.mle = coef['z'],
-                              Bzy.ci.upper.mle = ci.upper['z'],
-                              Bzy.ci.lower.mle = ci.lower['z']))
-
-        mod.zhang.lik <- zhang.mle.iv(df)
-        coef <- coef(mod.zhang.lik)
-        ci <- confint(mod.zhang.lik,method='quad')
-        result <- append(result,
-                         list(Bxy.est.zhang = coef['Bxy'],
-                              Bxy.ci.upper.zhang = ci['Bxy','97.5 %'],
-                              Bxy.ci.lower.zhang = ci['Bxy','2.5 %'],
-                              Bzy.est.zhang = coef['Bzy'],
-                              Bzy.ci.upper.zhang = ci['Bzy','97.5 %'],
-                              Bzy.ci.lower.zhang = ci['Bzy','2.5 %']))
+    result <- append(result, mle_result)
+
+    mod.zhang.lik <- zhang.mle.iv(df)
+    coef <- coef(mod.zhang.lik)
+    ci <- confint(mod.zhang.lik,method='quad')
+    result <- append(result,
+                     list(Bxy.est.zhang = coef['Bxy'],
+                          Bxy.ci.upper.zhang = ci['Bxy','97.5 %'],
+                          Bxy.ci.lower.zhang = ci['Bxy','2.5 %'],
+                          Bzy.est.zhang = coef['Bzy'],
+                          Bzy.ci.upper.zhang = ci['Bzy','97.5 %'],
+                          Bzy.ci.lower.zhang = ci['Bzy','2.5 %']))
 
     ## What if we can't observe k -- most realistic scenario. We can't include all the ML features in a model.
     ## amelia.out.nok <- amelia(df, m=200, p2s=0, idvars=c("x","w_pred"), noms=noms)

Community Data Science Collective || Want to submit a patch?