-### EXAMPLE 2_b: demonstrates how measurement error can lead to a type sign error in a covariate
+### EXAMPLE 1: demonstrates how measurement error can lead to a sign (Type S) error in the estimate for a covariate
### What kind of data invalidates the Fong and Tyler approach?
-### This is the same as example 2, only instead of x->k we have k->x.
### Even when you have a good predictor, if it's biased against a covariate you can get the wrong sign --
### even when you include the proxy variable in the regression.
### But with some ground truth and multiple imputation, you can fix it.
## one way to do it is by adding correlation to x.obs and y that isn't in w.
## in other words, the model is missing an important feature of x.obs that's related to y.
## Simulate one dataset for this example and return it as a data.table.
## Lines prefixed with '-' / '+' are the removed / added sides of a change that
## renames simulate_latent_cocause to simulate_data and adds two defaulted
## arguments (xy.explained.variance, u.explained.variance).
##
## Arguments used in the lines visible here:
##   N    number of rows to simulate
##   B0   intercept of y
##   Bxy  coefficient of x in y
##   Bgy  coefficient of g in y
##   Bgx  coefficient of g in the latent xprime
##   seed value passed to set.seed()
## NOTE(review): m, Bkx, xy.explained.variance and u.explained.variance are not
## referenced in the lines visible here -- confirm whether they are used in
## parts of the function that this view omits.
-simulate_latent_cocause <- function(N, m, B0, Bxy, Bgy, Bkx, Bgx, seed){
+simulate_data <- function(N, m, B0, Bxy, Bgy, Bkx, Bgx, seed, xy.explained.variance=0.01, u.explained.variance=0.1){
## seed the RNG so that the same arguments reproduce the same draws
set.seed(seed)
## g: binary covariate, drawn as Bernoulli(0.5)
g <- rbinom(N, 1, 0.5)
## u: extra normal noise that is added to y below
## (the original comment here read "make w and y dependent"; w is not built in the visible lines)
## NOTE(review): on the added line the sd argument of rnorm() is left empty, so the
## call appears to fall back to rnorm's default sd = 1 instead of the old sd = Bxy.
## This looks unfinished (u.explained.variance is presumably meant to set it) -- confirm.
- u <- rnorm(N,0,Bxy)
+ u <- rnorm(N,0,)
## latent continuous score behind x: depends on g plus standard normal noise
xprime <- Bgx * g + rnorm(N,0,1)
## x: binary indicator, 1 where logistic(scale(xprime)) exceeds 0.5
## (logistic() is defined outside this view -- presumably the inverse-logit; verify)
x <- as.integer(logistic(scale(xprime)) > 0.5)
## outcome: linear in x and g plus intercept, u, and standard normal noise
## (the added line only reorders the terms of the sum)
- y <- Bxy * x + Bgy * g + rnorm(N, 0, 1) + B0 + u
+ y <- Bxy * x + Bgy * g + B0 + u + rnorm(N, 0, 1)
## NOTE(review): k is not assigned anywhere in the lines visible here; unless it is
## created in an omitted part of the function, this will pick up a global k or fail.
df <- data.table(x=x,k=k,y=y,g=g)
return(df)
}
-## simulate_latent_cocause_2 <- function(N, m, B0, Bxy, Bgy, Bkx, Bgx, seed){
-## set.seed(seed)
-
-## ## the true value of x
-
-## g <- rbinom(N, 1, 0.5)
-
-## # make w and y dependent
-## u <- rnorm(N,0,5)
-
-## xprime <- Bgx * g + rnorm(N,0,1)
-
-## k <- Bkx*xprime + rnorm(N,0,3)
-
-## x <- as.integer(logistic(scale(xprime+0.3)) > 0.5)
-
-## y <- Bxy * x + Bgy * g + rnorm(N, 0, 1) + B0 + u
-
-## df <- data.table(x=x,k=k,y=y,g=g)
-
-## w.model <- glm(x ~ k, df, family=binomial(link='logit'))
-
-## if( m < N){
-## df <- df[sample(nrow(df), m), x.obs := x]
-## } else {
-## df <- df[, x.obs := x]
-## }
-
-## w <- predict(w.model,data.frame(k=k)) + u
-## ## y = B0 + B1x + e
-
-## df[,':='(w=w, w_pred = as.integer(w>0.5),u=u)]
-## return(df)
-## }
-
-
schennach <- function(df){
fwx <- glm(x.obs~w, df, family=binomial(link='logit'))
## Set Bgx (the coefficient of g in the simulator's latent xprime) to 0.
Bgx <- 0
## Simulate a dataset and pass it to run_simulation (defined outside this view) together
## with a named list of the settings used. The '-' / '+' lines differ only in the
## simulator's name (simulate_latent_cocause -> simulate_data).
## NOTE(review): simulate_data's defaulted arguments (xy.explained.variance,
## u.explained.variance) are not passed here, so their defaults apply.
-outline <- run_simulation(simulate_latent_cocause(args$N, args$m, B0, Bxy, Bgy, Bkx, Bgx, args$seed)
+outline <- run_simulation(simulate_data(args$N, args$m, B0, Bxy, Bgy, Bkx, Bgx, args$seed)
,list('N'=args$N,'m'=args$m,'B0'=B0,'Bxy'=Bxy,'Bgy'=Bgy, 'Bkx'=Bkx, 'Bgx'=Bgx, 'seed'=args$seed))
## Request an exclusive lock on the file "<outfile>_lock".
## Presumably filelock::lock(), guarding concurrent writes to args$outfile -- confirm.
outfile_lock <- lock(paste0(args$outfile, '_lock'),exclusive=TRUE)