X-Git-Url: https://code.communitydata.science/ml_measurement_error_public.git/blobdiff_plain/3d1964b806106d76f13301f0cf6dccf35cd7d66c..82fe7b0f482a71c95e8ae99f7e6d37b79357506a:/simulations/pl_methods.R diff --git a/simulations/pl_methods.R b/simulations/pl_methods.R new file mode 100644 index 0000000..b3007d1 --- /dev/null +++ b/simulations/pl_methods.R @@ -0,0 +1,84 @@ +library(stats4) +library(bbmle) +library(matrixStats) + +zhang.mle.dv <- function(df){ + df.obs <- df[!is.na(y.obs)] + df.unobs <- df[is.na(y.obs)] + + fp <- df.obs[(w_pred==1) & (y.obs != w_pred),.N] + tn <- df.obs[(w_pred == 0) & (y.obs == w_pred),.N] + fpr <- fp / (fp+tn) + + fn <- df.obs[(w_pred==0) & (y.obs != w_pred), .N] + tp <- df.obs[(w_pred==1) & (y.obs == w_pred),.N] + fnr <- fn / (fn+tp) + + nll <- function(B0=0, Bxy=0, Bzy=0){ + + + ## observed case + ll.y.obs <- vector(mode='numeric', length=nrow(df.obs)) + ll.y.obs[df.obs$y.obs==1] <- with(df.obs[y.obs==1], plogis(B0 + Bxy * x + Bzy * z,log=T)) + ll.y.obs[df.obs$y.obs==0] <- with(df.obs[y.obs==0], plogis(B0 + Bxy * x + Bzy * z,log=T,lower.tail=FALSE)) + + ll <- sum(ll.y.obs) + + pi.y.1 <- with(df.unobs,plogis(B0 + Bxy * x + Bzy*z, log=T)) + #pi.y.0 <- with(df.unobs,plogis(B0 + Bxy * x + Bzy*z, log=T,lower.tail=FALSE)) + + lls <- with(df.unobs, colLogSumExps(rbind(w_pred * colLogSumExps(rbind(log(fpr), log(1 - fnr - fpr)+pi.y.1)), + (1-w_pred) * (log(1-fpr) - exp(log(1-fnr-fpr)+pi.y.1))))) + + ll <- ll + sum(lls) + print(paste0(B0,Bxy,Bzy)) + print(ll) + return(-ll) + } + mlefit <- mle2(minuslogl = nll, control=list(maxit=1e6),method='L-BFGS-B',lower=c(B0=-Inf, Bxy=-Inf, Bzy=-Inf), + upper=c(B0=Inf, Bxy=Inf, Bzy=Inf)) + return(mlefit) +} + + +## model from Zhang's arxiv paper, with predictions for y +## Zhang got this model from Hausman 1998 +zhang.mle.iv <- function(df){ + df.obs <- df[!is.na(x.obs)] + df.unobs <- df[is.na(x.obs)] + + tn <- df.obs[(w_pred == 0) & (x.obs == w_pred),.N] + fn <- df.obs[(w_pred==0) & (x.obs==1), .N] + npv <- tn / (tn + fn) + + tp <- df.obs[(w_pred==1) & (x.obs == w_pred),.N] + fp <- df.obs[(w_pred==1) & (x.obs == 0),.N] + ppv <- tp / (tp + fp) + + nll <- function(B0=0, Bxy=0, Bzy=0, sigma_y=0.1){ + + ## fpr = 1 - TNR + ### Problem: accounting for uncertainty in ppv / npv + + ## fnr = 1 - TPR + ll.y.obs <- with(df.obs, dnorm(y, B0 + Bxy * x + Bzy * z, sd=sigma_y,log=T)) + ll <- sum(ll.y.obs) + + # unobserved case; integrate out x + ll.x.1 <- with(df.unobs, dnorm(y, B0 + Bxy + Bzy * z, sd = sigma_y, log=T)) + ll.x.0 <- with(df.unobs, dnorm(y, B0 + Bzy * z, sd = sigma_y,log=T)) + + ## case x == 1 + lls.x.1 <- colLogSumExps(rbind(log(ppv) + ll.x.1, log(1-ppv) + ll.x.0)) + + ## case x == 0 + lls.x.0 <- colLogSumExps(rbind(log(1-npv) + ll.x.1, log(npv) + ll.x.0)) + + lls <- colLogSumExps(rbind(df.unobs$w_pred * lls.x.1, (1-df.unobs$w_pred) * lls.x.0)) + ll <- ll + sum(lls) + return(-ll) + } + mlefit <- mle2(minuslogl = nll, control=list(maxit=1e6), lower=list(sigma_y=0.0001, B0=-Inf, Bxy=-Inf, Bzy=-Inf), + upper=list(sigma_y=Inf, B0=Inf, Bxy=Inf, Bzy=Inf),method='L-BFGS-B') + return(mlefit) +}