From: chainsawriot Date: Tue, 26 Jul 2022 13:26:33 +0000 (+0200) Subject: Add simulation using simex X-Git-Url: https://code.communitydata.science/ml_measurement_error_public.git/commitdiff_plain/6b19a3946446e798a3238d096bb33c50eb6dd954?ds=sidebyside;hp=-c Add simulation using simex And there is a great potential for overestimating the true `Bxy` --- 6b19a3946446e798a3238d096bb33c50eb6dd954 diff --git a/irr/simex_sim.R b/irr/simex_sim.R new file mode 100644 index 0000000..521e249 --- /dev/null +++ b/irr/simex_sim.R @@ -0,0 +1,56 @@ +##install.packages(c("purrr", "simex", "irr")) + +.emulate_coding <- function(ground_truth, Q = 1) { + if (runif(1) > Q) { + return(sample(c(0, 1), size = 1, replace = TRUE)) + } else { + return(ground_truth) + } +} + +distort_gt <- function(x, Q = NULL) { + return(purrr::map_dbl(x, .emulate_coding, Q = Q)) +} + +N <- c(1000, 3600, 14400) +m <- c(75, 150, 300) + +B0 <- c(0, 0.1, 0.3) +Bxy <- c(0.1, 0.2, 0.5) + +Q <- c(.6, .8, .9) + +conditions <- expand.grid(N, m, B0, Bxy, Q) + +logistic <- function(x) {1/(1+exp(-1*x))} + +.step <- function(Bxy, B0, Q, N, m) { + x <- rbinom(N, 1, 0.5) + y <- Bxy * x + rnorm(N, 0, .5) + B0 + + dx <- as.numeric(distort_gt(x, Q = Q)) + + randomx <- sample(x, m) + coder1x <- distort_gt(randomx, Q = Q) + coder2x <- distort_gt(randomx, Q = Q) + coding_data <- matrix(c(as.numeric(coder1x), as.numeric(coder2x)), nrow = 2, byrow = TRUE) + alpha <- irr::kripp.alpha(coding_data, method = "nominal") + estimated_q <- alpha$value^(1/2) + estimated_q2 <- alpha$value + + res <- data.frame(x = as.factor(x), y = y, dx = as.factor(dx)) + + naive_mod <- glm(y~dx, data = res, x = TRUE, y = TRUE) + real_mod <- glm(y~x, data = res, x = TRUE, y = TRUE) + + px <- matrix(c(estimated_q, 1-estimated_q, 1-estimated_q, estimated_q), nrow = 2) + colnames(px) <- levels(res$dx) + corrected_mod <- simex::mcsimex(naive_mod, SIMEXvariable = "dx", mc.matrix = px, jackknife.estimation = FALSE, B = 300) + px2 <- matrix(c(estimated_q2, 1-estimated_q2, 1-estimated_q2, estimated_q2), nrow = 2) + colnames(px2) <- levels(res$dx) + corrected_mod2 <- simex::mcsimex(naive_mod, SIMEXvariable = "dx", mc.matrix = px2, jackknife.estimation = FALSE, B = 300) + + return(tibble::tibble(N, m, Q, Bxy, B0, estimated_q, naive_Bxy = as.numeric(coef(naive_mod)[2]), real_Bxy = as.numeric(coef(real_mod)[2]), corrected_Bxy = coef(corrected_mod)[2], corrected_Bxy2 = coef(corrected_mod2)[2])) +} + +## res <- .step(0.2, 0, 0.8, N = 1000, m = 100)