From: chainsawriot Date: Tue, 26 Jul 2022 13:21:52 +0000 (+0200) Subject: Add simulation code of IRR X-Git-Url: https://code.communitydata.science/ml_measurement_error_public.git/commitdiff_plain/a02bcbb1d4c7af0ff8e0dbba63ea31935ffb4a45?ds=inline Add simulation code of IRR --- diff --git a/irr/irr.R b/irr/irr.R new file mode 100644 index 0000000..5af632e --- /dev/null +++ b/irr/irr.R @@ -0,0 +1,43 @@ +require(tibble) +require(purrr) + +.emulate_coding <- function(ground_truth, Q = 1) { + if (runif(1) > Q) { + return(sample(c(1,0), 1)) + } else { + return(ground_truth) + } +} + +##irr::kripp.alpha(matrix(c(obs_x, obs_x2), nrow = 2, byrow = TRUE), method = "nominal") +### Which is very close to +## cor(obs_x, obs_x2) + +.sim <- function(N = 100, P = 0.5, Q = 0.8) { + real_x <- rbinom(N, 1, P) + obs_x <- purrr::map_dbl(real_x, .emulate_coding, Q = Q) +### then learn w from obs_x and k + obs_x2 <- purrr::map_dbl(real_x, .emulate_coding, Q = Q) + ra <- sum(diag(table(obs_x, obs_x2))) / N ## raw agreement + rr <- cor(obs_x, obs_x2) + irr <- irr::kripp.alpha(matrix(c(obs_x, obs_x2), nrow = 2, byrow = TRUE), method = "nominal")$value + return(data.frame(N, P, Q, ra, rr, irr)) +} + +N <- c(50, 100, 300) +P <- c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9) +Q <- c(0.5, 0.6, 0.7, 0.8, 0.9, 1) +conditions <- tibble::tibble(expand.grid(N, P, Q)) +colnames(conditions) <- c("N", "P", "Q") +res <- list() + +for (i in seq_len(nrow(conditions))) { + print(i) + res[[i]] <- purrr::map_dfr(rep(NA, 100), ~ .sim(conditions$N[i], conditions$P[i], conditions$Q[i])) +} + +conditions$res <- res + +require(dplyr) + +conditions %>% mutate(mra = purrr::map_dbl(res, ~mean(.$ra, na.rm = TRUE)), mrr = purrr::map_dbl(res, ~mean(.$rr, na.rm = TRUE)), mirr = purrr::map_dbl(res, ~mean(.$irr, na.rm = TRUE))) %>% lm(mirr~0+P+poly(Q, 2), data =.) %>% summary