]> code.communitydata.science - ml_measurement_error_public.git/blob - irr/irr.R
Add simulation code of IRR
[ml_measurement_error_public.git] / irr / irr.R
1 require(tibble)
2 require(purrr)
3
## Emulate a single human coding decision.
##
## ground_truth: the true value of the unit being coded.
## Q: probability that the coder reports the ground truth; otherwise the
##    coder guesses 1 or 0 with equal probability (so a guess may still
##    happen to match the truth).
##
## Returns ground_truth itself, or a random draw from c(1, 0).
.emulate_coding <- function(ground_truth, Q = 1) {
    ## One uniform draw decides whether this coding is accurate.
    codes_accurately <- !(runif(1) > Q)
    if (codes_accurately) {
        return(ground_truth)
    }
    ## Inaccurate coding: a coin flip between the two possible codes.
    sample(c(1, 0), 1)
}
11
12 ##irr::kripp.alpha(matrix(c(obs_x, obs_x2), nrow = 2, byrow = TRUE), method = "nominal")
13 ### Which is very close to
14 ## cor(obs_x, obs_x2)
15
## Simulate one inter-rater reliability experiment with two noisy coders.
##
## N: number of units to code.
## P: probability that a unit's true value is 1.
## Q: accuracy passed to .emulate_coding for both coders.
##
## Returns a one-row data.frame with columns N, P, Q, ra (raw agreement),
## rr (correlation between the two coders' codes) and irr (nominal
## Krippendorff's alpha).
.sim <- function(N = 100, P = 0.5, Q = 0.8) {
    real_x <- rbinom(N, 1, P)
    ## Two coders code the same units independently with the same accuracy.
    obs_x <- purrr::map_dbl(real_x, .emulate_coding, Q = Q)
    ## (original note) then learn w from obs_x and k
    obs_x2 <- purrr::map_dbl(real_x, .emulate_coding, Q = Q)
    ## Raw agreement: share of units on which both coders give the same code.
    ## Compared elementwise rather than via diag(table(obs_x, obs_x2)): the
    ## cross-table is only square with matching rows/columns when both coders
    ## use both codes. If one coder emits a single code the table is 1x2, 2x1
    ## or 1x1 and its "diagonal" can count disagreements as agreements.
    ra <- mean(obs_x == obs_x2)
    ## cor() yields NA (with a warning) when either coder's codes are constant.
    rr <- cor(obs_x, obs_x2)
    ## kripp.alpha expects a coders-by-units matrix, hence nrow = 2, byrow.
    alpha <- irr::kripp.alpha(
        matrix(c(obs_x, obs_x2), nrow = 2, byrow = TRUE),
        method = "nominal"
    )$value
    data.frame(N, P, Q, ra, rr, irr = alpha)
}
26
## Simulation design: fully crossed grid of sample size (N), prevalence of
## the positive class (P) and coder accuracy (Q).
N <- c(50, 100, 300)
P <- c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
Q <- c(0.5, 0.6, 0.7, 0.8, 0.9, 1)
conditions <- tibble::tibble(expand.grid(N = N, P = P, Q = Q))

## One slot per condition; each slot receives a data frame of 100 replications.
res <- vector("list", nrow(conditions))

for (i in seq_along(res)) {
    print(i)  # progress indicator
    res[[i]] <- purrr::map_dfr(
        seq_len(100),
        function(replication) {
            .sim(conditions$N[i], conditions$P[i], conditions$Q[i])
        }
    )
}

## Keep the replications next to their condition as a list-column.
conditions[["res"]] <- res
40
41 require(dplyr)
42
## Average each agreement measure over the replications of a condition
## (ignoring NA), then regress mean Krippendorff's alpha on P and a
## second-degree polynomial in Q without an intercept, and show the summary.
conditions %>%
    mutate(
        mra = purrr::map_dbl(res, function(runs) mean(runs$ra, na.rm = TRUE)),
        mrr = purrr::map_dbl(res, function(runs) mean(runs$rr, na.rm = TRUE)),
        mirr = purrr::map_dbl(res, function(runs) mean(runs$irr, na.rm = TRUE))
    ) %>%
    lm(mirr ~ 0 + P + poly(Q, 2), data = .) %>%
    summary()

Community Data Science Collective || Want to submit a patch?