X-Git-Url: https://code.communitydata.science/coldcallbot-discord.git/blobdiff_plain/3955a6bfcc0bd424fcf069f05d866c75315ee16c..743e0a39f3f56beab45e22845cd5117a5e316506:/data/track_participation.R diff --git a/data/track_participation.R b/data/track_participation.R index 8bce2e5..9a51084 100644 --- a/data/track_participation.R +++ b/data/track_participation.R @@ -1,15 +1,17 @@ library(ggplot2) +library(data.table) gs <- read.delim("student_information.tsv") d <- gs[,c(2,5)] colnames(d) <- c("student.num", "discord.name") -call.list <- do.call("rbind", lapply(list.files(".", pattern="^call_list-.*tsv$"), function (x) {read.delim(x)[,1:3]})) +call.list <- do.call("rbind", lapply(list.files(".", pattern="^call_list-.*tsv$"), function (x) {read.delim(x)[,1:4]})) colnames(call.list) <- gsub("_", ".", colnames(call.list)) call.list$day <- as.Date(call.list$timestamp) ## drop calls where the person wasn't present +call.list.full <- call.list call.list[!call.list$answered,] call.list <- call.list[call.list$answered,] @@ -23,7 +25,6 @@ d$num.calls[is.na(d$num.calls)] <- 0 attendance <- unlist(lapply(list.files(".", pattern="^attendance-.*tsv$"), function (x) {d <- read.delim(x); strsplit(d[[2]], ",")})) - file.to.attendance.list <- function (x) { tmp <- read.delim(x) d.out <- data.frame(discord.name=unlist(strsplit(tmp[[2]], ","))) @@ -35,6 +36,22 @@ attendance <- do.call("rbind", lapply(list.files(".", pattern="^attendance-.*tsv$"), file.to.attendance.list)) +## create list of folks who are missing in class +missing.in.class <- call.list.full[is.na(call.list.full$answered) | + (!is.na(call.list.full$answered) & !call.list.full$answered), + c("discord.name", "day")] + +missing.in.class <- unique(missing.in.class) + +setDT(attendance) +setkey(attendance, discord.name, day) +setDT(missing.in.class) +setkey(missing.in.class, discord.name, day) + +## drop presence for people on missing days +attendance[missing.in.class,] +attendance <- as.data.frame(attendance[!missing.in.class,]) + attendance.counts <- data.frame(table(attendance$discord.name)) colnames(attendance.counts) <- c("discord.name", "num.present") @@ -43,9 +60,10 @@ d <- merge(d, attendance.counts, by="discord.name") days.list <- lapply(unique(attendance$day), function (day) { - day.total <- table(call.list$day == day)[["TRUE"]] + day.total <- table(call.list.full$day == day)[["TRUE"]] lapply(d$discord.name, function (discord.name) { num.present <- nrow(attendance[attendance$day == day & attendance$discord.name == discord.name,]) + if (num.present/day.total > 1) {print(day)} data.frame(discord.name=discord.name, days.present=(num.present/day.total)) }) @@ -61,6 +79,7 @@ attendance.days <- data.frame(discord.name=names(days.tbl), d <- merge(d, attendance.days, all.x=TRUE, all.y=TRUE, by="discord.name") + d[sort.list(d$days.absent), c("discord.name", "num.calls", "days.absent")] ## make some visualizations of whose here/not here