From: Benjamin Mako Hill <mako@atdot.cc>
Date: Sun, 6 Mar 2022 04:51:15 +0000 (-0800)
Subject: code to create final case discussion grades
X-Git-Url: https://code.communitydata.science/coldcallbot-discord.git/commitdiff_plain/78ac188f0487ba413244246181ad90b9a73451d8?ds=sidebyside

code to create final case discussion grades

This still needs to be check over but this is new code to build the
final grades. Current threshold for minimum questions comes from 1000
simulated classes (simulation.R).
---

diff --git a/assessment_and_tracking/compute_final_case_grades.R b/assessment_and_tracking/compute_final_case_grades.R
index b26270b..22dae47 100644
--- a/assessment_and_tracking/compute_final_case_grades.R
+++ b/assessment_and_tracking/compute_final_case_grades.R
@@ -1,72 +1,147 @@
 ## load in the data
 #################################
+myuw <- read.csv("../data/2022_winter_COM_481_A_students.csv", stringsAsFactors=FALSE)
 
-myuw <- read.csv("myuw-COMMLD_570_A_spring_2021_students.csv", stringsAsFactors=FALSE)
+current.dir <- getwd()
+source("../assessment_and_tracking/track_participation.R")
+setwd(current.dir)
+
+rownames(d) <- d$unique.name
+call.list$timestamp <- as.Date(call.list$timestamp)
 
 ## class-level variables
-question.grades <- c("GOOD"=100, "FAIR"=100-(50/3.3), "WEAK"=100-(50/(3.3)*2))
+question.grades <- c("PLUS"=100, "CHECK"=100-(50/3.3), "MINUS"=100-(50/(3.3)*2))
+missed.question.penalty <- (50/3.3) * 0.2 ## 1/5 of a full point on the GPA scale
 
-source("../assessment_and_tracking/track_participation.R")
-setwd("case_grades")
+## inspect set the absence threashold
+ggplot(d) + aes(x=absences) + geom_histogram(binwidth=1, fill="white",color="black")
+## absence.threshold <- median(d$absences)
+absence.threshold <- 4 ## TODO talk about this
 
-rownames(d) <- d$unique.name
+## inspect and set the questions cutoff
+## questions.cutoff <- median(d$num.calls)
+## median(d$num.calls)
+## questions.cutoff <- nrow(call.list) / nrow(d) ## TODO talk about this
+## first these are the people were were not called simply because they got unlucky
+
+ ## this is the 95% percentile based on simulation in simulation.R
+questions.cutoff <- 4
 
 ## show the distribution of assessments
 table(call.list$assessment)
 prop.table(table(call.list$assessment))
-table(call.list$answered)
-prop.table(table(call.list$answered))
+
+table(call.list.full$answered)
+prop.table(table(call.list.full$answered))
 
 total.questions.asked <- nrow(call.list)
 
-## generate grades
+## find out how man questions folks have present/absent for
 ##########################################################
+calls.per.day <- data.frame(day=as.Date(names(table(call.list$timestamp))),
+                            questions.asked=as.numeric(table(call.list$timestamp)))
+
+## function to return the numbers of calls present for or zero if they
+## were absent
+calls.for.student.day <- function (day, student.id) {
+    if (any(absence$unique.name == student.id & absence$date.absent == day)) {
+        return(0)
+    } else {
+        return(calls.per.day$questions.asked[calls.per.day$day == day])
+    }
+}
+
+compute.questions.present.for.student <- function (student.id) {
+    sum(unlist(lapply(unique(calls.per.day$day), calls.for.student.day, student.id)))
+}
 
-d$part.grade <- NA
+## create new column with number of questions present
+d$q.present <- unlist(lapply(d$unique.name, compute.questions.present.for.student))
+d$prop.asked <- d$num.calls / d$q.present
+
+## generate statistics using these new variables
+prop.asks.quantiles <- quantile(d$prop.asked, probs=seq(0,1, 0.01))
+prop.asks.quantiles <- prop.asks.quantiles[!duplicated(prop.asks.quantiles)]
+
+d$prop.asked.quant <- cut(d$prop.asked, right=FALSE, breaks=c(prop.asks.quantiles, 1),
+    labels=names(prop.asks.quantiles)[1:(length(prop.asks.quantiles))])
+
+## generate grades
+##########################################################
 
 ## print the median number of questions for (a) everybody and (b)
 ## people that have been present 75% of the time
 median(d$num.calls)
 
-questions.cutoff <- median(d$num.calls)
-
 ## helper function to generate average grade minus number of missing
 gen.part.grade <- function (x.unique.name) {
     q.scores <- question.grades[call.list$assessment[call.list$unique.name == x.unique.name]]
     base.score <- mean(q.scores, na.rm=TRUE)
 
     ## number of missing days
-    # missing.days <- nrow(missing.in.class[missing.in.class$unique.name == x.unique.name,])
+    missing.in.class.days <- nrow(missing.in.class[missing.in.class$unique.name == x.unique.name,])
 
     ## return the final score
     data.frame(unique.name=x.unique.name,
-               part.grade=(base.score))
+               base.grade=base.score,
+               missing.in.class.days=missing.in.class.days)
 }
 
 
+## create the base grades which do NOT include missing questions
 tmp <- do.call("rbind", lapply(d$unique.name, gen.part.grade))
+d <- merge(d, tmp)
+rownames(d) <- d$unique.name
+
+## apply the penality for number of days we called on them and they were gone
+d$part.grade <- d$base.grade - d$missing.in.class.days * missed.question.penalty
+d$part.grade.orig <- d$part.grade
+
+## first we handle the zeros
+## step 1: first double check the people who have zeros and ensure that they didn't "just" get unlucky"
+d[d$num.calls == 0,]
+
+## set those people to 0 :(
+d[d$num.calls == 0]
+d$part.grade[d$num.calls == 0] <- 0
 
-d[as.character(tmp$unique.name), "part.grade"] <- tmp$part.grade
+## step 2: identify the people who were were not asked "enough" questions but were unlucky/lucky
+## penalized.unique.names <- d$unique.name[d$num.calls < median(d$num.calls) & d$absences > median(d$absences)]
 
-## generate the baseline participation grades as per the process above
+## first these are the people were were not called simply because they got unlucky
+d[d$num.calls < questions.cutoff & d$absences < absence.threshold,]
 
+## first these are the people were were not called simply because they got unlucky
+penalized.unique.names <- d$unique.name[d$num.calls < questions.cutoff & d$absences > absence.threshold]
+d[d$unique.name %in% penalized.unique.names,]
+
+## now add "zeros" for every questions that is below the normal
+d[as.character(penalized.unique.names),"part.grade"] <- ((
+    (questions.cutoff - d[as.character(penalized.unique.names),"num.calls"] * 0) +
+    (d[as.character(penalized.unique.names),"num.calls"] * d[as.character(penalized.unique.names),"part.grade"]) )
+    / questions.cutoff)
+
+d[as.character(penalized.unique.names),]
+
+## TODO ensure this is right. i think it is
 ## map part grades back to 4.0 letter scale and points
-d$part.4point <-round((d$part.grade / (50/3.3)) - 2.6, 2)
+d$part.4point <- round((d$part.grade / (50/3.3)) - 2.6, 2)
 
-d[sort.list(d$part.4point),]
+d[sort.list(d$part.4point, decreasing=TRUE),
+  c("unique.name", "short.name", "num.calls", "absences", "part.4point")]
 
 
-## writing out data
+## writing out data to CSV
 d.print <- merge(d, myuw[,c("StudentNo", "FirstName", "LastName", "UWNetID")],
-           by.x="student.num", by.y="StudentNo")
-write.csv(d.print, file="final_participation_grades.csv")
-
-## library(rmarkdown)
-
-## for (x.unique.name in d$unique.name) {
-##     render(input="../../assessment_and_tracking/student_report_template.Rmd",
-##            output_format="html_document",
-##            output_file=paste("../data/case_grades/student_reports/",
-##                              d.print$UWNetID[d.print$unique.name == x.unique.name],
-##                              sep=""))
-## }
+                 by.x="unique.name", by.y="StudentNo")
+write.csv(d.print, file="../data/final_participation_grades.csv")
+
+library(rmarkdown)
+
+for (id in d$unique.name) {
+    render(input="student_report_template.Rmd",
+           output_format="html_document",
+           output_file=paste("../data/case_grades/",
+                             d.print$unique.name[d.print$unique.name == id],
+                             sep=""))
+}
diff --git a/assessment_and_tracking/simulation.R b/assessment_and_tracking/simulation.R
new file mode 100644
index 0000000..7134bef
--- /dev/null
+++ b/assessment_and_tracking/simulation.R
@@ -0,0 +1,24 @@
+weight.fac <- 2
+num.calls <- 373
+num.students <- 76
+
+gen.calls.per.students <- function (x) {
+    raw.weights <<- rep(1, num.students)
+    names(raw.weights) <- seq(1, num.students)
+
+    table(sapply(1:num.calls, function (i) {
+        probs <- raw.weights / sum(raw.weights)
+        selected <- sample(names(raw.weights), 1, prob=probs)
+        ## update the raw.weights
+        raw.weights[selected] <<- raw.weights[selected] / weight.fac
+                                        #print(raw.weights)
+        return(selected)
+    }))
+}
+
+
+simulated.call.list <- unlist(lapply(1:1000, gen.calls.per.students))
+hist(simulated.call.list)
+
+quantile(simulated.call.list, probs=seq(0,1,by=0.01))
+quantile(simulated.call.list, probs=0.05)
diff --git a/assessment_and_tracking/student_report_template.Rmd b/assessment_and_tracking/student_report_template.Rmd
index a0b2145..866b1e0 100644
--- a/assessment_and_tracking/student_report_template.Rmd
+++ b/assessment_and_tracking/student_report_template.Rmd
@@ -1,22 +1,19 @@
-**Student Name:** `r paste(d.print[d.print$discord.name == x.discord.name, c("FirstName", "LastName")])`
+**Student Name:** `r paste(d.print[d.print$unique.name == id, c("LastName", "FirstName")])` (`r id`)
 
-**Discord Name:** `r d.print[d.print$discord.name == x.discord.name, c("discord.name")]`
+**Participation grade:** `r d.print$part.4point[d.print$unique.name == id]`
 
-**Participation grade:** `r d.print$part.4point[d.print$discord.name == x.discord.name]`
+**Questions asked:** `r d.print[d$unique.name == id, "num.calls"]`
 
-**Questions asked:** `r d.print[d$discord.name == x.discord.name, "prev.questions"]`
+**Days Absent:** `r d.print[d.print$unique.name == id, "absences"]` / `r length(unique(as.Date(unique(call.list$timestamp))))`
 
-**Days Absent:** `r d.print[d.print$discord.name == x.discord.name, "days.absent"]` / `r case.sessions`
+**Missing in class days:** `r d.print[d$unique.name == id, "missing.in.class.days"]` (base grade lowered by 0.2 per day)
 
 **List of questions:**
 
 ```{r echo=FALSE}
-call.list[call.list$discord.name == x.discord.name,]
+call.list[call.list$unique.name == id,]
 ```
 
-**Luckiness:** `r d.print[d.print$discord.name == x.discord.name, "prop.asked.quant"]`
-
-If you a student has a luckiness over 50% that means that they were helped by the weighting of the system and/or got lucky. We did not penalize *any* students with a luckiness under 50% for absences.