## load in the data
#################################
+myuw <- read.csv("../data/2022_winter_COM_481_A_students.csv", stringsAsFactors=FALSE)
-myuw <- read.csv("myuw-COMMLD_570_A_spring_2021_students.csv", stringsAsFactors=FALSE)
+current.dir <- getwd()
+source("../assessment_and_tracking/track_participation.R")
+setwd(current.dir)
+
+rownames(d) <- d$unique.name
+call.list$timestamp <- as.Date(call.list$timestamp)
## class-level variables
-question.grades <- c("GOOD"=100, "FAIR"=100-(50/3.3), "WEAK"=100-(50/(3.3)*2))
+gpa.point.value <- 50/(4 - 0.7)
+question.grades <- c("PLUS"=100, "CHECK"=100-gpa.point.value, "MINUS"=100-(gpa.point.value*2))
+missed.question.penalty <- gpa.point.value * 0.2 ## 1/5 of a full point on the GPA scale
-source("../assessment_and_tracking/track_participation.R")
-setwd("case_grades")
+## inspect and set the absence threshold
+ggplot(d) + aes(x=absences) + geom_histogram(binwidth=1, fill="white",color="black")
+absence.threshold <- median(d$absences)
-rownames(d) <- d$unique.name
+
+## inspect and set the questions cutoff
+## questions.cutoff <- median(d$num.calls)
+## median(d$num.calls)
+## questions.cutoff <- nrow(call.list) / nrow(d) ## TODO talk about this
+## this is the 5th percentile of calls per student (i.e., ~95% of
+## students are called at least this often) from simulation.R
+questions.cutoff <- 4
## show the distribution of assessments
table(call.list$assessment)
prop.table(table(call.list$assessment))
-table(call.list$answered)
-prop.table(table(call.list$answered))
+
+table(call.list.full$answered)
+prop.table(table(call.list.full$answered))
total.questions.asked <- nrow(call.list)
-## generate grades
-##########################################################
+## find out how many questions folks were present/absent for.
+##
+## NOTE: this is currently only for informational purposes and is NOT
+## being used to compute grades in any way.
+########################################################################
+calls.per.day <- data.frame(day=as.Date(names(table(call.list$timestamp))),
+ questions.asked=as.numeric(table(call.list$timestamp)))
+
+## number of calls made on `day` that the student was present for, or
+## zero if the student has an absence recorded for that day
+calls.for.student.day <- function (day, student.id) {
+    was.absent <- any(absence$unique.name == student.id &
+                      absence$date.absent == day)
+    if (was.absent) 0 else calls.per.day$questions.asked[calls.per.day$day == day]
+}
+
+## total number of questions asked on the days the student was present
+compute.questions.present.for.student <- function (student.id) {
+    class.days <- unique(calls.per.day$day)
+    calls.by.day <- lapply(class.days, calls.for.student.day, student.id)
+    sum(unlist(calls.by.day))
+}
-d$part.grade <- NA
+## create new column with number of questions present
+d$q.present <- unlist(lapply(d$unique.name, compute.questions.present.for.student))
+d$prop.asked <- d$num.calls / d$q.present
+
+## generate statistics using these new variables
+prop.asks.quantiles <- quantile(d$prop.asked, probs=seq(0,1, 0.01))
+prop.asks.quantiles <- prop.asks.quantiles[!duplicated(prop.asks.quantiles)]
+
+d$prop.asked.quant <- cut(d$prop.asked, right=FALSE, breaks=c(prop.asks.quantiles, 1),
+ labels=names(prop.asks.quantiles)[1:(length(prop.asks.quantiles))])
+
+## generate grades
+########################################################################
## print the median number of questions for (a) everybody and (b)
## people that have been present 75% of the time
median(d$num.calls)
-questions.cutoff <- median(d$num.calls)
-
## helper: for one student, average the numeric scores of the calls
## listed for them in call.list and count the days they appear in
## missing.in.class
gen.part.grade <- function (x.unique.name) {
q.scores <- question.grades[call.list$assessment[call.list$unique.name == x.unique.name]] ## map each assessment label to its numeric score
base.score <- mean(q.scores, na.rm=TRUE) ## NaN when the student has no scored calls
## number of rows this student has in missing.in.class
- # missing.days <- nrow(missing.in.class[missing.in.class$unique.name == x.unique.name,])
+ missing.in.class.days <- nrow(missing.in.class[missing.in.class$unique.name == x.unique.name,])
## return a one-row data.frame so per-student results can be stacked with rbind
data.frame(unique.name=x.unique.name,
- part.grade=(base.score))
+ base.grade=base.score,
+ missing.in.class.days=missing.in.class.days)
}
+## create the base grades which do NOT include missing questions
tmp <- do.call("rbind", lapply(d$unique.name, gen.part.grade))
+d <- merge(d, tmp)
+rownames(d) <- d$unique.name
+d$part.grade <- d$base.grade
-d[as.character(tmp$unique.name), "part.grade"] <- tmp$part.grade
+## first we handle the zeros
+## step 1: first double check the people who have zeros and ensure that they didn't "just" get unlucky
+d[d$num.calls == 0,]
-## generate the baseline participation grades as per the process above
+## set those people to 0 :(
+d$part.grade[d$num.calls == 0] <- 0
-## map part grades back to 4.0 letter scale and points
-d$part.4point <-round((d$part.grade / (50/3.3)) - 2.6, 2)
+## step 2: identify the people who were not asked "enough"
+## questions but were unlucky/lucky
+
+## first, this just prints out the people who were not called
+## simply because they got unlucky
+d[d$num.calls < questions.cutoff & d$absences < absence.threshold,]
+
+## these are the people who were not called for reasons other than
+## simply being unlucky (i.e., they were not in class very often)
+penalized.unique.names <- d$unique.name[d$num.calls < questions.cutoff & d$absences > absence.threshold]
+d[d$unique.name %in% penalized.unique.names,]
+
+## now add "zeros" for every question that is below the cutoff
+d[as.character(penalized.unique.names),"part.grade"] <- (
+ (d[as.character(penalized.unique.names),"num.calls"] * d[as.character(penalized.unique.names),"part.grade"])
+ / questions.cutoff)
-d[sort.list(d$part.4point),]
+d[as.character(penalized.unique.names),]
+## apply the penalty for the number of days we called on them and they were gone
+d$part.grade <- d$part.grade - d$missing.in.class.days * missed.question.penalty
-## writing out data
+## TODO ensure this is right. i think it is
+## map part grades back to 4.0 letter scale and points
+d$part.4point <- round((d$part.grade / gpa.point.value) - ((100 / gpa.point.value) - 4), 2)
+
+d[sort.list(d$part.4point, decreasing=TRUE),
+ c("unique.name", "short.name", "num.calls", "absences", "part.4point")]
+
+## writing out data to CSV
d.print <- merge(d, myuw[,c("StudentNo", "FirstName", "LastName", "UWNetID")],
- by.x="student.num", by.y="StudentNo")
-write.csv(d.print, file="final_participation_grades.csv")
-
-## library(rmarkdown)
-
-## for (x.unique.name in d$unique.name) {
-## render(input="../../assessment_and_tracking/student_report_template.Rmd",
-## output_format="html_document",
-## output_file=paste("../data/case_grades/student_reports/",
-## d.print$UWNetID[d.print$unique.name == x.unique.name],
-## sep=""))
-## }
+ by.x="unique.name", by.y="StudentNo")
+write.csv(d.print, file="../data/final_participation_grades.csv")
+
+library(rmarkdown)
+
+## render one HTML report per student. iterate over d.print (not d):
+## the template reads its fields from d.print, and the merge with myuw
+## drops any student who is missing from the class list, which would
+## otherwise leave the output file name empty.
+##
+## NOTE: the loop variable must stay named `id` because the template
+## refers to it by that name.
+for (id in d.print$unique.name) {
+    render(input="student_report_template.Rmd",
+           output_format="html_document",
+           output_file=paste0("../data/case_grades/", id))
+}
--- /dev/null
+weight.fac <- 2
+num.calls <- 373
+num.students <- 76
+
+## simulate one course worth of cold calls and return a table of how
+## many calls each selected student received.
+##
+## every student starts with weight 1; each time a student is selected
+## their weight is divided by weight.fac, making them less likely to be
+## picked again. `x` is ignored; it exists only so the function can be
+## used with lapply() over a vector of run indices.
+##
+## reads the globals num.students, num.calls, and weight.fac.
+gen.calls.per.students <- function (x) {
+    ## keep the weights local to this run; assigning them with <<- left
+    ## a stray unnamed copy in the global environment
+    raw.weights <- rep(1, num.students)
+    names(raw.weights) <- seq(1, num.students)
+
+    ## seq_len() so that zero calls means zero draws; vapply() so the
+    ## result is always a character vector
+    table(vapply(seq_len(num.calls), function (i) {
+        probs <- raw.weights / sum(raw.weights)
+        selected <- sample(names(raw.weights), 1, prob=probs)
+        ## update the weights held in the enclosing function's frame
+        raw.weights[selected] <<- raw.weights[selected] / weight.fac
+        selected
+    }, character(1)))
+}
+
+
+simulated.call.list <- unlist(lapply(1:1000, gen.calls.per.students))
+hist(simulated.call.list)
+
+quantile(simulated.call.list, probs=seq(0,1,by=0.01))
+quantile(simulated.call.list, probs=0.05)
-**Student Name:** `r paste(d.print[d.print$discord.name == x.discord.name, c("FirstName", "LastName")])`
+**Student Name:** `r paste(d.print[d.print$unique.name == id, c("LastName", "FirstName")])` (`r id`)
-**Discord Name:** `r d.print[d.print$discord.name == x.discord.name, c("discord.name")]`
+**Participation grade:** `r d.print$part.4point[d.print$unique.name == id]`
-**Participation grade:** `r d.print$part.4point[d.print$discord.name == x.discord.name]`
+**Questions asked:** `r d.print[d.print$unique.name == id, "num.calls"]`
-**Questions asked:** `r d.print[d$discord.name == x.discord.name, "prev.questions"]`
+**Days Absent:** `r d.print[d.print$unique.name == id, "absences"]` / `r length(unique(as.Date(unique(call.list$timestamp))))`
-**Days Absent:** `r d.print[d.print$discord.name == x.discord.name, "days.absent"]` / `r case.sessions`
+**Missing in class days:** `r d.print[d.print$unique.name == id, "missing.in.class.days"]` (base grade lowered by 0.2 per day)
**List of questions:**
```{r echo=FALSE}
-call.list[call.list$discord.name == x.discord.name,]
+call.list[call.list$unique.name == id,]
```
-**Luckiness:** `r d.print[d.print$discord.name == x.discord.name, "prop.asked.quant"]`
-
-If you a student has a luckiness over 50% that means that they were helped by the weighting of the system and/or got lucky. We did not penalize *any* students with a luckiness under 50% for absences.
-myuw <- read.csv("myuw-COMMLD_570_A_spring_2021_students.csv")
-gs <- read.delim("student_information.tsv")
+myuw <- read.csv("../data/2022_winter_COM_481_A_students.csv")
+gs <- read.delim("../data/student_information.tsv")
## these are students who dropped the class (should be empty)
gs[!gs$Your.UW.student.number %in% myuw$StudentNo,]
setwd("~/online_communities/coldcallbot/data/")
-library(ggplot2)
library(data.table)
-gs <- read.delim("student_information.tsv")
-d <- gs[,c(2,4)]
-colnames(d) <- c("student.num", "unique.name")
+################################################
+## LOAD call_list TSV data
+################################################
call.list <- do.call("rbind", lapply(list.files(".", pattern="^call_list-.*tsv$"), function (x) {read.delim(x, stringsAsFactors=FALSE)[,1:4]}))
colnames(call.list) <- gsub("_", ".", colnames(call.list))
-table(call.list$unique_name[call.list$answered])
+table(call.list$unique.name[call.list$answered])
## drop calls where the person wasn't present
call.list.full <- call.list
call.list[!call.list$answered,]
call.list <- call.list[call.list$answered,]
+## show the distribution of assessments
+prop.table(table(call.list$assessment))
+
call.counts <- data.frame(table(call.list$unique.name))
colnames(call.counts) <- c("unique.name", "num.calls")
-d <- merge(d, call.counts, all.x=TRUE, all.y=TRUE, by="unique.name"); d
+## create list of folks who are missing in class w/o reporting it
+absence.data.cols <- c("unique.name", "date.absent", "reported")
+
+missing.in.class <- call.list.full[!call.list.full$answered,
+ c("unique.name", "timestamp")]
+missing.in.class$date.absent <- as.Date(missing.in.class$timestamp)
+missing.in.class$reported <- FALSE
+missing.in.class <- missing.in.class[,absence.data.cols]
+missing.in.class <- unique(missing.in.class)
+
+################################################
+## LOAD absence data TSV data
+################################################
+
+absence.google <- read.delim("absence_poll_data.tsv")
+colnames(absence.google) <- c("timestamp", "unique.name", "date.absent")
+absence.google$date.absent <- as.Date(absence.google$date.absent, format="%m/%d/%Y")
+absence.google$reported <- TRUE
+absence.google <- absence.google[,absence.data.cols]
+absence.google <- unique(absence.google)
+
+## combine the two absence lists and then create a unique subset
+absence <- rbind(missing.in.class[,absence.data.cols],
+ absence.google[,absence.data.cols])
+
+## these are people that show up in both lists (i.e., they probably
+## submitted too late), but it's worth verifying before we penalize
+## them. i'd actually remove them from the absence sheet to suppress
+## this error
+absence[duplicated(absence[,1:2]),]
+absence <- absence[!duplicated(absence[,1:2]),]
+
+## print total questions asked and absences
+absence.count <- data.frame(table(unique(absence[,c("unique.name", "date.absent")])[,"unique.name"]))
+colnames(absence.count) <- c("unique.name", "absences")
+
+
+## load up the full class list
+gs <- read.delim("student_information.tsv")
+d <- gs[,c("Your.UW.student.number", "Name.you.d.like.to.go.by.in.class")]
+colnames(d) <- c("unique.name", "short.name")
+
+## merge in the call counts
+d <- merge(d, call.counts, all.x=TRUE, all.y=FALSE, by="unique.name")
+d <- merge(d, absence.count, by="unique.name", all.x=TRUE, all.y=FALSE)
+
+d
+
+## set anything that's missing to zero
+d$num.calls[is.na(d$num.calls)] <- 0
+d$absences[is.na(d$absences)] <- 0
+
+################################################
+## list people who have been absent often or called on a lot
+################################################
+
+
+## list students sorted in terms of (a) absences and (b) prev questions
+d[sort.list(d$absences),]
+
+d[sort.list(d$num.calls, decreasing=TRUE),]
+
+################################################
+## build visualizations
+################################################
+
+
+library(ggplot2)
+
+color.gradient <- scales::seq_gradient_pal("yellow", "magenta", "Lab")(seq(0,1,length.out=range(d$absences)[2]+1))
+
+table(d$num.calls, d$absences)
+
+png("questions_absence_histogram_combined.png", units="px", width=600, height=400)
+
+## stacked bar chart: one bar per number of questions answered, with
+## each bar split (and colored) by the students' number of absences
+ggplot(d) +
+    aes(x=as.factor(num.calls), fill=as.factor(absences)) +
+    ## geom_bar() already counts rows; a separate stat_count() layer
+    ## drew a second set of bars on top of the black outlines
+    geom_bar(color="black") +
+    scale_x_discrete("Number of questions answered") +
+    scale_y_continuous("Number of students") +
+    ##scale_fill_brewer("Absences", palette="Blues") +
+    scale_fill_manual("Absences", values=color.gradient) +
+    theme_bw()
+
+dev.off()
+
+## facet labeller: prefix every label value with "Absences:"
+absence.labeller <- function (df) {
+    prefix.label <- function (values) paste("Absences:", values)
+    lapply(df, prefix.label)
+}
+
+## png("questions_absence_histogram_facets.png", units="px", width=600, height=400)
+
+## ggplot(d) +
+## aes(x=as.factor(num.calls)) +
+## geom_bar() +
+## stat_count() +
+## scale_x_discrete("Number of questions answered") +
+## scale_y_continuous("Number of students") +
+## theme_bw() +
+## facet_wrap(.~absences, ncol=5, labeller="absence.labeller")
self.__fn_daily_attendance = config["daily_attendance"].format(date=self.today)
self.unique_row = config["unique_name_rowname"]
- self.preferred_row = config["preferred_name_rowname"]
-
- self.preferred_names = self.__get_preferred_names()
+ if "preferred_name_rowname" in config:
+ self.preferred_row = config["preferred_name_rowname"]
+ else:
+ self.preferred_row = None
def __load_prev_questions(self):
previous_questions = defaultdict(int)
else:
return None
- def __select_student_from_list (self, students_present):
+ def select_student_from_list(self, students_present):
prev_questions = self.__load_prev_questions()
# created a weighted list by starting out with everybody 1
# print(weights) # DEBUG LINE
return choices(list(weights.keys()), weights=list(weights.values()), k=1)[0]
- def __record_attendance(self, students_present):
+ def record_attendance(self, students_present):
# if it's the first one of the day, write it out
if not os.path.exists(self.__fn_daily_attendance):
with open(self.__fn_daily_attendance, "w") as f:
",".join(students_present)]),
file=f)
- def __record_coldcall(self, selected_student):
+ def record_coldcall(self, selected_student):
# if it's the first one of the day, write it out
if not os.path.exists(self.__fn_daily_calllist):
with open(self.__fn_daily_calllist, "w") as f:
"MISSING", "MISSING", str(datetime.now())]), file=f)
def coldcall(self, students_present):
- selected_student = self.__select_student_from_list(students_present)
+ selected_student = self.select_student_from_list(students_present)
# record the called-upon student in the right place
if self.record_attendance:
- self.__record_attendance(students_present)
- self.__record_coldcall(selected_student)
+ self.record_attendance(students_present)
+ self.record_coldcall(selected_student)
preferred_name = self.__get_preferred_name(selected_student)
if preferred_name:
#!/usr/bin/env python3
from coldcall import ColdCall
-import re
+from datetime import datetime
+from csv import DictReader
+
+current_time = datetime.today()
## create the coldcall object
-cc = ColdCall(record_attendance=False)
+cc = ColdCall(record_attendance=False, preferred_name_field="Name you'd like to go by in class")
+
+def get_missing(d=current_time):
+ date_string = f'{d.month}/{d.day}/{d.year}'
+ with open("data/absence_poll_data.tsv", 'r') as f:
+ for row in DictReader(f, delimiter="\t"):
+ if row["Date of class session you will be absent"] == date_string:
+ yield(row["Your UW student number"])
+
+full_names = {}
+registered_students = []
+with open("data/2022_winter_COM_481_A_students.csv", 'r') as f:
+ for row in DictReader(f, delimiter=","):
+ student_no = row["StudentNo"].strip()
+ registered_students.append(student_no)
+ full_names[student_no] = f"{row['FirstName']} {row['LastName']}"
+## print("Registered:", registered_students)
-student_list = cc.preferred_names
+missing_today = [x for x in get_missing(current_time)]
+## print("Missing Today: ", missing_today)
-# print out 100 students
+preferred_names = {}
+with open("data/student_information.tsv", 'r') as f:
+ for row in DictReader(f, delimiter="\t"):
+ preferred_names[row["Your UW student number"]] = row["Name you'd like to go by in class"]
+## print("Preferred names:", preferred_names)
+
+students_present = [s for s in registered_students if s not in missing_today]
+## print("Students present:", students_present)
for i in range(100):
- print(f"{i + 1}. {cc.coldcall(student_list)} [ ] [ ]\n")
+ selected_student = cc.select_student_from_list(students_present)
+
+ try:
+ preferred_name = preferred_names[selected_student]
+ except KeyError:
+ preferred_name = "MISSING PREFERRED NAME"
+
+ print(f"{i + 1}.",
+ preferred_name, "::",
+ selected_student, "::",
+ full_names[selected_student])
+ cc.record_coldcall(selected_student)