# Processes data from the Wikia API to identify bots and admins
# Copyright (C) 2018 Nathan TeBlunthuis

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

# NOTE(review): this script reads `wiki.list` and `nosave`, and saves `r`,
# none of which are defined in this file; they are presumably supplied by the
# script that sources this one -- confirm.

library(devtools)

load_all("RCommunityData")

# Get files for a wiki
#
# Reads the rights log ("logevents-2017/<filename>") and the current user
# list ("userlist-2017/<filename>") for one wiki, both relative to the
# current working directory, and passes them to generate.admin.addrm().
#
# filename: name of the wiki's TSV file, e.g. "<wiki>.tsv".
# Returns the result of generate.admin.addrm() with a `wiki` column added,
# holding `filename` with its ".tsv" suffix removed.
load.rights.files <- function (filename) {
  wiki <- gsub('\\.tsv$', '', filename)
  # progress output: one line per wiki processed
  print(wiki)

  # quote="" disables quote handling so quote characters are read literally
  logevents <- read.delim(
    file.path("logevents-2017", filename),
    stringsAsFactors = FALSE,
    encoding = "UTF-8",
    quote = ""
  )

  # empty fields are read as NA
  current.userroles <- read.delim(
    file.path("userlist-2017", filename),
    stringsAsFactors = FALSE,
    na.strings = "",
    encoding = "UTF-8",
    header = TRUE
  )

  d <- generate.admin.addrm(logevents, current.userroles)
  d$wiki <- wiki
  return(d)
}

# The input files are addressed relative to userroles_data/. Remember the
# previous working directory and restore it even if loading fails, instead of
# relying on setwd(".."), which ends up elsewhere when the data directory is a
# symlink.
old.wd <- setwd("userroles_data/")
wiki.files <- paste0(wiki.list$wiki, ".tsv")
userroles <- tryCatch(
  rbindlist(lapply(wiki.files, load.rights.files)),
  finally = setwd(old.wd)
)

# NOTE(review): as written, '^(.*?)$' matches every non-missing string, so
# `blocked` is TRUE for every non-missing role and the two substitutions below
# leave `role` unchanged. The patterns look like they have lost some literal
# markup around the capture group -- confirm against version control before
# relying on `blocked`.
userroles$blocked <- grepl('^(.*?)$', userroles$role)
userroles$role <- gsub('^(.*?)$','\\1', userroles$role)
userroles$role <- gsub('^(.*?)$','\\1', userroles$role)

# flag rows whose role is one of the admin-type / bot-type role strings
userroles[, is.action.admin := (role %in% c("sysop", "bureaucrat","sysop,bureaucrat","staff","admin","fanonadmin","steward"))]
userroles[, is.action.bot := (role %in% c("bot", "fyzbot","bot-global"))]

bots <- userroles[is.action.bot == TRUE]
admins <- userroles[is.action.admin == TRUE]

# sort by timestamp so the grouped shift() below pairs each row with the
# next row of the same (wiki, user)
setorder(bots, "timestamp")
setorder(admins, "timestamp")

## we want to keep track of when the roles changed
## assume nobody was a bot or admin at the beginning of Mediawiki

## userroles[,':='(
##     prev.isbot = ifelse(is.na(prev.isbot),(isbot & action=="removed"),prev.isbot)

# Each row opens a period that lasts until the next row for the same
# (wiki, user); the last row of each group is closed with the fill value
# 2017-01-01.
bots[,
     `:=`(
       role.period.begin = timestamp,
       role.period.end = shift(timestamp, fill = as.POSIXct("2017-01-01"), type = "lead")
     ),
     by = .(wiki, user)]

# is.bot is TRUE on rows whose action is "added"
bots[, `:=`(is.bot = (action == "added"))]

admins[,
       `:=`(
         role.period.begin = timestamp,
         role.period.end = shift(timestamp, fill = as.POSIXct("2017-01-01"), type = "lead")
       ),
       by = .(wiki, user)]

# is.admin is TRUE on rows whose action is "added"
admins[, `:=`(is.admin = (action == "added"))]

# save data to an output file for knitr
# (the working directory was already restored right after loading)
rm(load.rights.files)
rm(wiki.files, userroles, old.wd)

if (!nosave) {
  saveRDS(bots, file = "bots.RDS")
  saveRDS(admins, file = "admins.RDS")
  saveRDS(r, file = "lib-01-generate_userroles.RDS")
}