1 # Processes data from the Wikia API to identify bots and admins
2 # Copyright (C) 2018 Nathan TeBlunthuis
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
19 load_all("RCommunityData")
21 # Load the rights-log and user-list TSV files for one wiki.
# `filename` is the wiki's file name ("<wiki>.tsv"); the same name is read
# from both "logevents-2017/" and "userlist-2017/", resolved relative to the
# current working directory.
# NOTE(review): the end of this function (closing brace and return value) is
# not visible in this excerpt; presumably it returns `d` -- confirm.
22 load.rights.files <- function (filename) {
# The wiki name is the file name with its trailing ".tsv" removed.
23 wiki <- gsub('\\.tsv$', '', filename)
# quote="" turns off quote handling, so quote characters inside fields are
# read literally.
25 logevents <- read.delim(paste("logevents-2017/", filename, sep=""),
26 stringsAsFactors=FALSE, encoding="UTF-8", quote="")
# na.string="" relies on partial matching of read.delim's `na.strings`
# argument: empty fields are read as NA.
28 current.userroles <- read.delim(paste("userlist-2017/", filename, sep=""),
29 stringsAsFactors=FALSE, na.string="",
30 encoding="UTF-8", header=TRUE)
# Combine the event log with the current role list. generate.admin.addrm is
# not defined in this file (presumably it comes from RCommunityData -- confirm).
32 d <- generate.admin.addrm(logevents, current.userroles)
# Read the rights data of every wiki in wiki.list; all paths below are
# relative to the userroles data directory.
setwd("userroles_data/")
wiki.files <- paste0(wiki.list$wiki, ".tsv")
userroles <- rbindlist(lapply(wiki.files, load.rights.files))

# Roles of blocked users arrive wrapped in a "listusers_blockeduser" span:
# flag those rows, then strip the opening tag and the closing tag from the role.
userroles[, blocked := grepl('^<span class="listusers_blockeduser">(.*?)$', role)]
userroles[, role := gsub('^<span class="listusers_blockeduser">(.*?)$', '\\1', role)]
userroles[, role := gsub('^(.*?)</span>$', '\\1', role)]

# Classify each row by whether its role counts as an admin role or a bot role.
userroles[, `:=`(
  is.action.admin = role %in% c("sysop", "bureaucrat", "sysop,bureaucrat",
                                "staff", "admin", "fanonadmin", "steward"),
  is.action.bot = role %in% c("bot", "fyzbot", "bot-global")
)]

# Split into one table per kind of role and sort each by event time.
bots <- userroles[is.action.bot == TRUE]
admins <- userroles[is.action.admin == TRUE]

setorder(bots, timestamp)
setorder(admins, timestamp)
52 ## We want to keep track of when the roles changed.
53 ## Assume nobody was a bot or admin at the beginning of MediaWiki.
56 ## prev.isbot = ifelse(is.na(prev.isbot),(isbot & action=="removed"),prev.isbot)
# NOTE(review): the opening of the next expression is not visible in this
# excerpt; presumably it assigns these columns on `bots` -- confirm, including
# whether any grouping is applied.
# A role period begins at the row's own timestamp and ends at the timestamp of
# the next row; where there is no next row the end is filled with 2017-01-01
# (as.POSIXct without tz, so interpreted in the session time zone).
60 role.period.begin = timestamp,
61 role.period.end = shift(timestamp,fill=as.POSIXct("2017-01-01"),type="lead"))
# is.bot is TRUE on rows whose action is "added".
65 bots[,":="(is.bot = (action == "added"))]
# NOTE(review): same column definitions as above with the opening again not
# visible; presumably applied to `admins` -- confirm.
69 role.period.begin = timestamp,
70 role.period.end = shift(timestamp,fill=as.POSIXct("2017-01-01"),type="lead"))
# is.admin is TRUE on rows whose action is "added".
74 admins[,":="(is.admin = (action == "added") )]
# Write the results to RDS files for knitr.
# First drop the intermediate objects from the workspace.
rm(list = c("wiki.files", "userroles"))

# NOTE(review): `r` is not defined in the visible part of this file -- confirm
# where it is created.
saveRDS(object = bots, file = "bots.RDS")
saveRDS(object = admins, file = "admins.RDS")
saveRDS(object = r, file = "lib-01-generate_userroles.RDS")