#!/usr/bin/env Rscript
# Fits models predicting reversions of namespace 4 edits

# Copyright (C) 2018 Nathan TeBlunthuis

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

library(effects)
library(texreg)
library(lme4)

# Build the shared datasets only when `newcomers` is not already defined in
# this session.
if (!exists("newcomers")) {
  source("01_build_datasets.R")
}

# Script-level switches. `nosave` is not referenced in this part of the script.
# NOTE(review): `sample` shadows the name of base::sample(); calls to sample()
# still resolve to the function, but a different flag name would be clearer.
nosave <- FALSE
sample <- FALSE

if (isTRUE(sample)) {
  source("lib-01-sample-datasets.R")
  ns4.ds <- sample.ns4.edits()
  weights <- ns4.ds$weight
} else {
  ns4.ds <- ns4.reg.edits
}

# ---- Derived covariates -----------------------------------------------------
# `:=` adds/overwrites columns in place. The log columns are computed from the
# original `age` / `wiki.age` values because every right-hand side in a single
# `:=` call is evaluated before any column is assigned.
# NOTE(review): `units = "years"` presumes `age` and `wiki.age` are of a class
# whose as.double() method accepts that unit -- confirm against
# 01_build_datasets.R.
ns4.ds <- ns4.ds[, ":="(
  wiki.age.log = log1p(as.double(wiki.age, units = "years")),
  age.log = log1p(as.double(age, units = "years")),
  wiki.age = as.double(wiki.age, units = "years"),
  # Calendar quarter label "<year>_<1..4>" (months 1-3, 4-6, 7-9, 10-12).
  # The divisor must be 3: dividing the month by 4 produces only three
  # four-month periods per year, not quarters.
  quarter = as.factor(
    paste0(year(date.time), "_", ceiling(month(date.time) / 3))
  ),
  age = as.double(age, units = "years")
)]

# Earliest `time.first.edit` of each editor across all rows.
ns4.ds <- ns4.ds[,
  ":="(time.first.wikia.edit = min(time.first.edit)),
  by = .(editor)
]

# Keep a handle on the unfiltered table, then restrict the main dataset to rows
# whose `time.first.edit` equals the editor's earliest one.
ns4.ds.all.newcomers <- ns4.ds
ns4.ds <- ns4.ds[time.first.wikia.edit == time.first.edit]

# ---- Summary statistics -----------------------------------------------------
ns4.summary.stats <- list(
  p.reverted = mean(ns4.ds$reverted),
  var.reverted = var(ns4.ds$reverted),
  mean.editor.age = mean(ns4.ds$age),
  var.editor.age = var(ns4.ds$age),
  median.editor.age = median(ns4.ds$age),
  mean.wiki.age = mean(ns4.ds$wiki.age),
  var.wiki.age = var(ns4.ds$wiki.age),
  median.wiki.age = median(ns4.ds$wiki.age)
)
remember(ns4.summary.stats)

# ---- Unweighted logistic regression -----------------------------------------
print("fit morgan model")
# One formula is shared by every "morgan" model below.
f.morgan <- as.formula("reverted ~ age + wiki.age + quarter + wiki.name")
morgan.model <- glm(f.morgan, data = ns4.ds, family = binomial(link = "logit"))
saveRDS(morgan.model, "morgan.model.RDS")
remember(extract(morgan.model), "morgan.model", silent = TRUE)

# ---- Weighted: every wiki contributes the same total weight ------------------
print("fit morgan model weights")
n.total.wikis <- length(unique(ns4.ds$wiki.name))
weight.per.wiki <- nrow(ns4.ds) / n.total.wikis
# Each row gets weight.per.wiki / (rows in its wiki), so the weights within a
# wiki sum to weight.per.wiki.
ns4.ds <- ns4.ds[, weights := weight.per.wiki / .N, by = .(wiki.name)]
morgan.model.weighted <- glm(
  f.morgan,
  data = ns4.ds,
  family = binomial(link = "logit"),
  weights = ns4.ds$weights
)
saveRDS(morgan.model.weighted, "morgan.model.weighted.RDS")
remember(extract(morgan.model.weighted), "morgan.model.weighted", silent = TRUE)

# ---- Weighted, restricted to wikis with at least `min.edits` rows ------------
print("fit morgan model weights")
ns4.ds <- ns4.ds[, N := .N, by = wiki.name]
# Hold on to the full table; `ns4.ds` is temporarily replaced by the filtered
# subset and restored after this model is fit.
ns4.ds.temp <- ns4.ds
min.edits <- 10

# Share of wikis dropped by the filter (printed and remembered).
remember(
  print(
    1 - length(unique(ns4.ds[N >= min.edits]$wiki.name)) /
      length(unique(ns4.ds$wiki.name))
  ),
  "p.wikis.removed.weighted2"
)

# Drop wikis with fewer than `min.edits` rows (the original author's note says
# this removed the bottom 24.1% of wikis).
ns4.ds <- ns4.ds[N >= min.edits]
n.total.wikis <- length(unique(ns4.ds$wiki.name))
weight.per.wiki <- nrow(ns4.ds) / n.total.wikis
ns4.ds <- ns4.ds[, weights := weight.per.wiki / .N, by = .(wiki.name)]
morgan.model.weighted2 <- glm(
  f.morgan,
  data = ns4.ds,
  family = binomial(link = "logit"),
  weights = ns4.ds$weights
)
saveRDS(morgan.model.weighted2, "morgan.model.weighted2.RDS")
remember(
  extract(morgan.model.weighted2),
  "morgan.model.weighted2",
  silent = TRUE
)
ns4.ds <- ns4.ds.temp

# ---- Same specification on the unfiltered table ------------------------------
print("fit morgan model all newcomers")
morgan.model.all.newcomers <- glm(
  f.morgan,
  data = ns4.ds.all.newcomers,
  family = binomial(link = "logit")
)
saveRDS(morgan.model.all.newcomers, "morgan.model.all.newcomers.RDS")
remember(
  extract(morgan.model.all.newcomers),
  "morgan.model.all.newcomers",
  silent = TRUE
)

# ---- Random-intercept-only model (input to the ICC computed afterwards) ------
print("fitting RE model")
# The formula drops the fixed intercept (-1) and keeps only a random intercept
# per wiki.
re.icc.reverted.model <- glmer(
  as.formula("reverted ~ + (1 | wiki.name) -1 "),
  data = ns4.ds,
  family = binomial(link = logit)
)
# Persist the random-intercept model fitted above.
saveRDS(re.icc.reverted.model, file = "re.icc.reverted.model.RDS")

# ---- Intraclass correlation of `reverted` across wikis -----------------------
# Variance components of the fitted model as a data.table.
varcorrmat <- as.data.table(VarCorr(re.icc.reverted.model))

# Variance of the per-wiki random intercept.
wiki.var <- varcorrmat[grp == "wiki.name" & var1 == "(Intercept)"][["vcov"]]

# Sample variance of the model residuals (residuals() at its default type).
# NOTE(review): binomial mixed models are often given a fixed latent-scale
# residual variance (pi^2 / 3 for the logit link) when computing an ICC;
# confirm that the empirical residual variance is the intended denominator term.
group.var <- var(residuals(re.icc.reverted.model))

icc <- wiki.var / (wiki.var + group.var)

remember(varcorrmat, "icc.reverted.varcorrmat")
remember(group.var, "icc.reverted.group.var")
remember(icc, "icc.reverted")

# ---- Disabled legacy models (kept verbatim for reference) --------------------
## print("fit morgan model sample")
## sample.size <- 30
## ns4.ds <- ns4.ds[,in.sample:=(.N >= sample.size),by=wiki.name]
## # DT[,.SD[sample(.N, min(3,.N))],by = a]
## ns4.ds.equal.sample <- ns4.ds[,.SD[sample(.N,min(sample.size,.N))], by=wiki.name]
## morgan.model.sampled <- glm(f.morgan,data=ns4.ds.equal.sample,family=binomial(link='logit'))
## saveRDS(morgan.model.sampled,"morgan.model.sampled.RDS")
## remember(extract(morgan.model.sampled),"morgan.model.sampled",silent=TRUE)

## ns4.model2.formula <- as.formula("reverted ~ age.log + wiki.age + quarter")
## ns4.model2 <- glm(ns4.model2,data=ns4.ds,family=binomial(link='logit'),weights=weights)
## remember(extract(ns4.model2),"ns4.model2")

## print('fit morgan no pooling model')
## f.morgan <- as.formula("reverted ~ age.log + wiki.age + quarter + wiki.name:age.log + wiki.name:wiki.age")
## morgan.model <- glm(f.morgan,data=ns4.ds,family=binomial(link='logit'),weights=weights)
## remember(extract(morgan.model),"morgan.model")

## re.ns4.model <- glmer(as.formula("reverted ~ age.log + wiki.age + quarter | wiki.name"),data=ns4.ds,family=binomial(link='logit'),weights=weights)
## remember(extract(re.ns4.model),'re.ns4.model')

## print('fit morgan.robustness.1 model')
## f.morgan.robustness.1 <- as.formula("reverted ~ age.log + wiki.age + quarter + wiki.name")
## ns4.reg.edits.robustness <- build.namespace4.dataset(all.edits[p.reverted < 0.5])
## ns4.reg.edits.robustness[,":="(wiki.age.log = log1p(as.double(wiki.age,units="weeks")),
##                                age.log = log1p(as.double(age,units="weeks")),
##                                wiki.age = as.double(wiki.age,units='weeks'),
##                                quarter = as.factor(paste0(year(date.time),"_",ceiling(month(date.time)/4))))]
## morgan.robustness.1.model <- glm(f.morgan.robustness.1,data=pns4.reg.edits.robustness,family=binomial(link='logit'),weights=weights)
## saveRDS(morgan.robustness.1.model,"morgan.robustness.1.model.RDS")
## remember(extract(morgan.robustness.1.model),"morgan.robustness.1.model")

## ns4.ds[,":="(wiki.age.log = log1p(as.numeric(wiki.age,units="weeks")), age.log = log1p(as.numeric(age,units="weeks")))]
## f.ns4.2 <- as.formula("reverted ~ age.log + wiki.age.log + age.log|wiki.age.log + wiki.name")
## ns4.2.model <- glm(f.morgan,data=ns4.ds,family=binomial(link='logit'),weights=weights)
## remember(extract(ns4.2.model))

## summary statistics for namespace 4 edits