3 # Fits models predicting reversions of namespace 4 edits
4 # Copyright (C) 2018 Nathan TeBlunthuis
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <https://www.gnu.org/licenses/>.
# Load the input data. When no object named "newcomers" exists, the
# dataset-building script is sourced first.
# NOTE(review): the closing brace of this `if` is not visible in this listing,
# and `ns4.ds` is assigned twice in a row below (the sampled data is then
# overwritten by `ns4.reg.edits`) -- presumably these were alternative
# branches of a conditional; confirm against the original script.
22 if(!exists("newcomers")){
23 source("01_build_datasets.R")
29 source("lib-01-sample-datasets.R")
30 ns4.ds <- sample.ns4.edits()
# Keeps the `weight` column of the sampled data as a free-standing vector.
# The active model fits further down pass `ns4.ds$weights` instead of this
# vector; only the commented-out fits at the end of the file reference it.
31 weights <- ns4.ds$weight
33 ns4.ds <- ns4.reg.edits
# Derive the model covariates by reference on ns4.ds:
#   * wiki.age / age are converted to numeric with units = "years"
#     (presumably they arrive as time-span objects -- confirm upstream);
#   * wiki.age.log / age.log are log1p() of those same year values;
#   * quarter is a factor labelled "<year>_<quarter>" built from date.time.
# A calendar quarter is three months, so the month number is divided by 3
# (labels 1-4). Dividing by 4 yielded only three periods per year
# (Jan-Apr, May-Aug, Sep-Dec) despite the column being named `quarter`.
37 ns4.ds <- ns4.ds[,":="(wiki.age.log = log1p(as.double(wiki.age,units="years")),
38 age.log = log1p(as.double(age,units="years")),
39 wiki.age = as.double(wiki.age,units='years'),
40 quarter = as.factor(paste0(year(date.time),"_",ceiling(month(date.time)/3))),
41 age = as.double(age,units='years'))]
# For every editor, record the earliest `time.first.edit` found across all of
# that editor's rows.
43 ns4.ds <- ns4.ds[,":="(time.first.wikia.edit = min(time.first.edit)),by=.(editor)]
# Keep a handle on the data before the filter on the next line; it is used
# later to fit the "all newcomers" model.
44 ns4.ds.all.newcomers <- ns4.ds
# Retain only rows whose `time.first.edit` equals the editor's earliest one
# (presumably: edits made on the first wiki the editor joined -- confirm).
45 ns4.ds <- ns4.ds[time.first.wikia.edit == time.first.edit]
# Descriptive statistics of the filtered dataset, collected in a named list:
# mean and variance of `reverted`, and mean / variance / median of `age` and
# `wiki.age` (both were converted with units = "years" earlier in the script).
47 ns4.summary.stats <- list()
48 ns4.summary.stats$p.reverted <- mean(ns4.ds$reverted)
49 ns4.summary.stats$var.reverted <- var(ns4.ds$reverted)
50 ns4.summary.stats$mean.editor.age <- mean(ns4.ds$age)
51 ns4.summary.stats$var.editor.age <- var(ns4.ds$age)
52 ns4.summary.stats$median.editor.age <- median(ns4.ds$age)
53 ns4.summary.stats$mean.wiki.age <- mean(ns4.ds$wiki.age)
54 ns4.summary.stats$var.wiki.age <- var(ns4.ds$wiki.age)
55 ns4.summary.stats$median.wiki.age <- median(ns4.ds$wiki.age)
# remember() is defined outside this file; presumably it stores the value
# for later reporting -- confirm.
57 remember(ns4.summary.stats)
# Baseline model: unweighted logistic regression of `reverted` on editor age,
# wiki age, the quarter factor and a per-wiki fixed effect (wiki.name).
59 print('fit morgan model')
60 f.morgan <- as.formula("reverted ~ age + wiki.age + quarter + wiki.name")
61 morgan.model <- glm(f.morgan,data=ns4.ds,family=binomial(link='logit'))
# The full fitted object is written to disk; the result of extract() is
# passed to remember() under the name "morgan.model". Both helpers are
# defined outside this file.
62 saveRDS(morgan.model,"morgan.model.RDS")
63 remember(extract(morgan.model),"morgan.model",silent=TRUE)
# Same model, re-fitted with per-row weights chosen so that every wiki
# contributes the same total weight.
65 print('fit morgan model weights')
66 f.morgan <- as.formula("reverted ~ age + wiki.age + quarter + wiki.name")
# weight.per.wiki is the average number of rows per wiki. Each row's weight is
# that average divided by the number of rows in its own wiki (.N), so the
# weights within any one wiki sum to weight.per.wiki.
68 n.total.wikis <- length(unique(ns4.ds$wiki.name))
69 weight.per.wiki <- nrow(ns4.ds)/n.total.wikis
70 ns4.ds <- ns4.ds[,weights := weight.per.wiki/.N, by=.(wiki.name)]
71 morgan.model.weighted <- glm(f.morgan,data=ns4.ds,family=binomial(link='logit'),weights=ns4.ds$weights)
72 saveRDS(morgan.model.weighted,"morgan.model.weighted.RDS")
73 remember(extract(morgan.model.weighted),"morgan.model.weighted",silent=TRUE)
# Second weighted fit: wikis with fewer than `min.edits` rows are dropped
# first, then the equal-total-weight-per-wiki weights are recomputed on the
# remaining data and the model is re-fitted.
# The progress message now names this stage explicitly; it used to repeat the
# message of the previous weighted fit, making the two stages
# indistinguishable in the log.
75 print('fit morgan model weighted2')
76 f.morgan <- as.formula("reverted ~ age + wiki.age + quarter + wiki.name")
# N = number of rows of each wiki, used for the size filter below.
77 ns4.ds <- ns4.ds[,N:=.N,by=wiki.name]
# NOTE(review): `min.edits` is not defined anywhere in the visible listing;
# it must be set before this point or the next statement fails.
# Records (and prints) the share of wikis that the filter removes.
80 remember(print(1 - length(unique(ns4.ds[N>=min.edits]$wiki.name))/length(unique(ns4.ds$wiki.name))),"p.wikis.removed.weighted2")
81 # remove the bottom 24.1% of wikis
82 ns4.ds <- ns4.ds[N>=min.edits]
# Recompute the weights on the reduced data: within each remaining wiki the
# weights again sum to weight.per.wiki.
83 n.total.wikis <- length(unique(ns4.ds$wiki.name))
84 weight.per.wiki <- nrow(ns4.ds)/n.total.wikis
85 ns4.ds <- ns4.ds[,weights := weight.per.wiki/.N, by=.(wiki.name)]
86 morgan.model.weighted2 <- glm(f.morgan,data=ns4.ds,family=binomial(link='logit'),weights=ns4.ds$weights)
87 saveRDS(morgan.model.weighted2,"morgan.model.weighted2.RDS")
88 remember(extract(morgan.model.weighted2),"morgan.model.weighted2",silent=TRUE)
# Unweighted fit of the same formula on ns4.ds.all.newcomers, the data as it
# was captured before the first-wiki filter (and therefore also before the
# min.edits filter) was applied to ns4.ds.
91 print('fit morgan model all newcomers')
92 morgan.model.all.newcomers <- glm(f.morgan,data=ns4.ds.all.newcomers,family=binomial(link='logit'))
93 saveRDS(morgan.model.all.newcomers,"morgan.model.all.newcomers.RDS")
94 remember(extract(morgan.model.all.newcomers),"morgan.model.all.newcomers",silent=TRUE)
# Intra-class correlation of `reverted` across wikis, from a logistic mixed
# model that contains only a random intercept per wiki.name (the "-1" removes
# the fixed intercept). At this point ns4.ds is the data left after the
# min.edits filter above.
# glmer() is presumably lme4's; the library call is not visible in this listing.
96 print('fitting RE model')
98 re.icc.reverted.model <- glmer(as.formula("reverted ~ + (1 | wiki.name) -1 "),data=ns4.ds,family=binomial(link=logit))
99 saveRDS(re.icc.reverted.model,"re.icc.reverted.model.RDS")
# Variance components as a table; wiki.var is the variance of the wiki-level
# random intercept.
100 varcorrmat <- as.data.table(VarCorr(re.icc.reverted.model))
101 wiki.var <- varcorrmat[grp=='wiki.name' & var1=="(Intercept)" ,vcov]
# The within-group variance is taken as the empirical variance of the model
# residuals (residuals() called with its default type).
# NOTE(review): for a logit link the latent-scale ICC is commonly computed with
# pi^2/3 as the level-1 variance -- confirm this residual-based definition is
# the intended one.
102 group.var <- var(residuals(re.icc.reverted.model))
103 icc <- wiki.var/(group.var + wiki.var)
104 remember(varcorrmat,'icc.reverted.varcorrmat')
105 remember(group.var,'icc.reverted.group.var')
106 remember(icc,'icc.reverted')
108 ## print("fit morgan model sample")
110 ## ns4.ds <- ns4.ds[,in.sample:=(.N >= sample.size),by=wiki.name]
111 ## # DT[,.SD[sample(.N, min(3,.N))],by = a]
112 ## ns4.ds.equal.sample <- ns4.ds[,.SD[sample(.N,min(sample.size,.N))], by=wiki.name]
113 ## morgan.model.sampled <- glm(f.morgan,data=ns4.ds.equal.sample,family=binomial(link='logit'))
114 ## saveRDS(morgan.model.sampled,"morgan.model.sampled.RDS")
115 ## remember(extract(morgan.model.sampled),"morgan.model.sampled",silent=TRUE)
117 ## ns4.model2.formula <- as.formula("reverted ~ age.log + wiki.age + quarter")
118 ## ns4.model2 <- glm(ns4.model2,data=ns4.ds,family=binomial(link='logit'),weights=weights)
119 ## remember(extract(ns4.model2),"ns4.model2")
121 ## print('fit morgan no pooling model')
122 ## f.morgan <- as.formula("reverted ~ age.log + wiki.age + quarter + wiki.name:age.log + wiki.name:wiki.age")
123 ## morgan.model <- glm(f.morgan,data=ns4.ds,family=binomial(link='logit'),weights=weights)
124 ## remember(extract(morgan.model),"morgan.model")
126 ## re.ns4.model <- glmer(as.formula("reverted ~ age.log + wiki.age + quarter | wiki.name"),data=ns4.ds,family=binomial(link='logit'),weights=weights)
128 ## remember(extract(re.ns4.model),'re.ns4.model')
130 ## print('fit morgan.robustness.1 model')
131 ## f.morgan.robustness.1 <- as.formula("reverted ~ age.log + wiki.age + quarter + wiki.name")
132 ## ns4.reg.edits.robustness <- build.namespace4.dataset(all.edits[p.reverted < 0.5])
134 ## ns4.reg.edits.robustness[,":="(wiki.age.log = log1p(as.double(wiki.age,units="weeks")),
135 ## age.log = log1p(as.double(age,units="weeks")),
136 ## wiki.age = as.double(wiki.age,units='weeks'),
137 ## quarter = as.factor(paste0(year(date.time),"_",ceiling(month(date.time)/4))))]
139 ## morgan.robustness.1.model <- glm(f.morgan.robustness.1,data=pns4.reg.edits.robustness,family=binomial(link='logit'),weights=weights)
140 ## saveRDS(morgan.robustness.1.model,"morgan.robustness.1.model.RDS")
141 ## remember(extract(morgan.robustness.1.model),"morgan.robustness.1.model")
144 ## ns4.ds[,":="(wiki.age.log = log1p(as.numeric(wiki.age,units="weeks")), age.log = log1p(as.numeric(age,units="weeks")))]
145 ## f.ns4.2 <- as.formula("reverted ~ age.log + wiki.age.log + age.log|wiki.age.log + wiki.name")
146 ## ns4.2.model <- glm(f.morgan,data=ns4.ds,family=binomial(link='logit'),weights=weights)
147 ## remember(extract(ns4.2.model))
149 ## summary statistics for namespace 4 edits