]> code.communitydata.science - rises_declines_wikia_code.git/blob - 03_generate_plots.R
add copy of the GPL
[rises_declines_wikia_code.git] / 03_generate_plots.R
1 #!/usr/bin/env Rscript
2
3 # Creates data for plotting
4
5 # Copyright (C) 2018  Nathan TeBlunthuis
6
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
19
20 library("ggplot2")
21 library("bootstrap")
22
23 library("scales")
24 source("lib-00-utils.R")
25 if(!exists("newcomers")){
26     source("01_build_datasets.R")
27 }
28
# Store the smallest and largest values of all.edits$date.time via remember()
# (helper presumably defined in lib-00-utils.R -- confirm) under fixed names.
remember(min(all.edits$date.time), "earliest.data.point")
remember(max(all.edits$date.time), "latest.data.point")
31
# Summarise newcomers per (wiki, half-year of wiki age): mean and variance of
# the is.reverted and survives columns plus the number of rows in the cell.
# Cells with fewer than two rows are dropped right away (N > 1).
p1.data <- newcomers[
    ,
    list(
        p.reverted = mean(is.reverted),
        var.reverted = var(is.reverted),
        p.survives = mean(survives),
        var.survives = var(survives),
        N = .N
    ),
    by = list(wiki.name, wiki.age.half.years)
][N > 1]

# Number of remaining rows (one per wiki) at each half-year of wiki age.
p1.data[, N.wikis := .N, by = list(wiki.age.half.years)]

# Express each wiki's values in units of that wiki's own standard deviation.
# Note: sd() is NA for a wiki with a single row and 0 for a constant series,
# so the scaled columns can contain non-finite values.
p1.data[
    ,
    `:=`(
        p.survives.in.sd = p.survives / sd(p.survives),
        p.reverted.in.sd = p.reverted / sd(p.reverted)
    ),
    by = list(wiki.name)
]
43
# Reshape to long format: one row per (wiki, half-year, measure).
p.data <- melt(
    p1.data,
    id.vars = c("wiki.name", "wiki.age.half.years"),
    measure.vars = c(
        "p.survives",
        "p.reverted",
        "p.survives.in.sd",
        "p.reverted.in.sd"
    )
)

# For every (half-year, measure) compute the five boxplot statistics
# (whisker ends and hinges with coef = 1.5), the mean, and the row count.
p.stats <- p.data[
    ,
    {
        five.num <- boxplot.stats(value, coef = 1.5)$stats
        names(five.num) <- c("min", "q1", "med", "q3", "max")
        as.list(c(five.num, mu = mean(value), N.wikis = .N))
    },
    by = list(wiki.age.half.years, variable)
]

# Stored before relabelling, so the remembered table keeps the raw names.
remember(p.stats)

# Human-readable facet labels for the two raw proportions. The subset-assign
# form is kept deliberately: it works on a copy rather than by reference.
p.stats[variable == "p.survives"]$variable <- "Survives"
p.stats[variable == "p.reverted"]$variable <- "Reverted"
52
# One-sided Spearman rank correlation tests between wiki age (in half-years)
# and the per-cell means: survival is tested for a negative association,
# reversion for a positive one. Results are stored via remember().
remember(
    cor.test(
        x = p1.data$wiki.age.half.years,
        y = p1.data$p.survives,
        alternative = "less",
        method = "spearman"
    ),
    "survives.cor.test"
)
remember(
    cor.test(
        x = p1.data$wiki.age.half.years,
        y = p1.data$p.reverted,
        alternative = "greater",
        method = "spearman"
    ),
    "reverted.cor.test"
)
55
# Axis tick labels "Year 0", "Year 1", ... up to the largest age in p.stats.
xlabels <- paste0("Year ", 0:max(p.stats$wiki.age.half.years))

# Boxplots drawn from the precomputed statistics (stat = "identity"), one
# facet per measure, with a dashed line through the medians. The line's x is
# shifted by one -- presumably because ages start at 0 while discrete axis
# positions start at 1 (confirm against the data).
p <- ggplot(
    p.stats,
    aes(
        x = as.factor(wiki.age.half.years),
        ymin = min,
        lower = q1,
        middle = med,
        upper = q3,
        ymax = max,
        width = 0.3
    )
) +
    geom_boxplot(stat = "identity") +
    geom_line(aes(x = wiki.age.half.years + 1, y = med), linetype = 2) +
    facet_wrap(
        "variable",
        nrow = 2,
        strip.position = "bottom",
        scales = "free"
    ) +
    scale_y_continuous(name = "Proportion of newcomers", minor_breaks = NULL) +
    scale_x_discrete(name = "Wiki age", labels = xlabels) +
    theme_bw() +
    theme(legend.position = "None")

# No file name is given, so pdf() writes to its default output file.
pdf(width = 6, height = 6)
print(p)
dev.off()
67
# Edits per editor per wiki per month of wiki age; the wiki's age in years is
# carried along from the first row of each group.
active.editors <- all.edits[
    ,
    list(
        N.edits = .N,
        wiki.age.years = first(wiki.age.years)
    ),
    by = list(wiki.name, editor, wiki.age.months)
]

# Count, per wiki and month, the editors with at least 5 edits in that month.
n.active.editors <- active.editors[
    N.edits >= 5,
    list(
        N.active.editors = .N,
        wiki.age.years = first(wiki.age.years)
    ),
    by = list(wiki.name, wiki.age.months)
]

# Number of wikis that have a row at each month of age.
n.active.editors[, `:=`(N = .N), by = list(wiki.age.months)]

# Per-wiki summaries: oldest observed month, peak active-editor count, and the
# count expressed in units of the wiki's own standard deviation (non-finite
# when a wiki has one row or a constant count).
n.active.editors[
    ,
    `:=`(
        max.age = max(wiki.age.months),
        max.active.editors = max(N.active.editors),
        sd.units.active.editors = N.active.editors / sd(N.active.editors)
    ),
    by = "wiki.name"
]

# Active editors as a fraction of the wiki's own peak.
n.active.editors[
    ,
    `:=`(active.editors.pmax = N.active.editors / max.active.editors)
]
# Restrict the time axis to months at or below the 90th percentile of max.age
# (the quantile is taken over all rows of n.active.editors).
wiki.age.quantile <- 0.90

max.age.months <- quantile(n.active.editors$max.age, probs = wiki.age.quantile)

# Bootstrap the mean of the sd-scaled active-editor counts within each month
# (5000 resamples per month), using only rows with a finite scaled value.
boot <- n.active.editors[
    is.finite(sd.units.active.editors) & wiki.age.months <= max.age.months,
    list(
        thetastar = bootstrap(
            x = sd.units.active.editors,
            nboot = 5000,
            theta = mean
        )$thetastar
    ),
    by = list(wiki.age.months)
]

# 95% percentile interval of the bootstrap means for each month.
boot.ci <- boot[
    ,
    as.list(quantile(thetastar, probs = c(0.025, 0.975))),
    by = list(wiki.age.months)
]
names(boot.ci) <- c("wiki.age.months", "lower.ci", "upper.ci")

# Monthly averages across wikis over the same subset of rows as the bootstrap.
plot2.data <- n.active.editors[
    is.finite(sd.units.active.editors) & wiki.age.months <= max.age.months,
    list(
        sd.units.active.editors = mean(sd.units.active.editors),
        N.active.editors = mean(N.active.editors),
        wiki.age.years = first(wiki.age.years),
        N.wikis = .N
    ),
    by = list(wiki.age.months)
]

# Attach the interval bounds by month; the i. prefix names boot.ci's columns.
plot2.data[
    boot.ci,
    `:=`(lower.ci = i.lower.ci, upper.ci = i.upper.ci),
    on = "wiki.age.months"
]

remember(plot2.data, "plot.active.editors.dt")

Community Data Science Collective || Want to submit a patch?