### COVID-19 Digital Observatory
### Minimal example analysis file using revisions data.
### Note: The revisions data files can get quite big and it may not make
### sense to try to load them into memory in R. This example file
### loads the first 1,000 rows of a sample file.
### Import and clean up the first 1,000 rows of one datafile from the observatory
16 url("https://covid19.communitydata.science/datasets/wikipedia/digobs_covid19-wikipedia-enwiki_revisions-20200401.tsv")
18 revisions <- read.table(DataURL, sep="\t", header=TRUE,
19 stringsAsFactors=FALSE,
22 revisions$timestamp <- as_datetime(revisions$timestamp)
### Group edits by editor to see edit counts
25 user.edit.counts <- revisions %>%
26 group_by(user, title) %>% # for the example, there's only one
28 summarize(appearances = n()) %>%
### Export that as a little table
32 write.table(user.edit.counts,
33 file="output/user_edit_counts-first1000.csv", sep=",",
### A simple time series of edits
38 arrange(timestamp) %>%
39 mutate(count = as.numeric(rownames(revisions))) %>%
40 ggplot(aes(x=timestamp, y=count)) +
41 geom_jitter(alpha=0.1) +
43 ylab("Total edits (cumulative)") +
44 ggtitle(paste("Revisions to article: ",
45 head(revisions$title,1), sep="")) +
48 ggsave("output/first_1000_edit_timeseries.png", plot)