### COVID-19 Digital Observatory
###
### Minimal example analysis file using pageview data
###
### Reads a daily pageview TSV, keeps the columns of interest, ranks the
### rows by view count, exports the ten highest-ranked rows as CSV, and
### draws a density plot of the view counts.

### Packages: dplyr supplies group_by()/arrange()/desc(), ggplot2 the
### plotting functions, and scales the comma() axis labeller.
library(dplyr)
library(ggplot2)
library(scales)

### Import and cleanup data

DataURL <-
    url("https://github.com/CommunityDataScienceCollective/COVID-19_Digital_Observatory/raw/master/wikipedia_views/data/dailyviews2020032600.tsv")

## read.table() opens the connection itself and closes it when done.
views <-
    read.table(DataURL, sep = "\t", header = TRUE, stringsAsFactors = FALSE)

### Alternatively, uncomment and run if working locally with the full git
### tree
###
### Identify data source directory and file
## DataDir <- "../data"
## DataFile <- "dailyviews2020032600.tsv"
##
## views <- read.table(file.path(DataDir, DataFile),
##                     sep = "\t", header = TRUE,
##                     stringsAsFactors = FALSE)

### Cleanup and do the grouping with functions from the Tidyverse
### (see https://www.tidyverse.org for more info)

views <- views[, c("article", "project", "timestamp", "views")]
views$timestamp <- factor(views$timestamp)

### Sorts and groups at the same time
## arrange() ignores the grouping unless .by_group = TRUE, so the rows are
## ordered by views across the whole table while the result stays grouped
## by project and timestamp.
views.by.proj.date <- arrange(group_by(views, project, timestamp),
                              desc(views))

### Export just the top 10 by pageviews
## Create the target directory first so that write.table() does not fail
## on a fresh checkout.
dir.create("output", showWarnings = FALSE, recursive = TRUE)
## row.names = FALSE keeps the header aligned with the data columns.
write.table(head(views.by.proj.date, 10),
            file = "output/top10_views_by_project_date.csv", sep = ",",
            row.names = FALSE)

### A simple visualization
p <- ggplot(data = views.by.proj.date, aes(views))

## Density plot with log-transformed axis
## Explicit print() so the plot is also drawn when the file is run via
## source(), where top-level values are not auto-printed.
print(p + geom_density() + scale_x_log10(labels = comma))