]> code.communitydata.science - covid19.git/blob - transliterations/analysis/related_searches_example.R
minimal analysis example with pageview data
[covid19.git] / transliterations / analysis / related_searches_example.R
1 ### COVID-19 Digital Observatory
2 ### 2020-03-28
3 ### 
4 ### Minimal example analysis file using trending search data
5
6 ### Import and cleanup data
7
8 DataURL <-
9     url("https://raw.githubusercontent.com/CommunityDataScienceCollective/COVID-19_Digital_Observatory/master/transliterations/data/output/related_searches_top.csv")
10 ## Read the raw CSV; the github.com "blob" address serves an HTML page instead
11 related.searches.top <- read.table(DataURL,
12                            sep=",", header=TRUE,
13                            stringsAsFactors=FALSE)
14
15 ### Alternatively, uncomment and run if working locally with full git tree
16 ### Identify data source directory and file
17 ## DataDir <- ("../data/output/")
18 ## DataFile <- ("related_searches_top.csv")
19
20 ## related.searches.top <- read.table(paste(DataDir,DataFile, sep=""),
21 ##                                   sep=",", header=TRUE,
22 ##                                   stringsAsFactors=FALSE)
23
24 ### Aggregate top 5 search queries by term/day (NA-padded when fewer exist)
25 top5.per.term.date <- aggregate(query ~ term + date,
26                                 data=related.searches.top,
27                                 FUN=function(q) head(c(q, rep(NA, 5)), 5))
28 ## Fixed-length results keep `query` a 5-column matrix, never a ragged list
29 ## Might cleanup a bit for further analysis or visualization...
30 top5.per.term.date$date <- as.Date(top5.per.term.date$date)
31
32 ### Export (create the output directory first when it is missing)
33 dir.create("output", showWarnings=FALSE)
34 write.table(top5.per.term.date,
35             file="output/top5_queries_per_term_per_date.csv", sep=",",
36             row.names=FALSE)

Community Data Science Collective || Want to submit a patch?