transliterations/analysis/related_searches_example.R

   1 ### COVID-19 Digital Observatory
   2 ### 2020-03-28
   3 ###
   4 ### Minimal example analysis file using trending search data
   5
   6 ### Import and cleanup data
   7
   8 DataURL <-
   9     url("https://github.com/CommunityDataScienceCollective/COVID-19_Digital_Observatory/blob/master/transliterations/data/output/related_searches_top.csv")
  10
  11 related.searches.top <- read.table(DataURL,
  12                            sep=",", header=TRUE,
  13                            stringsAsFactors=FALSE)
  14
  15 ### Alternatively, uncomment and run if working locally with full git tree
  16 ### Identify data source directory and file
  17 ## DataDir <- ("../data/output/")
  18 ## DataFile <- ("related_searches_top.csv")
  19
  20 ## related.searches.top <- read.table(paste(DataDir,DataFile, sep=""),
  21 ##                                   sep=",", header=TRUE,
  22 ##                                   stringsAsFactors=FALSE)
  23
  24 ### Aggregate top 5 search queries by term/day
  25 top5.per.term.date <- aggregate(query ~ term + date,
  26                                 data=related.searches.top,
  27                                 head, 5)
  28
  29 ## Might cleanup a bit for further analysis or visualization...
  30 top5.per.term.date$date <- as.Date(top5.per.term.date$date)
  31
  32 ### Export
  33 write.table(top5.per.term.date,
  34             file="output/top5_queries_per_term_per_date.csv", sep=",",
  35             row.names=FALSE)
  36