From c025a526e8fc1767546343105f52d1698c0c5e14 Mon Sep 17 00:00:00 2001
From: aaronshaw
Date: Sat, 28 Mar 2020 10:18:33 -0500
Subject: [PATCH 1/1] a minimal example in R that outputs a table of top 5 related search terms per day per query

---
 .../output/top5_queries_per_term_per_date.csv |  6 ++++
 .../analysis/related_searches_example.R       | 28 +++++++++++++++++++
 2 files changed, 34 insertions(+)
 create mode 100644 transliterations/analysis/output/top5_queries_per_term_per_date.csv
 create mode 100644 transliterations/analysis/related_searches_example.R

diff --git a/transliterations/analysis/output/top5_queries_per_term_per_date.csv b/transliterations/analysis/output/top5_queries_per_term_per_date.csv
new file mode 100644
index 0000000..5ce1aa7
--- /dev/null
+++ b/transliterations/analysis/output/top5_queries_per_term_per_date.csv
@@ -0,0 +1,6 @@
+"term","date","query.1","query.2","query.3","query.4","query.5"
+"coronavirus","2020-03-27",coronavirus update,corona,coronavirus symptoms,news coronavirus,coronavirus cases
+"covid-19","2020-03-27",covid-19 coronavirus,coronavirus,covid,covid-19 cases,covid 19
+"covid-19 pandemic","2020-03-27",coronavirus,covid-19 coronavirus pandemic,coronavirus pandemic,who,is covid-19 a pandemic
+"covid19","2020-03-27",covid,covid 19,coronavirus covid19,coronavirus,covid19 cases
+"sars-cov-2","2020-03-27",coronavirus,coronavirus sars-cov-2,covid-19,covid-19 sars-cov-2,sars
diff --git a/transliterations/analysis/related_searches_example.R b/transliterations/analysis/related_searches_example.R
new file mode 100644
index 0000000..19ddfc7
--- /dev/null
+++ b/transliterations/analysis/related_searches_example.R
@@ -0,0 +1,28 @@
+### COVID-19 Digital Observatory
+### 2020-03-28
+### 
+### Minimal example analysis file using trending search data
+
+### Identify data source directory and file
+DataDir <- ("../data/output/")
+DataFile <- ("related_searches_top.csv")
+
+### Import data (comma-separated, header row, strings kept as character)
+related.searches.top <- read.table(paste(DataDir,DataFile,
+                                         sep=""),
+                                   sep=",", header=TRUE,
+                                   stringsAsFactors=FALSE)
+
+### Aggregate the first 5 queries per term/day (top 5 if input is rank-ordered; TODO confirm)
+top5.per.term.date <- aggregate(query ~ term + date,
+                                data=related.searches.top,
+                                head, 5)
+
+## Convert the date strings to Date class for further analysis or visualization
+top5.per.term.date$date <- as.Date(top5.per.term.date$date)
+
+### Export
+write.table(top5.per.term.date,
+            file="output/top5_queries_per_term_per_date.csv", sep=",",
+            row.names=FALSE)
+
-- 
2.39.2