code.communitydata.science - covid19.git/commitdiff
a minimal example in R that outputs a table of top 5 related search terms per day...
author aaronshaw <aaron.d.shaw@gmail.com>
Sat, 28 Mar 2020 15:18:33 +0000 (10:18 -0500)
committer aaronshaw <aaron.d.shaw@gmail.com>
Sat, 28 Mar 2020 15:18:33 +0000 (10:18 -0500)
transliterations/analysis/output/top5_queries_per_term_per_date.csv [new file with mode: 0644]
transliterations/analysis/related_searches_example.R [new file with mode: 0644]

diff --git a/transliterations/analysis/output/top5_queries_per_term_per_date.csv b/transliterations/analysis/output/top5_queries_per_term_per_date.csv
new file mode 100644 (file)
index 0000000..5ce1aa7
--- /dev/null
@@ -0,0 +1,6 @@
+"term","date","query.1","query.2","query.3","query.4","query.5"
+"coronavirus","2020-03-27",coronavirus update,corona,coronavirus symptoms,news coronavirus,coronavirus cases
+"covid-19","2020-03-27",covid-19 coronavirus,coronavirus,covid,covid-19 cases,covid 19
+"covid-19 pandemic","2020-03-27",coronavirus,covid-19 coronavirus pandemic,coronavirus pandemic,who,is covid-19 a pandemic
+"covid19","2020-03-27",covid,covid 19,coronavirus covid19,coronavirus,covid19 cases
+"sars-cov-2","2020-03-27",coronavirus,coronavirus sars-cov-2,covid-19,covid-19 sars-cov-2,sars
diff --git a/transliterations/analysis/related_searches_example.R b/transliterations/analysis/related_searches_example.R
new file mode 100644 (file)
index 0000000..19ddfc7
--- /dev/null
@@ -0,0 +1,28 @@
+### COVID-19 Digital Observatory
+### 2020-03-28
+### 
+### Minimal example analysis file using trending search data
+
+### Where the input lives: directory (with trailing slash) and file name
+DataDir <- "../data/output/"
+DataFile <- "related_searches_top.csv"
+
+### Import data. Only '"' acts as a quote and '#' is not a comment, so
+### search queries containing an apostrophe or '#' are read intact
+related.searches.top <- read.table(
+  paste0(DataDir, DataFile), sep = ",", header = TRUE, quote = "\"",
+  comment.char = "", stringsAsFactors = FALSE)
+
+### Top 5 queries per term/day: first 5 rows per group in file order, with
+### shorter groups NA-padded so the result is always a 5-column matrix
+top5.per.term.date <- aggregate(query ~ term + date, related.searches.top,
+                                FUN = function(q) q[seq_len(5L)])
+
+## Convert the date strings to Date class for further analysis or plotting
+top5.per.term.date$date <- as.Date(top5.per.term.date$date)
+
+### Export: write the aggregated table as comma-separated text with a
+### header row and no row names
+write.table(x = top5.per.term.date, sep = ",", row.names = FALSE,
+            file = "output/top5_queries_per_term_per_date.csv")
+

Community Data Science Collective || Want to submit a patch?