]> code.communitydata.science - covid19.git/blobdiff - transliterations/analysis/related_searches_example.R
a minimal example in R that outputs a table of top 5 related search terms per day...
[covid19.git] / transliterations / analysis / related_searches_example.R
diff --git a/transliterations/analysis/related_searches_example.R b/transliterations/analysis/related_searches_example.R
new file mode 100644 (file)
index 0000000..19ddfc7
--- /dev/null
@@ -0,0 +1,28 @@
+### COVID-19 Digital Observatory
+### 2020-03-28
+### 
+### Minimal example analysis file using trending search data
+
+### Identify data source directory and file
+DataDir <- "../data/output/"
+DataFile <- "related_searches_top.csv"
+
+### Import data. Only '"' is treated as a quote and '#' is not a comment
+### marker, so queries containing apostrophes or hashes are read intact.
+related.searches.top <- read.table(paste0(DataDir, DataFile), sep=",",
+                                   header=TRUE, quote="\"", comment.char="",
+                                   stringsAsFactors=FALSE)
+
+### Aggregate the first 5 queries per term/day. Each group is NA-padded to
+### exactly 5 values so `query` stays a matrix column that write.table accepts.
+top5.per.term.date <- aggregate(query ~ term + date, data=related.searches.top,
+                                FUN=function(q) head(c(q, rep(NA, 5)), 5))
+
+## Convert the date column to Date class for further analysis or visualization
+top5.per.term.date$date <- as.Date(top5.per.term.date$date)
+
+### Export the per-term/per-day table as comma-separated text
+### (column header row included, R row names omitted)
+write.table(x=top5.per.term.date, sep=",", row.names=FALSE,
+            file="output/top5_queries_per_term_per_date.csv")
+

Community Data Science Collective || Want to submit a patch?