From: Jeremy Foote
Date: Wed, 1 Apr 2020 02:42:31 +0000 (-0400)
Subject: Adding a tidyverse example (with very verbose comments)
X-Git-Url: https://code.communitydata.science/covid19.git/commitdiff_plain/6b05896aa509267a912238327ce9fcb1321bceaa?ds=inline;hp=8bb3db8b46e22311a5b7a1b0f88c8cc84c649699

Adding a tidyverse example (with very verbose comments)
---

diff --git a/keywords/analysis/output/top_queries_plot.png b/keywords/analysis/output/top_queries_plot.png
new file mode 100644
index 0000000..276610e
Binary files /dev/null and b/keywords/analysis/output/top_queries_plot.png differ
diff --git a/keywords/analysis/related_searches_tidyverse_example.R b/keywords/analysis/related_searches_tidyverse_example.R
new file mode 100644
index 0000000..f22ab0d
--- /dev/null
+++ b/keywords/analysis/related_searches_tidyverse_example.R
@@ -0,0 +1,45 @@
+### COVID-19 Digital Observatory
+### 2020-03-28
+###
+### Minimal example analysis file using trending search data.
+###
+### Counts how often each query is among the 10 highest-valued related queries
+### of a term-day pair, then saves a bar chart of the 10 most frequent queries.
+### The output path is relative; the committed plot lives in
+### keywords/analysis/output/, so presumably run this from keywords/analysis/.
+
+library(tidyverse)
+
+### Import and cleanup data
+
+# Downloaded on every run, so network access is required. The pipeline below
+# relies on the columns `term`, `date`, `query` and `value`.
+related.searches.top <- read_csv("https://github.com/CommunityDataScienceCollective/COVID-19_Digital_Observatory/raw/master/keywords/output/intermediate/related_searches_top.csv")
+
+
+## Plot how often the top 10 queries appear in the top 10 suggested list each day
+
+# top_n() is given its ranking column explicitly: left out, dplyr ranks by
+# whichever column happens to be last in the table. Ties at the cut-off are
+# kept, so a group can contribute more than 10 rows.
+# The cut-off of 10 mirrors how many suggested queries Google displays.
+plot <- related.searches.top %>%
+  group_by(term, date) %>%          # Group by term and date
+  arrange(desc(value)) %>%          # Sort by value (should already be sorted)
+  top_n(10, value) %>%              # Top 10 queries for each term-day pair
+  group_by(query) %>%               # Regroup, this time by query
+  summarize(appearances = n()) %>%  # Count how often each query is in a top 10
+  arrange(desc(appearances)) %>%    # Sort by appearances
+  top_n(10, appearances) %>%        # And get the top 10 queries
+  # Plot the number of appearances, ordered by appearances
+  ggplot(aes(x = reorder(query, appearances), y = appearances)) +
+  geom_col() +                      # Bar length is the value of `appearances`
+  coord_flip() +                    # Flip the plot
+  xlab("Query") +
+  ylab("Number of appearances in top 10 suggested queries") +
+  theme_minimal()                   # And make it minimal
+
+# Make sure the output directory exists before saving into it.
+output_file <- "./output/top_queries_plot.png"
+dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE)
+ggsave(output_file, plot = plot)