keywords/example_analysis/related_searches_tidyverse_example.R

   1 ### COVID-19 Digital Observatory
   2 ### 2020-03-28
   3 ###
   4 ### Minimal example analysis file using trending search data
   5
   6 library(tidyverse)
   7
   8 ### Import and cleanup data
   9
  10
  11 related.searches.top = read_csv("https://github.com/CommunityDataScienceCollective/COVID-19_Digital_Observatory/raw/master/keywords/output/intermediate/related_searches_top.csv")
  12
  13
  14 ## Plot how often the top 10 queries appear in the top 10 suggested list each day
  15
  16 plot <- related.searches.top %>%
  17   group_by(term, date) %>% # Group by term and date
  18   arrange(-value) %>% # Sort by value (this should already be done anyway)
  19   top_n(10) %>% # Get the top 10 queries for each term-day pair
  20   group_by(query) %>% # Group by again, this time for each query
  21   summarize(appearances = n()) %>% # Count how often this query appears in the top 10 (which is how many Google displays)
  22   arrange(-appearances) %>% # Sort by appearances
  23   top_n(10) %>% # And get the top 10 queries
  24   ggplot(aes(x=reorder(query, appearances), y=appearances)) + # Plot the number of appearances, ordered by appearances
  25   geom_bar(stat = 'identity') + # Tell R that we want to use the values of `appearances` as the counts
  26   coord_flip() + # Flip the plot
  27   xlab("Query") +
  28   ylab("Number of appearances in top 10 suggested queries") +
  29   theme_minimal() # And make it minimal
  30
  31 ggsave('./output/top_queries_plot.png', plot)