# this follows a similar approach to nick's trends.js but in python
import csv
import pandas as pd
from collections import defaultdict
from datetime import datetime
from itertools import chain, zip_longest
from os import path
from pytrends.request import TrendReq
# from the itertools recipes:
# https://docs.python.org/3.6/library/itertools.html#itertools-recipes
def grouper(iterable, n, fillvalue=None):
    "Collect data into fixed-length chunks or blocks"
    # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)
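
# A quick illustration of how the stems get chunked below: with the default
# fillvalue of None, grouper(['a', 'b', 'c', 'd', 'e', 'f', 'g'], 5) yields
# ('a', 'b', 'c', 'd', 'e') and then ('f', 'g', None, None, None); the None
# padding is filtered out again before the keywords are sent to pytrends.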
def get_daily_trends():
    # pull today's trending searches and append them to a running CSV
    trendReq = TrendReq(backoff_factor=0.2)
    today_trending = trendReq.today_searches()
    daily_trends_outfile = path.join("..", "output", "daily_google_trends.csv")
    header = ['date', 'term', 'top']
    # write the header only when the file is created for the first time
    write_header = not path.exists(daily_trends_outfile)
    with open(daily_trends_outfile, 'a', newline='') as of:
        writer = csv.writer(of)
        if write_header:
            writer.writerow(header)
        # one row per term: today's date, the term, and its rank in the list
        for i, trend in enumerate(today_trending):
            writer.writerow([str(datetime.now().date()), trend, i])
def get_related_queries(stems):
    # pytrends accepts at most five keywords per payload, so we have to batch these in sets of 5
    trendReq = TrendReq(backoff_factor=0.2)

    def _get_related_queries(chunk):
        # drop the None padding grouper adds to the final chunk
        kw_list = list(filter(lambda x: x is not None, chunk))
        trendReq.build_payload(kw_list=kw_list)
        related_queries = trendReq.related_queries()
        # related_queries maps each term to its 'top' and 'rising' DataFrames
        for term, results in related_queries.items():
            for key, df in results.items():
                if df is not None:
                    df['term'] = term  # column name assumed; records which stem produced the frame
                    yield key, df

    l = chain(*map(_get_related_queries, grouper(stems, 5)))
    # group the frames by result type ('top' or 'rising')
    out = defaultdict(list)
    for key, value in l:
        out[key].append(value)
    for k in out:
        df = pd.concat(out[k])
        df['date'] = str(datetime.now().date())
        outfile = path.join('..', 'output', 'intermediate', f"related_searches_{k}.csv")
        # append without repeating the header if the file already exists
        if path.exists(outfile):
            mode, header = 'a', False
        else:
            mode, header = 'w', True
        df.to_csv(outfile, mode=mode, header=header, index=False)
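
# base_terms.txt is assumed to hold one seed search term per line; each line is
# stripped of whitespace and the terms are sent to pytrends in batches of five.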
stems = [t.strip() for t in open("../resources/base_terms.txt", 'r')]
get_daily_trends()  # assumed to run alongside the related-queries pull
get_related_queries(stems)