rename 'transliterations' to 'keywords'
diff --git a/transliterations/src/collect_trends.py b/transliterations/src/collect_trends.py
deleted file mode 100644 (file)
index 1820bc5..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-# This follows a similar approach to Nick's trends.js, but in Python.
-from pytrends.request import TrendReq
-from datetime import datetime
-from os import path
-import csv
-from itertools import islice, chain, zip_longest
-import pandas as pd
-
-
-# From the itertools recipes:
-# https://docs.python.org/3.6/library/itertools.html#itertools-recipes
-def grouper(iterable, n, fillvalue=None):
-    "Collect data into fixed-length chunks or blocks"
-    # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
-    args = [iter(iterable)] * n
-    return zip_longest(*args, fillvalue=fillvalue)
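-# For example (standard zip_longest semantics), grouping seven items into
-# chunks of three pads the final chunk with the fill value:
-#   list(grouper('ABCDEFG', 3, 'x'))
-#   # -> [('A', 'B', 'C'), ('D', 'E', 'F'), ('G', 'x', 'x')]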
-
-def get_daily_trends():
-    trendReq = TrendReq(backoff_factor=0.2)
-    today_trending = trendReq.today_searches()
-    daily_trends_outfile = path.join("..","output","daily_google_trends.csv")
-
-    write_header = False
-    header = ['date','term','top']
-
-    if not path.exists(daily_trends_outfile):
-        write_header = True
-
-    with open("../output/intermediate/daily_google_trends.csv",'a',newline='') as of:
-        writer = csv.writer(of)
-        if write_header:
-            writer.writerow(header)
-
-        for i, trend in enumerate(today_trending):
-            writer.writerow([str(datetime.now().date()), trend, i])
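-    # Illustrative row (values hypothetical): the date, the trending term,
-    # and its rank within today's list, e.g. 2020-03-20,coronavirus,0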
-
-def get_related_queries(stems):
-    # Google Trends accepts at most five terms per payload, so we batch the
-    # stems in chunks of five.
-    trendReq = TrendReq(backoff_factor=0.2)
-    def _get_related_queries(chunk):
-        # drop the None padding that grouper adds to the final chunk
-        kw_list = [kw for kw in chunk if kw is not None]
-        trendReq.build_payload(kw_list=kw_list)
-        related_queries = trendReq.related_queries()
-        for term, results in related_queries.items():
-            for key, df in results.items():
-                if df is not None:
-                    df["term"] = term
-                    yield (key, df)
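-    # pytrends' related_queries() returns a dict keyed by search term, each
-    # value a {'top': DataFrame, 'rising': DataFrame} mapping (an entry may
-    # be None when Google returns no data), so the generator above yields
-    # one (result_type, DataFrame) pair per term/type combination.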
-
-    pairs = chain.from_iterable(map(_get_related_queries, grouper(stems, 5)))
-    out = {}
-    for key, value in pairs:
-        out.setdefault(key, []).append(value)
-
-    for k, dfs in out.items():
-        df = pd.concat(dfs)
-        df['date'] = str(datetime.now().date())
-        outfile = path.join('..','output','intermediate',f"related_searches_{k}.csv")
-        if path.exists(outfile):
-            mode = 'a'
-            header = False
-        else:
-            mode = 'w'
-            header = True
-
-        df.to_csv(outfile, mode=mode, header=header, index=False)
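-    # Given the usual 'top' and 'rising' result types, this is expected to
-    # write, e.g.:
-    #   ../output/intermediate/related_searches_top.csv
-    #   ../output/intermediate/related_searches_rising.csv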
-
-if __name__ == "__main__":
-    # base_terms.txt is expected to hold one search stem per line.
-    with open("../resources/base_terms.txt", 'r') as f:
-        stems = [t.strip() for t in f]
-
-    get_daily_trends()
-    get_related_queries(stems)
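-# Note: the relative ../output and ../resources paths assume the script is
-# run from its own directory (transliterations/src/).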
