X-Git-Url: https://code.communitydata.science/covid19.git/blobdiff_plain/c690df4852ffab274eecb9f86d6d4c6f48936f2a..98b07b8098611287eaa775b09622d1f3514303c8:/wikipedia_views/scripts/fetch_daily_views.py diff --git a/wikipedia_views/scripts/fetch_daily_views.py b/wikipedia_views/scripts/fetch_daily_views.py index b604e26..4f496fe 100755 --- a/wikipedia_views/scripts/fetch_daily_views.py +++ b/wikipedia_views/scripts/fetch_daily_views.py @@ -17,7 +17,8 @@ import csv import time import os.path import datetime -#import feather +import logging +#import feather #TBD def parse_args(): @@ -26,6 +27,8 @@ def parse_args(): parser.add_argument('-o', '--output_folder', help='Where to save output', default="../data/", type=str) parser.add_argument('-i', '--article_file', help='File listing article names', default="../resources/articles.txt", type=str) parser.add_argument('-d', '--query_date', help='Date if not yesterday, in YYYYMMDD format please.', type=str) + parser.add_argument('-L', '--logging_level', help='Logging level. Options are debug, info, warning, error, critical. Default: info.', default='info'), + parser.add_argument('-W', '--logging_destination', help='Logging destination.', default='../logs/'), args = parser.parse_args() return(args) @@ -38,6 +41,7 @@ def main(): outputPath = args.output_folder articleFile = args.article_file + #handle -d if (args.query_date): queryDate = args.query_date else: @@ -46,8 +50,29 @@ def main(): queryDate = queryDate + "00" #requires specifying hours + #handle -W + logHome = f"{args.logging_destination}dailylogrun{datetime.datetime.today().strftime('%Y%m%d')}" + + #handle -L + loglevel = args.logging_level + if loglevel == 'debug': + logging.basicConfig(filename=logHome, filemode='a', level=logging.DEBUG) + elif loglevel == 'info': + logging.basicConfig(filename=logHome, filemode='a', level=logging.INFO) + elif loglevel == 'warning': + logging.basicConfig(filename=logHome, filemode='a', level=logging.WARNING) + elif loglevel == 'error': + logging.basicConfig(filename=logHome, filemode='a', level=logging.ERROR) + elif loglevel == 'critical': + logging.basicConfig(filename=logHome, filemode='a', level=logging.CRITICAL) + else: + print("Choose a valid log level: debug, info, warning, error, or critical") + exit + articleList = [] + logging.debug(f"Starting run at {datetime.datetime.now()}") + #1 Load up the list of article names j_Out = f"{outputPath}dailyviews{queryDate}.json" @@ -58,6 +83,8 @@ def main(): articleList = list(infile) j = [] + success = 0 #for logging how many work/fail + failure = 0 #2 Repeatedly call the API with that list of names @@ -70,12 +97,16 @@ def main(): jd = json.loads(response.content) j.append(jd["items"][0]) time.sleep(.1) + success = success + 1 else: - print(f"Not ok response: {response.status_code} from {url}") + failure = failure + 1 + logging.warning(f"Failure: {response.status_code} from {url}") #3 Save results as a JSON and TSV #all data in j now, make json file + logging.info(f"Processed {success} successful URLs and {failure} failures.") + with open(j_Out, 'w') as j_outfile: json.dump(j, j_outfile, indent=2) @@ -84,6 +115,7 @@ def main(): dw.writeheader() dw.writerows(j) + logging.debug(f"Run complete at {datetime.datetime.now()}") # f_Out = outputPath + "dailyviews" + queryDate + ".feather" # read the json back in and make a feather file?