From: Nathan TeBlunthuis Date: Sat, 28 Mar 2020 03:27:02 +0000 (-0700) Subject: A few suggestions for the python script: X-Git-Url: https://code.communitydata.science/covid19.git/commitdiff_plain/49c3203d78f07c502c02a6a2ab42551175bb802c?ds=inline A few suggestions for the python script: - using format strings (f-strings) is a nice way in python to build strings using variables. - you can read and process a file in one pass if you iterate over the open file itself instead of reading it into a variable and then looping - i had to change your strip code when i stopped using csv reader - my python linter and auto-formatter hate non-indented comments - i added a few lines to print cases where we don't get Ok responses. --- diff --git a/wikipedia_views/scripts/fetch_daily_views.py b/wikipedia_views/scripts/fetch_daily_views.py index 5ce989f..18bc01f 100755 --- a/wikipedia_views/scripts/fetch_daily_views.py +++ b/wikipedia_views/scripts/fetch_daily_views.py @@ -20,7 +20,6 @@ import datetime #import feather - def parse_args(): parser = argparse.ArgumentParser(description='Call the views API repeatedly.') @@ -49,35 +48,32 @@ def main(): articleList = [] -#1 Load up the list of article names - - with open(articleFile, 'r') as infileHandle: - theInfile = csv.reader(infileHandle) - next(theInfile) #skip header - for currentLine in theInfile: - articleList.append(currentLine) + #1 Load up the list of article names - j_Out = outputPath + "dailyviews" + queryDate + ".json" - t_Out = outputPath + "dailyviews" + queryDate + ".tsv" + j_Out = f"{outputPath}dailyviews{queryDate}.json" + t_Out = f"{outputPath}dailyviews{queryDate}.tsv" - j = [] + with open(articleFile, 'r') as infile: + next(infile) #skip header + articleList = infile - i = 0 #iterator to deal with end of file + j = [] -#2 Repeatedly call the API with that list of names + #2 Repeatedly call the API with that list of names - for a in articleList: - a = a[0] #destringify - i = i+1 - url= 
"https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/all-agents/" - url= url + a + "/daily/" + queryDate + "/" + queryDate #for now, single date at a time - response = requests.get(url) - if response.ok: - jd = json.loads(response.content) - j.append(jd["items"][0]) - time.sleep(.1) + for a in articleList: + a = a.strip("\"\n") #destringify + url= f"https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/all-agents/{a}/daily/{queryDate}/{queryDate}" -#3 Save results as a JSON and TSV + response = requests.get(url) + if response.ok: + jd = json.loads(response.content) + j.append(jd["items"][0]) + time.sleep(.1) + else: + print(f"Not ok response: {response.status_code} from {url}") + + #3 Save results as a JSON and TSV #all data in j now, make json file with open(j_Out, 'w') as j_outfile: @@ -89,8 +85,8 @@ def main(): dw.writerows(j) - f_Out = outputPath + "dailyviews" + queryDate + ".feather" - #read the json back in and make a feather file? + # f_Out = outputPath + "dailyviews" + queryDate + ".feather" + # read the json back in and make a feather file? if __name__ == "__main__":