From: Kaylea Champion
Date: Sat, 28 Mar 2020 21:15:53 +0000 (-0700)
Subject: Merge pull request #1 from CommunityDataScienceCollective/kaylea/master
X-Git-Url: https://code.communitydata.science/covid19.git/commitdiff_plain/dd7d968bb681dda6fed495e0cd437c0932961519?hp=5ffb2cacd695e3d9b20b8d61099356479cf34fd0

Merge pull request #1 from CommunityDataScienceCollective/kaylea/master

Some suggested changes: move the data, resources, and scripts under a
wikipedia_views/ directory; build the output paths and the API URL with
f-strings; simplify the article-list loading; report non-OK API
responses; and comment out the unfinished feather output.
---

diff --git a/data/dailyviews2020032600.json b/wikipedia_views/data/dailyviews2020032600.json
similarity index 100%
rename from data/dailyviews2020032600.json
rename to wikipedia_views/data/dailyviews2020032600.json
diff --git a/data/dailyviews2020032600.tsv b/wikipedia_views/data/dailyviews2020032600.tsv
similarity index 100%
rename from data/dailyviews2020032600.tsv
rename to wikipedia_views/data/dailyviews2020032600.tsv
diff --git a/resources/articles.txt b/wikipedia_views/resources/articles.txt
similarity index 100%
rename from resources/articles.txt
rename to wikipedia_views/resources/articles.txt
diff --git a/bin/fetch_daily_views.py b/wikipedia_views/scripts/fetch_daily_views.py
similarity index 69%
rename from bin/fetch_daily_views.py
rename to wikipedia_views/scripts/fetch_daily_views.py
index 5ce989f..b604e26 100755
--- a/bin/fetch_daily_views.py
+++ b/wikipedia_views/scripts/fetch_daily_views.py
@@ -20,7 +20,6 @@ import datetime
 #import feather
 
-
 def parse_args():
 
     parser = argparse.ArgumentParser(description='Call the views API repeatedly.')
@@ -49,35 +48,32 @@ def main():
 
     articleList = []
 
-#1 Load up the list of article names
+    #1 Load up the list of article names
 
-    with open(articleFile, 'r') as infileHandle:
-        theInfile = csv.reader(infileHandle)
-        next(theInfile) #skip header
-        for currentLine in theInfile:
-            articleList.append(currentLine)
+    j_Out = f"{outputPath}dailyviews{queryDate}.json"
+    t_Out = f"{outputPath}dailyviews{queryDate}.tsv"
 
-    j_Out = outputPath + "dailyviews" + queryDate + ".json"
-    t_Out = outputPath + "dailyviews" + queryDate + ".tsv"
+    with open(articleFile, 'r') as infile:
+        next(infile) #skip header
+        articleList = list(infile)
 
     j = []
-    i = 0 #iterator to deal with end of file
-
-#2 Repeatedly call the API with that list of names
+    #2 Repeatedly call the API with that list of names
 
     for a in articleList:
-        a = a[0] #destringify
-        i = i+1
-        url= "https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/all-agents/"
-        url= url + a + "/daily/" + queryDate + "/" + queryDate #for now, single date at a time
+        a = a.strip("\"\n") #destringify
+        url= f"https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/all-agents/{a}/daily/{queryDate}/{queryDate}"
+
         response = requests.get(url)
         if response.ok:
            jd = json.loads(response.content)
            j.append(jd["items"][0])
            time.sleep(.1)
+        else:
+            print(f"Not ok response: {response.status_code} from {url}")
 
-#3 Save results as a JSON and TSV
+    #3 Save results as a JSON and TSV
 
     #all data in j now, make json file
     with open(j_Out, 'w') as j_outfile:
@@ -89,8 +85,8 @@ def main():
         dw.writerows(j)
 
-    f_Out = outputPath + "dailyviews" + queryDate + ".feather"
-    #read the json back in and make a feather file?
+    # f_Out = outputPath + "dailyviews" + queryDate + ".feather"
+    # read the json back in and make a feather file?
 
 if __name__ == "__main__":
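
Step #1 of the patched script reads the article file as plain lines rather
than through csv.reader, so each line still carries its CSV quoting; that is
what the strip("\"\n") in the loop peels off. A small sketch of that parsing,
assuming articles.txt holds one double-quoted name per line under a header
(the actual file contents are not shown in this patch):

    # Assumed articles.txt layout (hypothetical):
    #   article
    #   "COVID-19_pandemic"
    #   "Coronavirus"
    with open("articles.txt", 'r') as infile:
        next(infile)  # skip the header line
        articles = [line.strip("\"\n") for line in infile]
    print(articles)  # -> ['COVID-19_pandemic', 'Coronavirus']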
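
Step #2 issues one GET per article against the Wikimedia pageviews REST
endpoint, a single day per request. A minimal standalone sketch of that loop,
assuming the same endpoint and response shape as the script ("items" holding
dicts with "article" and "views" keys); the article names and the date
2020032600 (YYYYMMDDHH, matching the data filenames) are illustrative:

    import json
    import time

    import requests

    def fetch_daily_views(article, query_date):
        # Same per-article pageviews endpoint the script uses, one day at a time.
        url = ("https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/"
               f"en.wikipedia/all-access/all-agents/{article}/daily/{query_date}/{query_date}")
        response = requests.get(url)
        if response.ok:
            return json.loads(response.content)["items"][0]
        print(f"Not ok response: {response.status_code} from {url}")
        return None

    if __name__ == "__main__":
        for a in ["COVID-19_pandemic", "Coronavirus"]:  # illustrative article names
            item = fetch_daily_views(a, "2020032600")
            if item is not None:
                print(item["article"], item["views"])
            time.sleep(0.1)  # brief pause between requests, mirroring the script's time.sleep(.1)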
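
Step #3 writes the accumulated items out twice, once as JSON and once as TSV
through csv.DictWriter; the DictWriter setup itself sits outside this hunk, so
the field names below are an assumption taken from the pageviews API response,
not from the diff:

    import csv
    import json

    def save_results(j, j_out, t_out):
        # JSON copy: the raw "items" dicts exactly as the API returned them.
        with open(j_out, 'w') as j_outfile:
            json.dump(j, j_outfile, indent=2)

        # TSV copy: one row per article-day. Field names assumed from the
        # pageviews API response, since the writer setup is not in the hunk.
        fields = ["project", "article", "granularity", "timestamp",
                  "access", "agent", "views"]
        with open(t_out, 'w', newline='') as t_outfile:
            dw = csv.DictWriter(t_outfile, fieldnames=fields, delimiter='\t')
            dw.writeheader()
            dw.writerows(j)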