X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/9cd0954288512d002144d02c70b22869a6a4a271..82d184d9c608db47f5c37f17a18962f07169cbc7:/check_comments_shas.py diff --git a/check_comments_shas.py b/check_comments_shas.py old mode 100644 new mode 100755 index a2bc89b..199261c --- a/check_comments_shas.py +++ b/check_comments_shas.py @@ -5,8 +5,10 @@ import requests from os import path import hashlib -shasums = requests.get("https://files.pushshift.io/reddit/comments/sha256sums.txt").text +shasums1 = requests.get("https://files.pushshift.io/reddit/comments/sha256sum.txt").text +shasums2 = requests.get("https://files.pushshift.io/reddit/comments/daily/sha256sum.txt").text +shasums = shasums1 + shasums2 dumpdir = "/gscratch/comdata/raw_data/reddit_dumps/comments" for l in shasums.strip().split('\n'):