X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/2740f55915d6ecca7c5cd800747d9687c4cd9245..4ced659d1961630c20a1ef817422f242f723af7f:/check_comments_shas.py diff --git a/check_comments_shas.py b/check_comments_shas.py old mode 100644 new mode 100755 index a2bc89b..199261c --- a/check_comments_shas.py +++ b/check_comments_shas.py @@ -5,8 +5,10 @@ import requests from os import path import hashlib -shasums = requests.get("https://files.pushshift.io/reddit/comments/sha256sums.txt").text +shasums1 = requests.get("https://files.pushshift.io/reddit/comments/sha256sum.txt").text +shasums2 = requests.get("https://files.pushshift.io/reddit/comments/daily/sha256sum.txt").text +shasums = shasums1 + shasums2 dumpdir = "/gscratch/comdata/raw_data/reddit_dumps/comments" for l in shasums.strip().split('\n'):