X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/9cd0954288512d002144d02c70b22869a6a4a271..a60747292e91a47d122158659182f82bfd2e922a:/check_comments_shas.py?ds=sidebyside diff --git a/check_comments_shas.py b/check_comments_shas.py old mode 100644 new mode 100755 index a2bc89b..199261c --- a/check_comments_shas.py +++ b/check_comments_shas.py @@ -5,8 +5,10 @@ import requests from os import path import hashlib -shasums = requests.get("https://files.pushshift.io/reddit/comments/sha256sums.txt").text +shasums1 = requests.get("https://files.pushshift.io/reddit/comments/sha256sum.txt").text +shasums2 = requests.get("https://files.pushshift.io/reddit/comments/daily/sha256sum.txt").text +shasums = shasums1 + shasums2 dumpdir = "/gscratch/comdata/raw_data/reddit_dumps/comments" for l in shasums.strip().split('\n'):