code.communitydata.science - cdsc_reddit.git/blobdiff - check_comments_shas.py
Add code for running tf-idf at the weekly level.
[cdsc_reddit.git] / check_comments_shas.py
old mode 100644 (file)
new mode 100755 (executable)
index a2bc89b..199261c
@@ -5,8 +5,10 @@ import requests
 from os import path
 import hashlib
 
 from os import path
 import hashlib
 
-shasums = requests.get("https://files.pushshift.io/reddit/comments/sha256sums.txt").text
+shasums1 = requests.get("https://files.pushshift.io/reddit/comments/sha256sum.txt").text
+shasums2 = requests.get("https://files.pushshift.io/reddit/comments/daily/sha256sum.txt").text
 
 
+shasums = shasums1 + shasums2
 dumpdir = "/gscratch/comdata/raw_data/reddit_dumps/comments"
 
 for l in shasums.strip().split('\n'):
 dumpdir = "/gscratch/comdata/raw_data/reddit_dumps/comments"
 
 for l in shasums.strip().split('\n'):

Community Data Science Collective || Want to submit a patch?