code.communitydata.science - mediawiki_dump_tools.git/commitdiff
Merge branch 'user_level_wikiq' of code.communitydata.cc:mediawiki_dump_tools into...
author groceryheist <nathante@uw.edu>
Fri, 31 Aug 2018 23:02:05 +0000 (16:02 -0700)
committer groceryheist <nathante@uw.edu>
Fri, 31 Aug 2018 23:02:48 +0000 (16:02 -0700)
wikiq_users/wikiq_users_spark.py

index 31d078a24a0e3d8fc9c2952d5cabca904e134d3b..5b0baf98fa9b42fe750c8c0fe16fa882a0d593ce 100755 (executable)
@@ -53,7 +53,7 @@ if __name__ == "__main__":
     struct = struct.add("articleid",types.LongType(),True)
     
     if read_collapse is True:
-        struct = struct.add("collapsed_revs", type.IntegerType(), True)
+        struct = struct.add("collapsed_revs", types.IntegerType(), True)
 
     struct = struct.add("date_time",types.TimestampType(), True)
     struct = struct.add("deleted",types.BooleanType(), True)
@@ -123,8 +123,7 @@ if __name__ == "__main__":
 
     # some kind of bad work around a bug
     # see https://issues.apache.org/jira/browse/SPARK-14948
-    reverts_df = reverts_df.select(reverts_df.revid.alias("r_revid"),'editor_nth_revert','article_nth_r
-evert')
+    reverts_df = reverts_df.select(reverts_df.revid.alias("r_revid"),'editor_nth_revert','article_nth_revert')
     df = df.join(reverts_df, df.revid == reverts_df.r_revid, how='left_outer')
     df = df.drop("r_revid")
     del(reverts_df)

Community Data Science Collective || Want to submit a patch?