code.communitydata.science - mediawiki_dump_tools.git/commitdiff
Merge branch 'user_level_wikiq' of code.communitydata.cc:mediawiki_dump_tools into...
author groceryheist <nathante@uw.edu>
Fri, 31 Aug 2018 23:02:05 +0000 (16:02 -0700)
committer groceryheist <nathante@uw.edu>
Fri, 31 Aug 2018 23:02:48 +0000 (16:02 -0700)
1  2 
wikiq_users/wikiq_users_spark.py

index edc2d4f664d13bfb72ac06fdd3e2bd17d9d792e3,31d078a24a0e3d8fc9c2952d5cabca904e134d3b..5b0baf98fa9b42fe750c8c0fe16fa882a0d593ce
@@@ -53,7 -53,7 +53,7 @@@ if __name__ == "__main__"
      struct = struct.add("articleid",types.LongType(),True)
      
      if read_collapse is True:
--        struct = struct.add("collapsed_revs", type.IntegerType(), True)
++        struct = struct.add("collapsed_revs", types.IntegerType(), True)
  
      struct = struct.add("date_time",types.TimestampType(), True)
      struct = struct.add("deleted",types.BooleanType(), True)
  
      # some kind of bad work around a bug
      # see https://issues.apache.org/jira/browse/SPARK-14948
 -    reverts_df = reverts_df.select(reverts_df.revid.alias("r_revid"),'editor_nth_revert','article_nth_r
 -evert')
 +    reverts_df = reverts_df.select(reverts_df.revid.alias("r_revid"),'editor_nth_revert','article_nth_revert')
      df = df.join(reverts_df, df.revid == reverts_df.r_revid, how='left_outer')
      df = df.drop("r_revid")
      del(reverts_df)

Community Data Science Collective || Want to submit a patch?