From: groceryheist
Date: Fri, 31 Aug 2018 23:02:05 +0000 (-0700)
Subject: Merge branch 'user_level_wikiq' of code.communitydata.cc:mediawiki_dump_tools into...
X-Git-Url: https://code.communitydata.science/mediawiki_dump_tools.git/commitdiff_plain/3af71f03e08c852438888467ea114959387e6e67?hp=-c

Merge branch 'user_level_wikiq' of code.communitydata.cc:mediawiki_dump_tools into user_level_wikiq
---

3af71f03e08c852438888467ea114959387e6e67
diff --combined wikiq_users/wikiq_users_spark.py
index edc2d4f,31d078a..5b0baf9
--- a/wikiq_users/wikiq_users_spark.py
+++ b/wikiq_users/wikiq_users_spark.py
@@@ -53,7 -53,7 +53,7 @@@ if __name__ == "__main__"
      struct = struct.add("articleid",types.LongType(),True)
  
      if read_collapse is True:
--        struct = struct.add("collapsed_revs", type.IntegerType(), True)
++        struct = struct.add("collapsed_revs", types.IntegerType(), True)
  
      struct = struct.add("date_time",types.TimestampType(), True)
      struct = struct.add("deleted",types.BooleanType(), True)
@@@ -123,7 -123,8 +123,7 @@@
  
      # some kind of bad work around a bug
      # see https://issues.apache.org/jira/browse/SPARK-14948
 -    reverts_df = reverts_df.select(reverts_df.revid.alias("r_revid"),'editor_nth_revert','article_nth_r
 -evert')
 +    reverts_df = reverts_df.select(reverts_df.revid.alias("r_revid"),'editor_nth_revert','article_nth_revert')
      df = df.join(reverts_df, df.revid == reverts_df.r_revid, how='left_outer')
      df = df.drop("r_revid")
      del(reverts_df)