From: groceryheist Date: Fri, 31 Aug 2018 23:02:05 +0000 (-0700) Subject: Merge branch 'user_level_wikiq' of code.communitydata.cc:mediawiki_dump_tools into... X-Git-Url: https://code.communitydata.science/mediawiki_dump_tools.git/commitdiff_plain/3af71f03e08c852438888467ea114959387e6e67?hp=4c77c0f12e185b8203449eabda085eed8873d7b5 Merge branch 'user_level_wikiq' of code.communitydata.cc:mediawiki_dump_tools into user_level_wikiq --- diff --git a/wikiq_users/wikiq_users_spark.py b/wikiq_users/wikiq_users_spark.py index 31d078a..5b0baf9 100755 --- a/wikiq_users/wikiq_users_spark.py +++ b/wikiq_users/wikiq_users_spark.py @@ -53,7 +53,7 @@ if __name__ == "__main__": struct = struct.add("articleid",types.LongType(),True) if read_collapse is True: - struct = struct.add("collapsed_revs", type.IntegerType(), True) + struct = struct.add("collapsed_revs", types.IntegerType(), True) struct = struct.add("date_time",types.TimestampType(), True) struct = struct.add("deleted",types.BooleanType(), True) @@ -123,8 +123,7 @@ if __name__ == "__main__": # some kind of bad work around a bug # see https://issues.apache.org/jira/browse/SPARK-14948 - reverts_df = reverts_df.select(reverts_df.revid.alias("r_revid"),'editor_nth_revert','article_nth_r -evert') + reverts_df = reverts_df.select(reverts_df.revid.alias("r_revid"),'editor_nth_revert','article_nth_revert') df = df.join(reverts_df, df.revid == reverts_df.r_revid, how='left_outer') df = df.drop("r_revid") del(reverts_df)