# Append further columns to the schema being built in `struct`.
# NOTE(review): `struct` is presumably a pyspark.sql.types.StructType and
# `types` the pyspark.sql.types module; the third argument would then be
# the `nullable` flag -- confirm against the surrounding code.
struct = struct.add("articleid", types.LongType(), True)
if read_collapse is True:
    # This column is added only when read_collapse is True.
    # It must be `types.IntegerType()`: `type` is the Python builtin and has
    # no `IntegerType` attribute, so the old spelling raised AttributeError.
    struct = struct.add("collapsed_revs", types.IntegerType(), True)
struct = struct.add("date_time", types.TimestampType(), True)
struct = struct.add("deleted", types.BooleanType(), True)
# Workaround for a Spark bug,
# see https://issues.apache.org/jira/browse/SPARK-14948
# The revid column is re-selected under the alias "r_revid" before the join.
# NOTE(review): presumably this avoids the join failure described in the
# issue above when both frames expose a `revid` column -- confirm.
reverts_df = reverts_df.select(
    reverts_df.revid.alias("r_revid"),
    'editor_nth_revert',
    'article_nth_revert'
)
# Left outer join keeps every row of df, including rows with no match in reverts_df.
df = df.join(reverts_df, df.revid == reverts_df.r_revid, how='left_outer')
# The aliased key was only needed for the join condition.
df = df.drop("r_revid")
# Drop the local reference to the intermediate frame.
del reverts_df