code.communitydata.science - mediawiki_dump_tools.git/blobdiff - wikiq
added a line to fix persistence with deleted revs
[mediawiki_dump_tools.git] / wikiq
diff --git a/wikiq b/wikiq
index 7e75eda22228cd5c44c248c264c8435b3268a948..4a5c129a1078722e58dea1ae99435ef86a5c4ec4 100755 (executable)
--- a/wikiq
+++ b/wikiq
@@ -144,7 +144,7 @@ class RegexPair(object):
         temp_dict = {}
         # if there are named capture groups in the regex
         if self.has_groups:
-            # initialize the {capture_group_name:list} for each capture group
+
             # if there are matches of some sort in this revision content, fill the lists for each cap_group
             if self.pattern.search(content) is not None:
                 m = self.pattern.finditer(content)
@@ -183,7 +183,6 @@ class RegexPair(object):
         rev_data.update(temp_dict)
         return rev_data
 
-
         
 class WikiqParser():
     def __init__(self, input_file, output_file, regex_match_revision, regex_match_comment, regex_revision_label, regex_comment_label, collapse_user=False, persist=None, urlencode=False, namespaces = None, revert_radius=15):
@@ -368,6 +367,8 @@ class WikiqParser():
                     rev_data['collapsed_revs'] = rev.collapsed_revs
 
                 if self.persist != PersistMethod.none:
+                    # Initialize an empty dictionary before assigning into it. This fixes a bug when the first revision is deleted.
+                    old_rev_data = {}
                     if rev.deleted.text:
                         for k in ["token_revs", "tokens_added", "tokens_removed", "tokens_window"]:
                             old_rev_data[k] = None

Community Data Science Collective || Want to submit a patch?