code review.

[mediawiki_dump_tools.git] / wikiq
diff --git a/wikiq b/wikiq

index 91b86d758bfc96191fc3b2660dc911b86652f8ff..e8c1247046c9f799c828d75b9d638dc5855ca339 100755 (executable)
--- a/wikiq
+++ b/wikiq
@@ -145,29 +145,26 @@ class RegexPair(object):
          if self.has_groups:
  
              # if there are matches of some sort in this revision content, fill the lists for each cap_group
-            if content is not None and self.pattern.search(content) is not None:
-                m = self.pattern.finditer(content)
-                matchobjects = list(m)
-
+            if content is not None and len(matchobjects := list(self.pattern.finditer(content))) > 0:
                  for cap_group in self.capture_groups:
                      key = self._make_key(cap_group)
                      temp_list = []
                      for match in matchobjects:
                          # we only want to add the match for the capture group if the match is not None
-                        if match.group(cap_group) != None:
-                            temp_list.append(match.group(cap_group))
+                        if (group := match.group(cap_group)) is not None:
+                            temp_list.append(group)
  
-                    # if temp_list of matches is empty just make that column None
-                    if len(temp_list)==0:
-                        temp_dict[key] = None
-                    # else we put in the list we made in the for-loop above
-                    else:
-                        if count_only:
-                            temp_dict[key] = len(temp_list)
+                        # if temp_list of matches is empty just make that column None
+                        if len(temp_list)==0:
+                            temp_dict[key] = None
+                            # else we put in the list we made in the for-loop above
                          else:
-                            temp_dict[key] = ', '.join(temp_list)
+                            if count_only:
+                                temp_dict[key] = len(temp_list)
+                            else:
+                                temp_dict[key] = ', '.join(temp_list)
  
-            # there are no matches at all in this revision content, we default values to None
+                # there are no matches at all in this revision content, we default values to None
              else:
                  for cap_group in self.capture_groups:
                      key = self._make_key(cap_group)
@@ -196,7 +193,7 @@ class RegexPair(object):
  
          
  class WikiqParser():
-    def __init__(self, input_file, output_file, regex_match_revision, regex_revision_label, regex_revision_output_count, regex_match_comment, regex_comment_label, regex_comment_output_count, collapse_user=False, persist=None, urlencode=False, namespaces = None, revert_radius=15):
+    def __init__(self, input_file, output_file, regex_revision_match, regex_revision_label, regex_revision_output_count, regex_comment_match, regex_comment_label, regex_comment_output_count, collapse_user=False, persist=None, urlencode=False, namespaces = None, revert_radius=15):
          """ 
          Parameters:
             persist : what persistence method to use. Takes a PersistMethod value
@@ -215,10 +212,10 @@ class WikiqParser():
          else:
              self.namespace_filter = None
  
-        self.regex_revision_pairs = self.make_matchmake_pairs(regex_match_revision, regex_revision_label)
+        self.regex_revision_pairs = self.make_matchmake_pairs(regex_revision_match, regex_revision_label)
          self.regex_revision_output_count = regex_revision_output_count
  
-        self.regex_comment_pairs = self.make_matchmake_pairs(regex_match_comment, regex_comment_label)
+        self.regex_comment_pairs = self.make_matchmake_pairs(regex_comment_match, regex_comment_label)
          self.regex_comment_output_count = regex_comment_output_count
  
      def make_matchmake_pairs(self, patterns, labels):
@@ -503,7 +500,7 @@ parser.add_argument('-rr',
                      default=15,
                      help="Number of edits to check when looking for reverts (default: 15)")
  
-parser.add_argument('-RP', '--revision-pattern', dest="regex_match_revision", default=None, type=str, action='append',
+parser.add_argument('-RP', '--revision-pattern', dest="regex_revision_match", default=None, type=str, action='append',
                      help="The regular expression to search for in revision text. The regex must be surrounded by quotes.")
  
  parser.add_argument('-RPl', '--revision-pattern-label', dest="regex_revision_label", default=None, type=str, action='append',
@@ -512,7 +509,7 @@ parser.add_argument('-RPl', '--revision-pattern-label', dest="regex_revision_lab
  parser.add_argument('-RPc', '--revision-pattern-count', dest="regex_revision_output_count", action='store_true',
                      help="If present, this will cause the revision patterns to return counts of the number of matches instead of the text of the matches themselves.  It will affect all revision patterns.")
  
-parser.add_argument('-CP', '--comment-pattern', dest="regex_match_comment", default=None, type=str, action='append',
+parser.add_argument('-CP', '--comment-pattern', dest="regex_comment_match", default=None, type=str, action='append',
                      help="The regular expression to search for in comments of revisions.")
  
  parser.add_argument('-CPl', '--comment-pattern-label', dest="regex_comment_label", default=None, type=str, action='append',
@@ -564,10 +561,10 @@ if len(args.dumpfiles) > 0:
                              urlencode=args.urlencode,
                              namespaces=namespaces,
                              revert_radius=args.revert_radius,
-                            regex_match_revision = args.regex_match_revision,
+                            regex_revision_match = args.regex_revision_match,
                              regex_revision_label = args.regex_revision_label,
                              regex_revision_output_count = args.regex_revision_output_count,
-                            regex_match_comment = args.regex_match_comment,
+                            regex_comment_match = args.regex_comment_match,
                              regex_comment_label = args.regex_comment_label,
                              regex_comment_output_count = args.regex_comment_output_count)
  
@@ -585,10 +582,10 @@ else:
                          urlencode=args.urlencode,
                          namespaces=namespaces,
                          revert_radius=args.revert_radius,
-                        regex_match_revision = args.regex_match_revision,
+                        regex_revision_match = args.regex_revision_match,
                          regex_revision_label = args.regex_revision_label,
                          regex_revision_output_count = args.regex_revision_output_count,
-                        regex_match_comment = args.regex_match_comment,
+                        regex_comment_match = args.regex_comment_match,
                          regex_comment_label = args.regex_comment_label,
                          regex_comment_output_count = args.regex_comment_output_count)