From: Nathan TeBlunthuis <nathante@uw.edu>
Date: Wed, 3 May 2023 17:23:30 +0000 (-0700)
Subject: code review.
X-Git-Url: https://code.communitydata.science/mediawiki_dump_tools.git/commitdiff_plain/933ca753ede98a783c935951130458eb2c895eef

code review.
---

diff --git a/README.rst b/README.rst
index 94802bd..9320fed 100644
--- a/README.rst
+++ b/README.rst
@@ -16,4 +16,4 @@ files. Therefore wikiq depends on `7za`, `gzcat`, and `zcat`.
 There are also a series of Python dependencies. You can install these using pip
 with a command like:
 
-  pip3 install mwbase mwreverts mwxml mwtypes mwcli mwdiffs mwpersistence
+  pip3 install mwbase mwreverts mwxml mwtypes mwcli mwdiffs mwpersistence pandas
diff --git a/code_review_notes.txt b/code_review_notes.txt
new file mode 100644
index 0000000..77b7de9
--- /dev/null
+++ b/code_review_notes.txt
@@ -0,0 +1,9 @@
+Please add unit tests for the new count-only functionality.
+
+line 43 def matchmake:
+     This was making redundant calls to regex matching functions and so could be slower than necessary. I suggest changes that use the walrus operator to keep the same logical structure without the redundant calls. 
+
+
+line 212 def __init__:
+
+     Minor note: This constructor is taking a lot of arguments.  This is fine, but from a style + maintainability perspective it might make sense to create a new class for the regex matching configuration and pass a configuration object to this contructor instead.
diff --git a/wikiq b/wikiq
index d9045be..e8c1247 100755
--- a/wikiq
+++ b/wikiq
@@ -145,29 +145,26 @@ class RegexPair(object):
         if self.has_groups:
 
             # if there are matches of some sort in this revision content, fill the lists for each cap_group
-            if content is not None and self.pattern.search(content) is not None:
-                m = self.pattern.finditer(content)
-                matchobjects = list(m)
-
+            if content is not None and len(matchobjects := list(self.pattern.finditer(content))) > 0:
                 for cap_group in self.capture_groups:
                     key = self._make_key(cap_group)
                     temp_list = []
                     for match in matchobjects:
                         # we only want to add the match for the capture group if the match is not None
-                        if match.group(cap_group) != None:
-                            temp_list.append(match.group(cap_group))
+                        if (group := match.group(cap_group)) is not None:
+                            temp_list.append(group)
 
-                    # if temp_list of matches is empty just make that column None
-                    if len(temp_list)==0:
-                        temp_dict[key] = None
-                    # else we put in the list we made in the for-loop above
-                    else:
-                        if count_only:
-                            temp_dict[key] = len(temp_list)
+                        # if temp_list of matches is empty just make that column None
+                        if len(temp_list)==0:
+                            temp_dict[key] = None
+                            # else we put in the list we made in the for-loop above
                         else:
-                            temp_dict[key] = ', '.join(temp_list)
+                            if count_only:
+                                temp_dict[key] = len(temp_list)
+                            else:
+                                temp_dict[key] = ', '.join(temp_list)
 
-            # there are no matches at all in this revision content, we default values to None
+                # there are no matches at all in this revision content, we default values to None
             else:
                 for cap_group in self.capture_groups:
                     key = self._make_key(cap_group)