From: Nathan TeBlunthuis Date: Wed, 3 May 2023 17:23:30 +0000 (-0700) Subject: code review. X-Git-Url: https://code.communitydata.science/mediawiki_dump_tools.git/commitdiff_plain/933ca753ede98a783c935951130458eb2c895eef code review. --- diff --git a/README.rst b/README.rst index 94802bd..9320fed 100644 --- a/README.rst +++ b/README.rst @@ -16,4 +16,4 @@ files. Therefore wikiq depends on `7za`, `gzcat`, and `zcat`. There are also a series of Python dependencies. You can install these using pip with a command like: - pip3 install mwbase mwreverts mwxml mwtypes mwcli mwdiffs mwpersistence + pip3 install mwbase mwreverts mwxml mwtypes mwcli mwdiffs mwpersistence pandas diff --git a/code_review_notes.txt b/code_review_notes.txt new file mode 100644 index 0000000..77b7de9 --- /dev/null +++ b/code_review_notes.txt @@ -0,0 +1,9 @@ +Please add unit tests for the new count-only functionality. + +line 43 def matchmake: + This was making redundant calls to regex matching functions and so could be slower than necessary. I suggest changes that use the walrus operator to keep the same logical structure without the redundant calls. + + +line 212 def __init__: + + Minor note: This constructor is taking a lot of arguments. This is fine, but from a style + maintainability perspective it might make sense to create a new class for the regex matching configuration and pass a configuration object to this contructor instead. diff --git a/wikiq b/wikiq index d9045be..e8c1247 100755 --- a/wikiq +++ b/wikiq @@ -145,29 +145,26 @@ class RegexPair(object): if self.has_groups: # if there are matches of some sort in this revision content, fill the lists for each cap_group - if content is not None and self.pattern.search(content) is not None: - m = self.pattern.finditer(content) - matchobjects = list(m) - + if content is not None and len(matchobjects := list(self.pattern.finditer(content))) > 0: for cap_group in self.capture_groups: key = self._make_key(cap_group) temp_list = [] for match in matchobjects: # we only want to add the match for the capture group if the match is not None - if match.group(cap_group) != None: - temp_list.append(match.group(cap_group)) + if (group := match.group(cap_group)) is not None: + temp_list.append(group) - # if temp_list of matches is empty just make that column None - if len(temp_list)==0: - temp_dict[key] = None - # else we put in the list we made in the for-loop above - else: - if count_only: - temp_dict[key] = len(temp_list) + # if temp_list of matches is empty just make that column None + if len(temp_list)==0: + temp_dict[key] = None + # else we put in the list we made in the for-loop above else: - temp_dict[key] = ', '.join(temp_list) + if count_only: + temp_dict[key] = len(temp_list) + else: + temp_dict[key] = ', '.join(temp_list) - # there are no matches at all in this revision content, we default values to None + # there are no matches at all in this revision content, we default values to None else: for cap_group in self.capture_groups: key = self._make_key(cap_group)