+
class RegexPair(object):
    """A compiled regular expression paired with a label naming its output columns.

    If the pattern defines named capture groups, one column per group is
    produced, keyed ``"<label>_<group>"``. Otherwise a single column keyed
    ``label`` is produced.
    """

    def __init__(self, pattern, label):
        """Compile *pattern* and remember *label*.

        pattern -- regular expression source (or anything ``re.compile`` accepts)
        label   -- base name used for the column key(s) written by ``matchmake``
        """
        self.pattern = re.compile(pattern)
        self.label = label
        self.has_groups = bool(self.pattern.groupindex)
        # Always defined (empty when the pattern has no named groups) so that
        # reading the attribute never raises AttributeError.
        self.capture_groups = list(self.pattern.groupindex)

    def _make_key(self, cap_group):
        """Return the column key for the named capture group *cap_group*."""
        return "{}_{}".format(self.label, cap_group)

    @staticmethod
    def _summarize(found, count_only):
        """Collapse a list of matched strings into a single column value.

        Returns ``len(found)`` when *count_only* is true (so ``0`` when nothing
        matched); otherwise the matches joined with ``', '``, or ``None`` when
        there are none.
        """
        if count_only:
            return len(found)
        return ', '.join(found) if found else None

    def matchmake(self, content, rev_data, count_only=False):
        """Match the pattern against *content* and record the results in *rev_data*.

        content    -- text to search; ``None`` is treated as "no matches"
        rev_data   -- dict updated in place with the new column(s)
        count_only -- when true, store the number of matches instead of the
                      joined matched text

        Returns the same *rev_data* object that was passed in.
        """
        columns = {}

        if self.has_groups:
            # A single finditer pass is enough: an empty list means "no match",
            # so a separate search() beforehand is unnecessary.
            matches = [] if content is None else list(self.pattern.finditer(content))
            for cap_group in self.capture_groups:
                values = (match.group(cap_group) for match in matches)
                # A group that did not participate in a match yields None; skip it.
                found = [value for value in values if value is not None]
                columns[self._make_key(cap_group)] = self._summarize(found, count_only)
        else:
            # NOTE: findall() returns the group text (or tuples, for several
            # groups) when the pattern contains unnamed groups; tuples cannot
            # be joined, so such patterns only work with count_only=True.
            found = [] if content is None else self.pattern.findall(content)
            columns[self.label] = self._summarize(found, count_only)

        # update rev_data with our new columns
        rev_data.update(columns)
        return rev_data
+
+