X-Git-Url: https://code.communitydata.science/mediawiki_dump_tools.git/blobdiff_plain/097c60a7bccd4a9fcbf59f55049b003bc76bf10e..refs/heads/regex_scanner:/wikiq diff --git a/wikiq b/wikiq index 0dad9e3..f15eee6 100755 --- a/wikiq +++ b/wikiq @@ -189,7 +189,17 @@ class RegexPair(object): #given that there are matches to be made if self.pattern.search(content) is not None: m = self.pattern.findall(content) - temp_dict[self.label] = ', '.join(m) + m_fixed = [] + for match in m: + if type(match) is tuple: + matchies = set() + for sub_m in match: + matchies.add(sub_m) + m_fixed += matchies + else: + m_fixed.append(match) + + temp_dict[self.label] = ', '.join(m_fixed) else: temp_dict[self.label] = None