code.communitydata.science - mediawiki_dump_tools.git/commitdiff
regex scanner groups findall tuple bug fixed regex_scanner
author sohyeonhwang <sohyeonhwang@u.northwestern.edu>
Thu, 12 Dec 2019 13:47:07 +0000 (07:47 -0600)
committer sohyeonhwang <sohyeonhwang@u.northwestern.edu>
Thu, 12 Dec 2019 13:47:07 +0000 (07:47 -0600)
test/Wikiq_Unit_Test.py
wikiq

index cc27fb52b7037a7aa2de214223cb592db5565ccd..7f64a8120c43cc18cb682058e16b4d9672f7454b 100644 (file)
@@ -320,7 +320,8 @@ class Test_Regex(unittest.TestCase):
         # sample inputs for checking the outcomes of good inputs / test_basic_regex\r
         self.good_inputs_list = [\r
             "-RP '\\b\\d{3}\\b' -RPl threedigits",\r
-            "-RP 'TestCase' -RP 'page' -RPl testcases -RPl page_word",\r
+            #"-RP 'TestCase' -RP 'page' -RPl testcases -RPl page_word",\r
+            "-RP '(\\b[a-zA-Z]{3}\\b)' -RPl 3LETTERS -RP '(\\b(1[\d+])|(2[\d+])\\b)' -RPl NUMBERS",\r
             "-CP 'Chevalier' -CPl chev_com -RP 'welcome to Wikipedia' -RPl wiki_welcome -CP 'Warning' -CPl warning",\r
             "-CP 'WP:EVADE' -CPl wp_evade"         \r
         ]\r
diff --git a/wikiq b/wikiq
index 0dad9e32deb3fea6345d0e7c9a68e58239a7dde6..f15eee6b425ce7822c39f2b3a48eb1d249696515 100755 (executable)
--- a/wikiq
+++ b/wikiq
@@ -189,7 +189,17 @@ class RegexPair(object):
                 #given that there are matches to be made\r
                 if self.pattern.search(content) is not None:\r
                     m = self.pattern.findall(content)\r
-                    temp_dict[self.label] = ', '.join(m)\r
+                    m_fixed = []\r
+                    for match in m:\r
+                        if type(match) is tuple:\r
+                            matchies = set()\r
+                            for sub_m in match:\r
+                                matchies.add(sub_m)\r
+                            m_fixed += matchies\r
+                        else:\r
+                            m_fixed.append(match)\r
+\r
+                    temp_dict[self.label] = ', '.join(m_fixed)\r
                 else:\r
                     temp_dict[self.label] = None    \r
         \r

Community Data Science Collective || Want to submit a patch?