From: Nate E TeBlunthuis
Date: Fri, 24 Aug 2018 01:02:56 +0000 (-0700)
Subject: add namespace filter parameter
X-Git-Url: https://code.communitydata.science/mediawiki_dump_tools.git/commitdiff_plain/5b6aaad86232a364f071f4705d18aaa01a7581f6?hp=-c

add namespace filter parameter
---

5b6aaad86232a364f071f4705d18aaa01a7581f6
diff --git a/wikiq b/wikiq
index 7cf4be2..47dd687 100755
--- a/wikiq
+++ b/wikiq
@@ -63,6 +63,11 @@ class WikiqPage():
     def __init__(self, page, namespace_map, collapse_user=False):
         self.id = page.id
         self.namespace = page.namespace
+        # following mwxml, we assume namespace 0 in cases where
+        # page.namespace is inconsistent with namespace_map
+        if page.namespace not in namespace_map:
+            self.title = page.title
+            page.namespace = 0
         if page.namespace != 0:
             self.title = ':'.join([namespace_map[page.namespace], page.title])
         else:
@@ -368,6 +373,9 @@ parser.add_argument('-p', '--persistence', dest="persist", default=None, const='
 
 parser.add_argument('-u', '--url-encode', dest="urlencode", action="store_true", help="Output url encoded text strings. This works around some data issues like newlines in editor names. In the future it may be used to output other text data.")
 
+parser.add_argument('-ns', '--namespace-filter', dest="namespace_filter", type=str, help="Comma-seperate list of namespaces numbers to include")
+
+
 args = parser.parse_args()
 
 # set persistence method