X-Git-Url: https://code.communitydata.science/mediawiki_dump_tools.git/blobdiff_plain/f468d1a5b60dce36e5d132c65c3cada921dbb4a7..5b6aaad86232a364f071f4705d18aaa01a7581f6:/wikiq?ds=inline diff --git a/wikiq b/wikiq index 7cf4be2..47dd687 100755 --- a/wikiq +++ b/wikiq @@ -63,6 +63,11 @@ class WikiqPage(): def __init__(self, page, namespace_map, collapse_user=False): self.id = page.id self.namespace = page.namespace + # following mwxml, we assume namespace 0 in cases where + # page.namespace is inconsistent with namespace_map + if page.namespace not in namespace_map: + self.title = page.title + page.namespace = 0 if page.namespace != 0: self.title = ':'.join([namespace_map[page.namespace], page.title]) else: @@ -368,6 +373,9 @@ parser.add_argument('-p', '--persistence', dest="persist", default=None, const=' parser.add_argument('-u', '--url-encode', dest="urlencode", action="store_true", help="Output url encoded text strings. This works around some data issues like newlines in editor names. In the future it may be used to output other text data.") +parser.add_argument('-ns', '--namespace-filter', dest="namespace_filter", type=str, help="Comma-seperate list of namespaces numbers to include") + + args = parser.parse_args() # set persistence method