X-Git-Url: https://code.communitydata.science/mediawiki_dump_tools.git/blobdiff_plain/f468d1a5b60dce36e5d132c65c3cada921dbb4a7..afd40c1a45166602773aefe35e10a202e8dc47dd:/wikiq diff --git a/wikiq b/wikiq index 7cf4be2..a584390 100755 --- a/wikiq +++ b/wikiq @@ -63,6 +63,11 @@ class WikiqPage(): def __init__(self, page, namespace_map, collapse_user=False): self.id = page.id self.namespace = page.namespace + # following mwxml, we assume namespace 0 in cases where + # page.namespace is inconsistent with namespace_map + if page.namespace not in namespace_map: + self.title = page.title + page.namespace = 0 if page.namespace != 0: self.title = ':'.join([namespace_map[page.namespace], page.title]) else: @@ -368,8 +373,12 @@ parser.add_argument('-p', '--persistence', dest="persist", default=None, const=' parser.add_argument('-u', '--url-encode', dest="urlencode", action="store_true", help="Output url encoded text strings. This works around some data issues like newlines in editor names. In the future it may be used to output other text data.") -args = parser.parse_args() +parser.add_argument('-n', '--namespace-include', dest="namespace_filter", type=int, action='append', + help="Id number of namspace to include.") + +args = parser.parse_args() +print(args) # set persistence method if args.persist is None: @@ -404,7 +413,6 @@ if len(args.dumpfiles) > 0: persist=persist, urlencode=args.urlencode) - wikiq.process() # close things