1 from ...types import Namespace
2 from ...util import autovivifying, none_or
3 from .functions import normalize
8 Constructs a page name parser from a set of :class:`mw.Namespace`. Such a
9 parser can be used to convert a full page name (namespace included with a
10 colon; e.g., ``"Talk:Foo"``) into a namespace ID and
11 :func:`mw.lib.title.normalize`'d page title (e.g., ``(1, "Foo")``).
14 namespaces : set( :class:`mw.Namespace` )
16 >>> from mw import Namespace
17 >>> from mw.lib import title
19 >>> parser = title.Parser(
21 ... Namespace(0, "", case="first-letter"),
22 ... Namespace(1, "Discuss\u00e3o", canonical="Talk", case="first-letter"),
23 ... Namespace(2, "Usu\u00e1rio(a)", canonical="User", aliases={"U"}, case="first-letter")
27 >>> parser.parse("Discuss\u00e3o:Foo") # Using the standard name
29 >>> parser.parse("Talk:Foo bar") # Using the canonical name
31 >>> parser.parse("U:Foo bar") # Using an alias
33 >>> parser.parse("Herpderp:Foo bar") # Pseudo namespace
34 (0, 'Herpderp:Foo_bar')
def __init__(self, namespaces=None):
    """
    Sets up the lookup tables and registers any namespaces given.

    :Parameters:
        namespaces : set( :class:`mw.Namespace` )
            Namespaces to register.  ``None`` yields a parser with no
            namespaces; more can be added later with ``add_namespace``.
    """
    # none_or is a project helper; presumably it passes None through and
    # otherwise coerces the argument to a set -- confirm against ...util.
    namespaces = none_or(namespaces, set)

    # add_namespace() stores into these tables, so they must exist before
    # the first namespace is registered.
    self.ids = {}    # namespace ID -> namespace
    self.names = {}  # standard name / alias / canonical name -> namespace

    if namespaces is not None:
        for namespace in namespaces:
            self.add_namespace(namespace)
def parse(self, page_name):
    """
    Parses a page name to extract the namespace.

    :Parameters:
        page_name : str
            A page name including the namespace prefix and a colon (if not
            Main)

    :Returns:
        A tuple of (namespace : `int`, title : `str`)
    """
    parts = page_name.split(":", 1)

    # Only names that actually contain a colon can carry a namespace prefix.
    if len(parts) == 2:
        ns_name, title = normalize(parts[0]), normalize(parts[1])
        if self.contains_name(ns_name):
            return self.get_namespace(name=ns_name).id, title

    # No colon, or the prefix is not a registered namespace (a "pseudo
    # namespace"): the whole name, colon included, is treated as a title in
    # namespace 0.
    return 0, normalize(page_name)
def add_namespace(self, namespace):
    """
    Adds a namespace to the parser.

    :Parameters:
        namespace : :class:`mw.Namespace`
            The namespace to register under its ID, its standard name, each
            of its aliases and (when set) its canonical name.
    """
    def _labels():
        # Yielded lazily and in this order so that, on a collision, later
        # labels overwrite earlier ones: name, then aliases, then canonical.
        yield namespace.name
        for alias in namespace.aliases:
            yield alias
        if namespace.canonical is not None:
            yield namespace.canonical

    self.ids[namespace.id] = namespace

    # NOTE(review): labels are stored exactly as given, while contains_name()
    # and get_namespace() normalize the name before looking it up -- confirm
    # that labels which normalize() would alter are meant to be unreachable.
    for label in _labels():
        self.names[label] = namespace
def contains_name(self, name):
    """
    Checks whether a namespace name is known to the parser.

    :Parameters:
        name : str
            A namespace name; it is normalized before the lookup.

    :Returns:
        `True` if the normalized name is registered, `False` otherwise.
    """
    normalized = normalize(name)
    return normalized in self.names
def get_namespace(self, id=None, name=None):
    """
    Gets a namespace from the parser.  Throws a :class:`KeyError` if a
    namespace cannot be found.

    :Parameters:
        id : int
            A namespace ID.  Used in preference to `name` when given.
        name : str
            A namespace name (standard, canonical names and aliases are all
            registered as lookup keys).  Normalized before the lookup.

    :Returns:
        A :class:`mw.Namespace`.
    """
    # Choose the lookup table by which argument was supplied; without this
    # branch the name lookup can never be reached.
    if id is not None:
        return self.ids[int(id)]
    else:
        return self.names[normalize(name)]
def from_site_info(cls, si_doc):
    """
    Constructs a parser from the result of a :meth:`mw.api.SiteInfo.query`.

    :Parameters:
        si_doc : dict
            The result of a site_info request.

    :Returns:
        An initialized :class:`mw.lib.title.Parser`
    """
    # NOTE(review): takes ``cls`` and is called as ``cls.from_site_info(...)``
    # by from_api(), so it is expected to be bound as a classmethod -- the
    # decorator is not visible in this chunk; confirm it is present.

    # Alias names grouped by namespace ID.  The vivifier presumably supplies
    # an empty list the first time an unseen ID is accessed -- confirm against
    # ...util.autovivifying.
    aliases = autovivifying.Dict(vivifier=lambda k: [])

    # The alias section is optional in the site info document.
    if 'namespacealiases' in si_doc:
        for alias_doc in si_doc['namespacealiases']:
            aliases[alias_doc['id']].append(alias_doc['*'])

    # Keep every constructed namespace so it can be handed to the parser.
    namespaces = [
        Namespace.from_doc(ns_doc, aliases)
        for ns_doc in si_doc['namespaces'].values()
    ]

    # Build through ``cls`` (as from_dump() does) so that subclasses get
    # instances of themselves rather than of the base parser.
    return cls(namespaces)
def from_api(cls, session):
    """
    Constructs a parser from a :class:`mw.api.Session`

    :Parameters:
        session : :class:`mw.api.Session`
            An open API session

    :Returns:
        An initialized :class:`mw.lib.title.Parser`
    """
    # Request only the site info sections that from_site_info() reads.
    wanted = {'namespaces', 'namespacealiases'}
    return cls.from_site_info(session.site_info.query(properties=wanted))
def from_dump(cls, dump):
    """
    Constructs a parser from a :class:`mw.xml_dump.Iterator`.  Note that
    XML database dumps do not include namespace aliases or canonical names
    so the parser that will be constructed will only work in common cases.

    :Parameters:
        dump : :class:`mw.xml_dump.Iterator`
            An XML dump iterator

    :Returns:
        An initialized :class:`mw.lib.title.Parser`
    """
    dump_namespaces = dump.namespaces
    return cls(dump_namespaces)