]> code.communitydata.science - rises_declines_wikia_code.git/blob - mediawiki_dump_tools/Mediawiki-Utilities/mw/xml_dump/iteration/page.py
Initial commit
[rises_declines_wikia_code.git] / mediawiki_dump_tools / Mediawiki-Utilities / mw / xml_dump / iteration / page.py
1 from ...types import serializable
2 from ...util import none_or
3 from ..errors import MalformedXML
4 from .redirect import Redirect
5 from .revision import Revision
6
7
class Page(serializable.Type):
    """
    Page meta data and a :class:`~mw.xml_dump.Revision` iterator.  Instances of
    this class can be called as iterators directly.  E.g.

    .. code-block:: python

        page = mw.xml_dump.Page( ... )

        for revision in page:
            print("{0} {1}".format(revision.id, page.id))

    """
    # NOTE(review): the private revisions iterator is intentionally not listed
    # here; presumably serializable.Type relies on __slots__ to enumerate the
    # serializable fields -- confirm before adding anything to this tuple.
    __slots__ = (
        'id',
        'title',
        'namespace',
        'redirect',
        'restrictions'
    )

    def __init__(self, id, title, namespace, redirect, restrictions, revisions=None):
        """
        :Parameters:
            id : `int` | `None`
                Page ID
            title : `str` | `None`
                Page title (namespace excluded)
            namespace : `int` | `None`
                Namespace ID
            redirect : :class:`~mw.xml_dump.Redirect` | `None`
                Redirect information, if the page is a redirect
            restrictions : iterable
                Page editing restrictions
            revisions : iterable | `None`
                Revisions of the page.  Any iterable (list, generator, ...)
                is accepted; `None` means "no revisions".
        """
        self.id = none_or(id, int)
        """
        Page ID : `int`
        """

        self.title = none_or(title, str)
        """
        Page title (namespace excluded) : `str`
        """

        self.namespace = none_or(namespace, int)
        """
        Namespace ID : `int`
        """

        self.redirect = none_or(redirect, Redirect)
        """
        Page is currently redirect? : :class:`~mw.xml_dump.Redirect` | `None`
        """

        self.restrictions = serializable.List.deserialize(restrictions)
        """
        A list of page editing restrictions (empty unless restrictions are specified) : list( `str` )
        """

        # Always store a real iterator.  Storing a bare list (the previous
        # default) made __iter__ return a non-iterator, so `for rev in page`
        # raised TypeError and __next__ could not call next() on it.
        # iter() on a generator returns the generator itself, so lazily
        # produced revisions stay lazy.
        self.__revisions = iter(revisions if revisions is not None else ())

    def __iter__(self):
        """Return the underlying (single-pass) revision iterator."""
        return self.__revisions

    def __next__(self):
        """Return the next revision; raises `StopIteration` when exhausted."""
        return next(self.__revisions)

    @classmethod
    def load_revisions(cls, first_revision, element):
        """
        Lazily generate :class:`~mw.xml_dump.Revision` objects for a page.

        :Parameters:
            first_revision : element | `None`
                The <revision> element already consumed while reading the
                page metadata, or `None` if the page had no revision.
            element : iterable of elements
                The remaining children of the <page> element.

        :Raises:
            :class:`MalformedXML` if a remaining child is not a <revision>.
        """
        # A page without any <revision> leaves first_revision as None; there
        # is nothing to build a Revision from in that case.
        if first_revision is not None:
            yield Revision.from_element(first_revision)

        for sub_element in element:
            tag = sub_element.tag

            if tag == "revision":
                yield Revision.from_element(sub_element)
            else:
                raise MalformedXML("Expected to see 'revision'.  " +
                                   "Instead saw '{0}'".format(tag))

    @classmethod
    def from_element(cls, element):
        """
        Build a :class:`Page` from a <page> element.

        Metadata children are consumed eagerly; revisions are left to be
        produced lazily by :meth:`load_revisions`.

        :Raises:
            :class:`MalformedXML` if an unrecognised child tag is found
            before the first <revision>.
        """
        title = None
        namespace = None
        page_id = None
        redirect = None
        restrictions = []

        first_revision = None

        # Consume the metadata elements until the first <revision> is seen.
        # NOTE(review): this assumes `element` is a single-pass iterator so
        # that load_revisions() resumes after the first revision -- confirm.
        for sub_element in element:
            tag = sub_element.tag
            if tag == "title":
                title = sub_element.text
            elif tag == "ns":
                namespace = sub_element.text
            elif tag == "id":
                page_id = int(sub_element.text)
            elif tag == "redirect":
                redirect = Redirect.from_element(sub_element)
            elif tag == "restrictions":
                restrictions.append(sub_element.text)
            elif tag in ("DiscussionThreading", "sha1"):
                # Recognised but deliberately ignored.
                continue
            elif tag == "revision":
                first_revision = sub_element
                break
            # Assuming that the first revision seen marks the end of page
            # metadata.  I'm not too keen on this assumption, so I'm leaving
            # this long comment to warn whoever ends up maintaining this.
            else:
                raise MalformedXML("Unexpected tag found when processing " +
                                   "a <page>: '{0}'".format(tag))

        # Usually reached by seeing a <revision> tag (see verbose comment
        # above); if the page had none, first_revision is still None and
        # load_revisions() simply yields nothing for it.
        revisions = cls.load_revisions(first_revision, element)

        return cls(page_id, title, namespace, redirect, restrictions, revisions)

Community Data Science Collective || Want to submit a patch?