code.communitydata.science - rises_declines_wikia_code.git/blob - mediawiki_dump_tools/Mediawiki-Utilities/mw/xml_dump/iteration/text.py
Initial commit
[rises_declines_wikia_code.git] / mediawiki_dump_tools / Mediawiki-Utilities / mw / xml_dump / iteration / text.py
1 from ...types import serializable
2 from ...util import none_or
3
4
class Text(str, serializable.Type):
    """
    Text content of a revision.  Instances are ordinary :class:`str` values
    that additionally carry the metadata attributes listed below.

    **deleted**
        Deletion flag, stored as ``bool(deleted)`` : `bool`
    **xml_space**
        Whitespace-handling hint; :meth:`from_element` fills it from the
        ``xml:space`` attribute : `str`
    **id**
        Filled from the element's ``id`` attribute by :meth:`from_element`
        (presumably an identifier of the stored text -- TODO confirm)
        : `int` | `None`
    **bytes**
        Filled from the element's ``bytes`` attribute by :meth:`from_element`
        (presumably the size of the text in bytes -- TODO confirm)
        : `int` | `None`

    >>> from mw.xml_dump import Text
    >>>
    >>> t = Text("foo")
    >>> t == "foo"
    True
    >>> t.deleted
    False
    >>> t.xml_space
    'preserve'
    """

    def __new__(cls, string_or_text="", deleted=False, xml_space="preserve", id=None, bytes=None):
        """
        Build a new instance, or hand back `string_or_text` untouched when it
        already is an instance of `cls` (the remaining arguments are ignored
        in that case).
        """
        # An existing instance is reused as-is rather than copied.
        if isinstance(string_or_text, cls):
            return string_or_text

        # str is immutable, so the character data must be supplied here in
        # __new__; the metadata is attached right afterwards.
        text = super().__new__(cls, string_or_text)
        text.initialize(string_or_text, deleted, xml_space, id, bytes)
        return text

    def initialize(self, string, deleted, xml_space, id, bytes):
        """
        Attach the metadata attributes to a freshly created instance.

        `string` is accepted but not used here; the character data has
        already been set by ``__new__``.
        """
        self.deleted = bool(deleted)
        # none_or is a project helper; presumably it lets None through and
        # otherwise applies the given type -- verify against mw.util.
        self.xml_space = none_or(xml_space, str)
        self.id = none_or(id, int)
        self.bytes = none_or(bytes, int)

    def __str__(self):
        """Return the text via the plain :class:`str` implementation."""
        return str.__str__(self)

    def __repr__(self):
        """
        Return ``ClassName('<text>', deleted=<bool>)``.  Only the text and
        the `deleted` flag are included.
        """
        class_name = self.__class__.__name__
        quoted_text = str.__repr__(self)
        deleted_part = "deleted={0}".format(repr(self.deleted))
        arguments = ", ".join([quoted_text, deleted_part])
        return "{0}({1})".format(class_name, arguments)

    def serialize(self):
        """
        Return a `dict` of the text and its metadata, keyed by the parameter
        names of the constructor.
        """
        document = {
            "string_or_text": str(self),
            "deleted": self.deleted,
            "xml_space": self.xml_space,
            "id": self.id,
            "bytes": self.bytes,
        }
        return document

    @classmethod
    def from_element(cls, e):
        """
        Construct an instance from an element object `e` that exposes a
        ``text`` attribute and an ``attr(name[, default])`` method.
        """
        # A falsy e.text (e.g. None for an empty element) becomes "".
        content = e.text or ""
        metadata = {
            "deleted": e.attr("deleted", False),
            "xml_space": e.attr("xml:space"),
            "id": e.attr("id"),
            "bytes": e.attr("bytes"),
        }
        return cls(content, **metadata)

Community Data Science Collective || Want to submit a patch?