3 from nose.tools import eq_, assert_is_instance
5 from ....types import Timestamp
6 from ..iterator import Iterator
7 from ..comment import Comment
8 from ..text import Text
9 from ..revision import Revision
10 from ..page import Page
14 <mediawiki xmlns="http://www.mediawiki.org/xml/export-0.8/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http
15 ://www.mediawiki.org/xml/export-0.8/ http://www.mediawiki.org/xml/export-0.8.xsd" version="0.8" xml:lang="en">
17 <sitename>Wikipedia</sitename>
18 <base>http://en.wikipedia.org/wiki/Main_Page</base>
19 <generator>MediaWiki 1.22wmf2</generator>
20 <case>first-letter</case>
22 <namespace key="0" case="first-letter" />
23 <namespace key="1" case="first-letter">Talk</namespace>
30 <revision beginningofpage="true">
32 <timestamp>2004-08-09T09:04:08Z</timestamp>
34 <username>Gen0cide</username>
37 <text xml:space="preserve" bytes="234" id="55">Revision 1 text</text>
38 <sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
39 <model>wikitext</model>
40 <format>text/x-wiki</format>
44 <timestamp>2004-08-10T09:04:08Z</timestamp>
46 <ip>222.152.210.109</ip>
48 <text xml:space="preserve" bytes="235" id="56">Revision 2 text</text>
49 <sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
50 <model>wikitext</model>
51 <comment>Comment 2</comment>
52 <format>text/x-wiki</format>
59 <redirect title="Computer accessibility" />
60 <restrictions>edit=sysop:move=sysop</restrictions>
61 <revision beginningofpage="true">
63 <timestamp>2004-08-11T09:04:08Z</timestamp>
65 <ip>222.152.210.22</ip>
67 <text xml:space="preserve" bytes="236" id="57">Revision 3 text</text>
68 <sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
69 <model>wikitext</model>
70 <format>text/x-wiki</format>
74 <timestamp>2004-08-12T09:04:08Z</timestamp>
75 <text id="58" bytes="237" />
76 <sha1>6ixvq7o1yg75n9g9chqqg94myzq11c5</sha1>
77 <model>wikitext</model>
78 <format>text/x-wiki</format>
85 f = io.StringIO(SAMPLE_XML)
87 dump = Iterator.from_file(f)
88 eq_([0, 1], list(ns.id for ns in dump.namespaces))
91 eq_(page.title, "Foo")
92 eq_(page.namespace, 0)
94 eq_(page.redirect, None)
95 eq_(page.restrictions, [])
99 eq_(revision.timestamp, Timestamp("2004-08-09T09:04:08Z"))
100 eq_(revision.contributor.id, 92182)
101 eq_(revision.contributor.user_text, "Gen0cide")
102 assert_is_instance(revision.text, Text)
103 eq_(revision.text, "Revision 1 text")
104 eq_(revision.text.bytes, 234)
105 eq_(revision.text.id, 55)
106 eq_(revision.text, "Revision 1 text")
107 eq_(revision.sha1, "g9chqqg94myzq11c56ixvq7o1yg75n9")
108 eq_(revision.comment, None)
109 eq_(revision.model, "wikitext")
110 eq_(revision.format, "text/x-wiki")
111 eq_(revision.beginningofpage, True)
113 revision = next(page)
115 eq_(revision.timestamp, Timestamp("2004-08-10T09:04:08Z"))
116 eq_(revision.contributor.id, None)
117 eq_(revision.contributor.user_text, "222.152.210.109")
118 eq_(revision.text, "Revision 2 text")
119 eq_(revision.text.bytes, 235)
120 eq_(revision.text.id, 56)
121 eq_(revision.sha1, "g9chqqg94myzq11c56ixvq7o1yg75n9")
122 assert_is_instance(revision.comment, Comment)
123 eq_(revision.comment, "Comment 2")
124 eq_(revision.model, "wikitext")
125 eq_(revision.format, "text/x-wiki")
126 eq_(revision.beginningofpage, False)
129 assert_is_instance(page, Page)
130 eq_(page.title, "Bar")
131 eq_(page.namespace, 1)
133 eq_(page.redirect.title, "Computer accessibility")
134 eq_(page.restrictions, ["edit=sysop:move=sysop"])
136 revision = next(page)
137 assert_is_instance(revision, Revision)
139 eq_(revision.timestamp, Timestamp("2004-08-11T09:04:08Z"))
140 eq_(revision.contributor.id, None)
141 eq_(revision.contributor.user_text, "222.152.210.22")
142 assert_is_instance(revision.text, Text)
143 eq_(revision.text.bytes, 236)
144 eq_(revision.text.id, 57)
145 eq_(revision.text, "Revision 3 text")
146 eq_(revision.sha1, "g9chqqg94myzq11c56ixvq7o1yg75n9")
147 eq_(revision.comment, None)
148 eq_(revision.model, "wikitext")
149 eq_(revision.format, "text/x-wiki")
150 assert_is_instance(str(page), str)
152 revision = next(page)
153 assert_is_instance(revision, Revision)
155 eq_(revision.timestamp, Timestamp("2004-08-12T09:04:08Z"))
156 eq_(revision.contributor, None)
157 assert_is_instance(revision.text, Text)
158 eq_(revision.text.bytes, 237)
159 eq_(revision.text.id, 58)
160 eq_(revision.text, "")
161 eq_(revision.sha1, "6ixvq7o1yg75n9g9chqqg94myzq11c5")
162 eq_(revision.comment, None)
163 eq_(revision.model, "wikitext")
164 eq_(revision.format, "text/x-wiki")
165 assert_is_instance(str(revision), str)
169 f = io.StringIO(SAMPLE_XML)
171 dump = Iterator.from_file(f)
174 eq_(page.title, "Foo")
175 eq_(page.namespace, 0)
179 eq_(page.title, "Bar")
180 eq_(page.namespace, 1)
183 revision = next(page)
185 eq_(revision.timestamp, Timestamp("2004-08-11T09:04:08Z"))
186 eq_(revision.contributor.id, None)
187 eq_(revision.contributor.user_text, "222.152.210.22")
188 assert_is_instance(revision.text, Text)
189 eq_(revision.text, "Revision 3 text")
190 eq_(revision.sha1, "g9chqqg94myzq11c56ixvq7o1yg75n9")
191 eq_(revision.comment, None)
192 eq_(revision.model, "wikitext")
193 eq_(revision.format, "text/x-wiki")
def test_serialization():
    """Check that a dump equals itself after a serialize/deserialize round trip."""
    dump = Iterator.from_file(io.StringIO(SAMPLE_XML))

    # Push the dump through its serialized form and rebuild it.
    restored = Iterator.deserialize(dump.serialize())

    eq_(dump, restored)
203 def test_from_page_xml():
211 <timestamp>2004-08-09T09:04:08Z</timestamp>
213 <username>Gen0cide</username>
216 <text xml:space="preserve">Revision 1 text</text>
217 <sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
218 <model>wikitext</model>
219 <format>text/x-wiki</format>
223 <timestamp>2004-08-10T09:04:08Z</timestamp>
225 <ip>222.152.210.109</ip>
227 <text xml:space="preserve">Revision 2 text</text>
228 <sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
229 <model>wikitext</model>
230 <comment>Comment 2</comment>
231 <format>text/x-wiki</format>
236 dump = Iterator.from_page_xml(io.StringIO(page_xml))
238 # You have a `namespaces`, but it's empty.
239 eq_(dump.namespaces, [])
242 eq_(page.title, "Foo")
243 eq_(page.namespace, 0)
246 revision = next(page)
248 eq_(revision.timestamp, Timestamp("2004-08-09T09:04:08Z"))
249 eq_(revision.contributor.id, 92182)
250 eq_(revision.contributor.user_text, "Gen0cide")
251 eq_(revision.text, "Revision 1 text")
252 eq_(revision.sha1, "g9chqqg94myzq11c56ixvq7o1yg75n9")
253 eq_(revision.comment, None)
254 eq_(revision.model, "wikitext")
255 eq_(revision.format, "text/x-wiki")
257 revision = next(page)
259 eq_(revision.timestamp, Timestamp("2004-08-10T09:04:08Z"))
260 eq_(revision.contributor.id, None)
261 eq_(revision.contributor.user_text, "222.152.210.109")
262 eq_(revision.text, "Revision 2 text")
263 eq_(revision.sha1, "g9chqqg94myzq11c56ixvq7o1yg75n9")
264 eq_(revision.comment, "Comment 2")
265 eq_(revision.model, "wikitext")
266 eq_(revision.format, "text/x-wiki")