code.communitydata.science - rises_declines_wikia_code.git/blob - mediawiki_dump_tools/Mediawiki-Utilities/mw/xml_dump/tests/test_processor.py
Initial commit
[rises_declines_wikia_code.git] / mediawiki_dump_tools / Mediawiki-Utilities / mw / xml_dump / tests / test_processor.py
1 import io
2 from multiprocessing import Queue
3
4 from nose.tools import eq_, raises
5
6 from ..processor import DONE, Processor
7
8
# Minimal MediaWiki XML export (schema version 0.8) used as the dump fixture
# for the tests in this module.  It contains a <siteinfo> header declaring
# two namespaces (key 0 and key 1 "Talk") followed by two pages:
#   * "Foo" (ns 0, page id 1) with revisions 1 and 2
#   * "Bar" (ns 1, page id 2) with revision 3
# The tests below rely on the page ids appearing in that order (1, then 2).
SAMPLE_XML = """
<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.8/"
           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
           xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.8/
           http://www.mediawiki.org/xml/export-0.8.xsd"
           version="0.8" xml:lang="en">
  <siteinfo>
    <sitename>Wikipedia</sitename>
    <base>http://en.wikipedia.org/wiki/Main_Page</base>
    <generator>MediaWiki 1.22wmf2</generator>
    <case>first-letter</case>
    <namespaces>
      <namespace key="0" case="first-letter" />
      <namespace key="1" case="first-letter">Talk</namespace>
    </namespaces>
  </siteinfo>
  <page>
    <title>Foo</title>
    <ns>0</ns>
    <id>1</id>
    <revision>
      <id>1</id>
      <timestamp>2004-08-09T09:04:08Z</timestamp>
      <contributor>
        <username>Gen0cide</username>
        <id>92182</id>
      </contributor>
      <text xml:space="preserve">Revision 1 text</text>
      <sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
      <model>wikitext</model>
      <format>text/x-wiki</format>
    </revision>
    <revision>
      <id>2</id>
      <timestamp>2004-08-10T09:04:08Z</timestamp>
      <contributor>
        <ip>222.152.210.109</ip>
      </contributor>
      <text xml:space="preserve">Revision 2 text</text>
      <sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
      <model>wikitext</model>
      <comment>Comment 2</comment>
      <format>text/x-wiki</format>
    </revision>
  </page>
  <page>
    <title>Bar</title>
    <ns>1</ns>
    <id>2</id>
    <revision>
      <id>3</id>
      <timestamp>2004-08-11T09:04:08Z</timestamp>
      <contributor>
        <ip>222.152.210.22</ip>
      </contributor>
      <text xml:space="preserve">Revision 3 text</text>
      <sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
      <model>wikitext</model>
      <format>text/x-wiki</format>
    </revision>
  </page>
</mediawiki>"""
71
72
73
def test_processor():
    """Check that a Processor emits every yielded value and then DONE.

    A single in-memory dump (SAMPLE_XML) is placed on the path queue and a
    callback that yields each page's id is handed to the Processor.  After
    the Processor is started, the output queue is expected to deliver three
    ``(error, item)`` pairs, none flagged as an error: the page ids 1 and 2,
    in that order, followed by the DONE sentinel.
    """

    def process_dump(dump, path):
        # Emit the id of each page in the dump; `path` is not used here.
        yield from (page.id for page in dump)

    results = Queue()
    paths = Queue()
    paths.put(io.StringIO(SAMPLE_XML))

    worker = Processor(paths, results, process_dump)
    worker.start()

    # Values must arrive in exactly this order, each without an error flag.
    for expected in (1, 2, DONE):
        error, item = results.get()
        assert not error
        eq_(item, expected)
100
def test_processor_error():
    """Check that a failing callback is reported and DONE still follows.

    The callback given to the Processor raises as soon as it is called.
    The output queue is then expected to deliver an ``(error, item)`` pair
    whose error flag is truthy, followed by a non-error pair carrying the
    DONE sentinel.
    """

    def process_dump(dump, path):
        # Fail immediately so the Processor's error reporting is exercised.
        raise Exception("foo")

    results = Queue()
    paths = Queue()
    paths.put(io.StringIO(SAMPLE_XML))

    worker = Processor(paths, results, process_dump)
    worker.start()

    # First message: the failure raised by process_dump (payload not checked).
    failed, _ = results.get()
    assert failed

    # Second message: normal completion marker.
    failed, outcome = results.get()
    assert not failed
    eq_(outcome, DONE)

Community Data Science Collective || Want to submit a patch?