code.communitydata.science - rises_declines_wikia_code.git/blob - mediawiki_dump_tools/Mediawiki-Utilities/mw/xml_dump/tests/test_map.py
Initial commit
[rises_declines_wikia_code.git] / mediawiki_dump_tools / Mediawiki-Utilities / mw / xml_dump / tests / test_map.py
1 import io
2
3 from nose.tools import eq_, raises
4
5 from ..map import map
6
7
# Fixture shared by the tests in this module: a small MediaWiki XML export
# (schema export-0.8) holding two pages:
#   * "Foo" (page id 1, ns 0) with two revisions (revision ids 1 and 2)
#   * "Bar" (page id 2, ns 1) with one revision (revision id 3)
# The tests below assert on exactly these page ids and revision counts.
SAMPLE_XML = """
<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.8/"
           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
           xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.8/
           http://www.mediawiki.org/xml/export-0.8.xsd"
           version="0.8" xml:lang="en">
  <siteinfo>
    <sitename>Wikipedia</sitename>
    <base>http://en.wikipedia.org/wiki/Main_Page</base>
    <generator>MediaWiki 1.22wmf2</generator>
    <case>first-letter</case>
    <namespaces>
      <namespace key="0" case="first-letter" />
      <namespace key="1" case="first-letter">Talk</namespace>
    </namespaces>
  </siteinfo>
  <page>
    <title>Foo</title>
    <ns>0</ns>
    <id>1</id>
    <revision>
      <id>1</id>
      <timestamp>2004-08-09T09:04:08Z</timestamp>
      <contributor>
        <username>Gen0cide</username>
        <id>92182</id>
      </contributor>
      <text xml:space="preserve">Revision 1 text</text>
      <sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
      <model>wikitext</model>
      <format>text/x-wiki</format>
    </revision>
    <revision>
      <id>2</id>
      <timestamp>2004-08-10T09:04:08Z</timestamp>
      <contributor>
        <ip>222.152.210.109</ip>
      </contributor>
      <text xml:space="preserve">Revision 2 text</text>
      <sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
      <model>wikitext</model>
      <comment>Comment 2</comment>
      <format>text/x-wiki</format>
    </revision>
  </page>
  <page>
    <title>Bar</title>
    <ns>1</ns>
    <id>2</id>
    <revision>
      <id>3</id>
      <timestamp>2004-08-11T09:04:08Z</timestamp>
      <contributor>
        <ip>222.152.210.22</ip>
      </contributor>
      <text xml:space="preserve">Revision 3 text</text>
      <sha1>g9chqqg94myzq11c56ixvq7o1yg75n9</sha1>
      <model>wikitext</model>
      <format>text/x-wiki</format>
    </revision>
  </page>
</mediawiki>"""
70
71
def test_map():
    """Check that ``map`` hands back one revision-count record per page."""
    # Revision count expected for each page id found in SAMPLE_XML.
    expected_revisions = {1: 2, 2: 1}

    def process_dump(dump, path):
        # Summarise every page as a dict with its id and revision count.
        for page in dump:
            yield {'page_id': page.id, 'revisions': sum(1 for _ in page)}

    seen = 0
    for doc in map([io.StringIO(SAMPLE_XML)], process_dump):
        page_id = doc['page_id']
        revisions = doc['revisions']
        if page_id in expected_revisions:
            eq_(revisions, expected_revisions[page_id])
        else:
            # Any page id other than 1 or 2 is not part of the fixture.
            assert False

        seen += 1

    eq_(seen, 2)
97
98
def test_dict_yield():
    """Check that dicts yielded by ``process_dump`` come back out of ``map``.

    The assertions used to sit inside a nested ``test_map`` function that was
    never invoked, so this test passed without checking anything.  They now
    run directly in the test body.
    """
    f = io.StringIO(SAMPLE_XML)

    def process_dump(dump, path):
        # Yield one dict per page: its id and how many revisions it has.
        for page in dump:
            count = 0
            for rev in page:
                count += 1

            yield {'page_id': page.id, 'revisions': count}

    pages = 0
    for doc in map([f], process_dump):
        page_id = doc['page_id']
        revisions = doc['revisions']
        if page_id == 1:
            eq_(revisions, 2)
        elif page_id == 2:
            eq_(revisions, 1)
        else:
            # SAMPLE_XML only contains page ids 1 and 2.
            assert False, "unexpected page id: %r" % (page_id,)

        pages += 1

    eq_(pages, 2)
125
126
@raises(TypeError)
def test_map_error():
    """Check that a ``TypeError`` raised in ``process_dump`` escapes ``map``.

    The test passes only if iterating over ``map``'s output raises
    ``TypeError`` (enforced by the ``@raises`` decorator).
    """
    f = io.StringIO(SAMPLE_XML)

    def process_dump(dump, path):
        # Walk the pages and fail on the page whose id is 2.
        for page in dump:
            if page.id == 2:
                raise TypeError("Fake error")

    # Only drain the iterator.  The previous loop body subscripted each item
    # (and kept unused locals); that subscript could itself raise a TypeError
    # and satisfy @raises for the wrong reason.
    for _ in map([f], process_dump):
        pass
140
141
def test_map_error_handler():
    """Run ``map`` with a ``process_dump`` that raises on pages with >2 revisions.

    NOTE(review): no page in SAMPLE_XML has more than two revisions, and
    ``handle_error`` is declared as a default argument of ``process_dump``
    rather than being passed to ``map``, so the error path does not appear to
    be exercised here -- confirm the intended coverage.
    """
    source = io.StringIO(SAMPLE_XML)

    def process_dump(dump, path, handle_error=lambda exp, stack: None):
        for page in dump:
            count = sum(1 for _ in page)

            # Pages with more than two revisions are treated as an error.
            if count > 2:
                raise TypeError("Fake type error.")

            yield {'page_id': page.id, 'revisions': count}

    # Revision count expected for each page id found in SAMPLE_XML.
    expected = {1: 2, 2: 1}

    pages = 0
    for doc in map([source], process_dump):
        page_id = doc['page_id']
        revisions = doc['revisions']
        if page_id in expected:
            eq_(revisions, expected[page_id])
        else:
            assert False

        pages += 1

    eq_(pages, 2)

Community Data Science Collective || Want to submit a patch?