import io
from nose.tools import eq_, assert_is_instance
from ....types import Timestamp
from ..iterator import Iterator
from ..comment import Comment
from ..text import Text
from ..revision import Revision
from ..page import Page
# Shared dump fixture: the tests below wrap this text in io.StringIO and hand
# it to Iterator.from_file, then assert on the literal values it contains
# (titles, ids, timestamps, contributor names, revision text, sha1, ...).
# NOTE(review): in this view the content is a list of bare values with no
# markup around them -- confirm against the repository copy before editing,
# because every expected value in the tests is tied to this text.
SAMPLE_XML = """
Wikipedia
http://en.wikipedia.org/wiki/Main_Page
MediaWiki 1.22wmf2
first-letter
Talk
Foo
0
1
1
2004-08-09T09:04:08Z
Gen0cide
92182
Revision 1 text
g9chqqg94myzq11c56ixvq7o1yg75n9
wikitext
text/x-wiki
2
2004-08-10T09:04:08Z
222.152.210.109
Revision 2 text
g9chqqg94myzq11c56ixvq7o1yg75n9
wikitext
Comment 2
text/x-wiki
Bar
1
2
edit=sysop:move=sysop
3
2004-08-11T09:04:08Z
222.152.210.22
Revision 3 text
g9chqqg94myzq11c56ixvq7o1yg75n9
wikitext
text/x-wiki
4
2004-08-12T09:04:08Z
6ixvq7o1yg75n9g9chqqg94myzq11c5
wikitext
text/x-wiki
"""
def test_complete():
    # Walk the whole sample dump: both pages and all four revisions, checking
    # every field exposed on each page and revision along the way.
    xml_dump = Iterator.from_file(io.StringIO(SAMPLE_XML))
    eq_([0, 1], [ns.id for ns in xml_dump.namespaces])

    # ---- Page 1: "Foo" ----
    foo = next(xml_dump)
    eq_(foo.title, "Foo")
    eq_(foo.namespace, 0)
    eq_(foo.id, 1)
    eq_(foo.redirect, None)
    eq_(foo.restrictions, [])

    # Revision 1: contributor with both an id and a user text.
    rev = next(foo)
    eq_(rev.id, 1)
    eq_(rev.timestamp, Timestamp("2004-08-09T09:04:08Z"))
    eq_(rev.contributor.id, 92182)
    eq_(rev.contributor.user_text, "Gen0cide")
    assert_is_instance(rev.text, Text)
    eq_(rev.text, "Revision 1 text")
    eq_(rev.text.bytes, 234)
    eq_(rev.text.id, 55)
    # Same comparison as a few lines up, repeated after reading the Text
    # attributes. NOTE(review): possibly redundant -- kept to preserve the
    # original sequence of checks.
    eq_(rev.text, "Revision 1 text")
    eq_(rev.sha1, "g9chqqg94myzq11c56ixvq7o1yg75n9")
    eq_(rev.comment, None)
    eq_(rev.model, "wikitext")
    eq_(rev.format, "text/x-wiki")
    eq_(rev.beginningofpage, True)

    # Revision 2: contributor without an id, and a comment is present.
    rev = next(foo)
    eq_(rev.id, 2)
    eq_(rev.timestamp, Timestamp("2004-08-10T09:04:08Z"))
    eq_(rev.contributor.id, None)
    eq_(rev.contributor.user_text, "222.152.210.109")
    eq_(rev.text, "Revision 2 text")
    eq_(rev.text.bytes, 235)
    eq_(rev.text.id, 56)
    eq_(rev.sha1, "g9chqqg94myzq11c56ixvq7o1yg75n9")
    assert_is_instance(rev.comment, Comment)
    eq_(rev.comment, "Comment 2")
    eq_(rev.model, "wikitext")
    eq_(rev.format, "text/x-wiki")
    eq_(rev.beginningofpage, False)

    # ---- Page 2: "Bar" (has a redirect and a restriction) ----
    bar = next(xml_dump)
    assert_is_instance(bar, Page)
    eq_(bar.title, "Bar")
    eq_(bar.namespace, 1)
    eq_(bar.id, 2)
    eq_(bar.redirect.title, "Computer accessibility")
    eq_(bar.restrictions, ["edit=sysop:move=sysop"])

    # Revision 3.
    rev = next(bar)
    assert_is_instance(rev, Revision)
    eq_(rev.id, 3)
    eq_(rev.timestamp, Timestamp("2004-08-11T09:04:08Z"))
    eq_(rev.contributor.id, None)
    eq_(rev.contributor.user_text, "222.152.210.22")
    assert_is_instance(rev.text, Text)
    eq_(rev.text.bytes, 236)
    eq_(rev.text.id, 57)
    eq_(rev.text, "Revision 3 text")
    eq_(rev.sha1, "g9chqqg94myzq11c56ixvq7o1yg75n9")
    eq_(rev.comment, None)
    eq_(rev.model, "wikitext")
    eq_(rev.format, "text/x-wiki")

    # The page must be convertible to a string.
    assert_is_instance(str(bar), str)

    # Revision 4: no contributor at all and empty text.
    rev = next(bar)
    assert_is_instance(rev, Revision)
    eq_(rev.id, 4)
    eq_(rev.timestamp, Timestamp("2004-08-12T09:04:08Z"))
    eq_(rev.contributor, None)
    assert_is_instance(rev.text, Text)
    eq_(rev.text.bytes, 237)
    eq_(rev.text.id, 58)
    eq_(rev.text, "")
    eq_(rev.sha1, "6ixvq7o1yg75n9g9chqqg94myzq11c5")
    eq_(rev.comment, None)
    eq_(rev.model, "wikitext")
    eq_(rev.format, "text/x-wiki")

    # The revision must be convertible to a string as well.
    assert_is_instance(str(rev), str)
def test_skipping():
    # Advance to the second page without ever reading the first page's
    # revisions, then check that the second page's first revision is still
    # read correctly.
    xml_dump = Iterator.from_file(io.StringIO(SAMPLE_XML))

    # First page: only the header fields are inspected; its revisions are
    # deliberately left unread.
    skipped = next(xml_dump)
    eq_(skipped.title, "Foo")
    eq_(skipped.namespace, 0)
    eq_(skipped.id, 1)

    # Second page.
    bar = next(xml_dump)
    eq_(bar.title, "Bar")
    eq_(bar.namespace, 1)
    eq_(bar.id, 2)

    # Its first revision must be revision 3, not a leftover from "Foo".
    rev = next(bar)
    eq_(rev.id, 3)
    eq_(rev.timestamp, Timestamp("2004-08-11T09:04:08Z"))
    eq_(rev.contributor.id, None)
    eq_(rev.contributor.user_text, "222.152.210.22")
    assert_is_instance(rev.text, Text)
    eq_(rev.text, "Revision 3 text")
    eq_(rev.sha1, "g9chqqg94myzq11c56ixvq7o1yg75n9")
    eq_(rev.comment, None)
    eq_(rev.model, "wikitext")
    eq_(rev.format, "text/x-wiki")
def test_serialization():
    # Round trip: serializing the dump and deserializing the result must give
    # an object that compares equal to the original.
    original = Iterator.from_file(io.StringIO(SAMPLE_XML))
    restored = Iterator.deserialize(original.serialize())
    eq_(original, restored)
def test_from_page_xml():
    # Build a dump from a single page's content via Iterator.from_page_xml
    # and check the page and both of its revisions.
    page_xml = """
Foo
0
1
1
2004-08-09T09:04:08Z
Gen0cide
92182
Revision 1 text
g9chqqg94myzq11c56ixvq7o1yg75n9
wikitext
text/x-wiki
2
2004-08-10T09:04:08Z
222.152.210.109
Revision 2 text
g9chqqg94myzq11c56ixvq7o1yg75n9
wikitext
Comment 2
text/x-wiki
"""
    xml_dump = Iterator.from_page_xml(io.StringIO(page_xml))

    # The `namespaces` attribute exists on the dump, but it is an empty list.
    eq_(xml_dump.namespaces, [])

    foo = next(xml_dump)
    eq_(foo.title, "Foo")
    eq_(foo.namespace, 0)
    eq_(foo.id, 1)

    # Revision 1: contributor with an id, no comment.
    rev = next(foo)
    eq_(rev.id, 1)
    eq_(rev.timestamp, Timestamp("2004-08-09T09:04:08Z"))
    eq_(rev.contributor.id, 92182)
    eq_(rev.contributor.user_text, "Gen0cide")
    eq_(rev.text, "Revision 1 text")
    eq_(rev.sha1, "g9chqqg94myzq11c56ixvq7o1yg75n9")
    eq_(rev.comment, None)
    eq_(rev.model, "wikitext")
    eq_(rev.format, "text/x-wiki")

    # Revision 2: contributor without an id, with a comment.
    rev = next(foo)
    eq_(rev.id, 2)
    eq_(rev.timestamp, Timestamp("2004-08-10T09:04:08Z"))
    eq_(rev.contributor.id, None)
    eq_(rev.contributor.user_text, "222.152.210.109")
    eq_(rev.text, "Revision 2 text")
    eq_(rev.sha1, "g9chqqg94myzq11c56ixvq7o1yg75n9")
    eq_(rev.comment, "Comment 2")
    eq_(rev.model, "wikitext")
    eq_(rev.format, "text/x-wiki")