]> code.communitydata.science - mediawiki_dump_tools.git/blob - test/Wikiq_Unit_Test.py
create baseline tests for xml dump processing
[mediawiki_dump_tools.git] / test / Wikiq_Unit_Test.py
1 import unittest
2 import os
3 import sys
4 from shutil import copyfile
5
6 # with / without pwr DONE
7 # with / without url encode DONE
8 # with / without collapse user DONE
9 # with output to stdout
10 # note that the persistence radius is 7 by default
11 # reading various file formats including
12 #        7z, gz, bz2, xml 
13 # wikia and wikipedia data
14 # malformed xmls
15
class Test_Basic(unittest.TestCase):
    """Baseline regression tests for the ``wikiq`` command.

    Every test runs ``../wikiq`` on the sample dump with one set of flags,
    keeps a copy of the produced TSV under a test-specific name and compares
    it line by line with the stored baseline of the same name. All paths are
    relative, so they are resolved against the current working directory.
    """

    def setUp(self):
        """Define the shared paths and make sure the output directory exists.

        The test is skipped when the input dump is not present, so a missing
        fixture is reported as such instead of surfacing later as an
        unrelated file error.
        """
        self.wiki = 'sailormoon'
        self.wikiq_out_name = self.wiki + ".tsv"
        self.test_output_dir = os.path.join(".", "test_output")
        # File written by wikiq itself; each test copies it under its own name.
        self.call_output = os.path.join(self.test_output_dir, self.wikiq_out_name)

        self.infile = "{0}.xml.7z".format(self.wiki)
        self.base_call = "../wikiq {0} -o {1}"
        self.input_dir = "dumps"
        self.input_file = os.path.join(".", self.input_dir, self.infile)
        self.baseline_output_dir = "baseline_output"

        if not os.path.exists(self.input_file):
            self.skipTest("input dump {0} not found".format(self.input_file))

        if not os.path.exists(self.test_output_dir):
            os.mkdir(self.test_output_dir)

    def _assert_same_lines(self, test_path, baseline_path):
        """Fail unless both files contain exactly the same lines.

        Parameters:
            test_path: path of the freshly generated file.
            baseline_path: path of the expected (baseline) file.

        Raises:
            AssertionError: on the first differing line, or when one file
                has fewer lines than the other.
        """
        from itertools import zip_longest

        # The context manager closes both files even when an assertion fails.
        with open(test_path) as test_lines, open(baseline_path) as baseline_lines:
            # zip_longest pads the shorter file with None, so a truncated
            # output no longer passes silently the way it did with zip().
            pairs = zip_longest(test_lines, baseline_lines)
            for line_number, (test, baseline) in enumerate(pairs, start=1):
                self.assertEqual(
                    test, baseline,
                    "line {0} of {1} differs from {2}".format(
                        line_number, test_path, baseline_path))

    def _run_and_compare(self, prefix, extra_args=""):
        """Run wikiq once and compare its output with the matching baseline.

        Parameters:
            prefix: string prepended to the TSV name to build the name of
                both the kept copy and the baseline file.
            extra_args: command line options appended to the base call.
        """
        call = self.base_call.format(self.input_file, self.test_output_dir)
        if extra_args:
            call = call + " " + extra_args

        # A failed run must fail the test; otherwise a stale TSV left behind
        # by an earlier test could be compared instead of this run's output.
        status = os.system(call)
        self.assertEqual(status, 0, "command failed: {0}".format(call))

        test_file = prefix + self.wikiq_out_name
        test_path = os.path.join(self.test_output_dir, test_file)
        copyfile(self.call_output, test_path)

        baseline_path = os.path.join(".", self.baseline_output_dir, test_file)
        self._assert_same_lines(test_path, baseline_path)

    def test_noargs(self):
        """Output without any optional flag matches its baseline."""
        self._run_and_compare("noargs_")

    def test_collapse_user(self):
        """Output with ``--collapse-user`` matches its baseline."""
        self._run_and_compare("collapse-user_", "--collapse-user")

    def test_pwr(self):
        """Output with ``--persistence`` matches its baseline."""
        self._run_and_compare("persistence_", "--persistence")

    def test_url_encode(self):
        """Output with ``--url-encode`` matches its baseline."""
        self._run_and_compare("url-encode_", "--url-encode")
104
105
106         
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()

Community Data Science Collective || Want to submit a patch?