5 from shutil import copyfile
8 # with / without pwr DONE
9 # with / without url encode DONE
10 # with / without collapse user DONE
11 # with output to stdout DONE
12 # note that the persistence radius is 7 by default
13 # reading various file formats including
14 # 7z, gz, bz2, xml DONE
15 # wikia and wikipedia data DONE
class Test_Wikipedia(unittest.TestCase):
    """Regression test running wikiq on the bz2-compressed 'ikwiki' dump.

    The TSV that wikiq writes into the test output directory is compared,
    line by line, with a stored baseline file of the same name.
    """

    def setUp(self):
        """Create the output directory and derive the paths the test uses."""
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs("test_output", exist_ok=True)

        self.wiki = 'ikwiki-20180301-pages-meta-history'
        self.wikiq_out_name = self.wiki + ".tsv"
        self.test_output_dir = os.path.join(".", "test_output")
        self.call_output = os.path.join(self.test_output_dir, self.wikiq_out_name)

        self.infile = "{0}.xml.bz2".format(self.wiki)
        self.base_call = "../wikiq {0} -o {1}"
        self.input_dir = "dumps"
        self.input_file = os.path.join(".", self.input_dir, self.infile)
        self.baseline_output_dir = "baseline_output"

    def _assert_matches_baseline(self, prefix, extra_args=""):
        """Run wikiq with ``extra_args`` and compare its output to a baseline.

        ``prefix`` is prepended to the wikiq output name to form both the
        name of the saved copy in the test output directory and the name of
        the baseline file it is compared against.
        """
        test_filename = prefix + self.wikiq_out_name
        test_file = os.path.join(self.test_output_dir, test_filename)
        # Drop any copy left behind by a previous run.
        if os.path.exists(test_file):
            os.remove(test_file)

        call = self.base_call.format(self.input_file, self.test_output_dir) + extra_args
        # run() drains the stdout pipe and waits for the child, so the output
        # file is complete before it is copied.
        proc = subprocess.run(call, stdout=subprocess.PIPE, shell=True)
        self.assertEqual(proc.returncode, 0,
                         "wikiq exited with status {0}".format(proc.returncode))

        copyfile(self.call_output, test_file)
        baseline_file = os.path.join(".", self.baseline_output_dir, test_filename)

        # Context managers close both files even when an assertion fails.
        with open(test_file) as test_fh, open(baseline_file) as baseline_fh:
            test_lines = test_fh.readlines()
            baseline_lines = baseline_fh.readlines()

        for lineno, (test, baseline) in enumerate(zip(test_lines, baseline_lines), start=1):
            self.assertEqual(test, baseline, "line {0} differs".format(lineno))
        # zip() stops at the shorter input; without this check a truncated
        # output would pass silently.
        self.assertEqual(len(test_lines), len(baseline_lines),
                         "output and baseline have different line counts")

    def test_WP_url_encode(self):
        """wikiq --url-encode output matches the stored baseline."""
        self._assert_matches_baseline("url-encode_", " --url-encode")
class Test_Basic(unittest.TestCase):
    """Regression tests running wikiq on the 7z-compressed 'sailormoon' dump.

    Each test invokes wikiq with a different set of flags and compares the
    TSV written into the test output directory with a stored baseline.
    """

    def setUp(self):
        """Create the output directory and derive the paths the tests use."""
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs("test_output", exist_ok=True)

        self.wiki = 'sailormoon'
        self.wikiq_out_name = self.wiki + ".tsv"
        self.test_output_dir = os.path.join(".", "test_output")
        self.call_output = os.path.join(self.test_output_dir, self.wikiq_out_name)

        self.infile = "{0}.xml.7z".format(self.wiki)
        self.base_call = "../wikiq {0} -o {1}"
        self.input_dir = "dumps"
        self.input_file = os.path.join(".", self.input_dir, self.infile)
        self.baseline_output_dir = "baseline_output"

    def _assert_matches_baseline(self, prefix, extra_args=""):
        """Run wikiq with ``extra_args`` and compare its output to a baseline.

        ``prefix`` is prepended to the wikiq output name to form both the
        name of the saved copy in the test output directory and the name of
        the baseline file it is compared against.
        """
        test_filename = prefix + self.wikiq_out_name
        test_file = os.path.join(self.test_output_dir, test_filename)
        # Drop any copy left behind by a previous run.
        if os.path.exists(test_file):
            os.remove(test_file)

        call = self.base_call.format(self.input_file, self.test_output_dir) + extra_args
        # run() drains the stdout pipe and waits for the child, so the output
        # file is complete before it is copied.
        proc = subprocess.run(call, stdout=subprocess.PIPE, shell=True)
        self.assertEqual(proc.returncode, 0,
                         "wikiq exited with status {0}".format(proc.returncode))

        copyfile(self.call_output, test_file)
        baseline_file = os.path.join(".", self.baseline_output_dir, test_filename)

        # Context managers close both files even when an assertion fails.
        with open(test_file) as test_fh, open(baseline_file) as baseline_fh:
            test_lines = test_fh.readlines()
            baseline_lines = baseline_fh.readlines()

        for lineno, (test, baseline) in enumerate(zip(test_lines, baseline_lines), start=1):
            self.assertEqual(test, baseline, "line {0} differs".format(lineno))
        # zip() stops at the shorter input; without this check a truncated
        # output would pass silently.
        self.assertEqual(len(test_lines), len(baseline_lines),
                         "output and baseline have different line counts")

    def test_noargs(self):
        """wikiq output without extra flags matches the stored baseline."""
        self._assert_matches_baseline("noargs_")

    def test_collapse_user(self):
        """wikiq --collapse-user output matches the stored baseline."""
        self._assert_matches_baseline("collapse-user_", " --collapse-user")

    def test_pwr_legacy(self):
        """wikiq --persistence-legacy output matches the stored baseline."""
        self._assert_matches_baseline("persistence_legacy_", " --persistence-legacy")

    def test_url_encode(self):
        """wikiq --url-encode output matches the stored baseline."""
        self._assert_matches_baseline("url-encode_", " --url-encode")
class Test_Malformed(unittest.TestCase):
    """Checks the error wikiq reports for the 'twinpeaks' dump."""

    def setUp(self):
        """Create the output directory and derive the paths the test uses."""
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs("test_output", exist_ok=True)

        self.wiki = 'twinpeaks'
        self.wikiq_out_name = self.wiki + ".tsv"
        self.test_output_dir = os.path.join(".", "test_output")
        self.call_output = os.path.join(self.test_output_dir, self.wikiq_out_name)

        self.infile = "{0}.xml.7z".format(self.wiki)
        self.base_call = "../wikiq {0} -o {1}"
        self.input_dir = "dumps"
        self.input_file = os.path.join(".", self.input_dir, self.infile)

    def test_malformed_noargs(self):
        """The last line wikiq writes to stderr is the expected ParseError."""
        call = self.base_call.format(self.input_file, self.test_output_dir)
        proc = subprocess.run(call, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE, shell=True)

        # Decode the bytes instead of splitting their repr on a literal
        # backslash-n; errors="replace" keeps the test from crashing on
        # undecodable bytes.
        errlines = proc.stderr.decode("utf-8", errors="replace").splitlines()
        # Fail with a clear message rather than an IndexError on empty stderr.
        self.assertTrue(errlines, "wikiq wrote nothing to stderr")
        self.assertEqual(errlines[-1], 'xml.etree.ElementTree.ParseError: no element found: line 1369, column 0')
class Test_Stdout(unittest.TestCase):
    """Regression tests for wikiq's --stdout mode on the 'sailormoon' dump.

    The text wikiq writes to standard output is compared, line by line,
    with a stored baseline file.
    """

    def setUp(self):
        """Derive the command template and paths the tests use."""
        self.wiki = 'sailormoon'
        self.wikiq_out_name = self.wiki + ".tsv"

        self.infile = "{0}.xml.7z".format(self.wiki)
        self.base_call = "../wikiq {0} --stdout"
        self.input_dir = "dumps"
        self.input_file = os.path.join(".", self.input_dir, self.infile)
        self.baseline_output_dir = "baseline_output"

    def _assert_stdout_matches_baseline(self, prefix, extra_args=""):
        """Run wikiq with ``extra_args`` and compare its stdout to a baseline.

        ``prefix`` is prepended to the wikiq output name to form the name of
        the baseline file inside the baseline output directory.
        """
        call = self.base_call.format(self.input_file) + extra_args
        proc = subprocess.run(call, stdout=subprocess.PIPE, shell=True)
        # A crashed wikiq yields empty stdout; check the exit status so that
        # case is reported directly.
        self.assertEqual(proc.returncode, 0,
                         "wikiq exited with status {0}".format(proc.returncode))
        outs = proc.stdout.decode('utf-8')

        test_file = prefix + self.wikiq_out_name
        baseline_file = os.path.join(".", self.baseline_output_dir, test_file)

        test_lines = outs.splitlines(True)
        # Read the baseline with the same encoding used to decode stdout, and
        # close the file even when an assertion fails.
        with open(baseline_file, encoding="utf-8") as baseline_fh:
            baseline_lines = baseline_fh.readlines()

        for lineno, (test, baseline) in enumerate(zip(test_lines, baseline_lines), start=1):
            self.assertEqual(test, baseline, "line {0} differs".format(lineno))
        # zip() stops at the shorter input; without this check empty or
        # truncated output would pass silently.
        self.assertEqual(len(test_lines), len(baseline_lines),
                         "stdout and baseline have different line counts")

    def test_noargs(self):
        """wikiq --stdout output matches the 'noargs' baseline."""
        self._assert_stdout_matches_baseline("noargs_")

    def test_persistence(self):
        """wikiq --stdout --persistence output matches its baseline."""
        self._assert_stdout_matches_baseline("persistence_", " --persistence")
234 # test_file = "noargs_" + self.wikiq_out_name
235 # copyfile(self.call_output, os.path.join(self.test_output_dir, test_file))
237 # baseline_file = os.path.join(".", self.baseline_output_dir, test_file)
239 # test_lines = open(os.path.join(self.test_output_dir,test_file))
240 # baseline_lines = open(baseline_file)
241 # for test, baseline in zip(test_lines, baseline_lines):
242 # self.assertEqual(test,baseline)
244 # baseline_lines.close()
247 if __name__ == '__main__':