import itertools
import os
import sys
import unittest
from shutil import copyfile

# Test coverage checklist:
#   with / without pwr             DONE
#   with / without url encode      DONE
#   with / without collapse user   DONE
#   with output to stdout
#   note that the persistence radius is 7 by default
#   reading various file formats including
#       7z, gz, bz2, xml
#   wikia and wikipedia data
#   malformed xmls


class Test_Basic(unittest.TestCase):
    """Regression tests comparing wikiq output against stored baselines.

    Each test runs ``../wikiq`` on ``./dumps/sailormoon.xml.7z`` with one
    set of command-line flags, copies the resulting TSV to a file named
    after the test case, and compares it line by line with the file of the
    same name in ``./baseline_output``.
    """

    def setUp(self):
        """Set up the paths shared by all tests and create the output dir."""
        self.wiki = 'sailormoon'
        self.wikiq_out_name = self.wiki + ".tsv"
        self.test_output_dir = os.path.join(".", "test_output")
        # File the tests expect wikiq to have produced after each call.
        self.call_output = os.path.join(self.test_output_dir,
                                        self.wikiq_out_name)

        self.infile = "{0}.xml.7z".format(self.wiki)
        # Template: {0} is the input dump, {1} is the output directory.
        self.base_call = "../wikiq {0} -o {1}"
        self.input_dir = "dumps"
        self.input_file = os.path.join(".", self.input_dir, self.infile)
        self.baseline_output_dir = "baseline_output"

        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(self.test_output_dir, exist_ok=True)

    def _run_and_compare(self, prefix, extra_args=""):
        """Run wikiq once and compare its output with the stored baseline.

        Args:
            prefix: Name of the test case; the output is copied to
                ``<prefix>_<wiki>.tsv`` and compared with the baseline file
                of the same name.
            extra_args: Additional command-line flags appended to the base
                wikiq call (empty string for none).

        Raises:
            AssertionError: If wikiq exits with a non-zero status, or if
                the output and the baseline differ in any line or in their
                number of lines.
        """
        call = self.base_call.format(self.input_file, self.test_output_dir)
        if extra_args:
            call = call + " " + extra_args

        # Remove output left behind by a previous run so that a failed
        # invocation cannot be "verified" against stale data.
        if os.path.exists(self.call_output):
            os.remove(self.call_output)

        status = os.system(call)
        self.assertEqual(
            status, 0,
            "command failed with status {0}: {1}".format(status, call))

        test_file = prefix + "_" + self.wikiq_out_name
        test_path = os.path.join(self.test_output_dir, test_file)
        copyfile(self.call_output, test_path)

        baseline_path = os.path.join(".", self.baseline_output_dir, test_file)

        # zip_longest pads the shorter file with None, so a truncated or
        # over-long output fails instead of being silently accepted the way
        # plain zip() would allow.
        with open(test_path) as test_lines, \
                open(baseline_path) as baseline_lines:
            line_pairs = itertools.zip_longest(test_lines, baseline_lines)
            for lineno, (test, baseline) in enumerate(line_pairs, start=1):
                self.assertEqual(
                    test, baseline,
                    "{0} differs from {1} at line {2}".format(
                        test_path, baseline_path, lineno))

    def test_noargs(self):
        """Output with no extra flags matches the baseline."""
        self._run_and_compare("noargs")

    def test_collapse_user(self):
        """Output with --collapse-user matches the baseline."""
        self._run_and_compare("collapse-user", "--collapse-user")

    def test_pwr(self):
        """Output with --persistence matches the baseline."""
        self._run_and_compare("persistence", "--persistence")

    def test_url_encode(self):
        """Output with --url-encode matches the baseline."""
        self._run_and_compare("url-encode", "--url-encode")


if __name__ == '__main__':
    unittest.main()