]> code.communitydata.science - covid19.git/blob - transliterations/src/wikidata_search.py
Merge pull request #2 from aaronshaw/patch-1
[covid19.git] / transliterations / src / wikidata_search.py
1 # generate a list of wikidata entities related to keywords
2 from os import path
3 from sys import stdout
4 from wikidata_api_calls import search_wikidata, get_wikidata_api
5 import csv
6
class Wikidata_ResultSet:
    """Accumulate Wikidata search hits across search terms and dump them as CSV.

    Hits are stored in ``self.results`` as ``Wikidata_Result`` objects, in the
    order they were added.
    """

    def __init__(self):
        self.results = []

    def extend(self, term, results):
        """Append one ``Wikidata_Result`` per item in *results*.

        Args:
            term: the search term that produced *results*.
            results: iterable of search-result records; each item's 0-based
                index in the iterable is recorded as its search position.
        """
        self.results.extend([Wikidata_Result(term, result, i)
                             for i, result in enumerate(results)])

    def to_csv(self, outfile=None):
        """Write a header row plus one row per stored result as CSV.

        Args:
            outfile: path of the file to (over)write. When ``None`` the rows
                go to standard output instead.
        """
        if outfile is None:
            # stdout is owned by the interpreter; write to it but never close it.
            self._write_rows(stdout)
            return

        # newline='' lets the csv module control line endings itself.  The
        # context manager guarantees the file is flushed and closed even if
        # writing a row raises.
        with open(outfile, 'w', newline='') as of:
            self._write_rows(of)

    def _write_rows(self, stream):
        """Emit the header (the result slot names) and all result rows to *stream*."""
        writer = csv.writer(stream)
        writer.writerow(Wikidata_Result.__slots__)
        writer.writerows(map(Wikidata_Result.to_list, self.results))
25
26
class Wikidata_Result:
    """A single hit returned by a Wikidata search.

    Records the (whitespace-stripped) search term, the entity id taken from
    the hit's ``'title'``, its integer page id, the hit's position within the
    search results, and the hit's ``'timestamp'`` value as delivered.
    """

    # Slot order doubles as the CSV column order used by the result set.
    __slots__ = ['search_term', 'entityid', 'pageid', 'search_position', 'timestamp']

    def __init__(self, term, search_result, position):
        """Build a result from one search-result record.

        Args:
            term: the search term; surrounding whitespace is removed.
            search_result: mapping providing ``'title'``, ``'pageid'`` and
                ``'timestamp'`` keys.
            position: rank of this hit in the result list; coerced to ``int``.
        """
        cleaned_term = term.strip()
        self.search_term = cleaned_term

        self.entityid = search_result['title']

        page_number = int(search_result['pageid'])
        self.pageid = page_number

        rank = int(position)
        self.search_position = rank

        self.timestamp = search_result['timestamp']

    def to_list(self):
        """Return the field values as a list, in ``__slots__`` order."""
        return [getattr(self, field) for field in Wikidata_Result.__slots__]
48     
def run_wikidata_searches(terms_file = '../data/input/base_terms.txt', outfile="../data/output/wikidata_search_results.csv"):
    """Search Wikidata for every term in *terms_file* and save the hits as CSV.

    Args:
        terms_file: path to a text file with one search term per line.
            Blank lines are ignored.
        outfile: path of the CSV file to write; passed to
            ``Wikidata_ResultSet.to_csv``.
    """
    resultset = Wikidata_ResultSet()

    # Use a context manager so the terms file is closed once all terms are read.
    with open(terms_file, 'r') as terms:
        for line in terms:
            # Drop the trailing newline (and stray whitespace) so the query
            # sent to the search matches the term that gets recorded.
            term = line.strip()
            if not term:
                # An empty line is not a search term.
                continue

            # A fresh API handle is requested for every term, as before;
            # whether one handle could be shared is not visible from here.
            api = get_wikidata_api()
            search_results = search_wikidata(api, term)
            resultset.extend(term, search_results)

    resultset.to_csv(outfile)
58
59
60     ## search each of the base terms in wikidata
61
62     # store unique entities found in the search results, the position in the search result, and the date
63
# Script entry point: when executed directly (not imported), run the searches
# using the default input and output paths.
if __name__ == "__main__":
    run_wikidata_searches()

Community Data Science Collective || Want to submit a patch?