1 # generate a list of wikidata entities related to keywords
4 from wikidata_api_calls import search_wikidata, get_wikidata_api
7 class Wikidata_ResultSet:
def extend(self, term, results):
    """Wrap every raw hit in *results* and add the wrappers to ``self.results``.

    Each hit is turned into a ``Wikidata_Result`` built from *term*, the hit
    itself and the hit's zero-based index within *results*.
    """
    wrapped_hits = []
    for position, hit in enumerate(results):
        wrapped_hits.append(Wikidata_Result(term, hit, position))

    # Extend in one step, only after every hit has been wrapped successfully.
    self.results.extend(wrapped_hits)
15 def to_csv(self, outfile=None):
20 of = open(outfile,'w',newline='')
22 writer = csv.writer(of)
23 writer.writerow(Wikidata_Result.__slots__)
24 writer.writerows(map(Wikidata_Result.to_list, self.results))
27 class Wikidata_Result:
    # one search hit: the term that was searched, the entity found (title and page id), its position in the search results, and the hit's timestamp
29 __slots__=['search_term','entityid','pageid','search_position','timestamp']
36 self.search_term = term.strip()
37 self.entityid = search_result['title']
38 self.pageid = int(search_result['pageid'])
39 self.search_position = int(position)
40 self.timestamp = search_result['timestamp']
43 return [self.search_term,
def run_wikidata_searches(terms_file = '../data/input/base_terms.txt', outfile="../data/output/wikidata_search_results.csv"):
    """Search Wikidata for every term in *terms_file* and write the hits out.

    Each line of *terms_file* is used as one search term. The hits for all
    terms are collected in a single ``Wikidata_ResultSet``, which is written
    with ``to_csv(outfile)`` once every term has been searched.

    Args:
        terms_file: Path of the text file to read, one search term per line.
        outfile: Destination passed unchanged to ``Wikidata_ResultSet.to_csv``.
    """
    resultset = Wikidata_ResultSet()

    # A context manager guarantees the terms file is closed, even if one of
    # the searches below raises part-way through the file.
    with open(terms_file, 'r') as terms:
        for term in terms:
            # The line is passed on exactly as read (trailing newline included).
            # NOTE(review): a new API handle is requested for every term;
            # confirm whether get_wikidata_api() could be called once instead.
            api = get_wikidata_api()
            search_results = search_wikidata(api, term)
            resultset.extend(term, search_results)

    resultset.to_csv(outfile)
60 ## search each of the base terms in wikidata
62 # store unique entities found in the search results, the position in the search result, and the date
# Script entry point: when this file is executed directly (not imported),
# run the searches using the default terms file and output path.
if __name__ == "__main__":
    run_wikidata_searches()