1 # generate a list of wikidata entities related to keywords
4 from wikidata_api_calls import search_wikidata, get_wikidata_api
class Wikidata_ResultSet(object):
    """Accumulate Wikidata search hits across terms and dump them as CSV."""

    def __init__(self):
        # Flat list of result rows, kept in the order they were added.
        self.results = []

    def extend(self, term, results):
        """Append one Wikidata_Result per hit returned for ``term``.

        Args:
            term: The search string that produced ``results``.
            results: Iterable of search hits; each hit's position is its
                index within this iterable.
        """
        self.results.extend(Wikidata_Result(term, result, i)
                            for i, result in enumerate(results))

    def to_csv(self, outfile=None):
        """Write a header line followed by one CSV row per stored result.

        Args:
            outfile: Path of the file to (over)write. When ``None`` the
                rows are written to standard output instead.
        """
        header = ','.join(['search_term',
                           'entityid',
                           'pageid',
                           'search_position',
                           'timestamp']) + '\n'

        if outfile is None:
            import sys
            # stdout belongs to the interpreter, so it is written to but
            # deliberately not closed.
            self._write_rows(sys.stdout, header)
            return

        # The context manager closes the file even if a row fails to write.
        with open(outfile, 'w') as of:
            self._write_rows(of, header)

    def _write_rows(self, stream, header):
        """Write ``header`` and every result's CSV row to ``stream``."""
        stream.write(header)
        for result in self.results:
            stream.write(result.to_csv())
class Wikidata_Result(object):
    """One Wikidata search hit, tagged with the term and rank that found it."""

    # Fixed attribute set; __slots__ avoids a per-instance __dict__.
    __slots__ = ['search_term', 'entityid', 'pageid', 'search_position', 'timestamp']

    def __init__(self, term, search_result, position):
        """Capture the fields of a single search hit.

        Args:
            term: The search string; surrounding whitespace is stripped.
            search_result: Mapping with 'title', 'pageid' and 'timestamp'
                keys describing the hit.
            position: Index of this hit within the results for ``term``.
        """
        self.search_term = term.strip()
        self.entityid = search_result['title']
        self.pageid = search_result['pageid']
        self.search_position = position
        self.timestamp = search_result['timestamp']

    def to_csv(self):
        """Return this hit as one newline-terminated CSV row.

        Columns are search_term, entityid, pageid, search_position and
        timestamp, in that order. Fields containing commas, quotes or
        newlines are quoted so a search term cannot shift the columns.
        """
        import csv
        import io

        row = [self.search_term,
               self.entityid,
               str(self.pageid),
               str(self.search_position),
               str(self.timestamp)]
        buf = io.StringIO()
        # lineterminator='\n' keeps the row ending identical to a plain join.
        csv.writer(buf, lineterminator='\n').writerow(row)
        return buf.getvalue()
def run_wikidata_searches(terms_file='../data/input/base_terms.txt', outfile="../data/output/wikidata_search_results.csv"):
    """Search Wikidata for every term in ``terms_file`` and save the hits.

    Args:
        terms_file: Path to a text file holding one search term per line.
            Whitespace-only lines are skipped.
        outfile: Destination handed to ``Wikidata_ResultSet.to_csv``.
    """
    resultset = Wikidata_ResultSet()

    # The context manager closes the terms file even if a search raises.
    with open(terms_file, 'r') as terms:
        for term in terms:
            if not term.strip():
                # A blank line is not a search term; do not send it to the API.
                continue
            # NOTE(review): a fresh API handle is requested per term, as before;
            # hoist this above the loop if the handle is confirmed reusable.
            api = get_wikidata_api()
            search_results = search_wikidata(api, term)
            resultset.extend(term, search_results)

    resultset.to_csv(outfile)
63 ## search each of the base terms in wikidata
65 # store unique entities found in the search results, the position in the search result, and the date
# Run the searches with the default input and output paths when this file is
# executed as a script.
if __name__ == "__main__":
    run_wikidata_searches()