"""Generate a list of Wikidata entities related to keywords.

Each search term read from a text file is searched in Wikidata, and every
search hit is recorded together with its position in the result list and
the timestamp reported with the hit. The collected hits are written as CSV.
"""
import csv
import io
from os import path
from sys import stdout
from wikidata_api_calls import search_wikidata, get_wikidata_api

# Column order of the CSV output; mirrors the attributes of Wikidata_Result.
_CSV_COLUMNS = ('search_term', 'entityid', 'pageid', 'search_position', 'timestamp')


def _csv_line(fields):
    """Return *fields* as one CSV record terminated by a newline.

    The csv module is used instead of a bare ``','.join`` so that a field
    containing a comma, a double quote or a line break is quoted rather than
    silently corrupting the row. Fields without such characters are emitted
    unchanged.
    """
    buffer = io.StringIO()
    csv.writer(buffer, lineterminator='\n').writerow(fields)
    return buffer.getvalue()


class Wikidata_ResultSet(object):
    """Accumulates Wikidata_Result rows across several searches."""

    def __init__(self):
        self.results = []

    def extend(self, term, results):
        """Append one Wikidata_Result per item of *results*.

        term: the search term that produced *results*.
        results: iterable of search hits; each hit is indexed by the keys
            'title', 'pageid' and 'timestamp' (see Wikidata_Result).

        The zero-based index of a hit within *results* is stored as its
        search position.
        """
        self.results.extend([Wikidata_Result(term, result, i)
                             for i, result in enumerate(results)])

    def to_csv(self, outfile=None):
        """Write a header line followed by every stored result as CSV.

        outfile: path of the file to (over)write. When None, the rows are
            written to standard output instead.
        """
        if outfile is None:
            # stdout belongs to the process, not to this method: write to it
            # but never close it, so later output still works.
            self._write_rows(stdout)
            return

        # The context manager closes the file even when a write fails.
        with open(outfile, 'w') as of:
            self._write_rows(of)

    def _write_rows(self, of):
        """Write the CSV header and all result rows to the open file *of*."""
        of.write(_csv_line(_CSV_COLUMNS))
        for result in self.results:
            of.write(result.to_csv())


class Wikidata_Result(object):
    """One search hit: the entity found, its position in the search
    results, and the timestamp attached to the hit."""

    __slots__ = ['search_term', 'entityid', 'pageid', 'search_position', 'timestamp']

    def __init__(self,
                 term,
                 search_result,
                 position):
        """Build a result row.

        term: search term; surrounding whitespace is stripped.
        search_result: a single search hit, read through its 'title',
            'pageid' and 'timestamp' keys.
        position: index of the hit within its result list.
        """
        self.search_term = term.strip()
        self.entityid = search_result['title']
        self.pageid = search_result['pageid']
        self.search_position = position
        self.timestamp = search_result['timestamp']

    def to_csv(self):
        """Return this result as a single newline-terminated CSV line."""
        return _csv_line([self.search_term,
                          self.entityid,
                          str(self.pageid),
                          str(self.search_position),
                          str(self.timestamp)])


def run_wikidata_searches(terms_file='../data/input/base_terms.txt',
                          outfile="../data/output/wikidata_search_results.csv"):
    """Search each of the base terms in Wikidata and save the hits as CSV.

    terms_file: text file with one search term per line.
    outfile: destination CSV path, passed to Wikidata_ResultSet.to_csv.
    """
    resultset = Wikidata_ResultSet()

    # Close the terms file deterministically once all lines are consumed.
    with open(terms_file, 'r') as terms:
        for line in terms:
            # Drop the trailing newline before querying, and skip blank
            # lines so no search is issued for an empty term.
            term = line.strip()
            if not term:
                continue

            # NOTE(review): a fresh API handle is requested for every term,
            # as before; hoisting it out of the loop looks possible but
            # depends on what get_wikidata_api does - confirm.
            api = get_wikidata_api()
            search_results = search_wikidata(api, term)
            resultset.extend(term, search_results)

    resultset.to_csv(outfile)


if __name__ == "__main__":
    run_wikidata_searches()