]> code.communitydata.science - covid19.git/blobdiff - transliterations/src/static/gitweb.js
Finish MVP for transliterations
[covid19.git] / transliterations / src / static / gitweb.js
diff --git a/translations/src/find_entities.py b/translations/src/find_entities.py
deleted file mode 100644 (file)
index 6edce05..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-# generate a list of wikidata entities related to keywords
-from os import path
-from sys import stdout
-from wikidata_api_calls import search_wikidata, get_wikidata_api
-
-class Wikidata_ResultSet(object):
-    def __init__(self):
-        self.results = []
-
-    def extend(self, term, results):
-        self.results.extend([Wikidata_Result(term, result, i)
-                                    for i, result in enumerate(results)])
-
-    def to_csv(self, outfile=None):
-        
-        header = ','.join(['search_term', 'entityid', 'pageid', 'search_position','timestamp']) + '\n'
-        if outfile is None:
-            of = stdout
-
-        else:
-            of = open(outfile,'w')
-
-        of.write(header)
-        for result in self.results:
-            of.write(result.to_csv())
-
-        of.close()
-
-
-class Wikidata_Result(object):
-    # store unique entities found in the search results, the position in the search result, and the date
-    __slots__=['search_term','entityid','pageid','search_position','timestamp']
-
-    def __init__(self,
-                 term,
-                 search_result,
-                 position):
-
-        self.search_term = term.strip()
-        self.entityid = search_result['title']
-        self.pageid = search_result['pageid']
-        self.search_position = position
-        self.timestamp = search_result['timestamp']
-
-    def to_csv(self):
-        return ','.join([self.search_term,
-                         self.entityid,
-                         str(self.pageid),
-                         str(self.search_position),
-                         str(self.timestamp)]) + '\n'
-    
-def run_wikidata_searches(terms_file = '../data/input/base_terms.txt', outfile="../data/output/wikidata_search_results.csv"):
-
-    resultset = Wikidata_ResultSet()
-    for term in open(terms_file,'r'):
-        api = get_wikidata_api()
-        search_results = search_wikidata(api, term)
-        resultset.extend(term, search_results)
-
-    resultset.to_csv(outfile)
-
-
-    ## search each of the base terms in wikidata
-
-    # store unique entities found in the search results, the position in the search result, and the date
-
-if __name__ == "__main__":
-    run_wikidata_searches()

Community Data Science Collective || Want to submit a patch?