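Keep better track of time: date-stamp the Wikidata entity-labels output file and add a write-mode option to the CSV output functions.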

author Nathan TeBlunthuis <nathante@uw.edu>

Sat, 28 Mar 2020 20:49:19 +0000 (13:49 -0700)

committer Nathan TeBlunthuis <nathante@uw.edu>

Sat, 28 Mar 2020 20:52:54 +0000 (13:52 -0700)
author Nathan TeBlunthuis <nathante@uw.edu>
Sat, 28 Mar 2020 20:49:19 +0000 (13:49 -0700)
committer Nathan TeBlunthuis <nathante@uw.edu>
Sat, 28 Mar 2020 20:52:54 +0000 (13:52 -0700)
diff --git a/transliterations/src/compile_transliterated_phrases.sh b/transliterations/src/compile_transliterated_phrases.sh

index 09f3bb5a66027bc46b5f179a6fe42e2b7a03e2f7..55fe211555b12d7a0d3040e51d93e0b9a6c5f7a3 100755 (executable)
--- a/transliterations/src/compile_transliterated_phrases.sh
+++ b/transliterations/src/compile_transliterated_phrases.sh
@@ -12,4 +12,5 @@ echo "Searching for Wikidata entities using Google trends"
  python3 wikidata_search.py ../data/output/related_searches_rising.csv ../data/output/related_searches_top.csv --use-gtrends --output ../data/output/wikidata_search_results_from_gtrends.csv
  
  echo "Finding transliterations from Wikidata using sparql"
  python3 wikidata_search.py ../data/output/related_searches_rising.csv ../data/output/related_searches_top.csv --use-gtrends --output ../data/output/wikidata_search_results_from_gtrends.csv
  
  echo "Finding transliterations from Wikidata using sparql"
-python3 wikidata_transliterations.py  ../data/output/wikidata_search_results_from_gtrends.csv  ../data/output/wikidata_search_results.csv --topN 10 20 --output ../data/output/wikidata_entity_labels.csv
+python3 wikidata_transliterations.py  ../data/output/wikidata_search_results_from_gtrends.csv  ../data/output/wikidata_search_results.csv --topN 10 20 --output ../data/output/$(date '+%Y-%m-%d')_wikidata_entity_labels.csv
+
diff --git a/transliterations/src/wikidata_search.py b/transliterations/src/wikidata_search.py

index e774f68980b03b4b8e4dc4190eca4a102a520ea8..21e8598a2045f3cbdafb09bef848ff57adcd0116 100644 (file)
--- a/transliterations/src/wikidata_search.py
+++ b/transliterations/src/wikidata_search.py
@@ -15,12 +15,15 @@ class Wikidata_ResultSet:
               for i, result in enumerate(results))
          )
  
               for i, result in enumerate(results))
          )
  
-    def to_csv(self, outfile=None):
+    def to_csv(self, outfile=None, mode='w'):
          if outfile is None:
              of = stdout
  
          else:
          if outfile is None:
              of = stdout
  
          else:
-            of = open(outfile,'w',newline='')
+            if path.exists(outfile) and mode != 'w':
+                of = open(outfile,'a',newline='')
+            else:
+                of = open(outfile,'w',newline='')
          writer = csv.writer(of)
          writer.writerow(Wikidata_Result.__slots__)
          writer.writerows(map(Wikidata_Result.to_list, chain(* self.results)))
          writer = csv.writer(of)
          writer.writerow(Wikidata_Result.__slots__)
          writer.writerows(map(Wikidata_Result.to_list, chain(* self.results)))
@@ -64,15 +67,15 @@ def read_google_trends_files(terms_files):
          yield row['query']
  
  
          yield row['query']
  
  
-def trawl_google_trends(terms_files, outfile = None):
+def trawl_google_trends(terms_files, outfile = None, mode='w'):
      terms = read_google_trends_files(terms_files)
      resultset = run_wikidata_searches(terms)
      terms = read_google_trends_files(terms_files)
      resultset = run_wikidata_searches(terms)
-    resultset.to_csv(outfile)
+    resultset.to_csv(outfile, mode)
  
  
-def trawl_base_terms(infiles, outfile = None):
+def trawl_base_terms(infiles, outfile = None, mode='w'):
      terms = chain(* (open(infile,'r') for infile in infiles))
      resultset = run_wikidata_searches(terms)
      terms = chain(* (open(infile,'r') for infile in infiles))
      resultset = run_wikidata_searches(terms)
-    resultset.to_csv(outfile)
+    resultset.to_csv(outfile, mode)
  
      ## search each of the base terms in wikidata
  
  
      ## search each of the base terms in wikidata
  
@@ -84,6 +87,7 @@ if __name__ == "__main__":
      parser.add_argument('inputs', type=str, nargs='+', help='one or more files to read')
      parser.add_argument('--use-gtrends', action='store_true', help = 'toggle whether the input is the output from google trends')
      parser.add_argument('--output', type=str, help='an output file. defaults to stdout')
      parser.add_argument('inputs', type=str, nargs='+', help='one or more files to read')
      parser.add_argument('--use-gtrends', action='store_true', help = 'toggle whether the input is the output from google trends')
      parser.add_argument('--output', type=str, help='an output file. defaults to stdout')
+    parser.add_argument('--overwrite', action='store_true', help = 'overwrite existing output files instead of appending')
      args = parser.parse_args()
      if args.use_gtrends:
          trawl_google_trends(args.inputs, args.output)
      args = parser.parse_args()
      if args.use_gtrends:
          trawl_google_trends(args.inputs, args.output)
author	Nathan TeBlunthuis <nathante@uw.edu>
	Sat, 28 Mar 2020 20:49:19 +0000 (13:49 -0700)
committer	Nathan TeBlunthuis <nathante@uw.edu>
	Sat, 28 Mar 2020 20:52:54 +0000 (13:52 -0700)
transliterations/src/compile_transliterated_phrases.sh		patch | blob | history
transliterations/src/wikidata_search.py		patch | blob | history