code.communitydata.science - taguette_google_sheet_integration.git/blobdiff - taguette-export_tags_to_csv.py
Merge branches 'master' and 'master' of /home/healspersecond/taguette_google_sheet_in...
[taguette_google_sheet_integration.git] / taguette-export_tags_to_csv.py
index cf109db9e776cdbe1d460a9d84ac798c180f9662..d0fd28c4646bb40211cb179eca225d11174df2a3 100755 (executable)
@@ -1,12 +1,13 @@
 #!/usr/bin/env python3
 
 import re
+import json
 import sqlite3
 from configparser import ConfigParser
 import csv
 import os
 
-config_files = [f for f in os.listdir() if f.startswith('.taguette_gdocs_')]
+config_files = [f for f in os.listdir() if f.startswith('.taguette_gdocs')]
 
 for file_path in config_files:
 
@@ -27,26 +28,36 @@ for file_path in config_files:
     # Run this if you just want tags and no highlights
     sql_stmt_get = "SELECT id, path, description FROM tags WHERE project_id = ?"
 
-    # Run this if you want tags AND highlights
-    #sql_stmt_get = "SELECT tags.id, tags.path, tags.description, highlights.snippet FROM highlight_tags INNER JOIN tags ON highlight_tags.tag_id = tags.id INNER JOIN highlights ON highlight_tags.highlight_id = highlights.id WHERE project_id = ?"
     cur.execute(sql_stmt_get, (project_id,))
-
-
+    
     with open(output_file_name, 'w', newline='') as output_file:
         writer = csv.writer(output_file, delimiter='\t')
+        writer.writerow(['id', 'axial codes', 'tag', 'category', 'description', 'url'])
+
         while True:
             row = cur.fetchone()
             if row == None:
                 break
-
+                
             tag_id, path, description = row
 
-            m = re.match(r'^(.+)\_(.*)$', path)
-            if m:
-                axial = m.group(1)
-                tag = m.group(2)
+            tag_match = re.match(r'^(.+)\_(.*)$', path) 
+            if tag_match:
+                axial = tag_match.group(1)
+                tag = tag_match.group(2)
             else:
                 axial = ""
                 tag = path
 
-            writer.writerow([str(tag_id), axial, tag, description])
\ No newline at end of file
+            # look for extra category information stored as a trailing {...} JSON object in the description
+            cat_match = re.match(r'^(.*)\s*(\{(.*)\})$', description)
+            if cat_match:
+                description = cat_match.group(1).rstrip()  # greedy (.*) keeps the whitespace before '{'; drop it
+                category = json.loads(cat_match.group(2))["category"]
+            else:
+                category = ""
+        
+            # create a URL that will link to the list of highlights
+            url = f"https://taguette.communitydata.science/project/{project_id}/highlights/{tag}"
+
+            writer.writerow([str(tag_id), axial, tag, category, description, url])

Community Data Science Collective || Want to submit a patch?