Merge branches 'master' and 'master' of /home/healspersecond/taguette_google_sheet_in...

author Benjamin Mako Hill <mako@atdot.cc>

Tue, 28 Feb 2023 00:54:38 +0000 (16:54 -0800)

committer Benjamin Mako Hill <mako@atdot.cc>

Tue, 28 Feb 2023 00:54:38 +0000 (16:54 -0800)
author Benjamin Mako Hill <mako@atdot.cc>
Tue, 28 Feb 2023 00:54:38 +0000 (16:54 -0800)
committer Benjamin Mako Hill <mako@atdot.cc>
Tue, 28 Feb 2023 00:54:38 +0000 (16:54 -0800)
diff --cc README.md

index d3aa877d64dea032f2a0104f8ca9c305d0ded937,d3aa877d64dea032f2a0104f8ca9c305d0ded937..0627be07c9cfcc357256820ee40d4a234768f520
--- 1/README.md
--- 2/README.md
+++ b/README.md
@@@ -10,10 -10,10 +10,11 @@@ In order to not commit your changes int
   git update-index --assume-unchanged .taguette_gdocs
   ```
   
--I also create a directory called `taguette_backups` like:
++I also create a directory called `taguette_backups` and one called `exported_tags` like:
   
   ```
   mkdir taguette_backups
++mkdir exported_tags
   ```
   
   ## Step 1: Backing things up
@@@ -52,7 -52,7 +53,7 @@@ sudo chown taguette:taguette /var/lib/t
   Exporting tags should be as easy as:
   
   ```
--python3 taguette-export_tags_to_csv.py > exported_tags.tsv
++python3 taguette-export_tags_to_csv.py
   ```
   
   This will create a new file called `exported_tags.tsv` which you can manually
diff --cc taguette-export_tags_to_csv.py

index 8c39554ca87089566564b203dc78dad742dc22f7,cf109db9e776cdbe1d460a9d84ac798c180f9662..d0fd28c4646bb40211cb179eca225d11174df2a3
--- 1/taguette-export_tags_to_csv.py
--- 2/taguette-export_tags_to_csv.py
+++ b/taguette-export_tags_to_csv.py
@@@ -1,54 -1,52 +1,63 @@@
   #!/usr/bin/env python3
   
   import re
+ +import json
   import sqlite3
   from configparser import ConfigParser
+ import csv
+ import os
   
- config = ConfigParser()
- config.read('.taguette_gdocs')
- -config_files = [f for f in os.listdir() if f.startswith('.taguette_gdocs_')]
++config_files = [f for f in os.listdir() if f.startswith('.taguette_gdocs')]
   
- ## this is project ID from the configuration
- project_id = int(config['General']['taguette_project_id'])
- taguette_database_file = config['General']['taguette_database_file']
+ for file_path in config_files:
   
+     config = ConfigParser()
+     config.read(file_path)
   
- ## connect to sqlite3
- con = sqlite3.connect(taguette_database_file)
- cur = con.cursor()
+     ## this is project ID from the configuration
+     project_id = int(config['General']['taguette_project_id'])
+     taguette_database_file = config['General']['taguette_database_file']
   
- # Run this if you just want tags and no highlights
- sql_stmt_get = "SELECT id, path, description FROM tags WHERE project_id = ?"
+     # set output file name
+     output_file_name = f'exported_tags/exported_tags_{project_id}.tsv'
   
- # Run this if you want tags AND highlights
- #sql_stmt_get = "SELECT tags.id, tags.path, tags.description, highlights.snippet FROM highlight_tags INNER JOIN tags ON highlight_tags.tag_id = tags.id INNER JOIN highlights ON highlight_tags.highlight_id = highlights.id WHERE project_id = ?"
- cur.execute(sql_stmt_get, (project_id,))
+     ## connect to sqlite3
+     con = sqlite3.connect(taguette_database_file)
+     cur = con.cursor()
   
- # print out a header
- print("\t".join(['id', 'axial codes', 'tags', 'category', 'description']))
+     # Run this if you just want tags and no highlights
+     sql_stmt_get = "SELECT id, path, description FROM tags WHERE project_id = ?"
   
- while True:
-     row = cur.fetchone()
-     if row == None:
-         break
-         
-     tag_id, path, description = row
- 
-     tag_match = re.match(r'^(.+)\_(.*)$', path) 
-     if tag_match:
-         axial = tag_match.group(1)
-         tag = tag_match.group(2)
-     else:
-         axial = ""
-         tag = path
- 
-     # look for extra category information stored in the description
-     cat_match = re.match('^(.*)\s*(\{(.*)\})$', description)
-     if cat_match:
-         description = cat_match.group(1)
-         category = json.loads(cat_match.group(2))["category"]
-     else:
-         category = ""
- -    # Run this if you want tags AND highlights
- -    #sql_stmt_get = "SELECT tags.id, tags.path, tags.description, highlights.snippet FROM highlight_tags INNER JOIN tags ON highlight_tags.tag_id = tags.id INNER JOIN highlights ON highlight_tags.highlight_id = highlights.id WHERE project_id = ?"
+     cur.execute(sql_stmt_get, (project_id,))
- -
- -
+ +    
-     print("\t".join([str(tag_id), axial, tag, category, description]))
+     with open(output_file_name, 'w', newline='') as output_file:
+         writer = csv.writer(output_file, delimiter='\t')
++        writer.writerow(['id', 'axial codes', 'tag', 'category', 'description', 'url'])
++
+         while True:
+             row = cur.fetchone()
+             if row == None:
+                 break
- -
++                
+             tag_id, path, description = row
+ 
- -            m = re.match(r'^(.+)\_(.*)$', path)
- -            if m:
- -                axial = m.group(1)
- -                tag = m.group(2)
++            tag_match = re.match(r'^(.+)\_(.*)$', path) 
++            if tag_match:
++                axial = tag_match.group(1)
++                tag = tag_match.group(2)
+             else:
+                 axial = ""
+                 tag = path
+ 
- -            writer.writerow([str(tag_id), axial, tag, description])
++            # look for extra category information stored in the description
++            cat_match = re.match('^(.*)\s*(\{(.*)\})$', description)
++            if cat_match:
++                description = cat_match.group(1)
++                category = json.loads(cat_match.group(2))["category"]
++            else:
++                category = ""
++        
++            # create a URL that will link to the list of highlights
++            url = f"https://taguette.communitydata.science/project/{project_id}/highlights/{tag}"
+ +
++            writer.writerow([str(tag_id), axial, tag, category, description, url])
author	Benjamin Mako Hill <mako@atdot.cc>
	Tue, 28 Feb 2023 00:54:38 +0000 (16:54 -0800)
committer	Benjamin Mako Hill <mako@atdot.cc>
	Tue, 28 Feb 2023 00:54:38 +0000 (16:54 -0800)
		1	2
README.md	patch |	diff1 |	diff2 |	blob | history
taguette-export_tags_to_csv.py	patch |	diff1 |	diff2 |	blob | history