From: Benjamin Mako Hill Date: Tue, 28 Feb 2023 00:54:38 +0000 (-0800) Subject: Merge branches 'master' and 'master' of /home/healspersecond/taguette_google_sheet_in... X-Git-Url: https://code.communitydata.science/taguette_google_sheet_integration.git/commitdiff_plain/6926d9d22320e3ae1247fe11ab1e2ba83680232b Merge branches 'master' and 'master' of /home/healspersecond/taguette_google_sheet_integration fix some bugs and integrate the changes from the branches from charlie and mako --- 6926d9d22320e3ae1247fe11ab1e2ba83680232b diff --cc README.md index d3aa877,d3aa877..0627be0 --- a/README.md +++ b/README.md @@@ -10,10 -10,10 +10,11 @@@ In order to not commit your changes int git update-index --assume-unchanged .taguette_gdocs ``` --I also create a directory called `taguette_backups` like: ++I also create a directory called `taguette_backups` and one called `exported_tags` like: ``` mkdir taguette_backups ++mkdir exported_tags ``` ## Step 1: Backing things up @@@ -52,7 -52,7 +53,7 @@@ sudo chown taguette:taguette /var/lib/t Exporting tags should be as easy as: ``` --python3 taguette-export_tags_to_csv.py > exported_tags.tsv ++python3 taguette-export_tags_to_csv.py ``` This will create a new file called `exported_tags.tsv` which you can manually diff --cc taguette-export_tags_to_csv.py index 8c39554,cf109db..d0fd28c --- a/taguette-export_tags_to_csv.py +++ b/taguette-export_tags_to_csv.py @@@ -1,54 -1,52 +1,63 @@@ #!/usr/bin/env python3 import re +import json import sqlite3 from configparser import ConfigParser + import csv + import os - config = ConfigParser() - config.read('.taguette_gdocs') -config_files = [f for f in os.listdir() if f.startswith('.taguette_gdocs_')] ++config_files = [f for f in os.listdir() if f.startswith('.taguette_gdocs')] - ## this is project ID from the configuration - project_id = int(config['General']['taguette_project_id']) - taguette_database_file = config['General']['taguette_database_file'] + for file_path in config_files: + config = ConfigParser() + config.read(file_path) - ## connect to sqlite3 - con = sqlite3.connect(taguette_database_file) - cur = con.cursor() + ## this is project ID from the configuration + project_id = int(config['General']['taguette_project_id']) + taguette_database_file = config['General']['taguette_database_file'] - # Run this if you just want tags and no highlights - sql_stmt_get = "SELECT id, path, description FROM tags WHERE project_id = ?" + # set output file name + output_file_name = f'exported_tags/exported_tags_{project_id}.tsv' - # Run this if you want tags AND highlights - #sql_stmt_get = "SELECT tags.id, tags.path, tags.description, highlights.snippet FROM highlight_tags INNER JOIN tags ON highlight_tags.tag_id = tags.id INNER JOIN highlights ON highlight_tags.highlight_id = highlights.id WHERE project_id = ?" - cur.execute(sql_stmt_get, (project_id,)) + ## connect to sqlite3 + con = sqlite3.connect(taguette_database_file) + cur = con.cursor() - # print out a header - print("\t".join(['id', 'axial codes', 'tags', 'category', 'description'])) + # Run this if you just want tags and no highlights + sql_stmt_get = "SELECT id, path, description FROM tags WHERE project_id = ?" - while True: - row = cur.fetchone() - if row == None: - break - - tag_id, path, description = row - - tag_match = re.match(r'^(.+)\_(.*)$', path) - if tag_match: - axial = tag_match.group(1) - tag = tag_match.group(2) - else: - axial = "" - tag = path - - # look for extra category information stored in the description - cat_match = re.match('^(.*)\s*(\{(.*)\})$', description) - if cat_match: - description = cat_match.group(1) - category = json.loads(cat_match.group(2))["category"] - else: - category = "" - # Run this if you want tags AND highlights - #sql_stmt_get = "SELECT tags.id, tags.path, tags.description, highlights.snippet FROM highlight_tags INNER JOIN tags ON highlight_tags.tag_id = tags.id INNER JOIN highlights ON highlight_tags.highlight_id = highlights.id WHERE project_id = ?" + cur.execute(sql_stmt_get, (project_id,)) - - + - print("\t".join([str(tag_id), axial, tag, category, description])) + with open(output_file_name, 'w', newline='') as output_file: + writer = csv.writer(output_file, delimiter='\t') ++ writer.writerow(['id', 'axial codes', 'tag', 'category', 'description', 'url']) ++ + while True: + row = cur.fetchone() + if row == None: + break - ++ + tag_id, path, description = row + - m = re.match(r'^(.+)\_(.*)$', path) - if m: - axial = m.group(1) - tag = m.group(2) ++ tag_match = re.match(r'^(.+)\_(.*)$', path) ++ if tag_match: ++ axial = tag_match.group(1) ++ tag = tag_match.group(2) + else: + axial = "" + tag = path + - writer.writerow([str(tag_id), axial, tag, description]) ++ # look for extra category information stored in the description ++ cat_match = re.match('^(.*)\s*(\{(.*)\})$', description) ++ if cat_match: ++ description = cat_match.group(1) ++ category = json.loads(cat_match.group(2))["category"] ++ else: ++ category = "" ++ ++ # create a URL that will link to the list of highlights ++ url = f"https://taguette.communitydata.science/project/{project_id}/highlights/{tag}" + ++ writer.writerow([str(tag_id), axial, tag, category, description, url])