taguette-export_tags_to_csv.py

   1 #!/usr/bin/env python3
   2
   3 import re
   4 import json
   5 import sqlite3
   6 from configparser import ConfigParser
   7 import csv
   8 import os
   9
  10 config_files = [f for f in os.listdir() if f.startswith('.taguette_gdocs')]
  11
  12 for file_path in config_files:
  13
  14     config = ConfigParser()
  15     config.read(file_path)
  16
  17     ## this is project ID from the configuration
  18     project_id = int(config['General']['taguette_project_id'])
  19     taguette_database_file = config['General']['taguette_database_file']
  20
  21     # set output file name
  22     output_file_name = f'exported_tags/exported_tags_{project_id}.tsv'
  23
  24     ## connect to sqlite3
  25     con = sqlite3.connect(taguette_database_file)
  26     cur = con.cursor()
  27
  28     # Run this if you just want tags and no highlights
  29     sql_stmt_get = "SELECT id, path, description FROM tags WHERE project_id = ?"
  30
  31     cur.execute(sql_stmt_get, (project_id,))
  32
  33     with open(output_file_name, 'w', newline='') as output_file:
  34         writer = csv.writer(output_file, delimiter='\t')
  35         writer.writerow(['id', 'axial codes', 'tag', 'category', 'description', 'url'])
  36
  37         while True:
  38             row = cur.fetchone()
  39             if row == None:
  40                 break
  41
  42             tag_id, path, description = row
  43
  44             tag_match = re.match(r'^(.+)\_(.*)$', path)
  45             if tag_match:
  46                 axial = tag_match.group(1)
  47                 tag = tag_match.group(2)
  48             else:
  49                 axial = ""
  50                 tag = path
  51
  52             # look for extra category information stored in the description
  53             cat_match = re.match('^(.*)\s*(\{(.*)\})$', description)
  54             if cat_match:
  55                 description = cat_match.group(1)
  56                 category = json.loads(cat_match.group(2))["category"]
  57             else:
  58                 category = ""
  59
  60             # create a URL that will link to the list of highlights
  61             url = f"https://taguette.communitydata.science/project/{project_id}/highlights/{tag}"
  62
  63             writer.writerow([str(tag_id), axial, tag, category, description, url])