code.communitydata.science - taguette_google_sheet_integration.git/blob - taguette-export_tags_to_csv.py
added support for merging and deleting tags
[taguette_google_sheet_integration.git] / taguette-export_tags_to_csv.py
1 #!/usr/bin/env python3
2
3 import re
4 import json
5 import sqlite3
6 from configparser import ConfigParser
7 import csv
8 import os
9 import urllib.parse
10
# Prefix that marks a file in the working directory as a project config.
CONFIG_FILE_PREFIX = '.taguette_gdocs'

# Header row written at the top of every exported TSV file.
TSV_HEADER = ['id', 'axial codes', 'tag', 'category', 'description', 'url']

SQL_SELECT_TAGS = "SELECT id, path, description FROM tags WHERE project_id = ?"

# "<axial>_<tag>": the first group is greedy, so the split happens at the
# LAST underscore in the path.
TAG_PATH_RE = re.compile(r'^(.+)\_(.*)$')

# A description that ends in a "{...}" blob. The first group is greedy, so
# any whitespace in front of the blob stays in group 1 (the "\s*" matches
# the empty string); that is kept as-is so exported descriptions do not
# change.
CATEGORY_RE = re.compile(r'^(.*)\s*(\{(.*)\})$')


def split_tag_path(path):
    """Split a tag path into ``(axial, tag)`` at its last underscore.

    A path without an underscore (or with nothing in front of its only
    underscore) has no axial code: ``("", path)`` is returned.
    """
    match = TAG_PATH_RE.match(path)
    if match:
        return match.group(1), match.group(2)
    return "", path


def split_description(description):
    """Separate a trailing JSON blob from a tag description.

    Returns ``(description, category)``. When the description ends in a
    ``{...}`` blob that parses as JSON, the blob is removed from the
    description and its ``"category"`` value is returned (``""`` when the
    key is absent). When there is no blob, or the braces do not hold valid
    JSON, the description is returned untouched with an empty category
    instead of aborting the whole export. A ``None`` description (SQL NULL)
    is treated as empty.
    """
    if description is None:
        return "", ""

    match = CATEGORY_RE.match(description)
    if not match:
        return description, ""

    try:
        metadata = json.loads(match.group(2))
    except ValueError:
        # Braces that are ordinary text rather than JSON metadata.
        return description, ""

    return match.group(1), metadata.get("category", "")


def export_project_tags(config_path):
    """Export the tags of the project named in *config_path* to a TSV file.

    The config file must have a ``[General]`` section with
    ``taguette_project_id`` and ``taguette_database_file``. The output is
    written to ``exported_tags/exported_tags_<project_id>.tsv`` relative to
    the current working directory; the directory is created if needed.
    """
    config = ConfigParser()
    config.read(config_path)

    ## this is project ID from the configuration
    project_id = int(config['General']['taguette_project_id'])
    taguette_database_file = config['General']['taguette_database_file']

    # set output file name
    output_file_name = f'exported_tags/exported_tags_{project_id}.tsv'

    ## connect to sqlite3
    con = sqlite3.connect(taguette_database_file)
    try:
        # Query first so a database error does not truncate an existing
        # export file.
        cur = con.cursor()
        cur.execute(SQL_SELECT_TAGS, (project_id,))

        os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
        with open(output_file_name, 'w', newline='') as output_file:
            writer = csv.writer(output_file, delimiter='\t')
            writer.writerow(TSV_HEADER)

            for tag_id, path, description in cur:
                axial, tag = split_tag_path(path)

                # look for extra category information stored in the
                # description
                description, category = split_description(description)

                # create a URL that will link to the list of highlights
                tag_url = urllib.parse.quote(tag)
                url = f"https://taguette.communitydata.science/project/{project_id}/highlights/{tag_url}"

                writer.writerow(
                    [str(tag_id), axial, tag, category, description, url]
                )
    finally:
        # The original never closed the connection; one was leaked per
        # config file.
        con.close()


def main():
    """Export tags for every config file found in the working directory."""
    config_files = [
        f for f in os.listdir() if f.startswith(CONFIG_FILE_PREFIX)
    ]
    for file_path in config_files:
        export_project_tags(file_path)


if __name__ == "__main__":
    main()

Community Data Science Collective || Want to submit a patch?