]> code.communitydata.science - taguette_google_sheet_integration.git/blob - taguette-update_tags_from_sheet.py
added support for merging and deleting tags
[taguette_google_sheet_integration.git] / taguette-update_tags_from_sheet.py
1 #!/usr/bin/env python3
2
3 import requests
4 from csv import DictReader
5 import sqlite3
6 from configparser import ConfigParser
7 import re
8 import json
9 import os
10
11 config_files = [f for f in os.listdir() if f.startswith('.taguette_gdocs')]
12
13 for file_path in config_files:
14
15     config = ConfigParser()
16     config.read(file_path)
17
18     # this is project ID from the configuration
19     project_id = int(config['General']['taguette_project_id'])
20     taguette_database_file = config['General']['taguette_database_file']
21
22     ## connect to sqlite3
23     con = sqlite3.connect(taguette_database_file)
24     cur = con.cursor()
25
26     # load the googgle sheet ID from the configuration
27     gsheet_id = config['General']['gsheet_id']
28     gsheet_gid = config['General']['gsheet_gid']
29
30     # get the spreadsheet data
31     axial_url = f"https://docs.google.com/spreadsheets/d/{gsheet_id}/export?format=csv&id={gsheet_id}&gid={gsheet_gid}"
32     rv = requests.get(axial_url)
33     csv_text = rv.content.decode('utf-8')
34
35     # import taguette.database as tagdb
36     # db = tagdb.connect("sqlite:////home/mako/taguette-snapshot-20210422-1.sqlite3")
37
38     # ditionary to ensure that we don't have duplicate tags and to handle merges
39     seen_paths = {}
40     for row in DictReader(csv_text.splitlines(), delimiter=","):
41         #print(row)
42         tag_id = row['id']
43         new_name = row['tag']
44         axial_code = row['axial codes']
45         category = row['category']
46         description = row['description']
47
48         sql_stmt_get = "SELECT id, path, description from TAGS where id = ? AND project_id = ?"
49         cur.execute(sql_stmt_get, (tag_id, project_id))
50         tag_info = cur.fetchall()
51
52         if len(tag_info) > 1:
53             print(f"ERROR: '{tag_id}' is not unique, SKIPPING")
54         elif len(tag_info) == 0:
55             print(f"ERROR: 'tag with ID {tag_id}' does not exist, SKIPPING")
56         else:
57             oldname = tag_info[0][1]
58             old_description = tag_info[0][2]
59
60             # delete any code and associated snipits
61             if new_name == "DELETE":
62                 # delete all highlights
63                 sql_stmt_update = "DELETE FROM highlight_tags WHERE tag_id = ?"
64                 cur.execute(sql_stmt_update, (tag_id,))
65
66                 # delete the tag itself
67                 sql_stmt_update = "DELETE FROM tags WHERE project_id = ? AND id = ?"
68                 cur.execute(sql_stmt_update, (project_id, tag_id))
69
70                 print(f"DELETE TAG FROM {project_id}: {tag_id}")
71                 continue
72
73             if axial_code: 
74                 new_name = axial_code.lower() + "_" + new_name.lower()
75             else:
76                 new_name = new_name.lower()
77
78             new_description = description
79             if description and category:
80                 new_description += " "
81             if category:
82                 new_description += json.dumps({'category' : category})
83
84             # merge tags if we have two with the same name
85             if new_name in seen_paths:
86                 # identify what we're merging into
87                 merge_keep_id = seen_paths[new_name]
88                 merge_delete_id = tag_id
89
90                 # reassociate all the highlight associated with tag 2 so they are associated with tag 1 intsead
91                 sql_stmt_update = "UPDATE highlight_tags SET tag_id = ? WHERE tag_id = ?"
92                 cur.execute(sql_stmt_update, (merge_keep_id, merge_delete_id))
93                 
94                 # reassociate all the highlight associated with tag 2 so they are associated with tag 1 intsead
95                 sql_stmt_update = "DELETE FROM tags WHERE project_id = ? AND id = ?"
96                 cur.execute(sql_stmt_update, (project_id, merge_delete_id))
97
98                 print(f"MERGE DUPLICATE TAGS for {project_id}: {new_name}")
99                 continue
100
101             else:
102                 seen_paths[new_name] = tag_id
103
104             if not oldname == new_name:
105                 sql_stmt_update = "UPDATE tags SET path = ? WHERE project_id = ? AND id = ?"
106                 print(f"UPDATE TAG for {project_id}: {oldname} → {new_name}")
107                 cur.execute(sql_stmt_update, (new_name, project_id, tag_id))
108                 
109             if new_description.strip() != old_description.strip():
110                 sql_stmt_update = "UPDATE tags SET description = ? WHERE project_id = ? AND id = ?"
111                 print(f"UPDATE DESC for {project_id}: {old_description} → {new_description}")
112                 cur.execute(sql_stmt_update, (new_description, project_id, tag_id))
113
114     con.commit()
115     con.close()
116
117

Community Data Science Collective || Want to submit a patch?