code.communitydata.science - taguette_google_sheet_integration.git/commitdiff
added support for merging and deleting tags master
author: Benjamin Mako Hill <mako@atdot.cc>
Tue, 28 Feb 2023 01:57:58 +0000 (17:57 -0800)
committer: Benjamin Mako Hill <mako@atdot.cc>
Tue, 28 Feb 2023 02:00:51 +0000 (18:00 -0800)
taguette-update_tags_from_sheet.py

index 1c3c735fd81a07597379ff2748b4a37dd3be0a7c..27e01b752e734ab087844e0c87ca8ee39eb382b8 100755 (executable)
@@ -17,7 +17,6 @@ for file_path in config_files:
 
     # this is project ID from the configuration
     project_id = int(config['General']['taguette_project_id'])
 
     # this is project ID from the configuration
     project_id = int(config['General']['taguette_project_id'])
-    print(project_id)
     taguette_database_file = config['General']['taguette_database_file']
 
     ## connect to sqlite3
     taguette_database_file = config['General']['taguette_database_file']
 
     ## connect to sqlite3
@@ -36,6 +35,8 @@ for file_path in config_files:
     # import taguette.database as tagdb
     # db = tagdb.connect("sqlite:////home/mako/taguette-snapshot-20210422-1.sqlite3")
 
     # import taguette.database as tagdb
     # db = tagdb.connect("sqlite:////home/mako/taguette-snapshot-20210422-1.sqlite3")
 
+    # dictionary to ensure that we don't have duplicate tags and to handle merges
+    seen_paths = {}
     for row in DictReader(csv_text.splitlines(), delimiter=","):
         #print(row)
         tag_id = row['id']
     for row in DictReader(csv_text.splitlines(), delimiter=","):
         #print(row)
         tag_id = row['id']
@@ -49,17 +50,30 @@ for file_path in config_files:
         tag_info = cur.fetchall()
 
         if len(tag_info) > 1:
         tag_info = cur.fetchall()
 
         if len(tag_info) > 1:
-            print(f"ERROR: '{id}' is not unique, SKIPPING")
+            print(f"ERROR: '{tag_id}' is not unique, SKIPPING")
         elif len(tag_info) == 0:
         elif len(tag_info) == 0:
-            print(f"ERROR: 'tag with ID {id}' does not exist, SKIPPING")
+            print(f"ERROR: 'tag with ID {tag_id}' does not exist, SKIPPING")
         else:
             oldname = tag_info[0][1]
             old_description = tag_info[0][2]
 
         else:
             oldname = tag_info[0][1]
             old_description = tag_info[0][2]
 
+            # delete any code and associated snippets
+            if new_name == "DELETE":
+                # delete all highlights
+                sql_stmt_update = "DELETE FROM highlight_tags WHERE tag_id = ?"
+                cur.execute(sql_stmt_update, (tag_id,))
+
+                # delete the tag itself
+                sql_stmt_update = "DELETE FROM tags WHERE project_id = ? AND id = ?"
+                cur.execute(sql_stmt_update, (project_id, tag_id))
+
+                print(f"DELETE TAG FROM {project_id}: {tag_id}")
+                continue
+
             if axial_code: 
             if axial_code: 
-                newname = axial_code.lower() + "_" + new_name.lower()
+                new_name = axial_code.lower() + "_" + new_name.lower()
             else:
             else:
-                newname = new_name.lower()
+                new_name = new_name.lower()
 
             new_description = description
             if description and category:
 
             new_description = description
             if description and category:
@@ -67,15 +81,35 @@ for file_path in config_files:
             if category:
                 new_description += json.dumps({'category' : category})
 
             if category:
                 new_description += json.dumps({'category' : category})
 
-            if not oldname == newname:
+            # merge tags if we have two with the same name
+            if new_name in seen_paths:
+                # identify what we're merging into
+                merge_keep_id = seen_paths[new_name]
+                merge_delete_id = tag_id
+
+                # reassociate all the highlights associated with tag 2 so they are associated with tag 1 instead
+                sql_stmt_update = "UPDATE highlight_tags SET tag_id = ? WHERE tag_id = ?"
+                cur.execute(sql_stmt_update, (merge_keep_id, merge_delete_id))
+                
+                # delete tag 2 itself now that its highlights have been moved to tag 1
+                sql_stmt_update = "DELETE FROM tags WHERE project_id = ? AND id = ?"
+                cur.execute(sql_stmt_update, (project_id, merge_delete_id))
+
+                print(f"MERGE DUPLICATE TAGS for {project_id}: {new_name}")
+                continue
+
+            else:
+                seen_paths[new_name] = tag_id
+
+            if not oldname == new_name:
                 sql_stmt_update = "UPDATE tags SET path = ? WHERE project_id = ? AND id = ?"
                 sql_stmt_update = "UPDATE tags SET path = ? WHERE project_id = ? AND id = ?"
-                cur.execute(sql_stmt_update, (newname, project_id, tag_id))
-                print(f"UPDATE TAG for {project_id}: {oldname} → {newname}")
+                print(f"UPDATE TAG for {project_id}: {oldname} → {new_name}")
+                cur.execute(sql_stmt_update, (new_name, project_id, tag_id))
                 
             if new_description.strip() != old_description.strip():
                 sql_stmt_update = "UPDATE tags SET description = ? WHERE project_id = ? AND id = ?"
                 
             if new_description.strip() != old_description.strip():
                 sql_stmt_update = "UPDATE tags SET description = ? WHERE project_id = ? AND id = ?"
+                print(f"UPDATE DESC for {project_id}: {old_description} → {new_description}")
                 cur.execute(sql_stmt_update, (new_description, project_id, tag_id))
                 cur.execute(sql_stmt_update, (new_description, project_id, tag_id))
-                print(f"UPDATE DESC for {project_id}:: {old_description} → {new_description}")
 
     con.commit()
     con.close()
 
     con.commit()
     con.close()

Community Data Science Collective || Want to submit a patch?