#!/usr/bin/env python3

import re
import json
import sqlite3
from configparser import ConfigParser
import csv
import os
import urllib.parse

# Splits a tag path of the form "<axial codes>_<tag>" on its LAST underscore
# (the first group is greedy and must be non-empty).
TAG_PATH_RE = re.compile(r'^(.+)_(.*)$')


def split_tag_path(path):
    """Split a tag path into ``(axial, tag)`` on its last underscore.

    When the path has no underscore preceded by at least one character,
    the axial part is empty and the whole path is returned as the tag.
    """
    tag_match = TAG_PATH_RE.match(path)
    if tag_match:
        return tag_match.group(1), tag_match.group(2)
    return "", path


def split_description(description):
    """Separate a trailing JSON ``{"category": ...}`` object from a description.

    Returns ``(description_text, category)``. If the description does not end
    with a JSON object that has a "category" key, the description is returned
    unchanged with an empty category, so free text that merely ends in braces
    no longer aborts the export. A missing (None) or empty description yields
    ``("", "")``.
    """
    if not description:
        return "", ""

    text = description.rstrip()
    if text.endswith("}"):
        # Try every "{" from the left so that nested JSON objects are parsed
        # whole, while stray braces earlier in the free text are skipped.
        start = text.find("{")
        while start != -1:
            try:
                payload = json.loads(text[start:])
            except ValueError:  # json.JSONDecodeError is a ValueError
                payload = None
            if isinstance(payload, dict) and "category" in payload:
                # Drop the whitespace separating the text from the JSON blob.
                return text[:start].rstrip(), payload["category"]
            start = text.find("{", start + 1)

    return description, ""


def export_project_tags(file_path):
    """Export the tags of the project named in one config file to a TSV file.

    The config file must have a [General] section providing
    ``taguette_project_id`` and ``taguette_database_file``. The output is
    written to ``exported_tags/exported_tags_<project_id>.tsv`` relative to
    the current working directory; the directory is created if needed.

    Returns the path of the file that was written.
    Raises KeyError if a required config entry is missing and sqlite3.Error
    if the database cannot be queried.
    """
    config = ConfigParser()
    config.read(file_path)

    # project ID and database location come from the configuration
    project_id = int(config['General']['taguette_project_id'])
    taguette_database_file = config['General']['taguette_database_file']

    # set output file name
    output_file_name = f'exported_tags/exported_tags_{project_id}.tsv'

    # Only the tags themselves are exported here, not their highlights.
    sql_stmt_get = "SELECT id, path, description FROM tags WHERE project_id = ?"

    con = sqlite3.connect(taguette_database_file)
    try:
        cur = con.cursor()
        cur.execute(sql_stmt_get, (project_id,))

        # The query runs before the output is touched so that a database
        # failure does not leave an empty export file behind.
        os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
        with open(output_file_name, 'w', newline='', encoding='utf-8') as output_file:
            writer = csv.writer(output_file, delimiter='\t')
            writer.writerow(['id', 'axial codes', 'tag', 'category', 'description', 'url'])

            for tag_id, path, description in cur:
                axial, tag = split_tag_path(path)

                # look for extra category information stored in the description
                description, category = split_description(description)

                # create a URL that will link to the list of highlights
                # NOTE(review): the link is built from the trailing tag segment
                # only, not the full tag path -- confirm this is what the
                # Taguette highlights route expects.
                tag_url = urllib.parse.quote(tag)
                url = f"https://taguette.communitydata.science/project/{project_id}/highlights/{tag_url}"

                writer.writerow([str(tag_id), axial, tag, category, description, url])
    finally:
        # Always release the database handle, even if the export fails midway.
        con.close()

    return output_file_name


# Every file in the working directory whose name starts with '.taguette_gdocs'
# is treated as the configuration of one project to export.
config_files = [f for f in os.listdir() if f.startswith('.taguette_gdocs')]

for file_path in config_files:
    export_project_tags(file_path)