#! /usr/bin/env python3
import os
import re
import argparse
import sys
from tokenize import tokenize, tok_name, INDENT, DEDENT, NAME

from pygyat import VERSION_NUMBER, GYAT2PY_MAPPINGS
from pygyat.logger import Logger


def ends_in_py(word):
    """
    Tell whether a filename carries the '.py' extension.

    Args:
        word (str):     Filename to check

    Returns:
        boolean: True if 'word' ends with '.py', otherwise False
    """
    return word.endswith(".py")


def change_file_name(name, outputname=None):
    """
    Derive the name of the *.gyat file that corresponds to a Python file.

    A trailing '.py' is swapped for '.gyat'; any other name simply gets '.gyat'
    appended. An explicit 'outputname' always wins.

    Args:
        name (str):         Filename to edit
        outputname (str):   Optional. Overrides result of function.

    Returns:
        str: 'outputname' if it was given (anything other than None), otherwise
        'name' with its '.py' suffix (if any) replaced by '.gyat'.
    """
    # An explicit override short-circuits everything else
    if outputname is not None:
        return outputname

    # Drop the '.py' suffix when present, then attach the new extension
    stem = name[:-3] if ends_in_py(name) else name
    return stem + ".gyat"


def translate_dictionary(definition_string):
    """
    Translate one specific dictionary definition from using {} to using dict()

    NOTE: This is a text-based heuristic, not a parser. Every '{' and '}' is
    removed and pairs are separated at every comma, so nested containers and
    string values that contain braces or commas are not handled.

    Args:
        definition_string (str):    A string with a dictionary definition
                                    (including '=' beforehand)

    Returns:
        str: An equivalent definition (including '='), but using the
        dict()-constructor instead of { and }

    Raises:
        ValueError: If a non-empty pair has no ':' separating key and value
    """
    # Remove only the first '=' (the assignment). Removing every '=' would
    # silently corrupt values such as "x=y".
    definition_string = re.sub(r"\s*=\s*", "", definition_string, count=1)

    # Remove { and }
    definition_string = re.sub(r"[{}]", "", definition_string)

    # Remove newlines
    definition_string = re.sub(r"\s*\n\s*", "", definition_string)

    # Key is everything up to the first ':' that is not inside a quoted string,
    # so keys and values such as "a:b" or "http://x" stay in one piece.
    pair_pattern = re.compile(
        r"""((?:"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'|[^:"'])*?)\s*:\s*(.*)"""
    )

    # Convert each pair to a tuple definition
    tuples = []
    for pair in re.split(r"\s*,\s*", definition_string):
        # Skip the empty pieces left by '{}' or a trailing comma
        if pair.strip() == "":
            continue

        match = pair_pattern.match(pair)
        if match is None:
            raise ValueError("Not a 'key: value' pair: %r" % pair)

        tuples.append("(%s, %s)" % (match.group(1), match.group(2)))

    if not tuples:
        return "= dict()"
    return "= dict([%s])" % ", ".join(tuples)


def pre_reverse_parse(infile_string):
    """
    Perform some necessary changes to the file before reverse parsing can ensue.
    This includes changing dictionary definitions written with { and } into
    calls to the dict()-constructor, using translate_dictionary().

    Args:
        infile_string (str):    A string containing the whole python source

    Returns:
        str: The source with changes to dictionary definitions
    """
    dictionaries = re.findall(
        r"=\s*{\s*(?:.+\s*:\s*.+(?:\s*,\s*)?)*\s*}", infile_string
    )

    for dictionary in dictionaries:
        # Replace the matched text literally. Feeding it to re.sub() as a
        # pattern would interpret characters such as '(', '[', '.' or '+' as
        # regex syntax, and backslashes in the replacement as escapes.
        infile_string = infile_string.replace(
            dictionary, translate_dictionary(dictionary)
        )

    return infile_string


def safe_substitute(value, deescaped_key, line):
    """
    Renames Pygyat tokens found in a line of code, but ignores tokens inside of strings.

    Every whole-word occurrence of 'value' outside of single- or double-quoted
    strings is replaced by '<deescaped_key>_is_not_valid_python'.
    TODO: Can be extended to ignore tokens inside of comments as well.

    Args:
        value (str):             Pygyat token
        deescaped_key (str):     Python token
        line (str):              Code line

    Returns:
        str: Code line with every occurrence of 'value' outside of string
        literals renamed
    """
    # A token spelled the same in both languages cannot clash with anything;
    # renaming it would only break valid code.
    if value == deescaped_key:
        return line

    # Match single or double-quoted strings, honouring backslash escapes
    string_pattern = r"""(?P<string>(['"])(?:\\.|(?!\2).)*\2)"""

    # The token as a whole word, not directly next to a quote or after a '#'
    token_pattern = rf'(?<!["\'#])\b{re.escape(value)}\b(?!["\'])'

    replacement = f"{deescaped_key}_is_not_valid_python"

    def replace_callback(match):
        # String literals are matched first and handed back untouched, so a
        # token inside a string is never seen by the token alternative.
        if match.group("string") is not None:
            return match.group(0)
        return replacement

    return re.sub(string_pattern + "|" + token_pattern, replace_callback, line)


def reverse_parse(filename, outputname):
    """
    Changes a Python file to a PyGyat file

    The file is processed line by line. A trailing comment is set aside, each
    entry of GYAT2PY_MAPPINGS is applied to the remaining code (first
    safe_substitute() for the PyGyat token, then replacing the Python token
    with the PyGyat token), and the comment is re-attached unchanged.

    Every piece obtained by splitting the source on newlines is written back
    followed by a newline.

    Args:
        filename (str):     Path of file to parse
        outputname (str):   Path of destination file
    """
    # Read the whole source as text; the context manager closes the handle
    with open(filename, "r", encoding="utf-8") as infile:
        source = infile.read()

    # Prepare every mapping once instead of once per line
    substitutions = []
    for gyat_token, python_token in GYAT2PY_MAPPINGS.items():
        # Mapping keys may spell whitespace as the regex '\s+'; emit a plain space
        if "\\s" in gyat_token:
            gyat_token = gyat_token.replace("\\s+", " ")
        # The Python token as a whole word, not next to a quote or after a '#'
        python_pattern = re.compile(
            r'(?<!["\'#])\b{}\b(?!["\'])'.format(re.escape(python_token))
        )
        substitutions.append((gyat_token, python_token, python_pattern))

    comment_pattern = re.compile(r"[ \t]*(#.*$)")
    quoted_hash_pattern = re.compile(r"[\"'].*#.*[\"']")

    translated_lines = []
    for line in source.split("\n"):
        # Search for comments, and remove for now. Re-add them before writing
        # to the result.
        # NOTE(review): the quoted-'#' check only inspects the text from the
        # first '#' onwards, so a '#' inside a string literal is usually still
        # treated as the start of a comment.
        add_comment = ""
        comment_match = comment_pattern.search(line)
        if (
            comment_match is not None
            and quoted_hash_pattern.search(comment_match.group(0)) is None
        ):
            add_comment = comment_match.group(0)
            line = comment_pattern.sub("", line)

        # Lines without code only carry their comment (if any) through
        if line.strip() == "":
            translated_lines.append(add_comment + "\n")
            continue

        # Disallow PyGyat in the original Python file, then swap each Python
        # token for its PyGyat counterpart
        for gyat_token, python_token, python_pattern in substitutions:
            line = safe_substitute(gyat_token, python_token, line)
            line = python_pattern.sub(gyat_token, line)

        translated_lines.append(line + add_comment + "\n")

    # Save the file; the context manager flushes and closes it
    with open(outputname, "w", encoding="utf-8") as outfile:
        outfile.write("".join(translated_lines))


def main():
    """
    Translate python to pygyat

    Command line entry point. Parses the arguments, copies the input file to a
    temporary '<input>.py2gyattemp' file, runs reverse_parse() on that copy and
    removes the copy again.

    A missing file is logged as an error. Any other exception is logged and
    the process exits with status 1.
    """
    argparser = argparse.ArgumentParser(
        "py2gyat",
        description="py2gyat translates python to pygyat",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    argparser.add_argument(
        "-v",
        "--version",
        action="version",
        version="py2gyat is a part of PyGyat v%s\nShamith Pasula 2024" % VERSION_NUMBER,
    )
    argparser.add_argument(
        "-o", "--output", type=str, help="specify name of output file", nargs=1
    )
    argparser.add_argument("input", type=str, help="python file to translate", nargs=1)

    cmd_args = argparser.parse_args()

    logger = Logger()

    # nargs=1 makes argparse deliver one-element lists
    input_name = cmd_args.input[0]
    temp_name = input_name + ".py2gyattemp"

    try:
        outputname = (
            cmd_args.output[0]
            if cmd_args.output is not None
            else change_file_name(input_name, None)
        )

        with open(input_name, "r", encoding="utf-8") as infile:
            infile_string = infile.read()

        try:
            # NOTE: pre_reverse_parse() is currently not applied; the source
            # is copied to the temporary file verbatim.
            with open(temp_name, "w", encoding="utf-8") as tempoutfile:
                tempoutfile.write(infile_string)

            reverse_parse(temp_name, outputname)
        finally:
            # Never leave the temporary copy behind, even if translation failed
            if os.path.exists(temp_name):
                os.remove(temp_name)

    except FileNotFoundError:
        logger.log_error("No file named %s" % input_name)

    except Exception as e:
        logger.log_error("Unexpected error: %s" % str(e))
        sys.exit(1)


# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module
if __name__ == "__main__":
    main()
