#!/usr/bin/python3
"""gnomonicus is a CLI script which links together the functions defined in gnomonicus.py to
    produce variants, mutations and an antibiogram from a minos VCF, reference genome and a resistance
    catalogue.
"""
import argparse
import logging
import os
import time

import gumpy
import piezo

import gnomonicus
from gnomonicus import (
    loadGenome,
    populateEffects,
    populateMutations,
    populateVariants,
    saveJSON,
)

def _build_parser():
    """Build the argument parser describing the gnomonicus command-line interface.

    Returns:
        argparse.ArgumentParser: a parser with every gnomonicus option registered.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v",
        "--version",
        action="version",
        version=f"gnomonicus {gnomonicus.__version__}",
    )
    parser.add_argument(
        "--vcf_file", required=True, help="the path to a single VCF file"
    )
    parser.add_argument(
        "--genome_object",
        required=True,
        help="the path to a compressed gumpy Genome object or a genbank file",
    )
    parser.add_argument(
        "--catalogue_file",
        default=None,
        required=False,
        help="the path to the resistance catalogue",
    )
    parser.add_argument(
        "--ignore_vcf_filter",
        action="store_true",
        default=False,
        help="whether to ignore the FILTER field in the vcf (e.g. necessary for some versions of Clockwork VCFs)",
    )
    parser.add_argument(
        "--progress",
        action="store_true",
        default=False,
        help="whether to show progress using tqdm",
    )
    parser.add_argument(
        "--output_dir",
        required=False,
        default=".",
        help="Directory to save output files to. Defaults to wherever the script is run from.",
    )
    parser.add_argument(
        "--json",
        required=False,
        action="store_true",
        default=False,
        help="Flag to create a single JSON output as well as the CSVs",
    )
    parser.add_argument(
        "--fasta",
        required=False,
        default=None,
        help="Use to output a FASTA file of the resultant genome. Specify either 'fixed' or 'variable' for fixed length and variable length FASTA respectively. Alternatively, use 'both' to produce both",
    )
    parser.add_argument(
        "--minor_populations",
        required=False,
        default=None,
        help="Path to a line separated file containing genome indices of minor populations.",
    )
    parser.add_argument(
        "--csvs",
        required=False,
        nargs="+",
        help="Types of CSV to produce. Accepted values are [variants, mutations, effects, predictions, all]. `all` produces all of the CSVs",
    )
    parser.add_argument(
        "--debug",
        default=False,
        action="store_true",
        help="Whether to log debugging messages to the log. Defaults to False",
    )
    parser.add_argument(
        "--resistance_genes",
        required=False,
        default=False,
        action="store_true",
        help="Flag to filter mutations and variants to only include genes present in the resistance catalogue",
    )
    parser.add_argument(
        "--fasta_adjudication",
        required=False,
        default=None,
        help="Path to a FASTA file to use for null rule adjudication. This returns matches for null-call rules in cases of corresponding `N` values in a FASTA file.",
    )
    parser.add_argument(
        "--min_dp",
        type=int,
        required=False,
        default=None,
        help="Minimum depth for a variant to be considered in the VCF. Below this value, rows are interpreted as null calls. Should allow for skipping FASTA adjudication in cases of low depth while adding coverage data.",
    )
    return parser


def _select_csvs(csvs):
    """Work out which CSV outputs were requested through `--csvs`.

    Args:
        csvs (list[str] | None): values given to `--csvs`, or None if the option
            was not used.

    Returns:
        tuple[bool, bool, bool, bool]: whether to write the variants, mutations,
            effects and predictions CSVs, in that order.
    """
    if csvs is None:
        return False, False, False, False
    # `all` switches every CSV on regardless of what else was listed
    make_all = "all" in csvs
    return tuple(
        make_all or name in csvs
        for name in ("variants", "mutations", "effects", "predictions")
    )


def _load_minor_indices(path):
    """Read genome indices of minor populations from a line separated file.

    Blank lines are skipped so that a trailing empty line does not abort the run.

    Args:
        path (str): path to the file, holding one integer index per line.

    Returns:
        list[int]: the indices in file order.

    Raises:
        ValueError: if a non-blank line is not a valid integer. The message gives
            the 1-based line number and the offending text.
    """
    minor_indices = []
    with open(path) as f:
        # Convert line by line so we can produce a nice error if invalid
        for line_number, line in enumerate(f, start=1):
            index = line.strip()
            if not index:
                continue
            try:
                minor_indices.append(int(index))
            except ValueError as e:
                raise ValueError(
                    f"Invalid index on line {line_number}: {index}"
                ) from e
    return minor_indices


if __name__ == "__main__":
    # Started before argument parsing so the reported run time covers the whole script
    start = time.time()
    options = _build_parser().parse_args()

    options.output_dir = os.path.realpath(options.output_dir)

    # Make the output directory if it doesn't already exist
    os.makedirs(options.output_dir, exist_ok=True)

    # Get the stem of the VCF filename for use as a unique ID
    vcfStem = os.path.split(options.vcf_file)[-1].replace(".vcf", "")

    # Logging setup. Without --debug the level is WARNING, so the INFO and DEBUG
    # messages below are only written to the log file when --debug is given
    LOG_LEVEL = logging.DEBUG if options.debug else logging.WARNING
    logging.basicConfig(
        filename=os.path.join(options.output_dir, f"{vcfStem}.gnomonicus.log"),
        filemode="w",
        format="%(asctime)s -  %(levelname)s - %(message)s",
        datefmt="%d-%b-%y %H:%M:%S",
        level=LOG_LEVEL,
    )
    logging.info(f"gnomonicus starting with output directory {options.output_dir}")

    # Figure out which CSVs should be produced
    (
        make_variants_csv,
        make_mutations_csv,
        make_effects_csv,
        make_prediction_csv,
    ) = _select_csvs(options.csvs)

    if options.csvs is None and options.json is False and options.fasta is None:
        # No files will be created so warn the user
        print(
            "[WARNING]: No outputs selected. No files will be created. For help, try `gnomonicus --help`"
        )
        logging.warning("No outputs selected. No files will be created")

    # Get reference genome
    reference = loadGenome(options.genome_object, options.progress)
    logging.debug("Loaded reference genome")

    # Check if we have minor population indices
    if options.minor_populations is not None:
        minor_indices = _load_minor_indices(options.minor_populations)
    else:
        minor_indices = []

    # Build the mutated genome using gumpy
    vcf = gumpy.VCFFile(
        options.vcf_file,
        ignore_filter=options.ignore_vcf_filter,
        minor_population_indices=minor_indices,
        min_dp=options.min_dp,
    )
    sample = reference + vcf
    logging.debug("Applied the VCF to the reference")

    # Get resistance catalogue
    if options.catalogue_file:
        resistanceCatalogue = piezo.ResistanceCatalogue(
            options.catalogue_file, prediction_subset_only=options.resistance_genes
        )
        logging.debug("Loaded resistance catalogue")
    else:
        resistanceCatalogue = None
        logging.info(
            "No resistance catalogue provided, producing variants and mutations only"
        )

    # Get the GenomeDifference for extracting genome level mutations
    diff = reference - sample
    logging.debug("Got the genome difference")

    # Complain if there are no variants
    if diff.variants is None:
        logging.error("No variants detected!")
        raise Exception("No variants detected!")

    # Get the variations and mutations
    variants = populateVariants(
        vcfStem,
        options.output_dir,
        diff,
        make_variants_csv,
        options.resistance_genes,
        catalogue=resistanceCatalogue,
    )
    logging.debug("Populated and saved variants.csv")

    mutations, referenceGenes, minor_errors = populateMutations(
        vcfStem,
        options.output_dir,
        diff,
        reference,
        sample,
        resistanceCatalogue,
        make_mutations_csv,
        options.resistance_genes,
    )
    if mutations is None:
        logging.info(
            "No mutations found - probably due to exclusively inter-gene variation or no variation.\n\t\t\t\t\t\t\t No effects.csv written"
        )
    else:
        logging.debug("Populated and saved mutations.csv")

    # Get the effects and predictions of the mutations
    if resistanceCatalogue is not None:
        # populateEffects also returns mutations, which replaces the value
        # obtained from populateMutations above
        effects, phenotypes, mutations = populateEffects(
            options.output_dir,
            resistanceCatalogue,
            mutations,
            referenceGenes,
            vcfStem,
            make_effects_csv,
            make_prediction_csv,
            fasta=options.fasta_adjudication,
            reference=reference,
            sample_genome=sample,
            make_mutations_csv=make_mutations_csv,
        )
        logging.debug("Populated and saved effects.csv and predictions.csv")
    else:
        phenotypes = {}
        effects = None
        logging.info(
            "Skipped effects.csv due to lack of resistance catalogue or mutations"
        )

    # Add data to the log
    logging.info("********** Successfully completed **********")

    logging.info(f"VCF file: {options.vcf_file}")
    logging.info(f"Reference genome file: {options.genome_object}")

    if resistanceCatalogue:
        logging.info(
            f"Catalogue reference genome: {resistanceCatalogue.catalogue.genbank_reference}"
        )
        logging.info(f"Catalogue name: {resistanceCatalogue.catalogue.name}")
        logging.info(f"Catalogue version: {resistanceCatalogue.catalogue.version}")
        logging.info(f"Catalogue grammar: {resistanceCatalogue.catalogue.grammar}")
        logging.info(f"Catalogue values: {resistanceCatalogue.catalogue.values}")
        logging.info(f"Catalogue path: {options.catalogue_file}")
    for drug in sorted(phenotypes.keys()):
        logging.info(f"{drug} {phenotypes[drug]}")
    logging.info(f"Completed in {time.time() - start}s")

    if options.json:
        # NOTE(review): the logged path does not include the VCF stem, although the
        # stem is passed to saveJSON - confirm it matches the file actually written
        logging.info(f"Saving a JSON... See {options.output_dir}/gnomonicus-out.json")
        saveJSON(
            variants,
            mutations,
            effects,
            phenotypes,
            options.output_dir,
            vcfStem,
            resistanceCatalogue,
            gnomonicus.__version__,
            time.time() - start,
            reference,
            options.vcf_file,
            options.genome_object,
            options.catalogue_file,
            minor_errors,
        )

    if options.fasta:
        # The value is matched case-insensitively, but the file name keeps the
        # casing the user typed
        fasta_mode = options.fasta.lower()
        if fasta_mode in ("fixed", "variable"):
            # Write the resultant fasta file
            sample.save_fasta(
                os.path.join(
                    options.output_dir, vcfStem + "-" + options.fasta + ".fasta"
                ),
                fixed_length=fasta_mode == "fixed",
            )
        elif fasta_mode == "both":
            sample.save_fasta(
                os.path.join(options.output_dir, vcfStem + "-fixed.fasta"),
                fixed_length=True,
            )
            sample.save_fasta(
                os.path.join(options.output_dir, vcfStem + "-variable.fasta"),
                fixed_length=False,
            )
        else:
            # Tell the user why no FASTA appears rather than silently writing nothing
            print(
                f"[WARNING]: Unrecognised --fasta value '{options.fasta}'. No FASTA file will be created. Expected 'fixed', 'variable' or 'both'"
            )
            logging.warning(
                f"Unrecognised --fasta value '{options.fasta}'. No FASTA file will be created"
            )
