#!/usr/bin/python3
'''gnomon is a CLI script which links together the functions defined in gnomon.py to
    produce variants, mutations and an antibiogram from a minos VCF, a reference genome and
    a resistance catalogue
'''
import argparse
import logging
import os
import time

import gumpy
import piezo

from gnomon import loadGenome, NoVariantsException, populateVariants, populateMutations, populateEffects, saveJSON, toAltJSON
import gnomon

if __name__ == "__main__":
    #Script entry point. In order, this:
    #   1. parses the command line arguments and sets up a log file in the output directory
    #   2. loads the reference genome and applies the VCF to it to build the sample genome
    #   3. writes the variants and mutations, plus the effects when a catalogue was given
    #   4. optionally writes the JSON / alternate JSON / FASTA outputs
    #Raises NoVariantsException if the genome difference reports no variants
    start = time.time()
    #Argparser setup
    parser = argparse.ArgumentParser()
    parser.add_argument("--vcf_file",required=True,help="the path to a single VCF file")
    parser.add_argument("--genome_object",required=True,help="the path to a compressed gumpy Genome object or a genbank file")
    parser.add_argument("--catalogue_file",default=None,required=False,help="the path to the resistance catalogue")
    parser.add_argument("--ignore_vcf_filter",action='store_true',default=False,help="whether to ignore the FILTER field in the vcf (e.g. necessary for some versions of Clockwork VCFs)")
    parser.add_argument("--progress",action='store_true',default=False,help="whether to show progress using tqdm")
    parser.add_argument("--output_dir", required=False, default=".", help="Directory to save output files to. Defaults to wherever the script is run from.")
    parser.add_argument("--json", required=False, action='store_true', default=False, help="Flag to create a single JSON output as well as the CSVs")
    parser.add_argument("--alt_json", required=False, default=False, action='store_true', help="Whether to produce the alternate JSON format. Requires the --json flag too")
    parser.add_argument("--fasta", required=False, default=None, help="Use to output a FASTA file of the resultant genome. Specify either 'fixed' or 'variable' for fixed length and variable length FASTA respectively.")
    options = parser.parse_args()

    options.output_dir = os.path.realpath(options.output_dir)

    #Make the output directory if it doesn't already exist
    os.makedirs(options.output_dir, exist_ok=True)

    #Get the stem of the VCF filename for use as a unique ID
    #This is everything before the first '.' in the file's basename
    vcfStem = os.path.split(options.vcf_file)[-1].split(".")[0]

    #Logging setup
    logging.basicConfig(filename=os.path.join(options.output_dir, f'{vcfStem}.gnomon.log'), filemode='w', format='%(asctime)s -  %(levelname)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S', level=logging.DEBUG)
    logging.info(f"Gnomon starting with output directory {options.output_dir}")


    #Get reference genome
    reference = loadGenome(options.genome_object, options.progress)
    logging.debug("Loaded reference genome")

    #Build the mutated genome using gumpy
    vcf = gumpy.VCFFile(options.vcf_file, ignore_filter=options.ignore_vcf_filter)
    sample = reference + vcf
    logging.debug("Applied the VCF to the reference")

    #Get resistance catalogue
    #The catalogue is optional, so everything below must cope with it being None
    if options.catalogue_file:
        resistanceCatalogue = piezo.ResistanceCatalogue(options.catalogue_file, prediction_subset_only=True)
        logging.debug("Loaded resistance catalogue")
    else:
        resistanceCatalogue = None
        logging.info("No resistance catalogue provided, producing variants and mutations only")

    #Get the GenomeDifference for extracting genome level mutations
    diff = reference - sample
    logging.debug("Got the genome difference")

    #Complain if there are no variants
    if diff.variants is None:
        logging.error("No variants detected!")
        raise NoVariantsException()

    #Get the variations and mutations
    variants = populateVariants(vcfStem, options.output_dir, diff)
    logging.debug("Populated and saved variants.csv")

    mutations, referenceGenes = populateMutations(vcfStem, options.output_dir, diff, 
                        reference, sample, resistanceCatalogue)
    if mutations is None:
        logging.info("No mutations found - probably due to exclusively inter-gene variation or no variation.\n\t\t\t\t\t\t\t No effects.csv written")
    else:
        logging.debug("Populated and saved mutations.csv")

    #Get the effects of the mutations
    #This needs both a catalogue and some mutations
    if resistanceCatalogue is not None and mutations is not None:
        effects, metadata = populateEffects(sample, options.output_dir, resistanceCatalogue, mutations, referenceGenes, vcfStem)
        logging.debug("Populated and saved effects.csv")
    else:
        metadata = {}
        effects = None
        logging.info("Skipped effects.csv due to lack of resistance catalogue or mutations")

    #Add data to the log
    logging.info("********** Successfully completed **********")
    
    logging.info(f"VCF file: {options.vcf_file}")
    logging.info(f"Reference genome file: {options.genome_object}")

    if resistanceCatalogue:
        logging.info(f"Catalogue reference genome: {resistanceCatalogue.catalogue.genbank_reference}")
        logging.info(f"Catalogue name: {resistanceCatalogue.catalogue.name}")
        logging.info(f"Catalogue version: {resistanceCatalogue.catalogue.version}")
        logging.info(f"Catalogue grammar: {resistanceCatalogue.catalogue.grammar}")
        logging.info(f"Catalogue values: {resistanceCatalogue.catalogue.values}")
        logging.info(f"Catalogue path: {options.catalogue_file}")
    for drug in sorted(metadata):
        logging.info(f"{drug} {metadata[drug]}")
    logging.info(f"Completed in {time.time()-start}s")

    if options.json:
        #Default prediction values are RFUS but use piezo catalogue's values if existing
        #With no catalogue there are no values to read, so use the default
        values = None
        if resistanceCatalogue is not None:
            values = resistanceCatalogue.catalogue.values
        if not values:
            values = list("RFUS")
        logging.info(f"Saving a JSON... See {options.output_dir}/gnomon-out.json")
        saveJSON(variants, mutations, effects, options.output_dir, vcfStem, values, gnomon.__version__)
        if options.alt_json:
            #NOTE(review): with no catalogue there is no name, so None is passed instead
            # confirm that toAltJSON accepts a None catalogue name
            catalogueName = None
            if resistanceCatalogue is not None:
                catalogueName = resistanceCatalogue.catalogue.name
            logging.info(f"Saving the alternate JSON too... See {options.output_dir}/alt-gnomon-out.json")
            toAltJSON(options.output_dir, reference, vcfStem, catalogueName)
    elif options.alt_json:
        #The alternate JSON is only produced alongside the main JSON, so say why it was skipped
        logging.warning("--alt_json was given without --json, so no alternate JSON was produced")

    if options.fasta:
        fastaMode = options.fasta.lower()
        if fastaMode in ('fixed', 'variable'):
            #Write the resultant fasta file
            #The file name uses the option exactly as given on the command line
            sample.save_fasta(os.path.join(options.output_dir, vcfStem+"-"+options.fasta+".fasta"), fixed_length=(fastaMode == 'fixed'))
        else:
            #Don't silently ignore a mistyped option value
            logging.warning(f"Unrecognised --fasta value '{options.fasta}' (expected 'fixed' or 'variable'), so no FASTA was written")