#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os, sys
import argparse

__author__ = "Corinna Ernst"
# Directory one level above this script's resolved (symlink-free) location.
_libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../')
# When that directory holds a 'pancake' package (pancake/__init__.py exists),
# put it first on sys.path so later imports of 'pancake' resolve to that copy.
if os.path.isfile(os.path.join(_libdir, 'pancake', '__init__.py')):
	sys.path.insert(0, _libdir)

from pancake import *


def _add_panfile_option(sub):
	"""Attach the mandatory ``--panfile/-p`` option shared by most subcommands."""
	sub.add_argument("--panfile", '-p', metavar="PAN_FILE", nargs=1, help="Name of PanCake Data Object File (required)", type=str, required=True)


def _add_sequence_source_options(sub):
	"""Attach the options naming where input chromosome sequences come from."""
	sub.add_argument("--sequences", "-s", nargs='+', help="fasta or multiple fasta file providing input chromosome sequences")
	sub.add_argument("--ids", "-i", nargs='+', help="gi ids of sequences to download from NCBI")
	sub.add_argument("--email", "-e", type=str, help="if downloading your sequences via gi ids, please specify your email address; in case of excessive usage, NCBI will attempt to contact a user at the e-mail address provided prior to blocking access to the E-utilities", default='')


def _add_alignment_options(sub):
	"""Attach the pairwise-alignment options shared by ``create`` and ``addChrom``."""
	sub.add_argument("--ali", "-a", nargs='*', help="pairwise alignments (BLAST or nucmer output) to include in PanCake Object", default=None)
	sub.add_argument("--min_len", '-l', help="minimum length of pairwise alignments to include (DEFAULT=25)", type=int, default=25)
	# store_false: the parsed value is True unless the flag is given.
	sub.add_argument("--no_self_alignments", "-nsa", action='store_false', help="if set, skip pairwise alignments between regions on identical chromosomes as input (DEFAULT=False)", default=True)


def argument_parser():
	"""Build the PanCake command-line parser.

	The chosen subcommand is stored in ``args.subcommand``; it is ``None``
	when no subcommand is given, because the subparsers are not marked as
	required.

	Notes on the parsed values:
	  * options declared with ``nargs=1`` (e.g. ``--panfile``) yield
	    one-element lists, so consumers must index ``[0]``;
	  * ``--no_self_alignments`` and ``--no_output`` use ``store_false`` with
	    ``default=True``: the attribute is True unless the flag is passed.

	Returns:
	    argparse.ArgumentParser: parser with the subcommands create, status,
	    addAli, specify, addChrom, core, singletons, sequence and graph.
	"""
	p = argparse.ArgumentParser(description='''PanCake -- A data structure for pangenomes enabling for core and singleton identification, without the requirement of annotation data.\n''')
	subparsers = p.add_subparsers(dest="subcommand")

	# Sub-parser variables carry a _p suffix so they do not shadow the
	# identically named functions pulled in by the star import.
	create_p = subparsers.add_parser("create", help="create a PanCake Data Object")
	_add_sequence_source_options(create_p)
	create_p.add_argument("--pan_file", "-p", type=str, help="File name of new PanCake Object (DEFAULT=pan_files/pancake.pan)", default='')
	_add_alignment_options(create_p)

	status_p = subparsers.add_parser("status", help="get overview about a PanCake Object")
	status_p.add_argument("PAN_FILE", nargs=1, help="Name of PanCake Data Object File")

	add_ali_p = subparsers.add_parser("addAli", help="include information from pairwise (BLAST or nucmer) alignments into an already existing PanCake Object")
	_add_panfile_option(add_ali_p)
	add_ali_p.add_argument("--output", '-o', metavar="NEW_PAN_FILE", nargs=1, help='output pangenome file (PAN_FILE will be unchanged)', default=None)
	add_ali_p.add_argument("--min_len", '-l', metavar='INT', help="minimum length of pairwise alignments to get included (DEFAULT=25)", type=int, default=25)
	add_ali_p.add_argument("--no_self_alignments", "-nsa", action='store_false', help="if set, skip pairwise alignments between regions on identical chromosomes (DEFAULT=False)", default=True)
	add_ali_p.add_argument("ALI_FILE", nargs='+', help="Alignment File (BLAST or nucmer output)")

	specify_p = subparsers.add_parser("specify", help="include genome information or rename chromosomes")
	_add_panfile_option(specify_p)
	# --chrom is deliberately not required: --genome_file and --delete work without it.
	specify_p.add_argument("--chrom", "-c", metavar='CHROM', nargs='+', help="name(s) of respective chromosome(s)", type=str)
	specify_p.add_argument("--name", '-n', metavar='NEW_NAME', help="new name of specified chromosome, this will become the chromosome's name in incidental output files", type=str)
	specify_p.add_argument("--genome", '-g', metavar='GENOME', help="name of genome CHROM belongs to", type=str)
	specify_p.add_argument("--genome_file", '-f', metavar='FILE_NAME', nargs=1, help="input file containing mapping of chromosomes to genomes and additional chromosome names")
	specify_p.add_argument("--delete", '-d', metavar='OLD_NAME', nargs='+', help="chromosome names to delete")

	add_chrom_p = subparsers.add_parser("addChrom", help="add chromosomes to an already existing PanCake Object")
	_add_panfile_option(add_chrom_p)
	_add_sequence_source_options(add_chrom_p)
	add_chrom_p.add_argument("--output", "-o", nargs=1, type=str, help="output file for new PanCake Object (DEFAULT=PAN_FILE); if specified, PanCake Data Object in PAN_FILE stays unchanged")
	_add_alignment_options(add_chrom_p)

	core_p = subparsers.add_parser("core", help="identify core regions")
	_add_panfile_option(core_p)
	core_p.add_argument("--ref_chrom", '-rc', nargs=1, help="Reference CHROMOSOME (define either ONE reference chromosome OR ONE reference genome)", default='', type=str)
	core_p.add_argument("--ref_genome", '-rg', nargs=1, help="Reference GENOME (define either ONE reference chromosome OR ONE reference genome)", default='', type=str)
	core_p.add_argument("--non_ref_chroms", '-nrc', nargs='+', help="Names of non-reference CHROMOSOMES (DEFAULT: ALL non-reference chromosomes)", default=[])
	core_p.add_argument("--non_ref_genomes", '-nrg', nargs='+', help="Names of non-reference GENOMES (DEFAULT: ALL non-reference genomes)", default=[])
	core_p.add_argument("--exclude_genomes", '-eg', metavar='GENOME', nargs='+', help="Names of GENOMES to exclude from core analysis (DEFAULT: No genomes excluded)", default=[])
	core_p.add_argument("--exclude_chromosomes", '-ec', metavar='CHROMOSOME', nargs='+', help="Names of CHROMOSOMES to exclude from core analysis (DEFAULT: No chromosomes excluded)", default=[])
	core_p.add_argument("--max_non_core_frac", '-f', help='Maximum fraction of non-core sequence regions within each included sequence (FLOAT, DEFAULT=0.05)', default=0.05, type=float)
	core_p.add_argument("--min_len", '-l', help="minimum length of regions to identify as part of core genome (INTEGER, DEFAULT=25)", type=int, default=25)
	core_p.add_argument("--output", '-o', metavar='DIR', help="directory to which .fasta files of core regions are written (DEFAULT: core_{REF_CHROM|REF_GENOME})")
	core_p.add_argument("--no_output", '-no', action='store_false', help="if set, suppress .fasta output of core regions", default=True)
	core_p.add_argument("--bed_file", '-b', metavar='BED_FILE', help='.bed file to which core regions are written (DEFAULT= core_{REF_CHROM|REF_GENOME}.bed)', default='', type=str)
	core_p.add_argument("--max_space", '-s', metavar='INT', help='maximum non-core space allowed within a core region (DEFAULT=25)', type=int, default=25)

	singletons_p = subparsers.add_parser("singletons", help="identify singleton regions")
	_add_panfile_option(singletons_p)
	singletons_p.add_argument("--ref_chrom", '-rc', nargs=1, help="Reference CHROMOSOME (define either ONE reference chromosome or ONE reference genome)", default='', type=str)
	singletons_p.add_argument("--ref_genome", '-rg', nargs=1, help="Reference GENOME (define either ONE reference chromosome or ONE reference genome)", default='', type=str)
	singletons_p.add_argument("--non_ref_chroms", '-nrc', nargs='+', help="Names of non-reference CHROMOSOMES (DEFAULT: ALL non-reference chromosomes)", default=[])
	singletons_p.add_argument("--non_ref_genomes", '-nrg', nargs='+', help="Names of non-reference GENOMES (DEFAULT: ALL non-reference genomes)", default=[])
	singletons_p.add_argument("--exclude_genomes", '-eg', metavar='GENOME', nargs='+', help="Names of GENOMES to exclude from singleton analysis (DEFAULT: No genomes excluded)", default=[])
	singletons_p.add_argument("--exclude_chromosomes", '-ec', metavar='CHROMOSOME', nargs='+', help="Names of CHROMOSOMES to exclude from singleton analysis (DEFAULT: No chromosomes excluded)", default=[])
	singletons_p.add_argument("--min_len", '-l', help="minimum length of regions to identify as a singleton region (INTEGER, DEFAULT=25)", type=int, default=25)
	# NOTE(review): the default directory named in this help text was carried
	# over unchanged from the core subcommand; verify it against the library.
	singletons_p.add_argument("--output", '-o', metavar='DIR', help="directory to which .fasta files of singleton regions are written (DEFAULT: core_{REF_CHROM|REF_GENOME})")
	singletons_p.add_argument("--no_output", '-no', action='store_false', help="if set, suppress .fasta output of singleton regions", default=True)
	singletons_p.add_argument("--bed_file", '-b', metavar='BED_FILE', help='.bed file to which singleton regions are written (DEFAULT= singletons_{REF_CHROM|REF_GENOME}.bed)', default='', type=str)

	#DEBUG
	#printer = subparsers.add_parser("printer")
	#printer.add_argument("PAN_FILE" , nargs=1, help="Name of PanCake Data Object File")

	sequence_p = subparsers.add_parser("sequence", help="get sequence of a chromosome (or chromosomal region) from a PanCake Object")
	_add_panfile_option(sequence_p)
	# A sequence is requested either per chromosome or per genome, never both.
	source = sequence_p.add_mutually_exclusive_group()
	source.add_argument("--chrom", '-c', metavar='CHROMOSOME', help="Chromosome from which sequence originates", type=str)
	source.add_argument("--genome", '-g', metavar='GENOME', help='(multiple) .fasta output of GENOME (if set, start and stop will be ignored)', type=str)
	sequence_p.add_argument("--output", '-o', metavar='F', nargs=1, help="file to which .fasta output will be written (DEFAULT = STDOUT)", type=str, default='')
	# Defaults are wrapped in lists to match the one-element lists nargs=1 produces.
	sequence_p.add_argument("--linewidth", '-lw', nargs=1, metavar='INT', help='line width in .fasta file (DEFAULT=100)', type=int, default=[100])
	sequence_p.add_argument("-start", metavar='INT', nargs=1, help='(1-based) start position on CHROMOSOME (DEFAULT = 1)', type=int, default=[1])
	sequence_p.add_argument("-stop", metavar='INT', nargs=1, help='(1-based) stop position on CHROMOSOME (DEFAULT = length of CHROMOSOME)', type=int, default=[None])

	graph_p = subparsers.add_parser("graph", help="graphical output of PanCake Objects (in DOT format) #BETA MODE#")
	_add_panfile_option(graph_p)
	graph_p.add_argument("--chroms", '-c', metavar="CHROMS", nargs='+', help='Chromosomes in Output (by default all chromosomes covered in PAN_FILE)')
	graph_p.add_argument("-starts", metavar="START_POS", nargs='+', help='Start positions (in same order as chromosomes), DEFAULT=1 on all chromosomes', type=int)
	graph_p.add_argument("-stops", metavar="STOP_POS", nargs='+', help='Stop positions (in same order as chromosomes), DEFAULT=length of chromosomes', type=int)
	graph_p.add_argument("--max_nodes", metavar="MAX_NODES", help='Maximal number of nodes in output graph. (DEFAULT=10,000): if exceeded, PanCake will warn and interrupt!', type=int, default=10000)
	graph_p.add_argument("--max_edges", metavar="MAX_EDGES", help='Maximal number of edges in output graph. (DEFAULT=10,000): if exceeded, PanCake will warn and interrupt!', type=int, default=10000)
	graph_p.add_argument("--max_entries", '-me', metavar="MAX_ENTRIES", help='Shared features are truncated in output if number of contained feature instances > MAX_ENTRIES (DEFAULT: MAX_ENTRIES=50)', type=int, default=50)
	graph_p.add_argument("-all", action='store_true', help="if set, all chromosomes contained in PAN_FILE appear in output (irrespective to CHROMS), DEFAULT=False", default=False)
	graph_p.add_argument("-regions", action='store_true', help="if set, only specified regions are shown in output (DEFAULT=False), ignored if -all is set", default=False)
	graph_p.add_argument("--output", '-o', metavar='FILE', help="output DOT file (DEFAULT: STDOUT)", default=None, type=str)

	return p

def main(argv=None):
	"""Parse the command line and dispatch to the selected PanCake subcommand.

	Subcommands:
		create      create a PanCake Data Object
		addAli      include information from pairwise alignments into a PanCake Object
		addChrom    add a chromosome to an existing PanCake Object
		core        identify core regions
		singletons  identify singleton regions
		status      get overview of a PanCake Object
		specify     include genome information or rename chromosomes
		sequence    get sequence of a chromosome (region) in a PanCake Object
		graph       get graphical output

	Args:
		argv: list of command-line arguments to parse. ``None`` (the default)
			lets argparse read ``sys.argv[1:]``.

	Raises:
		ValueError: if option values are inconsistent or out of range (both or
			neither reference given for core/singletons, --name/--genome
			without --chrom, non-positive positions, line width or graph
			limits, or neither --chrom nor --genome for sequence).
		SystemExit: if no subcommand is given (the help text is printed
			first), or raised by argparse on a parse error.
	"""
	parser = argument_parser()
	args = parser.parse_args(argv)
	cmd = args.subcommand

	# Sub-parsers are optional, so an empty command line parses successfully;
	# show the help text instead of silently doing nothing.
	if cmd is None:
		parser.print_help(sys.stderr)
		sys.exit(2)

	if cmd == "create":
		create(args.sequences,
			args.ids,
			args.email,
			args.pan_file,
			alis=args.ali,
			min_len=args.min_len,
			self_alis=args.no_self_alignments)
	elif cmd == "addAli":
		# Without --output the input pan file is used as the output path.
		output = args.output[0] if args.output else args.panfile[0]
		add_alignments(args.panfile[0],
			args.ALI_FILE,
			output,
			min_len=args.min_len,
			self_alis=args.no_self_alignments)
	elif cmd == "addChrom":
		output = args.output[0] if args.output else args.panfile[0]
		add_chromosomes(args.panfile[0],
			args.sequences,
			args.ids,
			email=args.email,
			outfile=output,
			alis=args.ali,
			min_len=args.min_len,
			self_alis=args.no_self_alignments)
	elif cmd in ("core", "singletons"):
		if args.ref_chrom and args.ref_genome:
			raise ValueError('Input ERROR: Reference Chromosome AND Genome defined! Assign either ONE reference chromosome OR ONE reference genome!')
		if not args.ref_chrom and not args.ref_genome:
			raise ValueError('Input ERROR: No reference Chromosome or Genome defined! Assign either ONE reference chromosome OR ONE reference genome!')
		# Exactly one reference is set here; the unset one is passed as [].
		ref_chrom = args.ref_chrom[0] if args.ref_chrom else []
		ref_genome = args.ref_genome[0] if args.ref_genome else []
		if cmd == "core":
			core(args.panfile[0],
				ref_chrom,
				ref_genome,
				args.min_len,
				args.non_ref_genomes,
				args.non_ref_chroms,
				args.exclude_genomes,
				args.exclude_chromosomes,
				args.max_non_core_frac,
				args.max_space,
				args.output,
				args.no_output,
				args.bed_file)
		else:
			singletons(args.panfile[0],
				ref_chrom,
				ref_genome,
				args.min_len,
				args.non_ref_genomes,
				args.non_ref_chroms,
				args.exclude_genomes,
				args.exclude_chromosomes,
				args.output,
				args.no_output,
				args.bed_file)
	elif cmd == "status":
		stats(args.PAN_FILE[0])
	elif cmd == "specify":
		# --chrom is optional for the parser, but --name and --genome both
		# operate on it; fail with a clear message rather than a TypeError on None.
		if (args.name or args.genome) and not args.chrom:
			raise ValueError('Input ERROR: No chromosome defined! --name and --genome require --chrom!')
		if args.name:
			if len(args.chrom) == 1:
				new_name(args.panfile[0], args.chrom[0], args.name)
			else:
				print('More than one chromosome specified for new name {} ...SKIPPING'.format(args.name))
		if args.genome:
			add_to_genome(args.panfile[0], args.chrom, args.genome)
		if args.genome_file:
			include_genome_info(args.panfile[0], args.genome_file[0])
		if args.delete:
			delete_name(args.panfile[0], args.delete)
	elif cmd == 'sequence':
		# Validate line width and positions before touching the pan file.
		width = args.linewidth[0]
		if width <= 0:
			raise ValueError('Only line width >0 allowed!')
		# An empty output name is passed when --output is not given.
		outfile = args.output[0] if args.output else ''
		if args.chrom:
			start, stop = args.start[0], args.stop[0]
			if start <= 0:
				raise ValueError('Only start positions >0 allowed!')
			# Compare against None explicitly: a plain truth test would let
			# an invalid stop position of 0 slip through.
			if stop is not None and stop <= 0:
				raise ValueError('Only stop positions >0 allowed!')
			sequence(args.panfile[0], None, args.chrom, start, stop, outfile, width)
		elif args.genome:
			sequence(args.panfile[0], args.genome, None, None, None, outfile, width)
		else:
			raise ValueError('Input ERROR: Neither chromosome nor genome defined! Assign either --chrom or --genome!')
	elif cmd == 'graph':
		if args.max_entries < 1:
			raise ValueError('Only MAX_ENTRIES >0 allowed!')
		if args.max_edges < 1:
			raise ValueError('Only MAX_EDGES >0 allowed!')
		if args.max_nodes < 1:
			raise ValueError('Only MAX_NODES >0 allowed!')
		initialize_gv_output(args.panfile[0], args.chroms, args.starts, args.stops, args.all, args.regions, args.max_entries, args.max_nodes, args.max_edges, args.output)

# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
	main()

