#!/srv/sw/python/2.7.4/bin/python
###############################################################################
#                                                                             #
#    This program is free software: you can redistribute it and/or modify     #
#    it under the terms of the GNU General Public License as published by     #
#    the Free Software Foundation, either version 3 of the License, or        #
#    (at your option) any later version.                                      #
#                                                                             #
#    This program is distributed in the hope that it will be useful,          #
#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            #
#    GNU General Public License for more details.                             #
#                                                                             #
#    You should have received a copy of the GNU General Public License        #
#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
#                                                                             #
###############################################################################

__author__ = "Donovan Parks"
__copyright__ = "Copyright 2015"
__credits__ = ["Donovan Parks"]
__license__ = "GPL3"
__maintainer__ = "Donovan Parks"
__email__ = "donovan.parks@gmail.com"
__status__ = "Development"

import os
import sys
import argparse

from genome_tree_tk.main import OptionsParser
from biolib.misc.custom_help_formatter import CustomHelpFormatter


def version():
    """Read program version from file."""
    bin_dir = os.path.dirname(os.path.realpath(__file__))
    version_file = open(os.path.join(bin_dir, '..', 'genome_tree_tk', 'VERSION'))
    return version_file.read().strip()


def print_help():
    """Help menu."""

    print ''
    print '                ...::: GenomeTreeTk v' + version() + ' :::...'''
    print '''\

    Infer lineage-specific genome tree:
      trusted     -> Determine trusted genomes to use for marker gene inference
      dereplicate -> Dereplicate genomes based on taxonomy
      markers     -> Determine phylogenetically informative marker genes
      infer       -> Infer genome tree

    Assess stability of tree:
      bootstrap   -> Bootstrap multiple sequence alignment
      jk_markers  -> Jackknife marker genes
      jk_taxa     -> Jackknife ingroup taxa
      combine     -> Combine all support values into a single tree

    Reroot tree:
      midpoint    -> Reroot tree at midpoint.
      outgroup    -> Reroot tree with outgroup.

  Use: genome_tree_tk <command> -h for command specific help.

  Feature requests or bug reports can be sent to Donovan Parks (donovan.parks@gmail.com)
    or posted on GitHub (https://github.com/dparks1134/GenomeTreeTK).
    '''

if __name__ == '__main__':

    # initialize the options parser
    parser = argparse.ArgumentParser(add_help=False)
    subparsers = parser.add_subparsers(help="--", dest='subparser_name')

    # determine trusted genomes
    trusted_parser = subparsers.add_parser('trusted',
                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                        description='Determine trusted genomes to use for marker gene inference.')
    trusted_parser.add_argument('trusted_genomes_file', help="output file listing trusted genomes")
    trusted_parser.add_argument('--trusted_comp', help='minimum completeness to trust genome for marker set inference [0, 1]', type=float, default=0.95)
    trusted_parser.add_argument('--trusted_cont', help='maximum contamination to trust genome for marker set inference [0, 1]', type=float, default=0.05)
    trusted_parser.add_argument('--max_contigs', help='maximum number of contigs to trust genome for marker set inference', type=int, default=300)
    trusted_parser.add_argument('--min_N50', help='minimum N50 of contigs to trust genome for marker set inference', type=int, default=10000)
    trusted_parser.add_argument('--allow_partial_taxonomy', help='retain genomes with incomplete taxonomic information', action='store_true')

    # dereplicate genomes
    dereplicate_parser = subparsers.add_parser('dereplicate',
                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                        description='Dereplicate genomes based on taxonomy.')
    dereplicate_parser.add_argument('derep_genome_file', help="output file listing dereplicated genomes")
    dereplicate_parser.add_argument('--max_species', help='maximum number of genomes of the same species to retain', type=int, default=5)
    dereplicate_parser.add_argument('--trusted_genomes_file', help='limit selected genomes to those marked as trusted', default=None)

    # determine marker genes
    markers_parser = subparsers.add_parser('markers',
                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                        description='Determine phylogenetically informative marker genes.')
    markers_parser.add_argument('ingroup_file', help="unique ids of genomes within the ingroup")
    markers_parser.add_argument('output_dir', help="output directory")
    markers_parser.add_argument('--redundancy', help='threshold for declaring HMMs redundant', type=float, default=0.50)
    markers_parser.add_argument('--ubiquity', help='ubiquity threshold for defining marker genes', type=float, default=0.9)
    markers_parser.add_argument('--single_copy', help='single-copy threshold for defining marker genes', type=float, default=0.95)

    markers_parser.add_argument('--min_support', help='minimum jackknife support of split during LGT filtering', type=float, default=0.9)
    markers_parser.add_argument('--min_per_taxa', help='minimum percentage of taxa to consider a split during LGT filtering', type=float, default=0.02)
    markers_parser.add_argument('--perc_markers', help='percentage of markers to keep during marker jackknifing', type=float, default=0.7)

    markers_parser.add_argument('--restict_marker_list', help='restrict marker set to genes within this list', default=None)
    markers_parser.add_argument('-c', '--cpus', help='number of cpus to use', type=int, default=16)

    # determine marker genes
    infer_parser = subparsers.add_parser('infer',
                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                        description='Infer genome tree.')
    infer_parser.add_argument('genome_id_file', help="unique ids of genomes to include in tree")
    infer_parser.add_argument('marker_id_file', help="unique ids of marker genes to use for inference")
    infer_parser.add_argument('output_dir', help="output directory")
    infer_parser.add_argument('-m', '--model', choices=['wag', 'jtt'], help="model of evolution to use", default='jtt')
    infer_parser.add_argument('-c', '--cpus', help='number of cpus', type=int, default=1)

    # assess robustness of genome tree using classic bootstrapping
    bootstrap_parser = subparsers.add_parser('bootstrap',
                                        formatter_class=CustomHelpFormatter,
                                        description='Bootstrap multiple sequence alignment.')
    bootstrap_parser.add_argument('input_tree', help="tree inferred from original data")
    bootstrap_parser.add_argument('msa_file', help="file containing multiple sequence alignment")
    bootstrap_parser.add_argument('output_dir', help="output directory")
    bootstrap_parser.add_argument('-m', '--model', choices=['wag', 'jtt'], help="model of evolution to use", default='jtt')
    bootstrap_parser.add_argument('-r', '--num_replicates', help="number of bootstrap replicates to perform", type=int, default=100)
    bootstrap_parser.add_argument('-c', '--cpus', help='number of cpus', type=int, default=1)

    # assess robustness of genome tree by jackknifing marker genes
    jk_markers_parser = subparsers.add_parser('jk_markers',
                                        formatter_class=CustomHelpFormatter,
                                        description='Jackknife marker genes.')
    jk_markers_parser.add_argument('input_tree', help="tree inferred from original data")
    jk_markers_parser.add_argument('msa_file', help="file containing multiple sequence alignment")
    jk_markers_parser.add_argument('gene_length_file', help="file indicating length of each gene in concatenated alignment")
    jk_markers_parser.add_argument('output_dir', help="output directory)")
    jk_markers_parser.add_argument('-m', '--model', choices=['wag', 'jtt'], help="model of evolution to use", default='jtt')
    jk_markers_parser.add_argument('-p', '--perc_markers', help="percentage of markers to keep", type=float, default=0.5)
    jk_markers_parser.add_argument('-r', '--num_replicates', help="number of jackknife replicates to perform", type=int, default=100)
    jk_markers_parser.add_argument('-c', '--cpus', help='number of cpus', type=int, default=1)

    # assess robustness of genome tree by jackknifing ingroup taxa
    jk_taxa_parser = subparsers.add_parser('jk_taxa',
                                        formatter_class=CustomHelpFormatter,
                                        description='Jackknife ingroup taxa.')
    jk_taxa_parser.add_argument('input_tree', help="tree inferred from original data")
    jk_taxa_parser.add_argument('msa_file', help="file containing multiple sequence alignment")
    jk_taxa_parser.add_argument('output_dir', help="output directory")
    jk_taxa_parser.add_argument('--outgroup_ids', help="file indicating outgroup taxa", default=None)
    jk_taxa_parser.add_argument('-m', '--model', choices=['wag', 'jtt'], help="model of evolution to use", default='jtt')
    jk_taxa_parser.add_argument('-p', '--perc_taxa', help="percentage of taxa to keep", type=float, default=0.5)
    jk_taxa_parser.add_argument('-r', '--num_replicates', help="number of jackknife replicates to perform", type=int, default=100)
    jk_taxa_parser.add_argument('-c', '--cpus', help='number of cpus', type=int, default=1)

    # assess robustness of genome tree by jackknifing ingroup taxa
    combine_parser = subparsers.add_parser('combine',
                                        formatter_class=CustomHelpFormatter,
                                        description='Combine all support values into a single tree.')
    combine_parser.add_argument('bootstrap_tree', help="tree with bootstrap support values")
    combine_parser.add_argument('jk_marker_tree', help="tree with jackknife marker support values")
    combine_parser.add_argument('jk_taxa_tree', help="tree with jackknife taxa support values")
    combine_parser.add_argument('output_tree', help="output tree")
    combine_parser.add_argument('-s', '--support_type', choices=['average', 'minimum'], help="type of support values to compute", default='average')

    # reroot tree at midpoint
    midpoint_parser = subparsers.add_parser('midpoint',
                                        formatter_class=CustomHelpFormatter,
                                        description='Reroot tree at midpoint.')
    midpoint_parser.add_argument('input_tree', help="tree to reroot")
    midpoint_parser.add_argument('output_tree', help="output tree")

    # reroot tree with outgroup
    outgroup_parser = subparsers.add_parser('outgroup',
                                        formatter_class=CustomHelpFormatter,
                                        description='Reroot tree with outgroup.')
    outgroup_parser.add_argument('input_tree', help="tree to reroot")
    outgroup_parser.add_argument('outgroup_file', help="file containing taxa in outgroup")
    outgroup_parser.add_argument('output_tree', help="output tree")

    # get and check options
    args = None
    if(len(sys.argv) == 1 or sys.argv[1] == '-h' or sys.argv == '--help'):
        print_help()
        sys.exit(0)
    else:
        args = parser.parse_args()

    # do what we came here to do
    try:
        parser = OptionsParser()
        if(False):
            # import pstats
            # p = pstats.Stats('prof')
            # p.sort_stats('cumulative').print_stats(10)
            # p.sort_stats('time').print_stats(10)
            import cProfile
            cProfile.run('parser.parse_options(args)', 'prof')
        elif False:
            import pdb
            pdb.run(parser.parse_options(args))
        else:
            parser.parse_options(args)
    except SystemExit:
        print "\n  Controlled exit resulting from an unrecoverable error or warning."
    except:
        print "\nUnexpected error:", sys.exc_info()[0]
        raise
