#!/usr/bin/env python

from __future__ import print_function

from sys import argv
import sys


from epic2.version import __version__
from epic2.src.reads_to_bins import files_to_bin_counts
from epic2.src.SICER_stats import compute_score_threshold
from epic2.src.find_islands import find_islands, compute_fdr, write_islands, add_chip_count_to_islands
from epic2.src.genome_info import egl_and_chromsizes

from collections import OrderedDict
import argparse
import os


# Flags that only print information and exit. When any of them is present on
# the command line, --treatment is not demanded from the user.
_INFO_ONLY_FLAGS = ("--version", "-v", "--example", "-ex")

# Command-line parser for epic2. The description embeds the package version
# and the program name shown in the usage text is this file's basename.
parser = argparse.ArgumentParser(
    description="""epic2, version: {}
(Visit github.com/endrebak/epic2 for examples and help. Run epic2-example for a simple example command.)
    """.format(__version__),
    prog=os.path.basename(__file__))

# One or more treatment files. Required for a real run; optional when one of
# the information-only flags was passed on the command line.
parser.add_argument(
    '--treatment',
    '-t',
    required=not any(flag in sys.argv for flag in _INFO_ONLY_FLAGS),
    type=str,
    nargs='+',
    help='''Treatment (pull-down) file(s) in one of these formats: bed, bedpe, bed.gz, bedpe.gz or (single-end) bam, sam. Mixing file formats is allowed.''')

# Zero or more control (input) files; the option may be omitted entirely.
parser.add_argument(
    '--control',
    '-c',
    required=False,
    type=str,
    nargs='+',
    help='''Control (input) file(s) in one of these formats: bed, bedpe, bed.gz, bedpe.gz or (single-end) bam, sam. Mixing file formats is allowed.''')


# Name of the genome to analyze; falls back to hg19 when not given.
parser.add_argument('--genome',
                    '-gn',
                    required=False,
                    default="hg19",
                    type=str,
                    help='''Which genome to analyze. Default: hg19. If --chromsizes and --egf flag is given, --genome is not required.''')

# ---------------------------------------------------------------------------
# Read handling and island-finding parameters. Every option in this group is
# optional and falls back to the default shown (no default means None).
# ---------------------------------------------------------------------------

# Boolean switch: False unless the flag is passed.
parser.add_argument(
    "--keep-duplicates", "-kd",
    action="store_true",
    default=False,
    help='''Keep reads mapping to the same position on the same strand within a library. Default: False.
                   ''')

# Integer window (bin) size.
parser.add_argument(
    "--bin-size", "-bin",
    type=int,
    default=200,
    help='''Size of the windows to scan the genome. BIN-SIZE is the smallest possible island. Default 200.
                   ''')

# Integer number of gaps tolerated between eligible windows.
parser.add_argument(
    "--gaps-allowed", "-g",
    type=int,
    default=3,
    help='''This number is multiplied by the window size to determine the number of gaps
                   (ineligible windows) allowed between two eligible windows.
                   Must be an integer. Default: 3. ''')

# Integer fragment size used for single-end reads.
parser.add_argument(
    "--fragment-size", "-fs",
    type=int,
    default=150,
    help='''(Single end reads only) Size of the sequenced fragment. Each read is extended half the fragment size from the 5' end. Default 150 (i.e. extend by 75).''')

# Float FDR cutoff applied to the reported islands.
parser.add_argument(
    "--false-discovery-rate-cutoff", "-fdr",
    type=float,
    default=0.05,
    help='''Remove all islands with an FDR above cutoff. Default 0.05.
                   ''')

# Float override for the effective genome fraction; None when not given.
parser.add_argument(
    "--effective-genome-fraction", "-egf",
    type=float,
    help='''Use a different effective genome fraction than the one included in epic2. The default value depends on the genome and readlength, but is a number between 0 and 1.''')

# Path to a user-supplied chromosome sizes file; None when not given.
parser.add_argument(
    "--chromsizes", "-cs",
    type=str,
    help='''Set the chromosome lengths yourself in a file with two columns: chromosome names and sizes. Useful to analyze custom genomes, assemblies or simulated data. Only chromosomes included in the file will be analyzed.''')

# Integer E-value.
parser.add_argument(
    "--e-value", "-e",
    type=int,
    default=1000,
    help='''The E-value controls the genome-wide error rate of identified islands under the random background assumption. Should be used when not using a control library. Default: 1000.''')

# ---------------------------------------------------------------------------
# Options that, per their help texts, only apply to bam input. All of them
# are optional and fall back to the default shown.
# ---------------------------------------------------------------------------

# Integer bit mask of flag bits a read must have set.
parser.add_argument(
    "--required-flag", "-f",
    type=int,
    default=0,
    help='''(bam only.) Keep reads with these bits set in flag. Same as `samtools view -f`. Default 0
                   ''')

# Integer bit mask of flag bits that cause a read to be discarded.
parser.add_argument(
    "--filter-flag", "-F",
    type=int,
    default=1540,
    help='''(bam only.) Discard reads with these bits set in flag. Same as `samtools view -F`. Default 1540 (hex: 0x604).
See https://broadinstitute.github.io/picard/explain-flags.html for more info.
                   ''')

# Integer minimum mapping quality.
parser.add_argument(
    "--mapq", "-m",
    type=int,
    default=5,
    help='''(bam only.) Discard reads with mapping quality lower than this. Default 5.
                   ''')

# Boolean switch: False unless the flag is passed.
parser.add_argument(
    "--autodetect-chroms", "-a",
    action="store_true",
    default=False,
    help='''(bam only.) Autodetect chromosomes from bam file. Use with --discard-chromosomes flag to avoid non-canonical chromosomes.''')


# String pattern selecting chromosomes whose reads are dropped.
parser.add_argument(
    "--discard-chromosomes-pattern", "-d",
    type=str,
    default="_",
    help='''(bam only.) Discard reads from chromosomes matching this pattern. Default
                   '_'. Note that if you are not interested in the results from
                   non-canonical chromosomes, you should ensure they are
                   removed with this flag, otherwise they will make the
                   statistical analysis too stringent.''')

# ---------------------------------------------------------------------------
# Output verbosity and information-only flags.
# ---------------------------------------------------------------------------

# Boolean switch: False unless the flag is passed.
parser.add_argument(
    "--quiet", "-q",
    action="store_true",
    default=False,
    help='''Do not write output messages to stderr.''')

# Boolean switch: False unless the flag is passed.
parser.add_argument(
    "--example", "-ex",
    action="store_true",
    default=False,
    help='''Show the paths of the example data and an example command.''')


# argparse's built-in "version" action prints the package version and exits.
parser.add_argument(
    "--version", "-v",
    action="version",
    version=__version__)


def main():
    """Run epic2 from the command line.

    Parses ``sys.argv`` with the module-level ``parser`` and then either:

    * prints the paths of the bundled example files plus an example command
      and exits with status 0 (``--example``), or
    * counts treatment reads per bin, computes the score threshold, finds
      islands, and passes them to ``compute_fdr`` (when ``--control`` files
      were given) or to ``write_islands`` (no control).

    Progress messages are logged to stderr at INFO level; ``--quiet`` raises
    the logging level to CRITICAL so those messages are suppressed.
    """

    # The command line is parsed exactly once; the resulting dict is extended
    # below and handed to the helper functions.
    args = vars(parser.parse_args())

    if args["example"]:
        # Imported lazily: only this branch needs it.
        import pkg_resources

        treatment = pkg_resources.resource_filename("epic2", "examples/test.bed.gz")
        control = pkg_resources.resource_filename("epic2", "examples/control.bed.gz")
        print("Treatment: " + treatment)
        print("Control: " + control)
        print("Example command: epic2 -t {} -c {} > deleteme.txt".format(treatment, control))
        sys.exit(0)

    # Store the inverse of --keep-duplicates as an int (1 = drop, 0 = keep).
    args["drop_duplicates"] = int(not args["keep_duplicates"])

    import logging

    level = logging.CRITICAL if args["quiet"] else logging.INFO

    logging.basicConfig(
        level=level,
        format='%(message)s ',
        datefmt='%a, %d %b %Y %H:%M:%S',
        stream=sys.stderr)

    # Replace the user-supplied chromsizes value with the resolved one and
    # record the effective genome length alongside it.
    effective_genome_length, chromsizes = egl_and_chromsizes(args)
    args["chromsizes"] = chromsizes
    args["effective_genome_size"] = effective_genome_length

    # Bin the treatment reads and total the per-bin counts over all entries.
    c_bins_counts = files_to_bin_counts(args["treatment"], args, "ChIP")
    chip_count = sum(sum(counts) for _, counts in c_bins_counts.values())

    logging.info("\nValid ChIP reads: {}\n".format(chip_count))

    # The gap argument is expressed in bases: allowed gaps times bin size.
    score_threshold, island_enriched_threshold, average_window_readcount = compute_score_threshold(
        chip_count,
        args["bin_size"],
        effective_genome_length,
        args["gaps_allowed"] * args["bin_size"],
        args["e_value"])

    logging.info("Number of tags in a window: {}\n".format(island_enriched_threshold))

    islands = find_islands(
        c_bins_counts,
        args["gaps_allowed"],
        args["bin_size"],
        score_threshold,
        island_enriched_threshold,
        average_window_readcount)
    new_islands = add_chip_count_to_islands(islands, c_bins_counts)

    logging.info("Number of islands found: {}\n".format(sum(len(i) for i in islands.values())))

    if args["control"]:
        # With control files: bin the background reads and hand both sets of
        # counts to compute_fdr together with the FDR cutoff.
        b_bins_counts = files_to_bin_counts(args["control"], args, "Input")
        background_count = sum(sum(counts) for _, counts in b_bins_counts.values())
        logging.info("\nValid background reads: {}\n".format(background_count))

        compute_fdr(
            new_islands,
            b_bins_counts,
            chip_count,
            background_count,
            effective_genome_length,
            args["false_discovery_rate_cutoff"])
    else:
        # Without control files the islands are written using the E-value.
        write_islands(
            new_islands,
            average_window_readcount,
            args["false_discovery_rate_cutoff"],
            args["e_value"])


# Run the command-line workflow only when this file is executed as a script,
# so that merely importing it does not parse sys.argv or start an analysis.
if __name__ == "__main__":
    main()
