#!/opt/miniconda3/envs/biasaway/bin/python

"""
BiasAway tool originally developed to generate adapted background for motif
overrepresentation.

BiasAway offers several ways of generating backgrounds, falling into two
categories:
- Creation of new random sequences:
  - k-mer shuffling using the foreground sequences
  - k-mer shuffling within a sliding window using foreground sequences
- Extraction of sequences from a set of possible background sequences:
  - respecting the %GC distribution of the foreground (using %GC bins)
  - respecting the %GC distribution as in the previous item and also
    respecting the %GC composition within a sliding window for %GC bin

Authors: BiasAway has been originally developed by Anthony Mathelier, Luis Del
Peso, and Rebecca Worsley-Hunt at the University of British Columbia.

Modified by Peter Arner, Aziz Khan and Anthony Mathelier
"""

import argparse
from biasaway import kmer_shuffling_generator as kmer_shuff
from biasaway import kmer_window_shuffling_generator as kmer_win_shuff
from biasaway import GC_compo_matching as GC_compo
from biasaway import GC_window_compo_matching as GC_window_compo
from biasaway.utils import get_seqs
from biasaway.utils import make_gc_plot
from biasaway.utils import make_len_plot
from biasaway.utils import make_dinuc_plot
from biasaway.utils import make_dinuc_acgt_only_plot
import sys
import os
import errno
import time
from biasaway import __version__ as VERSION


def kmer_shuffling_generator(argu):
    """
    Handler of the `k` subcommand: k-mer shuffling of the foreground.

    The foreground is loaded from `argu.fg_file` with `get_seqs` and handed
    to `kmer_shuff.generate_sequences` together with `argu.kmer`,
    `argu.nfold` and `argu.random_seed`. When `argu.plot_filename` is
    non-empty, the four QC plots comparing foreground and background are
    produced with that basename.
    """
    fg_seqs, fg_gc, fg_len, fg_dinuc_stats = get_seqs(argu.fg_file)
    bg_gc, bg_len, bg_dinuc_stats = kmer_shuff.generate_sequences(
        fg_seqs, argu.kmer, argu.nfold, argu.random_seed)
    if not argu.plot_filename:
        # No basename given: QC plots were not requested.
        return
    plot_base = argu.plot_filename
    make_gc_plot(fg_gc, bg_gc, plot_base)
    make_len_plot(fg_len, bg_len, plot_base)
    make_dinuc_plot(fg_dinuc_stats, bg_dinuc_stats, plot_base)
    make_dinuc_acgt_only_plot(fg_dinuc_stats, bg_dinuc_stats, plot_base)


def kmer_shuffling_window_generator(argu):
    """
    Handler of the `w` subcommand: k-mer shuffling within a sliding window.

    Loads the foreground from `argu.fg_file`, then calls
    `kmer_win_shuff.generate_sequences` with the k-mer size, window length,
    step, nfold and random seed taken from `argu`. The QC plots are only
    drawn when `argu.plot_filename` is non-empty.
    """
    fg_seqs, fg_gc, fg_len, fg_dinuc_stats = get_seqs(argu.fg_file)
    bg_gc, bg_len, bg_dinuc_stats = kmer_win_shuff.generate_sequences(
        fg_seqs, argu.kmer, argu.winlen, argu.step, argu.nfold,
        argu.random_seed)
    plot_base = argu.plot_filename
    if plot_base:
        # Each plotting helper takes (foreground, background, basename).
        plot_jobs = (
            (make_gc_plot, fg_gc, bg_gc),
            (make_len_plot, fg_len, bg_len),
            (make_dinuc_plot, fg_dinuc_stats, bg_dinuc_stats),
            (make_dinuc_acgt_only_plot, fg_dinuc_stats, bg_dinuc_stats),
        )
        for draw, fg_values, bg_values in plot_jobs:
            draw(fg_values, bg_values, plot_base)


def test_empty_bg_dir(bg_dir):
    """
    Make sure `bg_dir` can be used as a fresh background directory.

    Used when a background file is supplied on the command line:
    - if `bg_dir` is an existing directory that already contains entries,
      the program exits with an explanatory message;
    - if `bg_dir` is an existing empty directory, nothing happens;
    - otherwise the directory (and missing parents) is created.

    Raises:
        SystemExit: `bg_dir` exists and is not empty.
        OSError: the directory could not be created.
    """
    if os.path.isdir(bg_dir):
        if os.listdir(bg_dir):
            # The literals are concatenated, so the first one must end with
            # a space to keep "directory" and "and" apart.
            msg = "### EXITING since both a non-empty background directory "
            msg += "and a background file are given ###"
            sys.exit(msg)
    else:
        try:
            os.makedirs(bg_dir)
        except OSError as exc:
            # Tolerate the directory having appeared between the isdir()
            # check and makedirs(); anything else is a real failure.
            if not (exc.errno == errno.EEXIST and os.path.isdir(bg_dir)):
                raise


# Despite its name this is a runtime check, not a unit test: keep pytest from
# collecting it when this module's names are imported into a test module.
test_empty_bg_dir.__test__ = False


def test_non_empty_bg_dir(bg_dir):
    """
    Make sure `bg_dir` is an existing directory with at least one entry.

    Used when no background file is supplied, so the background directory
    is the only source of background data.

    Raises:
        SystemExit: `bg_dir` is missing, is not a directory, or is empty.
    """
    if not (os.path.isdir(bg_dir) and os.listdir(bg_dir)):
        # The literals are concatenated, so the first one must end with a
        # space to keep "exist" and "or" apart.
        msg = "### EXITING since the background directory does not exist "
        msg += "or is empty ###"
        sys.exit(msg)


# Despite its name this is a runtime check, not a unit test: keep pytest from
# collecting it when this module's names are imported into a test module.
test_non_empty_bg_dir.__test__ = False


def gc_compo_generator(argu):
    """
    Handler of the `g` subcommand (%GC distribution-based chooser).

    Delegates to the length-matching variant when `argu.len_opt` is set and
    to the plain variant otherwise.
    """
    handler = (gc_compo_len_generator if argu.len_opt
               else gc_compo_generator_no_len)
    handler(argu)


def gc_compo_generator_no_len(argu):
    """
    %GC-based background chooser without length matching.

    Steps:
    - bin the foreground from `argu.fg_file` with `GC_compo.fg_GC_bins`;
    - if a background file is given, require `argu.bg_dir` to be empty (it
      is created when missing) and run `GC_compo.bg_GC_bins` on the file and
      directory; otherwise require `argu.bg_dir` to exist and be non-empty;
    - call `GC_compo.generate_sequences`;
    - draw the QC plots when `argu.plot_filename` is non-empty.
    """
    fg_gc, fg_bins, fg_len, fg_dinuc_stats = GC_compo.fg_GC_bins(
        argu.fg_file)
    # NOTE(review): never reassigned, so None is always passed on below;
    # presumably the callee works from bg_dir instead - confirm.
    bg_bins = None
    if not argu.bg_file:
        test_non_empty_bg_dir(argu.bg_dir)
    else:
        test_empty_bg_dir(argu.bg_dir)
        # Return values are deliberately discarded (4-item unpacking kept).
        _, _, _, _ = GC_compo.bg_GC_bins(argu.bg_file, argu.bg_dir)
    bg_gc, bg_len, bg_dinuc_stats = GC_compo.generate_sequences(
        fg_bins, bg_bins, argu.bg_dir, argu.nfold, argu.random_seed)
    if not argu.plot_filename:
        return
    plot_base = argu.plot_filename
    make_gc_plot(fg_gc, bg_gc, plot_base)
    make_len_plot(fg_len, bg_len, plot_base)
    make_dinuc_plot(fg_dinuc_stats, bg_dinuc_stats, plot_base)
    make_dinuc_acgt_only_plot(fg_dinuc_stats, bg_dinuc_stats, plot_base)


def gc_compo_len_generator(argu):
    """
    %GC-based background chooser with length matching (`-l` given).

    Same flow as the variant without length matching, but using the `len`
    flavours of the `GC_compo` functions: `fg_len_GC_bins`,
    `bg_len_GC_bins` and `generate_len_sequences`.
    """
    fg_gc, fg_bins, fg_len, fg_dinuc_stats = GC_compo.fg_len_GC_bins(
        argu.fg_file)
    # NOTE(review): never reassigned, so None is always passed on below;
    # presumably the callee works from bg_dir instead - confirm.
    bg_bins = None
    if not argu.bg_file:
        # Only a directory was given: it must already hold background data.
        test_non_empty_bg_dir(argu.bg_dir)
    else:
        # A background file was given: the directory must be fresh.
        test_empty_bg_dir(argu.bg_dir)
        _, _, _, _ = GC_compo.bg_len_GC_bins(argu.bg_file, argu.bg_dir)
    bg_gc, bg_len, bg_dinuc_stats = GC_compo.generate_len_sequences(
        fg_bins, bg_bins, argu.bg_dir, argu.nfold, argu.random_seed)
    plot_base = argu.plot_filename
    if plot_base:
        plot_jobs = (
            (make_gc_plot, fg_gc, bg_gc),
            (make_len_plot, fg_len, bg_len),
            (make_dinuc_plot, fg_dinuc_stats, bg_dinuc_stats),
            (make_dinuc_acgt_only_plot, fg_dinuc_stats, bg_dinuc_stats),
        )
        for draw, fg_values, bg_values in plot_jobs:
            draw(fg_values, bg_values, plot_base)


def gc_compo_window_generator(argu):
    """
    Handler of the `c` subcommand (%GC distribution and sliding-window
    %GC composition chooser).

    Delegates to the length-matching variant when `argu.len_opt` is set and
    to the plain variant otherwise.
    """
    handler = (gc_compo_len_window_generator if argu.len_opt
               else gc_compo_window_generator_no_len)
    handler(argu)


def gc_compo_len_window_generator(argu):
    """
    Sliding-window %GC background chooser with length matching.

    Bins the foreground with `GC_window_compo.fg_len_GC_bins` (window length
    and step from `argu`), validates or prepares `argu.bg_dir` depending on
    whether a background file is given, calls
    `GC_window_compo.generate_len_sequences`, and draws the QC plots when
    `argu.plot_filename` is non-empty.
    """
    fg_binning = GC_window_compo.fg_len_GC_bins(argu.fg_file, argu.winlen,
                                                argu.step)
    fg_gc, fg_bins, fg_len, fg_dinuc_stats = fg_binning
    # NOTE(review): never reassigned, so None is always passed on below;
    # presumably the callee works from bg_dir instead - confirm.
    bg_bins = None
    if not argu.bg_file:
        test_non_empty_bg_dir(argu.bg_dir)
    else:
        test_empty_bg_dir(argu.bg_dir)
        # Return values are deliberately discarded (4-item unpacking kept).
        _, _, _, _ = GC_window_compo.bg_len_GC_bins(argu.bg_file,
                                                    argu.bg_dir)
    generated = GC_window_compo.generate_len_sequences(
        fg_bins, bg_bins, argu.bg_dir, argu.deviation, argu.winlen,
        argu.step, argu.nfold, argu.random_seed)
    bg_gc, bg_len, bg_dinuc_stats = generated
    if not argu.plot_filename:
        return
    plot_base = argu.plot_filename
    make_gc_plot(fg_gc, bg_gc, plot_base)
    make_len_plot(fg_len, bg_len, plot_base)
    make_dinuc_plot(fg_dinuc_stats, bg_dinuc_stats, plot_base)
    make_dinuc_acgt_only_plot(fg_dinuc_stats, bg_dinuc_stats, plot_base)


def gc_compo_window_generator_no_len(argu):
    """
    Sliding-window %GC background chooser without length matching.

    Bins the foreground with `GC_window_compo.fg_GC_bins` (window length and
    step from `argu`), validates or prepares `argu.bg_dir` depending on
    whether a background file is given, calls
    `GC_window_compo.generate_sequences`, and draws the QC plots when
    `argu.plot_filename` is non-empty.
    """
    fg_binning = GC_window_compo.fg_GC_bins(argu.fg_file, argu.winlen,
                                            argu.step)
    fg_gc, fg_bins, fg_len, fg_dinuc_stats = fg_binning
    # NOTE(review): never reassigned, so None is always passed on below;
    # presumably the callee works from bg_dir instead - confirm.
    bg_bins = None
    if not argu.bg_file:
        # Only a directory was given: it must already hold background data.
        test_non_empty_bg_dir(argu.bg_dir)
    else:
        # A background file was given: the directory must be fresh.
        test_empty_bg_dir(argu.bg_dir)
        _, _, _, _ = GC_window_compo.bg_GC_bins(argu.bg_file, argu.bg_dir)
    generated = GC_window_compo.generate_sequences(
        fg_bins, bg_bins, argu.bg_dir, argu.deviation, argu.winlen,
        argu.step, argu.nfold, argu.random_seed)
    bg_gc, bg_len, bg_dinuc_stats = generated
    plot_base = argu.plot_filename
    if plot_base:
        plot_jobs = (
            (make_gc_plot, fg_gc, bg_gc),
            (make_len_plot, fg_len, bg_len),
            (make_dinuc_plot, fg_dinuc_stats, bg_dinuc_stats),
            (make_dinuc_acgt_only_plot, fg_dinuc_stats, bg_dinuc_stats),
        )
        for draw, fg_values, bg_values in plot_jobs:
            draw(fg_values, bg_values, plot_base)


def kmer_shuffling_arg_parsing(subparsers):
    """
    Register the `k` subcommand (k-mer shuffling generator) on `subparsers`.

    Options: -f/--foreground (required), -k/--kmer, -n/--nfold,
    -p/--plot_filename and -e/--seed. The seed default is the current time
    in whole seconds, taken when the parser is built. Parsed arguments are
    dispatched to `kmer_shuffling_generator` through the `func` default.
    """
    sub = subparsers.add_parser('k', help="k-mer shuffling generator")
    sub.add_argument('-f', '--foreground', dest="fg_file", type=str,
                     action="store", required=True,
                     help="Foreground file in fasta format")
    sub.add_argument("-k", "--kmer", dest="kmer", type=int, action="store",
                     required=False, default=2,
                     help="K-mer used for the shuffling (default: 2)")
    nfold_help = ("How many background sequences per each foreground "
                  "sequence will be generated (default: 1)")
    sub.add_argument('-n', '--nfold', dest="nfold", type=int,
                     action="store", required=False, default=1,
                     help=nfold_help)
    plot_help = ("Basename for the QC plot and metric files "
                 "(default: no QC plot created)")
    sub.add_argument('-p', '--plot_filename', dest="plot_filename",
                     type=str, action="store", required=False, default="",
                     help=plot_help)
    # Time-based default so that unseeded runs differ from one another.
    default_seed = int(round(time.time()))
    sub.add_argument('-e', '--seed', dest='random_seed', type=int,
                     action="store", required=False, default=default_seed,
                     help="Seed number to initialize the random number "
                          "generator")
    sub.set_defaults(func=kmer_shuffling_generator)


def kmer_window_shuffling_arg_parsing(subparsers):
    """
    Register the `w` subcommand (k-mer shuffling within a sliding window)
    on `subparsers`.

    Options: -f/--foreground (required), -k/--kmer, -w/--winlen, -s/--step,
    -n/--nfold, -p/--plot_filename and -e/--seed. The seed default is the
    current time in whole seconds, taken when the parser is built. Parsed
    arguments are dispatched to `kmer_shuffling_window_generator` through
    the `func` default.
    """
    help_str = "k-mer shuffling within a sliding window generator"
    parser_w = subparsers.add_parser('w', help=help_str)
    parser_w.add_argument('-f', '--foreground', required=True, type=str,
                          dest="fg_file", action="store",
                          help="Foreground file in fasta format")
    # Help text now closes its parenthesis, matching the `k` subcommand.
    parser_w.add_argument("-k", "--kmer", required=False, type=int,
                          dest="kmer", action="store", default=2,
                          help="K-mer used for the shuffling (default: 2)")
    parser_w.add_argument("-w", "--winlen", required=False, type=int,
                          dest="winlen", action="store", default=100,
                          help="Window length (default: 100)")
    parser_w.add_argument("-s", "--step", required=False, type=int,
                          dest="step", action="store", default=50,
                          help="Sliding step (default: 50)")
    help_str = "How many background sequences per each foreground sequence "
    help_str += "will be generated (default: 1)"
    parser_w.add_argument('-n', '--nfold', required=False, type=int,
                          dest="nfold", action="store", default=1,
                          help=help_str)
    help_str = "Basename for the QC plot and metric files "
    help_str += "(default: no QC plot created)"
    parser_w.add_argument('-p', '--plot_filename', required=False,
                          dest="plot_filename", type=str, action="store",
                          default="", help=help_str)
    # Time-based default so that unseeded runs differ from one another.
    timeseed = int(round(time.time()))
    help_str = "Seed number to initialize the random number generator"
    parser_w.add_argument('-e', '--seed', required=False, dest='random_seed',
                          type=int, action="store", default=timeseed,
                          help=help_str)
    parser_w.set_defaults(func=kmer_shuffling_window_generator)


def gc_compo_arg_parsing(subparsers):
    """
    Register the `g` subcommand (%GC distribution-based background chooser)
    on `subparsers`.

    Options: -f/--foreground and -r/--bgdirectory (both required),
    -b/--background, -n/--nfold, -l/--length (flag stored as 1, default 0),
    -p/--plot_filename and -e/--seed. The seed default is the current time
    in whole seconds, taken when the parser is built. Parsed arguments are
    dispatched to `gc_compo_generator` through the `func` default.
    """
    # "%%" because argparse %-formats help strings.
    help_str = "%%GC distribution-based background chooser"
    parser_g = subparsers.add_parser('g', help=help_str)
    parser_g.add_argument('-f', '--foreground', required=True, type=str,
                          dest="fg_file", action="store",
                          help="Foreground file in fasta format")
    parser_g.add_argument("-r", "--bgdirectory", required=True, type=str,
                          dest="bg_dir", action="store",
                          help="Background directory")
    parser_g.add_argument("-b", "--background", required=False, type=str,
                          dest="bg_file", action="store",
                          help="Background file in fasta format")
    help_str = "How many background sequences per each foreground sequence "
    help_str += "will be chosen (default: 1)"
    parser_g.add_argument('-n', '--nfold', required=False, type=int,
                          dest="nfold", action="store", default=1,
                          help=help_str)
    help_str = "Try to match the length as closely as possible "
    help_str += "(not set by default)"
    parser_g.add_argument('-l', '--length', required=False, dest="len_opt",
                          action="store_const", const=1, default=0,
                          help=help_str)
    help_str = "Basename for the QC plot and metric files "
    help_str += "(default: no QC plot created)"
    parser_g.add_argument('-p', '--plot_filename', required=False,
                          dest="plot_filename", type=str, action="store",
                          default="", help=help_str)
    # Time-based default so that unseeded runs differ from one another.
    timeseed = int(round(time.time()))
    help_str = "Seed number to initialize the random number generator"
    parser_g.add_argument('-e', '--seed', required=False, dest='random_seed',
                          type=int, action="store", default=timeseed,
                          help=help_str)
    parser_g.set_defaults(func=gc_compo_generator)


def gc_compo_window_arg_parsing(subparsers):
    """
    Register the `c` subcommand (%GC distribution and sliding-window %GC
    composition background chooser) on `subparsers`.

    Options: -f/--foreground and -r/--bgdirectory (both required),
    -b/--background, -w/--winlen, -s/--step, -d/--deviation, -n/--nfold,
    -l/--length (flag stored as 1, default 0), -p/--plot_filename and
    -e/--seed. The seed default is the current time in whole seconds, taken
    when the parser is built. Parsed arguments are dispatched to
    `gc_compo_window_generator` through the `func` default.
    """
    # "%%" because argparse %-formats help strings.
    help_str = "%%GC distribution and %%GC composition within a sliding "
    help_str += "window background chooser"
    parser_c = subparsers.add_parser('c', help=help_str)
    parser_c.add_argument('-f', '--foreground', required=True, type=str,
                          dest="fg_file", action="store",
                          help="Foreground file in fasta format")
    parser_c.add_argument("-r", "--bgdirectory", required=True, type=str,
                          dest="bg_dir", action="store",
                          help="Background directory")
    parser_c.add_argument("-b", "--background", required=False, type=str,
                          dest="bg_file", action="store",
                          help="Background file in fasta format")
    parser_c.add_argument("-w", "--winlen", required=False, type=int,
                          dest="winlen", action="store", default=100,
                          help="Window length (default: 100)")
    parser_c.add_argument("-s", "--step", required=False, type=int,
                          dest="step", action="store", default=50,
                          help="Sliding step (default: 50)")
    help_str = "Deviation from the mean (default: 2.6 for a "
    help_str += "threshold of mean + 2.6 * stdev)"
    parser_c.add_argument("-d", "--deviation", required=False, type=float,
                          dest="deviation", action="store", default=2.6,
                          help=help_str)
    help_str = "How many background sequences per each foreground sequence "
    help_str += "will be chosen (default: 1)"
    parser_c.add_argument('-n', '--nfold', required=False, type=int,
                          dest="nfold", action="store", default=1,
                          help=help_str)
    help_str = "Try to match the length as closely as possible "
    help_str += "(not set by default)"
    parser_c.add_argument('-l', '--length', required=False, dest="len_opt",
                          action="store_const", const=1, default=0,
                          help=help_str)
    help_str = "Basename for the QC plot and metric files "
    help_str += "(default: no QC plot created)"
    parser_c.add_argument('-p', '--plot_filename', required=False,
                          dest="plot_filename", type=str, action="store",
                          default="", help=help_str)
    # Time-based default so that unseeded runs differ from one another.
    timeseed = int(round(time.time()))
    help_str = "Seed number to initialize the random number generator"
    parser_c.add_argument('-e', '--seed', required=False, dest='random_seed',
                          type=int, action="store", default=timeseed,
                          help=help_str)
    parser_c.set_defaults(func=gc_compo_window_generator)


class MyParser(argparse.ArgumentParser):
    """
    ArgumentParser variant whose error handler also shows the full help.
    """
    def error(self, message):
        """
        Write `message` to stderr, print the parser help and exit with
        status 1.
        """
        sys.stderr.write('error: {0}\n'.format(message))
        self.print_help()
        raise SystemExit(1)


def arg_parsing():
    """
    Build the command-line parser and parse `sys.argv`.

    The top-level parser carries the `-v/--version` option and four
    mandatory subcommands (`k`, `w`, `g`, `c`), each registered by its own
    `*_arg_parsing` helper, which also sets the `func` default to the
    matching handler.

    Returns:
        The `argparse.Namespace` produced by `parse_args()`.
    """
    descr = '''Background generator offering the possibility of using very
    different ways of generating backgrounds lying into two categories:
        - Creation of new random sequences (generators):
            - k-mer shuffling using the foreground sequences
                -> type: `biasaway k -h`
            - k-mer shuffling within a sliding window using foreground
              sequences
                -> type: `biasaway w -h`
        - Extraction of sequences from a set of possible background sequences
          (choosers):
            - respecting the %GC distribution of the foreground (using %GC
              bins)
                -> type: `biasaway g -h`
            - respecting the %GC distribution as in the previous item and also
              respecting the %GC composition within a sliding window for %GC
              bin
                -> type: `biasaway c -h`
    '''
    # RawDescriptionHelpFormatter keeps the hand-made layout of `descr`.
    parser = MyParser(description=descr,
                      formatter_class=argparse.RawDescriptionHelpFormatter)
    subparsers = parser.add_subparsers(dest='subcommand',
                                       title="Subcommands",
                                       description="Valid subcommands")
    parser.add_argument('-v', '--version', dest='version', action='version',
                        version='%(prog)s version '+VERSION)

    # A subcommand is mandatory: without one there is no `func` to dispatch
    # to in the main section.
    subparsers.required = True
    kmer_shuffling_arg_parsing(subparsers)
    kmer_window_shuffling_arg_parsing(subparsers)
    gc_compo_arg_parsing(subparsers)
    gc_compo_window_arg_parsing(subparsers)
    argu = parser.parse_args()
    return argu


###############################################################################
#                                   MAIN
###############################################################################
if __name__ == "__main__":
    # Script entry point: parse the command line, then run the handler that
    # the selected subcommand registered as its `func` default.
    cli_args = arg_parsing()
    cli_args.func(cli_args)
