#!/usr/bin/env python

# Created on Tue Sep 8 20:45:18 2020
# Author: XiaoTao Wang

## Required modules

import argparse, sys, logging, logging.handlers, traceback, HiCLift, os, subprocess

# Package version string; shown by the -v/--version command-line option
currentVersion = HiCLift.__version__


def getargs():
    """Build the command-line parser and parse ``sys.argv``.

    When the program is started without any argument, ``-h`` is substituted
    for the empty command line so that argparse prints the help message and
    exits.

    Returns
    -------
    tuple
        ``(args, commands)`` where ``args`` is the parsed
        ``argparse.Namespace`` and ``commands`` is the list of raw tokens
        that was handed to the parser.
    """
    cli = argparse.ArgumentParser(
        description='''Convert genomic coordinates of contact pairs from
                                     one assembly to another.''',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # Shorthand: every option below is registered on the same parser
    add = cli.add_argument

    ## Version
    add('-v', '--version', action='version',
        version=' '.join(('%(prog)s', currentVersion)),
        help='Print version number and exit.')

    ## Input / output description
    add('--input', help='Input file path.')
    add('--input-format', choices=['pairs', 'hic-pro', 'cooler', 'juicer'], default='pairs',
        help='''The input format. pairs: 4DN pairs; hic-pro: allValidPairs outputted by HiC-Pro;
                        cooler: Cool URI; juicer: .hic file.''')
    add('--out-pre', help='Prefix of the output file names')
    add('--output-format', choices=['pairs', 'cool', 'hic'], default='pairs',
        help='The output format.')
    add('--high-res', action='store_true',
        help='''If specified, bin pairs at 11 base-pair-delimited resolutions:
                        2500000,1000000,500000,250000,100000,50000,25000,10000,5000,2000,1000. The default setting is binning pairs at
                        9 resolutions: 2500000,1000000,500000,250000,100000,50000,25000,10000,5000. This parameter is only valid when
                        "--output-format" is set to "cool" or "hic".''')
    add('--out-chromsizes',
        help='''Path to the file containing chromosome sizes of the target assembly.
                        The chromosome order in this file will be used to flip inter-chromosomal pairs.''')

    ## Assemblies and chain file
    add('--in-assembly', default='hg19', help='Genome assembly of the input.')
    add('--out-assembly', default='hg38', help='Target assembly of the output.')
    add('--chain-file',
        help='''The coordinate conversion chain file from UCSC. If not provided, the file
                        will be internally downloaded according to "--in-assembly" and "--out-assembly".''')

    ## Runtime resources and logging
    add('--tmpdir', default='.HiCLift', help='Temporary folder for intermediate results.')
    add('--memory', default='8G', help='The amount of allocated memory.')
    add('--nproc', type=int, default=8, help='Number of allocated processes')
    add('--logFile', default='HiCLift.log', help='Logging file name.')

    ## An empty command line is treated as a request for help
    commands = sys.argv[1:] or ['-h']

    return cli.parse_args(commands), commands


def run():
    """Command-line entry point.

    Parses the command line, attaches a console handler and a rotating file
    handler to the root logger, logs the value of every argument, and then
    calls ``HiCLift.utilities.liftover`` with those values.

    When the input and output assemblies are identical and the requested
    output format corresponds to the input format, nothing is converted: a
    message is logged and the function returns.
    """
    args, commands = getargs()

    # Nothing below is needed for a help or version request
    if commands[0] in ('-h', '-v', '--help', '--version'):
        return

    ## Root logger configuration: DEBUG on the logger, INFO on each handler
    root = logging.getLogger()
    root.setLevel(logging.DEBUG)
    stream_handler = logging.StreamHandler()
    file_handler = logging.handlers.RotatingFileHandler(
        args.logFile, maxBytes=100000, backupCount=5
    )
    layout = logging.Formatter(
        fmt='%(name)-25s %(levelname)-7s @ %(asctime)s: %(message)s',
        datefmt='%m/%d/%y %H:%M:%S',
    )
    # Both handlers share one level and one format
    for handler in (stream_handler, file_handler):
        handler.setLevel('INFO')
        handler.setFormatter(layout)
        root.addHandler(handler)

    ## Report the argument setting, one line per argument
    report = (
        ('# Input path = {0}', args.input),
        ('# Input format = {0}', args.input_format),
        ('# Output prefix = {0}', args.out_pre),
        ('# Output format = {0}', args.output_format),
        ('# Chromosome Sizes of the output assembly = {0}', args.out_chromsizes),
        ('# Generate contact maps at 11 resolutions = {0}', args.high_res),
        ('# Input assembly = {0}', args.in_assembly),
        ('# Output assembly = {0}', args.out_assembly),
        ('# Chain file = {0}', args.chain_file),
        ('# Temporary Dir = {0}', args.tmpdir),
        ('# Allocated memory = {0}', args.memory),
        ('# Number of Processes = {0}', args.nproc),
        ('# Log file name = {0}', args.logFile),
    )
    lines = ['# ARGUMENT LIST:']
    lines.extend(template.format(value) for template, value in report)
    root.info('\n' + '\n'.join(lines))

    # Imported only after logging is configured and the arguments are reported
    from HiCLift.utilities import liftover

    if args.in_assembly == args.out_assembly:
        root.info('Trying to perform a pure format conversion without liftover ...')
        # (input format, output format) pairs for which there is nothing to convert
        unchanged = {('pairs', 'pairs'), ('cooler', 'cool'), ('juicer', 'hic')}
        if (args.input_format, args.output_format) in unchanged:
            root.info('The target format is the same as the input format, exit ...')
            return

    # The fifth positional argument and ``resolution`` are always passed as
    # None here; their meaning is defined by ``liftover`` itself.
    liftover(
        args.input, args.out_pre, args.input_format, args.output_format,
        None, args.out_chromsizes,
        args.in_assembly, args.out_assembly,
        args.chain_file,
        resolution=None,
        nproc_in=args.nproc,
        nproc_out=args.nproc,
        tmpdir=args.tmpdir,
        memory=args.memory,
        high_res=args.high_res,
    )

# Start the command-line workflow only when this file is executed as a script
if __name__ == '__main__':
    run()