#!/usr/bin/env python3

import argparse
import os
import sys

from shmlast.app import RBL, CRBL
from shmlast.util import prog_string
from shmlast import __version__


def rbl_func(args):
    """Handle the ``rbl`` subcommand.

    Prints the program banner, builds an ``RBL`` object from the parsed
    command-line options and hands control to its ``run`` method.

    Args:
        args: Parsed argparse namespace. The attributes read here are
            ``query``, ``database``, ``output``, ``n_threads``,
            ``evalue_cutoff``, ``pbs``, ``action``, ``profile`` and
            ``profile_output``.

    Returns:
        Whatever ``RBL.run`` returns.
    """
    banner = prog_string('Reciprocal Best LAST', __version__, args.action)
    print(banner)

    app = RBL(
        args.query,
        args.database,
        args.output,
        n_threads=args.n_threads,
        cutoff=args.evalue_cutoff,
        pbs=args.pbs,
    )

    # ``and`` yields ``args.profile`` itself when that flag is falsy and
    # ``args.profile_output`` otherwise; that value is passed on untouched.
    profile_target = args.profile and args.profile_output
    return app.run(doit_args=[args.action], profile_fn=profile_target)


def crbl_func(args):
    """Handle the ``crbl`` subcommand.

    Prints the program banner, builds a ``CRBL`` object from the parsed
    command-line options and hands control to its ``run`` method.

    Args:
        args: Parsed argparse namespace. The attributes read here are
            ``query``, ``database``, ``output``, ``n_threads``,
            ``evalue_cutoff``, ``pbs``, ``action``, ``profile`` and
            ``profile_output``.

    Returns:
        Whatever ``CRBL.run`` returns.
    """
    banner = prog_string('Conditional Reciprocal Best LAST',
                         __version__,
                         args.action)
    print(banner)

    app = CRBL(
        args.query,
        args.database,
        args.output,
        n_threads=args.n_threads,
        cutoff=args.evalue_cutoff,
        pbs=args.pbs,
    )

    # ``and`` yields ``args.profile`` itself when that flag is falsy and
    # ``args.profile_output`` otherwise; that value is passed on untouched.
    profile_target = args.profile and args.profile_output
    return app.run(doit_args=[args.action], profile_fn=profile_target)


# Description for the top-level parser. Each piece below is one line of the
# text; the leading and trailing newlines are part of the value, and the
# last text line deliberately keeps its trailing space.
desc = (
    "\n"
    "shmlast is a reimplementation of the Conditional Reciprocal Best\n"
    "Hits algorithm for finding potential orthologs between\n"
    "a transcriptome and a species-specific protein database. It uses the LAST\n"
    "aligner and the pydata stack to achieve much better performance while staying in the Python ecosystem. \n"
)

# Description for the ``rbl`` subcommand parser.
rbl_desc = (
    "\n"
    "Run Reciprocal Best Hits between the query and database.\n"
)

# Description for the ``crbl`` subcommand parser.
crbl_desc = (
    "\n"
    "Run Conditional Reciprocal Best Hits between the query and\n"
    "database.\n"
)

def main(argv=None):
    """Parse the command line and dispatch to the selected subcommand.

    Two subcommands, ``rbl`` and ``crbl``, are registered with an identical
    set of options; each stores its handler function under ``func`` in the
    parsed namespace.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is what
            this function always did before the parameter existed. Passing
            an explicit list lets the CLI be driven from other code.

    Returns:
        The value returned by the chosen handler (``rbl_func`` or
        ``crbl_func``). When no subcommand is given the fallback handler
        prints the help text and its result is returned instead.
    """
    parser = argparse.ArgumentParser(
        description=desc,
        # Keep the line breaks of ``desc`` exactly as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--version', action='version',
                        version='%(prog)s ' + __version__)
    # Fallback used when no subcommand is named: show the help text.
    parser.set_defaults(func=lambda _: parser.print_help())
    subparsers = parser.add_subparsers()

    def add_common_args(p):
        """Attach the options shared by every subcommand to *p*; return *p*."""
        p.add_argument('-q', '--query', required=True,
                       help='FASTA file with query transcriptome.')
        p.add_argument('-d', '--database', required=True,
                       help='FASTA file with database proteins.')
        p.add_argument('-o', '--output',
                       help=('File to place the CSV format CRBL hits. '
                             'By default, QUERY.x.DATABASE.{c}rbl.csv.'))
        p.add_argument('--n_threads', type=int, default=1,
                       help='Number of threads to use.')
        p.add_argument('-e', '--evalue-cutoff', default=0.00001, type=float,
                       help='Maximum evalue to accept.')
        p.add_argument('--action', default='run',
                       help=('pydoit action. A common alternative'
                             ' is "clean."'))
        p.add_argument('--profile', action='store_true', default=False,
                       help='If True, record CPU time.')
        p.add_argument('--profile-output', default=None,
                       help='Filename for profile results.')
        # Stored as ``pbs`` because that is the keyword the handlers pass on.
        p.add_argument('--sshloginfile', dest='pbs', default=None,
                       help='Distribute execution across the specified nodes.')
        return p

    # (subcommand name, description text, handler) for each subcommand.
    commands = (
        ('rbl', rbl_desc, rbl_func),
        ('crbl', crbl_desc, crbl_func),
    )
    for cmd_name, cmd_desc, handler in commands:
        sub = add_common_args(
            subparsers.add_parser(cmd_name, description=cmd_desc))
        sub.set_defaults(func=handler)

    args = parser.parse_args(argv)
    return args.func(args)
  

if __name__ == '__main__':
    # Run the CLI only when executed as a script, and hand the value
    # returned by main() to sys.exit as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
