#!/usr/bin/env python3

import argparse
import re
import sys
import regex
import time
import yaml

sys.path.append('../itermae')
import itermae

def build_parser():
    """Build the command-line argument parser for the itermae launcher.

    The automatic ``-h/--help`` action is disabled (``add_help=False``) and
    re-declared by hand in an untitled group, so that it is not printed
    under argparse's default 'optional arguments' header. The caller is
    therefore responsible for checking ``args.help`` after parsing.

    Returns:
        argparse.ArgumentParser: parser declaring every option of the tool.
    """
    parser = argparse.ArgumentParser(
        description="itermae - iteratively chop up sequences using fuzzy regular expressions. ",
        formatter_class=argparse.RawTextHelpFormatter,
        add_help=False)

    parser_config = parser.add_argument_group('Configuration file (optional)')
    parser_config.add_argument("--config",default=False,
        help="""\
    File path to the config file. If specified, 
    overrules all other relevant arguments. 
    See examples/docs. """)

    parser_input = parser.add_argument_group('Defining input sources')
    parser_input.add_argument("-i","--input",default="STDIN",
        help="""\
    Specify where the input reads are from. This can 
    be a file path, but the suggested default is 
    standard input ( 'STDIN' ).""")
    parser_input.add_argument("-z","--gzipped",action="store_true",
        help="""\
    Use this flag if the input is a gzipped file.""")
    # The trailing backslash keeps the help text from starting with an
    # empty line, matching every other option.
    parser_input.add_argument("--input-format",default='fastq',
        help="""\
    Specify what format the input is. Default is 
    'FASTQ'. I expect this, or 'SAM', 'FASTA', or 
    'txt'. Case insensitive.""")

    parser_output = parser.add_argument_group('Defining where to output')
    parser_output.add_argument("-o","--output",default="STDOUT",
        help="""\
    Specify where to output successful matched 
    groups to. The recommended default is standard 
    out ( 'STDOUT' ), but this can also be a 
    filepath to write.""")
    parser_output.add_argument("--output-format",default='sam',
        help="""\
    Specify what format the output should be in. 
    Default is an unmapped SAM ('SAM'), also 
    available are 'FASTQ' and 'FASTA', case 
    insensitive.""")
    parser_output.add_argument("-f","--failed",default=None,
        help="""\
    Optional filepath for passing-through the reads 
    that failed at any stage of matching, filtering,
    or forming outputs. Can also be directed to 
    'STDOUT' or 'STDERR'.""")
    parser_output.add_argument("-r","--report",default=None,
        help="""\
    Optional filepath for writing a report of 
    read-level statistics. This is a large 
    inefficient output, but useful for debugging by 
    using with a small subset of the data. 
    (ex: 'head -n 10000 file.fq | tail -n 100' )""")

    parser_match = parser.add_argument_group('Matches')
    parser_match.add_argument("-m","--match",action="append",
        help="""\
    Specify what is being matched against, the 
    regular expression, and what groups to extract.
    Format example: 

    'someInputGroup > regExprWith(?<someGroupName>MatchGroups)'

    If no input group is specified on the left of a
    ' > ' delimiter, then I assume you mean the raw 
    input sequence (ie 'input'). This, and 
    'dummyspacer' are reserved names for groups (see
    docs). Each match is done in the order you 
    specify, so later matches can use previous 
    matching groups as inputs (like 'inputGroup' 
    in the above example). 

    *Importantly*, all input sequences are converted to 
    uppercase, so write all regex in uppercase !

    For more details, please refer to the README, 
    documentation, and tutorials, and the regex 
    module documentation.""")

    parser_groups = parser.add_argument_group(
        'Define groups output',
        """\
    Each ID, sequence, and filter is grouped together used in order, 
    so the first ID is used with the first sequence and the first
    filter -- unless there is only one, in which case it is recycled
    for all groups output.
    """)
    parser_groups.add_argument("-oi","--output-id",action="append",
        help="""\
    The ID field of an output. These should evaluate
    to a string in Python. You can build this by 
    concatenating together parts of the matched 
    groups, such as:

    'input.id+\"_\"+umi.seq' 

    to append the index sequence to the input's ID.""")
    parser_groups.add_argument("-os","--output-seq",action="append",
        help="""\
    The sequence of an output. This is evaluated to
    form a BioPython SeqRecord, so can be assembled 
    by combining the names of matched groups. 
    For example:

    'sample+barcode+umi'

    would append the sequence of these three groups 
    together. Group 'dummyspacer' is also available 
    to insert an 'X' into the sequence for 
    subsequent parsing.""")
    parser_groups.add_argument("-of","--output-filter",action="append",
        help="""\
    Define a filter that must be true in order to
    output this group. This must evaluate to True or 
    False (in python), and can use some attributes 
    of the matches or matched groups.  For example: 

    'umi.length == 5 & statistics.mean(barcode.quality) >= 30'

    will only output when the matched UMI is 5 bases
    and the mean quality of the barcode match is more
    than 30. While

    'sample_barcode == \"TTCAC\"'

    will only output when the sample_barcode group is
    exactly that sequence. 

    See tutorial/documentation for details and more
    examples. 

    If there is only one filter defined, it is 
    recycled to filter for all output groups.""")

    parser_misc = parser.add_argument_group('')
    parser_misc.add_argument("-v","--verbose",action="count",default=0,
        help="""\
    Level of information to pipe out to STDERR. 
      Adding none means itermae runs silently.
      -v prints setup messages and start-stop messages.
      -v -v also prints read-level details.
      -v -v -v also prints match-level details.""")
    parser_misc.add_argument("-h","--help",action="store_true",default=False)

    return parser


def describe_configuration(configuration):
    """Return the human-readable setup summary as a list of lines.

    Every value is passed through ``str()`` so that a non-string entry
    (for example ``None``) cannot raise a ``TypeError`` while the summary
    is being assembled.

    Args:
        configuration: mapping holding the keys 'input', 'input_format',
            'input_gzipped', 'output', 'output_format', 'failed', 'report',
            'verbosity', 'matches' (iterable of mappings with 'input' and
            'regex') and 'output_groups' (iterable of mappings whose 'id',
            'seq' and 'filter' values are indexable; element 0 is shown).

    Returns:
        list[str]: the summary lines, without trailing newlines.
    """
    lines = [
        'Configured as:',
        '    input from: '+str(configuration['input']),
        '    of format: '+str(configuration['input_format']),
        '    is it gzipped?: '+str(configuration['input_gzipped']),
        '    output APPENDING to: '+str(configuration['output']),
        '    in format is: '+str(configuration['output_format']),
        '    failed being APPENDED to file: '+str(configuration['failed']),
        '    report being APPENDED to file: '+str(configuration['report']),
        '    with verbosity set at: '+str(configuration['verbosity']),
        '    doing these matches:',
    ]
    for match in configuration['matches']:
        lines.append('        input: '+str(match['input']))
        lines.append('        regex: '+str(match['regex']))
    lines.append('    writing these outputs:')
    for group in configuration['output_groups']:
        lines.append('        id: '+str(group['id'][0]))
        lines.append('        seq: '+str(group['seq'][0]))
        lines.append('        filter: '+str(group['filter'][0]))
    return lines


def main(argv=None):
    """Parse the command line, build the configuration and run the reader.

    Args:
        argv: argument strings to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        int: process exit status. 0 both after a normal run and after
        printing help, since asking for help is not an error.

    Raises:
        Exception: whatever ``itermae.config_from_file`` or
            ``itermae.config_from_args`` raised, re-raised after a short
            note has been written to STDERR.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # Necessary since the auto-'help' argument is disabled so that it would
    # not print as part of an 'optional' header.
    if args.help:
        parser.print_help()
        return 0

    if args.config:
        try:
            configuration = itermae.config_from_file(args.config)
        except Exception:
            print("Configuring from that file '"+str(args.config)+"' failed.",
                file=sys.stderr)
            raise
    else:
        try:
            configuration = itermae.config_from_args(args)
        except Exception:
            print("Configuring from arguments failed.",file=sys.stderr)
            raise

    # Gate every message on the configured verbosity rather than on
    # args.verbose, so a verbosity set through --config is honoured for the
    # start/stop messages too.
    # NOTE(review): presumably config_from_args copies --verbose into
    # configuration['verbosity'] -- confirm against itermae.
    verbosity = configuration['verbosity']

    if verbosity >= 1:
        for line in describe_configuration(configuration):
            print(line,file=sys.stderr)
        print("["+str(time.time())+"] : Begin running the reader function "+
            "to initiate chopping...",file=sys.stderr)

    # Keys of the configuration that this script itself reads:
    #   input, input_format, input_gzipped,
    #   output, output_format, failed, report,
    #   matches, output_groups, verbosity
    itermae.reader(configuration)

    if verbosity >= 1:
        print("["+str(time.time())+"]"+" : "+
            "All worked 'till the work is done --- or some fatal error.",
            file=sys.stderr)

    return 0


if __name__ == '__main__':
    # sys.exit rather than the exit() builtin: the latter is injected by the
    # site module and is not guaranteed to exist (e.g. under 'python -S').
    sys.exit(main())
