#!/usr/bin/env python2 -u
import sys
import immunedb.common.config as config

from immunedb.importing.delimited import DEFAULT_MAPPINGS, run_import
from immunedb.identification.identify import IdentificationProps
from immunedb.identification.genes import JGermlines


def parse_remaps(remap_specs):
    """Turn ``--remap-js`` values into a ``{FROM: TO}`` dictionary.

    :param remap_specs: Iterable of ``FROM:TO`` strings, or ``None`` when the
        option was not given on the command line.

    :returns: A dictionary mapping each FROM value to its TO value.  The
        dictionary is empty when ``remap_specs`` is ``None`` or empty.

    :raises ValueError: If an entry does not split into exactly two parts on
        ``:``.

    """
    remaps = {}
    for spec in remap_specs or ():
        parts = spec.split(':')
        if len(parts) != 2:
            raise ValueError('--remap-js must be in format FROM:TO')
        remaps[parts[0]] = parts[1]
    return remaps


def validate_meta(meta_specs):
    """Check that every ``--meta`` value contains a ``=`` separator.

    :param meta_specs: Iterable of ``KEY=VALUE`` strings, or ``None`` when the
        option was not given on the command line.

    :raises ValueError: If any entry does not contain ``=``.

    """
    for spec in meta_specs or ():
        if '=' not in spec:
            raise ValueError('--meta must be in format KEY=VALUE')


def main():
    """Parse the command line, validate it and run the delimited import.

    :returns: Whatever ``run_import`` returns; the caller hands it to
        ``sys.exit``.

    """
    parser = config.get_base_arg_parser(
        'Imports V/J identifications from delimited output')
    parser.add_argument('input_file', help='Gapped NT file')
    parser.add_argument('v_germlines', help='FASTA file with IMGT gapped '
                        'V-gene germlines')
    parser.add_argument('j_germlines', help='FASTA file with J-gene '
                        'germlines. The final nucleotide in all genes must be '
                        'aligned. Sequence cannot contain any gaps.')

    parser.add_argument('--upstream-of-cdr3', type=int,
                        default=JGermlines.defaults['upstream_of_cdr3'],
                        help='''The number of nucleotides in the J germlines
                        upstream of the CDR3''')
    parser.add_argument('--anchor-len', type=int,
                        default=JGermlines.defaults['anchor_len'],
                        help='''The number of nucleotides at the end of the J
                        germlines to use as anchors.''')
    parser.add_argument('--min-anchor-len', type=int,
                        default=JGermlines.defaults['min_anchor_len'],
                        help='''The minimum number of nucleotides in the J
                        germline anchors required to match the sequence.''')
    parser.add_argument('--max-padding', type=int,
                        default=IdentificationProps.defaults['max_padding'],
                        help='''If specified, discards sequences with too much
                        padding.''')
    parser.add_argument('--trim-to', type=int,
                        default=IdentificationProps.defaults['trim_to'],
                        help='''If specified, trims the beginning N bases of
                        each sequence.  Useful for removing primers within
                        the V sequence.''')
    parser.add_argument('--remap-js', nargs='+', default=None,
                        help='Remaps J genes to others in the germline file. '
                        'Format is FROM:TO[ FROM:TO[...]].  For example '
                        'IGHJ1:IGHJ2 will remap any IGHJ1 gene to IGHJ2. The '
                        'FROM gene can be a prefix but TO must be the full '
                        'gene name')

    parser.add_argument('--meta', nargs='+', help='''Additional metadata for
                        the sample.  Must be in the format KEY=VALUE (e.g.
                        --meta tissue=Spleen).  May be specified multiple
                        times''')

    # Expose one --<key> option per column mapping so input headers can be
    # overridden.  items() is used instead of iteritems() because it exists on
    # both Python 2 and Python 3.
    for key, value in DEFAULT_MAPPINGS.items():
        flag = '--{}'.format(key.replace('_', '-'))
        if value is None:
            parser.add_argument(flag, help='Header for {}'.format(key))
        else:
            parser.add_argument(
                flag,
                default=value,
                help='Header for {} (Default: {})'.format(key, value)
            )

    args = parser.parse_args()

    # The comparison only applies when both options hold truthy values (unset
    # or zero values skip it, as before).
    if (args.max_padding and args.trim_to and
            args.max_padding < args.trim_to):
        parser.error('--max-padding cannot be less than --trim-to')

    # remaps is always bound (empty when --remap-js is omitted) and --meta is
    # read from args, so neither step can raise NameError any more.
    try:
        remaps = parse_remaps(args.remap_js)
        validate_meta(args.meta)
    except ValueError as ex:
        # parser.error prints the usage message and exits.
        parser.error(str(ex))

    session = config.init_db(args.db_config)

    return run_import(session, args, remaps)


if __name__ == '__main__':
    sys.exit(main())
