#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import absolute_import, division, print_function, unicode_literals

__doc__ = """add HGVS tags to a VCF file on stdin, output to stdout

eg$ vcf-add-hgvs <in.vcf >out.vcf

"""

import argparse
import gzip
import itertools
import logging
import os
import sys

import bioutils.accessions as ba

import hgvs.edit
import hgvs.location
import hgvs.posedit
import hgvs.variant

# This code is experimental. Pyvcf is not an explicit dependency.
# You must manually install it. Try `pip install pyvcf`.
import vcf
from vcf.parser import _Info as VcfInfo, field_counts as vcf_field_counts


def parse_args(argv):
    # parse command line for configuration files
    ap = argparse.ArgumentParser(
        description = __doc__,
        formatter_class = argparse.ArgumentDefaultsHelpFormatter,
        )
    ap.add_argument('--assembly', '-A',
                    default='GRCh37')
    ap.add_argument('--in-filename', '-i',
                    default='-')
    ap.add_argument('--out-filename', '-o',
                    default='-')
    args = ap.parse_args(argv)
    return args


def build_contig_ac_map(opts,vr):
    """returns map of chrom:accession for VCF contig records

    This function relies on contig records like
       ##contig=<ID=1,length=249250621,assembly=b37>
    
    """

    contig_info = ba.contig_info[opts.assembly]

    def _map_one_contig(c):
        m = [ ci.ac for ci in contig_info if (ci.len == c.length and ci.name == ba.prepend_chr(c.id)) ]
        if len(m) == 1:
            return m[0]
        if len(m) == 0:
            logging.getLogger().warn("No accessions identified for {c}".format(c=c))
        if len(m) > 1:
            logging.getLogger().warn("Multiple accessions identified for {c}".format(c=c))
        
    if vr.contigs:
        contig_ac_map = { c.id:_map_one_contig(c) for c in vr.contigs.values() }
    elif opts.assembly:
        logging.getLogger().warn("VCF file doesn't have a contig block")
        contig_ac_map = { ba.strip_chr(ci.name):ci.ac for ci in contig_info if ci.ac and ci.name }
    else:
        raise RuntimeError("VCF file doesn't have a contig block and --assembly not specified")

    return contig_ac_map


def alts_as_genomic_hgvs(contig_ac_map,r,keep_left_anchor=False):
    """returns a list of HGVS variants corresponding to the ALTs of the
    given VCF record"""
    
    def hgvs_from_vcf_record(r,alt_index):
        """Creates a genomic SequenceVariant from a VCF record and the specified alt"""
        ref,alt,start,end = r.REF,r.ALT[alt_index].sequence,r.start,r.end
        ac = contig_ac_map[r.CHROM]
        assert ac is not None

        if ref == '' and alt != '':
            # insertion
            end += 1
        else:
            start += 1

        if not keep_left_anchor:
            pfx = os.path.commonprefix([ref,alt])
            lp = len(pfx)
            if lp > 0:
                ref = ref[lp:]
                alt = alt[lp:]
                start += lp

        var_g = hgvs.variant.SequenceVariant(
            ac=ac,
            type='g',
            posedit=hgvs.posedit.PosEdit(
                hgvs.location.Interval(
                    start=hgvs.location.SimplePosition(start),
                    end=hgvs.location.SimplePosition(end),
                    uncertain=False),
                hgvs.edit.NARefAlt(
                    ref=ref if ref != '' else None,
                    alt=alt if alt != '' else None,
                    uncertain=False)))

        return str(var_g)

    hgvs_vars = [ hgvs_from_vcf_record(r,alt_index) for alt_index in range(len(r.ALT)) ]
    return hgvs_vars


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    opts = parse_args(sys.argv[1:])

    vr = vcf.Reader(sys.stdin) if opts.in_filename == '-' else vcf.Reader(filename=opts.in_filename)
    
    contig_ac_map = build_contig_ac_map(opts,vr)

    vr.infos['HGVS'] = VcfInfo(
        'HGVS', vcf_field_counts['A'], 'String',
        'VCF record alleles in HGVS syntax')

    vw = vcf.Writer(sys.stdout,vr) if opts.out_filename == '-' else vcf.Writer(filename=opts.out_filename,template=vr)

    for r in vr:
        r.add_info('HGVS', '|'.join(alts_as_genomic_hgvs(contig_ac_map,r)))
        vw.write_record(r)
