#!/usr/bin/env python

"""
snpflip

Report reverse and ambiguous strand SNPs.
(Visit github.com/endrebak/snpflip for examples and help.)

Usage:
    snpflip --fasta-genome=FA --bim-file=BIM [--output-prefix=PRE]
    snpflip --help
    snpflip --version

Arguments:
    -f FA --fasta-genome=FA     fasta genome
    -b BIM --bim-file=BIM       plink bed file (extended variant information)

Options:
    -h --help                   show this message
    -v --version                show the version number
    -o PRE --output-prefix=PRE  the prefix of the output-files
                                (./snpflip_output/<bim_basename> by default)

Note:
    To enable snpflip to match the chromosomes in the `.bim` and `.fa` files, the
    chromosomes in the `.bim` file must be called `1, 2, ... X, Y, M`
    while the chromosomes in the fasta file must be called the same, or
    `chr1, chr2, ... chrX, chrY, chrM`
"""

from os.path import dirname, basename, splitext as split_ext
from subprocess import call

import docopt

from snp_flip.table import create_snp_table, find_strand
from snp_flip.version import __version__

if __name__ == '__main__':
    args = docopt.docopt(__doc__, version=__version__)

    snp_table = create_snp_table(args["--bim-file"], args["--fasta-genome"])
    snp_table = find_strand(snp_table)

    prefix = args["--output-prefix"]

    if not prefix:
        prefix = "snpflip_output/" + basename(split_ext(args["--bim-file"])[0])

    if "/" in prefix:
        call("mkdir -p {}".format(dirname(prefix)), shell=True)

    reverse = snp_table["strand"] == "reverse"
    ambiguous = snp_table["strand"] == "ambiguous"

    snp_table[reverse]["snp_name"].to_csv(prefix + ".reverse", sep="\t",
                                          index=False)
    snp_table[ambiguous]["snp_name"].to_csv(prefix + ".ambiguous", sep="\t",
                                            index=False)
    snp_table.to_csv(prefix + ".annotated_bim", sep="\t", index=False)
