#!/usr/bin/env python
import os
import sys
import getopt
import string
import subprocess
from collections import defaultdict
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import generic_dna
from itertools import dropwhile

__author__ = 'Kwangbom "KB" Choi, Ph. D.'


# Usage text shown for -h/--help. main() raises Usage(help_message), and the
# Usage handler in main() prints it to stderr before returning exit status 2.
help_message = '''
Usage:
    create-hybrid -F <fasta_files> [ -s <hap_list> -o <out_file> --create-bowtie-index ]

Input:
    -F <fasta_files> : List of fasta files (comma delimited)
    -s <hap_list>    : Names of haplotypes to be used instead (comma delimited, in the order of genomes)
    -o <out_file>    : Output file name (default: './emase.pooled.targets.fa')

Parameters:
    --help, -h            : shows this help message
    --create-bowtie-index : builds bowtie1 index
'''


class Usage(Exception):
    """Signal that main() should stop and show a message to the user.

    main() raises this for option-parsing errors and for -h/--help, then
    catches it, prints ``msg`` to stderr prefixed with the script name, and
    returns exit status 2.
    """
    def __init__(self, msg):
        # Either a text message or a getopt error object; main() prints it
        # through str(), so anything with a sensible str() works.
        self.msg = msg


def _log(message):
    """Write one line to stderr (valid on both Python 2 and Python 3)."""
    sys.stderr.write("%s\n" % message)


def _ensure_parent_dir(path):
    """Create the directory that will contain *path* if it does not exist.

    A bare file name has an empty dirname, which means "current directory";
    nothing needs to be created in that case.
    """
    parent = os.path.dirname(path)
    if parent and not os.path.isdir(parent):
        os.makedirs(parent)


def _append_haplotype(fasta, hapname, seqout, lenout):
    """Append one FASTA file to the pooled outputs, tagging IDs with *hapname*.

    Every header line (starting with '>') is cut at its first whitespace and
    gets '_<hapname>' appended; sequence lines are copied unchanged. For each
    record a '<id>\\t<length>' line is written to *lenout*, where the length
    counts sequence characters with trailing whitespace removed.

    Args:
        fasta: Path of the FASTA file to read.
        hapname: Haplotype name used as the ID suffix.
        seqout: Writable file object receiving the pooled FASTA.
        lenout: Writable file object receiving the ID/length table.
    """
    suffix = '_' + hapname
    seqid = None            # ID of the record being read; None before any header
    seqlen = 0
    ends_with_newline = True
    with open(fasta) as fh:
        for line in fh:
            if line.startswith('>'):
                if seqid is not None:
                    # Close the previous record's entry in the length table.
                    lenout.write('%d\n' % seqlen)
                header = line.split()[0] + suffix
                seqid = header[1:]
                seqlen = 0
                lenout.write('%s\t' % seqid)
                line = header + '\n'
            elif seqid is not None:
                seqlen += len(line.rstrip())
            seqout.write(line)
            ends_with_newline = line.endswith('\n')
    if not ends_with_newline:
        # Without this, the next file's first header would be glued onto the
        # last sequence line of a file that lacks a final newline.
        seqout.write('\n')
    if seqid is not None:
        lenout.write('%d\n' % seqlen)
    else:
        _log("Warning: no fasta record found in %s" % fasta)


def main(argv=None):
    """Pool several FASTA files into one, suffixing IDs with haplotype names.

    Writes the pooled FASTA to the output file and an '<id>\\t<length>' table
    to the same path with its extension replaced by '.info'. Optionally runs
    ``bowtie-build`` on the pooled FASTA, writing the index to the same base
    path plus '.bowtie1'.

    Args:
        argv: Argument list including the program name at index 0.
            Defaults to ``sys.argv``.

    Returns:
        None on success; 2 for usage errors (including -h/--help); 1 if
        ``bowtie-build`` cannot be started; otherwise the non-zero exit
        status of ``bowtie-build`` when index building fails.
    """
    if argv is None:
        argv = sys.argv
    try:
        try:
            opts, _ = getopt.getopt(argv[1:], "hF:s:o:", ["help", "create-bowtie-index"])
        except getopt.error as msg:
            raise Usage(msg)

        # Default values of vars
        fastalist = []
        haplist = None
        outfile = './emase.pooled.targets.fa'
        build_bowtie_index = False

        for option, value in opts:
            if option in ("-h", "--help"):
                raise Usage(help_message)
            elif option == "-F":
                fastalist = value.split(',')
            elif option == "-s":
                haplist = value.split(',')
            elif option == "--create-bowtie-index":
                build_bowtie_index = True
            elif option == "-o":
                outfile = value
        num_haps = len(fastalist)

        # Check if the required options are given
        if num_haps < 2:
            _log("At least two fasta files should be given.")
            return 2
        if haplist is None:
            if num_haps > len(string.ascii_uppercase):
                # Default names are single letters, so there are only 26.
                _log("More than %d fasta files were given; please name the haplotypes with -s."
                     % len(string.ascii_uppercase))
                return 2
            haplist = list(string.ascii_uppercase[:num_haps])
            _log("Default haplotype names will be used: %s" % ', '.join(haplist))
        if len(haplist) != num_haps:
            _log("The number of specified haplotypes is not matching to the number of fasta files.")
            return 2

        # Fail before the output files are created/truncated.
        missing = [fasta for fasta in fastalist if not os.path.isfile(fasta)]
        if missing:
            _log("Cannot find fasta file(s): %s" % ', '.join(missing))
            return 2

        _ensure_parent_dir(outfile)

        # Get pooled transcriptome
        outbase = os.path.splitext(outfile)[0]
        lenfile = outbase + '.info'
        with open(outfile, 'w') as seqout:
            with open(lenfile, 'w') as lenout:
                for fasta, hapname in zip(fastalist, haplist):
                    _log("Adding suffix \'_%s\' to the sequence ID's of %s..." % (hapname, fasta))
                    _append_haplotype(fasta, hapname, seqout, lenout)

        # Build bowtie index for the pooled transcriptome
        if build_bowtie_index:
            out_index = outbase + '.bowtie1'
            _log("Building bowtie1 index...")
            try:
                # Argument list, no shell: paths with spaces or shell
                # metacharacters are passed through verbatim.
                status = subprocess.call(["bowtie-build", outfile, out_index])
            except OSError as err:
                _log("Could not run bowtie-build: %s" % err)
                return 1
            if status != 0:
                _log("bowtie-build failed with exit status %d." % status)
                return status

    except Usage as err:
        _log(os.path.basename(sys.argv[0]) + ": " + str(err.msg))
        return 2


if __name__ == "__main__":
    # Executed as a script: hand main()'s return value to the interpreter as
    # the process exit status.
    exit_status = main()
    sys.exit(exit_status)
