#!/usr/bin/env python
import sys
from pita.io import read_gff_transcripts
from BCBio import GFF
from pita.util import get_overlapping_models

# Exactly one command-line argument (the GFF3 path) is expected; anything
# else prints the usage line to stderr and exits with status 1.
if len(sys.argv) != 2:
    sys.stderr.write("Usage: gff3tobed12 <gff3> > <bed12>\n")
    sys.exit(1)

# The handle stays open at module level so the conversion loop further
# down can hand it to the GFF parser.
infile = sys.argv[1]
fobj = open(infile)

def _gff_type_iterator(feature, ftypes):
    """Yield the leaf features below ``feature``, depth-first, left to right.

    A leaf is a feature that has no ``sub_features`` attribute or whose
    ``sub_features`` is empty. A feature without children is yielded as-is.

    ``ftypes`` is kept in the signature but is not consulted: no filtering
    on feature type takes place.
    """
    pending = [feature]
    while pending:
        node = pending.pop()
        if hasattr(node, "sub_features") and len(node.sub_features) != 0:
            # Push the children reversed so the first child is popped (and
            # therefore explored) first, preserving left-to-right order.
            pending.extend(reversed(list(node.sub_features)))
        else:
            yield node
# One BED12 record: chrom, start, end, name, score, strand, thickStart,
# thickEnd, itemRgb, blockCount, blockSizes, blockStarts.
bedline = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}"
# Strand values (given either as int or as str) mapped to BED symbols; a
# missing strand (None) is written as "+".
smap = {"1":"+",1:"+","-1":"-",-1:"-", None:"+"}


def _is_utr(part):
    """Return True when the feature type (part[4]) contains "UTR", any case."""
    return "UTR" in part[4].upper()


def _absorb_flanking_utrs(parts):
    """Work out the thick (coding) region and fuse flanking UTRs into exons.

    ``parts`` is a non-empty list of ``[start, end, name, strand, type]``
    entries sorted by start; it is modified in place.  The UTR directly before
    the first non-UTR part, and the one directly after the last non-UTR part,
    are merged into that part when they touch or overlap it, so that a UTR and
    the coding piece of the same exon become a single block.  UTRs separated
    from the coding part by a gap (an intron) are kept as separate blocks.

    Returns ``(cds_start, cds_end)``: start of the first and end of the last
    non-UTR part.  When every part is a UTR there is no thick region and both
    values are set to the start of the first part.
    """
    coding = [idx for idx, part in enumerate(parts) if not _is_utr(part)]
    if not coding:
        return parts[0][0], parts[0][0]

    first, last = coding[0], coding[-1]
    cds_start = parts[first][0]
    cds_end = parts[last][1]

    # Handle the 3' side first: deleting the higher index keeps `first` valid.
    if last + 1 < len(parts) and parts[last + 1][0] <= parts[last][1]:
        parts[last][1] = parts[last + 1][1]
        del parts[last + 1]
    if first > 0 and parts[first - 1][1] >= parts[first][0]:
        parts[first][0] = parts[first - 1][0]
        del parts[first - 1]

    return cds_start, cds_end


for rec in GFF.parse(fobj):
    chrom = rec.id
    # Every top-level feature of the record produces one BED12 line built
    # from all of its leaf sub-features.
    for feature in rec.features:
        exons = []
        for f in _gff_type_iterator(feature, []):
            # NOTE(review): `.position` may not exist on location endpoints
            # in recent Biopython releases -- confirm against the installed
            # version.
            start = int(f.location.start.position)
            end = int(f.location.end.position)
            strand = smap[f.strand]
            # NOTE(review): a leaf without a 'Parent' qualifier (e.g. a
            # top-level feature with no children) raises KeyError here.
            name = f.qualifiers['Parent'][0]
            typ = f.type
            exons.append([start, end, name, strand, typ])
        # Stable sort on start coordinate; `key=` works on Python 2 and 3,
        # unlike a positional cmp function.
        exons.sort(key=lambda part: part[0])
        cds_start, cds_end = _absorb_flanking_utrs(exons)

        start = exons[0][0]
        end = exons[-1][1]
        # BED12 block starts are relative to the record start.
        starts = [e[0] - start for e in exons]
        sizes = [e[1] - e[0] for e in exons]

        # Name and strand are taken from the first (left-most) block; the
        # appended "" gives both block lists a trailing comma.
        print(bedline.format(
            chrom,
            start,
            end,
            exons[0][2],
            0,
            exons[0][3],
            cds_start,
            cds_end,
            "0,0,0",
            len(exons),
            ",".join([str(x) for x in sizes] + [""]),
            ",".join([str(x) for x in starts] + [""]),
        ))

fobj.close()
