#!/usr/bin/env python
"""
Intervene: a tool for intersection and visualization of multiple genomic region sets
Created on January 10, 2017
@author: <Aziz Khan>aziz.khan@ncmm.uio.no

"""

import sys
import os
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pybedtools
from pybedtools import BedTool, example_bedtool
from intervene.modules.pairwise import pairwise
from intervene.modules.upset import upset
from intervene.modules.venn import list_venn, genomic_venn
from intervene import helpers
from argparse import ArgumentParser, RawTextHelpFormatter
from intervene import __version__, __file__

def create_dir(dir_path):
    '''
    Create a output directory if it's not exists.
    '''

    if not os.path.exists(dir_path):
        try:
            os.makedirs(dir_path)
        except:
            sys.exit( "Output directory (%s) could not be created." % dir_path )
    return dir_path

def venn_order(input_files):
    """
    Checks the order of venn plot. 

    @type input_files: list[Iterable] 
    @rtype int 

    input
      input_files: a list of files of genomic regions or lists

    return
      len(input_files): return length of input files
    """

    return len(input_files)

def get_filenames(input_files):
    """
    Checks the venn-type

    @type input_files: list[Iterable] 
    @rtype file_names: list[Iterable]

    input
      input_files: a list of files of genomic regions or lists

    return
      file_names:: return a list of file names
    """
    file_names=[]
    for fname in input_files:
      file_names.append(os.path.splitext(os.path.basename(fname))[0])

    return file_names


"""
Provides access to example files

"""

def data_dir():
    """
    Returns the data directory that contains example files.
    """
    #data_path = os.path.dirname(intervene.__file__)
    return os.path.join(os.path.dirname(__file__), 'example_data')


def example_filename(fn,sub_dir=None):
    """
    Return a bed file from the pybedtools examples directory.
    This code is adapted from https://github.com/daler/pybedtools

    """
    if sub_dir:
      fn = os.path.join(data_dir(), sub_dir, fn)
    else:
      fn = os.path.join(data_dir(), fn)
    
    if not os.path.exists(fn):
        raise ValueError("%s does not exist" % fn)
    return fn


def get_test_data(module_name):
  '''
  It returns a list of test data files, if the user uses --test argument
  
  @type module_name: chr
  @rtype test_files: list[Iterable]

  input
      module_name: Name of Intervene module

  return
      test_files:: return a list of file paths

  '''

  test_files = []
  
  if module_name == 'venn':
    test_files = [example_filename(i,'ENCODE_hESC') for i in  [
        'H3K27ac.bed',
        #'H3K4me2.bed',
        'H3K4me3.bed',
        'H3K27me3.bed'
        ]]

  elif module_name == 'upset':
    test_files = [example_filename(i, 'ENCODE_hESC') for i in  [
        'H3K27ac.bed',
        'H3K4me2.bed',
        #'H3K4me1.bed',
        'H3K4me3.bed',
        'H3K27me3.bed'
        ]]

  elif module_name == 'pairwise':
    test_files = [example_filename(i, 'dbSUPER_mm9') for i in  [
        'Bone_Marrow.bed',
        'Cerebellum.bed',
        'Cortex.bed',
        'E14.5_Brain.bed',
        'E14.5_Heart.bed',
        'E14.5_Limb.bed',
        'E14.5_Liver.bed',
        'Heart.bed',
        'HFSCs.bed',
        'Intestine.bed',
        'Kidney.bed',
        'Liver.bed',
        'Lung.bed',
        'Macrophage.bed',
        'MEF.bed',
        'mESC.bed',
        'Myotubes.bed',
        'Olfactory_Bulb.bed',
        'pro-B.bed',
        'Spleen.bed',
        'TACs.bed',
        'Testis.bed',
        'Th_Cells.bed',
        'Thymus.bed'
        ]]
  else:
    sys.exit(1)

  return test_files


def main():
    #print 'InterVene: a tool for intersection and visualization of multiple genomic region sets'
    #print 'For more details: https://github.com/asntech/intervene'
    
    desc = """
    Intervene: a tool for intersection and visualization of multiple genomic region and gene sets.
    For more details check documentation: http://intervene.readthedocs.io
    """
    #print desc
    
    parser = ArgumentParser(usage='intervene <subcommand> [options]', description=desc, formatter_class=RawTextHelpFormatter)
    subparsers = parser.add_subparsers(dest='command',help='List of subcommands')

    #venn module argument parser
    venn_parser = subparsers.add_parser('venn', usage='intervene venn [options]', formatter_class=RawTextHelpFormatter,
        description='Create Venn diagram upto 6-way after intersection of genomic regions in (BED/GTF/GFF) format or list sets.', 
        help='Venn diagram of intersection of genomic regions or list sets (upto 6-way).')
    
    venn_parser.add_argument('-i', dest='input', nargs="*",
        help='Input genomic regions in (BED/GTF/GFF) format or lists of genes/SNPs IDs.\n'
        'For files in a directory use *.<extension>. e.g. *.bed\n\n')

    venn_parser.add_argument('--type', dest='type', default='genomic', choices=('genomic','list'),
                  help='Type of input data sets. Genomic regions or lists of genes/SNPs. '
                        'Default is "%(default)s"\n\n')
    venn_parser.add_argument('--names', dest='labels', default='A,B,C,D,E,F',
                  help='Comma-separated list of names as labels for input files. '
                       'Default is: --names=A,B,C,D,E,F\n\n')
    venn_parser.add_argument('--filenames', action='store_true', default=False,
                  help='Use file names as labels instead. '
                       'Default is "%(default)s"\n\n')
    venn_parser.add_argument('--colors', dest='colors',
                  help='Comma-separated list of matplotlib-valid colors. '
                       'E.g., --colors=r,b,k\n\n')
    venn_parser.add_argument('-o', '--output', dest='output', 
                  help='Output folder path where results will be stored. '
                       'Default is current working directory.\n\n')
    venn_parser.add_argument('--figtype', dest="figtype",choices=('pdf','svg','ps','tiff','png'), 
                   default='pdf',help='Figure type for the plot. '
                       'e.g. --figtype svg. Default is "%(default)s"\n\n')    
    venn_parser.add_argument('--figsize',  nargs=2, type=int, default=(12,12),
                   help='Figure size as width and height.'
                       'e.g. --figsize 12 12.\n\n')    
    venn_parser.add_argument('--dpi', type=int, default=300,
                  help='Dots-per-inch (DPI) for the output. '
                       'Default is: "%(default)s"\n\n')  
    venn_parser.add_argument('--fill', choices=('number','percentage'),default='number',
                  help='Report number or  percentage of overlaps (Only if --type=list). '
                       'Default is "%(default)s"\n\n')
    venn_parser.add_argument('--test', action='store_true', help='This will run the program on test data.\n\n')


    #upset module argument parser
    upset_parser = subparsers.add_parser('upset', usage='intervene upset [options]', formatter_class=RawTextHelpFormatter,
        description='Create UpSet diagram after intersection of genomic regions in (BED/GTF/GFF) or list sets.', 
        help='UpSet diagram of intersection of genomic regions or list sets.')
    
    upset_parser.add_argument('-i', dest='input', nargs="*",
        help='Input genomic regions in (BED/GTF/GFF) format or lists of genes/SNPs IDs.\n '
        'For files in a directory use *.<extension>. e.g. *.bed\n\n')
    '''
    upset_parser.add_argument('-c', dest='compare', nargs=1,
        help='Input genomic region file (BED/GTF/GFF) format to compare with input files.')
    '''

    upset_parser.add_argument('--type', dest='type', choices=('genomic','list'),default='genomic',
                  help='Type of input sets. Genomic regions or lists of genes/SNPs sets. '
                       'Default is "%(default)s".\n\n') 
   
    upset_parser.add_argument('--names', dest='labels', default='A,B,C,D,E,F',
                  help='Comma-separated list of names as labels for input files. '
                       'Default is: --names=A,B,C,D,E,F\n\n')
    
    upset_parser.add_argument('--filenames', action='store_true', default=False,
                  help='Use file names as labels instead. '
                       'Default is "%(default)s".\n\n')
   
    upset_parser.add_argument('-o', '--output', dest='output', 
                  help='Output folder path where results will be stored. '
                       'Default is current working directory.\n\n')
    
    upset_parser.add_argument('--order', dest="order",choices=("freq", "degree"), 
                   default='freq',help='The order of intersections of sets. '
                       'e.g. --order degree. Default is "%(default)s".\n\n')
    
    upset_parser.add_argument('--ninter', type=int, default=40, 
                  help='Number of top intersections to show in plot. '
                       'Default is "%(default)s".\n\n')

    upset_parser.add_argument('--showzero', action='store_true', default=False,
                    help='Show empty intersection combinations. '
                       'Default is "%(default)s".\n\n')

    upset_parser.add_argument('--showsize', action='store_true', default=False,
                    help='Show intersection sizes above bars. '
                       'Default is "%(default)s".\n\n')
   
    upset_parser.add_argument('--mbcolor', type=str, default='#ea5d4e', 
                  help='Color of the main bar plot. '
                       'Default is: "%(default)s".\n\n')

    upset_parser.add_argument('--sbcolor', type=str, default='#317eab', 
                  help='Color of set size bar plot. '
                       'Default is: "%(default)s".\n\n')

    upset_parser.add_argument('--mblabel', type=str, default='No of Intersections', 
                  help='The y-axis label of the intersection size bars. '
                       'Default is: "%(default)s".\n\n')
    upset_parser.add_argument('--sxlabel', type=str, default='Set size', 
                  help='The x-axis label of the set size bars. '
                       'Default is: "%(default)s".\n\n')
           
    upset_parser.add_argument('--figtype', dest="figtype",choices=('pdf','svg','ps','tiff','png'), 
                   default='pdf',help='Figure type for the plot. '
                       'e.g. --figtype svg. Default is "%(default)s"\n\n')    
    upset_parser.add_argument('--figsize', nargs=2, type=int, default=(14,8),
                   help='Figure size for the output plot (width,height). '
                        'e.g.  --figsize 14 10\n\n')    
    upset_parser.add_argument('--dpi', type=int, dest='dpi', default=300,
                  help='Dots-per-inch (DPI) for the output. '
                       'Default is: "%(default)s".\n\n')

    upset_parser.add_argument('--run', action='store_false', default=True,
                    help='Run Rscript if R and UpSetR package are installed. '
                       'Default is "%(default)s".\n\n')
    upset_parser.add_argument('--test', action='store_true', help='This will run the program on test data.\n\n')


    #pairwise module argument parser
    pairwise_parser = subparsers.add_parser('pairwise', usage='intervene pairwise [options]', formatter_class=RawTextHelpFormatter,
        description='Pairwise intersection and heatmap of N genomic region sets in (BED/GTF/GFF) format.',
        help='Pairwise intersection and heatmap of N genomic region sets in <BED/GTF/GFF> format.')
    
    pairwise_parser.add_argument('-i', dest='input', nargs="*",
        help='Input genomic regions in (BED/GTF/GFF) format. \n'
        'For files in a directory use *.<extension>. e.g. *.bed\n')
    
    pairwise_parser.add_argument('--type', choices=('count','frac','jaccard','fisher','reldist'),
        default='frac', help='Report count/fraction of overlaps or statistical relationships. \n'
                       'count: calculates the number of overlaps. \n'
                       'frac: calculates the fraction of overlap. \n'
                       'jaccard: calculate the Jaccard statistic. \n'
                       'reldist: calculate the distribution of relative distances.\n'
                       'fisher: calculate Fisher`s statistic.\n'
                       'Default is "%(default)s".\n\n')

    pairwise_parser.add_argument('--htype', dest="htype",choices=("tribar","color", "pie","circle", "square", "ellipse", "number", "shade"), 
                   default='pie',help='Heatmap plot type. '
                           'Default is "%(default)s".\n\n')
    pairwise_parser.add_argument('--triangle', dest="triangle",choices=("lower","upper"), 
                   default='lower',help='Show lower/upper triangle of the matrix as heatmap. '
                           'Default is "%(default)s".\n\n')
    
    pairwise_parser.add_argument('--names', dest='labels',
                  help='Comma-separated list of names for input files. \n'
                       'Default is base name of input files.\n\n')
    
    pairwise_parser.add_argument('--filenames', action='store_true', default=False,
                  help='Use file names as labels instead. \n'
                       'Default is "%(default)s".\n\n')

    pairwise_parser.add_argument('--sort', action='store_true', default=False,
                  help='Set this only if your files are not sorted. \n'
                       'Default is "%(default)s".\n\n')

    '''
    pairwise_parser.add_argument('--enrichment', action='store_true',
                    help='Run randomizations (default 1000, specify otherwise '
                    'with --iterations) on each pairwise comparison and '
                    'compute the enrichment score as '
                    '(actual intersection count + 1) / (median randomized + 1)')
    pairwise_parser.add_argument('--iterations', default=1000, type=int,
                    help='Number of randomizations to perform for enrichement '
                    'scores')
    pairwise_parser.add_argument('--processes', default=1, type=int,
                    help='Number of processors to perform for enrichement '
                    'scores')
    '''
    pairwise_parser.add_argument('--genome', help='Required argument if --type=fisher.\n'
                    'Needs to be a string assembly name like "mm10" or "hg38"\n\n')
    pairwise_parser.add_argument('-o', '--output', dest='output', 
                    help='Output folder path where results will be stored. \n'
                       'Default is current working directory.\n\n')
    pairwise_parser.add_argument('--barlabel', dest="blabel", default='Set size',
                    help='x-axis label of boxplot if --htype=tribar. '
                       'Default is "%(default)s"\n\n')
    pairwise_parser.add_argument('--barcolor', dest="barcolor", default='#53cfff',
                    help='Boxplot color (hex vlaue or name, e.g. blue). '
                       'Default is "%(default)s".\n\n') 
    pairwise_parser.add_argument('--fontsize', default=8,
                   help='Label font size. '
                       'Default is "%(default)s".\n\n')
    pairwise_parser.add_argument('--title', type=str, default="Pairwise intersection",
                   help='Heatmap main title. '
                       'Default is "%(default)s".\n\n')
    
    pairwise_parser.add_argument('--space', type=float, default=1.3,
                   help='White space between barplt and heatmap, if --htype=tribar. '
                       'Default is "%(default)s".\n\n')
    pairwise_parser.add_argument('--figtype', dest="figtype",choices=('pdf','svg','ps','tiff','png'),  
                   default='pdf',help='Figure type for the plot. '
                       'e.g. --figtype svg. Default is "%(default)s"\n\n')    
    pairwise_parser.add_argument('--figsize', nargs=2, type=int,
                   help='Figure size for the output plot (width,height). '
                        'e.g.  --figsize 8 8\n\n')          
    pairwise_parser.add_argument('--dpi', type=int, default=300,
                  help='Dots-per-inch (DPI) for the output. '
                       'Default is: "%(default)s".\n\n')  
    pairwise_parser.add_argument('--test', action='store_true', help='This will run the program on test data.\n\n')
    
    
    parser.add_argument('-v','--version', dest='version', action='version', version='%(prog)s version '+__version__)

    options = parser.parse_args()

    if not options.command:
        venn_parser.print_help()
        sys.stderr.write('Missing required arguments. ')
        sys.exit(1) 

    if options.command == 'upset':
        plot_type = 'upset'
    else:
        plot_type = 'venn'

    #making the out folder if it doesn't exist
    if options.output:
        output_dir = create_dir(options.output)
    else:
        options.output = create_dir(os.getcwd()+"/Intervene_results")

    #check if user want to user test data to plot and get get test files.
    if options.test:
        print("\nRunning Intervene with test data.\n")
        options.input = get_test_data(options.command)
        options.filenames = True
        options.htype = "tribar"
        if options.command == 'venn':
          options.figsize = (7, 7)

        #sys.exit(1)

    if options.command =='pairwise':
        print("\nPerforming a pairwise intersection analysis.\n")
        pairwise.pairwise_intersection(options)
        sys.exit(1)

    '''
    if options.command =='upset':
      if options.compare:
        upset.one_vs_rest_intersection(options.input,options.compare,options.output)
        sys.exit(1)
    '''

    output_name =  options.output+'/Intervene_'+options.command+'_'+str(venn_order(options.input))+'way_'+plot_type+'.'+options.figtype

    #check file name already exists and ask if user want to overwrite
    if os.path.exists(output_name):
      print("The file name "+str(output_name)+" already exist.\n")
      answer = str(raw_input("Do you want to overwrite? [yes/no]: "))
      if answer == 'no':
        print("You can run Intervene by setting a different --output path.\nThanks for using Intervene!")
        sys.exit(1)
    
    #checke if there are at least two input bed/list files
    reqd_args = ['input']
    if not options.test:
        for ra in reqd_args:
            if not getattr(options,ra):
                if options.command == 'venn':
                    venn_parser.print_help()
                else:
                    upset_parser.print_help()
                sys.stderr.write('Missing required arg "%s"\n' % ra)
                sys.exit(1)
        if len(options.input) < 2:
          sys.stderr.write('Input should have at least two files.\n')
          sys.exit(1)

    #Cheeck colors for Venn
    if options.command == 'venn':
      if options.colors:
        options.colors = options.colors.split(',')
        options.colors = [helpers.get_colors(options.colors)[i] for i in range(venn_order(options.input))]
      else:
        options.colors = [helpers.default_colors()[i] for i in range(venn_order(options.input))]
        
    #Cheeck figure size
    if options.figsize:
      options.figsize = tuple(options.figsize)

    if options.command == 'venn' or options.command == 'upset':
        if not options.type:
            if options.command == 'venn':
                venn_parser.print_help()
            else:
                upset_parser.print_help()
            sys.stderr.write('Missing required arg --type {genomic,list}')
            sys.exit(0)
    
    #print(options.filenames)
    #sys.exit(1)
    if venn_order(options.input) == 2:
        
        print('\nGenerating a 2-way "%s" diagram.\n' %options.command)
        
        if options.filenames:
          label_names = get_filenames(options.input)
        elif options.labels:
            label_names = options.labels.split(',')
        else:
            label_names = ['A','B']

        #If the input is a gene list
        if options.type == 'list':
            if options.command == 'upset':
                cmd = 'upset_plot_intervene.R %s %s %s %s %s %s' % ('list',2,output_name, label_names,options.input[0],options.input[1])
                os.system(cmd)
                print('\nYou are done! Please check your results @ '+options.output+'. \nThank you for using Intervene!\n')
                sys.exit(1)

            else:
                a = open(options.a, 'r').readlines()
                b = open(options.b, 'r').readlines()
                labels = list_venn.get_labels([a, b], fill=[str(options.fill)])
                fig, ax = list_venn.venn2(labels, names=label_names)            
        
        #else input considered as bed file
        else:
            fig, ax = genomic_venn.venn2(input_files=options.input,options=options, names=label_names, plot_type=plot_type)
     
    elif venn_order(options.input) == 3:
        
        print('\nGenerating a 3-way "%s" diagram.\n' %options.command)
        
        if options.filenames:
          label_names = get_filenames(options.input)         
        elif options.labels:
            label_names = options.labels.split(',')
        else:
            label_names = ['A','B','C']
        #If the input is a gene list
        if options.type == 'list':
          a = open(options.input[0], 'r').readlines()
          b = open(options.input[1], 'r').readlines()
          c = open(options.input[2], 'r').readlines()
          labels = list_venn.get_labels([a, b, c], fill=['number'])
          if options.command == 'upset':
            #create an RScript
            upset.create_r_script(labels, label_names, options)

          else:
            fig, ax = list_venn.venn3(labels, names=label_names)          
        #else input considered as bed file
        else:
            fig, ax = genomic_venn.venn3(input_files=options.input,
                options=options, names=label_names, plot_type=plot_type)

    elif venn_order(options.input) == 4:
        
        print('\nGenerating a 4-way "%s" diagram.\n' %options.command)
        
        if options.filenames:
          label_names = get_filenames(options.input)
        elif options.labels:
            label_names = options.labels.split(',')
        else:
            label_names = ['A','B','C','D']
        #If the input is a gene list
        if options.type == 'list':
          a = open(options.input[0], 'r').readlines()
          b = open(options.input[1], 'r').readlines()
          c = open(options.input[2], 'r').readlines()
          d = open(options.input[3], 'r').readlines()
          labels = list_venn.get_labels([a, b, c, d], fill=['number'])
          if options.command == 'upset':
            #create an RScript
            upset.create_r_script(labels, label_names, options)

          else:
            fig, ax = list_venn.venn4(labels, names=label_names)
            
        #else input considered as bed file
        else:
             fig, ax = genomic_venn.venn4(input_files=options.input, 
                options=options, names=label_names, plot_type=plot_type)
            
                   
    elif venn_order(options.input) == 5:
        
        print('\nGenerating a 5-way "%s" diagram.\n' %options.command)
        
        if options.filenames:
          label_names = get_filenames(options.input)   
        elif options.labels:
            label_names = options.labels.split(',') 
        else:
            label_names = ['A','B','C','D','E']
        #If the input is a gene list
        if options.type == 'list':
          a = open(options.input[0], 'r').readlines()
          b = open(options.input[1], 'r').readlines()
          c = open(options.input[2], 'r').readlines()
          d = open(options.input[3], 'r').readlines()
          e = open(options.input[4], 'r').readlines()

          labels = list_venn.get_labels([a, b, c, d, e], fill=['number'])
          if options.command == 'upset':
            #create an RScript
            upset.create_r_script(labels, label_names, options)

          else:
            fig, ax = list_venn.venn5(labels, names=label_names)
        #else input considered as genomic regions file
        else:
            fig, ax = genomic_venn.venn5(input_files=options.input, 
                options=options, names=label_names, plot_type=plot_type)
        
    elif venn_order(options.input) == 6:
        
        print('\nGenerating a 6-way "%s" diagram.\n' %options.command)
        
        if options.filenames:
          label_names = get_filenames(options.input) 
        elif options.labels:
            label_names = options.labels.split(',')
        else:
            label_names = ['A','B','C','D','E','F']
        #If the input is a gene list
        if options.type == 'list':
          a = open(options.input[0], 'r').readlines()
          b = open(options.input[1], 'r').readlines()
          c = open(options.input[2], 'r').readlines()
          d = open(options.input[3], 'r').readlines()
          e = open(options.input[4], 'r').readlines()
          f = open(options.input[5], 'r').readlines()

          labels = list_venn.get_labels([a, b, c, d, e, f], fill=['number'])
          if options.command == 'upset':
            #create an RScript
            upset.create_r_script(labels, label_names, options)
          else:
            fig, ax = list_venn.venn6(labels, names=label_names)
                
        #else input considered as bed file
        else:
            fig, ax = genomic_venn.venn6(input_files=options.input,options=options, names=label_names, plot_type=plot_type)
    
    elif venn_order(options.input) > 6:
      if options.command == 'venn':
        venn_parser.print_help()
      else:
        upset_parser.print_help()
      sys.stderr.write('The number of input files are more than 6.\n')
      sys.exit(1)
    
    else:
        parser.print_help()
        sys.stderr.write('Please make sure your arguments are correct.\n')
        sys.exit(1)

    fig.savefig(output_name, dpi=options.dpi, bbox_inches='tight')
    plt.close()
    print('\nYou are done! Please check your results @ '+options.output+'. \nThank you for using Intervene!\n')

if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        sys.stderr.write("I got interrupted. :-( Bye!\n")
        sys.exit(0)
