#!/usr/bin/env python
# tangermeme command-line toolkits
# Author: Jacob Schreiber <jmschreiber91@gmail.com>

import sys
import pathlib
sys.path.insert(0, str(pathlib.Path(__file__).resolve().parent))

import argparse

from _tomtom import _run_tomtom
from _fimo import _run_fimo

# Description printed at the top of the program's `--help` output. The line
# breaks and leading whitespace are spelled out explicitly so that the text
# handed to argparse is exactly what it has always been.
desc = (
	"tangermeme is a package for genomic sequence-based machine learning.\n"
	"\t   This command-line tool contains many methods that are useful for\n"
	"\t   discovering patterns, identifying where these patterns occur in the\n"
	"\t   genome, and how these patterns interact with each other to cause\n"
	"\t   important phenotypes. Many of the tools implemented here are\n"
	"\t   reimplementations of those present in the MEME suite."
)

# Help text shown next to the subcommand selector.
_help = "Must be either 'fimo' or 'tomtom'."


# Build the top-level parser. A subcommand is mandatory, and the name the
# user picked is stored on the parsed namespace as `cmd`.
parser = argparse.ArgumentParser(description=desc)
subparsers = parser.add_subparsers(dest='cmd', required=True, help=_help)

###
# TOMTOM command-line options
###


# One-line summary of the `tomtom` subcommand shown in the top-level help.
tomtom_help = (
	"Calculate the similarity between two sets of motifs, where\n"
	"              one set of motifs can be hard character assignments."
)

# Register the `tomtom` subcommand. ArgumentDefaultsHelpFormatter adds each
# option's default value to its help text.
tomtom_parser = subparsers.add_parser(
	"tomtom",
	help=tomtom_help,
	formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

# Each entry pairs the (short, long) flags of one option with the keyword
# arguments passed to `add_argument`. Options are registered in the order
# listed, which is also the order they appear in the help output.
for _flags, _settings in (
	(("-q", "--query"), dict(type=str, required=True,
		help="Either the filename of a MEME file ending in `.meme` or the "
			"string \n\tof a single motif.")),
	(("-t", "--targets"), dict(type=str,
		help="The filename of a MEME file. By default, will download and "
			"use the\n\tJASPAR2024 Core non-redundant MEME file.")),
	(("-n", "--n_nearest"), dict(type=int, default=None,
		help="The number of nearest targets to return for each query.")),
	(("-s", "--n_score_bins"), dict(type=int, default=100,
		help="The number of query-target score bins to use. "
			"`t` in the paper.")),
	(("-m", "--n_median_bins"), dict(type=int, default=1000,
		help="The number of bins to use for approximate median "
			"calculations.")),
	(("-a", "--n_target_bins"), dict(type=int, default=100,
		help="The number of bins to use for approximate target hashing.")),
	(("-c", "--n_cache"), dict(type=int, default=100,
		help="The amount of cache to provide for calculations.")),
	(("-r", "--norc"), dict(action='store_true',
		help="Whether to not score reverse complements.")),
	(("-j", "--n_jobs"), dict(type=int, default=-1,
		help="The number of threads to use for processing queries.")),
	(("-p", "--thresh"), dict(type=float, default=0.01,
		help="The p-value threshold for returning matches.")),
):
	tomtom_parser.add_argument(*_flags, **_settings)


###
# FIMO command-line options
###


# One-line summary of the `fimo` subcommand shown in the top-level help.
fimo_help = (
	"Scan a set of motifs from a MEME file against one or more\n"
	"\tsequences in a FASTA file."
)

# Register the `fimo` subcommand. ArgumentDefaultsHelpFormatter adds each
# option's default value to its help text.
fimo_parser = subparsers.add_parser(
	"fimo",
	help=fimo_help,
	formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

# Each entry pairs the (short, long) flags of one option with the keyword
# arguments passed to `add_argument`. Options are registered in the order
# listed, which is also the order they appear in the help output.
for _flags, _settings in (
	(("-m", "--motif"), dict(type=str, required=True,
		help="The filename of a MEME-formatted file containing motifs.")),
	(("-s", "--sequence"), dict(type=str,
		help="The filename of a FASTA-formatted file containing the "
			"sequences to\n\tscan against.")),
	(("-w", "--bin_size"), dict(type=float, default=0.1,
		help="The width of bins to use when discretizing scores.")),
	(("-e", "--epsilon"), dict(type=float, default=0.0001,
		help="A pseudocount to add to each PWM.")),
	(("-p", "--threshold"), dict(type=float, default=0.0001,
		help="The p-value threshold for returning matches.")),
	(("-r", "--norc"), dict(action='store_true', default=False,
		help="Whether to only do the positive strand.")),
):
	fimo_parser.add_argument(*_flags, **_settings)


##############
# Run appropriate command
##############


args = parser.parse_args()

# Look up the implementation for the chosen subcommand and hand it the full
# parsed namespace. A command name with no entry in the table is a no-op.
_commands = {'tomtom': _run_tomtom, 'fimo': _run_fimo}
_runner = _commands.get(args.cmd)
if _runner is not None:
	_runner(args)
