#!/usr/bin/env python

"""
kg

Get KEGG data from the command line.
(Visit github.com/endrebak/kg for examples and help.)

Usage:
    kg --help
    kg --mergecol=COL --species=SPEC [--genes] [--definitions] [--noheader] [--quiet] FILE
    kg --species=SPEC [--definitions] [--quiet]
    kg --removecache

Arguments:
    FILE                    infile to add KEGG data to (read STDIN with -)
    -s SPEC --species=SPEC  name of species (examples: hsa, mmu, rno...)
    -m COL --mergecol=COL   column (0-indexed int or name) containing gene names

Options:
    -h --help               show this message
    -v --version            show version info
    -q --quiet              do not show progress messages on stderr
    -n --noheader           the input data does not contain a header
    -d --definitions        add KEGG pathway definitions to the output
    -g --genes              get the genes related to KEGG pathways
                            (when used, mergecol COL should contain KEGG pathway
                            ids)
    --removecache           removes the local cache so that the KEGG REST DB is
                            accessed anew

Examples:

    Write all KEGG info to STDOUT for "Rattus Norvegicus":

        kg --species rno

    Get all human pathways associated with the genes in column called "Gene" in
    test.txt, merge them to the file, add pathway definitions and write to STDOUT

        kg -s hsa -m Gene -d test.txt

"""


from __future__ import print_function

from sys import stdout, stderr, exit as sys_exit
import logging

from docopt import docopt

from ebs.read_indata import read_indata
from ebs.merge_cols import attach_data
from ebs.args import turn_docopt_arg_names_into_valid_var_names

from kegg.lib import get_kegg_data, remove_cache, default_cache_path
from kegg.version import __version__


def main(argv=None):
    """Run the kg command-line tool.

    The command line is parsed with docopt against the module docstring, and
    one of three things then happens:

    * ``--removecache``: the local cache is removed and nothing else is done.
    * no ``FILE``: the KEGG table for the species is written to stdout as
      tab-separated text.
    * ``FILE`` given: the KEGG table is attached to the input table and the
      result is written to stdout as tab-separated text.

    Args:
        argv: list of argument strings to parse. ``None`` (the default) lets
            docopt read ``sys.argv[1:]``.

    Returns:
        0, meant to be used as the process exit status.
    """
    # Keep the parsed options in an explicit mapping instead of injecting them
    # into the namespace through locals().update(): every option used below is
    # then visible to readers and linters. dict() accepts the same inputs as
    # dict.update(), so any return type that worked before still works.
    args = dict(turn_docopt_arg_names_into_valid_var_names(
        docopt(__doc__, argv=argv, version=__version__)))

    if not args["quiet"]:
        logging.basicConfig(level=logging.INFO, format='%(message)s (Time: '\
                            ' %(asctime)s )',
                            datefmt='%a, %d %b %Y %H:%M:%S', stream=stderr)

    # Arguments are handed to logging unformatted so that the message is only
    # built when it is actually emitted (i.e. not under --quiet).
    logging.info("Cache path is: %s", default_cache_path)

    if args["removecache"]:
        logging.info("Removing cache in: %s", default_cache_path)
        remove_cache()
        return 0

    kegg_df = get_kegg_data(args["species"], args["definitions"])

    # no file to merge on; just dump KEGG data to stdout
    if args["FILE"] is None:
        kegg_df.to_csv(stdout, sep="\t", index=False)
        return 0

    noheader = args["noheader"]
    input_df = read_indata(args["FILE"], noheader)

    # With --genes the merge column of the input holds KEGG pathway ids;
    # otherwise the merge is done on the "entrezgene" column of the KEGG table.
    kegg_df_col_to_merge_on = "kegg_pathway" if args["genes"] else "entrezgene"

    final_df = attach_data(input_df, kegg_df, args["mergecol"],
                           kegg_df_col_to_merge_on)

    # An input without a header produces output without a header.
    final_df.to_csv(stdout, sep="\t", index=False, header=not noheader)
    return 0


if __name__ == '__main__':
    sys_exit(main())
