#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) Duncan Macleod (2013)
#
# This file is part of GWSumm.
#
# GWSumm is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# GWSumm is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GWSumm.  If not, see <http://www.gnu.org/licenses/>.

"""Pipeline generator for the Gravitational-wave interferometer
summary information system (`gw_summary`)

This module constructs a directed, acyclic graph (DAG) that defines
a workflow to be submitted via the HTCondor scheduler
"""

import argparse
import os
import shutil

from gwdatafind.utils import find_credential

from glue import pipeline

from gwpy.io import kerberos as gwkerberos

from gwsumm import __version__
from gwsumm.utils import (mkdir, which)

__author__ = 'Duncan Macleod <duncan.macleod@ligo.org>'


# ----------------------------------------------------------------------------
# Define gw_summary job

class GWSummaryJob(pipeline.CondorDAGJob):
    """Condor DAG job describing one configurable run of gw_summary.

    Parameters
    ----------
    universe : str
        condor universe, passed to `pipeline.CondorDAGJob`
    executable : str
        path of the executable, passed to `pipeline.CondorDAGJob`
    tag : str, optional
        stem used to name the submit file and the log/err/out files
    subdir : str, optional
        directory for the ``<tag>.sub`` file; the submit file path is
        only set when this is given
    logdir : str, optional
        directory for the ``<tag>-<logtag>`` ``.log``, ``.err`` and
        ``.out`` files; these paths are only set when this is given
    **cmds
        other keyword arguments: each is handed to the ``set_<key>``
        method of this job when one exists (key lower-cased), and is
        otherwise added as a condor submit command; ``getenv`` defaults
        to ``'True'``
    """
    logtag = '$(cluster)-$(process)'

    def __init__(self, universe, executable, tag='gw_summary',
                 subdir=None, logdir=None, **cmds):
        pipeline.CondorDAGJob.__init__(self, universe, executable)
        if subdir:
            self.set_sub_file(
                os.path.join(os.path.abspath(subdir), '%s.sub' % (tag)))
        if logdir:
            # common '<logdir>/<tag>-<logtag>' stem for all three files
            stem = os.path.join(os.path.abspath(logdir),
                                '%s-%s' % (tag, self.logtag))
            self.set_log_file(stem + '.log')
            self.set_stderr_file(stem + '.err')
            self.set_stdout_file(stem + '.out')
        cmds.setdefault('getenv', 'True')
        for name, value in cmds.items():
            setter = 'set_%s' % name.lower()
            if not hasattr(self, setter):
                self.add_condor_cmd(name, value)
                continue
            getattr(self, setter)(value)
        # add sub-command option
        self._command = None

    def add_opt(self, opt, value=''):
        # the value is always handed to the parent class as a string
        text = str(value)
        pipeline.CondorDAGJob.add_opt(self, opt, text)
    add_opt.__doc__ = pipeline.CondorDAGJob.add_opt.__doc__

    def set_command(self, command):
        """Store the sub-command placed in front of the job arguments.
        """
        self._command = command

    def get_command(self):
        """Return the stored sub-command (`None` until one is set).
        """
        return self._command

    def write_sub_file(self):
        """Write the submit file, then insert the sub-command, if any.

        The file written by the parent class is read back, and the
        sub-command is inserted directly after the opening quote of
        its ``arguments = "`` text.
        """
        pipeline.CondorDAGJob.write_sub_file(self)
        command = self.get_command()
        if not command:
            return
        path = self.get_sub_file()
        with open(path, 'r') as handle:
            contents = handle.read()
        with open(path, 'w') as handle:
            handle.write(contents.replace(
                'arguments = "', 'arguments = " %s' % command))


class GWSummaryDAGNode(pipeline.CondorDAGNode):
    """DAG node that puts its job's sub-command first on the command line.
    """
    def get_cmd_line(self):
        """Return the command line, led by the job's sub-command if set.
        """
        base = pipeline.CondorDAGNode.get_cmd_line(self)
        subcommand = self.job().get_command()
        if not subcommand:
            return base
        return '%s %s' % (subcommand, base)


# ----------------------------------------------------------------------------
# Parse command line

class GWHelpFormatter(argparse.HelpFormatter):
    """Help formatter that indents by 4 unless told otherwise.

    An ``indent_increment`` given by keyword is respected; all other
    arguments are passed to `argparse.HelpFormatter` unchanged.
    """
    def __init__(self, *args, **kwargs):
        if 'indent_increment' not in kwargs:
            kwargs['indent_increment'] = 4
        argparse.HelpFormatter.__init__(self, *args, **kwargs)


# synopsis line for --help, led by the name of this script
usage = (
    '%s --global-config defaults.ini --config-file myconfig.ini '
    '[--config-file myconfig2.ini] [options]'
) % os.path.basename(__file__)

# the module docstring doubles as the program description
parser = argparse.ArgumentParser(
    description=__doc__,
    formatter_class=GWHelpFormatter,
    usage=usage,
)
# version string reported by the --version action below
parser.version = __version__

parser.add_argument(
    "-v", "--verbose", action="store_true",
    help="show verbose output, default: %(default)s")
parser.add_argument(
    "-V", "--version", action="version",
    help="show program's version number and exit")

bopts = parser.add_argument_group("Basic options")
bopts.add_argument(
    '-i', '--ifo', metavar='IFO',
    help="Instrument to process. If this option is set in the [DEFAULT] "
         "of any of the INI files, giving it here is redundant.")

# skipping the HTML wrapper and running only the wrapper cannot be combined
wrapgroup = bopts.add_mutually_exclusive_group()
wrapgroup.add_argument(
    '-w', '--skip-html-wrapper', action='store_true',
    help="Do not configure first job for HTML htmlnode, default: "
         "%(default)s. Useful for separating large summary pipeline "
         "across multiple DAGs")
wrapgroup.add_argument(
    '-W', '--html-wrapper-only', action='store_true',
    help="Only run first job for HTML htmlnode.")

# pipeline files are named after this script unless a tag is given
bopts.add_argument(
    '-t', '--file-tag', default=os.path.basename(__file__),
    help="file tag for pipeline files, default: %(default)s")

htcopts = parser.add_argument_group("Condor options")
htcopts.add_argument(
    '-x', '--executable', default=which('gw_summary'),
    help="Path to gw_summary executable, default: %(default)s")
htcopts.add_argument(
    '-u', '--universe', default='vanilla',
    help="Universe for condor jobs, default: %(default)s")
# the condor log directory defaults to $LOCALDIR when that is set
htcopts.add_argument(
    '-l', '--log-dir', default=os.environ.get('LOCALDIR'),
    help="Directory path for condor log files, default: %(default)s")
htcopts.add_argument(
    '-m', '--maxjobs', type=int, metavar='N',
    help="Restrict the DAG to submit only N jobs at any one time, "
         "default: %(default)s")
htcopts.add_argument(
    '-T', '--condor-timeout', type=float, metavar='T',
    help='Configure condor to terminate jobs after T hours to prevent '
         'idling, default: %(default)s')
htcopts.add_argument(
    '-c', '--condor-command', action='append', default=[],
    help="Extra condor submit commands to add to gw_summary submit file. "
         "Can be given multiple times in the form \"key=value\"")

copts = parser.add_argument_group(
    "Configuration options",
    "Each --global-config file will be used in all nodes of the workflow, "
    "while a single node will be created for each other --config-file")
# all three options accumulate into lists, in command-line order
copts.add_argument(
    '-f', '--config-file', action='append', metavar='FILE', default=[],
    help="INI file for analysis, may be given multiple times")
copts.add_argument(
    '-g', '--global-config', action='append', metavar='FILE', default=[],
    help="INI file for use in all workflow jobs, may be given multiple "
         "times")
copts.add_argument(
    '-p', '--priority', action='append', default=[],
    help="priority for DAG node, should be given once for each "
         "--config-file in the same order")

popts = parser.add_argument_group(
    "Process options",
    "Configure how this summary will be processed.")
# --nds is tri-state: 'guess' unless the flag is given, in which case True
popts.add_argument(
    '--nds', action='store_true', default='guess',
    help='use NDS as the data source, default: %(default)s')
popts.add_argument(
    '--single-process', action='store_true',
    help="restrict gw_summary to a single process, mainly for debugging "
         "purposes, default: %(default)s")
popts.add_argument(
    '--multi-process', type=int,
    help="maximum number of concurrent sub-processes for each gw_summary "
         "job, {number of CPUs} / {min(number of jobs, 4)}")
popts.add_argument(
    '-a', '--archive', action='store_true',
    help="Read archived data from the FILE, and write back to it at the "
         "end")
popts.add_argument(
    '-S', '--on-segdb-error', default='raise',
    choices=['raise', 'ignore', 'warn'],
    help="action upon error fetching segments from SegDB")
popts.add_argument(
    '-G', '--on-datafind-error', default='raise',
    choices=['raise', 'ignore', 'warn'],
    help="action upon error querying for frames from the datafind "
         "server: default: %(default)s")
# the three cache options differ only in their name and help text
for flag, description in (
        ('--data-cache',
         'path to LAL-format cache of TimeSeries data files'),
        ('--event-cache',
         'path to LAL-format cache of event trigger files'),
        ('--segment-cache',
         'path to LAL-format cache of state or data-quality segment files'),
):
    popts.add_argument(flag, action='append', default=[], help=description)
popts.add_argument(
    '--no-htaccess', action='store_true',
    help='tell gw_summary to not write .htaccess files')

outopts = parser.add_argument_group("Output options")
outopts.add_argument(
    '-o', '--output-dir', metavar='OUTDIR', default=os.curdir,
    help="Output directory for summary information, default: '%(default)s'")

# calendar modes take a date string, the GPS interval takes two integers
topts = parser.add_argument_group(
    "Time mode options",
    "Choose a standard time mode, or a GPS [start, stop) interval")
topts.add_argument("--day", action="store", type=str, metavar='YYYYMMDD',
                   help="day to process.")
topts.add_argument("--week", action="store", type=str, metavar="YYYYMMDD",
                   help="week to process (by starting day).")
topts.add_argument("--month", action="store", type=str, metavar="YYYYMM",
                   help="month to process.")
topts.add_argument("--year", action="store", type=str, metavar="YYYY",
                   help="year to process.")
topts.add_argument("-s", "--gps-start-time", action="store", type=int,
                   metavar="GPSSTART", help="GPS start time")
topts.add_argument("-e", "--gps-end-time", action="store", type=int,
                   metavar="GPSEND", help="GPS end time")

opts = parser.parse_args()

# check time options: the calendar modes and the GPS interval are mutually
# exclusive, with --gps-start-time/--gps-end-time counting as a single mode
gps_given = (opts.gps_start_time is not None, opts.gps_end_time is not None)
N = any(gps_given) + sum(
    getattr(opts, mode) is not None
    for mode in ('day', 'week', 'month', 'year'))
if N > 1:
    # parser.error prints the usage and exits, nothing needs to be raised
    parser.error("Please give only one of --day, --week, --month, --year, "
                 "or --gps-start-time and --gps-end-time.")
if any(gps_given) and not all(gps_given):
    # a half-specified interval would otherwise reach the job arguments
    # as the string 'None'
    parser.error("--gps-start-time and --gps-end-time must be given "
                 "together.")

# resolve INI paths against the invocation directory, before the script
# changes into the output directory; each --config-file value may itself
# be a comma-separated list and stays grouped, while the --global-config
# values are flattened into a single list of paths
opts.config_file = [
    ','.join(map(os.path.abspath, cf.split(',')))
    for cf in opts.config_file
]
opts.global_config = [
    os.path.abspath(fp)
    for csv in opts.global_config
    for fp in csv.split(',')
]

# ----------------------------------------------------------------------------
# Build workflow directories

# work from inside the output directory, so that every path below can
# be given relative to it
mkdir(opts.output_dir)
os.chdir(opts.output_dir)
outdir = os.curdir

# node logs go to 'logs'; condor logs go to --log-dir when that is set,
# and to the same 'logs' directory otherwise
logdir = os.path.join(outdir, 'logs')
htclogdir = opts.log_dir or logdir
mkdir(logdir, htclogdir)

# all configuration files are staged in 'etc'
etcdir = os.path.join(outdir, 'etc')
mkdir(etcdir)


def _stage_ini(source):
    """Copy ``source`` into ``etcdir`` and return the copy's absolute path.

    Nothing is copied when the target already exists and is the same
    file as ``source``.
    """
    target = os.path.join(etcdir, os.path.basename(source))
    if not (os.path.isfile(target) and os.path.samefile(source, target)):
        shutil.copyfile(source, target)
    return os.path.abspath(target)


# point the options at the staged copies, keeping each comma-separated
# --config-file group together
opts.global_config[:] = [_stage_ini(fp) for fp in opts.global_config]
opts.config_file[:] = [
    ','.join([_stage_ini(fp) for fp in group.split(',')])
    for group in opts.config_file
]
if opts.verbose:
    print("Copied all INI configuration files to %s." % etcdir)

# ----------------------------------------------------------------------------
# Configure X509 and kerberos for condor

if opts.universe != 'local':
    # both credentials live in the 'etc' directory of this workflow:
    # a copy of the X509 certificate, and a fresh kerberos cache
    x509cert, _ = find_credential()
    x509copy = os.path.join(etcdir, os.path.basename(x509cert))
    krb5cc = os.path.abspath(os.path.join(etcdir, 'krb5cc.krb5'))

    shutil.copyfile(x509cert, x509copy)
    # rerun kerberos, pointing it at the new cache path
    gwkerberos.kinit(krb5ccname=krb5cc)

    if opts.verbose:
        print("Configured Condor and Kerberos for NFS-shared credentials.")

# ----------------------------------------------------------------------------
# Build DAG

# the DAG log sits with the condor logs, the DAG file in the output directory
dag = pipeline.CondorDAG(os.path.join(htclogdir, '%s.log' % opts.file_tag))
dag.set_dag_file(os.path.join(outdir, opts.file_tag))

universe = opts.universe
executable = opts.executable

# ----------------------------------------------------------------------------
# Parse condor commands

# parse command line condor commands into dict
condorcmds = {}
if opts.condor_timeout:
    # the timeout is given in hours, the expression compares seconds
    condorcmds['periodic_remove'] = (
        'CurrentTime-EnteredCurrentStatus > %d' % (3600 * opts.condor_timeout))
for cmd_ in opts.condor_command:
    key, sep, value = cmd_.partition('=')
    # strip both sides of the key so that ' getenv = False' and
    # 'getenv=False' name the same submit command
    key = key.strip().lower()
    if not sep or not key:
        # report malformed input through the parser, not a bare traceback
        parser.error('invalid --condor-command %r, expected "key=value"'
                     % cmd_)
    condorcmds[key] = value.strip()

if opts.universe != 'local':
    # add the credential paths to the job environment, appending with ';'
    # to a user-given environment, and leaving alone any variable the
    # user has already named there
    for env_, val_ in zip(['X509_USER_PROXY', 'KRB5CCNAME'],
                          [os.path.abspath(x509copy), krb5cc]):
        condorenv = '%s=%s' % (env_, val_)
        current = condorcmds.get('environment')
        if current is None:
            condorcmds['environment'] = condorenv
        elif env_ not in current:
            condorcmds['environment'] = '%s;%s' % (current, condorenv)

# ----------------------------------------------------------------------------
# Build individual gw_summary jobs

globalconfig = ','.join(opts.global_config)

# one 'local' universe job for the HTML wrapper and one job in the
# requested universe for the data, each unless disabled by its option
jobs = []
if not opts.skip_html_wrapper:
    htmljob = GWSummaryJob(
        'local', executable, tag='%s_local' % opts.file_tag,
        subdir=outdir, logdir=logdir, **condorcmds)
    jobs.append(htmljob)
if not opts.html_wrapper_only:
    datajob = GWSummaryJob(
        universe, executable, tag=opts.file_tag,
        subdir=outdir, logdir=logdir, **condorcmds)
    jobs.append(datajob)

# pick the gw_summary sub-command and its positional arguments: the first
# calendar mode that was given, else the GPS interval, else a bare 'day'
for mode in ('day', 'week', 'month', 'year'):
    if getattr(opts, mode):
        timemode, timeargs = mode, [getattr(opts, mode)]
        break
else:
    if opts.gps_start_time or opts.gps_end_time:
        timemode = 'gps'
        timeargs = [str(opts.gps_start_time), str(opts.gps_end_time)]
    else:
        timemode, timeargs = 'day', []

# add common command-line options
for job in jobs:
    job.set_command(timemode)
    for arg in timeargs:
        job.add_arg(arg)
    # only an explicit --nds is forwarded, not the 'guess' default
    if opts.nds is True:
        job.add_opt('nds')
    if opts.single_process:
        job.add_opt('single-process')
    elif opts.multi_process is not None:
        job.add_opt('multi-process', opts.multi_process)
    if opts.verbose:
        job.add_opt('verbose')
    if opts.ifo:
        job.add_opt('ifo', opts.ifo)
    for name in ('on-segdb-error', 'on-datafind-error'):
        job.add_opt(name, getattr(opts, name.replace('-', '_')))
    job.add_opt('output-dir', outdir)
    # repeat the cache flag once in front of each cache file
    for flag, paths in (('--data-cache', opts.data_cache),
                        ('--event-cache', opts.event_cache),
                        ('--segment-cache', opts.segment_cache)):
        if paths:
            job.add_arg(' '.join(['%s %s' % (flag, path) for path in paths]))
    if opts.no_htaccess:
        job.add_opt('no-htaccess')

# make surrounding HTML first
if not opts.skip_html_wrapper:
    # the HTML job is given every configuration file at once
    htmljob.add_opt('html-only', '')
    htmljob.add_opt('config-file', ','.join(
        [globalconfig]+opts.config_file).strip(','))

    htmlnode = GWSummaryDAGNode(htmljob)
    # register each individual INI path as an input file; every
    # --config-file entry may be a comma-separated list of paths
    for configfile in opts.config_file:
        for cf in configfile.split(','):
            htmlnode.add_input_file(cf)
    htmlnode.set_category('gw_summary')
    dag.add_node(htmlnode)
    if opts.verbose:
        print("    Configured HTML htmlnode job.")

# create node for each config file
if not opts.html_wrapper_only:
    # add html opts
    datajob.add_opt('no-html', '')
    if opts.archive:
        # NOTE(review): presumably $(macroarchive) is the DAG macro created
        # by the per-node 'archive' variable option below; confirm
        datajob.add_condor_cmd('+SummaryNodeType', '"$(macroarchive)"')
    # configure each data node
    for i, configfile in enumerate(opts.config_file):
        node = GWSummaryDAGNode(datajob)
        node.add_var_arg('--config-file %s' % ','.join(
            [globalconfig, configfile]).strip(','))
        if opts.archive:
            # the archive tag is the upper-cased file name stem, with '_'
            # for '-', and without a leading '<IFO>_' prefix
            jobtag = os.path.splitext(os.path.basename(configfile))[0]
            archivetag = jobtag.upper().replace('-', '_')
            ifoprefix = ('%s_' % opts.ifo.upper()) if opts.ifo else ''
            if ifoprefix and archivetag.startswith(ifoprefix):
                # strip exactly the matched prefix, whatever its length
                archivetag = archivetag[len(ifoprefix):]
            node.add_var_opt('archive', archivetag)
        for cf in configfile.split(','):
            node.add_input_file(cf)
        node.set_category('gw_summary')
        # priorities pair up with config files by position; nodes without
        # a matching --priority get 0
        try:
            node.set_priority(opts.priority[i])
        except IndexError:
            node.set_priority(0)
        node.set_retry(1)
        # data nodes wait for the HTML wrapper node when there is one
        if not opts.skip_html_wrapper:
            node.add_parent(htmlnode)
        dag.add_node(node)
        if opts.verbose:
            print("    Configured job for config %s." % configfile)

# optionally cap the number of 'gw_summary' category jobs submitted at once
if opts.maxjobs:
    dag.add_maxjobs_category('gw_summary', opts.maxjobs)

# ----------------------------------------------------------------------------
# finish up

# write the submit files, then the DAG, then the script
for write in (dag.write_sub_files, dag.write_dag, dag.write_script):
    write()

# the DAG path is always printed, the banner only in verbose mode
if opts.verbose:
    print("Setup complete. DAG written to:")
print(os.path.abspath(dag.get_dag_file()))
