#!/usr/bin/env python
# coding=utf-8
# Copyright (C) Duncan Macleod (2013)
#
# This file is part of GWSumm.
#
# GWSumm is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# GWSumm is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GWSumm.  If not, see <http://www.gnu.org/licenses/>.

"""The gravitational-wave interferometer summary information system.

This module provides the command-line interface to the GWSumm package,
allowing generation of detector summary information.

Select a <mode> to run over a calendar amount of time ('day', 'week',
or 'month'), or an arbitrary GPS (semi-open) interval.

Run 'gw_summary <mode> --help' for details of the specific arguments and
options acceptable for each mode.
"""

from __future__ import (division, print_function)

# XXX HACK XXX
# plots don't work with multiprocessing with lal.LIGOTimeGPS
from gwpy import time
from gwpy.time import _tconvert
from glue.lal import LIGOTimeGPS
time.LIGOTimeGPS = LIGOTimeGPS
_tconvert.LIGOTimeGPS = LIGOTimeGPS

import argparse
import calendar
import datetime
import getpass
import os
import re
import warnings
from collections import OrderedDict
from configparser import (DEFAULTSECT, NoOptionError, NoSectionError)
try:
    from urllib.parse import urlparse
except ImportError:  # python < 3
    from urlparse import urlparse

from dateutil.relativedelta import relativedelta

# set matplotlib backend
from matplotlib import use
use('Agg')

from glue.lal import Cache

from gwpy.segments import (Segment, SegmentList)
from gwpy.signal.fft import lal as fft_lal
from gwpy.time import (tconvert, to_gps, Time)

from gwsumm import (
    __version__,
    archive,
    globalv,
    mode,
)
from gwsumm.config import (
    GWSummConfigParser,
)
from gwsumm.segments import get_segments
from gwsumm.state import (
    ALLSTATE
)
from gwsumm.tabs import (
    TabList,
    get_tab,
)
from gwsumm.utils import (
    get_default_ifo,
    mkdir,
    re_flagdiv,
    vprint,
)
from gwsumm.data import get_timeseries_dict

__author__ = 'Duncan Macleod <duncan.macleod@ligo.org>'

# set defaults
# NOTE(review): VERBOSE and PROFILE are not read anywhere in the visible
# part of this script -- confirm whether anything still relies on them
VERBOSE = False
PROFILE = False
# default value for the --ifo option; None when get_default_ifo() cannot
# provide one (signalled by ValueError)
try:
    DEFAULT_IFO = get_default_ifo()
except ValueError:
    DEFAULT_IFO = None

# ----------------------------------------------------------------------------
# Argparse customisations

# find today's date (UTC), formatted as YYYYMMDD; this is the default value
# of the positional argument in 'day' mode
TODAY = datetime.datetime.utcnow().strftime('%Y%m%d')


# define custom parser
class GWArgumentParser(argparse.ArgumentParser):
    """`argparse.ArgumentParser` with re-titled default argument groups.

    The two groups argparse creates for every parser are given the
    headings 'Positional arguments' and 'Optional arguments'.
    """
    def __init__(self, *args, **kwargs):
        super(GWArgumentParser, self).__init__(*args, **kwargs)
        # overwrite the headings of the two built-in groups
        for group, title in ((self._positionals, 'Positional arguments'),
                             (self._optionals, 'Optional arguments')):
            group.title = title


# define custom help formatting (4-space)
class GWHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """Defaults-showing help formatter that indents by 4 spaces.

    An ``indent_increment`` given explicitly by the caller is respected.
    """
    def __init__(self, *args, **kwargs):
        if 'indent_increment' not in kwargs:
            kwargs['indent_increment'] = 4
        super(GWHelpFormatter, self).__init__(*args, **kwargs)


# define actions for formatting dates
class DateAction(argparse.Action):
    """Parse a calendar-date argument and record the GPS interval it spans.

    Subclasses customise three class attributes:

    - ``TIMESCALE``: keyword arguments for `relativedelta`, giving the
      length of the interval that starts at the parsed date
    - ``DATEFORMAT``: `datetime.datetime.strptime` format of the argument
    - ``METAVAR``: human-readable form of ``DATEFORMAT``, quoted in the
      error message for a malformed value

    When called, the action stores the parsed `datetime.datetime` under
    ``self.dest`` and sets ``gpsstart`` and ``gpsend`` on the namespace.
    A value that does not match ``DATEFORMAT`` is reported through
    ``parser.error``.
    """
    TIMESCALE = {'days': 1}
    # day-format defaults (matching the one-day TIMESCALE), so that the base
    # class can be used directly instead of failing with AttributeError
    DATEFORMAT = '%Y%m%d'
    METAVAR = 'YYYYMMDD'

    @staticmethod
    def set_gps_times(namespace, startdate, enddate):
        """Store `to_gps` of both dates as ``gpsstart``/``gpsend``.
        """
        setattr(namespace, 'gpsstart', to_gps(startdate))
        setattr(namespace, 'gpsend', to_gps(enddate))

    def __call__(self, parser, namespace, values, option_string=None):
        try:
            date = datetime.datetime.strptime(values, self.DATEFORMAT)
        except ValueError:
            # parser.error() prints the message and exits; it does not
            # return an exception, so there is nothing to ``raise``
            parser.error("%s malformed: %r. Please format as %s"
                         % (self.dest.title(), values, self.METAVAR))
        self.set_gps_times(namespace, date,
                           date + relativedelta(**self.TIMESCALE))
        setattr(namespace, self.dest, date)
        return date


class DayAction(DateAction):
    """`DateAction` for a day given as YYYYMMDD, spanning one day.
    """
    TIMESCALE = {'days': 1}
    DATEFORMAT = '%Y%m%d'
    METAVAR = 'YYYYMMDD'


class WeekAction(DayAction):
    """`DayAction` whose interval spans seven days from the given day.
    """
    TIMESCALE = {'days': 7}


class MonthAction(DateAction):
    """`DateAction` for a month given as YYYYMM, spanning one month.
    """
    TIMESCALE = {'months': 1}
    DATEFORMAT = '%Y%m'
    METAVAR = 'YYYYMM'


class YearAction(DateAction):
    """`DateAction` for a year given as YYYY, spanning one year.
    """
    # NOTE(review): no sub-parser in the visible part of this script uses
    # this action -- confirm whether a 'year' mode is still planned
    TIMESCALE = {'years': 1}
    DATEFORMAT = '%Y'
    METAVAR = 'YYYY'


class GPSAction(argparse.Action):
    """Store a command-line value after conversion with `to_gps`.

    The raw value is converted to `float` when possible; a value that is
    not numeric is handed to `to_gps` unchanged. The result is stored on
    the namespace under ``self.dest``.
    """
    # option_string defaults to None, matching DateAction and the
    # argparse.Action call signature
    def __call__(self, parser, namespace, values, option_string=None):
        try:
            values = float(values)
        except (TypeError, ValueError):
            # not a number: deliberately left as-is for to_gps to interpret
            pass
        setattr(namespace, self.dest, to_gps(values))

# ----------------------------------------------------------------------------
# Setup command-line parsing


# define top-level parser
# - the module docstring doubles as the program description
# - arguments may be read from files named on the command line with a
#   leading '@' (fromfile_prefix_chars), as explained in the epilog
parser = GWArgumentParser(
    formatter_class=GWHelpFormatter,
    description=__doc__,
    fromfile_prefix_chars='@',
    epilog="Arguments and options may be written into files and passed to "
           "%(prog)s as positional arguments prepended with '@', e.g. "
           "'%(prog)s @args.txt'. In this format, options must be give as "
           "'--argument=value', and not '--argument value'.")
parser.add_argument('-V', '--version', action='version',
                    version=__version__,
                    help="show program's version number and exit")

# define shared commands
# `sharedopts` is never parsed directly: it is handed as a parent to each
# mode's sub-parser, hence add_help=False
sharedopts = GWArgumentParser(add_help=False)
sharedopts.title = 'Progress arguments'
sharedopts.add_argument('-v', '--verbose', action='store_true',
                        default=False, help="show verbose output")
sharedopts.add_argument('-D', '--debug', action='store_true',
                        default=False, help="show debug output")

# give configuration files
copts = sharedopts.add_argument_group("Configuration options",
                                      "Provide a number of INI-format "
                                      "configuration files")
copts.add_argument('-i', '--ifo', default=DEFAULT_IFO, metavar='IFO',
                   help="IFO prefix for interferometer to process. "
                        "If this option is set in the [DEFAULT] of any of "
                        "the INI files, giving it here is redundant.")
# each value may itself be a comma-separated list of files; the list is
# flattened after parsing
copts.add_argument('-f', '--config-file', action='append', type=str,
                   metavar='FILE', default=[],
                   help="INI file for analysis, may be given multiple times")
copts.add_argument('-t', '--process-tab', action='append', type=str,
                   help="process only this tab, can be given multiple times")

popts = sharedopts.add_argument_group("Process options",
                                      "Configure how this summary will be "
                                      "processed.")
# the default is None rather than False -- presumably so that downstream
# code can 'guess' the data source, as the help text says (TODO confirm)
popts.add_argument('--nds', action='store_true', default=None,
                   help='use NDS as the data source, default: \'guess\'')
popts.add_argument('-j', '--multi-process', action='store', type=int,
                   default=1, dest='multiprocess', metavar='N',
                   help="use a maximum of N parallel processes at any time")
popts.add_argument('-b', '--bulk-read', action='store_true', default=False,
                   help="read all data up-front at the start of the job, "
                        "rather than when it is needed for a tab")
popts.add_argument('-S', '--on-segdb-error', action='store', type=str,
                   default='raise', choices=['raise', 'ignore', 'warn'],
                   help="action upon error fetching segments from SegDB")
popts.add_argument('-G', '--on-datafind-error', action='store', type=str,
                   default='raise', choices=['raise', 'ignore', 'warn'],
                   help="action upon error querying for frames from the "
                        "datafind server, default: %(default)s")
# LAL-format cache files; each option may be repeated
popts.add_argument('--data-cache', action='append', default=[],
                   help='path to LAL-format cache of TimeSeries data files')
popts.add_argument('--event-cache', action='append', default=[],
                   help='path to LAL-format cache of event trigger files')
popts.add_argument('--segment-cache', action='append', default=[],
                   help='path to LAL-format cache of state or data-quality '
                        'segment files')

# ----------------------------------------------------------------------------
# Define sub-parsers

# add HTML options
def add_output_options(parser_):
    """Attach the 'Output options' argument group to ``parser_``.

    The group holds the output directory, the mutually exclusive
    ``--html-only``/``--no-html`` switches, and ``--no-htaccess``.

    This is a function, called once per sub-parser, because argparse
    can't handle mutually exclusive groups in a parent parser handed to
    a subparser.
    """
    group = parser_.add_argument_group("Output options")
    group.add_argument(
        '-o', '--output-dir', action='store', type=str, metavar='DIR',
        default=os.curdir, help="Output directory for summary information")

    # at most one of the two HTML switches may be given
    exclusive = group.add_mutually_exclusive_group()
    for flags, text in (
            (('-m', '--html-only'),
             "Generate container HTML and navigation only"),
            (('-n', '--no-html'),
             "Generate inner HTML and contents only, not supporting HTML"),
    ):
        exclusive.add_argument(*flags, action='store_true', default=False,
                               help=text)

    group.add_argument(
        '-N', '--no-htaccess', action='store_true', default=False,
        help='don\'t create a .htaccess file to customise 404 errors')


# define hierarchical archiving choice
def add_archive_options(parser_):
    """Attach the 'Archive options' argument group to ``parser_``.

    The group offers two mutually exclusive options, ``--archive`` and
    ``--daily-archive``. Each takes an optional FILE_TAG: the value is
    `False` when the option is absent, and 'GW_SUMMARY_ARCHIVE' when the
    option is given without a tag.

    This is a function, called once per sub-parser, because argparse
    can't handle mutually exclusive groups in a parent parser handed to
    a subparser.
    """
    hierarchopts = parser_.add_argument_group('Archive options')
    hierarchchoice = hierarchopts.add_mutually_exclusive_group()
    hierarchchoice.add_argument(
        '-a', '--archive', metavar='FILE_TAG', default=False,
        const='GW_SUMMARY_ARCHIVE', nargs='?',
        help="Read archived data from, and write processed data to "
             "an HDF archive file written with the FILE_TAG. If not "
             "given, no archive will be used, if given with no file "
             "tag, a default of '%(const)s' will be used.")
    # the help used to claim daily archives are used when the option is
    # absent, which contradicts default=False
    hierarchchoice.add_argument(
        '-d', '--daily-archive', metavar='FILE_TAG', default=False,
        const='GW_SUMMARY_ARCHIVE', nargs='?',
        help="Read data from the daily archives, with the "
             "given FILE_TAG. If not given, no daily archives will be "
             "used, if given with no file tag, a default of "
             "'%(const)s' will be used.")

# define sub-parser handler
# the chosen sub-command name is stored as `opts.mode`
subparsers = parser.add_subparsers(
    dest='mode', title='Modes',
    description='Note: all dates are defined with UTC boundaries.\n'
                'The valid modes are:')
# registry of the per-mode parsers, keyed by mode name
subparser = dict()

# DAY mode
daydoc = """
Run %s over a full UTC day, and link this day to others with a calendar
built into the HTML navigation bar. In this mode you can also archive data
in HDF-format files to allow progressive processing of live data without
restarting from scratch every time.""" % parser.prog
subparser['day'] = subparsers.add_parser('day', description=daydoc,
                                         epilog=parser.epilog,
                                         parents=[sharedopts],
                                         formatter_class=GWHelpFormatter,
                                         help="Process one day of data")
# the day is optional (nargs='?') and defaults to today's UTC date
subparser['day'].add_argument('day', action=DayAction, type=str, nargs='?',
                              metavar=DayAction.METAVAR, default=TODAY,
                              help="Day to process")
add_output_options(subparser['day'])

# day mode only offers --archive; the --daily-archive alternative is
# added to the week and month parsers by add_archive_options()
darchopts = subparser['day'].add_argument_group('Archive options',
                                                'Choose if, and how, to '
                                                'archive data from this run')
darchopts.add_argument('-a', '--archive', metavar='FILE_TAG',
                       default=False, const='GW_SUMMARY_ARCHIVE', nargs='?',
                       help="Read archived data from, and write processed data "
                            "to, an HDF archive file written with the "
                            "FILE_TAG. If not given, no archive will be used, "
                            "if given with no file tag, a default of "
                            "'%(const)s' will be used.")

# WEEK mode
subparser['week'] = subparsers.add_parser('week', parents=[sharedopts],
                                          epilog=parser.epilog,
                                          formatter_class=GWHelpFormatter,
                                          help="Process one week of data")
# the week is identified by its first day (YYYYMMDD); WeekAction records
# a seven-day GPS interval starting there
subparser['week'].add_argument('week', action=WeekAction, type=str,
                               metavar=WeekAction.METAVAR,
                               help="Week to process (given as starting day)")
add_output_options(subparser['week'])
add_archive_options(subparser['week'])

# MONTH mode
subparser['month'] = subparsers.add_parser('month', parents=[sharedopts],
                                           epilog=parser.epilog,
                                           formatter_class=GWHelpFormatter,
                                           help="Process one month of data")
# the month is given as YYYYMM; MonthAction records a one-month GPS
# interval starting there
subparser['month'].add_argument('month', action=MonthAction, type=str,
                                metavar=MonthAction.METAVAR,
                                help="Month to process")
add_output_options(subparser['month'])
add_archive_options(subparser['month'])

# and GPS mode
subparser['gps'] = subparsers.add_parser('gps', parents=[sharedopts],
                                         epilog=parser.epilog,
                                         formatter_class=GWHelpFormatter,
                                         help="Process GPS interval")
# both ends of the interval are required and are converted by GPSAction
subparser['gps'].add_argument('gpsstart', action=GPSAction, type=str,
                              metavar='GPSSTART', help='GPS start time.')
subparser['gps'].add_argument('gpsend', action=GPSAction, type=str,
                              metavar='GPSEND', help='GPS end time.')
# as in day mode, only --archive is offered (no --daily-archive)
garchopts = subparser['gps'].add_argument_group('Archive options',
                                                'Choose if, and how, to '
                                                'archive data from this run')
garchopts.add_argument('-a', '--archive', metavar='FILE_TAG',
                       default=False, const='GW_SUMMARY_ARCHIVE', nargs='?',
                       help="Read archived data from, and write processed data "
                            "to, an HDF archive file written with the "
                            "FILE_TAG. If not given, no archive will be used, "
                            "if given with no file tag, a default of "
                            "'%(const)s' will be used.")

add_output_options(subparser['gps'])

# ----------------------------------------------------------------------------
# Parse command-line and sanity check

opts = parser.parse_args()

# in debug mode, escalate DeprecationWarnings to errors
if opts.debug:
    warnings.simplefilter('error', DeprecationWarning)

# propagate the verbosity choice to the package-wide setting
globalv.VERBOSE = opts.verbose

# each --config-file value may itself be a comma-separated list: flatten
# everything into one list of paths, expanding any leading '~'
opts.config_file = [
    os.path.expanduser(fp)
    for csv in opts.config_file
    for fp in csv.split(',')
]

# check segdb option
if opts.on_segdb_error not in ('raise', 'warn', 'ignore'):
    parser.error("Invalid option --on-segdb-error='%s'" % opts.on_segdb_error)

# read configuration file
config = GWSummConfigParser()
# keep option names exactly as written in the INI files
config.optionxform = str
# options given on the command line are set before reading the files, so
# that the INI files can override them
if opts.ifo:
    config.set_ifo_options(opts.ifo, section=DEFAULTSECT)
config.set(DEFAULTSECT, 'user', getpass.getuser())
config.read(opts.config_file)

# find the IFO to process, if any was configured; the global is assigned
# after the try block (not in a `finally`) so that an unexpected error from
# config.get() is not masked by a NameError on the still-unbound `ifo`
try:
    ifo = config.get(DEFAULTSECT, 'IFO')
except NoOptionError:
    ifo = None
globalv.IFO = ifo

# interpolate section names
interp = {}
if ifo:
    interp['ifo'] = ifo.lower()
    interp['IFO'] = ifo.title()
config.interpolate_section_names(**interp)

# double-check week mode matches calendar setting: when the INI file
# defines [calendar] start-of-week, the requested week must start on it
if opts.mode == 'week' and config.has_option("calendar", "start-of-week"):
    # map the configured day name onto the `calendar` module constant of
    # the same (upper-cased) name, comparable with struct_time.tm_wday
    weekday = getattr(calendar,
                      config.get("calendar", "start-of-week").upper())
    if weekday != opts.week.timetuple().tm_wday:
        msg = ("Cannot process week starting on %s. The "
               "'start-of-week' option in the [calendar] section "
               "of the INI file specifies weeks start on %ss."
               % (opts.week.strftime('%Y%m%d'),
                  config.get("calendar", "start-of-week")))
        # parser.error() prints the message and exits; it does not return
        # an exception, so there is nothing to ``raise``
        parser.error(msg)

# record times in ConfigParser
config.set_date_options(opts.gpsstart, opts.gpsend, section=DEFAULTSECT)

# convert times for convenience
# - span: the [gpsstart, gpsend) segment being processed
# - utc: the start time as converted by tconvert
# - starttime/endtime: `Time` objects, used for the welcome message
span = Segment(opts.gpsstart, opts.gpsend)
utc = tconvert(opts.gpsstart)
starttime = Time(float(opts.gpsstart), format='gps')
endtime = Time(float(opts.gpsend), format='gps')

# set mode and output directory
# `path` is the run's directory relative to the output directory; when
# mode.get_base() raises ValueError, fall back to '<gpsstart>-<gpsend>'
mode.set_mode(opts.mode)
try:
    path = mode.get_base(utc)
except ValueError:
    path = os.path.join('%d-%d' % (opts.gpsstart, opts.gpsend))

# set LAL FFT plan wisdom level from the amount of time to process (the
# interval is cut off at globalv.NOW): higher level for longer durations
duration = min(globalv.NOW, opts.gpsend) - opts.gpsstart
fft_lal.LAL_FFTPLAN_LEVEL = (3 if duration > 200000 else
                             2 if duration > 40000 else
                             1)

# set global html only flag
if opts.html_only:
    globalv.HTMLONLY = True

# build directories
# NOTE: the process changes into the output directory here, so every
# relative path used from now on (`path`, `plotdir`, ...) is relative to it
mkdir(opts.output_dir)
os.chdir(opts.output_dir)
plotdir = os.path.join(path, 'plots')
mkdir(plotdir)

# -----------------------------------------------------------------------------
# Setup

# print the welcome banner with the run parameters
# The working directory is already the output directory by now, so the run
# directory is resolved from `path` alone: joining it onto
# `opts.output_dir` again would count a relative --output-dir twice.
vprint("""
------------------------------------------------------------------------------
Welcome to the GW summary information system command-line interface
------------------------------------------------------------------------------

This is process %d
You have selected %s mode.
Start time %s (%s)
End time: %s (%s)
Output directory: %s
""" % (os.getpid(), mode.get_mode().name,
       starttime.utc.iso, starttime.gps,
       endtime.utc.iso, endtime.gps,
       os.path.abspath(path)))

# -- Finalise configuration
# load each kind of definition from the INI configuration in turn,
# reporting how many of each were found
vprint("\nLoading configuration...\n")
plugins = config.load_plugins()
if plugins:
    vprint("    Loaded %d plugins:\n" % len(plugins))
    for mod in plugins:
        vprint("        %s\n" % mod)
units = config.load_units()
vprint("    Loaded %d units\n" % len(units))
channels = config.load_channels()
vprint("    Loaded %d channels\n" % len(channels))
states = config.load_states()
vprint("    Loaded %d states\n" % len(states))
rcp = config.load_rcParams()
vprint("    Loaded %d rcParams\n" % len(rcp))

# read list of tabs
# - tablist: every tab matching --process-tab, in reverse sorted order;
#   this is what gets processed
# - tabs: the sorted tab hierarchy, handed to write_html() when pages are
#   written
tablist = TabList.from_ini(config, match=opts.process_tab,
                           path=path, plotdir=plotdir)
tablist.sort(reverse=True)
tabs = sorted(tablist.get_hierarchy(), key=tablist._sortkey)
vprint("    Loaded %d tabs [%d parents overall]\n" % (len(tablist), len(tabs)))

# read caches
# `cache` maps keyword names to Cache objects and is later expanded into
# the keyword arguments of tab.process(); a kind of cache for which no
# file was given gets no entry at all
cache = {}
for key, var in (('datacache', opts.data_cache),
                 ('trigcache', opts.event_cache),
                 ('segmentcache', opts.segment_cache)):
    if not var:
        continue
    vprint("Reading %s from %d files... " % (key, len(var)))
    # merge all files of this kind into a single cache...
    cache[key] = Cache()
    for fp in var:
        with open(fp, 'rb') as f:
            cache[key].extend(Cache.fromfile(f))
    # ...and keep only the entries relevant to the processed span
    cache[key] = cache[key].sieve(segment=span)
    vprint("done [%d entries]\n" % len(cache[key]))

# -----------------------------------------------------------------------------
# Read Archive

# normalise the archive options: after this, opts.archive is either False
# or a file tag
if not hasattr(opts, 'archive'):
    opts.archive = False

if opts.html_only:
    # no data are processed in HTML-only mode, so no archive is needed
    opts.archive = False
    opts.daily_archive = False
elif opts.archive is True:
    opts.archive = 'GW_SUMMARY_ARCHIVE'

# archive files to read before processing
archives = []

# Defined unconditionally: --daily-archive is mutually exclusive with
# --archive, so the daily-archive branch below used to hit a NameError.
archivedir = os.path.join(path, 'archive')

if opts.archive:
    mkdir(archivedir)
    # from here on opts.archive is the path of this run's archive file
    opts.archive = os.path.join(archivedir, '%s-%s-%d-%d.h5'
                                % (ifo, opts.archive, opts.gpsstart,
                                   opts.gpsend - opts.gpsstart))
    if os.path.isfile(opts.archive):
        archives.append(opts.archive)
    else:
        vprint("No archive found in %s, one will be created at the end.\n"
               % opts.archive)

# read daily archive for week/month/... mode
if hasattr(opts, 'daily_archive') and opts.daily_archive:
    # find daily archive files
    # NOTE(review): confirm that `archivedir` is the base directory that
    # archive.find_daily_archives() expects as its last argument
    archives.extend(archive.find_daily_archives(
        opts.gpsstart, opts.gpsend, ifo, opts.daily_archive, archivedir))
    # then don't read any actual data
    cache['datacache'] = Cache()

for arch in archives:
    vprint("Reading archived data from %s..." % arch)
    archive.read_data_archive(arch)
    vprint(" Done.\n")

# -----------------------------------------------------------------------------
# Read HTML configuration

# stylesheets and scripts configured in the [html] section
css = config.get_css(section='html')
javascript = config.get_javascript(section='html')

# enable comments
try:
    globalv.HTML_COMMENTS_NAME = config.get('html', 'disqus-shortname')
except (NoOptionError, NoSectionError):
    pass

# find new ifo bases: every '<name>-base' option of the [html] section
# contributes an entry mapping <name> to the option's value
ifobases = {}
try:
    bases_ = config.nditems('html')
except NoSectionError:
    pass
else:
    base_reg = re.compile(r'-base\Z')
    # iterate the items fetched above rather than querying them again
    for key, val in bases_:
        if base_reg.search(key):
            ifobases[key.rsplit('-', 1)[0]] = val
# order the mapping by name
ifobases = OrderedDict(sorted(ifobases.items(), key=lambda x: x[0]))

# -----------------------------------------------------------------------------
# Write auxiliary pages

# work out the base URL of the output, trying in order:
# - a working directory below 'public_html': '/~<user>/<what follows>'
# - the '<ifo>-base' URL from the [html] configuration: its path component
# - neither: None, and the write_html processor works it out on-the-fly
if 'public_html' in os.getcwd():
    urlbase = os.path.sep + os.path.join(
        '~%s' % config.get(DEFAULTSECT, 'user'),
        os.getcwd().split('public_html', 1)[1][1:])
elif ifo in ifobases:
    urlbase = urlparse(ifobases[ifo]).path
else:
    urlbase = None
# the same value is handed to write_html() as `base`
base = urlbase

# get link to issues report page; without one configured, pass True
# Only a missing option or section is treated as 'not configured': a bare
# ``except:`` would also swallow KeyboardInterrupt and SystemExit.
try:
    issues = config.get('html', 'issues')
except (NoOptionError, NoSectionError):
    issues = True

# write 404 error page
# only possible when the base URL is known, since the .htaccess file must
# point at the error page by URL
if not opts.no_htaccess and not opts.no_html and urlbase:
    top = os.path.join(urlbase, path)
    four0four = get_tab('404')(span=span, parent=None, path=path,
                               index=os.path.join(path, '404.html'))
    four0four.write_html(css=css, js=javascript, tabs=tabs, ifo=ifo,
                         ifomap=ifobases, top=top, base=base,
                         writedata=not opts.html_only,
                         writehtml=not opts.no_html,
                         issues=issues)
    url404 = os.path.join(urlbase, four0four.index)
    # disable directory listings, and serve the custom page for both
    # 'not found' (404) and 'forbidden' (403) errors
    with open(os.path.join(path, '.htaccess'), 'w') as htaccess:
        print('Options -Indexes', file=htaccess)
        print('ErrorDocument 404 %s' % url404, file=htaccess)
        print('ErrorDocument 403 %s' % url404, file=htaccess)

# write config page
# the tab object is created even with --no-html, because its `index` is
# passed to write_html() for every other page
about = get_tab('about')(span=span, parent=None, path=path)
if not opts.no_html:
    mkdir(about.path)
    about.write_html(css=css, js=javascript, tabs=tabs, config=config.files,
                     ifo=ifo, ifomap=ifobases, about=about.index, base=base,
                     issues=issues, writedata=not opts.html_only,
                     writehtml=not opts.no_html)

# -----------------------------------------------------------------------------
# Read bulk data

# XXX: bulk data reading could optimise things
# XXX: but has never been used, so should remove (DMM 18/01/16)
if opts.bulk_read and not opts.html_only:
    vprint("\n-------------------------------------------------\n")
    vprint("Reading all data in BULK...\n")
    # collect, over all tabs, everything that needs reading:
    allts = set()  # channels read as TimeSeries
    allsv = set()  # channels read as StateVector
    allflags = set()  # data-quality flags for the segment query
    for tab in tablist:
        snames = []
        for state in tab.states:
            snames.append(state.name)
            # NOTE(review): every element returned by re_flagdiv.split()
            # is added here, whereas the 'segments' plot branch below keeps
            # only every other, non-empty element -- confirm that the
            # difference is intended
            if state.definition:
                allflags.update(re_flagdiv.split(state.definition))
        # get all data defined for the 'all' state
        if ALLSTATE in snames:
            allts.update(tab.get_channels('timeseries', 'spectrogram',
                                          'spectrum', 'histogram'))
            allsv.update(tab.get_channels('statevector'))
            allflags.update(tab.get_flags('segments'))
        # or get data for plots defined over all states
        else:
            for plot in tab.plots:
                # skip plots that are tied to a specific state
                if plot.state is not None:
                    continue
                if plot.type in ['timeseries', 'spectrogram', 'spectrum',
                                 'histogram']:
                    allts.update(plot.channels)
                elif plot.type in ['statevector']:
                    allsv.update(plot.channels)
                elif plot.type in ['segments']:
                    allflags.update([f for cflag in plot.flags for f in
                                     re_flagdiv.split(cflag)[::2] if f])
    # query everything over the full span; return_=False because the
    # calls are made for their side effect, not for their result
    allseg = SegmentList([span])
    if len(allflags):
        vprint("%d data-quality flags identified for segment query from all "
               "tabs...\n" % len(allflags))
        get_segments(allflags, allseg, config=config, return_=False)
    if len(allts):
        vprint("%d channels identified for TimeSeries from all tabs...\n"
               % len(allts))
        get_timeseries_dict(allts, allseg,
                            config=config, nds=opts.nds,
                            nproc=opts.multiprocess, return_=False)
    if len(allsv):
        vprint("%d channels identified for StateVector from all tabs...\n"
               % len(allsv))
        get_timeseries_dict(allsv, allseg,
                            config=config, nds=opts.nds, statevector=True,
                            nproc=opts.multiprocess, return_=False)

# -----------------------------------------------------------------------------
# Process all tabs

# TODO: consider re-working this loop as TabList.process_all

for tab in tablist:
    vprint("\n-------------------------------------------------\n")
    # label the tab as 'parent/child' when it has a parent
    name = ('%s/%s' % (tab.parent.name, tab.name)) if tab.parent else tab.name

    # process the tab's data, unless only HTML was requested
    if not opts.html_only and isinstance(tab, get_tab('_processed')):
        vprint("Processing %s\n" % name)
        tab.process(
            config=config,
            nds=opts.nds,
            nproc=opts.multiprocess,
            segdb_error=opts.on_segdb_error,
            datafind_error=opts.on_datafind_error,
            **cache
        )

    # write the page for everything except hidden tabs and link tabs
    if not (tab.hidden or isinstance(tab, get_tab('link'))):
        mkdir(tab.href)
        page = tab.write_html(
            css=css, js=javascript, tabs=tabs, ifo=ifo, ifomap=ifobases,
            about=about.index, base=base, issues=issues,
            writedata=not opts.html_only, writehtml=not opts.no_html)

    # archive this tab: the archive file is re-written after every tab
    if opts.archive:
        vprint("Writing data to archive...")
        archive.write_data_archive(opts.archive)
        vprint("Archive written in\n{}\n".format(
            os.path.abspath(opts.archive)))
    vprint("%s complete!\n" % (name))

vprint("""
------------------------------------------------------------------------------
All done. Thank you.
------------------------------------------------------------------------------
""")
