#!/usr/bin/env python
'''
Create tables, add elements to them, or update them.
When using remote paths, no check is done to determine whether they correspond
to the current host.
'''

__author__ = ['Miguel Ramos Pernas']
__email__  = ['miguel.ramos.pernas@cern.ch']

# Python
import argparse
import functools
import json
import logging
import multiprocessing
import os
import re
import subprocess
import tempfile

# Local
import hep_rfm


def _process_path( path, remote, bare = False ):
    '''
    Build the protocol path associated to "path", together with its
    absolute local path.

    :param path: path to the file. It is converted to an absolute path.
    :param remote: if not None, pair (protocol, prefix) used to build the \
    protocol path from the absolute local path.
    :param bare: if set, the file is not required to exist locally.
    :returns: protocol path and absolute local path.
    :raises RuntimeError: if "bare" is not set and the file is not \
    present locally.
    '''
    local_path = os.path.abspath(path)

    if not bare and not os.path.isfile(local_path):
        # Report the offending path, since this function is also called
        # in loops over many files
        raise RuntimeError('Attempt to create a table entry with a file that is not present locally: "{}"'.format(local_path))

    if remote is None:
        return hep_rfm.protocol_path(local_path), local_path

    protocol, prefix = remote

    # Each protocol knows how to merge its prefix with a local path
    full_path = hep_rfm.ProtocolPath.__protocols__[protocol].join_path(prefix, local_path)

    return hep_rfm.protocol_path(full_path, protocol), local_path


def decorate_access_mode( mode ):
    '''
    Decorator for the modes that work on an already existing table.
    The decorated function must be called using keyword arguments only,
    and "table" must be one of them.
    '''
    @functools.wraps(mode)
    def _wrapper( **kwargs ):
        '''
        Run the mode directly on local tables. For remote tables, work on
        a copy placed in a temporary directory and send it back afterwards.
        '''
        target = hep_rfm.protocol_path(kwargs['table'])
        kwargs['table'] = target

        if not hep_rfm.is_remote(target):
            mode(**kwargs)
            return

        log = logging.getLogger(__name__)

        log.info('Copying remote table to a temporary directory')

        with tempfile.TemporaryDirectory() as workdir:

            local_copy = hep_rfm.protocol_path(os.path.join(workdir, 'tmp_table.txt'))

            hep_rfm.copy_file(target, local_copy)

            # The mode only sees the local copy
            kwargs['table'] = local_copy

            mode(**kwargs)

            log.info('Copying the table to the remote path')

            hep_rfm.copy_file(local_copy, target)

    return _wrapper


def decorate_create_mode( mode ):
    '''
    Decorator for the modes that build a table from scratch.
    The decorated function must be called using keyword arguments only,
    and "table" must be one of them.
    '''
    @functools.wraps(mode)
    def _wrapper( **kwargs ):
        '''
        Run the mode directly for local tables. For remote tables, build
        the file in a temporary directory and copy it to its destination.
        '''
        destination = hep_rfm.protocol_path(kwargs['table'])
        kwargs['table'] = destination

        if not hep_rfm.is_remote(destination):
            mode(**kwargs)
            return

        log = logging.getLogger(__name__)

        log.info('Creating table on a temporary directory')

        with tempfile.TemporaryDirectory() as workdir:

            staged = hep_rfm.protocol_path(os.path.join(workdir, 'tmp_table.txt'))

            # Nothing is fetched from the remote: the table does not exist yet
            kwargs['table'] = staged

            mode(**kwargs)

            log.info('Copying the table to the remote path')

            hep_rfm.copy_file(staged, destination)

    return _wrapper


def decorate_multitable_mode( mode ):
    '''
    Decorator for modes where one table is updated using another as a reference.
    The decorated function must be called using keyword arguments only,
    and "table", "reference" and "refpid" must be among them.
    '''
    @functools.wraps(mode)
    def _wrapper( **kwargs ):
        '''
        If the keywords "table" and/or "reference" refer to remote paths,
        work with copies placed in a temporary directory. The reference is
        only read, while the table is copied back to its remote path once
        the mode has finished without errors.
        '''
        kwargs['table']     = tab = hep_rfm.protocol_path(kwargs['table'])
        kwargs['reference'] = ref = hep_rfm.protocol_path(kwargs['reference'], kwargs['refpid'])

        remote_tab = hep_rfm.is_remote(tab)
        remote_ref = hep_rfm.is_remote(ref)

        if not (remote_tab or remote_ref):
            mode(**kwargs)
            return

        logger = logging.getLogger(__name__)

        with tempfile.TemporaryDirectory() as d:

            if remote_ref:

                tmp_ref = hep_rfm.protocol_path(os.path.join(d, 'tmp_reference.txt'))

                logger.info('Copying reference table to a temporary directory')

                hep_rfm.copy_file(ref, tmp_ref)

                kwargs['reference'] = tmp_ref

            if remote_tab:

                tmp_tab = hep_rfm.protocol_path(os.path.join(d, 'tmp_table.txt'))

                logger.info('Copying remote table to a temporary directory')

                hep_rfm.copy_file(tab, tmp_tab)

                kwargs['table'] = tmp_tab

            mode(**kwargs)

            # If the mode raises, the remote table is left untouched
            if remote_tab:

                logger.info('Copying the table to the remote path')

                hep_rfm.copy_file(tmp_tab, tab)

    return _wrapper


@decorate_access_mode
def add( table, name, path, bare, remote ):
    '''
    Add a single file to the table, registered with the given name.
    '''
    contents = hep_rfm.Table.read(table.path)

    ppath, local = _process_path(path, remote, bare)

    # Bare entries carry no marks; otherwise they are computed from the
    # local file
    marks = () if bare else (hep_rfm.FileMarks.from_local_path(local),)

    entry = hep_rfm.FileInfo(name, ppath, *marks)

    contents[entry.name] = entry

    contents.write(table.path)


@decorate_access_mode
def add_massive( table, files, nproc, remote, bare ):
    '''
    Add several files to the given table at once.
    Each entry is named after its file, without directory and extension.
    '''
    contents = hep_rfm.Table.read(table.path)

    jobs = hep_rfm.parallel.JobHandler()

    def _file_info( obj, queue, bare ):
        '''
        Build the FileInfo object for one input and send it through the queue
        '''
        name, ppath, local = obj

        # Bare entries carry no marks
        marks = () if bare else (hep_rfm.FileMarks.from_local_path(local),)

        queue.put(hep_rfm.FileInfo(name, ppath, *marks))

    # The workers are attached to the handler before any input is queued
    results = multiprocessing.Queue()
    for _ in range(nproc):
        hep_rfm.parallel.Worker(jobs, _file_info, args=(results, bare))

    for fpath in files:

        ppath, local = _process_path(fpath, remote, bare)

        stem, _ = os.path.splitext(ppath.path)

        jobs.put((os.path.basename(stem), ppath, local))

    jobs.process()

    # One result is expected per input file
    for _ in files:
        entry = results.get()
        contents[entry.name] = entry

    results.close()

    contents.write(table.path)


def add_from_dir( directory, regex, **kwargs ):
    '''
    Add the files found in a directory and its sub-directories.
    A regular expression can be used to select the files by name.
    Paths will be converted to absolute.
    '''
    pattern = re.compile(regex) if regex else None

    # The expression is matched against the file name, not the whole path
    files = [
        os.path.join(root, fname)
        for root, _, filenames in os.walk(directory)
        for fname in filenames
        if pattern is None or pattern.match(fname)
    ]

    # The remaining keyword arguments are forwarded to "add_massive"
    add_massive(files=files, bare=False, **kwargs)


@decorate_create_mode
def create( table ):
    '''
    Create a new table with no entries
    '''
    target = table.path

    if not os.path.isfile(target):
        # Opening in write mode and closing is enough to get an empty file
        open(target, 'wt').close()
        return

    raise RuntimeError('File "{}" already exists'.format(target))


@decorate_access_mode
def display( table, fields, regex ):
    '''
    Show the contents of the table at the given path.
    The fields to print can be selected, although the names of the
    files are always shown.
    '''
    log = logging.getLogger(__name__)

    contents = hep_rfm.Table.read(table.path)

    if len(contents) == 0:
        log.info('No entries found')
        return

    # "name" goes first if it was not explicitly requested
    columns = list(fields)
    if 'name' not in columns:
        columns.insert(0, 'name')

    pattern = re.compile(regex) if regex else None

    # One row of strings per selected entry, sorted by key
    rows = tuple(
        tuple(str(info.field(col)) for col in columns)
        for key, info in sorted(contents.items())
        if pattern is None or pattern.match(key)
        )

    if not rows:
        log.info('No entries found matching regular expression "{}"'.format(regex))
        return

    # Each column is as wide as its longest cell, header included
    widths = (
        max(map(len, (col,) + cells))
        for col, cells in zip(columns, zip(*rows))
        )

    template = '\t'.join('{{:<{}}}'.format(w) for w in widths)

    log.info('Contents of table "{}"'.format(table.path))
    log.info(template.format(*columns))

    for row in rows:
        log.info(template.format(*row))


@decorate_access_mode
def remove( table, files, regex ):
    '''
    Remove entries from the table.
    Entries can be selected by name, with a regular expression, or both.
    '''
    contents = hep_rfm.Table.read(table.path)

    # Entries given explicitly by name
    for name in files:
        contents.pop(name)

    if regex:
        pattern = re.compile(regex)

        # Collect the keys first, so the table is not modified while
        # iterating over it
        matching = [key for key in contents.keys() if pattern.match(key)]

        for key in matching:
            contents.pop(key)

    contents.write(table.path)


@decorate_multitable_mode
def replicate( table, location, reference, refpath, refpid, collisions, remote ):
    '''
    Reproduce the structure of sub-directories of the files stored on a
    reference table into another one, creating bare entries for all of them.
    The keys and names of the files will be maintained.

    The argument "refpath" corresponds to the fragment of the path that will
    be used as the "base" directory in the reference table, while "location"
    corresponds to the path where the schema will be reproduced. No directories
    are created after calling this function.

    The policy with collisions is governed by "collisions". If set to "raise"
    (default), if an attempt is made to add a file with a name that already
    exists in the table, a warning will be displayed and the table will not be
    processed. If set to "replace", then the files will be replaced by the new
    value. Finally, if set to "omit", collisions will be omitted.
    '''
    # NOTE: "refpid" is not used here; it is consumed by the decorator to
    # build the protocol path of the reference table.
    logger = logging.getLogger(__name__)

    trgtb = hep_rfm.Table.read(table.path)
    reftb = hep_rfm.Table.read(reference.path)

    def bare_entry( finfo ):
        '''
        Build the bare entry to store in the target table for the given
        entry of the reference table.
        '''
        ppath = finfo.protocol_path

        if hep_rfm.is_remote(ppath):
            _, p = ppath.split_path()
        else:
            p = ppath.path

        if not p.startswith(refpath):
            raise RuntimeError('Paths in the reference table must start by "refpath": "{}"'.format(refpath))

        # Only the leading "refpath" is swapped; later occurrences of the
        # same fragment in the path must be preserved
        pp, _ = _process_path(location + p[len(refpath):], remote, bare=True)

        return hep_rfm.FileInfo(finfo.name, pp)

    collided = False
    for rk, rv in reftb.items():

        if rk in trgtb:

            if collisions == 'raise':
                # Warnings are displayed for all files that cause collisions
                # between the two tables
                logger.warning('Colliding file name "{}"'.format(rk))
                collided = True
                continue

            if collisions != 'replace':
                # collisions == 'omit'
                continue

        trgtb[rk] = bare_entry(rv)

    # Do not write the table if a collision was found
    if collided:
        raise RuntimeError('Colliding names in tables. See policy in "--collisions" argument.')

    trgtb.write(table.path)


@decorate_access_mode
def update( table, nproc, regex ):
    '''
    Update the table located in the given path. One can select the files
    to update using a regular expression.
    '''
    t = hep_rfm.Table.read(table.path)

    if regex:

        m = re.compile(regex)

        # Split the entries into those to refresh and those kept as they are
        to_update, maintain = [], []
        for n, f in t.items():
            (to_update if m.match(n) else maintain).append(f)

        t = hep_rfm.Table(to_update).updated(parallelize=nproc)

        # Put back the entries that did not match the expression
        for f in maintain:
            t[f.name] = f

    else:
        t = t.updated(parallelize=nproc)

    # Always write to the path string, as the rest of the modes do
    t.write(table.path)


if __name__ == '__main__':

    # Logging configuration
    logging.basicConfig(format='%(message)s')
    logging.getLogger().setLevel(logging.INFO)

    def add_mode( subparsers, func ):
        '''
        Register "func" as a new mode, named after the function.
        The docstring of the function is used as the help message, and
        the function itself is stored as the "func" default of the parser.
        '''
        s = subparsers.add_parser(func.__name__, help=func.__doc__)
        s.set_defaults(func=func)
        return s

    # Define the parser
    parser = argparse.ArgumentParser(description=__doc__)

    subparsers = parser.add_subparsers(help='Mode to run')

    # Add the modes
    parser_add          = add_mode(subparsers, add)
    parser_add_from_dir = add_mode(subparsers, add_from_dir)
    parser_add_massive  = add_mode(subparsers, add_massive)
    parser_create       = add_mode(subparsers, create)
    parser_display      = add_mode(subparsers, display)
    parser_remove       = add_mode(subparsers, remove)
    parser_replicate    = add_mode(subparsers, replicate)
    parser_update       = add_mode(subparsers, update)

    # For most of the modes we work with only one table, and must be
    # the first argument.
    for p in (
            parser_add,
            parser_add_massive,
            parser_add_from_dir,
            parser_create,
            parser_display,
            parser_remove,
            parser_replicate,
            parser_update,
            ):
        p.add_argument('table', type=str,
                       help='Path to the table file')

    # add
    parser_add.add_argument('name', type=str,
                            help='Name of the file to add')
    parser_add.add_argument('path', type=str,
                            help='Path to the file')
    # add_from_dir
    parser_add_from_dir.add_argument('directory', type=str,
                                     help='Directory to process')
    parser_add_from_dir.add_argument('--regex', type=str, default=None,
                                     help='Regular expression to filter the files to add')
    # add_massive
    parser_add_massive.add_argument('files', nargs='+',
                                    help='Path to the files to add')
    # create (no arguments other than the table)

    # display
    parser_display.add_argument('--regex', type=str, default=None,
                                help='Regular expression to filter the files to show')
    parser_display.add_argument('--fields', nargs='+', default=hep_rfm.FileInfo.__fields__,
                                help='Fields to display')

    # remove
    parser_remove.add_argument('--files', nargs='+', default=[],
                               help='Names of the files to remove')
    parser_remove.add_argument('--regex', type=str, default=None,
                               help='Regular expression for the files to remove')
    # replicate
    parser_replicate.add_argument('reference', type=str,
                                  help='Path to the reference table')
    parser_replicate.add_argument('location', type=str,
                                  help='Location where to build the schema')
    parser_replicate.add_argument('refpath', type=str,
                                  help='Absolute path in the reference table to be replaced by "location"')
    parser_replicate.add_argument('--refpid', type=str, default='local',
                                  help='Possible protocol ID of the reference path')
    parser_replicate.add_argument('--collisions', default='raise',
                                  choices=('raise', 'replace', 'omit'),
                                  help='Whether to replace the paths to the files with colliding names')
    # update
    parser_update.add_argument('--regex', type=str, default=None,
                               help='Regular expression for the files to update')

    #
    # Add arguments common to different parsers
    #
    for p in (parser_add_massive, parser_add_from_dir, parser_update):
        p.add_argument('--nproc', type=int, default=4, required=False,
                       help='Number of parallel process to invoke')

    for p in (parser_add, parser_add_massive, parser_add_from_dir, parser_replicate):
        p.add_argument('--remote', '-r', nargs=2, default=None, required=False,
                       help='Remote protocol type and direction to prepend '
                       'to the files. If you are specifying it in the file path, '
                       'adding it as well here will cause an error.')

    for p in (parser_add, parser_add_massive):
        p.add_argument('--bare', '-b', action='store_true',
                       help='Whether the to create a bare entry in '
                       'the file. A bare entry will not contain '
                       'file ID nor time-stamp. This must be called '
                       'when having multiple tables, for those '
                       'which need to be updated.')

    # Parse the arguments and call the function. The parsed names match
    # the keyword arguments of the function of each mode.
    args = parser.parse_args()
    dct  = dict(vars(args))

    # In python 3 sub-commands are optional by default, so "func" is only
    # defined when a mode has been provided
    func = dct.pop('func', None)
    if func is None:
        parser.error('a mode must be specified')

    func(**dct)
