#! /usr/bin/env python3
##########################################################################
# NSAp - Copyright (C) CEA, 2020 - 2024
# Distributed under the terms of the CeCILL-B license, as published by
# the CEA-CNRS-INRIA. Refer to the LICENSE file or to
# http://www.cecill.info/licences/Licence_CeCILL-B_V1-en.html
# for details.
##########################################################################

# Imports
import os
import re
import json
import shutil
import inspect
import logging
import importlib
from pprint import pprint
from collections import namedtuple
from datetime import datetime
import numpy as np
import caravel
from caravel.utils import export_report, monitor
from caravel.mail import EmailManager
from caravel.validation import run_validation
from caravel.nextcloud import NextCloud, ShareType, Permission


# Remove warning: silence the urllib3 'InsecureRequestWarning' messages for
# the whole process (presumably raised because the NextCloud session in 'main'
# is opened with 'verify=False' - TODO confirm).
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


@monitor
def main(project, upload_mounts, allowed_subjects, working_dir, nextcloud_url,
         smtp_host, smtp_port, support_email, test_mod=False, login=None,
         password=None, allow_overwrite=False, verbose=0):
    """ The validation/integration script.

    You need first to mount the WEBDAV shares using the mount.davfs command:
    * mount.davfs https://<host>/remote.php/dav/files/admin/ /mnt/nextcloud

    The script is organized as follows:
    * Getting Environment Variables: the script attempts to find the login
      and password in the NEXTCLOUD_ADMIN_USER and NEXTCLOUD_ADMIN_PASSWORD
      environment variables if they are not provided as arguments.
    * Parameters and Configurations: the logger is initialized to print and
      save (to a log file named 'stdout_<timestamp>.log') all information, the
      code is run in debug mode if verbose, a 'logs', 'layout' and 'data'
      folder are created in the 'working_dir' directory (the 'layout' folder
      is emptied at each run), and the allowed subjects are loaded from the
      'allowed_subjects' file if specified.
    * Step 1: connect to the NextCloud service you are using.
    * Step 2: verify that the provided mounts are shared, mounted, properly
      named, and ready for validation (i.e., do not contain a WIP file and
      can be linked to a collected path). Any mount information that needs
      further processing is stored in a DataItem structure that contains the
      upload folder name (uploadname), the upload directory from the mount
      point (uploaddir), the collect folder name (collectname), the collect
      directory from the mount point (collectdir), the WIP file location from
      the mount point (wipfile), the type of data being validated (family),
      and the current user mails associated with the mount (mails).
    * Step 3: generate a report for each folder to be included in the
      collected data. A 'get_<family>_status' function is called to get the
      list of already integrated data (i.e., a list of subjects, sessions,
      ...), then the BIDS organized data is parsed using the
      '<project>-<family>_<sourcedata|rawdata>.conf' file (note that if no
      parsing can be applied, an error is raised and the whole
      validation/integration procedure is aborted), then all available project
      validators are loaded (in 'test_mod' only the '<family>.structure'),
      then all validations are run with the list of already integrated data as
      an input (from here the 'upload' share is in read only), and finally a
      report is saved for each mount to be integrated. At the end, an email is
      sent to the mount users in case of success or failure (the 'upload' share
      permissions are restored). If an internal error occurs, the
      'support_email' is added to the list of recipients (the 'upload' share is
      maintained in read only for further investigations).
    * Step 4: integrate the data listed by the 'list_<family>_datasets' function
      or generate report output. If a previous 'report.json' or 'report.docx'
      file exists in the upload mount, it will be deleted. If no validation
      error is reported, a 'list_<family>_datasets' function is called to get
      the list of paths (i.e., relative path from the mount directory) to
      include in the collected data. For each path, the missing parent folders
      are created in the 'collected' share and the data is moved from the
      'upload' share to the 'collected' share. Finally, any empty folders are
      removed. If a validation error is reported, a 'report.json' and a
      'report.docx' file describing the problem(s) are created. Finally, the
      WIP file is restored to the 'upload' share.
    * Save Logs: the 'runtime', 'outputs' and 'inputs' information is saved in
      the 'logs' folder (the password is never written).

    Parameters
    ----------
    project: str
        the name of the project: a corresponding Python module MUST be
        available.
    upload_mounts: list
        the folders that contains the data to be validated: each folder
        MUST end with '-upload', and the '<family>' is read from the second
        token of the folder name when split on '-' or '_'. A single path or
        a tuple of paths is also accepted.
    allowed_subjects: str
        a text file containing a list of allowed PSC1.
    working_dir: str
        a working directory.
    nextcloud_url: str
        the URL of the NextCloud service.
    smtp_host: str
        the SMTP host name.
    smtp_port: int
        the SMTP port.
    support_email: str
        the support email in case of internal error.
    test_mod: bool, default=False
        if set select only the '<family>.structure' validators.
    login: str, default None
        the NextCloud service login to use.
    password: str, default None
        the associated NextCloud login password.
    allow_overwrite: bool, default False
        allow overwriting during data integration.
    verbose: int, default 0
        increase the verbosity level: 0 silent, [1, 2] verbose.

    Raises
    ------
    ValueError
        if the credentials are missing, if a mount is misnamed, not shared,
        or has no associated '-collected' folder or group, if an upload
        folder contains no data, if a required '<family>' function is not
        defined in the project, or if a NextCloud operation fails.
    """
    # Getting Environment Variables
    if login is None:
        login = os.environ.get("NEXTCLOUD_ADMIN_USER")
    if password is None:
        password = os.environ.get("NEXTCLOUD_ADMIN_PASSWORD")
    if login is None or password is None:
        raise ValueError(
            "Please define first the 'NEXTCLOUD_ADMIN_USER' and "
            "'NEXTCLOUD_ADMIN_PASSWORD' variable.")

    # Parameters and Configurations
    project_module = importlib.import_module(project)
    runtime = {
        "tool": "validation_and_integration",
        "timestamp": datetime.now().isoformat(),
        "caravel_version": caravel.__version__,
        f"{project}_version": project_module.__version__}
    logdir = os.path.join(working_dir, "logs")
    if not os.path.isdir(logdir):
        os.mkdir(logdir)
    logfile = os.path.join(logdir, f"stdout_{runtime['timestamp']}.log")
    logger = logging.getLogger("caravel")
    validation_module = importlib.import_module(f"{project}.validation")
    if verbose > 0:
        validation_module.ValidationBase.__level__ = "debug"
    validation_module.ValidationBase.setup_logging(logfile=logfile)
    # The layout folder is rebuilt from scratch at each run.
    layoutdir = os.path.join(working_dir, "layout")
    if os.path.isdir(layoutdir):
        shutil.rmtree(layoutdir)
    os.mkdir(layoutdir)
    datadir = os.path.join(working_dir, "data")
    if not os.path.isdir(datadir):
        os.mkdir(datadir)
    email_manager = EmailManager(smtp_host, smtp_port)
    subjects = None
    if allowed_subjects is not None:
        subjects = np.loadtxt(allowed_subjects, dtype=str)
    logger.info(f"Allowed subjects: {subjects}")
    # Snapshot the call arguments. 'locals()' MUST be evaluated outside of
    # the comprehension: before Python 3.12 a comprehension has its own scope
    # and does not see the function arguments.
    sig = inspect.signature(main)
    call_locals = locals()
    vals = [call_locals[param] for param in sig.parameters]
    inputs = sig.bind(*vals).arguments
    # The password must never reach the log file or the saved JSON inputs.
    logged_inputs = {
        key: value for key, value in inputs.items() if key != "password"}
    if isinstance(upload_mounts, tuple):
        upload_mounts = list(upload_mounts)
    elif not isinstance(upload_mounts, list):
        upload_mounts = [upload_mounts]
    if verbose > 0:
        logger.info(f"Starting {project} validation/integration ...")
        logger.info("Runtime:")
        logger.info(runtime)
        logger.info("Inputs:")
        logger.info(logged_inputs)

    # Step 1: connect to the NextCloud service you are using.
    session = NextCloud(
        endpoint=nextcloud_url, user=login, password=password,
        json_output=True, verify=False)

    # Step 2: verify that the provided mounts are shared, mounted, properly
    # named, and ready for validation (i.e., do not contain a wip file and can
    # be linked to a collected path).
    shares = {
        os.path.basename(share["path"]): share
        for share in session.get_shares().data
        if share["share_type"] == ShareType.GROUP}
    logger.debug(f"Shares: {list(shares.keys())}")
    groups = session.get_groups().data["groups"]
    logger.debug(f"Groups: {groups}")
    items = []
    DataItem = namedtuple("DataItem",
                          ("uploadname uploaddir collectname collectdir "
                           "wipfile family mails"))
    # Empty local marker files that are uploaded to the shares when needed.
    local_wipfile = os.path.join(datadir, "wip")
    local_mountfile = os.path.join(datadir, "ismounted")
    for local_file in (local_wipfile, local_mountfile):
        if not os.path.isfile(local_file):
            os.mknod(local_file)
    for path in upload_mounts:
        if not path.endswith("-upload"):
            raise ValueError(f"Mount suffix MUST be '-upload': '{path}'.")
        name = os.path.basename(path)
        if name not in shares:
            raise ValueError(f"Mount '{path}' is not a share.")
        family = re.split("-|_", name)[1]
        logger.info(f"Family: {family}")
        wip_file = os.path.join(path, "wip")
        # A WIP file on the share means that the upload is not ready: the
        # mount is silently skipped.
        if session.isfile(
                "admin", "/".join(["/", project, name, "wip"])):
            continue
        collected_path = path.replace("-upload", "-collected")
        collected_name = os.path.basename(collected_path)
        if not os.path.isdir(collected_path):
            raise ValueError(
                f"An associated '-collected' path is expected for '{path}'.")
        if name not in groups:
            raise ValueError(f"An associated group is expected for '{name}'.")
        group_users = session.get_group(name).data["users"]
        logger.debug(f"Group members: {group_users}")
        to_addrs = []
        for uid in group_users:
            mail = session.get_user(uid).data["email"]
            if mail is not None:
                to_addrs.append(mail)
        logger.debug(f"To emails: {to_addrs}")
        item = DataItem(name, path, collected_name, collected_path,
                        wip_file, family, to_addrs)
        items.append(item)
        # Mount sanity check: push a marker file, test its presence, and
        # always remove it before reporting a problem.
        # NOTE(review): the presence is tested through the NextCloud session
        # and not on the local mount point - confirm this is the intent.
        res = session.upload_file(
            "admin", local_filepath=local_mountfile,
            remote_filepath="/".join([project, item.uploadname, "ismounted"]))
        if not res.is_ok:
            raise ValueError("Unable to upload ISMOUNTED file.")
        is_local_file = session.isfile(
            "admin", "/".join(["/", project, item.uploadname, "ismounted"]))
        res = session.delete_path(
            "admin", path=os.path.join(project, item.uploadname, "ismounted"))
        if not res.is_ok:
            raise ValueError("Unable to delete ISMOUNTED file.")
        if not is_local_file:
            raise ValueError(
                f"Mount '{path}' is not a mounted to the good folder.")
    logger.info(f"Folder to integrate: {[item.uploaddir for item in items]}")

    # Step 3: generate a report for each folder to be included in the
    # collected data.
    outputs = []
    for item in items:
        logger.info(f"Processing '{item.uploaddir}'...")
        logger.debug(f"Generate report for item: {item}")
        share = shares[item.uploadname]
        # Freeze the upload share during the validation.
        session.update_share(share["id"], Permission.READ)
        status_fct = getattr(
            validation_module, f"get_{item.family}_status", None)
        if status_fct is None:
            raise ValueError(
                f"You need to define a 'get_{item.family}_status' status "
                f"function for '{item.family}'.")
        already_integrated = status_fct(item.collectdir)
        logger.info(f"Already integrated: {already_integrated}")
        projectdir = os.path.dirname(os.path.realpath(project_module.__file__))
        confdir = os.path.join(projectdir, "conf")
        parser = caravel.get_parser(
            project=f"{project}-{item.family}", confdir=confdir,
            layoutdir=None)
        contain_data = False
        for dirname in ("sourcedata", "rawdata"):
            if os.path.isdir(os.path.join(item.uploaddir, dirname)):
                parser.pickling_layout(
                    bids_root=item.uploaddir, name=dirname, outdir=layoutdir)
                contain_data = True
        if not contain_data:
            # Put the WIP file back before aborting the whole procedure.
            res = session.upload_file(
                "admin", local_filepath=local_wipfile,
                remote_filepath="/".join([project, item.uploadname, "wip"]))
            if not res.is_ok:
                raise ValueError("Unable to upload WIP file.")
            raise ValueError("No data to integrate.")
        if test_mod:
            family_selector = f"{item.family}.structure"
        else:
            family_selector = f"{item.family}.*"
        validators = validation_module.get_validators(family=family_selector)
        logger.info(f"Validators: {validators}")
        validation_data = {
            "layoutdir": layoutdir,
            "confdir": confdir,
            "project": f"{project}-{item.family}",
            "already_integrated": already_integrated}
        if subjects is not None:
            validation_data["allowed_subjects"] = subjects
        report = run_validation(
            validation_data, validators=validators, logfile=logfile)
        logger.info(f"Report: {report}")
        in_error = "Internal error" in repr(report)
        if in_error:
            # The share is kept in read only and the item is not integrated.
            email_manager.send_mail(
                item.mails + [support_email],
                subject="NeuroSpin data collection",
                body=("The validation/integration process has failed due to an "
                      "internal error. The support team is in copy of this email "
                      "and will fix the problem quickly."),
                from_addr="noreply@cea.fr", files=None)
        else:
            if any(val != {} for val in report.values()):
                mail_txt = (
                    "The validation/integration process is now complete. "
                    "Your last upload did not pass the checks, "
                    "Please connect the service to see the report file "
                    "and correct your input.")
            else:
                mail_txt = (
                    "The validation/integration process is now complete. "
                    "Your last upload passed the checks, "
                    "you can see the result in the 'collected' "
                    "folder of your modality.")
            outputs.append((item, report))
            session.update_share(share["id"], Permission.ALL)
            email_manager.send_mail(
                item.mails, subject="NeuroSpin data collection",
                body=mail_txt, from_addr="noreply@cea.fr", files=None)

    # Step 4: integrate the data listed by the 'list_<family>_datasets'
    # function or generate report output.
    for item, report in outputs:
        logger.debug(f"Integrate item: {item}")
        # Remove the reports of a previous run.
        for filename in ("report.json", "report.docx"):
            if os.path.isfile(os.path.join(item.uploaddir, filename)):
                res = session.delete_path(
                    "admin", path="/".join([
                        project, item.uploadname, filename]))
                if not res.is_ok:
                    raise ValueError(f"Unable to delete '{filename}'.")
        if all(len(val) == 0 for val in report.values()):
            logger.info(f"Ready to integrate: '{item.uploaddir}'.")
            list_fct = getattr(
                validation_module, f"list_{item.family}_datasets", None)
            if list_fct is None:
                raise ValueError(
                    f"You need to define a 'list_{item.family}_datasets' list "
                    f"datasets function for '{item.family}'.")
            datasets = list_fct(item.uploaddir)
            logger.info(f"Datasets to move: {datasets}")
            for name in datasets:
                is_already_collected = (
                    os.path.isdir(os.path.join(item.collectdir, name)) or
                    os.path.isfile(os.path.join(item.collectdir, name))
                )
                if not allow_overwrite and is_already_collected:
                    logger.info(f"- {name}: SKIP")
                    continue
                logger.info(f"- {name}...")
                splits = name.split(os.sep)
                # Create the missing parent folders in the collected share,
                # from the outermost to the innermost one.
                path = "/".join([project, item.collectname])
                for idx in range(1, len(splits)):
                    if not os.path.isdir(os.path.join(
                            item.collectdir, os.sep.join(splits[:idx]))):
                        folder_path = "/".join([path] + splits[:idx])
                        res = session.create_folder(
                            "admin", folder_path=folder_path)
                        if not res.is_ok:
                            raise ValueError(
                                "Unable to CREATE directory "
                                f"'{folder_path}'.")
                srcdir = "/".join([project, item.uploadname, name])
                dstdir = "/".join([project, item.collectname, name])
                res = session.move_path(
                    "admin", path=srcdir, destination_path=dstdir,
                    overwrite=allow_overwrite)
                if not res.is_ok:
                    raise ValueError(
                        f"Unable to INTEGRATE data '{name}': "
                        f"{srcdir} -> {dstdir}.")
                # Remove the folders left empty in the upload share, from
                # the innermost to the outermost one.
                path = "/".join([project, item.uploadname])
                for idx in range(len(splits), 0, -1):
                    currentdir = os.path.join(
                        item.uploaddir, os.sep.join(splits[:idx]))
                    if (os.path.isdir(currentdir) and
                            len(os.listdir(currentdir)) == 0):
                        remote_dir = "/".join([path] + splits[:idx])
                        res = session.delete_path("admin", path=remote_dir)
                        if not res.is_ok:
                            raise ValueError(
                                f"Unable to delete path: '{remote_dir}'.")
        else:
            logger.info(f"In error: {item.uploaddir}")
            local_reportfile = os.path.join(datadir, "report.json")
            with open(local_reportfile, "wt") as open_file:
                json.dump(report, open_file, indent=2)
            local_docx_reportfile = os.path.join(datadir, "report.docx")
            export_report(report, runtime["timestamp"], local_docx_reportfile)
            # Each report is uploaded from its own local file.
            for local_path in (local_reportfile, local_docx_reportfile):
                filename = os.path.basename(local_path)
                res = session.upload_file(
                    "admin", local_filepath=local_path,
                    remote_filepath="/".join(
                        [project, item.uploadname, filename]))
                if not res.is_ok:
                    raise ValueError(f"Unable to upload '{filename}'.")
        # Restore the WIP file whatever the outcome.
        res = session.upload_file(
            "admin", local_filepath=local_wipfile,
            remote_filepath="/".join([project, item.uploadname, "wip"]))
        if not res.is_ok:
            raise ValueError("Unable to upload WIP file.")

    # Save Logs.
    for name, final_struct in [("inputs", logged_inputs),
                               ("outputs", outputs),
                               ("runtime", runtime)]:
        log_file = os.path.join(logdir, f"{name}_{runtime['timestamp']}.json")
        with open(log_file, "wt") as open_file:
            json.dump(final_struct, open_file, sort_keys=True,
                      check_circular=True, indent=4)


if __name__ == "__main__":
    # Command line entry point: the 'main' function is handed to Fire.
    from fire import Fire
    Fire(main)
