#!python
"""
Copyright 2023 Quarkslab

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

# builtin-imports
import logging
import os
from pathlib import Path
from collections import defaultdict

# Third-party imports
import click
import numpy as np
from tqdm import tqdm

# Local imports
from qbindiff.features import FEATURES, DEFAULT_FEATURES
from qbindiff.loader import LOADERS, LoaderType
from qbindiff import Program, QBinDiff, Mapping
from qbindiff.types import Distance


def configure_logging(verbose: int):
    """
    Configure the root logger for the command line tool.

    The message format is installed through ``logging.basicConfig`` and the
    root logger level is then chosen from the verbosity counter:
    2 or more selects DEBUG, exactly 1 selects INFO, anything else WARNING.

    :param verbose: number of times the verbose flag was supplied
    """
    logging.basicConfig(format="[%(levelname)s] %(message)s", level=logging.INFO)

    # Resolve the level first so the root logger is touched exactly once
    if verbose >= 2:
        level = logging.DEBUG
    elif verbose == 1:
        level = logging.INFO
    else:
        level = logging.WARNING

    logging.getLogger().setLevel(level)


def display_statistics(differ: QBinDiff, mapping: Mapping) -> None:
    """
    Print a short summary of a computed mapping on standard output.

    The first line reports the score (similarity plus squares), the
    similarity, the number of squares and the number of matches. The second
    line reports the node and edge cover percentages, each computed against
    the primary and then the secondary adjacency matrix of the differ.

    :param differ: object exposing ``primary_adj_matrix`` and ``secondary_adj_matrix``
    :param mapping: object exposing ``nb_match``, ``similarity`` and ``squares``
    """
    matched = mapping.nb_match
    sim = mapping.similarity
    squares = mapping.squares

    score_line = "Score: {:.4f} | Similarity: {:.4f} | Squares: {:.0f} | Nb matches: {}\n".format(
        sim + squares, sim, squares, matched
    )

    primary_adj = differ.primary_adj_matrix
    secondary_adj = differ.secondary_adj_matrix

    # Node cover: matches relative to the length of each adjacency matrix
    node_cover_primary = 100 * matched / len(primary_adj)
    node_cover_secondary = 100 * matched / len(secondary_adj)
    # Edge cover: squares relative to the sum of each adjacency matrix
    edge_cover_primary = 100 * squares / primary_adj.sum()
    edge_cover_secondary = 100 * squares / secondary_adj.sum()

    cover_line = "Node cover:  {:.3f}% / {:.3f}% | Edge cover:  {:.3f}% / {:.3f}%\n".format(
        node_cover_primary,
        node_cover_secondary,
        edge_cover_primary,
        edge_cover_secondary,
    )

    print("".join((score_line, cover_line)))


# Lookup table from a feature key (the name used on the command line) to the
# corresponding entry of FEATURES
FEATURES_KEYS = {feature.key: feature for feature in FEATURES}

# Settings handed to click.command(): extra help flags and a wide help layout
CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"], "max_content_width": 300}

# NOTE: this rebinds the imported DEFAULT_FEATURES name to a tuple of feature keys
DEFAULT_FEATURES = tuple(feature.key for feature in DEFAULT_FEATURES)

# Default values of the command line options
DEFAULT_DISTANCE = "canberra"
DEFAULT_SPARSITY_RATIO = 0.75
DEFAULT_TRADEOFF = 0.75
DEFAULT_EPSILON = 0.5
DEFAULT_MAXITER = 1000

# Loader names accepted by the --loader option
LOADERS_KEYS = [*LOADERS.keys()]

# Help texts of the command line options.
# A paragraph that starts with a line holding only "\b" is printed by click
# as-is, without re-wrapping, which keeps the lists below readable.
help_formats = (
    "The file format of the output file. Supported formats are [bindiff]. [default: bindiff]"
)
help_loaders = f"Loader type to be used. Must be one of these {LOADERS_KEYS}. [default: binexport]"
help_features = """\b
The following features are available:
{}
Features may be weighted by a positive value (default 1.0) and compared with a specific distance (by default the option -d is used) like this <feature>:<weight>:<distance>
[default: {}]""".format(
    "\n".join("  - {}: {}".format(x.key, x.__doc__) for x in FEATURES), DEFAULT_FEATURES
)
# The advertised default is taken from DEFAULT_DISTANCE so that the help text
# cannot drift away from the value actually used by the --distance option
help_distance = f"""\b
The following distances are available {[d.name for d in Distance]}
[default: {DEFAULT_DISTANCE}]"""


@click.command(context_settings=CONTEXT_SETTINGS)
@click.option(
    "-l",
    "--loader",
    type=click.Choice(LOADERS_KEYS),
    default="binexport",
    metavar="<loader>",
    help=help_loaders,
)
@click.option(
    "-f",
    "--features",
    type=str,
    default=DEFAULT_FEATURES,
    multiple=True,
    metavar="<feature>",
    help=help_features,
)
@click.option(
    "-fopt",
    "--feature-option",
    type=(str, str, str),
    multiple=True,
    metavar="<feature> <option> <value>",
    help="Specify a feature option. To get a list of options accepted by a feature look into the description of the feature",
)
@click.option(
    "-n",
    "--normalize",
    is_flag=True,
    help="Normalize the Call Graph (can potentially lead to a partial matching). [default disabled]",
)
@click.option(
    "-d",
    "--distance",
    type=click.Choice([d.name for d in Distance]),
    default=DEFAULT_DISTANCE,
    metavar="<function>",
    help=help_distance,
)
@click.option(
    "-s",
    "--sparsity-ratio",
    type=float,
    default=DEFAULT_SPARSITY_RATIO,
    help="Ratio of least probable matches to ignore. Between 0.0 (nothing is ignored) to 1.0 (only perfect matches are considered) [default: %.02f]"
    % DEFAULT_SPARSITY_RATIO,
)
@click.option(
    "-sr",
    "--sparse-row",
    is_flag=True,
    help="Whether to build the sparse similarity matrix considering its entirety or processing it row per row",
)
@click.option(
    "-t",
    "--tradeoff",
    type=float,
    default=DEFAULT_TRADEOFF,
    help="Tradeoff between function content (near 1.0) and call-graph information (near 0.0) [default: %.02f]"
    % DEFAULT_TRADEOFF,
)
@click.option(
    "-e",
    "--epsilon",
    type=float,
    default=DEFAULT_EPSILON,
    help="Relaxation parameter to enforce convergence [default: %.02f]" % DEFAULT_EPSILON,
)
@click.option(
    "-i",
    "--maxiter",
    type=int,
    default=DEFAULT_MAXITER,
    help="Maximum number of iteration for belief propagation [default: %d]" % DEFAULT_MAXITER,
)
@click.option(
    "-e1",
    "--executable1",
    "exec_primary",
    type=Path,
    help="Path to the primary raw executable. Must be provided if using quokka loader",
)
@click.option(
    "-e2",
    "--executable2",
    "exec_secondary",
    type=Path,
    help="Path to the secondary raw executable. Must be provided if using quokka loader",
)
@click.option(
    "-o",
    "--output",
    type=Path,
    help="Write output to PATH",
)
@click.option(
    "-ff",
    "--file-format",
    default="bindiff",
    type=click.Choice(["bindiff"]),
    help=help_formats,
)
@click.option(
    "--enable-cortexm",
    is_flag=True,
    help="Enable the usage of the cortex-m extension when disassembling",
)
@click.option(
    "-v",
    "--verbose",
    count=True,
    help="Activate debugging messages. Can be supplied multiple times to increase verbosity",
)
@click.argument("primary", type=Path, metavar="<primary file>")
@click.argument("secondary", type=Path, metavar="<secondary file>")
def main(
    loader,
    features,
    feature_option,
    normalize,
    distance,
    sparsity_ratio,
    sparse_row,
    tradeoff,
    epsilon,
    maxiter,
    exec_primary,
    exec_secondary,
    output,
    file_format,
    enable_cortexm,
    verbose,
    primary,
    secondary,
):
    """
    qBinDiff is an experimental binary diffing tool based on
    machine learning techniques, namely Belief propagation.
    """
    # NOTE: the docstring above is displayed by click as the command help text,
    # so implementation notes live in comments instead.
    #
    # Steps performed:
    #   1. configure logging and sanitize the numeric parameters
    #   2. load the primary and secondary programs with the selected loader
    #   3. build the differ and register the requested feature extractors
    #   4. process, compute the matching and print the statistics
    #   5. export the mapping when an output path was given
    #
    # The process exits with status 1 when the quokka loader is selected
    # without existing raw executables, or when no feature is provided.

    configure_logging(verbose)

    # Out-of-range parameters are reset to their default with a warning.
    # The bounds are tested with `not lo <= x <= hi`: a chained comparison
    # such as `0.0 > x > 1` can never be true and would validate nothing.
    if not 0.0 <= sparsity_ratio <= 1.0:
        logging.warning(
            "[-] Sparsity ratio should be within 0..1 (set it to %.2f)", DEFAULT_SPARSITY_RATIO
        )
        sparsity_ratio = DEFAULT_SPARSITY_RATIO

    if not 0.0 <= tradeoff <= 1.0:
        logging.warning(
            "[-] Trade-off parameter should be within 0..1 (set it to %.2f)", DEFAULT_TRADEOFF
        )
        tradeoff = DEFAULT_TRADEOFF

    if epsilon < 0.0:
        logging.warning(
            "[-] Epsilon parameter should be positive (set it to %.3f)", DEFAULT_EPSILON
        )
        epsilon = DEFAULT_EPSILON

    if not output:
        logging.warning("[-] You have not specified an output file")

    # Group options to pass to the feature extractors:
    # {feature key: {option name: raw string value}}
    feature_option_dict = defaultdict(dict)
    for feature_key, option_name, option_value in feature_option:
        feature_option_dict[feature_key][option_name] = option_value

    # Translate the loader name chosen on the command line into its LOADERS entry
    loader = LOADERS[loader]

    if loader == LoaderType.quokka:
        # Check that the executables have been provided and exist on disk
        executables = (exec_primary, exec_secondary)
        if not all(exe is not None and exe.exists() for exe in executables):
            logging.error("When using the quokka loader you have to provide the raw binaries")
            # SystemExit is what the `exit()` helper raises, without relying
            # on the `site` module having installed that helper
            raise SystemExit(1)
        logging.info("[+] Loading primary: %s", primary.name)
        primary = Program(loader, primary, exec_primary)
        logging.info("[+] Loading secondary: %s", secondary.name)
        secondary = Program(loader, secondary, exec_secondary)
    else:
        logging.info("[+] Loading primary: %s", primary.name)
        primary = Program(loader, primary, enable_cortexm=enable_cortexm)
        logging.info("[+] Loading secondary: %s", secondary.name)
        secondary = Program(loader, secondary, enable_cortexm=enable_cortexm)

    qbindiff = QBinDiff(
        primary,
        secondary,
        sparsity_ratio=sparsity_ratio,
        tradeoff=tradeoff,
        epsilon=epsilon,
        distance=Distance[distance],
        maxiter=maxiter,
        normalize=normalize,
        sparse_row=sparse_row,
    )

    if not features:
        logging.error("no feature provided")
        raise SystemExit(1)

    # Each feature is given as <feature>[:<weight>][:<distance>].
    # dict.fromkeys() drops duplicated specifications like set() would, but
    # keeps the command line order so that registration order is reproducible.
    for feature_spec in dict.fromkeys(features):
        feature_name, *opts = feature_spec.split(":")
        weight = 1.0
        # Kept separate from the `distance` parameter (the global -d option)
        feature_distance = None
        if len(opts) == 2:
            weight, feature_distance = opts
        elif len(opts) == 1:
            # A single suffix is a weight if it parses as a float,
            # otherwise it is taken as a distance name
            try:
                weight = float(opts[0])
            except ValueError:
                feature_distance = opts[0]
        elif opts:
            logging.error(f"Malformed feature {feature_name}")
            continue

        if feature_name not in FEATURES_KEYS:
            logging.warning(f"Feature '{feature_name}' not recognized - ignored.")
            continue

        extractor_class = FEATURES_KEYS[feature_name]
        if feature_distance is not None:
            feature_distance = Distance[feature_distance]
        # Convert the raw option strings with the parser declared by the extractor
        feature_opts = {
            name: extractor_class.options[name].parser(value)
            for name, value in feature_option_dict[feature_name].items()
        }
        qbindiff.register_feature_extractor(
            extractor_class, float(weight), distance=feature_distance, **feature_opts
        )

    logging.info("[+] Initializing NAP")
    qbindiff.process()

    logging.info("[+] Computing NAP")
    qbindiff.compute_matching()

    display_statistics(qbindiff, qbindiff.mapping)

    if output:
        logging.info("[+] Saving")
        if file_format == "bindiff":
            qbindiff.export_to_bindiff(output)
        logging.info("[+] Mapping successfully saved to: %s", output)


# Run the command line interface only when this file is executed as a script
if __name__ == "__main__":
    main()
