from __future__ import annotations

import json
from datetime import datetime, timezone
from pathlib import Path

OUTDIR = Path(config["outdir"])
QUERY = config["query_fasta"]
PROTEOMES = config["proteomes_dir"]
USER_CDS = Path(config["user_cds"])
THREADS = int(config.get("threads", 4))
COVERAGE = float(config.get("coverage", 0.70))
OUTGROUP_QUERY = str(config.get("outgroup_query", "")).strip()
ABSREL_P = float(config.get("absrel_p", 0.05))
ABSREL_DYNAMIC_START = float(config.get("absrel_dynamic_start", 0.05))
ABSREL_DYNAMIC_STEP = float(config.get("absrel_dynamic_step", 0.01))
ABSREL_DYNAMIC_MAX = float(config.get("absrel_dynamic_max", 0.2))
MEME_P = float(config.get("meme_p", 0.05))
USE_CLIPKIT = bool(config.get("use_clipkit", True))
CLIPKIT_MODE_PROTEIN = str(config.get("clipkit_mode_protein", "kpic-smart-gap")).strip() or "kpic-smart-gap"
CLIPKIT_MODE_CODON = str(config.get("clipkit_mode_codon", "kpic-smart-gap")).strip() or "kpic-smart-gap"
IQTREE_MODEL = str(config.get("iqtree_model", "MFP")).strip() or "MFP"
IQTREE_BOOTSTRAP = int(config.get("iqtree_bootstrap", 1000))
IQTREE_BNNI = bool(config.get("iqtree_bnni", False))
ABSREL_BRANCHES = str(config.get("absrel_branches", "Leaves")).strip() or "Leaves"
MEME_BRANCHES = str(config.get("meme_branches", "Leaves")).strip() or "Leaves"
CODEML_CODONFREQ = int(config.get("codeml_codonfreq", 7))

SUPPORTED_ALIGNMENT_METHODS = ("babappalign", "mafft", "prank")
SUPPORTED_TRIM_STATES = ("raw", "clipkit")
FORCED_TRIM_STATES = ["raw", "clipkit"]


def _normalize_alignment_methods(raw):
    if isinstance(raw, str):
        methods = [token.strip().lower() for token in raw.split(",") if token.strip()]
    elif isinstance(raw, list):
        methods = [str(token).strip().lower() for token in raw if str(token).strip()]
    else:
        methods = []

    if not methods:
        methods = list(SUPPORTED_ALIGNMENT_METHODS)

    normalized = []
    seen = set()
    unknown = []
    for method in methods:
        if method not in SUPPORTED_ALIGNMENT_METHODS:
            unknown.append(method)
            continue
        if method not in seen:
            seen.add(method)
            normalized.append(method)

    if unknown:
        raise ValueError(
            f"Unsupported alignment methods in config: {', '.join(unknown)}. "
            f"Supported: {', '.join(SUPPORTED_ALIGNMENT_METHODS)}"
        )

    if not normalized:
        raise ValueError("No valid alignment methods configured.")

    return normalized


def _normalize_trim_states(raw, trim_strategy, use_clipkit):
    states = []
    if isinstance(raw, str):
        states = [token.strip().lower() for token in raw.split(",") if token.strip()]
    elif isinstance(raw, list):
        states = [str(token).strip().lower() for token in raw if str(token).strip()]

    strategy = str(trim_strategy or "").strip().lower()
    if strategy == "both":
        states = ["raw", "clipkit"]
    elif strategy in {"raw", "clipkit"}:
        states = [strategy]

    if not states:
        states = ["clipkit"] if bool(use_clipkit) else ["raw"]

    normalized = []
    seen = set()
    unknown = []
    for state in states:
        if state not in SUPPORTED_TRIM_STATES:
            unknown.append(state)
            continue
        if state not in seen:
            seen.add(state)
            normalized.append(state)

    if unknown:
        raise ValueError(
            f"Unsupported trim states in config: {', '.join(unknown)}. "
            f"Supported: {', '.join(SUPPORTED_TRIM_STATES)}"
        )

    if not normalized:
        raise ValueError("No valid trim states configured.")

    return normalized


ALIGNMENT_METHODS = _normalize_alignment_methods(config.get("alignment_methods", list(SUPPORTED_ALIGNMENT_METHODS)))
TRIM_STATES = _normalize_trim_states(
    config.get("trim_states", []),
    config.get("trim_strategy", ""),
    config.get("use_clipkit", True),
)
if set(TRIM_STATES) != set(FORCED_TRIM_STATES):
    TRIM_STATES = list(FORCED_TRIM_STATES)

METHOD_COUNT = max(1, len(ALIGNMENT_METHODS))
PATHWAY_COUNT = max(1, len(ALIGNMENT_METHODS) * len(TRIM_STATES))
PER_METHOD_CORES = min(
    max(1, THREADS),
    max(1, int(config.get("per_method_cores", max(1, THREADS // METHOD_COUNT)))),
)
PER_PATHWAY_CORES = min(
    max(1, THREADS),
    max(1, int(config.get("per_pathway_cores", max(1, THREADS // PATHWAY_COUNT)))),
)

METHOD_PATTERN = "|".join(ALIGNMENT_METHODS)
TRIM_PATTERN = "|".join(TRIM_STATES)
PAL2NAL_METHODS = [method for method in ALIGNMENT_METHODS if method in {"mafft", "prank"}]
PAL2NAL_PATTERN = "|".join(PAL2NAL_METHODS)
PRIMARY_METHOD = ALIGNMENT_METHODS[0]
PRIMARY_TRIM = "clipkit" if "clipkit" in TRIM_STATES else TRIM_STATES[0]
PATHWAY_LABELS = [f"{method}:{trim_state}" for method in ALIGNMENT_METHODS for trim_state in TRIM_STATES]

EXE = config.get("executables", {})
BLASTP = EXE["blastp"]
MAKEBLASTDB = EXE["makeblastdb"]
IQTREE = EXE["iqtree"]
HYPHY = EXE["hyphy"]
CODEML = EXE["codeml"]
CLIPKIT = EXE.get("clipkit", "clipkit")
BABAPPALIGN = EXE.get("babappalign", "babappalign")
MAFFT = EXE.get("mafft", "mafft")
PRANK = EXE.get("prank", "prank")

HAVE_CDS = USER_CDS.exists() and USER_CDS.stat().st_size > 0


def pathway_protein_alignment(wildcards):
    return OUTDIR / "alignments" / wildcards.method / wildcards.trim_state / "orthogroup_proteins.analysis.fasta"


def pathway_cds_alignment(wildcards):
    return OUTDIR / "alignments" / wildcards.method / wildcards.trim_state / "mapped_orthogroup_cds.analysis.fasta"


if HAVE_CDS:
    rule all:
        input:
            expand(
                OUTDIR / "summary" / "{method}" / "{trim_state}" / "episodic_selection_summary.txt",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            ),
            OUTDIR / "summary" / "robustness_matrix.tsv",
            OUTDIR / "summary" / "robustness_consensus.tsv",
            OUTDIR / "summary" / "robustness_narrative.txt",
            OUTDIR / "summary" / "comparative_reproducibility_summary.txt",
            OUTDIR / "summary" / "robustness_publication_table.tex",
            OUTDIR / "summary" / "run_provenance.json",
            OUTDIR / "summary" / "episodic_selection_summary.txt",
            expand(
                OUTDIR / "asr" / "{method}" / "{trim_state}" / "asr_done.json",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            ),
            OUTDIR / "asr" / "asr_done.json"
else:
    rule all:
        input:
            OUTDIR / "orthogroup" / "WAITING_FOR_CDS.txt"


rule rbh_orthogroup:
    input:
        query=QUERY,
        proteomes=PROTEOMES,
    output:
        proteins=OUTDIR / "orthogroup" / "orthogroup_proteins.fasta",
        headers=OUTDIR / "orthogroup" / "orthogroup_headers.txt",
        rbh=OUTDIR / "orthogroup" / "rbh_summary.tsv",
    threads: THREADS
    shell:
        r'''
        mkdir -p {OUTDIR}/orthogroup
        python -m babappasnake.scripts.run_rbh_pipeline \
          --query {input.query} \
          --proteomes {input.proteomes} \
          --outdir {OUTDIR}/orthogroup \
          --coverage {COVERAGE} \
          --threads {threads} \
          --blastp "{BLASTP}" \
          --makeblastdb "{MAKEBLASTDB}"
        '''


rule waiting_note:
    input:
        proteins=rules.rbh_orthogroup.output.proteins,
        headers=rules.rbh_orthogroup.output.headers,
    output:
        note=OUTDIR / "orthogroup" / "WAITING_FOR_CDS.txt",
    shell:
        r'''
        python -m babappasnake.scripts.write_waiting_note \
          --orthogroup {input.proteins} \
          --headers {input.headers} \
          --expected-cds {USER_CDS} \
          --outfile {output.note}
        '''


if HAVE_CDS:

    rule map_cds:
        input:
            proteins=rules.rbh_orthogroup.output.proteins,
            cds=USER_CDS,
        output:
            mapped=OUTDIR / "mapped_cds" / "mapped_orthogroup_cds.fasta",
            proteins=OUTDIR / "mapped_cds" / "mapped_orthogroup_proteins.fasta",
            table=OUTDIR / "mapped_cds" / "cds_protein_mapping.tsv",
        shell:
            r'''
            mkdir -p {OUTDIR}/mapped_cds
            python -m babappasnake.scripts.map_cds_to_proteins \
              --proteins {input.proteins} \
              --cds {input.cds} \
              --out-cds {output.mapped} \
              --out-proteins {output.proteins} \
              --mapping {output.table}
            '''


    if "babappalign" in ALIGNMENT_METHODS:
        rule align_proteins_babappalign:
            input:
                rules.map_cds.output.proteins
            output:
                aln=OUTDIR / "alignments" / "babappalign" / "orthogroup_proteins.protein.aln.fasta"
            threads: PER_METHOD_CORES
            shell:
                r'''
                mkdir -p {OUTDIR}/alignments/babappalign
                cp {input} {OUTDIR}/alignments/babappalign/orthogroup_proteins.fasta
                cd {OUTDIR}/alignments/babappalign && \
                if "{BABAPPALIGN}" --help 2>&1 | grep -q -- '--model'; then \
                  "{BABAPPALIGN}" orthogroup_proteins.fasta --model babappascore --mode protein; \
                else \
                  "{BABAPPALIGN}" orthogroup_proteins.fasta --mode protein; \
                fi
                test -f {output.aln}
                '''


        rule align_cds_babappalign:
            input:
                rules.map_cds.output.mapped
            output:
                protein_aln=OUTDIR / "alignments" / "babappalign" / "mapped_orthogroup_cds.protein.aln.fasta",
                codon_aln=OUTDIR / "alignments" / "babappalign" / "mapped_orthogroup_cds.codon.aln.fasta"
            threads: PER_METHOD_CORES
            shell:
                r'''
                mkdir -p {OUTDIR}/alignments/babappalign
                cp {input} {OUTDIR}/alignments/babappalign/mapped_orthogroup_cds.fasta
                cd {OUTDIR}/alignments/babappalign && \
                if "{BABAPPALIGN}" --help 2>&1 | grep -q -- '--model'; then \
                  "{BABAPPALIGN}" mapped_orthogroup_cds.fasta --model babappascore --mode codon; \
                else \
                  "{BABAPPALIGN}" mapped_orthogroup_cds.fasta --mode codon; \
                fi
                test -f {output.protein_aln}
                test -f {output.codon_aln}
                '''


    if "mafft" in ALIGNMENT_METHODS:
        rule align_proteins_mafft:
            input:
                rules.map_cds.output.proteins
            output:
                aln=OUTDIR / "alignments" / "mafft" / "orthogroup_proteins.protein.aln.fasta"
            threads: PER_METHOD_CORES
            shell:
                r'''
                mkdir -p {OUTDIR}/alignments/mafft
                "{MAFFT}" --auto --thread {threads} {input} > {output.aln}
                test -s {output.aln}
                '''


    if "prank" in ALIGNMENT_METHODS:
        rule align_proteins_prank:
            input:
                rules.map_cds.output.proteins
            output:
                aln=OUTDIR / "alignments" / "prank" / "orthogroup_proteins.protein.aln.fasta"
            threads: 1
            shell:
                r'''
                mkdir -p {OUTDIR}/alignments/prank
                "{PRANK}" -d={input} -o={OUTDIR}/alignments/prank/orthogroup_proteins.prank -protein -F
                test -f {OUTDIR}/alignments/prank/orthogroup_proteins.prank.best.fas
                cp {OUTDIR}/alignments/prank/orthogroup_proteins.prank.best.fas {output.aln}
                '''


    if PAL2NAL_METHODS:
        rule align_cds_pal2nal_method:
            input:
                protein_aln=OUTDIR / "alignments" / "{method}" / "orthogroup_proteins.protein.aln.fasta",
                cds=rules.map_cds.output.mapped,
            output:
                protein_aln=OUTDIR / "alignments" / "{method}" / "mapped_orthogroup_cds.protein.aln.fasta",
                codon_aln=OUTDIR / "alignments" / "{method}" / "mapped_orthogroup_cds.codon.aln.fasta"
            wildcard_constraints:
                method=PAL2NAL_PATTERN
            shell:
                r'''
                mkdir -p {OUTDIR}/alignments/{wildcards.method}
                python -m babappasnake.scripts.backtranslate_alignment \
                  --protein-aln {input.protein_aln:q} \
                  --cds {input.cds:q} \
                  --out-protein {output.protein_aln:q} \
                  --out-codon {output.codon_aln:q} \
                  --report-json {OUTDIR}/alignments/{wildcards.method}/backtranslate_report.json
                test -s {output.codon_aln}
                '''


    rule align_proteins_all_methods:
        input:
            expand(OUTDIR / "alignments" / "{method}" / "orthogroup_proteins.protein.aln.fasta", method=ALIGNMENT_METHODS)


    rule align_cds_all_methods:
        input:
            expand(OUTDIR / "alignments" / "{method}" / "mapped_orthogroup_cds.protein.aln.fasta", method=ALIGNMENT_METHODS),
            expand(OUTDIR / "alignments" / "{method}" / "mapped_orthogroup_cds.codon.aln.fasta", method=ALIGNMENT_METHODS)


    if "raw" in TRIM_STATES:
        rule prepare_branch_inputs_raw_method:
            input:
                protein=OUTDIR / "alignments" / "{method}" / "orthogroup_proteins.protein.aln.fasta",
                cds=OUTDIR / "alignments" / "{method}" / "mapped_orthogroup_cds.codon.aln.fasta",
            output:
                protein=OUTDIR / "alignments" / "{method}" / "raw" / "orthogroup_proteins.analysis.fasta",
                cds=OUTDIR / "alignments" / "{method}" / "raw" / "mapped_orthogroup_cds.analysis.fasta",
            wildcard_constraints:
                method=METHOD_PATTERN
            shell:
                r'''
                mkdir -p {OUTDIR}/alignments/{wildcards.method}/raw
                cp {input.protein} {output.protein}
                python -m babappasnake.scripts.strip_terminal_stop_codon \
                  --input {input.cds} \
                  --output {output.cds}
                '''


    if "clipkit" in TRIM_STATES:
        rule prepare_branch_inputs_clipkit_method:
            input:
                protein=OUTDIR / "alignments" / "{method}" / "orthogroup_proteins.protein.aln.fasta",
                cds=OUTDIR / "alignments" / "{method}" / "mapped_orthogroup_cds.codon.aln.fasta",
            output:
                protein=OUTDIR / "alignments" / "{method}" / "clipkit" / "orthogroup_proteins.analysis.fasta",
                cds=OUTDIR / "alignments" / "{method}" / "clipkit" / "mapped_orthogroup_cds.analysis.fasta",
            wildcard_constraints:
                method=METHOD_PATTERN
            shell:
                r'''
                mkdir -p {OUTDIR}/alignments/{wildcards.method}/clipkit
                "{CLIPKIT}" {input.protein} -m {CLIPKIT_MODE_PROTEIN:q} -s aa -o {OUTDIR}/alignments/{wildcards.method}/clipkit/orthogroup_proteins.clipkit.tmp.fasta
                "{CLIPKIT}" {input.cds} -m {CLIPKIT_MODE_CODON:q} --codon -o {OUTDIR}/alignments/{wildcards.method}/clipkit/mapped_orthogroup_cds.clipkit.tmp.fasta
                python -m babappasnake.scripts.strip_terminal_stop_codon \
                  --input {OUTDIR}/alignments/{wildcards.method}/clipkit/mapped_orthogroup_cds.clipkit.tmp.fasta \
                  --output {output.cds}
                mv {OUTDIR}/alignments/{wildcards.method}/clipkit/orthogroup_proteins.clipkit.tmp.fasta {output.protein}
                rm -f {OUTDIR}/alignments/{wildcards.method}/clipkit/mapped_orthogroup_cds.clipkit.tmp.fasta
                '''


    rule prepare_branch_inputs_all_pathways:
        input:
            expand(
                OUTDIR / "alignments" / "{method}" / "{trim_state}" / "orthogroup_proteins.analysis.fasta",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            ),
            expand(
                OUTDIR / "alignments" / "{method}" / "{trim_state}" / "mapped_orthogroup_cds.analysis.fasta",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            )


    rule iqtree_ml_pathway:
        input:
            pathway_protein_alignment
        output:
            tree=OUTDIR / "tree" / "{method}" / "{trim_state}" / "orthogroup.treefile"
        wildcard_constraints:
            method=METHOD_PATTERN,
            trim_state=TRIM_PATTERN
        threads: PER_PATHWAY_CORES
        params:
            bnni="-bnni" if IQTREE_BNNI else ""
        log:
            OUTDIR / "logs" / "{method}" / "{trim_state}" / "iqtree_ml.log"
        shell:
            r'''
            mkdir -p {OUTDIR}/tree/{wildcards.method}/{wildcards.trim_state}
            mkdir -p {OUTDIR}/logs/{wildcards.method}/{wildcards.trim_state}
            "{IQTREE}" -s {input} -nt {threads} -m {IQTREE_MODEL:q} -B {IQTREE_BOOTSTRAP} {params.bnni} -redo -pre {OUTDIR}/tree/{wildcards.method}/{wildcards.trim_state}/orthogroup > {log} 2>&1
            test -f {output.tree}
            '''


    rule iqtree_ml_all_pathways:
        input:
            expand(
                OUTDIR / "tree" / "{method}" / "{trim_state}" / "orthogroup.treefile",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            )


    rule iqtree_ml_all_methods:
        input:
            rules.iqtree_ml_all_pathways.input


    rule root_iqtree_outgroup_pathway:
        input:
            tree=OUTDIR / "tree" / "{method}" / "{trim_state}" / "orthogroup.treefile"
        output:
            rooted=OUTDIR / "tree" / "{method}" / "{trim_state}" / "orthogroup.rooted.treefile"
        wildcard_constraints:
            method=METHOD_PATTERN,
            trim_state=TRIM_PATTERN
        params:
            outgroup=OUTGROUP_QUERY
        log:
            OUTDIR / "logs" / "{method}" / "{trim_state}" / "root_tree.log"
        shell:
            r'''
            mkdir -p {OUTDIR}/logs/{wildcards.method}/{wildcards.trim_state}
            python -m babappasnake.scripts.root_tree_outgroup \
              --tree {input.tree} \
              --output {output.rooted} \
              --outgroup {params.outgroup:q} > {log} 2>&1
            '''


    rule root_iqtree_outgroup_all_pathways:
        input:
            expand(
                OUTDIR / "tree" / "{method}" / "{trim_state}" / "orthogroup.rooted.treefile",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            )


    rule root_iqtree_outgroup_all_methods:
        input:
            rules.root_iqtree_outgroup_all_pathways.input


    rule codeml_asr_pathway:
        input:
            aln=pathway_cds_alignment,
            tree=OUTDIR / "tree" / "{method}" / "{trim_state}" / "orthogroup.rooted.treefile",
        output:
            done=OUTDIR / "asr" / "{method}" / "{trim_state}" / "asr_done.json",
            mlc=OUTDIR / "asr" / "{method}" / "{trim_state}" / "mlc_asr.txt",
            rst=OUTDIR / "asr" / "{method}" / "{trim_state}" / "rst",
        wildcard_constraints:
            method=METHOD_PATTERN,
            trim_state=TRIM_PATTERN
        log:
            OUTDIR / "logs" / "{method}" / "{trim_state}" / "codeml_asr.log"
        shell:
            r'''
            mkdir -p {OUTDIR}/asr/{wildcards.method}/{wildcards.trim_state}
            mkdir -p {OUTDIR}/logs/{wildcards.method}/{wildcards.trim_state}
            python -m babappasnake.scripts.run_codeml_asr \
              --alignment {input.aln} \
              --tree {input.tree} \
              --outdir {OUTDIR}/asr/{wildcards.method}/{wildcards.trim_state} \
              --codeml "{CODEML}" \
              --codonfreq {CODEML_CODONFREQ} > {log} 2>&1
            test -f {output.done}
            test -f {output.mlc}
            test -f {output.rst}
            '''


    rule codeml_asr_all_pathways:
        input:
            expand(
                OUTDIR / "asr" / "{method}" / "{trim_state}" / "asr_done.json",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            )


    rule codeml_asr_all_methods:
        input:
            rules.codeml_asr_all_pathways.input


    rule hyphy_exploratory_pathway:
        input:
            aln=pathway_cds_alignment,
            tree=OUTDIR / "tree" / "{method}" / "{trim_state}" / "orthogroup.rooted.treefile",
        output:
            done=OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "hyphy_done.json",
            absrel=OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "absrel.json",
            meme=OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "meme.json",
        wildcard_constraints:
            method=METHOD_PATTERN,
            trim_state=TRIM_PATTERN
        threads: PER_PATHWAY_CORES
        log:
            OUTDIR / "logs" / "{method}" / "{trim_state}" / "hyphy_exploratory.log"
        shell:
            r'''
            mkdir -p {OUTDIR}/logs/{wildcards.method}/{wildcards.trim_state}
            python -m babappasnake.scripts.run_hyphy \
              --cds-aln {input.aln} \
              --tree {input.tree} \
              --outdir {OUTDIR}/hyphy/{wildcards.method}/{wildcards.trim_state} \
              --threads {threads} \
              --hyphy "{HYPHY}" \
              --absrel-branches {ABSREL_BRANCHES:q} \
              --meme-branches {MEME_BRANCHES:q} > {log} 2>&1
            test -f {output.done}
            test -f {output.absrel}
            test -f {output.meme}
            '''


    rule hyphy_exploratory_all_pathways:
        input:
            expand(
                OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "hyphy_done.json",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            )


    rule hyphy_exploratory_all_methods:
        input:
            rules.hyphy_exploratory_all_pathways.input


    rule parse_foregrounds_pathway:
        input:
            absrel=OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "absrel.json"
        output:
            tsv=OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "significant_foregrounds.tsv",
            lst=OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "significant_foregrounds.txt",
            meta=OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "foreground_threshold.json",
        wildcard_constraints:
            method=METHOD_PATTERN,
            trim_state=TRIM_PATTERN
        log:
            OUTDIR / "logs" / "{method}" / "{trim_state}" / "parse_foregrounds.log"
        shell:
            r'''
            mkdir -p {OUTDIR}/logs/{wildcards.method}/{wildcards.trim_state}
            python -m babappasnake.scripts.parse_hyphy_foregrounds \
              --absrel-json {input.absrel} \
              --pcut {ABSREL_P} \
              --dynamic \
              --dynamic-start {ABSREL_DYNAMIC_START} \
              --dynamic-step {ABSREL_DYNAMIC_STEP} \
              --dynamic-max {ABSREL_DYNAMIC_MAX} \
              --out-tsv {output.tsv} \
              --out-list {output.lst} \
              --out-meta {output.meta} > {log} 2>&1
            '''


    rule parse_foregrounds_all_pathways:
        input:
            expand(
                OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "significant_foregrounds.tsv",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            ),
            expand(
                OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "significant_foregrounds.txt",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            ),
            expand(
                OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "foreground_threshold.json",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            )


    rule parse_foregrounds_all_methods:
        input:
            rules.parse_foregrounds_all_pathways.input


    rule prepare_foreground_trees_pathway:
        input:
            tree=OUTDIR / "tree" / "{method}" / "{trim_state}" / "orthogroup.rooted.treefile",
            lst=OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "significant_foregrounds.txt",
        output:
            manifest=OUTDIR / "branchsite" / "{method}" / "{trim_state}" / "foreground_trees.tsv"
        wildcard_constraints:
            method=METHOD_PATTERN,
            trim_state=TRIM_PATTERN
        log:
            OUTDIR / "logs" / "{method}" / "{trim_state}" / "prepare_foreground_trees.log"
        shell:
            r'''
            mkdir -p {OUTDIR}/branchsite/{wildcards.method}/{wildcards.trim_state}
            mkdir -p {OUTDIR}/logs/{wildcards.method}/{wildcards.trim_state}
            python -m babappasnake.scripts.prepare_foreground_trees \
              --tree {input.tree} \
              --foreground-list {input.lst} \
              --outdir {OUTDIR}/branchsite/{wildcards.method}/{wildcards.trim_state} \
              --manifest {output.manifest} > {log} 2>&1
            '''


    rule prepare_foreground_trees_all_pathways:
        input:
            expand(
                OUTDIR / "branchsite" / "{method}" / "{trim_state}" / "foreground_trees.tsv",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            )


    rule prepare_foreground_trees_all_methods:
        input:
            rules.prepare_foreground_trees_all_pathways.input


    rule branchsite_batch_pathway:
        input:
            aln=pathway_cds_alignment,
            manifest=OUTDIR / "branchsite" / "{method}" / "{trim_state}" / "foreground_trees.tsv",
            lst=OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "significant_foregrounds.txt",
        output:
            tsv=OUTDIR / "branchsite" / "{method}" / "{trim_state}" / "branchsite_results.tsv"
        wildcard_constraints:
            method=METHOD_PATTERN,
            trim_state=TRIM_PATTERN
        threads: PER_PATHWAY_CORES
        log:
            OUTDIR / "logs" / "{method}" / "{trim_state}" / "branchsite_batch.log"
        shell:
            r'''
            mkdir -p {OUTDIR}/logs/{wildcards.method}/{wildcards.trim_state}
            python -m babappasnake.scripts.run_branchsite_batch \
              --alignment {input.aln} \
              --tree-dir {OUTDIR}/branchsite/{wildcards.method}/{wildcards.trim_state} \
              --foreground-list {input.lst} \
              --out-tsv {output.tsv} \
              --codeml "{CODEML}" \
              --codonfreq {CODEML_CODONFREQ} \
              --jobs {threads} > {log} 2>&1
            '''


    rule branchsite_batch_all_pathways:
        input:
            expand(
                OUTDIR / "branchsite" / "{method}" / "{trim_state}" / "branchsite_results.tsv",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            )


    rule branchsite_batch_all_methods:
        input:
            rules.branchsite_batch_all_pathways.input


    rule final_summary_pathway:
        input:
            rbh=rules.rbh_orthogroup.output.rbh,
            mapping=rules.map_cds.output.table,
            absrel=OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "significant_foregrounds.tsv",
            absrel_meta=OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "foreground_threshold.json",
            branchsite=OUTDIR / "branchsite" / "{method}" / "{trim_state}" / "branchsite_results.tsv",
            hyphy_done=OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "hyphy_done.json",
        output:
            OUTDIR / "summary" / "{method}" / "{trim_state}" / "episodic_selection_summary.txt"
        wildcard_constraints:
            method=METHOD_PATTERN,
            trim_state=TRIM_PATTERN
        log:
            OUTDIR / "logs" / "{method}" / "{trim_state}" / "final_summary.log"
        shell:
            r'''
            mkdir -p {OUTDIR}/summary/{wildcards.method}/{wildcards.trim_state}
            mkdir -p {OUTDIR}/logs/{wildcards.method}/{wildcards.trim_state}
            python -m babappasnake.scripts.summarize_results \
              --rbh {input.rbh} \
              --mapping {input.mapping} \
              --absrel {input.absrel} \
              --absrel-meta {input.absrel_meta} \
              --branchsite {input.branchsite} \
              --hyphy-dir {OUTDIR}/hyphy/{wildcards.method}/{wildcards.trim_state} \
              --meme-p {MEME_P} \
              --method {wildcards.method} \
              --trim-state {wildcards.trim_state} \
              --out {output} > {log} 2>&1
            '''


    rule final_summary_all_pathways:
        input:
            expand(
                OUTDIR / "summary" / "{method}" / "{trim_state}" / "episodic_selection_summary.txt",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            )


    rule final_summary_all_methods:
        input:
            rules.final_summary_all_pathways.input


    rule robustness_reports:
        input:
            summaries=expand(
                OUTDIR / "summary" / "{method}" / "{trim_state}" / "episodic_selection_summary.txt",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            ),
            absrel_tsv=expand(
                OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "significant_foregrounds.tsv",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            ),
            absrel_meta=expand(
                OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "foreground_threshold.json",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            ),
            absrel_json=expand(
                OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "absrel.json",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            ),
            meme_json=expand(
                OUTDIR / "hyphy" / "{method}" / "{trim_state}" / "meme.json",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            ),
            branchsite=expand(
                OUTDIR / "branchsite" / "{method}" / "{trim_state}" / "branchsite_results.tsv",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            ),
            asr=expand(
                OUTDIR / "asr" / "{method}" / "{trim_state}" / "asr_done.json",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            ),
            proteins=expand(
                OUTDIR / "alignments" / "{method}" / "{trim_state}" / "orthogroup_proteins.analysis.fasta",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            ),
            cds=expand(
                OUTDIR / "alignments" / "{method}" / "{trim_state}" / "mapped_orthogroup_cds.analysis.fasta",
                method=ALIGNMENT_METHODS,
                trim_state=TRIM_STATES,
            ),
        output:
            matrix=OUTDIR / "summary" / "robustness_matrix.tsv",
            consensus=OUTDIR / "summary" / "robustness_consensus.tsv",
            narrative=OUTDIR / "summary" / "robustness_narrative.txt",
            comparative=OUTDIR / "summary" / "comparative_reproducibility_summary.txt",
            latex=OUTDIR / "summary" / "robustness_publication_table.tex",
        params:
            pathways=",".join(PATHWAY_LABELS),
            meme_p=MEME_P,
        shell:
            r'''
            mkdir -p {OUTDIR}/summary
            python -m babappasnake.scripts.generate_robustness_reports \
              --outdir {OUTDIR} \
              --pathways {params.pathways:q} \
              --meme-p {params.meme_p} \
              --matrix-out {output.matrix} \
              --consensus-out {output.consensus} \
              --narrative-out {output.narrative} \
              --comparative-out {output.comparative} \
              --latex-out {output.latex}
            '''


    rule final_summary_primary_alias:
        input:
            OUTDIR / "summary" / PRIMARY_METHOD / PRIMARY_TRIM / "episodic_selection_summary.txt"
        output:
            OUTDIR / "summary" / "episodic_selection_summary.txt"
        shell:
            r'''
            cp {input} {output}
            '''


    rule asr_primary_alias:
        input:
            OUTDIR / "asr" / PRIMARY_METHOD / PRIMARY_TRIM / "asr_done.json"
        output:
            OUTDIR / "asr" / "asr_done.json"
        shell:
            r'''
            mkdir -p {OUTDIR}/asr
            cp {input} {output}
            '''


    rule write_run_provenance:
        input:
            matrix=OUTDIR / "summary" / "robustness_matrix.tsv",
            consensus=OUTDIR / "summary" / "robustness_consensus.tsv",
            narrative=OUTDIR / "summary" / "robustness_narrative.txt",
            comparative=OUTDIR / "summary" / "comparative_reproducibility_summary.txt",
            latex=OUTDIR / "summary" / "robustness_publication_table.tex",
            top_summary=OUTDIR / "summary" / "episodic_selection_summary.txt",
            top_asr=OUTDIR / "asr" / "asr_done.json",
        output:
            provenance=OUTDIR / "summary" / "run_provenance.json"
        run:
            provenance_path = Path(str(output.provenance))
            summary_dir = provenance_path.parent
            payload = {
                "generated_at": datetime.now(timezone.utc).isoformat(),
                "outdir": str(OUTDIR.resolve()),
                "methods": ALIGNMENT_METHODS,
                "trim_states": TRIM_STATES,
                "pathways": [f"{m}_{t}" for m in ALIGNMENT_METHODS for t in TRIM_STATES],
                "parameters": {
                    "coverage": COVERAGE,
                    "outgroup_query": OUTGROUP_QUERY,
                    "iqtree_model": IQTREE_MODEL,
                    "iqtree_bootstrap": IQTREE_BOOTSTRAP,
                    "iqtree_bnni": IQTREE_BNNI,
                    "absrel_branches": ABSREL_BRANCHES,
                    "meme_branches": MEME_BRANCHES,
                    "absrel_p": ABSREL_P,
                    "absrel_dynamic_start": ABSREL_DYNAMIC_START,
                    "absrel_dynamic_step": ABSREL_DYNAMIC_STEP,
                    "absrel_dynamic_max": ABSREL_DYNAMIC_MAX,
                    "meme_p": MEME_P,
                    "codeml_codonfreq": CODEML_CODONFREQ,
                    "clipkit_mode_protein": CLIPKIT_MODE_PROTEIN,
                    "clipkit_mode_codon": CLIPKIT_MODE_CODON,
                    "threads_total": THREADS,
                    "per_method_cores": PER_METHOD_CORES,
                    "per_pathway_cores": PER_PATHWAY_CORES,
                },
                "files": {
                    "robustness_matrix": str(summary_dir / "robustness_matrix.tsv"),
                    "robustness_consensus": str(summary_dir / "robustness_consensus.tsv"),
                    "robustness_narrative": str(summary_dir / "robustness_narrative.txt"),
                    "comparative_summary": str(summary_dir / "comparative_reproducibility_summary.txt"),
                    "publication_table_latex": str(summary_dir / "robustness_publication_table.tex"),
                    "top_level_summary": str(Path(str(input.top_summary))),
                    "top_level_asr": str(Path(str(input.top_asr))),
                },
            }
            summary_dir.mkdir(parents=True, exist_ok=True)
            provenance_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
