# Run each linewise recipe line with `bash -uc`: `-u` makes expanding an unset
# shell variable an error, `-c` takes the recipe line as the command string.
# Recipes that start with a shebang line are executed by that interpreter instead.
set shell := ["bash", "-uc"]

# Package directory passed to the coverage and analysis tools in the recipes of
# this file through interpolation of `src`.
src := "src/llmdebug"

# `[private]` keeps this recipe itself out of the listing it prints; the
# leading `@` stops just from echoing the command before running it.
# List available recipes
[private]
default:
    @just --list

# Read-only: `ruff check` runs without `--fix` and `ruff format` runs with
# `--check`, so files under src/ and tests/ are reported on but not rewritten.
# The `format` recipe is the variant that applies the changes.
# Run ruff linter and format check
lint:
    uv run ruff check src tests
    uv run ruff format --check src tests

# Mutating counterpart of `lint`: `ruff check --fix` applies lint fixes and
# `ruff format` rewrites files under src/ and tests/ in place.
# Auto-fix lint and formatting issues
format:
    uv run ruff check --fix src tests
    uv run ruff format src tests

# No paths or flags are passed, so the files and strictness checked are decided
# by pyright's own configuration (not visible in this file).
# Run pyright type checker
typecheck:
    uv run pyright

# A single pytest invocation that measures branch coverage of the `src`
# package, prints missing lines to the terminal, writes XML and JSON coverage
# reports, and fails when total coverage is below 85%.
# Run tests with coverage report
test:
    uv run pytest \
        --cov={{ src }} \
        --cov-branch \
        --cov-report=term-missing \
        --cov-report=xml \
        --cov-report=json \
        --cov-fail-under=85

# `-x` stops at the first failing test and `-q` shortens the output; no
# coverage flags are passed on this command line.
# Run tests without coverage (faster, stop on first failure)
test-quick:
    uv run pytest -x -q

# Creates .benchmarks/ if needed, then runs only the benchmarks in
# tests/test_benchmarks.py whose names match `capture_overhead` and stores the
# JSON results as .benchmarks/capture-overhead-current.json.
# Run capture-overhead benchmarks and persist JSON results (track-only)
bench-capture:
    mkdir -p .benchmarks
    uv run pytest tests/test_benchmarks.py \
        -k capture_overhead \
        --benchmark-only \
        --benchmark-group-by=group \
        --benchmark-json=.benchmarks/capture-overhead-current.json \
        -q

# Requires both the baseline and the current result file to exist; prints a
# hint and exits 1 when either is missing, otherwise hands both paths to
# scripts/bench_capture_compare.py.
# Compare capture-overhead benchmark medians (baseline vs current)
bench-capture-compare:
    #!/usr/bin/env bash
    set -euo pipefail
    baseline_json=".benchmarks/capture-overhead-baseline.json"
    current_json=".benchmarks/capture-overhead-current.json"

    # Guard: the baseline is checked first, with instructions for creating one.
    [[ -f "$baseline_json" ]] || {
        printf '%s\n' \
            "Missing baseline benchmark file: $baseline_json" \
            "Create one by copying a trusted run:" \
            "  cp .benchmarks/capture-overhead-current.json .benchmarks/capture-overhead-baseline.json"
        exit 1
    }

    # Guard: the current results come from the bench-capture recipe.
    [[ -f "$current_json" ]] || {
        printf '%s\n' \
            "Missing current benchmark file: $current_json" \
            "Run: just bench-capture"
        exit 1
    }

    uv run python scripts/bench_capture_compare.py "$baseline_json" "$current_json"

# Two radon reports over the `src` package, each under its own header:
# `radon cc -a -nc -s` (average, minimum rank C, show scores) and
# `radon mi -s` (show maintainability values). All lines are `@`-prefixed, so
# just prints only the headers and tool output, not the commands themselves.
# The thresholded variant is the `complexity-gate` recipe.
# Show complex functions and maintainability index
complexity:
    @echo "=== Cyclomatic Complexity (C+ rated) ==="
    @uv run radon cc {{ src }} -a -nc -s
    @echo ""
    @echo "=== Maintainability Index ==="
    @uv run radon mi {{ src }} -s

# Thresholds passed to xenon for the `src` package: worst single block rank C,
# worst module rank C, overall average rank B.
# Run xenon complexity gate
complexity-gate:
    uv run xenon --max-absolute C --max-modules C --max-average B {{ src }}

# Scans the `src` package recursively (`-r`) with bandit, reading bandit's
# settings from pyproject.toml (`-c`).
# Run bandit security scanner
security:
    uv run bandit -c pyproject.toml -r {{ src }}

# Only findings with at least 90% confidence are reported.
# vulture_whitelist.py is passed as a second path — presumably it references
# names that are intentionally unused so vulture treats them as used; confirm
# against that file.
# Run vulture dead code detection
dead-code:
    uv run vulture {{ src }} vulture_whitelist.py --min-confidence 90

# Runs deptry against the `src` package with no extra flags, so any rule
# configuration comes from outside this file.
# Check dependency hygiene
deps:
    uv run deptry {{ src }}

# Runs the project's `evals.doc_sync` module with `--write` — presumably this
# rewrites the generated blocks in place (confirm in evals/doc_sync). The
# `docs-check` recipe runs the same module with `--check` instead.
# Synchronize auto-generated documentation blocks
docs-sync:
    uv run python -m evals.doc_sync --write

# Runs the project's `evals.doc_sync` module with `--check`; the `docs-sync`
# recipe is the `--write` variant of the same module.
# Check documentation drift (auto-generated docs + changelog consistency)
docs-check:
    uv run python -m evals.doc_sync --check

# Starts evals.run_eval with the openai_traceback_only.latest.toml config.
#   run_id - identifier forwarded verbatim as --run-id
# Traceback-only eval run using the pinned template defaults
eval-traceback-only run_id:
    uv run python -m evals.run_eval \
        --config evals/configs/openai_traceback_only.latest.toml \
        --run-id {{run_id}}

# Reads evals/results/<run_id>.jsonl and writes the selected cases to
# evals/artifacts/case_lists/<run_id>.failed.txt.
#   run_id    - names both the input results file and the output case list
#   condition - value forwarded as --condition (default "traceback_only")
# Extract failed traceback-only cases into a reusable case list
eval-select-failed run_id condition="traceback_only":
    uv run python -m evals.select_cases \
        --results-jsonl evals/results/{{run_id}}.jsonl \
        --condition {{condition}} \
        --mode failed \
        --out evals/artifacts/case_lists/{{run_id}}.failed.txt

# Runs evals.run_eval with the openai_adaptive.template.toml config, restricted
# to the case list at evals/artifacts/case_lists/<source_run_id>.failed.txt
# (the path the eval-select-failed recipe writes to).
#   source_run_id - run whose failed-case list is consumed
#   rescue_run_id - id of this run; also names the policy state file and the
#                   adaptive-policy events file
# Adaptive rescue run on failed-only cases with online policy best-settings template
eval-adaptive-online-failed source_run_id rescue_run_id:
    uv run python -m evals.run_eval \
        --config evals/configs/openai_adaptive.template.toml \
        --run-id {{rescue_run_id}} \
        --case-list-file evals/artifacts/case_lists/{{source_run_id}}.failed.txt \
        --adaptive-online-state-path evals/artifacts/policies/{{rescue_run_id}}.json \
        --adaptive-online-events-path evals/results/{{rescue_run_id}}.adaptive_policy.jsonl

#   run_id - forwarded verbatim as the only argument to evals.eval_summary
# Core summary for a single eval run (strict schema-v3)
eval-summary run_id:
    uv run python -m evals.eval_summary {{run_id}}

# Passes both run ids positionally to evals.eval_two_pass_summary together
# with the condition name to use for each pass.
#   baseline_run_id    - first-pass run
#   rescue_run_id      - failed-only rescue run
#   baseline_condition - forwarded as --baseline-condition (default "traceback_only")
#   rescue_condition   - forwarded as --rescue-condition (default "adaptive_llm_discretion")
# Two-pass workflow summary (baseline -> failed-only rescue)
eval-two-pass-summary baseline_run_id rescue_run_id baseline_condition="traceback_only" rescue_condition="adaptive_llm_discretion":
    uv run python -m evals.eval_two_pass_summary \
        {{baseline_run_id}} \
        {{rescue_run_id}} \
        --baseline-condition {{baseline_condition}} \
        --rescue-condition {{rescue_condition}}

# Fetches origin/main (depth 1), then runs diff-cover on coverage.xml against
# that branch with an 80% threshold. coverage.xml is not produced here; the
# `test` recipe writes it via `--cov-report=xml`, so run that first.
# Coverage on changed lines (PR/local branch check)
diff-coverage:
    git fetch origin main --depth=1
    uv run diff-cover coverage.xml --compare-branch=origin/main --fail-under=80

# Hands the freshly written coverage.json (produced by the `test` recipe's
# `--cov-report=json`) and the stored quality/coverage_baseline.json to the
# project's check_coverage_no_regression.py script.
# Coverage no-regression gate (branch coverage only)
coverage-no-regression:
    uv run python scripts/quality/check_coverage_no_regression.py \
        --coverage-json coverage.json \
        --baseline-json quality/coverage_baseline.json

# Has no body of its own: it only runs its dependencies `lint`, `typecheck`
# and `test`, in that order.
# Run all fast checks (lint + types + tests)
check: lint typecheck test

# Dependency-only recipe. `check` is listed first, so its `test` run has
# written coverage.json before `coverage-no-regression` reads it.
# Run all checks including quality tools
quality: check coverage-no-regression complexity complexity-gate security dead-code deps docs-check

# Self-contained: the tool commands are spelled out here instead of being
# delegated to the individual recipes, so flag changes must be mirrored in
# both places. Every line is `@`-prefixed, so only the section headers and the
# tools' own output are shown.
# NOTE(review): pytest is invoked twice (plain under "Tests", then again with
# coverage under "Coverage Gate") — confirm the double run is intentional.
# Run CI-equivalent checks locally (single Python version)
ci:
    @echo "=== Lint ==="
    @uv run ruff check src tests
    @echo ""
    @echo "=== Format Check ==="
    @uv run ruff format --check src tests
    @echo ""
    @echo "=== Type Check ==="
    @uv run pyright
    @echo ""
    @echo "=== Tests ==="
    @uv run pytest
    @echo ""
    @echo "=== Coverage Gate ==="
    @uv run pytest --cov={{ src }} --cov-branch --cov-report=term-missing --cov-report=xml --cov-report=json --cov-fail-under=85
    @echo ""
    @echo "=== Coverage No-Regression ==="
    @uv run python scripts/quality/check_coverage_no_regression.py --coverage-json coverage.json --baseline-json quality/coverage_baseline.json
    @echo ""
    @echo "=== Dependency Check ==="
    @uv run deptry {{ src }}
    @echo ""
    @echo "=== Dead Code ==="
    @uv run vulture {{ src }} vulture_whitelist.py --min-confidence 90
    @echo ""
    @echo "=== Complexity Report ==="
    @uv run radon cc -s -a {{ src }}
    @echo ""
    @echo "=== Complexity Gate ==="
    @uv run xenon --max-absolute C --max-modules C --max-average B {{ src }}
    @echo ""
    @echo "=== Security ==="
    @uv run bandit -c pyproject.toml -r {{ src }}

# Informational, best-effort report: it summarises findings and is not a gate.
# The script runs under `set -euo pipefail`, and the analysis tools exit
# non-zero when they have findings (failing tests, type errors, dead code,
# dependency issues). Pipelines that wrap such a tool therefore end in
# `|| true`, so one section with findings cannot abort the sections after it.
# Generate a comprehensive quality report
report:
    #!/usr/bin/env bash
    set -euo pipefail

    echo "=============================================================="
    echo "Code Quality Report - $(date +%Y-%m-%d)"
    echo "=============================================================="
    echo ""

    echo "-- Lines of Code ----------------------------------------------"
    find {{ src }} -name "*.py" -print0 | xargs -0 wc -l | sort -n
    echo ""

    echo "-- Tests ------------------------------------------------------"
    # Collection only (--co); the last line is pytest's summary of collected tests.
    uv run pytest --co -q 2>/dev/null | tail -1 || true
    echo ""

    echo "-- Coverage ---------------------------------------------------"
    # Keep the captured output even when pytest exits non-zero (failing tests).
    COV_OUT=$(uv run pytest --cov={{ src }} -q 2>/dev/null) || true
    # grep exits 1 when no coverage rows are present; do not abort on that.
    echo "$COV_OUT" | grep -E '(^src/|^TOTAL)' | \
        awk '{printf "  %-45s %5s %6s %5s\n", $1, $2, $3, $4}' || true
    echo "$COV_OUT" | grep -E '^Required' || true
    echo ""

    echo "-- Complexity Distribution ------------------------------------"
    # Counts radon's JSON blocks per rank and prints each rank's share of the total.
    uv run radon cc {{ src }} -a -j 2>/dev/null | uv run python -c "import json,sys; data=json.load(sys.stdin); total=sum(len(blocks) for blocks in data.values()); by_rank={}; [by_rank.__setitem__(b['rank'], by_rank.get(b['rank'], 0) + 1) for blocks in data.values() for b in blocks]; print(f'  Total blocks analyzed: {total}'); [print(f'  {rank}: {by_rank[rank]:>4} ({(by_rank[rank] * 100 // total) if total else 0}%)') for rank in sorted(by_rank)]"
    echo ""

    echo "-- High Complexity Functions (D+F rated) ----------------------"
    uv run radon cc {{ src }} -nd -s 2>/dev/null || echo "  (none)"
    echo ""

    echo "-- Maintainability Index --------------------------------------"
    # The package prefix is stripped from each path; it is taken from `src`
    # so it stays in sync with the directory radon was pointed at.
    uv run radon mi {{ src }} -j 2>/dev/null | uv run python -c "import json,sys; data=json.load(sys.stdin); [print(f\"  {info['rank']} ({info['mi']:5.1f})  {path.replace('{{ src }}/', '')}\") for path, info in sorted(data.items())]"
    echo ""

    echo "-- Lint -------------------------------------------------------"
    uv run ruff check src tests --statistics 2>/dev/null && echo "  No issues" || true
    echo ""

    echo "-- Type Check -------------------------------------------------"
    # pyright exits non-zero when it reports errors; its last line is the summary.
    uv run pyright 2>&1 | tail -1 || true
    echo ""

    echo "-- Security (bandit) ------------------------------------------"
    uv run bandit -c pyproject.toml -r {{ src }} 2>&1 | grep -E '(Total issues|Files skipped)' || true
    echo ""

    echo "-- Dead Code (vulture) ----------------------------------------"
    # vulture exits non-zero when it finds dead code, which is exactly the
    # case this count is meant to report, so the status is ignored here.
    DEAD=$(uv run vulture {{ src }} vulture_whitelist.py --min-confidence 90 2>/dev/null | wc -l | tr -d ' ') || true
    echo "  Unused code items: $DEAD"
    echo ""

    echo "-- Dependencies (deptry) --------------------------------------"
    # deptry exits non-zero when it reports issues; its last line is the summary.
    uv run deptry {{ src }} 2>&1 | tail -1 || true
    echo ""
    echo "=============================================================="

# Deletes tool caches and coverage outputs, including both report files the
# `test` recipe writes (coverage.xml and coverage.json), then removes every
# __pycache__ directory below the current directory. find's errors are
# silenced and its exit status ignored, so this step never fails the recipe.
# Benchmark results under .benchmarks/ are not removed.
# Remove build artifacts and caches
clean:
    rm -rf .pytest_cache .ruff_cache .pyright htmlcov .coverage coverage.xml coverage.json
    find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
