LICENSE
README.md
pyproject.toml
src/knowledge_fidelity/__init__.py
src/knowledge_fidelity/__main__.py
src/knowledge_fidelity/audit.py
src/knowledge_fidelity/behavioral.py
src/knowledge_fidelity/calibration.py
src/knowledge_fidelity/core.py
src/knowledge_fidelity/denoise.py
src/knowledge_fidelity/utils.py
src/knowledge_fidelity/behaviors/__init__.py
src/knowledge_fidelity/behaviors/base.py
src/knowledge_fidelity/behaviors/bias.py
src/knowledge_fidelity/behaviors/factual.py
src/knowledge_fidelity/behaviors/metrics.py
src/knowledge_fidelity/behaviors/reasoning.py
src/knowledge_fidelity/behaviors/sycophancy.py
src/knowledge_fidelity/behaviors/toxicity.py
src/knowledge_fidelity/cartography/__init__.py
src/knowledge_fidelity/cartography/engine.py
src/knowledge_fidelity/cartography/schema.py
src/knowledge_fidelity/cli/__init__.py
src/knowledge_fidelity/cli/rho_audit.py
src/knowledge_fidelity/output/__init__.py
src/knowledge_fidelity/output/comparator.py
src/knowledge_fidelity/output/exporters.py
src/knowledge_fidelity/output/schema.py
src/knowledge_fidelity/probes/__init__.py
src/knowledge_fidelity/probes/registry.py
src/knowledge_fidelity/probes/data/bias/bbq_300.json
src/knowledge_fidelity/probes/data/factual/commonsense.json
src/knowledge_fidelity/probes/data/factual/default.json
src/knowledge_fidelity/probes/data/factual/mandela.json
src/knowledge_fidelity/probes/data/factual/medical.json
src/knowledge_fidelity/probes/data/factual/truthfulqa.json
src/knowledge_fidelity/probes/data/reasoning/gsm8k_100.json
src/knowledge_fidelity/probes/data/sycophancy/anthropic_150.json
src/knowledge_fidelity/probes/data/toxicity/toxigen_200.json
src/knowledge_fidelity/svd/__init__.py
src/knowledge_fidelity/svd/compress.py
src/knowledge_fidelity/svd/freeze.py
src/knowledge_fidelity/svd/importance.py
src/rho_eval/__init__.py
src/rho_eval/__main__.py
src/rho_eval/_compat.py
src/rho_eval/audit.py
src/rho_eval/behavioral.py
src/rho_eval/calibration.py
src/rho_eval/core.py
src/rho_eval/denoise.py
src/rho_eval/utils.py
src/rho_eval.egg-info/PKG-INFO
src/rho_eval.egg-info/SOURCES.txt
src/rho_eval.egg-info/dependency_links.txt
src/rho_eval.egg-info/entry_points.txt
src/rho_eval.egg-info/requires.txt
src/rho_eval.egg-info/top_level.txt
src/rho_eval/alignment/__init__.py
src/rho_eval/alignment/dataset.py
src/rho_eval/alignment/losses.py
src/rho_eval/alignment/mlx_losses.py
src/rho_eval/alignment/mlx_trainer.py
src/rho_eval/alignment/trainer.py
src/rho_eval/behaviors/__init__.py
src/rho_eval/behaviors/base.py
src/rho_eval/behaviors/bias.py
src/rho_eval/behaviors/factual.py
src/rho_eval/behaviors/metrics.py
src/rho_eval/behaviors/reasoning.py
src/rho_eval/behaviors/refusal.py
src/rho_eval/behaviors/sycophancy.py
src/rho_eval/behaviors/toxicity.py
src/rho_eval/benchmarking/__init__.py
src/rho_eval/benchmarking/adversarial.py
src/rho_eval/benchmarking/loader.py
src/rho_eval/benchmarking/reports.py
src/rho_eval/benchmarking/schema.py
src/rho_eval/benchmarking/scorers.py
src/rho_eval/cartography/__init__.py
src/rho_eval/cartography/engine.py
src/rho_eval/cartography/schema.py
src/rho_eval/cli/__init__.py
src/rho_eval/cli/rho_align.py
src/rho_eval/cli/rho_audit.py
src/rho_eval/cli/rho_bench.py
src/rho_eval/cli/rho_interpret.py
src/rho_eval/cli/rho_steer.py
src/rho_eval/interpretability/__init__.py
src/rho_eval/interpretability/activation.py
src/rho_eval/interpretability/heads.py
src/rho_eval/interpretability/overlap.py
src/rho_eval/interpretability/schema.py
src/rho_eval/interpretability/subspaces.py
src/rho_eval/interpretability/surgical.py
src/rho_eval/interpretability/visualize.py
src/rho_eval/output/__init__.py
src/rho_eval/output/comparator.py
src/rho_eval/output/exporters.py
src/rho_eval/output/schema.py
src/rho_eval/probes/__init__.py
src/rho_eval/probes/registry.py
src/rho_eval/probes/data/bench/clinical.json
src/rho_eval/probes/data/bench/logic.json
src/rho_eval/probes/data/bench/social.json
src/rho_eval/probes/data/bias/bbq_300.json
src/rho_eval/probes/data/factual/commonsense.json
src/rho_eval/probes/data/factual/default.json
src/rho_eval/probes/data/factual/mandela.json
src/rho_eval/probes/data/factual/medical.json
src/rho_eval/probes/data/factual/truthfulqa.json
src/rho_eval/probes/data/reasoning/gsm8k_100.json
src/rho_eval/probes/data/refusal/harmful_benign_100.json
src/rho_eval/probes/data/sycophancy/anthropic_150.json
src/rho_eval/probes/data/toxicity/toxigen_200.json
src/rho_eval/steering/__init__.py
src/rho_eval/steering/analyze.py
src/rho_eval/steering/collect.py
src/rho_eval/steering/sae.py
src/rho_eval/steering/schema.py
src/rho_eval/steering/steer.py
src/rho_eval/steering/train.py
src/rho_eval/svd/__init__.py
src/rho_eval/svd/compress.py
src/rho_eval/svd/freeze.py
src/rho_eval/svd/importance.py
tests/test_alignment.py
tests/test_benchmarking.py
tests/test_cli.py
tests/test_interpretability.py
tests/test_metrics.py
tests/test_output.py
tests/test_probes.py
tests/test_registry.py
tests/test_steering.py