CHANGELOG.md
CONTRIBUTING.md
LICENSE
MANIFEST.in
METHODOLOGY.md
README.md
pyproject.toml
datasets/README.md
datasets/canonical/persona-1-basic/conversations.jsonl
datasets/canonical/persona-1-basic/events.json
datasets/canonical/persona-1-basic/ground_truth.json
datasets/canonical/persona-1-basic/metadata.json
datasets/canonical/persona-1-basic/persona.json
datasets/canonical/persona-1-basic/queries.json
datasets/canonical/persona-2-intermediate/conversations.jsonl
datasets/canonical/persona-2-intermediate/events.json
datasets/canonical/persona-2-intermediate/ground_truth.json
datasets/canonical/persona-2-intermediate/metadata.json
datasets/canonical/persona-2-intermediate/persona.json
datasets/canonical/persona-2-intermediate/queries.json
datasets/canonical/persona-3-advanced/conversations.jsonl
datasets/canonical/persona-3-advanced/events.json
datasets/canonical/persona-3-advanced/ground_truth.json
datasets/canonical/persona-3-advanced/metadata.json
datasets/canonical/persona-3-advanced/persona.json
datasets/canonical/persona-3-advanced/queries.json
docs/proposed-new-metrics.md
docs/guides/integration.md
docs/guides/quickstart.md
docs/metrics/ars.md
docs/metrics/crq.md
docs/metrics/dbu.md
docs/metrics/lnc.md
docs/metrics/mei.md
docs/metrics/pas.md
docs/metrics/qrp.md
docs/metrics/sfc.md
docs/metrics/ssi.md
docs/metrics/tc.md
src/cri/__init__.py
src/cri/adapter.py
src/cri/judge.py
src/cri/models.py
src/cri/performance.py
src/cri/py.typed
src/cri/reporter.py
src/cri/runner.py
src/cri/datasets/__init__.py
src/cri/datasets/generator.py
src/cri/datasets/loader.py
src/cri/datasets/personas/__init__.py
src/cri/datasets/personas/specs.py
src/cri/scoring/__init__.py
src/cri/scoring/engine.py
src/cri/scoring/rubrics.py
src/cri/scoring/ssi.py
src/cri/scoring/dimensions/__init__.py
src/cri/scoring/dimensions/ars.py
src/cri/scoring/dimensions/base.py
src/cri/scoring/dimensions/crq.py
src/cri/scoring/dimensions/dbu.py
src/cri/scoring/dimensions/lnc.py
src/cri/scoring/dimensions/mei.py
src/cri/scoring/dimensions/pas.py
src/cri/scoring/dimensions/qrp.py
src/cri/scoring/dimensions/sfc.py
src/cri/scoring/dimensions/tc.py
src/cri/utils/llm_anthropic_subscription.py
src/cri_benchmark.egg-info/PKG-INFO
src/cri_benchmark.egg-info/SOURCES.txt
src/cri_benchmark.egg-info/dependency_links.txt
src/cri_benchmark.egg-info/entry_points.txt
src/cri_benchmark.egg-info/requires.txt
src/cri_benchmark.egg-info/top_level.txt
tests/test_adapter.py
tests/test_baseline_adapters.py
tests/test_binary_judge.py
tests/test_dataset_generator.py
tests/test_dataset_loader.py
tests/test_dimension_base.py
tests/test_integration.py
tests/test_judge.py
tests/test_models.py
tests/test_performance.py
tests/test_reporter.py
tests/test_rubrics.py
tests/test_runner.py
tests/test_scoring_engine.py
tests/test_ssi.py