LICENSE
README.md
pyproject.toml
tests/test_amdahl_bottlenecks.py
tests/test_batched_generation.py
tests/test_countdown.py
tests/test_countdown_baseline.py
tests/test_countdown_reasoning_lora.py
tests/test_emergence_logger.py
tests/test_gradient_checkpointing.py
tests/test_grpo_gtpo.py
tests/test_grpo_stability.py
tests/test_gspo_verification.py
tests/test_gtpo.py
tests/test_hicra.py
tests/test_integration_e2e_training.py
tests/test_issue_fixes.py
tests/test_mlx_compatibility.py
tests/test_moe_models.py
tests/test_profile_hardware_targets.py
tests/test_profiling.py
tests/test_prompt_reuse_stats.py
tests/test_real_model_perf.py
tests/test_reasoning_stack.py
tests/test_regression_exact_output.py
tests/test_reward_signatures.py
tests/test_rollout_rewards.py
tests/test_runner_step_enforcement.py
tests/test_semantic_entropy.py
tests/test_sepa.py
tests/test_strategic_grams.py
tests/test_training_metrics.py
tests/test_training_pipeline.py
tests/test_validate_installation.py
textpolicy/__init__.py
textpolicy/__main__.py
textpolicy/cli.py
textpolicy/validate.py
textpolicy.egg-info/PKG-INFO
textpolicy.egg-info/SOURCES.txt
textpolicy.egg-info/dependency_links.txt
textpolicy.egg-info/entry_points.txt
textpolicy.egg-info/requires.txt
textpolicy.egg-info/top_level.txt
textpolicy/algorithms/__init__.py
textpolicy/algorithms/grpo.py
textpolicy/algorithms/gspo.py
textpolicy/algorithms/hicra.py
textpolicy/algorithms/length_shaping.py
textpolicy/analysis/__init__.py
textpolicy/analysis/emergence_logger.py
textpolicy/analysis/planning_patterns.py
textpolicy/analysis/serialization.py
textpolicy/analysis/strategic_grams.py
textpolicy/buffer/__init__.py
textpolicy/buffer/buffer.py
textpolicy/buffer/episode.py
textpolicy/buffer/sampling.py
textpolicy/buffer/storage.py
textpolicy/environment/__init__.py
textpolicy/environment/base.py
textpolicy/environment/environment.py
textpolicy/environment/factory.py
textpolicy/environment/gym.py
textpolicy/environment/task_suites.py
textpolicy/environment/text_generation.py
textpolicy/environment/vectorized.py
textpolicy/generation/__init__.py
textpolicy/generation/lora.py
textpolicy/generation/mlx_generation.py
textpolicy/generation/reload.py
textpolicy/rewards/__init__.py
textpolicy/rewards/adapters.py
textpolicy/rewards/basic.py
textpolicy/rewards/integrated_system.py
textpolicy/rewards/mlx_batch_processor.py
textpolicy/rewards/registry.py
textpolicy/rewards/rollout_rewards.py
textpolicy/rewards/verifiers.py
textpolicy/rollout/__init__.py
textpolicy/rollout/aggregator.py
textpolicy/rollout/base.py
textpolicy/rollout/rollout.py
textpolicy/rollout/runner.py
textpolicy/rollout/strategy.py
textpolicy/rollout/worker.py
textpolicy/tasks/__init__.py
textpolicy/tasks/countdown/__init__.py
textpolicy/tasks/countdown/dataset.py
textpolicy/tasks/countdown/evaluator.py
textpolicy/tasks/countdown/prompt.py
textpolicy/tasks/countdown/reward.py
textpolicy/training/__init__.py
textpolicy/training/gradient_checkpointing.py
textpolicy/training/metrics.py
textpolicy/training/reasoning_stack.py
textpolicy/training/rollout_manager.py
textpolicy/training/semantic_entropy.py
textpolicy/training/sepa.py
textpolicy/training/trainer.py
textpolicy/utils/__init__.py
textpolicy/utils/benchmarking.py
textpolicy/utils/data.py
textpolicy/utils/debug.py
textpolicy/utils/environment.py
textpolicy/utils/memory.py
textpolicy/utils/performance.py
textpolicy/utils/timing.py
textpolicy/utils/logging/__init__.py
textpolicy/utils/logging/base.py
textpolicy/utils/logging/console.py
textpolicy/utils/logging/factory.py
textpolicy/utils/logging/multi.py
textpolicy/utils/logging/tensorboard.py
textpolicy/utils/logging/wandb.py
textpolicy/validation/__init__.py
textpolicy/validation/logprob_validation.py