CONTRIBUTING.md
LICENSE
MANIFEST.in
README.md
pyproject.toml
setup.cfg
setup.py
tests/slow/__init__.py
tests/slow/test_dpo_slow.py
tests/slow/test_sft_slow.py
tests/slow/testing_constants.py
trl/__init__.py
trl/core.py
trl/data_utils.py
trl/env_utils.py
trl/import_utils.py
trl.egg-info/PKG-INFO
trl.egg-info/SOURCES.txt
trl.egg-info/dependency_links.txt
trl.egg-info/entry_points.txt
trl.egg-info/not-zip-safe
trl.egg-info/requires.txt
trl.egg-info/top_level.txt
trl/commands/__init__.py
trl/commands/cli.py
trl/commands/cli_utils.py
trl/commands/scripts/alignprop.py
trl/commands/scripts/bco.py
trl/commands/scripts/chat.py
trl/commands/scripts/cpo.py
trl/commands/scripts/ddpo.py
trl/commands/scripts/dpo.py
trl/commands/scripts/dpo_online.py
trl/commands/scripts/dpo_visual.py
trl/commands/scripts/gkd.py
trl/commands/scripts/kto.py
trl/commands/scripts/nash_md.py
trl/commands/scripts/orpo.py
trl/commands/scripts/ppo.py
trl/commands/scripts/ppo_multi_adapter.py
trl/commands/scripts/reward_modeling.py
trl/commands/scripts/sft.py
trl/commands/scripts/vsft_llava.py
trl/commands/scripts/xpo.py
trl/commands/scripts/config/default_chat_config.yaml
trl/environment/__init__.py
trl/environment/base_environment.py
trl/extras/__init__.py
trl/extras/best_of_n_sampler.py
trl/extras/dataset_formatting.py
trl/models/__init__.py
trl/models/auxiliary_modules.py
trl/models/modeling_base.py
trl/models/modeling_sd_base.py
trl/models/modeling_value_head.py
trl/models/sd_utils.py
trl/models/utils.py
trl/trainer/__init__.py
trl/trainer/alignprop_config.py
trl/trainer/alignprop_trainer.py
trl/trainer/base.py
trl/trainer/bco_config.py
trl/trainer/bco_trainer.py
trl/trainer/callbacks.py
trl/trainer/cpo_config.py
trl/trainer/cpo_trainer.py
trl/trainer/ddpo_config.py
trl/trainer/ddpo_trainer.py
trl/trainer/dpo_config.py
trl/trainer/dpo_trainer.py
trl/trainer/gkd_config.py
trl/trainer/gkd_trainer.py
trl/trainer/iterative_sft_trainer.py
trl/trainer/judges.py
trl/trainer/kto_config.py
trl/trainer/kto_trainer.py
trl/trainer/model_config.py
trl/trainer/nash_md_config.py
trl/trainer/nash_md_trainer.py
trl/trainer/online_dpo_config.py
trl/trainer/online_dpo_trainer.py
trl/trainer/orpo_config.py
trl/trainer/orpo_trainer.py
trl/trainer/ppo_config.py
trl/trainer/ppo_trainer.py
trl/trainer/ppov2_config.py
trl/trainer/ppov2_trainer.py
trl/trainer/reward_config.py
trl/trainer/reward_trainer.py
trl/trainer/rloo_config.py
trl/trainer/rloo_trainer.py
trl/trainer/sft_config.py
trl/trainer/sft_trainer.py
trl/trainer/utils.py
trl/trainer/xpo_config.py
trl/trainer/xpo_trainer.py