LICENSE
README.md
pyproject.toml
setup.py
./scripts/__init__.py
./scripts/converter_hf_to_mcore.py
./scripts/diagnose.py
./scripts/init_random_model.py
./scripts/legacy_model_merger.py
./scripts/megatron_merge_lora.py
./scripts/print_cfg.py
./scripts/rollout_viewer.py
./tests/__init__.py
./tests/test_base_config_on_cpu.py
./tests/test_protocol_on_cpu.py
./tests/test_protocol_v2_on_cpu.py
./tests/checkpoint_engine/__init__.py
./tests/checkpoint_engine/test_correctness_on_gpu.py
./tests/checkpoint_engine/test_correctness_on_npu.py
./tests/checkpoint_engine/test_special_server_adapter.py
./tests/checkpoint_engine/test_utils.py
./tests/interactions/__init__.py
./tests/interactions/test_gsm8k_interaction.py
./tests/interactions/test_interaction_registry.py
./tests/single_controller/__init__.py
./tests/single_controller/test_auto_padding_on_cpu.py
./tests/single_controller/test_colocated_workers.py
./tests/single_controller/test_colocated_workers_fused.py
./tests/single_controller/test_data_transfer.py
./tests/single_controller/test_decorator_on_cpu.py
./tests/single_controller/test_device_mesh_register.py
./tests/single_controller/test_driverfunc_to_worker.py
./tests/single_controller/test_fused_workers_on_cpu.py
./tests/single_controller/test_get_set_dispatch_collect_cpu.py
./tests/single_controller/test_high_level_scheduling_api.py
./tests/single_controller/test_nested_worker.py
./tests/single_controller/test_ray_collectives.py
./tests/single_controller/test_ray_local_envs_on_cpu.py
./tests/single_controller/test_ray_utils_on_cpu.py
./tests/single_controller/test_rvdz.py
./tests/single_controller/test_split_resource_pool.py
./tests/single_controller/test_worker_group_basics.py
./tests/single_controller/test_worker_group_torch.py
./tests/special_e2e/__init__.py
./tests/special_e2e/check_custom_rwd_fn.py
./tests/special_e2e/check_results.py
./tests/special_e2e/envs/__init__.py
./tests/special_e2e/envs/digit_completion/__init__.py
./tests/special_e2e/envs/digit_completion/task.py
./tests/special_e2e/envs/digit_completion/tokenizer.py
./tests/trainer/__init__.py
./tests/trainer/config/__init__.py
./tests/trainer/config/test_algo_config_on_cpu.py
./tests/trainer/config/test_legacy_config_on_cpu.py
./tests/trainer/ppo/__init__.py
./tests/trainer/ppo/test_core_algos_on_cpu.py
./tests/trainer/ppo/test_metric_utils_on_cpu.py
./tests/trainer/ppo/test_rollout_corr.py
./tests/trainer/ppo/test_rollout_corr_integration.py
./verl/__init__.py
./verl/base_config.py
./verl/protocol.py
./verl/py.typed
./verl/checkpoint_engine/__init__.py
./verl/checkpoint_engine/base.py
./verl/checkpoint_engine/hccl_checkpoint_engine.py
./verl/checkpoint_engine/kimi_checkpoint_engine.py
./verl/checkpoint_engine/mooncake_checkpoint_engine.py
./verl/checkpoint_engine/nccl_checkpoint_engine.py
./verl/checkpoint_engine/nixl_checkpoint_engine.py
./verl/experimental/__init__.py
./verl/experimental/agent_loop/__init__.py
./verl/experimental/agent_loop/agent_loop.py
./verl/experimental/agent_loop/prometheus_utils.py
./verl/experimental/agent_loop/single_turn_agent_loop.py
./verl/experimental/agent_loop/tool_agent_loop.py
./verl/experimental/agent_loop/tool_parser.py
./verl/experimental/agent_loop/utils.py
./verl/experimental/dataset/__init__.py
./verl/experimental/dataset/sampler.py
./verl/experimental/dynamic_dataset/__init__.py
./verl/experimental/dynamic_dataset/dynamicgen_dataset.py
./verl/experimental/fully_async_policy/config/fully_async_ppo_megatron_trainer.yaml
./verl/experimental/fully_async_policy/config/fully_async_ppo_trainer.yaml
./verl/experimental/one_step_off_policy/config/one_step_off_ppo_megatron_trainer.yaml
./verl/experimental/one_step_off_policy/config/one_step_off_ppo_trainer.yaml
./verl/experimental/reward_loop/__init__.py
./verl/experimental/reward_loop/reward_loop.py
./verl/experimental/reward_loop/reward_model.py
./verl/experimental/reward_loop/reward_manager/__init__.py
./verl/experimental/reward_loop/reward_manager/base.py
./verl/experimental/reward_loop/reward_manager/dapo.py
./verl/experimental/reward_loop/reward_manager/gdpo.py
./verl/experimental/reward_loop/reward_manager/limited.py
./verl/experimental/reward_loop/reward_manager/naive.py
./verl/experimental/reward_loop/reward_manager/registry.py
./verl/experimental/reward_loop/reward_manager/remote.py
./verl/experimental/separation/__init__.py
./verl/experimental/separation/engine_workers.py
./verl/experimental/separation/ray_trainer.py
./verl/experimental/separation/utils.py
./verl/experimental/vla/config/rob_ppo_trainer.yaml
./verl/experimental/vla/config/rob_sac_trainer.yaml
./verl/interactions/__init__.py
./verl/interactions/base.py
./verl/interactions/gsm8k_interaction.py
./verl/interactions/weather_interaction.py
./verl/interactions/utils/__init__.py
./verl/interactions/utils/interaction_registry.py
./verl/model_merger/__init__.py
./verl/model_merger/__main__.py
./verl/model_merger/base_model_merger.py
./verl/model_merger/fsdp_model_merger.py
./verl/model_merger/megatron_model_merger.py
./verl/models/__init__.py
./verl/models/registry.py
./verl/models/weight_loader_registry.py
./verl/models/llama/__init__.py
./verl/models/llama/megatron/__init__.py
./verl/models/llama/megatron/modeling_llama_megatron.py
./verl/models/llama/megatron/checkpoint_utils/__init__.py
./verl/models/llama/megatron/checkpoint_utils/llama_loader.py
./verl/models/llama/megatron/checkpoint_utils/llama_loader_depracated.py
./verl/models/llama/megatron/checkpoint_utils/llama_saver.py
./verl/models/llama/megatron/layers/__init__.py
./verl/models/llama/megatron/layers/parallel_attention.py
./verl/models/llama/megatron/layers/parallel_decoder.py
./verl/models/llama/megatron/layers/parallel_linear.py
./verl/models/llama/megatron/layers/parallel_mlp.py
./verl/models/llama/megatron/layers/parallel_rmsnorm.py
./verl/models/mcore/__init__.py
./verl/models/mcore/bridge.py
./verl/models/mcore/config_converter.py
./verl/models/mcore/loader.py
./verl/models/mcore/mbridge.py
./verl/models/mcore/model_forward.py
./verl/models/mcore/model_forward_1f1b_overlap.py
./verl/models/mcore/model_forward_fused.py
./verl/models/mcore/model_initializer.py
./verl/models/mcore/mtp_patch.py
./verl/models/mcore/patch.py
./verl/models/mcore/registry.py
./verl/models/mcore/saver.py
./verl/models/mcore/util.py
./verl/models/mcore/weight_converter.py
./verl/models/mcore/qwen2_5_vl/__init__.py
./verl/models/mcore/qwen2_5_vl/attention.py
./verl/models/mcore/qwen2_5_vl/model.py
./verl/models/mcore/qwen2_5_vl/rope_utils.py
./verl/models/mcore/qwen2_5_vl/vision_config.py
./verl/models/mcore/qwen2_5_vl/vision_model.py
./verl/models/mcore/qwen2_5_vl/vision_transformer_block.py
./verl/models/qwen2/__init__.py
./verl/models/qwen2/megatron/__init__.py
./verl/models/qwen2/megatron/modeling_qwen2_megatron.py
./verl/models/qwen2/megatron/checkpoint_utils/__init__.py
./verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py
./verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader_depracated.py
./verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py
./verl/models/qwen2/megatron/layers/__init__.py
./verl/models/qwen2/megatron/layers/parallel_attention.py
./verl/models/qwen2/megatron/layers/parallel_decoder.py
./verl/models/qwen2/megatron/layers/parallel_linear.py
./verl/models/qwen2/megatron/layers/parallel_mlp.py
./verl/models/qwen2/megatron/layers/parallel_rmsnorm.py
./verl/models/transformers/__init__.py
./verl/models/transformers/apertus.py
./verl/models/transformers/dense_common.py
./verl/models/transformers/glm4v.py
./verl/models/transformers/kimi_vl.py
./verl/models/transformers/llama.py
./verl/models/transformers/monkey_patch.py
./verl/models/transformers/npu_patch.py
./verl/models/transformers/qwen2.py
./verl/models/transformers/qwen2_vl.py
./verl/models/transformers/qwen3_vl.py
./verl/models/transformers/tiled_mlp.py
./verl/single_controller/__init__.py
./verl/single_controller/base/__init__.py
./verl/single_controller/base/decorator.py
./verl/single_controller/base/worker.py
./verl/single_controller/base/worker_group.py
./verl/single_controller/ray/__init__.py
./verl/single_controller/ray/base.py
./verl/third_party/__init__.py
./verl/third_party/torch/__init__.py
./verl/third_party/torch/distributed/__init__.py
./verl/third_party/torch/distributed/_state_dict_utils.py
./verl/third_party/torch/distributed/checkpoint/__init__.py
./verl/third_party/torch/distributed/checkpoint/state_dict.py
./verl/third_party/vllm/__init__.py
./verl/tools/__init__.py
./verl/tools/base_tool.py
./verl/tools/geo3k_tool.py
./verl/tools/gsm8k_tool.py
./verl/tools/image_zoom_in_tool.py
./verl/tools/mcp_base_tool.py
./verl/tools/mcp_search_tool.py
./verl/tools/sandbox_fusion_tools.py
./verl/tools/schemas.py
./verl/tools/search_tool.py
./verl/tools/utils/__init__.py
./verl/tools/utils/search_r1_like_utils.py
./verl/tools/utils/tool_registry.py
./verl/trainer/__init__.py
./verl/trainer/constants_ppo.py
./verl/trainer/main_eval.py
./verl/trainer/main_generation_server.py
./verl/trainer/main_ppo.py
./verl/trainer/sft_trainer.py
./verl/trainer/sft_trainer_ray.py
./verl/trainer/config/__init__.py
./verl/trainer/config/_generated_ppo_megatron_trainer.yaml
./verl/trainer/config/_generated_ppo_torchtitan_trainer.yaml
./verl/trainer/config/_generated_ppo_trainer.yaml
./verl/trainer/config/_generated_ppo_veomni_trainer.yaml
./verl/trainer/config/algorithm.py
./verl/trainer/config/config.py
./verl/trainer/config/evaluation.yaml
./verl/trainer/config/legacy_reward_impl.yaml
./verl/trainer/config/ppo_megatron_trainer.yaml
./verl/trainer/config/ppo_trainer.yaml
./verl/trainer/config/sft_trainer_engine.yaml
./verl/trainer/config/actor/actor.yaml
./verl/trainer/config/actor/dp_actor.yaml
./verl/trainer/config/actor/megatron_actor.yaml
./verl/trainer/config/actor/torchtitan_actor.yaml
./verl/trainer/config/actor/veomni_actor.yaml
./verl/trainer/config/algorithm/rollout_correction.yaml
./verl/trainer/config/critic/critic.yaml
./verl/trainer/config/critic/dp_critic.yaml
./verl/trainer/config/critic/megatron_critic.yaml
./verl/trainer/config/critic/torchtitan_critic.yaml
./verl/trainer/config/critic/veomni_critic.yaml
./verl/trainer/config/data/legacy_data.yaml
./verl/trainer/config/engine/fsdp.yaml
./verl/trainer/config/engine/megatron.yaml
./verl/trainer/config/engine/torchtitan.yaml
./verl/trainer/config/engine/veomni.yaml
./verl/trainer/config/model/hf_model.yaml
./verl/trainer/config/model_engine/dp.yaml
./verl/trainer/config/model_engine/torchtitan.yaml
./verl/trainer/config/model_engine/veomni.yaml
./verl/trainer/config/npu_profile/npu_profile.yaml
./verl/trainer/config/optim/fsdp.yaml
./verl/trainer/config/optim/megatron.yaml
./verl/trainer/config/optim/torchtitan.yaml
./verl/trainer/config/optim/veomni.yaml
./verl/trainer/config/profiler/profiler.yaml
./verl/trainer/config/ref/dp_ref.yaml
./verl/trainer/config/ref/megatron_ref.yaml
./verl/trainer/config/ref/ref.yaml
./verl/trainer/config/ref/torchtitan_ref.yaml
./verl/trainer/config/ref/veomni_ref.yaml
./verl/trainer/config/reward/reward.yaml
./verl/trainer/config/rollout/rollout.yaml
./verl/trainer/ppo/__init__.py
./verl/trainer/ppo/core_algos.py
./verl/trainer/ppo/metric_utils.py
./verl/trainer/ppo/prefix_grouper_utils.py
./verl/trainer/ppo/ray_trainer.py
./verl/trainer/ppo/reward.py
./verl/trainer/ppo/rollout_corr_helper.py
./verl/trainer/ppo/utils.py
./verl/utils/__init__.py
./verl/utils/activation_offload.py
./verl/utils/attention_utils.py
./verl/utils/chat_template.py
./verl/utils/config.py
./verl/utils/device.py
./verl/utils/distributed.py
./verl/utils/flops_counter.py
./verl/utils/fp8_utils.py
./verl/utils/fs.py
./verl/utils/fsdp_utils.py
./verl/utils/groupwise.py
./verl/utils/hdfs_io.py
./verl/utils/import_utils.py
./verl/utils/logging_utils.py
./verl/utils/megatron_peft_utils.py
./verl/utils/megatron_utils.py
./verl/utils/memory_utils.py
./verl/utils/model.py
./verl/utils/net_utils.py
./verl/utils/npu_flash_attn_utils.py
./verl/utils/py_functional.py
./verl/utils/ray_utils.py
./verl/utils/rollout_skip.py
./verl/utils/rollout_trace.py
./verl/utils/seqlen_balancing.py
./verl/utils/tensordict_utils.py
./verl/utils/tokenizer.py
./verl/utils/torch_dtypes.py
./verl/utils/torch_functional.py
./verl/utils/tracking.py
./verl/utils/transformers_compat.py
./verl/utils/ulysses.py
./verl/utils/checkpoint/__init__.py
./verl/utils/checkpoint/checkpoint_handler.py
./verl/utils/checkpoint/checkpoint_manager.py
./verl/utils/checkpoint/fsdp_checkpoint_manager.py
./verl/utils/checkpoint/megatron_checkpoint_manager.py
./verl/utils/dataset/__init__.py
./verl/utils/dataset/dataset_utils.py
./verl/utils/dataset/multiturn_sft_dataset.py
./verl/utils/dataset/rl_dataset.py
./verl/utils/dataset/rm_dataset.py
./verl/utils/dataset/vision_utils.py
./verl/utils/debug/__init__.py
./verl/utils/debug/metrics.py
./verl/utils/debug/performance.py
./verl/utils/debug/trajectory_tracker.py
./verl/utils/experimental/__init__.py
./verl/utils/experimental/torch_functional.py
./verl/utils/kernel/__init__.py
./verl/utils/kernel/fp8_kernel.py
./verl/utils/kernel/kernels.py
./verl/utils/kernel/linear_cross_entropy.py
./verl/utils/logger/__init__.py
./verl/utils/logger/aggregate_logger.py
./verl/utils/megatron/__init__.py
./verl/utils/megatron/dist_checkpointing.py
./verl/utils/megatron/memory.py
./verl/utils/megatron/optimizer.py
./verl/utils/megatron/pipeline_parallel.py
./verl/utils/megatron/router_replay_patch.py
./verl/utils/megatron/router_replay_utils.py
./verl/utils/megatron/sequence_parallel.py
./verl/utils/megatron/tensor_parallel.py
./verl/utils/metric/__init__.py
./verl/utils/metric/utils.py
./verl/utils/profiler/__init__.py
./verl/utils/profiler/config.py
./verl/utils/profiler/empty_annotations.py
./verl/utils/profiler/mstx_profile.py
./verl/utils/profiler/nvtx_profile.py
./verl/utils/profiler/performance.py
./verl/utils/profiler/profile.py
./verl/utils/profiler/torch_profile.py
./verl/utils/qat/__init__.py
./verl/utils/qat/core.py
./verl/utils/qat/linear.py
./verl/utils/qat/quantizer.py
./verl/utils/qat/vllm_patch.py
./verl/utils/rendezvous/__init__.py
./verl/utils/rendezvous/ray_backend.py
./verl/utils/reward_score/__init__.py
./verl/utils/reward_score/geo3k.py
./verl/utils/reward_score/gsm8k.py
./verl/utils/reward_score/math_batch.py
./verl/utils/reward_score/math_dapo.py
./verl/utils/reward_score/math_reward.py
./verl/utils/reward_score/math_verify.py
./verl/utils/reward_score/rlla.py
./verl/utils/reward_score/search_r1_like_qa_em.py
./verl/utils/reward_score/prime_code/__init__.py
./verl/utils/reward_score/prime_code/testing_util.py
./verl/utils/reward_score/prime_code/utils.py
./verl/utils/reward_score/prime_math/__init__.py
./verl/utils/reward_score/prime_math/grader.py
./verl/utils/reward_score/prime_math/math_normalize.py
./verl/utils/reward_score/sandbox_fusion/__init__.py
./verl/utils/reward_score/sandbox_fusion/utils.py
./verl/utils/vllm/__init__.py
./verl/utils/vllm/patch.py
./verl/utils/vllm/utils.py
./verl/utils/vllm/vllm_fp8_utils.py
./verl/version/version
./verl/workers/__init__.py
./verl/workers/engine_workers.py
./verl/workers/fsdp_workers.py
./verl/workers/megatron_workers.py
./verl/workers/actor/__init__.py
./verl/workers/actor/base.py
./verl/workers/actor/dp_actor.py
./verl/workers/actor/megatron_actor.py
./verl/workers/config/__init__.py
./verl/workers/config/actor.py
./verl/workers/config/critic.py
./verl/workers/config/engine.py
./verl/workers/config/megatron_peft.py
./verl/workers/config/model.py
./verl/workers/config/optimizer.py
./verl/workers/config/reward.py
./verl/workers/config/rollout.py
./verl/workers/critic/__init__.py
./verl/workers/critic/base.py
./verl/workers/critic/dp_critic.py
./verl/workers/critic/megatron_critic.py
./verl/workers/engine/__init__.py
./verl/workers/engine/base.py
./verl/workers/engine/utils.py
./verl/workers/engine/fsdp/__init__.py
./verl/workers/engine/fsdp/transformer_impl.py
./verl/workers/engine/fsdp/utils.py
./verl/workers/engine/megatron/__init__.py
./verl/workers/engine/megatron/transformer_impl.py
./verl/workers/engine/megatron/utils.py
./verl/workers/engine/mindspeed/__init__.py
./verl/workers/engine/mindspeed/transformer_impl.py
./verl/workers/engine/torchtitan/__init__.py
./verl/workers/engine/torchtitan/transformer_impl.py
./verl/workers/engine/torchtitan/utils.py
./verl/workers/engine/veomni/__init__.py
./verl/workers/engine/veomni/transformer_impl.py
./verl/workers/engine/veomni/utils.py
./verl/workers/reward_manager/__init__.py
./verl/workers/reward_manager/abstract.py
./verl/workers/reward_manager/batch.py
./verl/workers/reward_manager/dapo.py
./verl/workers/reward_manager/naive.py
./verl/workers/reward_manager/prime.py
./verl/workers/reward_manager/registry.py
./verl/workers/rollout/__init__.py
./verl/workers/rollout/base.py
./verl/workers/rollout/hf_rollout.py
./verl/workers/rollout/replica.py
./verl/workers/rollout/schemas.py
./verl/workers/rollout/tokenizer.py
./verl/workers/rollout/utils.py
./verl/workers/rollout/naive/__init__.py
./verl/workers/rollout/naive/naive_rollout.py
./verl/workers/rollout/sglang_rollout/__init__.py
./verl/workers/rollout/sglang_rollout/async_sglang_server.py
./verl/workers/rollout/sglang_rollout/http_server_engine.py
./verl/workers/rollout/sglang_rollout/sglang_rollout.py
./verl/workers/rollout/sglang_rollout/utils.py
./verl/workers/rollout/vllm_rollout/__init__.py
./verl/workers/rollout/vllm_rollout/bucketed_weight_transfer.py
./verl/workers/rollout/vllm_rollout/utils.py
./verl/workers/rollout/vllm_rollout/vllm_async_server.py
./verl/workers/rollout/vllm_rollout/vllm_rollout.py
./verl/workers/sharding_manager/__init__.py
./verl/workers/sharding_manager/base.py
./verl/workers/sharding_manager/fsdp_ulysses.py
./verl/workers/utils/__init__.py
./verl/workers/utils/losses.py
./verl/workers/utils/padding.py
scripts/__init__.py
scripts/converter_hf_to_mcore.py
scripts/diagnose.py
scripts/init_random_model.py
scripts/legacy_model_merger.py
scripts/megatron_merge_lora.py
scripts/print_cfg.py
scripts/rollout_viewer.py
tests/__init__.py
tests/test_base_config_on_cpu.py
tests/test_protocol_on_cpu.py
tests/test_protocol_v2_on_cpu.py
tests/checkpoint_engine/__init__.py
tests/checkpoint_engine/test_correctness_on_gpu.py
tests/checkpoint_engine/test_correctness_on_npu.py
tests/checkpoint_engine/test_special_server_adapter.py
tests/checkpoint_engine/test_utils.py
tests/interactions/__init__.py
tests/interactions/test_gsm8k_interaction.py
tests/interactions/test_interaction_registry.py
tests/single_controller/__init__.py
tests/single_controller/test_auto_padding_on_cpu.py
tests/single_controller/test_colocated_workers.py
tests/single_controller/test_colocated_workers_fused.py
tests/single_controller/test_data_transfer.py
tests/single_controller/test_decorator_on_cpu.py
tests/single_controller/test_device_mesh_register.py
tests/single_controller/test_driverfunc_to_worker.py
tests/single_controller/test_fused_workers_on_cpu.py
tests/single_controller/test_get_set_dispatch_collect_cpu.py
tests/single_controller/test_high_level_scheduling_api.py
tests/single_controller/test_nested_worker.py
tests/single_controller/test_ray_collectives.py
tests/single_controller/test_ray_local_envs_on_cpu.py
tests/single_controller/test_ray_utils_on_cpu.py
tests/single_controller/test_rvdz.py
tests/single_controller/test_split_resource_pool.py
tests/single_controller/test_worker_group_basics.py
tests/single_controller/test_worker_group_torch.py
tests/special_e2e/__init__.py
tests/special_e2e/check_custom_rwd_fn.py
tests/special_e2e/check_results.py
tests/special_e2e/envs/__init__.py
tests/special_e2e/envs/digit_completion/__init__.py
tests/special_e2e/envs/digit_completion/task.py
tests/special_e2e/envs/digit_completion/tokenizer.py
tests/trainer/__init__.py
tests/trainer/config/__init__.py
tests/trainer/config/test_algo_config_on_cpu.py
tests/trainer/config/test_legacy_config_on_cpu.py
tests/trainer/ppo/__init__.py
tests/trainer/ppo/test_core_algos_on_cpu.py
tests/trainer/ppo/test_metric_utils_on_cpu.py
tests/trainer/ppo/test_rollout_corr.py
tests/trainer/ppo/test_rollout_corr_integration.py
verl/__init__.py
verl/base_config.py
verl/protocol.py
verl/py.typed
verl.egg-info/PKG-INFO
verl.egg-info/SOURCES.txt
verl.egg-info/dependency_links.txt
verl.egg-info/requires.txt
verl.egg-info/top_level.txt
verl/checkpoint_engine/__init__.py
verl/checkpoint_engine/base.py
verl/checkpoint_engine/hccl_checkpoint_engine.py
verl/checkpoint_engine/kimi_checkpoint_engine.py
verl/checkpoint_engine/mooncake_checkpoint_engine.py
verl/checkpoint_engine/nccl_checkpoint_engine.py
verl/checkpoint_engine/nixl_checkpoint_engine.py
verl/experimental/__init__.py
verl/experimental/agent_loop/__init__.py
verl/experimental/agent_loop/agent_loop.py
verl/experimental/agent_loop/prometheus_utils.py
verl/experimental/agent_loop/single_turn_agent_loop.py
verl/experimental/agent_loop/tool_agent_loop.py
verl/experimental/agent_loop/tool_parser.py
verl/experimental/agent_loop/utils.py
verl/experimental/dataset/__init__.py
verl/experimental/dataset/sampler.py
verl/experimental/dynamic_dataset/__init__.py
verl/experimental/dynamic_dataset/dynamicgen_dataset.py
verl/experimental/fully_async_policy/config/fully_async_ppo_megatron_trainer.yaml
verl/experimental/fully_async_policy/config/fully_async_ppo_trainer.yaml
verl/experimental/one_step_off_policy/config/one_step_off_ppo_megatron_trainer.yaml
verl/experimental/one_step_off_policy/config/one_step_off_ppo_trainer.yaml
verl/experimental/reward_loop/__init__.py
verl/experimental/reward_loop/reward_loop.py
verl/experimental/reward_loop/reward_model.py
verl/experimental/reward_loop/reward_manager/__init__.py
verl/experimental/reward_loop/reward_manager/base.py
verl/experimental/reward_loop/reward_manager/dapo.py
verl/experimental/reward_loop/reward_manager/gdpo.py
verl/experimental/reward_loop/reward_manager/limited.py
verl/experimental/reward_loop/reward_manager/naive.py
verl/experimental/reward_loop/reward_manager/registry.py
verl/experimental/reward_loop/reward_manager/remote.py
verl/experimental/separation/__init__.py
verl/experimental/separation/engine_workers.py
verl/experimental/separation/ray_trainer.py
verl/experimental/separation/utils.py
verl/experimental/vla/config/rob_ppo_trainer.yaml
verl/experimental/vla/config/rob_sac_trainer.yaml
verl/interactions/__init__.py
verl/interactions/base.py
verl/interactions/gsm8k_interaction.py
verl/interactions/weather_interaction.py
verl/interactions/utils/__init__.py
verl/interactions/utils/interaction_registry.py
verl/model_merger/__init__.py
verl/model_merger/__main__.py
verl/model_merger/base_model_merger.py
verl/model_merger/fsdp_model_merger.py
verl/model_merger/megatron_model_merger.py
verl/models/__init__.py
verl/models/registry.py
verl/models/weight_loader_registry.py
verl/models/llama/__init__.py
verl/models/llama/megatron/__init__.py
verl/models/llama/megatron/modeling_llama_megatron.py
verl/models/llama/megatron/checkpoint_utils/__init__.py
verl/models/llama/megatron/checkpoint_utils/llama_loader.py
verl/models/llama/megatron/checkpoint_utils/llama_loader_depracated.py
verl/models/llama/megatron/checkpoint_utils/llama_saver.py
verl/models/llama/megatron/layers/__init__.py
verl/models/llama/megatron/layers/parallel_attention.py
verl/models/llama/megatron/layers/parallel_decoder.py
verl/models/llama/megatron/layers/parallel_linear.py
verl/models/llama/megatron/layers/parallel_mlp.py
verl/models/llama/megatron/layers/parallel_rmsnorm.py
verl/models/mcore/__init__.py
verl/models/mcore/bridge.py
verl/models/mcore/config_converter.py
verl/models/mcore/loader.py
verl/models/mcore/mbridge.py
verl/models/mcore/model_forward.py
verl/models/mcore/model_forward_1f1b_overlap.py
verl/models/mcore/model_forward_fused.py
verl/models/mcore/model_initializer.py
verl/models/mcore/mtp_patch.py
verl/models/mcore/patch.py
verl/models/mcore/registry.py
verl/models/mcore/saver.py
verl/models/mcore/util.py
verl/models/mcore/weight_converter.py
verl/models/mcore/qwen2_5_vl/__init__.py
verl/models/mcore/qwen2_5_vl/attention.py
verl/models/mcore/qwen2_5_vl/model.py
verl/models/mcore/qwen2_5_vl/rope_utils.py
verl/models/mcore/qwen2_5_vl/vision_config.py
verl/models/mcore/qwen2_5_vl/vision_model.py
verl/models/mcore/qwen2_5_vl/vision_transformer_block.py
verl/models/qwen2/__init__.py
verl/models/qwen2/megatron/__init__.py
verl/models/qwen2/megatron/modeling_qwen2_megatron.py
verl/models/qwen2/megatron/checkpoint_utils/__init__.py
verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py
verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader_depracated.py
verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py
verl/models/qwen2/megatron/layers/__init__.py
verl/models/qwen2/megatron/layers/parallel_attention.py
verl/models/qwen2/megatron/layers/parallel_decoder.py
verl/models/qwen2/megatron/layers/parallel_linear.py
verl/models/qwen2/megatron/layers/parallel_mlp.py
verl/models/qwen2/megatron/layers/parallel_rmsnorm.py
verl/models/transformers/__init__.py
verl/models/transformers/apertus.py
verl/models/transformers/dense_common.py
verl/models/transformers/glm4v.py
verl/models/transformers/kimi_vl.py
verl/models/transformers/llama.py
verl/models/transformers/monkey_patch.py
verl/models/transformers/npu_patch.py
verl/models/transformers/qwen2.py
verl/models/transformers/qwen2_vl.py
verl/models/transformers/qwen3_vl.py
verl/models/transformers/tiled_mlp.py
verl/single_controller/__init__.py
verl/single_controller/base/__init__.py
verl/single_controller/base/decorator.py
verl/single_controller/base/worker.py
verl/single_controller/base/worker_group.py
verl/single_controller/ray/__init__.py
verl/single_controller/ray/base.py
verl/third_party/__init__.py
verl/third_party/torch/__init__.py
verl/third_party/torch/distributed/__init__.py
verl/third_party/torch/distributed/_state_dict_utils.py
verl/third_party/torch/distributed/checkpoint/__init__.py
verl/third_party/torch/distributed/checkpoint/state_dict.py
verl/third_party/vllm/__init__.py
verl/tools/__init__.py
verl/tools/base_tool.py
verl/tools/geo3k_tool.py
verl/tools/gsm8k_tool.py
verl/tools/image_zoom_in_tool.py
verl/tools/mcp_base_tool.py
verl/tools/mcp_search_tool.py
verl/tools/sandbox_fusion_tools.py
verl/tools/schemas.py
verl/tools/search_tool.py
verl/tools/utils/__init__.py
verl/tools/utils/search_r1_like_utils.py
verl/tools/utils/tool_registry.py
verl/trainer/__init__.py
verl/trainer/constants_ppo.py
verl/trainer/main_eval.py
verl/trainer/main_generation_server.py
verl/trainer/main_ppo.py
verl/trainer/sft_trainer.py
verl/trainer/sft_trainer_ray.py
verl/trainer/config/__init__.py
verl/trainer/config/_generated_ppo_megatron_trainer.yaml
verl/trainer/config/_generated_ppo_torchtitan_trainer.yaml
verl/trainer/config/_generated_ppo_trainer.yaml
verl/trainer/config/_generated_ppo_veomni_trainer.yaml
verl/trainer/config/algorithm.py
verl/trainer/config/config.py
verl/trainer/config/evaluation.yaml
verl/trainer/config/legacy_reward_impl.yaml
verl/trainer/config/ppo_megatron_trainer.yaml
verl/trainer/config/ppo_trainer.yaml
verl/trainer/config/sft_trainer_engine.yaml
verl/trainer/config/actor/actor.yaml
verl/trainer/config/actor/dp_actor.yaml
verl/trainer/config/actor/megatron_actor.yaml
verl/trainer/config/actor/torchtitan_actor.yaml
verl/trainer/config/actor/veomni_actor.yaml
verl/trainer/config/algorithm/rollout_correction.yaml
verl/trainer/config/critic/critic.yaml
verl/trainer/config/critic/dp_critic.yaml
verl/trainer/config/critic/megatron_critic.yaml
verl/trainer/config/critic/torchtitan_critic.yaml
verl/trainer/config/critic/veomni_critic.yaml
verl/trainer/config/data/legacy_data.yaml
verl/trainer/config/engine/fsdp.yaml
verl/trainer/config/engine/megatron.yaml
verl/trainer/config/engine/torchtitan.yaml
verl/trainer/config/engine/veomni.yaml
verl/trainer/config/model/hf_model.yaml
verl/trainer/config/model_engine/dp.yaml
verl/trainer/config/model_engine/torchtitan.yaml
verl/trainer/config/model_engine/veomni.yaml
verl/trainer/config/npu_profile/npu_profile.yaml
verl/trainer/config/optim/fsdp.yaml
verl/trainer/config/optim/megatron.yaml
verl/trainer/config/optim/torchtitan.yaml
verl/trainer/config/optim/veomni.yaml
verl/trainer/config/profiler/profiler.yaml
verl/trainer/config/ref/dp_ref.yaml
verl/trainer/config/ref/megatron_ref.yaml
verl/trainer/config/ref/ref.yaml
verl/trainer/config/ref/torchtitan_ref.yaml
verl/trainer/config/ref/veomni_ref.yaml
verl/trainer/config/reward/reward.yaml
verl/trainer/config/rollout/rollout.yaml
verl/trainer/ppo/__init__.py
verl/trainer/ppo/core_algos.py
verl/trainer/ppo/metric_utils.py
verl/trainer/ppo/prefix_grouper_utils.py
verl/trainer/ppo/ray_trainer.py
verl/trainer/ppo/reward.py
verl/trainer/ppo/rollout_corr_helper.py
verl/trainer/ppo/utils.py
verl/utils/__init__.py
verl/utils/activation_offload.py
verl/utils/attention_utils.py
verl/utils/chat_template.py
verl/utils/config.py
verl/utils/device.py
verl/utils/distributed.py
verl/utils/flops_counter.py
verl/utils/fp8_utils.py
verl/utils/fs.py
verl/utils/fsdp_utils.py
verl/utils/groupwise.py
verl/utils/hdfs_io.py
verl/utils/import_utils.py
verl/utils/logging_utils.py
verl/utils/megatron_peft_utils.py
verl/utils/megatron_utils.py
verl/utils/memory_utils.py
verl/utils/model.py
verl/utils/net_utils.py
verl/utils/npu_flash_attn_utils.py
verl/utils/py_functional.py
verl/utils/ray_utils.py
verl/utils/rollout_skip.py
verl/utils/rollout_trace.py
verl/utils/seqlen_balancing.py
verl/utils/tensordict_utils.py
verl/utils/tokenizer.py
verl/utils/torch_dtypes.py
verl/utils/torch_functional.py
verl/utils/tracking.py
verl/utils/transformers_compat.py
verl/utils/ulysses.py
verl/utils/checkpoint/__init__.py
verl/utils/checkpoint/checkpoint_handler.py
verl/utils/checkpoint/checkpoint_manager.py
verl/utils/checkpoint/fsdp_checkpoint_manager.py
verl/utils/checkpoint/megatron_checkpoint_manager.py
verl/utils/dataset/__init__.py
verl/utils/dataset/dataset_utils.py
verl/utils/dataset/multiturn_sft_dataset.py
verl/utils/dataset/rl_dataset.py
verl/utils/dataset/rm_dataset.py
verl/utils/dataset/vision_utils.py
verl/utils/debug/__init__.py
verl/utils/debug/metrics.py
verl/utils/debug/performance.py
verl/utils/debug/trajectory_tracker.py
verl/utils/experimental/__init__.py
verl/utils/experimental/torch_functional.py
verl/utils/kernel/__init__.py
verl/utils/kernel/fp8_kernel.py
verl/utils/kernel/kernels.py
verl/utils/kernel/linear_cross_entropy.py
verl/utils/logger/__init__.py
verl/utils/logger/aggregate_logger.py
verl/utils/megatron/__init__.py
verl/utils/megatron/dist_checkpointing.py
verl/utils/megatron/memory.py
verl/utils/megatron/optimizer.py
verl/utils/megatron/pipeline_parallel.py
verl/utils/megatron/router_replay_patch.py
verl/utils/megatron/router_replay_utils.py
verl/utils/megatron/sequence_parallel.py
verl/utils/megatron/tensor_parallel.py
verl/utils/metric/__init__.py
verl/utils/metric/utils.py
verl/utils/profiler/__init__.py
verl/utils/profiler/config.py
verl/utils/profiler/empty_annotations.py
verl/utils/profiler/mstx_profile.py
verl/utils/profiler/nvtx_profile.py
verl/utils/profiler/performance.py
verl/utils/profiler/profile.py
verl/utils/profiler/torch_profile.py
verl/utils/qat/__init__.py
verl/utils/qat/core.py
verl/utils/qat/linear.py
verl/utils/qat/quantizer.py
verl/utils/qat/vllm_patch.py
verl/utils/rendezvous/__init__.py
verl/utils/rendezvous/ray_backend.py
verl/utils/reward_score/__init__.py
verl/utils/reward_score/geo3k.py
verl/utils/reward_score/gsm8k.py
verl/utils/reward_score/math_batch.py
verl/utils/reward_score/math_dapo.py
verl/utils/reward_score/math_reward.py
verl/utils/reward_score/math_verify.py
verl/utils/reward_score/rlla.py
verl/utils/reward_score/search_r1_like_qa_em.py
verl/utils/reward_score/prime_code/__init__.py
verl/utils/reward_score/prime_code/testing_util.py
verl/utils/reward_score/prime_code/utils.py
verl/utils/reward_score/prime_math/__init__.py
verl/utils/reward_score/prime_math/grader.py
verl/utils/reward_score/prime_math/math_normalize.py
verl/utils/reward_score/sandbox_fusion/__init__.py
verl/utils/reward_score/sandbox_fusion/utils.py
verl/utils/vllm/__init__.py
verl/utils/vllm/patch.py
verl/utils/vllm/utils.py
verl/utils/vllm/vllm_fp8_utils.py
verl/version/version
verl/workers/__init__.py
verl/workers/engine_workers.py
verl/workers/fsdp_workers.py
verl/workers/megatron_workers.py
verl/workers/actor/__init__.py
verl/workers/actor/base.py
verl/workers/actor/dp_actor.py
verl/workers/actor/megatron_actor.py
verl/workers/config/__init__.py
verl/workers/config/actor.py
verl/workers/config/critic.py
verl/workers/config/engine.py
verl/workers/config/megatron_peft.py
verl/workers/config/model.py
verl/workers/config/optimizer.py
verl/workers/config/reward.py
verl/workers/config/rollout.py
verl/workers/critic/__init__.py
verl/workers/critic/base.py
verl/workers/critic/dp_critic.py
verl/workers/critic/megatron_critic.py
verl/workers/engine/__init__.py
verl/workers/engine/base.py
verl/workers/engine/utils.py
verl/workers/engine/fsdp/__init__.py
verl/workers/engine/fsdp/transformer_impl.py
verl/workers/engine/fsdp/utils.py
verl/workers/engine/megatron/__init__.py
verl/workers/engine/megatron/transformer_impl.py
verl/workers/engine/megatron/utils.py
verl/workers/engine/mindspeed/__init__.py
verl/workers/engine/mindspeed/transformer_impl.py
verl/workers/engine/torchtitan/__init__.py
verl/workers/engine/torchtitan/transformer_impl.py
verl/workers/engine/torchtitan/utils.py
verl/workers/engine/veomni/__init__.py
verl/workers/engine/veomni/transformer_impl.py
verl/workers/engine/veomni/utils.py
verl/workers/reward_manager/__init__.py
verl/workers/reward_manager/abstract.py
verl/workers/reward_manager/batch.py
verl/workers/reward_manager/dapo.py
verl/workers/reward_manager/naive.py
verl/workers/reward_manager/prime.py
verl/workers/reward_manager/registry.py
verl/workers/rollout/__init__.py
verl/workers/rollout/base.py
verl/workers/rollout/hf_rollout.py
verl/workers/rollout/replica.py
verl/workers/rollout/schemas.py
verl/workers/rollout/tokenizer.py
verl/workers/rollout/utils.py
verl/workers/rollout/naive/__init__.py
verl/workers/rollout/naive/naive_rollout.py
verl/workers/rollout/sglang_rollout/__init__.py
verl/workers/rollout/sglang_rollout/async_sglang_server.py
verl/workers/rollout/sglang_rollout/http_server_engine.py
verl/workers/rollout/sglang_rollout/sglang_rollout.py
verl/workers/rollout/sglang_rollout/utils.py
verl/workers/rollout/vllm_rollout/__init__.py
verl/workers/rollout/vllm_rollout/bucketed_weight_transfer.py
verl/workers/rollout/vllm_rollout/utils.py
verl/workers/rollout/vllm_rollout/vllm_async_server.py
verl/workers/rollout/vllm_rollout/vllm_rollout.py
verl/workers/sharding_manager/__init__.py
verl/workers/sharding_manager/base.py
verl/workers/sharding_manager/fsdp_ulysses.py
verl/workers/utils/__init__.py
verl/workers/utils/losses.py
verl/workers/utils/padding.py