.dockerignore
.gitignore
.pre-commit-config.yaml
.python-version
ATTRIBUTIONS.md
CLAUDE.md
CODE_OF_CONDUCT.md
CONTRIBUTING.md
LICENSE
README.md
SECURITY.md
codecov.yml
pyproject.toml
uv.lock
.claude/skills/add-benchmark/SKILL.md
.claude/skills/add-benchmark/references/patterns.md
.github/CODEOWNERS
.github/copy-pr-bot.yaml
.github/ISSUE_TEMPLATE/bug.md
.github/ISSUE_TEMPLATE/feature.md
.github/workflows/_build_container.yml
.github/workflows/build-docs.yml
.github/workflows/build-test-publish-wheel.yml
.github/workflows/cherry-pick-release-commit.yml
.github/workflows/close-inactive-issue-pr.yml
.github/workflows/code-linting.yml
.github/workflows/copyright-check.yml
.github/workflows/release-freeze.yml
.github/workflows/release.yaml
.github/workflows/secrets-detector.yml
.github/workflows/unit-tests.yml
.github/workflows/config/.secrets.baseline
cache/.gitignore
cache/nemo_gym.egg-info/PKG-INFO
cache/nemo_gym.egg-info/SOURCES.txt
cache/nemo_gym.egg-info/dependency_links.txt
cache/nemo_gym.egg-info/entry_points.txt
cache/nemo_gym.egg-info/requires.txt
cache/nemo_gym.egg-info/top_level.txt
data/.gitignore
docs/Makefile
docs/autodoc2_docstrings_parser.py
docs/conf.py
docs/index.md
docs/project.json
docs/versions1.json
docs/_images/product_overview.svg
docs/about/ecosystem.md
docs/about/index.md
docs/about/concepts/architecture.md
docs/about/concepts/configuration.md
docs/about/concepts/core-components.md
docs/about/concepts/index.md
docs/about/concepts/key-terminology.md
docs/about/concepts/task-verification.md
docs/about/concepts/training-approaches.md
docs/agent-server/index.md
docs/contribute/development-setup.md
docs/contribute/index.md
docs/contribute/environments/index.md
docs/contribute/environments/new-environment.md
docs/contribute/rl-framework-integration/generation-backend-and-openai-compatible-http-server.md
docs/contribute/rl-framework-integration/gym-integration-footprint-and-form-factor.md
docs/contribute/rl-framework-integration/gym-rl-framework-integration-success-criteria.md
docs/contribute/rl-framework-integration/index.md
docs/contribute/rl-framework-integration/openai-compatible-http-server-on-policy-correction.md
docs/data/download-huggingface.md
docs/data/index.md
docs/data/prepare-validate.md
docs/environment-tutorials/adding-a-benchmark.md
docs/environment-tutorials/designing-customer-evaluation.md
docs/environment-tutorials/index.md
docs/environment-tutorials/integrate-external-environments.md
docs/environment-tutorials/multi-step-environment.md
docs/environment-tutorials/real-world-data-generation.md
docs/environment-tutorials/real-world-environment.md
docs/environment-tutorials/real-world-implementation.md
docs/environment-tutorials/single-step-environment.md
docs/environment-tutorials/stateful-environment.md
docs/get-started/detailed-setup.md
docs/get-started/index.md
docs/get-started/rollout-collection.md
docs/infrastructure/deployment-topology.md
docs/infrastructure/index.md
docs/infrastructure/engineering-notes/aiohttp-vs-httpx.md
docs/infrastructure/engineering-notes/index.md
docs/infrastructure/engineering-notes/responses-api-evolution.md
docs/infrastructure/engineering-notes/swe-rl-case-study.md
docs/model-recipes/index.md
docs/model-recipes/nemotron-3-nano.md
docs/model-recipes/nemotron-3-super.md
docs/model-server/index.md
docs/model-server/vllm.md
docs/reference/cli-commands.md
docs/reference/configuration.md
docs/reference/faq.md
docs/reference/index.md
docs/reference/rl-framework-compatibility.md
docs/resources-server/index.md
docs/training/index.md
docs/training-tutorials/index.md
docs/training-tutorials/multi-environment-training.md
docs/training-tutorials/offline-training-w-rollouts.md
docs/training-tutorials/unsloth.md
docs/training-tutorials/nemo-rl-grpo/about-workplace-assistant.md
docs/training-tutorials/nemo-rl-grpo/gym-configuration.md
docs/training-tutorials/nemo-rl-grpo/index.md
docs/training-tutorials/nemo-rl-grpo/multi-node-training.md
docs/training-tutorials/nemo-rl-grpo/nemo-rl-configuration.md
docs/training-tutorials/nemo-rl-grpo/setup.md
docs/training-tutorials/nemo-rl-grpo/single-node-training.md
docs/troubleshooting/configuration.md
docs/troubleshooting/index.md
nemo_gym/__init__.py
nemo_gym/base_resources_server.py
nemo_gym/base_responses_api_agent.py
nemo_gym/base_responses_api_model.py
nemo_gym/cli.py
nemo_gym/cli_setup_command.py
nemo_gym/config_types.py
nemo_gym/dataset_orchestrator.py
nemo_gym/gitlab_utils.py
nemo_gym/global_config.py
nemo_gym/hf_utils.py
nemo_gym/openai_utils.py
nemo_gym/package_info.py
nemo_gym/profiling.py
nemo_gym/reward_profile.py
nemo_gym/rollout_collection.py
nemo_gym/server_status.py
nemo_gym/server_utils.py
nemo_gym/train_data_utils.py
resources/resources_server_template.py
resources/resources_server_test_template.py
resources/rl_verifiers_system_design.png
resources_servers/.gitignore
resources_servers/arc_agi/README.md
resources_servers/arc_agi/app.py
resources_servers/arc_agi/create_dataset.py
resources_servers/arc_agi/requirements.txt
resources_servers/arc_agi/configs/arc_agi.yaml
resources_servers/arc_agi/data/example.jsonl
resources_servers/arc_agi/data/example_metrics.json
resources_servers/arc_agi/data/example_prepare.jsonl
resources_servers/arc_agi/data/example_rollouts.jsonl
resources_servers/arc_agi/tests/__init__.py
resources_servers/arc_agi/tests/test_app.py
resources_servers/aviary/README.md
resources_servers/aviary/__init__.py
resources_servers/aviary/app.py
resources_servers/aviary/client_app.py
resources_servers/aviary/gsm8k_app.py
resources_servers/aviary/hotpotqa_app.py
resources_servers/aviary/notebook_app.py
resources_servers/aviary/requirements.txt
resources_servers/aviary/schemas.py
resources_servers/aviary/configs/aviary.yaml
resources_servers/aviary/configs/bixbench_aviary.yaml
resources_servers/aviary/configs/gsm8k_aviary.yaml
resources_servers/aviary/configs/hotpotqa_aviary.yaml
resources_servers/aviary/data/.gitignore
resources_servers/aviary/data/bixbench_example.jsonl
resources_servers/aviary/data/example.jsonl
resources_servers/aviary/data/example_metrics.json
resources_servers/aviary/data/example_rollouts.jsonl
resources_servers/aviary/data/gsm8k_example.jsonl
resources_servers/aviary/data/hotpotqa_example.jsonl
resources_servers/aviary/tests/test_app.py
resources_servers/calendar/README.md
resources_servers/calendar/app.py
resources_servers/calendar/client.py
resources_servers/calendar/create_synth_conversations.py
resources_servers/calendar/dataset_preprocess.py
resources_servers/calendar/generate_rollouts.py
resources_servers/calendar/prompts.py
resources_servers/calendar/requirements.txt
resources_servers/calendar/utils.py
resources_servers/calendar/configs/calendar.yaml
resources_servers/calendar/configs/calendar_v2.yaml
resources_servers/calendar/data/.gitignore
resources_servers/calendar/data/example.jsonl
resources_servers/calendar/data/example_metrics.json
resources_servers/calendar/data/example_rollouts.jsonl
resources_servers/calendar/tests/README.md
resources_servers/calendar/tests/test_app.py
resources_servers/code_gen/README.md
resources_servers/code_gen/analyze_test_cases.py
resources_servers/code_gen/app.py
resources_servers/code_gen/livecodebench_accuracy_test.py
resources_servers/code_gen/livecodebench_accuracy_test_prep.py
resources_servers/code_gen/requirements.txt
resources_servers/code_gen/configs/code_gen.yaml
resources_servers/code_gen/data/.gitignore
resources_servers/code_gen/data/example.jsonl
resources_servers/code_gen/data/example_metrics.json
resources_servers/code_gen/data/example_rollouts.jsonl
resources_servers/code_gen/data/livecodebench_v5_2024-07-01_2025-02-01_validation_metrics.json
resources_servers/code_gen/data/opencodereasoning_filtered_25k_train_metrics.json
resources_servers/code_gen/data/train_metrics.json
resources_servers/code_gen/lcb_integration/README.md
resources_servers/code_gen/lcb_integration/__init__.py
resources_servers/code_gen/lcb_integration/compute_code_generation_metrics.py
resources_servers/code_gen/lcb_integration/extraction_utils.py
resources_servers/code_gen/lcb_integration/lm_styles.py
resources_servers/code_gen/lcb_integration/pass_k_utils.py
resources_servers/code_gen/lcb_integration/testing_util.py
resources_servers/code_gen/scripts/preprocess_train_dataset.py
resources_servers/code_gen/tests/__init__.py
resources_servers/code_gen/tests/test_app.py
resources_servers/equivalence_llm_judge/README.md
resources_servers/equivalence_llm_judge/app.py
resources_servers/equivalence_llm_judge/prepare_sciq.py
resources_servers/equivalence_llm_judge/requirements.txt
resources_servers/equivalence_llm_judge/configs/equivalence_llm_judge.yaml
resources_servers/equivalence_llm_judge/configs/lc_judge.yaml
resources_servers/equivalence_llm_judge/configs/nl2bash-equivalency.yaml
resources_servers/equivalence_llm_judge/data/example.jsonl
resources_servers/equivalence_llm_judge/data/example_metrics.json
resources_servers/equivalence_llm_judge/data/example_metrics_openqa.json
resources_servers/equivalence_llm_judge/data/example_nl2bash.jsonl
resources_servers/equivalence_llm_judge/data/example_openqa.jsonl
resources_servers/equivalence_llm_judge/data/example_openqa_metrics.json
resources_servers/equivalence_llm_judge/data/example_prepare.jsonl
resources_servers/equivalence_llm_judge/data/example_rollouts.jsonl
resources_servers/equivalence_llm_judge/data/example_rollouts_openqa.jsonl
resources_servers/equivalence_llm_judge/prompt_templates/equivalence_llm_judge.txt
resources_servers/equivalence_llm_judge/prompt_templates/lc.txt
resources_servers/equivalence_llm_judge/prompt_templates/lc_judge.txt
resources_servers/equivalence_llm_judge/tests/test_app.py
resources_servers/example_multi_step/README.md
resources_servers/example_multi_step/app.py
resources_servers/example_multi_step/dataset_preprocess.py
resources_servers/example_multi_step/requirements.txt
resources_servers/example_multi_step/train_dataset_config.yaml
resources_servers/example_multi_step/configs/example_multi_step.yaml
resources_servers/example_multi_step/data/.gitignore
resources_servers/example_multi_step/data/example.jsonl
resources_servers/example_multi_step/data/example_metrics.json
resources_servers/example_multi_step/data/example_rollouts.jsonl
resources_servers/example_multi_step/data/train_metrics.json
resources_servers/example_multi_step/data/validation_metrics.json
resources_servers/example_multi_step/tests/__init__.py
resources_servers/example_multi_step/tests/test_app.py
resources_servers/example_session_state_mgmt/README.md
resources_servers/example_session_state_mgmt/app.py
resources_servers/example_session_state_mgmt/client.py
resources_servers/example_session_state_mgmt/create_examples.py
resources_servers/example_session_state_mgmt/requirements.txt
resources_servers/example_session_state_mgmt/configs/example_session_state_mgmt.yaml
resources_servers/example_session_state_mgmt/data/.gitignore
resources_servers/example_session_state_mgmt/data/example.jsonl
resources_servers/example_session_state_mgmt/data/example_metrics.json
resources_servers/example_session_state_mgmt/data/example_rollouts.jsonl
resources_servers/example_session_state_mgmt/tests/test_app.py
resources_servers/example_single_tool_call/README.md
resources_servers/example_single_tool_call/app.py
resources_servers/example_single_tool_call/client.py
resources_servers/example_single_tool_call/create_examples.py
resources_servers/example_single_tool_call/requirements.txt
resources_servers/example_single_tool_call/configs/example_single_tool_call.yaml
resources_servers/example_single_tool_call/data/.gitignore
resources_servers/example_single_tool_call/data/example.jsonl
resources_servers/example_single_tool_call/data/example_metrics.json
resources_servers/example_single_tool_call/data/example_rollouts.jsonl
resources_servers/example_single_tool_call/tests/test_app.py
resources_servers/genrm_compare/README.md
resources_servers/genrm_compare/__init__.py
resources_servers/genrm_compare/app.py
resources_servers/genrm_compare/comparison_strategies.py
resources_servers/genrm_compare/requirements.txt
resources_servers/genrm_compare/utils.py
resources_servers/genrm_compare/configs/genrm_compare.yaml
resources_servers/genrm_compare/data/example.jsonl
resources_servers/genrm_compare/data/example_metrics.json
resources_servers/genrm_compare/data/example_rollouts.jsonl
resources_servers/genrm_compare/tests/__init__.py
resources_servers/genrm_compare/tests/test_app.py
resources_servers/genrm_compare/tests/test_comparison_strategies.py
resources_servers/genrm_compare/tests/test_utils.py
resources_servers/google_search/README.md
resources_servers/google_search/app.py
resources_servers/google_search/client.py
resources_servers/google_search/requirements.txt
resources_servers/google_search/configs/google_search.yaml
resources_servers/google_search/data/.gitignore
resources_servers/google_search/data/example.jsonl
resources_servers/google_search/data/example_metrics.json
resources_servers/google_search/data/example_rollouts.jsonl
resources_servers/google_search/data/train_metrics.json
resources_servers/google_search/tests/test_app.py
resources_servers/instruction_following/README.md
resources_servers/instruction_following/app.py
resources_servers/instruction_following/requirements.txt
resources_servers/instruction_following/configs/instruction_following.yaml
resources_servers/instruction_following/data/.gitignore
resources_servers/instruction_following/data/example.jsonl
resources_servers/instruction_following/data/example_metrics.json
resources_servers/instruction_following/data/example_rollouts.jsonl
resources_servers/instruction_following/data/train_metrics.json
resources_servers/instruction_following/tests/__init__.py
resources_servers/instruction_following/tests/test_app.py
resources_servers/jailbreak_detection/README.md
resources_servers/jailbreak_detection/app.py
resources_servers/jailbreak_detection/requirements.txt
resources_servers/jailbreak_detection/util.py
resources_servers/jailbreak_detection/configs/jailbreak_detection_nemotron_combined_reward_tp8.yaml
resources_servers/jailbreak_detection/data/.gitignore
resources_servers/jailbreak_detection/data/example.jsonl
resources_servers/jailbreak_detection/data/example_metrics.json
resources_servers/jailbreak_detection/data/example_rollouts.jsonl
resources_servers/jailbreak_detection/tests/__init__.py
resources_servers/jailbreak_detection/tests/test_app.py
resources_servers/math_advanced_calculations/README.md
resources_servers/math_advanced_calculations/__init__.py
resources_servers/math_advanced_calculations/app.py
resources_servers/math_advanced_calculations/client.py
resources_servers/math_advanced_calculations/dataset_preprocess.py
resources_servers/math_advanced_calculations/math_advanced_calculations_tools.py
resources_servers/math_advanced_calculations/requirements.txt
resources_servers/math_advanced_calculations/seed_prompt_creation.py
resources_servers/math_advanced_calculations/configs/math_advanced_calculations.yaml
resources_servers/math_advanced_calculations/data/.gitignore
resources_servers/math_advanced_calculations/data/example.jsonl
resources_servers/math_advanced_calculations/data/example_metrics.json
resources_servers/math_advanced_calculations/data/example_rollouts.jsonl
resources_servers/math_advanced_calculations/data/train_metrics.json
resources_servers/math_advanced_calculations/tests/__init__.py
resources_servers/math_advanced_calculations/tests/test_app.py
resources_servers/math_formal_lean/README.md
resources_servers/math_formal_lean/__init__.py
resources_servers/math_formal_lean/app.py
resources_servers/math_formal_lean/prepare_minif2f.py
resources_servers/math_formal_lean/prepare_nemotron_math_proofs.py
resources_servers/math_formal_lean/proof_utils.py
resources_servers/math_formal_lean/requirements.txt
resources_servers/math_formal_lean/sandbox_client.py
resources_servers/math_formal_lean/configs/math_formal_lean.yaml
resources_servers/math_formal_lean/configs/math_formal_lean_multi_turn.yaml
resources_servers/math_formal_lean/configs/nemotron_clean_easy.yaml
resources_servers/math_formal_lean/configs/nemotron_first_try_hard.yaml
resources_servers/math_formal_lean/configs/nemotron_medium_500.yaml
resources_servers/math_formal_lean/configs/nemotron_very_easy.yaml
resources_servers/math_formal_lean/data/example.jsonl
resources_servers/math_formal_lean/data/example_metrics.json
resources_servers/math_formal_lean/data/example_rollouts.jsonl
resources_servers/math_formal_lean/data/minif2f_test.jsonl
resources_servers/math_formal_lean/data/minif2f_valid.jsonl
resources_servers/math_formal_lean/data/multi_turn_full_example.jsonl
resources_servers/math_formal_lean/data/multi_turn_success_examples.jsonl
resources_servers/math_formal_lean/tests/__init__.py
resources_servers/math_formal_lean/tests/test_app.py
resources_servers/math_formal_lean/tests/test_proof_utils.py
resources_servers/math_with_code/README.md
resources_servers/math_with_code/app.py
resources_servers/math_with_code/client.py
resources_servers/math_with_code/requirements.txt
resources_servers/math_with_code/configs/math_with_code.yaml
resources_servers/math_with_code/data/.gitignore
resources_servers/math_with_code/data/example.jsonl
resources_servers/math_with_code/data/example_metrics.json
resources_servers/math_with_code/data/example_rollouts.jsonl
resources_servers/math_with_code/data/train_metrics.json
resources_servers/math_with_code/tests/test_app.py
resources_servers/math_with_judge/README.md
resources_servers/math_with_judge/app.py
resources_servers/math_with_judge/client.py
resources_servers/math_with_judge/filter_for_mixed_rewards.py
resources_servers/math_with_judge/prepare_aime24.py
resources_servers/math_with_judge/prepare_bytedtsinghua_dapo17k_aime24.py
resources_servers/math_with_judge/prepare_dapo17k.py
resources_servers/math_with_judge/requirements.txt
resources_servers/math_with_judge/configs/dapo17k.yaml
resources_servers/math_with_judge/configs/math_stack_overflow.yaml
resources_servers/math_with_judge/configs/math_with_judge.yaml
resources_servers/math_with_judge/configs/math_with_local_judge.yaml
resources_servers/math_with_judge/data/.gitignore
resources_servers/math_with_judge/data/OpenMathReasoning_aime24_validation_metrics.json
resources_servers/math_with_judge/data/OpenMathReasoning_train_metrics.json
resources_servers/math_with_judge/data/aime24_bytedtsinghua_validation_metrics.json
resources_servers/math_with_judge/data/aime24_validation_metrics.json
resources_servers/math_with_judge/data/dapo17k_bytedtsinghua_train_metrics.json
resources_servers/math_with_judge/data/dapo17k_train_metrics.json
resources_servers/math_with_judge/data/example.jsonl
resources_servers/math_with_judge/data/example_metrics.json
resources_servers/math_with_judge/data/example_rollouts.jsonl
resources_servers/math_with_judge/data/math_stack_overflow_train_metrics.json
resources_servers/math_with_judge/data/train_metrics.json
resources_servers/math_with_judge/tests/test_app.py
resources_servers/mcqa/README.md
resources_servers/mcqa/app.py
resources_servers/mcqa/dataset_preprocess.py
resources_servers/mcqa/requirements.txt
resources_servers/mcqa/configs/mcqa.yaml
resources_servers/mcqa/data/.gitignore
resources_servers/mcqa/data/example.jsonl
resources_servers/mcqa/data/example_metrics.json
resources_servers/mcqa/data/example_rollouts.jsonl
resources_servers/mcqa/data/example_rollouts_with_template_metadata.jsonl
resources_servers/mcqa/data/example_with_template_metadata.jsonl
resources_servers/mcqa/data/example_with_template_metadata_metrics.json
resources_servers/mcqa/data/train_metrics.json
resources_servers/mcqa/tests/__init__.py
resources_servers/mcqa/tests/test_app.py
resources_servers/mini_swe_agent/README.md
resources_servers/mini_swe_agent/app.py
resources_servers/mini_swe_agent/dataset_preprocess.py
resources_servers/mini_swe_agent/requirements.txt
resources_servers/mini_swe_agent/configs/mini_swe_agent.yaml
resources_servers/mini_swe_agent/data/.gitignore
resources_servers/mini_swe_agent/data/example.jsonl
resources_servers/mini_swe_agent/data/example_metrics.json
resources_servers/mini_swe_agent/data/example_rollouts.jsonl
resources_servers/mini_swe_agent/tests/test_app.py
resources_servers/multichallenge/.gitignore
resources_servers/multichallenge/README.md
resources_servers/multichallenge/app.py
resources_servers/multichallenge/dataset_preprocess.py
resources_servers/multichallenge/requirements.txt
resources_servers/multichallenge/configs/multichallenge.yaml
resources_servers/multichallenge/configs/multichallenge_nrl.yaml
resources_servers/multichallenge/data/.gitignore
resources_servers/multichallenge/data/README.md
resources_servers/multichallenge/data/example.jsonl
resources_servers/multichallenge/data/example_metrics.json
resources_servers/multichallenge/data/example_rollouts.jsonl
resources_servers/multichallenge/tests/__init__.py
resources_servers/multichallenge/tests/test_multichallenge.py
resources_servers/newton_bench/README.md
resources_servers/newton_bench/app.py
resources_servers/newton_bench/client.py
resources_servers/newton_bench/generate_dataset.py
resources_servers/newton_bench/requirements.txt
resources_servers/newton_bench/setup_newton_bench.py
resources_servers/newton_bench/configs/newton_bench.yaml
resources_servers/newton_bench/data/example.jsonl
resources_servers/newton_bench/data/example_metrics.json
resources_servers/newton_bench/data/example_rollouts.jsonl
resources_servers/newton_bench/newton_bench_utils/prompt_utils.py
resources_servers/newton_bench/newton_bench_utils/sandbox.py
resources_servers/newton_bench/newton_bench_utils/schemas.py
resources_servers/newton_bench/tests/conftest.py
resources_servers/newton_bench/tests/test_app.py
resources_servers/ns_tools/README.md
resources_servers/ns_tools/app.py
resources_servers/ns_tools/prepare_dataset.py
resources_servers/ns_tools/requirements.txt
resources_servers/ns_tools/configs/ns_tools.yaml
resources_servers/ns_tools/data/compmath_test.jsonl
resources_servers/ns_tools/data/example.jsonl
resources_servers/ns_tools/data/example_metrics.json
resources_servers/ns_tools/data/example_rollouts.jsonl
resources_servers/ns_tools/tests/__init__.py
resources_servers/ns_tools/tests/test_app.py
resources_servers/over_refusal_detection/README.md
resources_servers/over_refusal_detection/app.py
resources_servers/over_refusal_detection/requirements.txt
resources_servers/over_refusal_detection/util.py
resources_servers/over_refusal_detection/configs/over_refusal_detection.yaml
resources_servers/over_refusal_detection/configs/over_refusal_detection_nemotron.yaml
resources_servers/over_refusal_detection/configs/over_refusal_detection_nemotron_tp8.yaml
resources_servers/over_refusal_detection/data/.gitignore
resources_servers/over_refusal_detection/data/example.jsonl
resources_servers/over_refusal_detection/data/example_metrics.json
resources_servers/over_refusal_detection/data/example_rollouts.jsonl
resources_servers/over_refusal_detection/tests/__init__.py
resources_servers/over_refusal_detection/tests/test_app.py
resources_servers/reasoning_gym/README.md
resources_servers/reasoning_gym/app.py
resources_servers/reasoning_gym/requirements.txt
resources_servers/reasoning_gym/configs/reasoning_gym.yaml
resources_servers/reasoning_gym/configs/resources_only.yaml
resources_servers/reasoning_gym/data/example.jsonl
resources_servers/reasoning_gym/data/example_metrics.json
resources_servers/reasoning_gym/data/example_rollouts.jsonl
resources_servers/reasoning_gym/scripts/create_dataset.py
resources_servers/reasoning_gym/tests/test_app.py
resources_servers/single_step_tool_use_with_argument_comparison/README.md
resources_servers/single_step_tool_use_with_argument_comparison/__init__.py
resources_servers/single_step_tool_use_with_argument_comparison/app.py
resources_servers/single_step_tool_use_with_argument_comparison/requirements.txt
resources_servers/single_step_tool_use_with_argument_comparison/common/__init__.py
resources_servers/single_step_tool_use_with_argument_comparison/common/response_utils.py
resources_servers/single_step_tool_use_with_argument_comparison/common/verification_utils.py
resources_servers/single_step_tool_use_with_argument_comparison/configs/search_pivot_single_step_tool_use_with_argument_comparison.yaml
resources_servers/single_step_tool_use_with_argument_comparison/configs/single_step_tool_use_with_argument_comparison.yaml
resources_servers/single_step_tool_use_with_argument_comparison/configs/swe_pivot_single_step_tool_use_with_argument_comparison.yaml
resources_servers/single_step_tool_use_with_argument_comparison/configs/toolcall_schema_single_step_tool_use_with_argument_comparison.yaml
resources_servers/single_step_tool_use_with_argument_comparison/data/.gitignore
resources_servers/single_step_tool_use_with_argument_comparison/data/example.jsonl
resources_servers/single_step_tool_use_with_argument_comparison/data/example_metrics.json
resources_servers/single_step_tool_use_with_argument_comparison/data/example_rollouts.jsonl
resources_servers/single_step_tool_use_with_argument_comparison/tests/__init__.py
resources_servers/single_step_tool_use_with_argument_comparison/tests/test_app.py
resources_servers/single_step_tool_use_with_argument_comparison/tests/common/__init__.py
resources_servers/single_step_tool_use_with_argument_comparison/tests/common/test_response_utils.py
resources_servers/single_step_tool_use_with_argument_comparison/tests/common/test_verification_utils.py
resources_servers/structured_outputs/README.md
resources_servers/structured_outputs/app.py
resources_servers/structured_outputs/client.py
resources_servers/structured_outputs/requirements.txt
resources_servers/structured_outputs/configs/structured_outputs_json.yaml
resources_servers/structured_outputs/data/.gitignore
resources_servers/structured_outputs/data/example.jsonl
resources_servers/structured_outputs/data/example_metrics.json
resources_servers/structured_outputs/data/example_rollouts.jsonl
resources_servers/structured_outputs/data/structured_outputs_251027_nano_v3_sdg_json_train_metrics.json
resources_servers/structured_outputs/data/structured_outputs_251027_nano_v3_sdg_json_val_metrics.json
resources_servers/structured_outputs/misc/data_generation/.gitignore
resources_servers/structured_outputs/misc/data_generation/251027_nano_v3_sdg.py
resources_servers/structured_outputs/tests/__init__.py
resources_servers/structured_outputs/tests/test_app.py
resources_servers/swerl_gen/README.md
resources_servers/swerl_gen/app.py
resources_servers/swerl_gen/dataset_preprocess.py
resources_servers/swerl_gen/gen_eval_scripts.py
resources_servers/swerl_gen/prompts.py
resources_servers/swerl_gen/requirements.txt
resources_servers/swerl_gen/utils.py
resources_servers/swerl_gen/configs/swerl_gen.yaml
resources_servers/swerl_gen/data/.gitignore
resources_servers/swerl_gen/data/example.jsonl
resources_servers/swerl_gen/data/example_metrics.json
resources_servers/swerl_gen/data/example_rollouts.jsonl
resources_servers/swerl_gen/eval/__init__.py
resources_servers/swerl_gen/eval/eval_instance.py
resources_servers/swerl_gen/eval/process_patch.py
resources_servers/swerl_gen/eval/reward_functions.py
resources_servers/swerl_gen/eval/singularity_utils.py
resources_servers/swerl_gen/tests/__init__.py
resources_servers/swerl_gen/tests/test_app.py
resources_servers/swerl_llm_judge/README.md
resources_servers/swerl_llm_judge/app.py
resources_servers/swerl_llm_judge/dataset_preprocess.py
resources_servers/swerl_llm_judge/example.jsonl
resources_servers/swerl_llm_judge/example_metrics.json
resources_servers/swerl_llm_judge/prompts.py
resources_servers/swerl_llm_judge/requirements.txt
resources_servers/swerl_llm_judge/utils.py
resources_servers/swerl_llm_judge/configs/swerl_llm_judge.yaml
resources_servers/swerl_llm_judge/data/example.jsonl
resources_servers/swerl_llm_judge/data/example_metrics.json
resources_servers/swerl_llm_judge/data/example_rollouts.jsonl
resources_servers/swerl_llm_judge/tests/__init__.py
resources_servers/swerl_llm_judge/tests/test_app.py
resources_servers/tavily_search/README.md
resources_servers/tavily_search/app.py
resources_servers/tavily_search/client.py
resources_servers/tavily_search/judge_prompt.py
resources_servers/tavily_search/requirements.txt
resources_servers/tavily_search/configs/tavily_search_judge_openai_model.yaml
resources_servers/tavily_search/configs/tavily_search_judge_vllm_model.yaml
resources_servers/tavily_search/data/.gitignore
resources_servers/tavily_search/data/example.jsonl
resources_servers/tavily_search/data/example_metrics.json
resources_servers/tavily_search/data/example_rollouts.jsonl
resources_servers/tavily_search/data/preprocess_dataset/convert_simple_evals.py
resources_servers/tavily_search/data/preprocess_dataset/upload_datasets.sh
resources_servers/tavily_search/tests/dummy_exclude_domains_file.json
resources_servers/tavily_search/tests/test_app.py
resources_servers/terminus_judge/README.md
resources_servers/terminus_judge/app.py
resources_servers/terminus_judge/requirements.txt
resources_servers/terminus_judge/schemas.py
resources_servers/terminus_judge/configs/terminus_judge.yaml
resources_servers/terminus_judge/configs/terminus_judge_simple.yaml
resources_servers/terminus_judge/data/.gitignore
resources_servers/terminus_judge/data/example.jsonl
resources_servers/terminus_judge/data/example_metrics.json
resources_servers/terminus_judge/data/example_rollouts.jsonl
resources_servers/terminus_judge/prompt_templates/rubrics_v4.txt
resources_servers/terminus_judge/prompt_templates/terminus_prompt.txt
resources_servers/terminus_judge/tests/test_app.py
resources_servers/text_to_sql/README.md
resources_servers/text_to_sql/app.py
resources_servers/text_to_sql/prompts.py
resources_servers/text_to_sql/requirements.txt
resources_servers/text_to_sql/configs/text_to_sql.yaml
resources_servers/text_to_sql/data/.gitignore
resources_servers/text_to_sql/data/example.jsonl
resources_servers/text_to_sql/data/example_metrics.json
resources_servers/text_to_sql/data/example_rollouts.jsonl
resources_servers/text_to_sql/tests/__init__.py
resources_servers/text_to_sql/tests/test_app.py
resources_servers/workplace_assistant/README.md
resources_servers/workplace_assistant/app.py
resources_servers/workplace_assistant/client.py
resources_servers/workplace_assistant/dataset_preprocess.py
resources_servers/workplace_assistant/requirements.txt
resources_servers/workplace_assistant/utils.py
resources_servers/workplace_assistant/configs/workplace_assistant.yaml
resources_servers/workplace_assistant/csv_data/processed/analytics_data.csv
resources_servers/workplace_assistant/csv_data/processed/calendar_events.csv
resources_servers/workplace_assistant/csv_data/processed/customer_relationship_manager_data.csv
resources_servers/workplace_assistant/csv_data/processed/emails.csv
resources_servers/workplace_assistant/csv_data/processed/project_tasks.csv
resources_servers/workplace_assistant/csv_data/raw/email_addresses.csv
resources_servers/workplace_assistant/csv_data/raw/events.csv
resources_servers/workplace_assistant/data/.gitignore
resources_servers/workplace_assistant/data/example.jsonl
resources_servers/workplace_assistant/data/example_metrics.json
resources_servers/workplace_assistant/data/example_rollouts.jsonl
resources_servers/workplace_assistant/data/train_metrics.json
resources_servers/workplace_assistant/data/validation_metrics.json
resources_servers/workplace_assistant/notebooks/synthetic-data-generation/README.md
resources_servers/workplace_assistant/notebooks/synthetic-data-generation/multistep-toolcalling-sdg.ipynb
resources_servers/workplace_assistant/notebooks/synthetic-data-generation/requirements.txt
resources_servers/workplace_assistant/notebooks/synthetic-data-generation/tools/analytics.json
resources_servers/workplace_assistant/notebooks/synthetic-data-generation/tools/calendar.json
resources_servers/workplace_assistant/notebooks/synthetic-data-generation/tools/company_directory.json
resources_servers/workplace_assistant/notebooks/synthetic-data-generation/tools/customer_relationship_manager.json
resources_servers/workplace_assistant/notebooks/synthetic-data-generation/tools/email.json
resources_servers/workplace_assistant/notebooks/synthetic-data-generation/tools/environment.json
resources_servers/workplace_assistant/notebooks/synthetic-data-generation/tools/project_management.json
resources_servers/workplace_assistant/notebooks/synthetic-data-generation/utils/__init__.py
resources_servers/workplace_assistant/notebooks/synthetic-data-generation/utils/convert_to_nemo_gym_format.py
resources_servers/workplace_assistant/notebooks/synthetic-data-generation/utils/quality_filtering.py
resources_servers/workplace_assistant/tests/test_app.py
resources_servers/workplace_assistant/workplace_assistant_tools/analytics.py
resources_servers/workplace_assistant/workplace_assistant_tools/calendar.py
resources_servers/workplace_assistant/workplace_assistant_tools/company_directory.py
resources_servers/workplace_assistant/workplace_assistant_tools/customer_relationship_manager.py
resources_servers/workplace_assistant/workplace_assistant_tools/email.py
resources_servers/workplace_assistant/workplace_assistant_tools/project_management.py
resources_servers/xlam_fc/README.md
resources_servers/xlam_fc/app.py
resources_servers/xlam_fc/generate_dataset.py
resources_servers/xlam_fc/requirements.txt
resources_servers/xlam_fc/configs/xlam_fc.yaml
resources_servers/xlam_fc/data/example.jsonl
resources_servers/xlam_fc/data/example_metrics.json
resources_servers/xlam_fc/data/example_rollouts.jsonl
resources_servers/xlam_fc/tests/__init__.py
resources_servers/xlam_fc/tests/test_app.py
responses_api_agents/aviary_agent/README.md
responses_api_agents/aviary_agent/app.py
responses_api_agents/aviary_agent/requirements.txt
responses_api_agents/aviary_agent/tests/test_app.py
responses_api_agents/mini_swe_agent/.gitignore
responses_api_agents/mini_swe_agent/README.md
responses_api_agents/mini_swe_agent/__init__.py
responses_api_agents/mini_swe_agent/app.py
responses_api_agents/mini_swe_agent/client.py
responses_api_agents/mini_swe_agent/requirements.txt
responses_api_agents/mini_swe_agent/utils.py
responses_api_agents/mini_swe_agent/assets/miniswe_qwen_coder.png
responses_api_agents/mini_swe_agent/configs/mini_swe_agent.yaml
responses_api_agents/mini_swe_agent/tests/test_app.py
responses_api_agents/proof_refinement_agent/README.md
responses_api_agents/proof_refinement_agent/__init__.py
responses_api_agents/proof_refinement_agent/app.py
responses_api_agents/proof_refinement_agent/requirements.txt
responses_api_agents/proof_refinement_agent/configs/proof_refinement_agent.yaml
responses_api_agents/proof_refinement_agent/tests/test_app.py
responses_api_agents/simple_agent/README.md
responses_api_agents/simple_agent/__init__.py
responses_api_agents/simple_agent/app.py
responses_api_agents/simple_agent/client.py
responses_api_agents/simple_agent/requirements.txt
responses_api_agents/simple_agent/configs/simple_agent.yaml
responses_api_agents/simple_agent/tests/test_app.py
responses_api_agents/swe_agents/.gitignore
responses_api_agents/swe_agents/README.md
responses_api_agents/swe_agents/__init__.py
responses_api_agents/swe_agents/app.py
responses_api_agents/swe_agents/client.py
responses_api_agents/swe_agents/requirements.txt
responses_api_agents/swe_agents/run_openhands.py
responses_api_agents/swe_agents/utils.py
responses_api_agents/swe_agents/configs/__init__.py
responses_api_agents/swe_agents/configs/oh_config.toml
responses_api_agents/swe_agents/configs/swe_agent_config.yaml
responses_api_agents/swe_agents/configs/swe_agent_tools_openai_format.json
responses_api_agents/swe_agents/configs/swebench_openhands.yaml
responses_api_agents/swe_agents/configs/swebench_openhands_training.yaml
responses_api_agents/swe_agents/configs/swebench_swe_agent.yaml
responses_api_agents/swe_agents/data/.gitignore
responses_api_agents/swe_agents/data/example.jsonl
responses_api_agents/swe_agents/data/example_metrics.json
responses_api_agents/swe_agents/prompts/long_horizon.j2
responses_api_agents/swe_agents/prompts/system_prompt_1.j2
responses_api_agents/swe_agents/prompts/system_prompt_2.j2
responses_api_agents/swe_agents/prompts/system_prompt_3.j2
responses_api_agents/swe_agents/prompts/user_prompt_1.j2
responses_api_agents/swe_agents/prompts/user_prompt_2.j2
responses_api_agents/swe_agents/prompts/user_prompt_3.j2
responses_api_agents/swe_agents/tests/__init__.py
responses_api_agents/swe_agents/tests/test_app.py
responses_api_agents/swe_agents/tests/test_run_openhands.py
responses_api_agents/swe_agents/tests/test_utils.py
responses_api_agents/tool_simulation_agent/README.md
responses_api_agents/tool_simulation_agent/__init__.py
responses_api_agents/tool_simulation_agent/app.py
responses_api_agents/tool_simulation_agent/requirements.txt
responses_api_agents/tool_simulation_agent/configs/tool_simulation_agent.yaml
responses_api_agents/tool_simulation_agent/tests/__init__.py
responses_api_agents/tool_simulation_agent/tests/test_app.py
responses_api_agents/verifiers_agent/README.md
responses_api_agents/verifiers_agent/__init__.py
responses_api_agents/verifiers_agent/app.py
responses_api_agents/verifiers_agent/requirements.txt
responses_api_agents/verifiers_agent/configs/acereason-math.yaml
responses_api_agents/verifiers_agent/data/acereason-math-example.jsonl
responses_api_agents/verifiers_agent/scripts/create_dataset.py
responses_api_agents/verifiers_agent/tests/__init__.py
responses_api_agents/verifiers_agent/tests/test_app.py
responses_api_models/azure_openai_model/README.md
responses_api_models/azure_openai_model/app.py
responses_api_models/azure_openai_model/client.py
responses_api_models/azure_openai_model/requirements.txt
responses_api_models/azure_openai_model/configs/azure_openai_model.yaml
responses_api_models/azure_openai_model/tests/test_app.py
responses_api_models/genrm_model/README.md
responses_api_models/genrm_model/__init__.py
responses_api_models/genrm_model/app.py
responses_api_models/genrm_model/pyproject.toml
responses_api_models/genrm_model/setup.py
responses_api_models/genrm_model/configs/genrm_model.yaml
responses_api_models/genrm_model/tests/__init__.py
responses_api_models/genrm_model/tests/test_app.py
responses_api_models/local_vllm_model/README.md
responses_api_models/local_vllm_model/__init__.py
responses_api_models/local_vllm_model/app.py
responses_api_models/local_vllm_model/pyproject.toml
responses_api_models/local_vllm_model/setup.py
responses_api_models/local_vllm_model/configs/Qwen/Qwen3-235B-A22B-Instruct-2507-FP8.yaml
responses_api_models/local_vllm_model/configs/Qwen/Qwen3-30B-A3B-Instruct-2507.yaml
responses_api_models/local_vllm_model/configs/Qwen/Qwen3-30B-A3B-Thinking-2507.yaml
responses_api_models/local_vllm_model/configs/Qwen/Qwen3.5-122B-A10B.yaml
responses_api_models/local_vllm_model/configs/Qwen/Qwen3.5-27B.yaml
responses_api_models/local_vllm_model/configs/Qwen/Qwen3.5-35B-A3B.yaml
responses_api_models/local_vllm_model/configs/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.yaml
responses_api_models/local_vllm_model/configs/openai/gpt-oss-120b-reasoning-high.yaml
responses_api_models/local_vllm_model/configs/openai/gpt-oss-120b-reasoning-low.yaml
responses_api_models/local_vllm_model/configs/openai/gpt-oss-120b-reasoning-medium.yaml
responses_api_models/local_vllm_model/configs/openai/gpt-oss-20b-reasoning-high.yaml
responses_api_models/local_vllm_model/scripts/launch_vllm_server.sh
responses_api_models/local_vllm_model/test_scripts/1_node/1_instance_1x8.sh
responses_api_models/local_vllm_model/test_scripts/1_node/1_instance_2x4.sh
responses_api_models/local_vllm_model/test_scripts/1_node/2_instances_1x4.sh
responses_api_models/local_vllm_model/test_scripts/2_nodes/1_instance_1x16.sh
responses_api_models/local_vllm_model/test_scripts/2_nodes/2_instances_1x8.sh
responses_api_models/local_vllm_model/test_scripts/2_nodes/2_instances_2x4.sh
responses_api_models/local_vllm_model/test_scripts/4_nodes/1_instance_2x16.sh
responses_api_models/local_vllm_model/test_scripts/4_nodes/2_instances_1x16.sh
responses_api_models/local_vllm_model/test_scripts/8_nodes/2_instances_2x16.sh
responses_api_models/local_vllm_model/tests/__init__.py
responses_api_models/local_vllm_model/tests/test_app.py
responses_api_models/openai_model/README.md
responses_api_models/openai_model/__init__.py
responses_api_models/openai_model/app.py
responses_api_models/openai_model/client.py
responses_api_models/openai_model/requirements.txt
responses_api_models/openai_model/configs/openai_model.yaml
responses_api_models/openai_model/configs/OpenAI/gpt-4.1-2025-04-14.yaml
responses_api_models/openai_model/configs/OpenAI/gpt-5-2025-08-07.yaml
responses_api_models/openai_model/configs/OpenAI/gpt-5-nano-2025-08-07.yaml
responses_api_models/openai_model/tests/test_app.py
responses_api_models/vllm_model/README.md
responses_api_models/vllm_model/__init__.py
responses_api_models/vllm_model/app.py
responses_api_models/vllm_model/client.py
responses_api_models/vllm_model/pyproject.toml
responses_api_models/vllm_model/configs/vllm_model.yaml
responses_api_models/vllm_model/configs/vllm_model_for_training.yaml
responses_api_models/vllm_model/tests/__init__.py
responses_api_models/vllm_model/tests/round_trip_test_data.json
responses_api_models/vllm_model/tests/test_app.py
results/.gitignore
scripts/add_verified_flag.py
scripts/print_aggregate_results.py
scripts/update_resource_servers.py
tests/__init__.py
tests/conftest.py
tests/functional_tests/L2_Functional_Tests_GPU.sh
tests/functional_tests/__init__.py
tests/unit_tests/__init__.py
tests/unit_tests/test_base_resources_server.py
tests/unit_tests/test_base_responses_api_agent.py
tests/unit_tests/test_base_responses_api_model.py
tests/unit_tests/test_cli.py
tests/unit_tests/test_cli_setup_command.py
tests/unit_tests/test_config_types_help.py
tests/unit_tests/test_dataset_orchestrator.py
tests/unit_tests/test_gitlab_utils.py
tests/unit_tests/test_global_config.py
tests/unit_tests/test_hf_utils.py
tests/unit_tests/test_openai_utils.py
tests/unit_tests/test_placeholder.py
tests/unit_tests/test_profiling.py
tests/unit_tests/test_reward_profile.py
tests/unit_tests/test_rollout_collection.py
tests/unit_tests/test_server_status.py
tests/unit_tests/test_server_utils.py
tests/unit_tests/test_train_data_utils.py