.gitignore
.markdownlint.json
.pre-commit-config.yaml
CITATION.cff
CODE_OF_CONDUCT.md
CONTRIBUTING.md
Dockerfile
LICENSE
Makefile
README.md
STYLE_GUIDE.md
pyproject.toml
.github/pull_request_template.md
.github/ISSUE_TEMPLATE/bug-report.yaml
.github/ISSUE_TEMPLATE/config.yml
.github/ISSUE_TEMPLATE/feature-request.yaml
.github/workflows/doctests.yaml
.github/workflows/gpu_tests.yaml
.github/workflows/pretest.yaml
.github/workflows/release_gcp.yaml
.github/workflows/release_pypi.yaml
.vscode/launch.json
.vscode/settings.json
configs/README.md
configs/apis/anthropic/infer_claude3_7.yaml
configs/examples/README.md
configs/examples/bulk_inference/README.md
configs/examples/bulk_inference/gcp_job.yaml
configs/examples/fineweb_ablation_pretraining/README.md
configs/examples/fineweb_ablation_pretraining/ddp/gcp_job.yaml
configs/examples/fineweb_ablation_pretraining/ddp/polaris_job.yaml
configs/examples/fineweb_ablation_pretraining/ddp/train.yaml
configs/examples/fineweb_ablation_pretraining/fsdp/gcp_job.yaml
configs/examples/fineweb_ablation_pretraining/fsdp/polaris_job.yaml
configs/examples/fineweb_ablation_pretraining/fsdp/train.yaml
configs/examples/grpo_tldr/gcp_job.yaml
configs/examples/grpo_tldr/train.yaml
configs/examples/misc/README.md
configs/examples/misc/dev_gcp_job.yaml
configs/examples/misc/hello_world_gcp_job.yaml
configs/examples/misc/hello_world_polaris_job.yaml
configs/examples/misc/sky_init.sh
configs/examples/misc/tulu3_sft_mini.yaml
configs/examples/misc/vllm_polaris_job.yaml
configs/projects/README.md
configs/projects/aya/README.md
configs/projects/aya/evaluation/eval.yaml
configs/projects/aya/evaluation/gcp_job.yaml
configs/projects/aya/sft/gcp_job.yaml
configs/projects/aya/sft/train.yaml
configs/projects/chatqa/README.md
configs/projects/chatqa/chatqa_stage1_train.yaml
configs/projects/chatqa/chatqa_stage2_train.yaml
configs/projects/chatqa/gcp_job.yaml
configs/projects/coalm/405b_train.yaml
configs/projects/coalm/70b_infer.yaml
configs/projects/coalm/70b_train.yaml
configs/projects/coalm/8b_infer.yaml
configs/projects/coalm/8b_train.yaml
configs/projects/coalm/README.md
configs/projects/coalm/images/dataset.png
configs/projects/coalm/images/results.png
configs/projects/wc50m/README.md
configs/projects/wc50m/configs/base_ultrachat.yaml
configs/projects/wc50m/configs/gcp_base_ultrachat.yaml
configs/projects/wc50m/results/baseline.csv
configs/projects/wc50m/results/oumi.csv
configs/recipes/README.md
configs/recipes/deepseek_r1/README.md
configs/recipes/deepseek_r1/evaluation/distill_llama_70b/eval.yaml
configs/recipes/deepseek_r1/evaluation/distill_llama_70b/gcp_job.yaml
configs/recipes/deepseek_r1/evaluation/distill_llama_8b/eval.yaml
configs/recipes/deepseek_r1/evaluation/distill_llama_8b/gcp_job.yaml
configs/recipes/deepseek_r1/evaluation/distill_qwen_1_5b/eval.yaml
configs/recipes/deepseek_r1/evaluation/distill_qwen_1_5b/gcp_job.yaml
configs/recipes/deepseek_r1/evaluation/distill_qwen_32b/eval.yaml
configs/recipes/deepseek_r1/evaluation/distill_qwen_32b/gcp_job.yaml
configs/recipes/deepseek_r1/inference/671b_together_infer.yaml
configs/recipes/deepseek_r1/inference/distill_llama_70b_infer.yaml
configs/recipes/deepseek_r1/inference/distill_llama_8b_infer.yaml
configs/recipes/deepseek_r1/inference/distill_qwen_1_5b_infer.yaml
configs/recipes/deepseek_r1/inference/distill_qwen_32b_infer.yaml
configs/recipes/deepseek_r1/sft/distill_llama_70b/full_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_llama_70b/full_train.yaml
configs/recipes/deepseek_r1/sft/distill_llama_70b/lora_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_llama_70b/lora_train.yaml
configs/recipes/deepseek_r1/sft/distill_llama_70b/qlora_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_llama_70b/qlora_train.yaml
configs/recipes/deepseek_r1/sft/distill_llama_8b/full_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_llama_8b/full_train.yaml
configs/recipes/deepseek_r1/sft/distill_llama_8b/lora_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_llama_8b/lora_train.yaml
configs/recipes/deepseek_r1/sft/distill_llama_8b/qlora_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_llama_8b/qlora_train.yaml
configs/recipes/deepseek_r1/sft/distill_qwen_1_5b/full_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_qwen_1_5b/full_train.yaml
configs/recipes/deepseek_r1/sft/distill_qwen_1_5b/lora_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_qwen_1_5b/lora_train.yaml
configs/recipes/deepseek_r1/sft/distill_qwen_32b/lora_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_qwen_32b/lora_train.yaml
configs/recipes/gpt2/README.md
configs/recipes/gpt2/evaluation/async_eval.yaml
configs/recipes/gpt2/evaluation/async_gcp_job.yaml
configs/recipes/gpt2/inference/infer.yaml
configs/recipes/gpt2/pretraining/gcp_job.yaml
configs/recipes/gpt2/pretraining/mac_train.yaml
configs/recipes/gpt2/pretraining/train.yaml
configs/recipes/llama3_1/README.md
configs/recipes/llama3_1/evaluation/70b_eval.yaml
configs/recipes/llama3_1/evaluation/70b_gcp_job.yaml
configs/recipes/llama3_1/evaluation/70b_polaris_job.yaml
configs/recipes/llama3_1/evaluation/8b_eval.yaml
configs/recipes/llama3_1/evaluation/8b_gcp_job.yaml
configs/recipes/llama3_1/evaluation/8b_polaris_job.yaml
configs/recipes/llama3_1/inference/70b_infer.yaml
configs/recipes/llama3_1/inference/8b_infer.yaml
configs/recipes/llama3_1/inference/8b_rvllm_infer.yaml
configs/recipes/llama3_1/inference/8b_sglang_infer.yaml
configs/recipes/llama3_1/pretraining/8b/gcp_job.yaml
configs/recipes/llama3_1/pretraining/8b/polaris_job.yaml
configs/recipes/llama3_1/pretraining/8b/train.yaml
configs/recipes/llama3_1/sft/405b_full/polaris_job.yaml
configs/recipes/llama3_1/sft/405b_full/train.yaml
configs/recipes/llama3_1/sft/405b_lora/gcp_job.yaml
configs/recipes/llama3_1/sft/405b_lora/polaris_job.yaml
configs/recipes/llama3_1/sft/405b_lora/train.yaml
configs/recipes/llama3_1/sft/405b_qlora/gcp_job.yaml
configs/recipes/llama3_1/sft/405b_qlora/polaris_job.yaml
configs/recipes/llama3_1/sft/405b_qlora/train.yaml
configs/recipes/llama3_1/sft/70b_full/gcp_job.yaml
configs/recipes/llama3_1/sft/70b_full/polaris_job.yaml
configs/recipes/llama3_1/sft/70b_full/train.yaml
configs/recipes/llama3_1/sft/70b_lora/gcp_job.yaml
configs/recipes/llama3_1/sft/70b_lora/polaris_job.yaml
configs/recipes/llama3_1/sft/70b_lora/train.yaml
configs/recipes/llama3_1/sft/70b_qlora/gcp_job.yaml
configs/recipes/llama3_1/sft/70b_qlora/polaris_job.yaml
configs/recipes/llama3_1/sft/70b_qlora/train.yaml
configs/recipes/llama3_1/sft/8b_full/accelerate.yaml
configs/recipes/llama3_1/sft/8b_full/gcp_job.yaml
configs/recipes/llama3_1/sft/8b_full/longctx_train.yaml
configs/recipes/llama3_1/sft/8b_full/polaris_job.yaml
configs/recipes/llama3_1/sft/8b_full/train.yaml
configs/recipes/llama3_1/sft/8b_lora/fsdp_gcp_job.yaml
configs/recipes/llama3_1/sft/8b_lora/fsdp_train.yaml
configs/recipes/llama3_1/sft/8b_lora/gcp_job.yaml
configs/recipes/llama3_1/sft/8b_lora/polaris_job.yaml
configs/recipes/llama3_1/sft/8b_lora/train.yaml
configs/recipes/llama3_1/sft/8b_qlora/gcp_job.yaml
configs/recipes/llama3_1/sft/8b_qlora/train.yaml
configs/recipes/llama3_2/README.md
configs/recipes/llama3_2/evaluation/1b_eval.yaml
configs/recipes/llama3_2/evaluation/3b_eval.yaml
configs/recipes/llama3_2/inference/1b_infer.yaml
configs/recipes/llama3_2/inference/1b_sglang_infer.yaml
configs/recipes/llama3_2/inference/1b_vllm_infer.yaml
configs/recipes/llama3_2/inference/3b_infer.yaml
configs/recipes/llama3_2/inference/3b_sglang_infer.yaml
configs/recipes/llama3_2/inference/3b_vllm_infer.yaml
configs/recipes/llama3_2/sft/1b_full/train.yaml
configs/recipes/llama3_2/sft/3b_full/fsdp_gcp_job.yaml
configs/recipes/llama3_2/sft/3b_full/fsdp_train.yaml
configs/recipes/llama3_2/sft/3b_full/gcp_job.yaml
configs/recipes/llama3_2/sft/3b_full/polaris_job.yaml
configs/recipes/llama3_2/sft/3b_full/train.yaml
configs/recipes/llama3_2/sft/3b_lora/fsdp_gcp_job.yaml
configs/recipes/llama3_2/sft/3b_lora/fsdp_train.yaml
configs/recipes/llama3_2/sft/3b_lora/gcp_job.yaml
configs/recipes/llama3_2/sft/3b_lora/polaris_job.yaml
configs/recipes/llama3_2/sft/3b_lora/train.yaml
configs/recipes/llama3_2/sft/3b_qlora/fsdp_gcp_job.yaml
configs/recipes/llama3_2/sft/3b_qlora/fsdp_train.yaml
configs/recipes/llama3_2/sft/3b_qlora/gcp_job.yaml
configs/recipes/llama3_2/sft/3b_qlora/polaris_job.yaml
configs/recipes/llama3_2/sft/3b_qlora/train.yaml
configs/recipes/llama3_3/README.md
configs/recipes/llama3_3/evaluation/70b_eval.yaml
configs/recipes/llama3_3/evaluation/70b_gcp_job.yaml
configs/recipes/llama3_3/inference/70b_infer.yaml
configs/recipes/llama3_3/inference/70b_vllm_infer.yaml
configs/recipes/llama3_3/sft/70b_full/gcp_job.yaml
configs/recipes/llama3_3/sft/70b_full/train.yaml
configs/recipes/llama3_3/sft/70b_lora/gcp_job.yaml
configs/recipes/llama3_3/sft/70b_lora/train.yaml
configs/recipes/llama3_3/sft/70b_qlora/gcp_job.yaml
configs/recipes/llama3_3/sft/70b_qlora/train.yaml
configs/recipes/phi3/README.md
configs/recipes/phi3/dpo/gcp_job.yaml
configs/recipes/phi3/dpo/mac_train.yaml
configs/recipes/phi3/dpo/nvidia_24g_train.yaml
configs/recipes/phi3/dpo/nvidia_80g_train.yaml
configs/recipes/phi3/dpo/train.yaml
configs/recipes/phi3/evaluation/eval.yaml
configs/recipes/phi3/evaluation/gcp_job.yaml
configs/recipes/phi3/sft/lora_train.yaml
configs/recipes/phi3/sft/mac_lora_train.yaml
configs/recipes/qwq/evaluation/eval.yaml
configs/recipes/qwq/evaluation/gcp_job.yaml
configs/recipes/qwq/inference/infer.yaml
configs/recipes/qwq/sft/lora_gcp_job.yaml
configs/recipes/qwq/sft/lora_train.yaml
configs/recipes/smollm/README.md
configs/recipes/smollm/evaluation/135m/eval.yaml
configs/recipes/smollm/evaluation/135m/gcp_job.yaml
configs/recipes/smollm/evaluation/135m/quickstart_alpaca_v2_eval.yaml
configs/recipes/smollm/evaluation/135m/quickstart_eval.yaml
configs/recipes/smollm/evaluation/135m/quickstart_gcp_job.yaml
configs/recipes/smollm/evaluation/135m/leaderboards/huggingface_leaderboard_v1_eval.yaml
configs/recipes/smollm/evaluation/135m/leaderboards/huggingface_leaderboard_v1_gcp_job.yaml
configs/recipes/smollm/evaluation/135m/leaderboards/huggingface_leaderboard_v2_eval.yaml
configs/recipes/smollm/evaluation/135m/leaderboards/huggingface_leaderboard_v2_gcp_job.yaml
configs/recipes/smollm/inference/135m_infer.yaml
configs/recipes/smollm/sft/135m/gcp_job.yaml
configs/recipes/smollm/sft/135m/quickstart_gcp_job.yaml
configs/recipes/smollm/sft/135m/quickstart_train.yaml
configs/recipes/smollm/sft/135m/train.yaml
configs/recipes/vision/README.md
configs/recipes/vision/llama3_2_vision/README.md
configs/recipes/vision/llama3_2_vision/evaluation/11b_eval.yaml
configs/recipes/vision/llama3_2_vision/evaluation/11b_gcp_job.yaml
configs/recipes/vision/llama3_2_vision/inference/11b_infer.yaml
configs/recipes/vision/llama3_2_vision/inference/11b_rvllm_infer.yaml
configs/recipes/vision/llama3_2_vision/inference/11b_sglang_infer.yaml
configs/recipes/vision/llama3_2_vision/inference/11b_vllm_infer.yaml
configs/recipes/vision/llama3_2_vision/sft/11b_full/gcp_job.yaml
configs/recipes/vision/llama3_2_vision/sft/11b_full/train.yaml
configs/recipes/vision/llama3_2_vision/sft/11b_lora/gcp_job.yaml
configs/recipes/vision/llama3_2_vision/sft/11b_lora/train.yaml
configs/recipes/vision/llama3_2_vision/sft/90b_full/gcp_job.yaml
configs/recipes/vision/llama3_2_vision/sft/90b_full/train.yaml
configs/recipes/vision/llava_7b/README.md
configs/recipes/vision/llava_7b/inference/infer.yaml
configs/recipes/vision/llava_7b/inference/vllm_infer.yaml
configs/recipes/vision/llava_7b/sft/oumi_gcp_job.yaml
configs/recipes/vision/llava_7b/sft/train.yaml
configs/recipes/vision/llava_7b/sft/trl_gcp_job.yaml
configs/recipes/vision/phi3/README.md
configs/recipes/vision/phi3/inference/vllm_infer.yaml
configs/recipes/vision/phi3/sft/oumi_gcp_job.yaml
configs/recipes/vision/phi3/sft/train.yaml
configs/recipes/vision/phi3/sft/trl_gcp_job.yaml
configs/recipes/vision/qwen2_5_vl_3b/README.md
configs/recipes/vision/qwen2_5_vl_3b/inference/infer.yaml
configs/recipes/vision/qwen2_5_vl_3b/inference/vllm_infer.yaml
configs/recipes/vision/qwen2_5_vl_3b/sft/gcp_job.yaml
configs/recipes/vision/qwen2_5_vl_3b/sft/train.yaml
configs/recipes/vision/qwen2_vl_2b/README.md
configs/recipes/vision/qwen2_vl_2b/evaluation/eval.yaml
configs/recipes/vision/qwen2_vl_2b/evaluation/gcp_job.yaml
configs/recipes/vision/qwen2_vl_2b/inference/infer.yaml
configs/recipes/vision/qwen2_vl_2b/inference/sglang_infer.yaml
configs/recipes/vision/qwen2_vl_2b/inference/vllm_infer.yaml
configs/recipes/vision/qwen2_vl_2b/sft/gcp_job.yaml
configs/recipes/vision/qwen2_vl_2b/sft/train.yaml
configs/recipes/vision/smolvlm/README.md
configs/recipes/vision/smolvlm/sft/gcp_job.yaml
configs/recipes/vision/smolvlm/sft/train.yaml
data/dataset_examples/README.md
data/dataset_examples/alpaca_format.json
data/dataset_examples/alpaca_format.jsonl
data/dataset_examples/oumi_format.json
data/dataset_examples/oumi_format.jsonl
data/dataset_examples/vision_language_oumi_format.jsonl
docs/.gitignore
docs/_doclinks.config
docs/_docsummaries.sh
docs/_manage_doclinks.py
docs/_summarize_module.py
docs/citations.bib
docs/conf.py
docs/index.md
docs/_static/judge/judge_figure.svg
docs/_static/logo/favicon.png
docs/_static/logo/header_logo.png
docs/_static/logo/oumi_logo_dark.png
docs/_static/logo/oumi_logo_light.png
docs/_templates/apidoc/package.rst.jinja
docs/_templates/autodoc2/index.jinja
docs/about/acknowledgements.md
docs/about/changelog.md
docs/about/citations.md
docs/about/license.md
docs/cli/commands.md
docs/development/code_of_conduct.md
docs/development/contributing.md
docs/development/dev_setup.md
docs/development/docs_guide.md
docs/development/style_guide.md
docs/faq/oom.md
docs/faq/troubleshooting.md
docs/get_started/core_concepts.md
docs/get_started/installation.md
docs/get_started/quickstart.md
docs/get_started/tutorials.md
docs/resources/recipes.md
docs/resources/datasets/data_formats.md
docs/resources/datasets/datasets.md
docs/resources/datasets/other_datasets.md
docs/resources/datasets/preference_datasets.md
docs/resources/datasets/pretraining_datasets.md
docs/resources/datasets/sft_datasets.md
docs/resources/datasets/vl_sft_datasets.md
docs/resources/models/custom_models.md
docs/resources/models/models.md
docs/resources/models/supported_models.md
docs/user_guides/customization.md
docs/user_guides/evaluate/evaluate.md
docs/user_guides/evaluate/evaluation_config.md
docs/user_guides/evaluate/generative_benchmarks.md
docs/user_guides/evaluate/leaderboards.md
docs/user_guides/evaluate/standardized_benchmarks.md
docs/user_guides/infer/common_workflows.md
docs/user_guides/infer/configuration.md
docs/user_guides/infer/infer.md
docs/user_guides/infer/inference_cli.md
docs/user_guides/infer/inference_engines.md
docs/user_guides/judge/built_in_judge.md
docs/user_guides/judge/custom_infer.md
docs/user_guides/judge/custom_prompt.md
docs/user_guides/judge/judge.md
docs/user_guides/launch/custom_cluster.md
docs/user_guides/launch/deploy.md
docs/user_guides/launch/launch.md
docs/user_guides/train/configuration.md
docs/user_guides/train/monitoring.md
docs/user_guides/train/train.md
docs/user_guides/train/training_methods.md
docs/user_guides/train/environments/environments.md
docs/user_guides/train/environments/local.md
docs/user_guides/train/environments/notebooks.md
docs/user_guides/train/environments/vscode.md
notebooks/Oumi - A Tour.ipynb
notebooks/Oumi - Custom Judge.ipynb
notebooks/Oumi - Deploying a Job.ipynb
notebooks/Oumi - Distill a Large Model.ipynb
notebooks/Oumi - Evaluation with AlpacaEval 2.0.ipynb
notebooks/Oumi - Evaluation with MT Bench.ipynb
notebooks/Oumi - Evaluation with Oumi.ipynb
notebooks/Oumi - Finetuning Tutorial.ipynb
notebooks/Oumi - Launching Jobs on Custom Clusters.ipynb
notebooks/Oumi - MiniMath-R1-1.5B.ipynb
notebooks/Oumi - Oumi Judge.ipynb
notebooks/Oumi - Running Jobs Remotely.ipynb
notebooks/Oumi - Training CNN on Custom Dataset.ipynb
notebooks/Oumi - Using NanoGPT.ipynb
notebooks/Oumi - Using vLLM Engine for Inference.ipynb
notebooks/Oumi - Vision Language Models.ipynb
scripts/llama_e2e.py
scripts/benchmarks/benchmark_dataloader.py
scripts/benchmarks/benchmark_nccl.py
scripts/benchmarks/benchmark_trainers.sh
scripts/benchmarks/minimal_fsdp_training.py
scripts/benchmarks/minimal_multimodal_training.py
scripts/datasets/save_conversations.py
scripts/datasets/pretokenize/README.md
scripts/datasets/pretokenize/process_dataset.py
scripts/datasets/pretokenize/sky.yaml
scripts/docker/build_docker.sh
scripts/inference/README.md
scripts/inference/gcp_inference.py
scripts/polaris/README.md
scripts/polaris/launcher.sh
scripts/polaris/polaris_init.sh
scripts/polaris/jobs/build_apptainer_from_docker.sh
scripts/polaris/jobs/download_model_from_hf.sh
scripts/polaris/jobs/example_job.sh
scripts/polaris/jobs/fineweb_pt_job.sh
scripts/polaris/jobs/fineweb_pt_worker.sh
scripts/polaris/jobs/llama_tune.sh
scripts/polaris/jobs/vllm_job.sh
scripts/polaris/jobs/vllm_worker.sh
scripts/polaris/jobs/python/vllm_inference.py
scripts/polaris/jobs/python/vllm_parallel_inference.py
scripts/polaris/notebooks/Oumi - Multinode Inference on Polaris.ipynb
scripts/polaris/notebooks/Oumi - Tuning Llama.ipynb
src/experimental/__init__.py
src/experimental/configs/projects/zephyr/README.md
src/experimental/configs/projects/zephyr/evaluation/eval.yaml
src/experimental/configs/projects/zephyr/sft/full_gcp_job.yaml
src/experimental/configs/projects/zephyr/sft/full_train.yaml
src/experimental/configs/projects/zephyr/sft/qlora_gcp_job.yaml
src/experimental/configs/projects/zephyr/sft/qlora_train.yaml
src/experimental/configs/recipes/phi3/dpo/fsdp_gcp_job.yaml
src/experimental/configs/recipes/phi3/dpo/fsdp_nvidia_24g_train.yaml
src/experimental/notebooks/Oumi - Datasets Tutorial.ipynb
src/oumi/__init__.py
src/oumi/__main__.py
src/oumi/evaluate.py
src/oumi/evaluate_async.py
src/oumi/infer.py
src/oumi/judge.py
src/oumi/train.py
src/oumi.egg-info/PKG-INFO
src/oumi.egg-info/SOURCES.txt
src/oumi.egg-info/dependency_links.txt
src/oumi.egg-info/entry_points.txt
src/oumi.egg-info/requires.txt
src/oumi.egg-info/top_level.txt
src/oumi/builders/__init__.py
src/oumi/builders/callbacks.py
src/oumi/builders/collators.py
src/oumi/builders/data.py
src/oumi/builders/inference_engines.py
src/oumi/builders/lr_schedules.py
src/oumi/builders/metrics.py
src/oumi/builders/models.py
src/oumi/builders/optimizers.py
src/oumi/builders/oumi_data.py
src/oumi/builders/processors.py
src/oumi/builders/rewards.py
src/oumi/builders/training.py
src/oumi/cli/cli_utils.py
src/oumi/cli/distributed_run.py
src/oumi/cli/env.py
src/oumi/cli/evaluate.py
src/oumi/cli/fetch.py
src/oumi/cli/infer.py
src/oumi/cli/judge.py
src/oumi/cli/launch.py
src/oumi/cli/main.py
src/oumi/cli/train.py
src/oumi/core/__init__.py
src/oumi/core/async_utils.py
src/oumi/core/constants.py
src/oumi/core/distributed.py
src/oumi/core/callbacks/__init__.py
src/oumi/core/callbacks/base_trainer_callback.py
src/oumi/core/callbacks/hf_mfu_callback.py
src/oumi/core/callbacks/mfu_callback.py
src/oumi/core/callbacks/nan_inf_detection_callback.py
src/oumi/core/callbacks/profiler_step_callback.py
src/oumi/core/callbacks/telemetry_callback.py
src/oumi/core/collators/text_collator_with_padding.py
src/oumi/core/collators/text_completions_collator_with_padding.py
src/oumi/core/collators/vision_language_collator_with_padding.py
src/oumi/core/collators/vision_language_sft_collator.py
src/oumi/core/configs/__init__.py
src/oumi/core/configs/async_evaluation_config.py
src/oumi/core/configs/base_config.py
src/oumi/core/configs/evaluation_config.py
src/oumi/core/configs/inference_config.py
src/oumi/core/configs/inference_engine_type.py
src/oumi/core/configs/job_config.py
src/oumi/core/configs/judge_config.py
src/oumi/core/configs/training_config.py
src/oumi/core/configs/internal/internal_model_config.py
src/oumi/core/configs/internal/supported_models.py
src/oumi/core/configs/params/base_params.py
src/oumi/core/configs/params/data_params.py
src/oumi/core/configs/params/evaluation_params.py
src/oumi/core/configs/params/fsdp_params.py
src/oumi/core/configs/params/generation_params.py
src/oumi/core/configs/params/grpo_params.py
src/oumi/core/configs/params/guided_decoding_params.py
src/oumi/core/configs/params/model_params.py
src/oumi/core/configs/params/peft_params.py
src/oumi/core/configs/params/profiler_params.py
src/oumi/core/configs/params/remote_params.py
src/oumi/core/configs/params/telemetry_params.py
src/oumi/core/configs/params/training_params.py
src/oumi/core/datasets/__init__.py
src/oumi/core/datasets/base_dpo_dataset.py
src/oumi/core/datasets/base_grpo_dataset.py
src/oumi/core/datasets/base_iterable_dataset.py
src/oumi/core/datasets/base_map_dataset.py
src/oumi/core/datasets/base_pretraining_dataset.py
src/oumi/core/datasets/base_sft_dataset.py
src/oumi/core/datasets/packed_sft_dataset.py
src/oumi/core/datasets/pretraining_async_text_dataset.py
src/oumi/core/datasets/vision_language_dataset.py
src/oumi/core/evaluation/__init__.py
src/oumi/core/evaluation/evaluation_result.py
src/oumi/core/evaluation/evaluator.py
src/oumi/core/evaluation/backends/alpaca_eval.py
src/oumi/core/evaluation/backends/lm_harness.py
src/oumi/core/evaluation/utils/platform_prerequisites.py
src/oumi/core/evaluation/utils/save_utils.py
src/oumi/core/feature_generators/__init__.py
src/oumi/core/feature_generators/base_feature_generator.py
src/oumi/core/feature_generators/vision_language_conversation_feature_generator.py
src/oumi/core/inference/__init__.py
src/oumi/core/inference/base_inference_engine.py
src/oumi/core/launcher/__init__.py
src/oumi/core/launcher/base_cloud.py
src/oumi/core/launcher/base_cluster.py
src/oumi/core/models/__init__.py
src/oumi/core/models/base_model.py
src/oumi/core/processors/base_image_processor.py
src/oumi/core/processors/base_processor.py
src/oumi/core/processors/default_image_processor.py
src/oumi/core/processors/default_processor.py
src/oumi/core/registry/__init__.py
src/oumi/core/registry/registry.py
src/oumi/core/tokenizers/__init__.py
src/oumi/core/tokenizers/base_tokenizer.py
src/oumi/core/tokenizers/special_tokens.py
src/oumi/core/tokenizers/utils.py
src/oumi/core/trainers/__init__.py
src/oumi/core/trainers/base_trainer.py
src/oumi/core/trainers/hf_trainer.py
src/oumi/core/trainers/oumi_trainer.py
src/oumi/core/types/__init__.py
src/oumi/core/types/conversation.py
src/oumi/core/types/exceptions.py
src/oumi/datasets/__init__.py
src/oumi/datasets/debug.py
src/oumi/datasets/mmlu.py
src/oumi/datasets/chat_templates/chat_ml.jinja
src/oumi/datasets/chat_templates/default.jinja
src/oumi/datasets/chat_templates/default_gen.jinja
src/oumi/datasets/chat_templates/gpt2.jinja
src/oumi/datasets/chat_templates/llama3-instruct.jinja
src/oumi/datasets/chat_templates/llava.jinja
src/oumi/datasets/chat_templates/phi3-instruct.jinja
src/oumi/datasets/chat_templates/qwen2-vl-instruct.jinja
src/oumi/datasets/chat_templates/zephyr.jinja
src/oumi/datasets/evaluation/__init__.py
src/oumi/datasets/evaluation/alpaca.py
src/oumi/datasets/evaluation/utils.py
src/oumi/datasets/grpo/__init__.py
src/oumi/datasets/grpo/tldr.py
src/oumi/datasets/grpo/rewards/__init__.py
src/oumi/datasets/grpo/rewards/completion_length_rewards.py
src/oumi/datasets/preference_tuning/__init__.py
src/oumi/datasets/preference_tuning/orpo_dpo_mix.py
src/oumi/datasets/pretraining/__init__.py
src/oumi/datasets/pretraining/c4.py
src/oumi/datasets/pretraining/dolma.py
src/oumi/datasets/pretraining/falcon_refinedweb.py
src/oumi/datasets/pretraining/fineweb_edu.py
src/oumi/datasets/pretraining/pile.py
src/oumi/datasets/pretraining/red_pajama_v1.py
src/oumi/datasets/pretraining/red_pajama_v2.py
src/oumi/datasets/pretraining/slim_pajama.py
src/oumi/datasets/pretraining/starcoder.py
src/oumi/datasets/pretraining/the_stack.py
src/oumi/datasets/pretraining/tiny_stories.py
src/oumi/datasets/pretraining/tiny_textbooks.py
src/oumi/datasets/pretraining/wikipedia.py
src/oumi/datasets/pretraining/wikitext.py
src/oumi/datasets/pretraining/youtube_commons.py
src/oumi/datasets/sft/__init__.py
src/oumi/datasets/sft/alpaca.py
src/oumi/datasets/sft/aya.py
src/oumi/datasets/sft/chatqa.py
src/oumi/datasets/sft/chatrag_bench.py
src/oumi/datasets/sft/dolly.py
src/oumi/datasets/sft/magpie.py
src/oumi/datasets/sft/openo1_sft.py
src/oumi/datasets/sft/prompt_response.py
src/oumi/datasets/sft/sft_jsonlines.py
src/oumi/datasets/sft/tulu3_sft_mixture.py
src/oumi/datasets/sft/ultrachat.py
src/oumi/datasets/sft/wildchat.py
src/oumi/datasets/vision_language/__init__.py
src/oumi/datasets/vision_language/coco_captions.py
src/oumi/datasets/vision_language/docmatix.py
src/oumi/datasets/vision_language/flickr30k.py
src/oumi/datasets/vision_language/llava_instruct_mix_vsft.py
src/oumi/datasets/vision_language/mnist_sft.py
src/oumi/datasets/vision_language/the_cauldron.py
src/oumi/datasets/vision_language/vision_jsonlines.py
src/oumi/datasets/vision_language/vqav2_small.py
src/oumi/inference/__init__.py
src/oumi/inference/anthropic_inference_engine.py
src/oumi/inference/deepseek_inference_engine.py
src/oumi/inference/gcp_inference_engine.py
src/oumi/inference/gemini_inference_engine.py
src/oumi/inference/llama_cpp_inference_engine.py
src/oumi/inference/native_text_inference_engine.py
src/oumi/inference/openai_inference_engine.py
src/oumi/inference/parasail_inference_engine.py
src/oumi/inference/remote_inference_engine.py
src/oumi/inference/remote_vllm_inference_engine.py
src/oumi/inference/sambanova_inference_engine.py
src/oumi/inference/sglang_inference_engine.py
src/oumi/inference/together_inference_engine.py
src/oumi/inference/vllm_inference_engine.py
src/oumi/judges/__init__.py
src/oumi/judges/base_judge.py
src/oumi/judges/judge_court.py
src/oumi/judges/oumi_judge.py
src/oumi/judges/oumi_v1/helpful.json
src/oumi/judges/oumi_v1/honest.json
src/oumi/judges/oumi_v1/safe.json
src/oumi/judges/oumi_v1/valid.json
src/oumi/judges/test_judge/helpful.json
src/oumi/launcher/__init__.py
src/oumi/launcher/launcher.py
src/oumi/launcher/clients/local_client.py
src/oumi/launcher/clients/polaris_client.py
src/oumi/launcher/clients/sky_client.py
src/oumi/launcher/clients/slurm_client.py
src/oumi/launcher/clouds/__init__.py
src/oumi/launcher/clouds/local_cloud.py
src/oumi/launcher/clouds/polaris_cloud.py
src/oumi/launcher/clouds/sky_cloud.py
src/oumi/launcher/clouds/slurm_cloud.py
src/oumi/launcher/clusters/local_cluster.py
src/oumi/launcher/clusters/polaris_cluster.py
src/oumi/launcher/clusters/sky_cluster.py
src/oumi/launcher/clusters/slurm_cluster.py
src/oumi/models/__init__.py
src/oumi/models/cnn_classifier.py
src/oumi/models/mlp.py
src/oumi/models/experimental/cambrian/__init__.py
src/oumi/models/experimental/cambrian/constants.py
src/oumi/models/experimental/cambrian/mm_utils.py
src/oumi/models/experimental/cambrian/utils.py
src/oumi/models/experimental/cambrian/model/__init__.py
src/oumi/models/experimental/cambrian/model/builder.py
src/oumi/models/experimental/cambrian/model/cambrian_arch.py
src/oumi/models/experimental/cambrian/model/vision_sampler.py
src/oumi/models/experimental/cambrian/model/language_model/cambrian_llama.py
src/oumi/models/experimental/cambrian/model/language_model/cambrian_phi3.py
src/oumi/models/experimental/cambrian/model/language_model/phi3/__init__.py
src/oumi/models/experimental/cambrian/model/multimodal_encoder/__init__.py
src/oumi/models/experimental/cambrian/model/multimodal_encoder/base_encoder.py
src/oumi/models/experimental/cambrian/model/multimodal_encoder/builder.py
src/oumi/models/experimental/cambrian/model/multimodal_encoder/clip_convnext_encoder.py
src/oumi/models/experimental/cambrian/model/multimodal_encoder/clip_encoder.py
src/oumi/models/experimental/cambrian/model/multimodal_encoder/dino_encoder.py
src/oumi/models/experimental/cambrian/model/multimodal_encoder/load.py
src/oumi/models/experimental/cambrian/model/multimodal_encoder/siglip_encoder.py
src/oumi/models/experimental/cambrian/model/multimodal_projector/builder.py
src/oumi/models/experimental/cambrian/model/multimodal_projector/projectors.py
src/oumi/models/layers/ring_attention.py
src/oumi/models/layers/zigzag.py
src/oumi/models/layers/zigzag_utils.py
src/oumi/performance/mfu.py
src/oumi/performance/telemetry.py
src/oumi/performance/torch_profiler_utils.py
src/oumi/utils/batching.py
src/oumi/utils/conversation_utils.py
src/oumi/utils/device_utils.py
src/oumi/utils/distributed_utils.py
src/oumi/utils/git_utils.py
src/oumi/utils/hf_utils.py
src/oumi/utils/image_utils.py
src/oumi/utils/io_utils.py
src/oumi/utils/logging.py
src/oumi/utils/model_caching.py
src/oumi/utils/packaging.py
src/oumi/utils/peft_utils.py
src/oumi/utils/saver.py
src/oumi/utils/serialization_utils.py
src/oumi/utils/str_utils.py
src/oumi/utils/torch_naming_heuristics.py
src/oumi/utils/torch_utils.py
src/oumi/utils/version_utils.py
tests/__init__.py
tests/conftest.py
tests/markers.py
tests/e2e/__init__.py
tests/e2e/sambanova_infer_tutorial.yaml
tests/e2e/test_eval_e2e.py
tests/e2e/test_judges.py
tests/e2e/test_notebooks.py
tests/e2e/test_sambanova_inference.py
tests/e2e/test_train_e2e.py
tests/e2e/deps/test_circular_deps.py
tests/integration/cli/test_judge_e2e.py
tests/integration/datasets/test_preference_tuning_datasets_full_epoch.py
tests/integration/datasets/test_pretraining_datasets_full_epoch.py
tests/integration/datasets/test_sft_datasets_full_epoch.py
tests/integration/datasets/test_sft_datasets_load_datasets.py
tests/integration/datasets/test_sft_vision_datasets_load_datasets.py
tests/integration/evaluate/test_evaluate_async.py
tests/integration/evaluate/test_evaluate_lm_harness.py
tests/integration/infer/__init__.py
tests/integration/infer/test_infer.py
tests/integration/infer/test_native_text_inference_engine.py
tests/integration/models/test_integration_cnn_classifier.py
tests/integration/train/test_custom_models.py
tests/integration/train/test_train.py
tests/scripts/gcp_e2e_tests_job.yaml
tests/scripts/launch_tests.sh
tests/scripts/predownload_for_github_gpu_tests.sh
tests/testdata/adapter_config.json
tests/testdata/images/oumi_logo_dark.png
tests/testdata/images/oumi_logo_light.png
tests/testdata/images/the_great_wave_off_kanagawa.jpg
tests/testdata/pdfs/oumi_getting_started_first_1page.pdf
tests/testdata/pdfs/oumi_getting_started_first_2pages.pdf
tests/testdata/pdfs/oumi_getting_started_full_4pages.pdf
tests/unit/conftest.py
tests/unit/test_apache_license_header.py
tests/unit/builders/test_build_data.py
tests/unit/builders/test_callbacks.py
tests/unit/builders/test_collators.py
tests/unit/builders/test_data_mixtures.py
tests/unit/builders/test_lr_schedules.py
tests/unit/builders/test_models.py
tests/unit/builders/test_oumi_data.py
tests/unit/builders/test_processors.py
tests/unit/builders/test_rewards.py
tests/unit/cli/test_cli_distributed_run.py
tests/unit/cli/test_cli_env.py
tests/unit/cli/test_cli_evaluate.py
tests/unit/cli/test_cli_fetch.py
tests/unit/cli/test_cli_infer.py
tests/unit/cli/test_cli_judge.py
tests/unit/cli/test_cli_launch.py
tests/unit/cli/test_cli_main.py
tests/unit/cli/test_cli_speed_regression.py
tests/unit/cli/test_cli_train.py
tests/unit/cli/test_cli_utils.py
tests/unit/core/test_async_utils.py
tests/unit/core/test_distributed.py
tests/unit/core/test_registry.py
tests/unit/core/collators/test_text_collator_with_padding.py
tests/unit/core/collators/test_text_completions_collator_with_padding.py
tests/unit/core/collators/test_vision_language_collator_with_padding.py
tests/unit/core/configs/test_config.py
tests/unit/core/configs/test_guided_params.py
tests/unit/core/configs/test_parse_configs.py
tests/unit/core/configs/internal/test_supported_models.py
tests/unit/core/configs/params/test_base_params.py
tests/unit/core/configs/params/test_data_params.py
tests/unit/core/configs/params/test_evaluation_params.py
tests/unit/core/configs/params/test_model_params.py
tests/unit/core/configs/params/test_remote_params.py
tests/unit/core/datasets/test_base_map_dataset.py
tests/unit/core/datasets/test_base_sft_dataset.py
tests/unit/core/datasets/test_packed_sft_dataset.py
tests/unit/core/datasets/test_pretraining_dataset.py
tests/unit/core/datasets/test_vision_language_dataset.py
tests/unit/core/evaluation/test_backend_alpaca_eval.py
tests/unit/core/evaluation/test_backend_lm_harness.py
tests/unit/core/evaluation/test_evaluator.py
tests/unit/core/evaluation/test_save_utils.py
tests/unit/core/trainers/test_oumi_trainer.py
tests/unit/core/types/test_conversation.py
tests/unit/datasets/test_chat_templates.py
tests/unit/datasets/test_datasets_demo_examples.py
tests/unit/datasets/test_pretraining_async_text_dataset.py
tests/unit/datasets/test_text_jsonlines_dataset.py
tests/unit/datasets/test_tulu3_sft_mixture.py
tests/unit/datasets/test_vision_language_jsonlines_dataset.py
tests/unit/datasets/grpo/rewards/test_completion_length_rewards.py
tests/unit/inference/test_anthropic_inference_engine.py
tests/unit/inference/test_deepseek_inference_engine.py
tests/unit/inference/test_gcp_inference_engine.py
tests/unit/inference/test_gemini_inference_engine.py
tests/unit/inference/test_generation_params.py
tests/unit/inference/test_inference_engine_init.py
tests/unit/inference/test_llama_cpp_inference_engine.py
tests/unit/inference/test_openai_inference_engine.py
tests/unit/inference/test_parasail_inference_engine.py
tests/unit/inference/test_remote_inference_engine.py
tests/unit/inference/test_sambanova_inference_engine.py
tests/unit/inference/test_sglang_inference_engine.py
tests/unit/inference/test_together_inference_engine.py
tests/unit/inference/test_vllm_inference_engine.py
tests/unit/inference/test_vllm_inference_engine_quantization.py
tests/unit/launcher/test_launcher.py
tests/unit/launcher/clients/test_local_client.py
tests/unit/launcher/clients/test_polaris_client.py
tests/unit/launcher/clients/test_sky_client.py
tests/unit/launcher/clients/test_slurm_client.py
tests/unit/launcher/clients/data/qstat.txt
tests/unit/launcher/clients/data/sacct.txt
tests/unit/launcher/clients/data/sacct_full.txt
tests/unit/launcher/clouds/test_local_cloud.py
tests/unit/launcher/clouds/test_polaris_cloud.py
tests/unit/launcher/clouds/test_sky_cloud.py
tests/unit/launcher/clouds/test_slurm_cloud.py
tests/unit/launcher/clusters/test_local_cluster.py
tests/unit/launcher/clusters/test_polaris_cluster.py
tests/unit/launcher/clusters/test_sky_cluster.py
tests/unit/launcher/clusters/test_slurm_cluster.py
tests/unit/models/test_cnn_classifier.py
tests/unit/performance/test_mfu.py
tests/unit/performance/test_telemetry.py
tests/unit/performance/test_torch_profiler_utils.py
tests/unit/utils/test_conversation_utils.py
tests/unit/utils/test_device_utils.py
tests/unit/utils/test_distributed_utils.py
tests/unit/utils/test_hf_utils.py
tests/unit/utils/test_image_utils.py
tests/unit/utils/test_io_utils.py
tests/unit/utils/test_model_caching.py
tests/unit/utils/test_packaging.py
tests/unit/utils/test_peft_utils.py
tests/unit/utils/test_saver.py
tests/unit/utils/test_str_utils.py
tests/unit/utils/test_torch_naming_heuristics.py
tests/unit/utils/test_torch_utils.py
tests/unit/utils/test_version_utils.py