LICENSE
README.md
pyproject.toml
assets/version.txt
torchtitan/__init__.py
torchtitan/config_manager.py
torchtitan/train.py
torchtitan.egg-info/PKG-INFO
torchtitan.egg-info/SOURCES.txt
torchtitan.egg-info/dependency_links.txt
torchtitan.egg-info/requires.txt
torchtitan.egg-info/top_level.txt
torchtitan/components/checkpoint.py
torchtitan/components/dataloader.py
torchtitan/components/ft.py
torchtitan/components/loss.py
torchtitan/components/lr_scheduler.py
torchtitan/components/metrics.py
torchtitan/components/optimizer.py
torchtitan/components/tokenizer.py
torchtitan/components/quantization/__init__.py
torchtitan/components/quantization/float8.py
torchtitan/components/quantization/mx.py
torchtitan/components/quantization/utils.py
torchtitan/datasets/hf_datasets.py
torchtitan/datasets/tokenizer/tiktoken.py
torchtitan/distributed/__init__.py
torchtitan/distributed/parallel_dims.py
torchtitan/distributed/pipeline.py
torchtitan/distributed/utils.py
torchtitan/experiments/__init__.py
torchtitan/experiments/deepseek_v3/__init__.py
torchtitan/experiments/deepseek_v3/attn_mask_utils.py
torchtitan/experiments/deepseek_v3/checkpoint.py
torchtitan/experiments/deepseek_v3/download.py
torchtitan/experiments/deepseek_v3/dsgemm_kernels.py
torchtitan/experiments/deepseek_v3/dsgemm_utils.py
torchtitan/experiments/deepseek_v3/generate.py
torchtitan/experiments/deepseek_v3/group_gemms.py
torchtitan/experiments/deepseek_v3/model.py
torchtitan/experiments/deepseek_v3/model_args.py
torchtitan/experiments/deepseek_v3/model_config.py
torchtitan/experiments/deepseek_v3/moe_kernels.py
torchtitan/experiments/deepseek_v3/train_ds_dev.py
torchtitan/experiments/deepseek_v3/train_ds_real.py
torchtitan/experiments/deepseek_v3/infra/parallelize_deepseek.py
torchtitan/experiments/deepseek_v3/symm_mem_recipes/__init__.py
torchtitan/experiments/deepseek_v3/symm_mem_recipes/triton_barrier.py
torchtitan/experiments/deepseek_v3/symm_mem_recipes/triton_on_device_all_to_all_v.py
torchtitan/experiments/deepseek_v3/symm_mem_recipes/triton_utils.py
torchtitan/experiments/deepseek_v3/tokenizers/hf_tokenizer.py
torchtitan/experiments/deepseek_v3/train_configs/custom_args.py
torchtitan/experiments/deepseek_v3/unit_testing/benchmark_kernels.py
torchtitan/experiments/deepseek_v3/unit_testing/dsgemm_unit_testing.py
torchtitan/experiments/deepseek_v3/unit_testing/permute_indices_testing.py
torchtitan/experiments/deepseek_v3/unit_testing/test_create_m_indices.py
torchtitan/experiments/flux/__init__.py
torchtitan/experiments/flux/job_config.py
torchtitan/experiments/flux/loss.py
torchtitan/experiments/flux/sampling.py
torchtitan/experiments/flux/train.py
torchtitan/experiments/flux/utils.py
torchtitan/experiments/flux/dataset/flux_dataset.py
torchtitan/experiments/flux/dataset/tokenizer.py
torchtitan/experiments/flux/infra/parallelize.py
torchtitan/experiments/flux/model/args.py
torchtitan/experiments/flux/model/autoencoder.py
torchtitan/experiments/flux/model/hf_embedder.py
torchtitan/experiments/flux/model/layers.py
torchtitan/experiments/flux/model/math.py
torchtitan/experiments/flux/model/model.py
torchtitan/experiments/flux/scripts/download_autoencoder.py
torchtitan/experiments/flux/tests/__init__.py
torchtitan/experiments/flux/tests/integration_tests.py
torchtitan/experiments/flux/tests/test_generate_image.py
torchtitan/experiments/flux/tests/assets/cc12m_test/pack_test_dataset.py
torchtitan/experiments/flux/tests/unit_tests/__init__.py
torchtitan/experiments/flux/tests/unit_tests/test_flux_dataloader.py
torchtitan/experiments/kernels/moe/indices.py
torchtitan/experiments/kernels/moe/unit_tests/permute_indices_testing.py
torchtitan/experiments/kernels/triton_contiguous_group_gemm/cg_backward.py
torchtitan/experiments/kernels/triton_contiguous_group_gemm/cg_forward.py
torchtitan/experiments/kernels/triton_contiguous_group_gemm/cg_reference.py
torchtitan/experiments/kernels/triton_contiguous_group_gemm/debug.py
torchtitan/experiments/kernels/triton_contiguous_group_gemm/tma_cuda_autotune.py
torchtitan/experiments/kernels/triton_contiguous_group_gemm/unit_test_cg.py
torchtitan/experiments/kernels/triton_mg_group_gemm/benchmark.py
torchtitan/experiments/kernels/triton_mg_group_gemm/simpleMoE.py
torchtitan/experiments/kernels/triton_mg_group_gemm/torchao_pr/__init__.py
torchtitan/experiments/kernels/triton_mg_group_gemm/torchao_pr/fast_debug_ao.py
torchtitan/experiments/kernels/triton_mg_group_gemm/torchao_pr/mg_grouped_gemm.py
torchtitan/experiments/kernels/triton_mg_group_gemm/torchao_pr/reference_utils.py
torchtitan/experiments/kernels/triton_mg_group_gemm/torchao_pr/tma_autotuning.py
torchtitan/experiments/kernels/triton_mg_group_gemm/torchao_pr/unit_test_backwards.py
torchtitan/experiments/kernels/triton_mg_group_gemm/torchao_pr/unit_test_forwards.py
torchtitan/experiments/llama4/__init__.py
torchtitan/experiments/llama4/infra/expert_parallel.py
torchtitan/experiments/llama4/infra/parallelize.py
torchtitan/experiments/llama4/model/args.py
torchtitan/experiments/llama4/model/model.py
torchtitan/experiments/llama4/model/moe.py
torchtitan/experiments/llama4/scripts/convert_hf_to_dcp_with_gpus.py
torchtitan/experiments/llama4/scripts/convert_meta_to_dcp_with_gpus.py
torchtitan/experiments/multimodal/__init__.py
torchtitan/experiments/multimodal/check_padding_mm.py
torchtitan/experiments/multimodal/mm_collator.py
torchtitan/experiments/multimodal/mm_dataset.py
torchtitan/experiments/multimodal/model.py
torchtitan/experiments/multimodal/transform.py
torchtitan/experiments/multimodal/utils.py
torchtitan/experiments/multimodal/tests/__init__.py
torchtitan/experiments/multimodal/tests/test_multimodal_model.py
torchtitan/experiments/multimodal/tests/test_utils.py
torchtitan/experiments/multimodal/tokenizer/tiktoken.py
torchtitan/experiments/simple_fsdp/__init__.py
torchtitan/experiments/simple_fsdp/model.py
torchtitan/experiments/simple_fsdp/parallelize.py
torchtitan/experiments/simple_fsdp/simple_fsdp.py
torchtitan/experiments/simple_fsdp/tests/__init__.py
torchtitan/experiments/simple_fsdp/tests/integration_tests.py
torchtitan/experiments/simple_fsdp/tests/test_numerics.py
torchtitan/models/__init__.py
torchtitan/models/attention.py
torchtitan/models/llama3/__init__.py
torchtitan/models/llama3/infra/parallelize.py
torchtitan/models/llama3/infra/pipeline.py
torchtitan/models/llama3/model/args.py
torchtitan/models/llama3/model/model.py
torchtitan/protocols/model_converter.py
torchtitan/protocols/train_spec.py
torchtitan/tools/logging.py
torchtitan/tools/profiling.py
torchtitan/tools/utils.py