LICENSE
README.md
pyproject.toml
src/megatron/bridge/__init__.py
src/megatron/bridge/package_info.py
src/megatron/bridge/data/__init__.py
src/megatron/bridge/data/finetuning.py
src/megatron/bridge/data/iterator_utils.py
src/megatron/bridge/data/loaders.py
src/megatron/bridge/data/samplers.py
src/megatron/bridge/data/utils.py
src/megatron/bridge/data/builders/__init__.py
src/megatron/bridge/data/builders/finetuning_dataset.py
src/megatron/bridge/data/builders/hf_dataset.py
src/megatron/bridge/data/datasets/__init__.py
src/megatron/bridge/data/datasets/fim_dataset.py
src/megatron/bridge/data/datasets/packed_sequence.py
src/megatron/bridge/data/datasets/packing_utils.py
src/megatron/bridge/data/datasets/sft.py
src/megatron/bridge/data/datasets/utils.py
src/megatron/bridge/data/energon/base_energon_datamodule.py
src/megatron/bridge/data/energon/energon_provider.py
src/megatron/bridge/data/hf_processors/__init__.py
src/megatron/bridge/data/hf_processors/squad.py
src/megatron/bridge/data/vlm_datasets/__init__.py
src/megatron/bridge/data/vlm_datasets/collate.py
src/megatron/bridge/data/vlm_datasets/conversation_dataset.py
src/megatron/bridge/data/vlm_datasets/hf_dataset_makers.py
src/megatron/bridge/data/vlm_datasets/hf_provider.py
src/megatron/bridge/data/vlm_datasets/mock_provider.py
src/megatron/bridge/data/vlm_datasets/preloaded_provider.py
src/megatron/bridge/data/vlm_datasets/token_utils.py
src/megatron/bridge/inference/vlm/base.py
src/megatron/bridge/inference/vlm/qwenvl_inference_wrapper.py
src/megatron/bridge/inference/vlm/vlm_engine.py
src/megatron/bridge/inference/vlm/vlm_inference_controller.py
src/megatron/bridge/models/__init__.py
src/megatron/bridge/models/config.py
src/megatron/bridge/models/distillation_provider.py
src/megatron/bridge/models/gpt_full_te_layer_autocast_spec.py
src/megatron/bridge/models/gpt_provider.py
src/megatron/bridge/models/model_provider.py
src/megatron/bridge/models/t5_provider.py
src/megatron/bridge/models/transformer_config.py
src/megatron/bridge/models/conversion/__init__.py
src/megatron/bridge/models/conversion/auto_bridge.py
src/megatron/bridge/models/conversion/mapping_registry.py
src/megatron/bridge/models/conversion/model_bridge.py
src/megatron/bridge/models/conversion/param_mapping.py
src/megatron/bridge/models/conversion/peft_bridge.py
src/megatron/bridge/models/conversion/utils.py
src/megatron/bridge/models/decorators/__init__.py
src/megatron/bridge/models/decorators/dispatch.py
src/megatron/bridge/models/decorators/torchrun.py
src/megatron/bridge/models/deepseek/__init__.py
src/megatron/bridge/models/deepseek/common.py
src/megatron/bridge/models/deepseek/deepseek_provider.py
src/megatron/bridge/models/deepseek/deepseek_v2_bridge.py
src/megatron/bridge/models/deepseek/deepseek_v3_bridge.py
src/megatron/bridge/models/gemma/__init__.py
src/megatron/bridge/models/gemma/gemma2_bridge.py
src/megatron/bridge/models/gemma/gemma2_provider.py
src/megatron/bridge/models/gemma/gemma3_bridge.py
src/megatron/bridge/models/gemma/gemma3_provider.py
src/megatron/bridge/models/gemma/gemma_bridge.py
src/megatron/bridge/models/gemma/gemma_provider.py
src/megatron/bridge/models/gemma/modules.py
src/megatron/bridge/models/gemma_vl/__init__.py
src/megatron/bridge/models/gemma_vl/gemma3_vl_bridge.py
src/megatron/bridge/models/gemma_vl/gemma3_vl_provider.py
src/megatron/bridge/models/gemma_vl/modeling_gemma3_vl.py
src/megatron/bridge/models/glm/__init__.py
src/megatron/bridge/models/glm/glm45_bridge.py
src/megatron/bridge/models/glm/glm45_provider.py
src/megatron/bridge/models/glm_vl/__init__.py
src/megatron/bridge/models/glm_vl/glm_45v_bridge.py
src/megatron/bridge/models/glm_vl/glm_45v_provider.py
src/megatron/bridge/models/glm_vl/modeling_glm_45v.py
src/megatron/bridge/models/gpt_oss/__init__.py
src/megatron/bridge/models/gpt_oss/gpt_oss_bridge.py
src/megatron/bridge/models/gpt_oss/gpt_oss_provider.py
src/megatron/bridge/models/hf_pretrained/__init__.py
src/megatron/bridge/models/hf_pretrained/base.py
src/megatron/bridge/models/hf_pretrained/causal_lm.py
src/megatron/bridge/models/hf_pretrained/safe_config_loader.py
src/megatron/bridge/models/hf_pretrained/state.py
src/megatron/bridge/models/hf_pretrained/utils.py
src/megatron/bridge/models/hf_pretrained/vlm.py
src/megatron/bridge/models/kimi/__init__.py
src/megatron/bridge/models/kimi/kimi_provider.py
src/megatron/bridge/models/llama/__init__.py
src/megatron/bridge/models/llama/llama4_utils.py
src/megatron/bridge/models/llama/llama_bridge.py
src/megatron/bridge/models/llama/llama_provider.py
src/megatron/bridge/models/llama_nemotron/__init__.py
src/megatron/bridge/models/llama_nemotron/llama_nemotron_bridge.py
src/megatron/bridge/models/llama_nemotron/llama_nemotron_provider.py
src/megatron/bridge/models/mamba/__init__.py
src/megatron/bridge/models/mamba/mamba_provider.py
src/megatron/bridge/models/ministral3/__init__.py
src/megatron/bridge/models/ministral3/ministral3_bridge.py
src/megatron/bridge/models/ministral3/ministral3_provider.py
src/megatron/bridge/models/ministral3/modeling_ministral3.py
src/megatron/bridge/models/mistral/__init__.py
src/megatron/bridge/models/mistral/mistral_bridge.py
src/megatron/bridge/models/mistral/mistral_provider.py
src/megatron/bridge/models/nemotron/__init__.py
src/megatron/bridge/models/nemotron/nemotron_bridge.py
src/megatron/bridge/models/nemotron/nemotron_provider.py
src/megatron/bridge/models/nemotron_vl/__init__.py
src/megatron/bridge/models/nemotron_vl/modeling_nemotron_vl.py
src/megatron/bridge/models/nemotron_vl/nemotron_vl_bridge.py
src/megatron/bridge/models/nemotron_vl/nemotron_vl_provider.py
src/megatron/bridge/models/nemotron_vl/nemotron_vl_utils.py
src/megatron/bridge/models/nemotronh/__init__.py
src/megatron/bridge/models/nemotronh/nemotron_h_bridge.py
src/megatron/bridge/models/nemotronh/nemotron_h_provider.py
src/megatron/bridge/models/olmoe/__init__.py
src/megatron/bridge/models/olmoe/olmoe_bridge.py
src/megatron/bridge/models/olmoe/olmoe_provider.py
src/megatron/bridge/models/qwen/__init__.py
src/megatron/bridge/models/qwen/qwen2_bridge.py
src/megatron/bridge/models/qwen/qwen3_bridge.py
src/megatron/bridge/models/qwen/qwen3_moe_bridge.py
src/megatron/bridge/models/qwen/qwen3_next_bridge.py
src/megatron/bridge/models/qwen/qwen_provider.py
src/megatron/bridge/models/qwen_vl/__init__.py
src/megatron/bridge/models/qwen_vl/modeling_qwen25_vl.py
src/megatron/bridge/models/qwen_vl/qwen25_vl_bridge.py
src/megatron/bridge/models/qwen_vl/qwen3_vl_bridge.py
src/megatron/bridge/models/qwen_vl/qwen3_vl_provider.py
src/megatron/bridge/models/qwen_vl/qwen_vl_provider.py
src/megatron/bridge/models/qwen_vl/modelling_qwen3_vl/__init__.py
src/megatron/bridge/models/qwen_vl/modelling_qwen3_vl/model.py
src/megatron/bridge/models/qwen_vl/modelling_qwen3_vl/rope.py
src/megatron/bridge/models/qwen_vl/modelling_qwen3_vl/text_model.py
src/megatron/bridge/models/qwen_vl/modelling_qwen3_vl/transformer_block.py
src/megatron/bridge/models/qwen_vl/modelling_qwen3_vl/transformer_config.py
src/megatron/bridge/models/qwen_vl/modelling_qwen3_vl/utils.py
src/megatron/bridge/peft/__init__.py
src/megatron/bridge/peft/adapter_wrapper.py
src/megatron/bridge/peft/base.py
src/megatron/bridge/peft/canonical_lora.py
src/megatron/bridge/peft/dora.py
src/megatron/bridge/peft/dora_layers.py
src/megatron/bridge/peft/lora.py
src/megatron/bridge/peft/lora_layers.py
src/megatron/bridge/peft/module_matcher.py
src/megatron/bridge/peft/recompute.py
src/megatron/bridge/peft/utils.py
src/megatron/bridge/peft/walk_utils.py
src/megatron/bridge/recipes/__init__.py
src/megatron/bridge/recipes/run_plugins.py
src/megatron/bridge/recipes/deepseek/__init__.py
src/megatron/bridge/recipes/deepseek/deepseek_v2.py
src/megatron/bridge/recipes/deepseek/deepseek_v3.py
src/megatron/bridge/recipes/gemma/__init__.py
src/megatron/bridge/recipes/gemma/gemma2.py
src/megatron/bridge/recipes/gemma/gemma3.py
src/megatron/bridge/recipes/gemma3_vl/__init__.py
src/megatron/bridge/recipes/gemma3_vl/gemma3_vl.py
src/megatron/bridge/recipes/glm/__init__.py
src/megatron/bridge/recipes/glm/glm45.py
src/megatron/bridge/recipes/glm_vl/__init__.py
src/megatron/bridge/recipes/glm_vl/glm_45v.py
src/megatron/bridge/recipes/gpt/__init__.py
src/megatron/bridge/recipes/gpt/gpt3_175b.py
src/megatron/bridge/recipes/gpt_oss/__init__.py
src/megatron/bridge/recipes/gpt_oss/gpt_oss.py
src/megatron/bridge/recipes/kimi/kimi_k2.py
src/megatron/bridge/recipes/llama/__init__.py
src/megatron/bridge/recipes/llama/llama2.py
src/megatron/bridge/recipes/llama/llama3.py
src/megatron/bridge/recipes/ministral3/__init__.py
src/megatron/bridge/recipes/ministral3/ministral3.py
src/megatron/bridge/recipes/moonlight/__init__.py
src/megatron/bridge/recipes/moonlight/moonlight_16b.py
src/megatron/bridge/recipes/nemotron_vl/__init__.py
src/megatron/bridge/recipes/nemotron_vl/nemotron_nano_v2_vl.py
src/megatron/bridge/recipes/nemotronh/__init__.py
src/megatron/bridge/recipes/nemotronh/nemotron_3_nano.py
src/megatron/bridge/recipes/nemotronh/nemotron_nano_v2.py
src/megatron/bridge/recipes/nemotronh/nemotronh.py
src/megatron/bridge/recipes/olmoe/__init__.py
src/megatron/bridge/recipes/olmoe/olmoe_7b.py
src/megatron/bridge/recipes/qwen/__init__.py
src/megatron/bridge/recipes/qwen/qwen2.py
src/megatron/bridge/recipes/qwen/qwen3.py
src/megatron/bridge/recipes/qwen/qwen3_moe.py
src/megatron/bridge/recipes/qwen/qwen3_next.py
src/megatron/bridge/recipes/qwen_vl/__init__.py
src/megatron/bridge/recipes/qwen_vl/qwen25_vl.py
src/megatron/bridge/recipes/qwen_vl/qwen25_vl_dataset.py
src/megatron/bridge/recipes/qwen_vl/qwen3_vl.py
src/megatron/bridge/recipes/qwen_vl/data/energon/task_encoder.py
src/megatron/bridge/recipes/utils/__init__.py
src/megatron/bridge/recipes/utils/dataset_utils.py
src/megatron/bridge/recipes/utils/finetune_utils.py
src/megatron/bridge/recipes/utils/optimizer_utils.py
src/megatron/bridge/recipes/utils/tokenizer_utils.py
src/megatron/bridge/training/__init__.py
src/megatron/bridge/training/callbacks.py
src/megatron/bridge/training/checkpointing.py
src/megatron/bridge/training/comm_overlap.py
src/megatron/bridge/training/config.py
src/megatron/bridge/training/distill.py
src/megatron/bridge/training/eval.py
src/megatron/bridge/training/fault_tolerance.py
src/megatron/bridge/training/finetune.py
src/megatron/bridge/training/flex_dispatcher_backend.py
src/megatron/bridge/training/forward_step_func_types.py
src/megatron/bridge/training/gpt_step.py
src/megatron/bridge/training/initialize.py
src/megatron/bridge/training/inprocess_restart.py
src/megatron/bridge/training/llava_step.py
src/megatron/bridge/training/losses.py
src/megatron/bridge/training/mixed_precision.py
src/megatron/bridge/training/model_load_save.py
src/megatron/bridge/training/nvrx_straggler.py
src/megatron/bridge/training/optim.py
src/megatron/bridge/training/pretrain.py
src/megatron/bridge/training/profiling.py
src/megatron/bridge/training/setup.py
src/megatron/bridge/training/state.py
src/megatron/bridge/training/tensor_inspect.py
src/megatron/bridge/training/train.py
src/megatron/bridge/training/vlm_step.py
src/megatron/bridge/training/mlm_compat/__init__.py
src/megatron/bridge/training/mlm_compat/activations.py
src/megatron/bridge/training/mlm_compat/arguments.py
src/megatron/bridge/training/mlm_compat/model.py
src/megatron/bridge/training/post_training/__init__.py
src/megatron/bridge/training/post_training/checkpointing.py
src/megatron/bridge/training/post_training/distillation.py
src/megatron/bridge/training/tokenizers/__init__.py
src/megatron/bridge/training/tokenizers/bert_tokenization.py
src/megatron/bridge/training/tokenizers/config.py
src/megatron/bridge/training/tokenizers/gpt2_tokenization.py
src/megatron/bridge/training/tokenizers/multimodal_tokenizer.py
src/megatron/bridge/training/tokenizers/tokenizer.py
src/megatron/bridge/training/tokenizers/utils.py
src/megatron/bridge/training/utils/__init__.py
src/megatron/bridge/training/utils/batch_utils.py
src/megatron/bridge/training/utils/checkpoint_utils.py
src/megatron/bridge/training/utils/config_utils.py
src/megatron/bridge/training/utils/flop_utils.py
src/megatron/bridge/training/utils/log_utils.py
src/megatron/bridge/training/utils/mlflow_utils.py
src/megatron/bridge/training/utils/moe_token_drop.py
src/megatron/bridge/training/utils/omegaconf_utils.py
src/megatron/bridge/training/utils/packed_seq_utils.py
src/megatron/bridge/training/utils/padding_utils.py
src/megatron/bridge/training/utils/pg_utils.py
src/megatron/bridge/training/utils/sig_utils.py
src/megatron/bridge/training/utils/theoretical_memory_utils.py
src/megatron/bridge/training/utils/train_utils.py
src/megatron/bridge/training/utils/visual_inputs.py
src/megatron/bridge/training/utils/wandb_utils.py
src/megatron/bridge/utils/__init__.py
src/megatron/bridge/utils/common_utils.py
src/megatron/bridge/utils/decorators.py
src/megatron/bridge/utils/fusions.py
src/megatron/bridge/utils/import_utils.py
src/megatron/bridge/utils/instantiate_utils.py
src/megatron/bridge/utils/slurm_utils.py
src/megatron/bridge/utils/vocab_utils.py
src/megatron/bridge/utils/yaml_utils.py
src/megatron_bridge.egg-info/PKG-INFO
src/megatron_bridge.egg-info/SOURCES.txt
src/megatron_bridge.egg-info/dependency_links.txt
src/megatron_bridge.egg-info/entry_points.txt
src/megatron_bridge.egg-info/requires.txt
src/megatron_bridge.egg-info/top_level.txt