MANIFEST.in
README.md
setup.py
src/galvatron/__init__.py
src/galvatron/apex_install.sh
src/galvatron/flash_attn_ops_install.sh
src/galvatron/chatglm/ChatGLMForConditionalGeneration_pipeline.py
src/galvatron/chatglm/ChatGLMForConditionalGeneration_tensor_parallel.py
src/galvatron/chatglm/__init__.py
src/galvatron/chatglm/dataloader.py
src/galvatron/chatglm/hybrid_parallel_model_dist.py
src/galvatron/chatglm/process_profiled_memory.py
src/galvatron/chatglm/profile_forward.py
src/galvatron/chatglm/search_layerwise_hp_dist.py
src/galvatron/chatglm/train_hp_layerwise_dist.py
src/galvatron/chatglm/configs/forward_profiling_config.json
src/galvatron/chatglm/configs/galvatron_config_16gpus_4096hidden_28layers_72G_full_cpt_1f1b_zero2_bf16.json
src/galvatron/chatglm/configs/galvatron_config_16gpus_4096hidden_28layers_78G_full_cpt_1f1b_zero2_bf16.json
src/galvatron/chatglm/configs/memory_profiling_8gpus_dist_bf16_hidden4096_head32_seqlen2048.json
src/galvatron/clip/CLIPModel_pipeline.py
src/galvatron/clip/CLIPModel_tensor_parallel.py
src/galvatron/clip/__init__.py
src/galvatron/clip/config_utils.py
src/galvatron/clip/dataloader.py
src/galvatron/clip/hybrid_parallel_model_dist.py
src/galvatron/clip/process_profiled_memory.py
src/galvatron/clip/profile_forward.py
src/galvatron/clip/search_layerwise_hp_dist.py
src/galvatron/clip/train.py
src/galvatron/clip/train_hp_layerwise_dist.py
src/galvatron/clip/clip_hf_configs/CLIP-ViT-B-16.json
src/galvatron/clip/clip_hf_configs/CLIP-ViT-H-14-laion2B-s32B-b79K.json
src/galvatron/clip/clip_hf_configs/CLIP-ViT-L-14-laion2B-s32B-b82K.json
src/galvatron/clip/clip_hf_configs/CLIP-ViT-bigG-14-laion2B-39B-b160k.json
src/galvatron/clip/clip_hf_configs/CLIP-ViT-g-14-laion2B-s12B-b42K.json
src/galvatron/clip/configs/forward_profiling_config.json
src/galvatron/clip/configs/memory_profiling_8gpus_dist_bf16_vit-H-14.json
src/galvatron/clip/configs/memory_profiling_8gpus_dist_bf16_vit-bigG-14.json
src/galvatron/clip/configs/memory_profiling_8gpus_dist_bf16_vit-g-14.json
src/galvatron/env_configs/allreduce_bandwidth_4_gpus.json
src/galvatron/env_configs/allreduce_bandwidth_8_gpus.json
src/galvatron/env_configs/allreduce_bandwidth_dist_16_gpus.json
src/galvatron/env_configs/overlap_coefficient.json
src/galvatron/env_configs/p2p_bandwidth_dist_16_gpus.json
src/galvatron/gpt/GPTLMHeadmodel_pipeline.py
src/galvatron/gpt/GPTLMHeadmodel_tensor_parallel.py
src/galvatron/gpt/__init__.py
src/galvatron/gpt/dataloader.py
src/galvatron/gpt/gpt_config_utils.py
src/galvatron/gpt/hybrid_parallel_model_dist.py
src/galvatron/gpt/process_profiled_memory.py
src/galvatron/gpt/profile_forward.py
src/galvatron/gpt/search_layerwise_hp_dist.py
src/galvatron/gpt/train.py
src/galvatron/gpt/train_hp_layerwise_dist.py
src/galvatron/gpt/configs/forward_profiling_config.json
src/galvatron/gpt/configs/memory_profiling_8gpus_dist_bf16_hidden1600_head32_seqlen1024.json
src/galvatron/gpt/configs/memory_profiling_8gpus_dist_bf16_hidden2560_head32_seqlen2048.json
src/galvatron/gpt/configs/memory_profiling_8gpus_dist_bf16_hidden4096_head32_seqlen2048.json
src/galvatron/gpt/gpt-config/gpt-1.5b.json
src/galvatron/gpt/gpt-config/gpt-2.7b.json
src/galvatron/gpt/gpt-config/gpt-6.7b.json
src/galvatron/llama/Llamamodel_pipeline.py
src/galvatron/llama/Llamamodel_tensor_parallel.py
src/galvatron/llama/__init__.py
src/galvatron/llama/dataloader.py
src/galvatron/llama/hybrid_parallel_model_dist.py
src/galvatron/llama/llama_config_utils.py
src/galvatron/llama/process_profiled_memory.py
src/galvatron/llama/profile_forward.py
src/galvatron/llama/search_layerwise_hp_dist.py
src/galvatron/llama/train.py
src/galvatron/llama/train_hp_layerwise_dist.py
src/galvatron/llama/configs/forward_profiling_config.json
src/galvatron/llama/configs/galvatron_config_16gpus_4096hidden_32layers_34G_full_cpt_1f1b_zero2_bf16.json
src/galvatron/llama/configs/memory_profiling_8gpus_dist_bf16_hidden4096_head32_seqlen2048.json
src/galvatron/llama/configs/memory_profiling_8gpus_dist_bf16_hidden5120_head40_seqlen2048.json
src/galvatron/llama/configs/memory_profiling_8gpus_dist_bf16_hidden6656_head52_seqlen2048.json
src/galvatron/llama/llama-config/llama-13b/params.json
src/galvatron/llama/llama-config/llama-13b/llama-13b/config.json
src/galvatron/llama/llama-config/llama-30b/params.json
src/galvatron/llama/llama-config/llama-30b/llama-30b/config.json
src/galvatron/llama/llama-config/llama-7b/params.json
src/galvatron/llama/llama-config/llama-7b/llama-7b/config.json
src/galvatron/pipeline/__init__.py
src/galvatron/pipeline/pipeline.py
src/galvatron/pipeline/utils.py
src/galvatron/site_package/__init__.py
src/galvatron/site_package/megatron/__init__.py
src/galvatron/site_package/megatron/arguments.py
src/galvatron/site_package/megatron/checkpointing.py
src/galvatron/site_package/megatron/dist_signal_handler.py
src/galvatron/site_package/megatron/global_vars.py
src/galvatron/site_package/megatron/indexer.py
src/galvatron/site_package/megatron/initialize.py
src/galvatron/site_package/megatron/memory.py
src/galvatron/site_package/megatron/microbatches.py
src/galvatron/site_package/megatron/optimizer_param_scheduler.py
src/galvatron/site_package/megatron/text_generation_server.py
src/galvatron/site_package/megatron/timers.py
src/galvatron/site_package/megatron/training.py
src/galvatron/site_package/megatron/utils.py
src/galvatron/site_package/megatron/core/__init__.py
src/galvatron/site_package/megatron/core/enums.py
src/galvatron/site_package/megatron/core/package_info.py
src/galvatron/site_package/megatron/core/parallel_state.py
src/galvatron/site_package/megatron/core/utils.py
src/galvatron/site_package/megatron/core/pipeline_parallel/__init__.py
src/galvatron/site_package/megatron/core/pipeline_parallel/p2p_communication.py
src/galvatron/site_package/megatron/core/pipeline_parallel/schedules.py
src/galvatron/site_package/megatron/core/tensor_parallel/__init__.py
src/galvatron/site_package/megatron/core/tensor_parallel/cross_entropy.py
src/galvatron/site_package/megatron/core/tensor_parallel/data.py
src/galvatron/site_package/megatron/core/tensor_parallel/layers.py
src/galvatron/site_package/megatron/core/tensor_parallel/mappings.py
src/galvatron/site_package/megatron/core/tensor_parallel/mappings_group.py
src/galvatron/site_package/megatron/core/tensor_parallel/random.py
src/galvatron/site_package/megatron/core/tensor_parallel/utils.py
src/galvatron/site_package/megatron/data/__init__.py
src/galvatron/site_package/megatron/data/autoaugment.py
src/galvatron/site_package/megatron/data/bert_dataset.py
src/galvatron/site_package/megatron/data/biencoder_dataset_utils.py
src/galvatron/site_package/megatron/data/blendable_dataset.py
src/galvatron/site_package/megatron/data/data_samplers.py
src/galvatron/site_package/megatron/data/dataset_utils.py
src/galvatron/site_package/megatron/data/gpt_dataset.py
src/galvatron/site_package/megatron/data/ict_dataset.py
src/galvatron/site_package/megatron/data/image_folder.py
src/galvatron/site_package/megatron/data/indexed_dataset.py
src/galvatron/site_package/megatron/data/orqa_wiki_dataset.py
src/galvatron/site_package/megatron/data/realm_dataset_utils.py
src/galvatron/site_package/megatron/data/realm_index.py
src/galvatron/site_package/megatron/data/t5_dataset.py
src/galvatron/site_package/megatron/data/vit_dataset.py
src/galvatron/site_package/megatron/fused_kernels/__init__.py
src/galvatron/site_package/megatron/fused_kernels/tests/__init__.py
src/galvatron/site_package/megatron/fused_kernels/tests/test_fused_kernels.py
src/galvatron/site_package/megatron/model/__init__.py
src/galvatron/site_package/megatron/model/bert_model.py
src/galvatron/site_package/megatron/model/biencoder_model.py
src/galvatron/site_package/megatron/model/classification.py
src/galvatron/site_package/megatron/model/distributed.py
src/galvatron/site_package/megatron/model/enums.py
src/galvatron/site_package/megatron/model/fused_bias_gelu.py
src/galvatron/site_package/megatron/model/fused_layer_norm.py
src/galvatron/site_package/megatron/model/fused_softmax.py
src/galvatron/site_package/megatron/model/gpt_model.py
src/galvatron/site_package/megatron/model/language_model.py
src/galvatron/site_package/megatron/model/module.py
src/galvatron/site_package/megatron/model/multiple_choice.py
src/galvatron/site_package/megatron/model/realm_model.py
src/galvatron/site_package/megatron/model/retro_transformer.py
src/galvatron/site_package/megatron/model/rotary_pos_embedding.py
src/galvatron/site_package/megatron/model/t5_model.py
src/galvatron/site_package/megatron/model/transformer.py
src/galvatron/site_package/megatron/model/utils.py
src/galvatron/site_package/megatron/optimizer/__init__.py
src/galvatron/site_package/megatron/optimizer/clip_grads.py
src/galvatron/site_package/megatron/optimizer/distrib_optimizer.py
src/galvatron/site_package/megatron/optimizer/grad_scaler.py
src/galvatron/site_package/megatron/optimizer/optimizer.py
src/galvatron/site_package/megatron/text_generation/__init__.py
src/galvatron/site_package/megatron/text_generation/api.py
src/galvatron/site_package/megatron/text_generation/beam_utils.py
src/galvatron/site_package/megatron/text_generation/communication.py
src/galvatron/site_package/megatron/text_generation/forward_step.py
src/galvatron/site_package/megatron/text_generation/generation.py
src/galvatron/site_package/megatron/text_generation/sampling.py
src/galvatron/site_package/megatron/text_generation/tokenization.py
src/galvatron/site_package/megatron/tokenizer/__init__.py
src/galvatron/site_package/megatron/tokenizer/bert_tokenization.py
src/galvatron/site_package/megatron/tokenizer/gpt2_tokenization.py
src/galvatron/site_package/megatron/tokenizer/tokenizer.py
src/galvatron/site_package/megatron_layers/__init__.py
src/galvatron/site_package/megatron_layers/transformer.py
src/galvatron/utils/__init__.py
src/galvatron/utils/allgather_utils.py
src/galvatron/utils/config_utils.py
src/galvatron/utils/cost_model.py
src/galvatron/utils/cost_model_dist.py
src/galvatron/utils/dp_utils.py
src/galvatron/utils/dp_utils_dist.py
src/galvatron/utils/group_comm_utils.py
src/galvatron/utils/group_comm_utils_dist.py
src/galvatron/utils/init_utils.py
src/galvatron/utils/memory_utils.py
src/galvatron/utils/parallel_utils.py
src/galvatron/utils/strategies_utils.py
src/hetu_galvatron.egg-info/PKG-INFO
src/hetu_galvatron.egg-info/SOURCES.txt
src/hetu_galvatron.egg-info/dependency_links.txt
src/hetu_galvatron.egg-info/requires.txt
src/hetu_galvatron.egg-info/top_level.txt