LICENSE
MANIFEST.in
README.md
setup.py
version.txt
colossalai/__init__.py
colossalai/initialize.py
colossalai/version.py
colossalai.egg-info/PKG-INFO
colossalai.egg-info/SOURCES.txt
colossalai.egg-info/dependency_links.txt
colossalai.egg-info/entry_points.txt
colossalai.egg-info/requires.txt
colossalai.egg-info/top_level.txt
colossalai/_C/__init__.py
colossalai/_analyzer/__init__.py
colossalai/_analyzer/envs.py
colossalai/_analyzer/_subclasses/__init__.py
colossalai/_analyzer/_subclasses/_meta_registration.py
colossalai/_analyzer/_subclasses/_monkey_patch.py
colossalai/_analyzer/_subclasses/flop_tensor.py
colossalai/_analyzer/_subclasses/meta_tensor.py
colossalai/_analyzer/fx/__init__.py
colossalai/_analyzer/fx/codegen.py
colossalai/_analyzer/fx/graph_module.py
colossalai/_analyzer/fx/node_util.py
colossalai/_analyzer/fx/symbolic_profile.py
colossalai/_analyzer/fx/passes/__init__.py
colossalai/_analyzer/fx/passes/graph_profile.py
colossalai/_analyzer/fx/passes/shape_prop.py
colossalai/_analyzer/fx/tracer/__init__.py
colossalai/_analyzer/fx/tracer/bias_addition.py
colossalai/_analyzer/fx/tracer/custom_leaf_module.py
colossalai/_analyzer/fx/tracer/proxy.py
colossalai/_analyzer/fx/tracer/symbolic_trace.py
colossalai/_analyzer/fx/tracer/tracer.py
colossalai/accelerator/__init__.py
colossalai/accelerator/api.py
colossalai/accelerator/base_accelerator.py
colossalai/accelerator/cpu_accelerator.py
colossalai/accelerator/cuda_accelerator.py
colossalai/accelerator/npu_accelerator.py
colossalai/amp/__init__.py
colossalai/amp/naive_amp/__init__.py
colossalai/amp/naive_amp/mixed_precision_optimizer.py
colossalai/amp/naive_amp/grad_scaler/__init__.py
colossalai/amp/naive_amp/grad_scaler/base_grad_scaler.py
colossalai/amp/naive_amp/grad_scaler/constant_grad_scaler.py
colossalai/amp/naive_amp/grad_scaler/dynamic_grad_scaler.py
colossalai/amp/naive_amp/mixed_precision_mixin/__init__.py
colossalai/amp/naive_amp/mixed_precision_mixin/base.py
colossalai/amp/naive_amp/mixed_precision_mixin/bf16.py
colossalai/amp/naive_amp/mixed_precision_mixin/fp16.py
colossalai/auto_parallel/__init__.py
colossalai/auto_parallel/checkpoint/__init__.py
colossalai/auto_parallel/checkpoint/build_c_ext.py
colossalai/auto_parallel/checkpoint/ckpt_solver_base.py
colossalai/auto_parallel/checkpoint/ckpt_solver_chen.py
colossalai/auto_parallel/checkpoint/ckpt_solver_rotor.py
colossalai/auto_parallel/checkpoint/operation.py
colossalai/auto_parallel/meta_profiler/__init__.py
colossalai/auto_parallel/meta_profiler/constants.py
colossalai/auto_parallel/meta_profiler/registry.py
colossalai/auto_parallel/meta_profiler/shard_metainfo.py
colossalai/auto_parallel/meta_profiler/meta_registry/__init__.py
colossalai/auto_parallel/meta_profiler/meta_registry/activation.py
colossalai/auto_parallel/meta_profiler/meta_registry/binary_elementwise_ops.py
colossalai/auto_parallel/meta_profiler/meta_registry/conv.py
colossalai/auto_parallel/meta_profiler/meta_registry/embedding.py
colossalai/auto_parallel/meta_profiler/meta_registry/linear.py
colossalai/auto_parallel/meta_profiler/meta_registry/non_spmd.py
colossalai/auto_parallel/meta_profiler/meta_registry/norm.py
colossalai/auto_parallel/meta_profiler/meta_registry/pooling.py
colossalai/auto_parallel/meta_profiler/meta_registry/tensor.py
colossalai/auto_parallel/meta_profiler/meta_registry/where.py
colossalai/auto_parallel/offload/__init__.py
colossalai/auto_parallel/offload/amp_optimizer.py
colossalai/auto_parallel/offload/base_offload_module.py
colossalai/auto_parallel/offload/mem_optimize.py
colossalai/auto_parallel/offload/region.py
colossalai/auto_parallel/offload/region_manager.py
colossalai/auto_parallel/offload/runtime.py
colossalai/auto_parallel/offload/solver.py
colossalai/auto_parallel/offload/training_simulator.py
colossalai/auto_parallel/offload/util.py
colossalai/auto_parallel/passes/__init__.py
colossalai/auto_parallel/passes/comm_metainfo_pass.py
colossalai/auto_parallel/passes/constants.py
colossalai/auto_parallel/passes/meta_info_prop.py
colossalai/auto_parallel/passes/runtime_apply_pass.py
colossalai/auto_parallel/passes/runtime_preparation_pass.py
colossalai/auto_parallel/pipeline_shard/__init__.py
colossalai/auto_parallel/tensor_shard/__init__.py
colossalai/auto_parallel/tensor_shard/constants.py
colossalai/auto_parallel/tensor_shard/initialize.py
colossalai/auto_parallel/tensor_shard/options.py
colossalai/auto_parallel/tensor_shard/sharding_strategy.py
colossalai/auto_parallel/tensor_shard/node_handler/__init__.py
colossalai/auto_parallel/tensor_shard/node_handler/addmm_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/batch_norm_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/binary_elementwise_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/bmm_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/conv_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/default_reshape_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/embedding_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/getattr_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/getitem_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/layer_norm_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/linear_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/matmul_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/node_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/normal_pooling_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/output_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/permute_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/placeholder_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/registry.py
colossalai/auto_parallel/tensor_shard/node_handler/softmax_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/split_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/sum_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/tensor_constructor_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/transpose_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/unary_elementwise_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/view_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/where_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/__init__.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/batch_norm_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/binary_elementwise_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/conv_strategy_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/embedding_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/getattr_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/getitem_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/layer_norm_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/matmul_strategy_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/normal_pooling_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/output_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/placeholder_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/reshape_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/softmax_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/strategy_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/sum_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/tensor_constructor_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/unary_elementwise_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/where_generator.py
colossalai/auto_parallel/tensor_shard/solver/__init__.py
colossalai/auto_parallel/tensor_shard/solver/cost_graph.py
colossalai/auto_parallel/tensor_shard/solver/graph_analysis.py
colossalai/auto_parallel/tensor_shard/solver/solver.py
colossalai/auto_parallel/tensor_shard/solver/strategies_constructor.py
colossalai/auto_parallel/tensor_shard/utils/__init__.py
colossalai/auto_parallel/tensor_shard/utils/broadcast.py
colossalai/auto_parallel/tensor_shard/utils/factory.py
colossalai/auto_parallel/tensor_shard/utils/misc.py
colossalai/auto_parallel/tensor_shard/utils/reshape.py
colossalai/auto_parallel/tensor_shard/utils/sharding.py
colossalai/booster/__init__.py
colossalai/booster/accelerator.py
colossalai/booster/booster.py
colossalai/booster/mixed_precision/__init__.py
colossalai/booster/mixed_precision/bf16.py
colossalai/booster/mixed_precision/fp16_apex.py
colossalai/booster/mixed_precision/fp16_naive.py
colossalai/booster/mixed_precision/fp16_torch.py
colossalai/booster/mixed_precision/fp8.py
colossalai/booster/mixed_precision/mixed_precision_base.py
colossalai/booster/plugin/__init__.py
colossalai/booster/plugin/dp_plugin_base.py
colossalai/booster/plugin/gemini_plugin.py
colossalai/booster/plugin/hybrid_parallel_plugin.py
colossalai/booster/plugin/low_level_zero_plugin.py
colossalai/booster/plugin/moe_hybrid_parallel_plugin.py
colossalai/booster/plugin/plugin_base.py
colossalai/booster/plugin/pp_plugin_base.py
colossalai/booster/plugin/torch_ddp_plugin.py
colossalai/booster/plugin/torch_fsdp_plugin.py
colossalai/checkpoint_io/__init__.py
colossalai/checkpoint_io/checkpoint_io_base.py
colossalai/checkpoint_io/general_checkpoint_io.py
colossalai/checkpoint_io/hybrid_parallel_checkpoint_io.py
colossalai/checkpoint_io/index_file.py
colossalai/checkpoint_io/utils.py
colossalai/cli/__init__.py
colossalai/cli/cli.py
colossalai/cli/check/__init__.py
colossalai/cli/check/check_installation.py
colossalai/cli/launcher/__init__.py
colossalai/cli/launcher/hostinfo.py
colossalai/cli/launcher/multinode_runner.py
colossalai/cli/launcher/run.py
colossalai/cluster/__init__.py
colossalai/cluster/device_mesh_manager.py
colossalai/cluster/dist_coordinator.py
colossalai/cluster/process_group_manager.py
colossalai/cluster/process_group_mesh.py
colossalai/context/__init__.py
colossalai/context/config.py
colossalai/context/singleton_meta.py
colossalai/device/__init__.py
colossalai/device/alpha_beta_profiler.py
colossalai/device/calc_pipeline_strategy.py
colossalai/device/device_mesh.py
colossalai/fx/__init__.py
colossalai/fx/_compatibility.py
colossalai/fx/_meta_regist_12.py
colossalai/fx/_meta_regist_13.py
colossalai/fx/graph_module.py
colossalai/fx/proxy.py
colossalai/fx/codegen/__init__.py
colossalai/fx/codegen/activation_checkpoint_codegen.py
colossalai/fx/passes/__init__.py
colossalai/fx/passes/adding_split_node_pass.py
colossalai/fx/passes/concrete_info_prop.py
colossalai/fx/passes/meta_info_prop.py
colossalai/fx/passes/passes_for_gpt2_test.py
colossalai/fx/passes/shard_1d_pass.py
colossalai/fx/passes/split_module.py
colossalai/fx/passes/utils.py
colossalai/fx/profiler/__init__.py
colossalai/fx/profiler/constants.py
colossalai/fx/profiler/dataflow.py
colossalai/fx/profiler/memory_utils.py
colossalai/fx/profiler/opcount.py
colossalai/fx/profiler/profiler.py
colossalai/fx/profiler/shard_utils.py
colossalai/fx/profiler/tensor.py
colossalai/fx/profiler/experimental/__init__.py
colossalai/fx/profiler/experimental/constants.py
colossalai/fx/profiler/experimental/profiler.py
colossalai/fx/profiler/experimental/registry.py
colossalai/fx/profiler/experimental/shard_utils.py
colossalai/fx/profiler/experimental/profiler_function/__init__.py
colossalai/fx/profiler/experimental/profiler_function/activation_function.py
colossalai/fx/profiler/experimental/profiler_function/arithmetic.py
colossalai/fx/profiler/experimental/profiler_function/embedding.py
colossalai/fx/profiler/experimental/profiler_function/linear.py
colossalai/fx/profiler/experimental/profiler_function/normalization.py
colossalai/fx/profiler/experimental/profiler_function/pooling.py
colossalai/fx/profiler/experimental/profiler_function/python_ops.py
colossalai/fx/profiler/experimental/profiler_function/torch_ops.py
colossalai/fx/profiler/experimental/profiler_module/__init__.py
colossalai/fx/profiler/experimental/profiler_module/activation_function.py
colossalai/fx/profiler/experimental/profiler_module/attention.py
colossalai/fx/profiler/experimental/profiler_module/convolution.py
colossalai/fx/profiler/experimental/profiler_module/dropout.py
colossalai/fx/profiler/experimental/profiler_module/embedding.py
colossalai/fx/profiler/experimental/profiler_module/linear.py
colossalai/fx/profiler/experimental/profiler_module/normalization.py
colossalai/fx/profiler/experimental/profiler_module/pooling.py
colossalai/fx/profiler/experimental/profiler_module/rnn.py
colossalai/fx/profiler/experimental/profiler_module/torch_op.py
colossalai/fx/tracer/__init__.py
colossalai/fx/tracer/_meta_trace.py
colossalai/fx/tracer/_symbolic_trace.py
colossalai/fx/tracer/_tracer_utils.py
colossalai/fx/tracer/experimental.py
colossalai/fx/tracer/registry.py
colossalai/fx/tracer/tracer.py
colossalai/fx/tracer/bias_addition_patch/__init__.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/__init__.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/addbmm.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/addmm.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/bias_addition_function.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/linear.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_module/__init__.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_module/bias_addition_module.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_module/conv.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_module/linear.py
colossalai/fx/tracer/meta_patch/__init__.py
colossalai/fx/tracer/meta_patch/patched_function/__init__.py
colossalai/fx/tracer/meta_patch/patched_function/activation_function.py
colossalai/fx/tracer/meta_patch/patched_function/arithmetic.py
colossalai/fx/tracer/meta_patch/patched_function/convolution.py
colossalai/fx/tracer/meta_patch/patched_function/embedding.py
colossalai/fx/tracer/meta_patch/patched_function/normalization.py
colossalai/fx/tracer/meta_patch/patched_function/python_ops.py
colossalai/fx/tracer/meta_patch/patched_function/torch_ops.py
colossalai/fx/tracer/meta_patch/patched_module/__init__.py
colossalai/fx/tracer/meta_patch/patched_module/activation_function.py
colossalai/fx/tracer/meta_patch/patched_module/convolution.py
colossalai/fx/tracer/meta_patch/patched_module/embedding.py
colossalai/fx/tracer/meta_patch/patched_module/linear.py
colossalai/fx/tracer/meta_patch/patched_module/normalization.py
colossalai/fx/tracer/meta_patch/patched_module/pooling.py
colossalai/fx/tracer/meta_patch/patched_module/rnn.py
colossalai/inference/__init__.py
colossalai/inference/batch_bucket.py
colossalai/inference/config.py
colossalai/inference/flash_decoding_utils.py
colossalai/inference/graph_runner.py
colossalai/inference/logit_processors.py
colossalai/inference/sampler.py
colossalai/inference/struct.py
colossalai/inference/utils.py
colossalai/inference/core/__init__.py
colossalai/inference/core/async_engine.py
colossalai/inference/core/engine.py
colossalai/inference/core/plugin.py
colossalai/inference/core/request_handler.py
colossalai/inference/core/rpc_engine.py
colossalai/inference/executor/__init__.py
colossalai/inference/executor/rpc_worker.py
colossalai/inference/kv_cache/__init__.py
colossalai/inference/kv_cache/block_cache.py
colossalai/inference/kv_cache/kvcache_manager.py
colossalai/inference/modeling/__init__.py
colossalai/inference/modeling/backends/__init__.py
colossalai/inference/modeling/backends/attention_backend.py
colossalai/inference/modeling/backends/pre_attention_backend.py
colossalai/inference/modeling/layers/__init__.py
colossalai/inference/modeling/layers/attention.py
colossalai/inference/modeling/layers/baichuan_tp_linear.py
colossalai/inference/modeling/models/__init__.py
colossalai/inference/modeling/models/glide_llama.py
colossalai/inference/modeling/models/nopadding_baichuan.py
colossalai/inference/modeling/models/nopadding_llama.py
colossalai/inference/modeling/policy/__init__.py
colossalai/inference/modeling/policy/glide_llama.py
colossalai/inference/modeling/policy/nopadding_baichuan.py
colossalai/inference/modeling/policy/nopadding_llama.py
colossalai/inference/server/__init__.py
colossalai/inference/server/api_server.py
colossalai/inference/server/chat_service.py
colossalai/inference/server/completion_service.py
colossalai/inference/server/utils.py
colossalai/inference/spec/__init__.py
colossalai/inference/spec/drafter.py
colossalai/inference/spec/struct.py
colossalai/interface/__init__.py
colossalai/interface/model.py
colossalai/interface/optimizer.py
colossalai/interface/pretrained.py
colossalai/kernel/__init__.py
colossalai/kernel/kernel_loader.py
colossalai/kernel/extensions/__init__.py
colossalai/kernel/extensions/base_extension.py
colossalai/kernel/extensions/cpp_extension.py
colossalai/kernel/extensions/cuda_extension.py
colossalai/kernel/extensions/triton_extension.py
colossalai/kernel/extensions/utils.py
colossalai/kernel/extensions/csrc/__init__.py
colossalai/kernel/extensions/csrc/common/data_type.h
colossalai/kernel/extensions/csrc/common/micros.h
colossalai/kernel/extensions/csrc/common/mp_type_traits.h
colossalai/kernel/extensions/csrc/common/target.h
colossalai/kernel/extensions/csrc/common/vec_type_traits.h
colossalai/kernel/extensions/csrc/funcs/binary_functor.h
colossalai/kernel/extensions/csrc/funcs/cast_functor.h
colossalai/kernel/extensions/csrc/funcs/reduce_function.h
colossalai/kernel/extensions/csrc/funcs/ternary_functor.h
colossalai/kernel/extensions/csrc/funcs/unary_functor.h
colossalai/kernel/extensions/csrc/kernel/arm/cpu_adam_arm.cpp
colossalai/kernel/extensions/csrc/kernel/arm/cpu_adam_arm.h
colossalai/kernel/extensions/csrc/kernel/cuda/activation_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/context_kv_cache_memcpy_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/convert_fp8_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/decode_kv_cache_memcpy_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/flash_decoding_attention_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/fused_rotary_emb_and_cache_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/get_cos_and_sin_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/layer_norm_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/moe_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/multi_tensor_adam_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/multi_tensor_apply.cuh
colossalai/kernel/extensions/csrc/kernel/cuda/multi_tensor_l2norm_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/multi_tensor_lamb_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/multi_tensor_scale_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/multi_tensor_sgd_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/rms_layernorm_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/scaled_masked_softmax_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/scaled_upper_triang_masked_softmax_kernel.cu
colossalai/kernel/extensions/csrc/kernel/cuda/attention/attention_utils.h
colossalai/kernel/extensions/csrc/kernel/cuda/utils/gpu_launch_config.h
colossalai/kernel/extensions/csrc/kernel/cuda/utils/micros.h
colossalai/kernel/extensions/csrc/kernel/cuda/utils/nvgpu_dev_info.h
colossalai/kernel/extensions/csrc/kernel/cuda/utils/vec_copy.h
colossalai/kernel/extensions/csrc/kernel/x86/cpu_adam.cpp
colossalai/kernel/extensions/csrc/kernel/x86/cpu_adam.h
colossalai/kernel/extensions/pybind/__init__.py
colossalai/kernel/extensions/pybind/cpu_adam/__init__.py
colossalai/kernel/extensions/pybind/cpu_adam/cpu_adam_arm.py
colossalai/kernel/extensions/pybind/cpu_adam/cpu_adam_x86.py
colossalai/kernel/extensions/pybind/flash_attention/__init__.py
colossalai/kernel/extensions/pybind/flash_attention/flash_attention_dao_cuda.py
colossalai/kernel/extensions/pybind/flash_attention/flash_attention_npu.py
colossalai/kernel/extensions/pybind/flash_attention/flash_attention_sdpa_cuda.py
colossalai/kernel/extensions/pybind/inference/__init__.py
colossalai/kernel/extensions/pybind/inference/inference.cpp
colossalai/kernel/extensions/pybind/inference/inference_ops_cuda.py
colossalai/kernel/extensions/pybind/layernorm/__init__.py
colossalai/kernel/extensions/pybind/layernorm/layer_norm.cpp
colossalai/kernel/extensions/pybind/layernorm/layernorm_cuda.py
colossalai/kernel/extensions/pybind/moe/__init__.py
colossalai/kernel/extensions/pybind/moe/moe.cpp
colossalai/kernel/extensions/pybind/moe/moe_cuda.py
colossalai/kernel/extensions/pybind/optimizer/__init__.py
colossalai/kernel/extensions/pybind/optimizer/fused_optimizer_cuda.py
colossalai/kernel/extensions/pybind/optimizer/optimizer.cpp
colossalai/kernel/extensions/pybind/softmax/__init__.py
colossalai/kernel/extensions/pybind/softmax/scaled_masked_softmax.cpp
colossalai/kernel/extensions/pybind/softmax/scaled_masked_softmax_cuda.py
colossalai/kernel/extensions/pybind/softmax/scaled_upper_triang_masked_softmax.cpp
colossalai/kernel/extensions/pybind/softmax/scaled_upper_triangle_masked_softmax_cuda.py
colossalai/kernel/jit/__init__.py
colossalai/kernel/jit/bias_dropout_add.py
colossalai/kernel/jit/bias_gelu.py
colossalai/kernel/jit/option.py
colossalai/kernel/triton/__init__.py
colossalai/kernel/triton/context_attn_unpad.py
colossalai/kernel/triton/flash_decoding.py
colossalai/kernel/triton/fused_rotary_embedding.py
colossalai/kernel/triton/kvcache_copy.py
colossalai/kernel/triton/llama_act_combine_kernel.py
colossalai/kernel/triton/no_pad_rotary_embedding.py
colossalai/kernel/triton/qkv_matmul_kernel.py
colossalai/kernel/triton/rms_layernorm.py
colossalai/kernel/triton/rotary_cache_copy.py
colossalai/kernel/triton/softmax.py
colossalai/lazy/__init__.py
colossalai/lazy/construction.py
colossalai/lazy/lazy_init.py
colossalai/lazy/pretrained.py
colossalai/legacy/__init__.py
colossalai/legacy/constants.py
colossalai/legacy/core.py
colossalai/legacy/global_variables.py
colossalai/legacy/initialize.py
colossalai/legacy/amp/__init__.py
colossalai/legacy/amp/amp_type.py
colossalai/legacy/amp/apex_amp/__init__.py
colossalai/legacy/amp/apex_amp/apex_amp.py
colossalai/legacy/amp/naive_amp/__init__.py
colossalai/legacy/amp/naive_amp/_fp16_optimizer.py
colossalai/legacy/amp/naive_amp/_utils.py
colossalai/legacy/amp/naive_amp/naive_amp.py
colossalai/legacy/amp/torch_amp/__init__.py
colossalai/legacy/amp/torch_amp/_grad_scaler.py
colossalai/legacy/amp/torch_amp/torch_amp.py
colossalai/legacy/builder/__init__.py
colossalai/legacy/builder/builder.py
colossalai/legacy/communication/__init__.py
colossalai/legacy/communication/collective.py
colossalai/legacy/communication/p2p.py
colossalai/legacy/communication/p2p_v2.py
colossalai/legacy/communication/ring.py
colossalai/legacy/communication/utils.py
colossalai/legacy/context/__init__.py
colossalai/legacy/context/parallel_context.py
colossalai/legacy/context/parallel_mode.py
colossalai/legacy/context/process_group_initializer/__init__.py
colossalai/legacy/context/process_group_initializer/initializer_1d.py
colossalai/legacy/context/process_group_initializer/initializer_2d.py
colossalai/legacy/context/process_group_initializer/initializer_2p5d.py
colossalai/legacy/context/process_group_initializer/initializer_3d.py
colossalai/legacy/context/process_group_initializer/initializer_data.py
colossalai/legacy/context/process_group_initializer/initializer_model.py
colossalai/legacy/context/process_group_initializer/initializer_pipeline.py
colossalai/legacy/context/process_group_initializer/initializer_sequence.py
colossalai/legacy/context/process_group_initializer/initializer_tensor.py
colossalai/legacy/context/process_group_initializer/process_group_initializer.py
colossalai/legacy/context/random/__init__.py
colossalai/legacy/context/random/_helper.py
colossalai/legacy/context/random/seed_manager.py
colossalai/legacy/engine/__init__.py
colossalai/legacy/engine/_base_engine.py
colossalai/legacy/engine/gradient_accumulation/__init__.py
colossalai/legacy/engine/gradient_accumulation/_gradient_accumulation.py
colossalai/legacy/engine/gradient_handler/__init__.py
colossalai/legacy/engine/gradient_handler/_base_gradient_handler.py
colossalai/legacy/engine/gradient_handler/_data_parallel_gradient_handler.py
colossalai/legacy/engine/gradient_handler/_moe_gradient_handler.py
colossalai/legacy/engine/gradient_handler/_pipeline_parallel_gradient_handler.py
colossalai/legacy/engine/gradient_handler/_sequence_parallel_gradient_handler.py
colossalai/legacy/engine/gradient_handler/_zero_gradient_handler.py
colossalai/legacy/engine/gradient_handler/utils.py
colossalai/legacy/engine/schedule/__init__.py
colossalai/legacy/engine/schedule/_base_schedule.py
colossalai/legacy/engine/schedule/_non_pipeline_schedule.py
colossalai/legacy/engine/schedule/_pipeline_schedule.py
colossalai/legacy/engine/schedule/_pipeline_schedule_v2.py
colossalai/legacy/inference/__init__.py
colossalai/legacy/inference/async_engine.py
colossalai/legacy/inference/async_manager.py
colossalai/legacy/inference/manager.py
colossalai/legacy/inference/dynamic_batching/__init__.py
colossalai/legacy/inference/dynamic_batching/get_tokenizer.py
colossalai/legacy/inference/dynamic_batching/infer_batch.py
colossalai/legacy/inference/dynamic_batching/io_struct.py
colossalai/legacy/inference/dynamic_batching/ray_dist_init.py
colossalai/legacy/inference/dynamic_batching/ray_init_config.py
colossalai/legacy/inference/dynamic_batching/req_queue.py
colossalai/legacy/inference/dynamic_batching/sampling_params.py
colossalai/legacy/inference/dynamic_batching/stats.py
colossalai/legacy/inference/hybridengine/__init__.py
colossalai/legacy/inference/hybridengine/engine.py
colossalai/legacy/inference/hybridengine/modeling/__init__.py
colossalai/legacy/inference/hybridengine/modeling/_utils.py
colossalai/legacy/inference/hybridengine/modeling/llama.py
colossalai/legacy/inference/hybridengine/polices/__init__.py
colossalai/legacy/inference/hybridengine/polices/llama.py
colossalai/legacy/inference/pipeline/__init__.py
colossalai/legacy/inference/pipeline/microbatch_manager.py
colossalai/legacy/inference/tensor_parallel/__init__.py
colossalai/legacy/inference/tensor_parallel/batch_infer_state.py
colossalai/legacy/inference/tensor_parallel/engine.py
colossalai/legacy/inference/tensor_parallel/kvcache_manager.py
colossalai/legacy/inference/tensor_parallel/modeling/__init__.py
colossalai/legacy/inference/tensor_parallel/modeling/_utils.py
colossalai/legacy/inference/tensor_parallel/modeling/bloom.py
colossalai/legacy/inference/tensor_parallel/modeling/chatglm2.py
colossalai/legacy/inference/tensor_parallel/modeling/llama.py
colossalai/legacy/inference/tensor_parallel/policies/__init__.py
colossalai/legacy/inference/tensor_parallel/policies/bloom.py
colossalai/legacy/inference/tensor_parallel/policies/chatglm2.py
colossalai/legacy/inference/tensor_parallel/policies/llama.py
colossalai/legacy/nn/__init__.py
colossalai/legacy/nn/_ops/__init__.py
colossalai/legacy/nn/_ops/_utils.py
colossalai/legacy/nn/layer/__init__.py
colossalai/legacy/nn/layer/base_layer.py
colossalai/legacy/nn/layer/colossalai_layer/__init__.py
colossalai/legacy/nn/layer/colossalai_layer/_utils.py
colossalai/legacy/nn/layer/colossalai_layer/dropout.py
colossalai/legacy/nn/layer/colossalai_layer/embedding.py
colossalai/legacy/nn/layer/colossalai_layer/linear.py
colossalai/legacy/nn/layer/colossalai_layer/normalization.py
colossalai/legacy/nn/layer/parallel_1d/__init__.py
colossalai/legacy/nn/layer/parallel_1d/_operation.py
colossalai/legacy/nn/layer/parallel_1d/_utils.py
colossalai/legacy/nn/layer/parallel_1d/layers.py
colossalai/legacy/nn/layer/parallel_2d/__init__.py
colossalai/legacy/nn/layer/parallel_2d/_operation.py
colossalai/legacy/nn/layer/parallel_2d/_utils.py
colossalai/legacy/nn/layer/parallel_2d/layers.py
colossalai/legacy/nn/layer/parallel_2p5d/__init__.py
colossalai/legacy/nn/layer/parallel_2p5d/_operation.py
colossalai/legacy/nn/layer/parallel_2p5d/_utils.py
colossalai/legacy/nn/layer/parallel_2p5d/layers.py
colossalai/legacy/nn/layer/parallel_3d/__init__.py
colossalai/legacy/nn/layer/parallel_3d/_operation.py
colossalai/legacy/nn/layer/parallel_3d/_utils.py
colossalai/legacy/nn/layer/parallel_3d/layers.py
colossalai/legacy/nn/layer/parallel_sequence/__init__.py
colossalai/legacy/nn/layer/parallel_sequence/_operation.py
colossalai/legacy/nn/layer/parallel_sequence/_utils.py
colossalai/legacy/nn/layer/parallel_sequence/layers.py
colossalai/legacy/nn/layer/utils/__init__.py
colossalai/legacy/nn/layer/utils/common.py
colossalai/legacy/nn/layer/vanilla/__init__.py
colossalai/legacy/nn/layer/vanilla/layers.py
colossalai/legacy/nn/layer/wrapper/__init__.py
colossalai/legacy/nn/layer/wrapper/pipeline_wrapper.py
colossalai/legacy/nn/loss/__init__.py
colossalai/legacy/nn/loss/loss_1d.py
colossalai/legacy/nn/loss/loss_2d.py
colossalai/legacy/nn/loss/loss_2p5d.py
colossalai/legacy/nn/loss/loss_3d.py
colossalai/legacy/nn/metric/__init__.py
colossalai/legacy/nn/metric/_utils.py
colossalai/legacy/nn/metric/accuracy_2d.py
colossalai/legacy/nn/metric/accuracy_2p5d.py
colossalai/legacy/nn/metric/accuracy_3d.py
colossalai/legacy/nn/parallel/__init__.py
colossalai/legacy/nn/parallel/data_parallel.py
colossalai/legacy/nn/parallel/reducer.py
colossalai/legacy/nn/parallel/layers/__init__.py
colossalai/legacy/nn/parallel/layers/colo_module.py
colossalai/legacy/nn/parallel/layers/embedding.py
colossalai/legacy/nn/parallel/layers/linear.py
colossalai/legacy/nn/parallel/layers/module_utils.py
colossalai/legacy/nn/parallel/layers/cache_embedding/__init__.py
colossalai/legacy/nn/parallel/layers/cache_embedding/base_embedding.py
colossalai/legacy/nn/parallel/layers/cache_embedding/cache_mgr.py
colossalai/legacy/nn/parallel/layers/cache_embedding/cached_embedding.py
colossalai/legacy/nn/parallel/layers/cache_embedding/copyer.py
colossalai/legacy/nn/parallel/layers/cache_embedding/embedding_config.py
colossalai/legacy/nn/parallel/layers/cache_embedding/parallel_cached_embedding.py
colossalai/legacy/nn/parallel/layers/cache_embedding/parallel_cached_embedding_tablewise.py
colossalai/legacy/nn/parallel/layers/cache_embedding/parallel_cached_embedding_tablewise_split_cache.py
colossalai/legacy/pipeline/__init__.py
colossalai/legacy/pipeline/layer_spec.py
colossalai/legacy/pipeline/pipelinable.py
colossalai/legacy/pipeline/pipeline_process_group.py
colossalai/legacy/pipeline/utils.py
colossalai/legacy/pipeline/middleware/__init__.py
colossalai/legacy/pipeline/middleware/topo.py
colossalai/legacy/pipeline/middleware/adaptor/__init__.py
colossalai/legacy/pipeline/middleware/adaptor/fx.py
colossalai/legacy/pipeline/rpc/__init__.py
colossalai/legacy/pipeline/rpc/_pipeline_base.py
colossalai/legacy/pipeline/rpc/_pipeline_schedule.py
colossalai/legacy/pipeline/rpc/utils.py
colossalai/legacy/registry/__init__.py
colossalai/legacy/registry/registry.py
colossalai/legacy/tensor/__init__.py
colossalai/legacy/tensor/compute_spec.py
colossalai/legacy/tensor/const.py
colossalai/legacy/tensor/dist_spec_mgr.py
colossalai/legacy/tensor/distspec.py
colossalai/legacy/tensor/op_wrapper.py
colossalai/legacy/tensor/process_group.py
colossalai/legacy/tensor/tensor_spec.py
colossalai/legacy/trainer/__init__.py
colossalai/legacy/trainer/_trainer.py
colossalai/legacy/trainer/hooks/__init__.py
colossalai/legacy/trainer/hooks/_base_hook.py
colossalai/legacy/trainer/hooks/_checkpoint_hook.py
colossalai/legacy/trainer/hooks/_commons_.py
colossalai/legacy/trainer/hooks/_log_hook.py
colossalai/legacy/trainer/hooks/_lr_scheduler_hook.py
colossalai/legacy/trainer/hooks/_metric_hook.py
colossalai/legacy/utils/__init__.py
colossalai/legacy/utils/activation_checkpoint.py
colossalai/legacy/utils/checkpointing.py
colossalai/legacy/utils/common.py
colossalai/legacy/utils/memory.py
colossalai/legacy/utils/checkpoint/__init__.py
colossalai/legacy/utils/checkpoint/module_checkpoint.py
colossalai/legacy/utils/checkpoint/utils.py
colossalai/legacy/utils/data_sampler/__init__.py
colossalai/legacy/utils/data_sampler/base_sampler.py
colossalai/legacy/utils/data_sampler/data_parallel_sampler.py
colossalai/legacy/utils/profiler/__init__.py
colossalai/legacy/utils/profiler/extention.py
colossalai/legacy/utils/profiler/profiler.py
colossalai/legacy/utils/profiler/stateful_tensor_mem_extention.py
colossalai/legacy/utils/profiler/legacy/__init__.py
colossalai/legacy/utils/profiler/legacy/comm_profiler.py
colossalai/legacy/utils/profiler/legacy/pcie_profiler.py
colossalai/legacy/utils/profiler/legacy/prof_utils.py
colossalai/legacy/zero/__init__.py
colossalai/legacy/zero/gemini/__init__.py
colossalai/legacy/zero/gemini/colo_init_context.py
colossalai/legacy/zero/gemini/gemini_context.py
colossalai/legacy/zero/gemini/stateful_tensor.py
colossalai/legacy/zero/gemini/stateful_tensor_mgr.py
colossalai/legacy/zero/gemini/tensor_placement_policy.py
colossalai/legacy/zero/gemini/tensor_utils.py
colossalai/legacy/zero/gemini/ophooks/__init__.py
colossalai/legacy/zero/gemini/ophooks/_shard_grad_ophook.py
colossalai/legacy/zero/gemini/ophooks/_shard_param_ophook.py
colossalai/legacy/zero/gemini/ophooks/runtime_mem_tracer_hook.py
colossalai/legacy/zero/gemini/ophooks/utils.py
colossalai/legacy/zero/gemini/paramhooks/__init__.py
colossalai/legacy/zero/gemini/paramhooks/_param_hookmgr.py
colossalai/legacy/zero/init_ctx/__init__.py
colossalai/legacy/zero/init_ctx/init_context.py
colossalai/legacy/zero/shard_utils/__init__.py
colossalai/legacy/zero/shard_utils/base_shard_strategy.py
colossalai/legacy/zero/shard_utils/bucket_tensor_shard_strategy.py
colossalai/legacy/zero/shard_utils/commons.py
colossalai/legacy/zero/shard_utils/tensor_shard_strategy.py
colossalai/legacy/zero/sharded_model/__init__.py
colossalai/legacy/zero/sharded_model/_utils.py
colossalai/legacy/zero/sharded_model/reduce_scatter.py
colossalai/legacy/zero/sharded_model/sharded_model_v2.py
colossalai/legacy/zero/sharded_model/utils.py
colossalai/legacy/zero/sharded_model/zero_hook.py
colossalai/legacy/zero/sharded_optim/__init__.py
colossalai/legacy/zero/sharded_optim/sharded_optim_v2.py
colossalai/legacy/zero/sharded_param/__init__.py
colossalai/legacy/zero/sharded_param/sharded_param.py
colossalai/legacy/zero/sharded_param/sharded_tensor.py
colossalai/logging/__init__.py
colossalai/logging/logger.py
colossalai/moe/__init__.py
colossalai/moe/_operation.py
colossalai/moe/checkpoint.py
colossalai/moe/experts.py
colossalai/moe/layers.py
colossalai/moe/load_balance.py
colossalai/moe/loss.py
colossalai/moe/manager.py
colossalai/moe/routers.py
colossalai/moe/utils.py
colossalai/nn/__init__.py
colossalai/nn/init.py
colossalai/nn/layer/__init__.py
colossalai/nn/layer/layernorm.py
colossalai/nn/layer/scaled_softmax.py
colossalai/nn/layer/utils.py
colossalai/nn/loss/__init__.py
colossalai/nn/lr_scheduler/__init__.py
colossalai/nn/lr_scheduler/cosine.py
colossalai/nn/lr_scheduler/delayed.py
colossalai/nn/lr_scheduler/linear.py
colossalai/nn/lr_scheduler/multistep.py
colossalai/nn/lr_scheduler/onecycle.py
colossalai/nn/lr_scheduler/poly.py
colossalai/nn/lr_scheduler/torch.py
colossalai/nn/optimizer/__init__.py
colossalai/nn/optimizer/adafactor.py
colossalai/nn/optimizer/came.py
colossalai/nn/optimizer/cpu_adam.py
colossalai/nn/optimizer/distributed_adafactor.py
colossalai/nn/optimizer/distributed_came.py
colossalai/nn/optimizer/distributed_galore.py
colossalai/nn/optimizer/distributed_lamb.py
colossalai/nn/optimizer/fused_adam.py
colossalai/nn/optimizer/fused_lamb.py
colossalai/nn/optimizer/fused_sgd.py
colossalai/nn/optimizer/galore.py
colossalai/nn/optimizer/hybrid_adam.py
colossalai/nn/optimizer/lamb.py
colossalai/nn/optimizer/lars.py
colossalai/nn/optimizer/nvme_optimizer.py
colossalai/pipeline/__init__.py
colossalai/pipeline/p2p.py
colossalai/pipeline/stage_manager.py
colossalai/pipeline/schedule/__init__.py
colossalai/pipeline/schedule/_utils.py
colossalai/pipeline/schedule/base.py
colossalai/pipeline/schedule/generate.py
colossalai/pipeline/schedule/interleaved_pp.py
colossalai/pipeline/schedule/one_f_one_b.py
colossalai/quantization/__init__.py
colossalai/quantization/bnb.py
colossalai/quantization/bnb_config.py
colossalai/shardformer/__init__.py
colossalai/shardformer/_utils.py
colossalai/shardformer/layer/__init__.py
colossalai/shardformer/layer/_operation.py
colossalai/shardformer/layer/attn.py
colossalai/shardformer/layer/dropout.py
colossalai/shardformer/layer/embedding.py
colossalai/shardformer/layer/linear.py
colossalai/shardformer/layer/loss.py
colossalai/shardformer/layer/normalization.py
colossalai/shardformer/layer/parallel_module.py
colossalai/shardformer/layer/qkv_fused_linear.py
colossalai/shardformer/layer/utils.py
colossalai/shardformer/modeling/__init__.py
colossalai/shardformer/modeling/bert.py
colossalai/shardformer/modeling/blip2.py
colossalai/shardformer/modeling/bloom.py
colossalai/shardformer/modeling/chatglm2.py
colossalai/shardformer/modeling/command.py
colossalai/shardformer/modeling/falcon.py
colossalai/shardformer/modeling/gpt2.py
colossalai/shardformer/modeling/gptj.py
colossalai/shardformer/modeling/jit.py
colossalai/shardformer/modeling/llama.py
colossalai/shardformer/modeling/mistral.py
colossalai/shardformer/modeling/opt.py
colossalai/shardformer/modeling/qwen2.py
colossalai/shardformer/modeling/sam.py
colossalai/shardformer/modeling/t5.py
colossalai/shardformer/modeling/vit.py
colossalai/shardformer/modeling/whisper.py
colossalai/shardformer/modeling/chatglm2_6b/__init__.py
colossalai/shardformer/modeling/chatglm2_6b/configuration_chatglm.py
colossalai/shardformer/modeling/chatglm2_6b/modeling_chatglm.py
colossalai/shardformer/policies/__init__.py
colossalai/shardformer/policies/auto_policy.py
colossalai/shardformer/policies/base_policy.py
colossalai/shardformer/policies/bert.py
colossalai/shardformer/policies/blip2.py
colossalai/shardformer/policies/bloom.py
colossalai/shardformer/policies/chatglm2.py
colossalai/shardformer/policies/command.py
colossalai/shardformer/policies/falcon.py
colossalai/shardformer/policies/gpt2.py
colossalai/shardformer/policies/gptj.py
colossalai/shardformer/policies/llama.py
colossalai/shardformer/policies/mistral.py
colossalai/shardformer/policies/opt.py
colossalai/shardformer/policies/qwen2.py
colossalai/shardformer/policies/sam.py
colossalai/shardformer/policies/t5.py
colossalai/shardformer/policies/vit.py
colossalai/shardformer/policies/whisper.py
colossalai/shardformer/shard/__init__.py
colossalai/shardformer/shard/grad_ckpt_config.py
colossalai/shardformer/shard/shard_config.py
colossalai/shardformer/shard/sharder.py
colossalai/shardformer/shard/shardformer.py
colossalai/shardformer/shard/utils.py
colossalai/tensor/__init__.py
colossalai/tensor/colo_parameter.py
colossalai/tensor/colo_tensor.py
colossalai/tensor/comm_spec.py
colossalai/tensor/param_op_hook.py
colossalai/tensor/shape_consistency.py
colossalai/tensor/sharding_spec.py
colossalai/tensor/utils.py
colossalai/tensor/d_tensor/__init__.py
colossalai/tensor/d_tensor/api.py
colossalai/tensor/d_tensor/comm_spec.py
colossalai/tensor/d_tensor/layout.py
colossalai/tensor/d_tensor/layout_converter.py
colossalai/tensor/d_tensor/misc.py
colossalai/tensor/d_tensor/sharding_spec.py
colossalai/tensor/d_tensor/utils.py
colossalai/tensor/moe_tensor/__init__.py
colossalai/tensor/moe_tensor/api.py
colossalai/tensor/moe_tensor/moe_info.py
colossalai/tensor/padded_tensor/__init__.py
colossalai/tensor/padded_tensor/api.py
colossalai/testing/__init__.py
colossalai/testing/comparison.py
colossalai/testing/pytest_wrapper.py
colossalai/testing/random.py
colossalai/testing/utils.py
colossalai/utils/__init__.py
colossalai/utils/common.py
colossalai/utils/memory.py
colossalai/utils/timer.py
colossalai/utils/model/__init__.py
colossalai/utils/model/utils.py
colossalai/utils/multi_tensor_apply/__init__.py
colossalai/utils/multi_tensor_apply/multi_tensor_apply.py
colossalai/utils/rank_recorder/__init__.py
colossalai/utils/rank_recorder/rank_recorder.py
colossalai/utils/tensor_detector/__init__.py
colossalai/utils/tensor_detector/tensor_detector.py
colossalai/zero/__init__.py
colossalai/zero/wrapper.py
colossalai/zero/gemini/__init__.py
colossalai/zero/gemini/gemini_ddp.py
colossalai/zero/gemini/gemini_hook.py
colossalai/zero/gemini/gemini_mgr.py
colossalai/zero/gemini/gemini_optimizer.py
colossalai/zero/gemini/placement_policy.py
colossalai/zero/gemini/utils.py
colossalai/zero/gemini/chunk/__init__.py
colossalai/zero/gemini/chunk/chunk.py
colossalai/zero/gemini/chunk/manager.py
colossalai/zero/gemini/chunk/search_utils.py
colossalai/zero/gemini/chunk/utils.py
colossalai/zero/gemini/memory_tracer/__init__.py
colossalai/zero/gemini/memory_tracer/chunk_memstats_collector.py
colossalai/zero/gemini/memory_tracer/memory_monitor.py
colossalai/zero/gemini/memory_tracer/memory_stats.py
colossalai/zero/gemini/memory_tracer/memstats_collector.py
colossalai/zero/gemini/memory_tracer/param_runtime_order.py
colossalai/zero/gemini/memory_tracer/runtime_mem_tracer.py
colossalai/zero/gemini/memory_tracer/static_memstats_collector.py
colossalai/zero/gemini/memory_tracer/utils.py
colossalai/zero/low_level/__init__.py
colossalai/zero/low_level/_utils.py
colossalai/zero/low_level/low_level_optim.py
colossalai/zero/low_level/bookkeeping/__init__.py
colossalai/zero/low_level/bookkeeping/base_store.py
colossalai/zero/low_level/bookkeeping/bucket_store.py
colossalai/zero/low_level/bookkeeping/gradient_store.py
colossalai/zero/low_level/bookkeeping/parameter_store.py
colossalai/zero/low_level/bookkeeping/tensor_bucket.py
examples/language/__init__.py
examples/language/data_utils.py
examples/language/model_utils.py
examples/language/performance_evaluator.py
extensions/__init__.py
extensions/base_extension.py
extensions/cpp_extension.py
extensions/cuda_extension.py
extensions/triton_extension.py
extensions/utils.py
extensions/csrc/__init__.py
extensions/csrc/common/data_type.h
extensions/csrc/common/micros.h
extensions/csrc/common/mp_type_traits.h
extensions/csrc/common/target.h
extensions/csrc/common/vec_type_traits.h
extensions/csrc/funcs/binary_functor.h
extensions/csrc/funcs/cast_functor.h
extensions/csrc/funcs/reduce_function.h
extensions/csrc/funcs/ternary_functor.h
extensions/csrc/funcs/unary_functor.h
extensions/csrc/kernel/arm/cpu_adam_arm.cpp
extensions/csrc/kernel/arm/cpu_adam_arm.h
extensions/csrc/kernel/cuda/activation_kernel.cu
extensions/csrc/kernel/cuda/context_kv_cache_memcpy_kernel.cu
extensions/csrc/kernel/cuda/convert_fp8_kernel.cu
extensions/csrc/kernel/cuda/decode_kv_cache_memcpy_kernel.cu
extensions/csrc/kernel/cuda/flash_decoding_attention_kernel.cu
extensions/csrc/kernel/cuda/fused_rotary_emb_and_cache_kernel.cu
extensions/csrc/kernel/cuda/get_cos_and_sin_kernel.cu
extensions/csrc/kernel/cuda/layer_norm_kernel.cu
extensions/csrc/kernel/cuda/moe_kernel.cu
extensions/csrc/kernel/cuda/multi_tensor_adam_kernel.cu
extensions/csrc/kernel/cuda/multi_tensor_apply.cuh
extensions/csrc/kernel/cuda/multi_tensor_l2norm_kernel.cu
extensions/csrc/kernel/cuda/multi_tensor_lamb_kernel.cu
extensions/csrc/kernel/cuda/multi_tensor_scale_kernel.cu
extensions/csrc/kernel/cuda/multi_tensor_sgd_kernel.cu
extensions/csrc/kernel/cuda/rms_layernorm_kernel.cu
extensions/csrc/kernel/cuda/scaled_masked_softmax_kernel.cu
extensions/csrc/kernel/cuda/scaled_upper_triang_masked_softmax_kernel.cu
extensions/csrc/kernel/cuda/attention/attention_utils.h
extensions/csrc/kernel/cuda/utils/gpu_launch_config.h
extensions/csrc/kernel/cuda/utils/micros.h
extensions/csrc/kernel/cuda/utils/nvgpu_dev_info.h
extensions/csrc/kernel/cuda/utils/vec_copy.h
extensions/csrc/kernel/x86/cpu_adam.cpp
extensions/csrc/kernel/x86/cpu_adam.h
extensions/pybind/__init__.py
extensions/pybind/cpu_adam/__init__.py
extensions/pybind/cpu_adam/cpu_adam_arm.py
extensions/pybind/cpu_adam/cpu_adam_x86.py
extensions/pybind/flash_attention/__init__.py
extensions/pybind/flash_attention/flash_attention_dao_cuda.py
extensions/pybind/flash_attention/flash_attention_npu.py
extensions/pybind/flash_attention/flash_attention_sdpa_cuda.py
extensions/pybind/inference/__init__.py
extensions/pybind/inference/inference.cpp
extensions/pybind/inference/inference_ops_cuda.py
extensions/pybind/layernorm/__init__.py
extensions/pybind/layernorm/layer_norm.cpp
extensions/pybind/layernorm/layernorm_cuda.py
extensions/pybind/moe/__init__.py
extensions/pybind/moe/moe.cpp
extensions/pybind/moe/moe_cuda.py
extensions/pybind/optimizer/__init__.py
extensions/pybind/optimizer/fused_optimizer_cuda.py
extensions/pybind/optimizer/optimizer.cpp
extensions/pybind/softmax/__init__.py
extensions/pybind/softmax/scaled_masked_softmax.cpp
extensions/pybind/softmax/scaled_masked_softmax_cuda.py
extensions/pybind/softmax/scaled_upper_triang_masked_softmax.cpp
extensions/pybind/softmax/scaled_upper_triangle_masked_softmax_cuda.py
requirements/requirements-test.txt
requirements/requirements.txt
tests/kit/__init__.py
tests/kit/model_zoo/__init__.py
tests/kit/model_zoo/executor.py
tests/kit/model_zoo/registry.py
tests/kit/model_zoo/custom/__init__.py
tests/kit/model_zoo/custom/base.py
tests/kit/model_zoo/custom/hanging_param_model.py
tests/kit/model_zoo/custom/nested_model.py
tests/kit/model_zoo/custom/repeated_computed_layers.py
tests/kit/model_zoo/custom/simple_mlp.py
tests/kit/model_zoo/custom/simple_net.py
tests/kit/model_zoo/diffusers/__init__.py
tests/kit/model_zoo/diffusers/diffusers.py
tests/kit/model_zoo/timm/__init__.py
tests/kit/model_zoo/timm/timm.py
tests/kit/model_zoo/torchaudio/__init__.py
tests/kit/model_zoo/torchaudio/torchaudio.py
tests/kit/model_zoo/torchrec/__init__.py
tests/kit/model_zoo/torchrec/torchrec.py
tests/kit/model_zoo/torchvision/__init__.py
tests/kit/model_zoo/torchvision/torchvision.py
tests/kit/model_zoo/transformers/__init__.py
tests/kit/model_zoo/transformers/albert.py
tests/kit/model_zoo/transformers/bert.py
tests/kit/model_zoo/transformers/blip2.py
tests/kit/model_zoo/transformers/bloom.py
tests/kit/model_zoo/transformers/chatglm2.py
tests/kit/model_zoo/transformers/command.py
tests/kit/model_zoo/transformers/falcon.py
tests/kit/model_zoo/transformers/gpt.py
tests/kit/model_zoo/transformers/gptj.py
tests/kit/model_zoo/transformers/llama.py
tests/kit/model_zoo/transformers/mistral.py
tests/kit/model_zoo/transformers/opt.py
tests/kit/model_zoo/transformers/qwen2.py
tests/kit/model_zoo/transformers/sam.py
tests/kit/model_zoo/transformers/t5.py
tests/kit/model_zoo/transformers/vit.py
tests/kit/model_zoo/transformers/whisper.py
tests/test_analyzer/__init__.py
tests/test_analyzer/test_fx/__init__.py
tests/test_analyzer/test_fx/test_bias_addition.py
tests/test_analyzer/test_fx/test_mod_dir.py
tests/test_analyzer/test_fx/test_nested_ckpt.py
tests/test_analyzer/test_fx/test_shape_prop.py
tests/test_analyzer/test_fx/test_symbolic_profile.py
tests/test_analyzer/test_fx/zoo.py
tests/test_analyzer/test_subclasses/__init__.py
tests/test_analyzer/test_subclasses/test_aten.py
tests/test_analyzer/test_subclasses/test_flop_tensor.py
tests/test_analyzer/test_subclasses/test_meta_mode.py
tests/test_auto_parallel/__init__.py
tests/test_auto_parallel/test_pass/__init__.py
tests/test_auto_parallel/test_pass/test_node_converting_pass.py
tests/test_auto_parallel/test_pass/test_size_value_converting_pass.py
tests/test_auto_parallel/test_tensor_shard/__init__.py
tests/test_auto_parallel/test_tensor_shard/test_bias_addition_forward.py
tests/test_auto_parallel/test_tensor_shard/test_broadcast.py
tests/test_auto_parallel/test_tensor_shard/test_checkpoint.py
tests/test_auto_parallel/test_tensor_shard/test_compatibility_with_ddp.py
tests/test_auto_parallel/test_tensor_shard/test_compatibility_with_gemini.py
tests/test_auto_parallel/test_tensor_shard/test_find_repeat_block.py
tests/test_auto_parallel/test_tensor_shard/test_liveness_analysis.py
tests/test_auto_parallel/test_tensor_shard/test_solver_with_resnet_v2.py
tests/test_auto_parallel/test_tensor_shard/test_gpt/__init__.py
tests/test_auto_parallel/test_tensor_shard/test_gpt/gpt_modules.py
tests/test_auto_parallel/test_tensor_shard/test_gpt/test_runtime_with_gpt_modules.py
tests/test_auto_parallel/test_tensor_shard/test_gpt/test_solver_with_gpt_module.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/__init__.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_addbmm_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_addmm_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_batch_norm_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bias_linear_function_node.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bias_linear_module_node.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_binary_elementwise_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bmm_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_conv_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_default_reshape_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_embedding_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_getattr_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_getitem_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_layer_norm_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_linear_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_matmul_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_norm_pooling_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_output_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_permute_and_transpose_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_placeholder_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_shard_option.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_softmax_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_split_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_sum_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_tensor_constructor.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_unary_element_wise_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_view_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_where_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/utils.py
tests/test_infer/__init__.py
tests/test_infer/_utils.py
tests/test_infer/test_batch_bucket.py
tests/test_infer/test_config_and_struct.py
tests/test_infer/test_continuous_batching.py
tests/test_infer/test_cuda_graph.py
tests/test_infer/test_drafter.py
tests/test_infer/test_inference_engine.py
tests/test_infer/test_kvcache_manager.py
tests/test_infer/test_request_handler.py
tests/test_infer/test_rpc_engine.py
tests/test_infer/test_streamingllm.py
tests/test_infer/test_kernels/__init__.py
tests/test_infer/test_kernels/cuda/__init__.py
tests/test_infer/test_kernels/cuda/test_convert_fp8.py
tests/test_infer/test_kernels/cuda/test_flash_decoding_attention.py
tests/test_infer/test_kernels/cuda/test_get_cos_and_sin.py
tests/test_infer/test_kernels/cuda/test_kv_cache_memcpy.py
tests/test_infer/test_kernels/cuda/test_rms_layernorm.py
tests/test_infer/test_kernels/cuda/test_rotary_embdding_unpad.py
tests/test_infer/test_kernels/cuda/test_silu_and_mul.py
tests/test_infer/test_kernels/triton/__init__.py
tests/test_infer/test_kernels/triton/kernel_utils.py
tests/test_infer/test_kernels/triton/test_context_attn_unpad.py
tests/test_infer/test_kernels/triton/test_decoding_attn.py
tests/test_infer/test_kernels/triton/test_fused_rotary_embedding.py
tests/test_infer/test_kernels/triton/test_kvcache_copy.py
tests/test_infer/test_kernels/triton/test_rmsnorm_triton.py
tests/test_infer/test_kernels/triton/test_rotary_embdding_unpad.py
tests/test_infer/test_kernels/triton/test_xine_copy.py
tests/test_shardformer/__init__.py
tests/test_shardformer/test_flash_attention.py
tests/test_shardformer/test_shard_utils.py
tests/test_shardformer/test_with_torch_ddp.py
tests/test_shardformer/test_model/__init__.py
tests/test_shardformer/test_model/_utils.py
tests/test_shardformer/test_model/test_shard_bert.py
tests/test_shardformer/test_model/test_shard_blip2.py
tests/test_shardformer/test_model/test_shard_bloom.py
tests/test_shardformer/test_model/test_shard_chatglm2.py
tests/test_shardformer/test_model/test_shard_command.py
tests/test_shardformer/test_model/test_shard_falcon.py
tests/test_shardformer/test_model/test_shard_gpt2.py
tests/test_shardformer/test_model/test_shard_gptj.py
tests/test_shardformer/test_model/test_shard_llama.py
tests/test_shardformer/test_model/test_shard_mistral.py
tests/test_shardformer/test_model/test_shard_opt.py
tests/test_shardformer/test_model/test_shard_qwen2.py
tests/test_shardformer/test_model/test_shard_sam.py
tests/test_shardformer/test_model/test_shard_t5.py
tests/test_shardformer/test_model/test_shard_vit.py
tests/test_shardformer/test_model/test_shard_whisper.py