LICENSE
MANIFEST.in
README.md
setup.py
version.txt
colossalai/__init__.py
colossalai/constants.py
colossalai/core.py
colossalai/global_variables.py
colossalai/initialize.py
colossalai/version.py
colossalai.egg-info/PKG-INFO
colossalai.egg-info/SOURCES.txt
colossalai.egg-info/dependency_links.txt
colossalai.egg-info/entry_points.txt
colossalai.egg-info/requires.txt
colossalai.egg-info/top_level.txt
colossalai/_C/__init__.py
colossalai/amp/__init__.py
colossalai/amp/amp_type.py
colossalai/amp/apex_amp/__init__.py
colossalai/amp/apex_amp/apex_amp.py
colossalai/amp/naive_amp/__init__.py
colossalai/amp/naive_amp/_fp16_optimizer.py
colossalai/amp/naive_amp/_utils.py
colossalai/amp/naive_amp/naive_amp.py
colossalai/amp/naive_amp/grad_scaler/__init__.py
colossalai/amp/naive_amp/grad_scaler/base_grad_scaler.py
colossalai/amp/naive_amp/grad_scaler/constant_grad_scaler.py
colossalai/amp/naive_amp/grad_scaler/dynamic_grad_scaler.py
colossalai/amp/torch_amp/__init__.py
colossalai/amp/torch_amp/_grad_scaler.py
colossalai/amp/torch_amp/torch_amp.py
colossalai/auto_parallel/__init__.py
colossalai/auto_parallel/checkpoint/__init__.py
colossalai/auto_parallel/checkpoint/build_c_ext.py
colossalai/auto_parallel/checkpoint/ckpt_solver_base.py
colossalai/auto_parallel/checkpoint/ckpt_solver_chen.py
colossalai/auto_parallel/checkpoint/ckpt_solver_rotor.py
colossalai/auto_parallel/checkpoint/operation.py
colossalai/auto_parallel/meta_profiler/__init__.py
colossalai/auto_parallel/meta_profiler/constants.py
colossalai/auto_parallel/meta_profiler/metainfo.py
colossalai/auto_parallel/meta_profiler/registry.py
colossalai/auto_parallel/meta_profiler/meta_registry/__init__.py
colossalai/auto_parallel/meta_profiler/meta_registry/activation.py
colossalai/auto_parallel/meta_profiler/meta_registry/binary_elementwise_ops.py
colossalai/auto_parallel/meta_profiler/meta_registry/conv.py
colossalai/auto_parallel/meta_profiler/meta_registry/linear.py
colossalai/auto_parallel/meta_profiler/meta_registry/norm.py
colossalai/auto_parallel/meta_profiler/meta_registry/pooling.py
colossalai/auto_parallel/passes/__init__.py
colossalai/auto_parallel/passes/comm_metainfo_pass.py
colossalai/auto_parallel/passes/constants.py
colossalai/auto_parallel/passes/meta_info_prop.py
colossalai/auto_parallel/passes/runtime_apply_pass.py
colossalai/auto_parallel/passes/runtime_preparation_pass.py
colossalai/auto_parallel/pipeline_shard/__init__.py
colossalai/auto_parallel/tensor_shard/__init__.py
colossalai/auto_parallel/tensor_shard/constants.py
colossalai/auto_parallel/tensor_shard/initialize.py
colossalai/auto_parallel/tensor_shard/sharding_strategy.py
colossalai/auto_parallel/tensor_shard/deprecated/__init__.py
colossalai/auto_parallel/tensor_shard/deprecated/_utils.py
colossalai/auto_parallel/tensor_shard/deprecated/constants.py
colossalai/auto_parallel/tensor_shard/deprecated/cost_graph.py
colossalai/auto_parallel/tensor_shard/deprecated/graph_analysis.py
colossalai/auto_parallel/tensor_shard/deprecated/options.py
colossalai/auto_parallel/tensor_shard/deprecated/sharding_strategy.py
colossalai/auto_parallel/tensor_shard/deprecated/solver.py
colossalai/auto_parallel/tensor_shard/deprecated/strategies_constructor.py
colossalai/auto_parallel/tensor_shard/deprecated/op_handler/__init__.py
colossalai/auto_parallel/tensor_shard/deprecated/op_handler/batch_norm_handler.py
colossalai/auto_parallel/tensor_shard/deprecated/op_handler/bcast_op_handler.py
colossalai/auto_parallel/tensor_shard/deprecated/op_handler/conv_handler.py
colossalai/auto_parallel/tensor_shard/deprecated/op_handler/dot_handler.py
colossalai/auto_parallel/tensor_shard/deprecated/op_handler/embedding_handler.py
colossalai/auto_parallel/tensor_shard/deprecated/op_handler/layer_norm_handler.py
colossalai/auto_parallel/tensor_shard/deprecated/op_handler/operator_handler.py
colossalai/auto_parallel/tensor_shard/deprecated/op_handler/reshape_handler.py
colossalai/auto_parallel/tensor_shard/deprecated/op_handler/strategy_generator.py
colossalai/auto_parallel/tensor_shard/deprecated/op_handler/unary_elementwise_handler.py
colossalai/auto_parallel/tensor_shard/deprecated/op_handler/where_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/__init__.py
colossalai/auto_parallel/tensor_shard/node_handler/addmm_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/batch_norm_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/binary_elementwise_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/bmm_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/conv_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/default_reshape_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/embedding_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/getattr_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/getitem_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/layer_norm_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/linear_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/matmul_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/node_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/normal_pooling_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/option.py
colossalai/auto_parallel/tensor_shard/node_handler/output_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/permute_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/placeholder_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/registry.py
colossalai/auto_parallel/tensor_shard/node_handler/softmax_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/split_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/sum_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/tensor_constructor_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/transpose_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/unary_elementwise_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/view_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/where_handler.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/__init__.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/batch_norm_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/binary_elementwise_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/conv_strategy_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/embedding_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/getattr_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/getitem_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/layer_norm_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/matmul_strategy_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/normal_pooling_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/output_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/placeholder_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/reshape_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/softmax_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/strategy_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/sum_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/tensor_constructor_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/unary_elementwise_generator.py
colossalai/auto_parallel/tensor_shard/node_handler/strategy/where_generator.py
colossalai/auto_parallel/tensor_shard/solver/__init__.py
colossalai/auto_parallel/tensor_shard/solver/cost_graph.py
colossalai/auto_parallel/tensor_shard/solver/graph_analysis.py
colossalai/auto_parallel/tensor_shard/solver/options.py
colossalai/auto_parallel/tensor_shard/solver/solver.py
colossalai/auto_parallel/tensor_shard/solver/strategies_constructor.py
colossalai/auto_parallel/tensor_shard/utils/__init__.py
colossalai/auto_parallel/tensor_shard/utils/broadcast.py
colossalai/auto_parallel/tensor_shard/utils/factory.py
colossalai/auto_parallel/tensor_shard/utils/misc.py
colossalai/auto_parallel/tensor_shard/utils/reshape.py
colossalai/auto_parallel/tensor_shard/utils/sharding.py
colossalai/builder/__init__.py
colossalai/builder/builder.py
colossalai/cli/__init__.py
colossalai/cli/cli.py
colossalai/cli/benchmark/__init__.py
colossalai/cli/benchmark/benchmark.py
colossalai/cli/benchmark/models.py
colossalai/cli/benchmark/utils.py
colossalai/cli/check/__init__.py
colossalai/cli/check/check_installation.py
colossalai/cli/launcher/__init__.py
colossalai/cli/launcher/hostinfo.py
colossalai/cli/launcher/multinode_runner.py
colossalai/cli/launcher/run.py
colossalai/communication/__init__.py
colossalai/communication/collective.py
colossalai/communication/p2p.py
colossalai/communication/p2p_v2.py
colossalai/communication/ring.py
colossalai/communication/utils.py
colossalai/context/__init__.py
colossalai/context/config.py
colossalai/context/moe_context.py
colossalai/context/parallel_context.py
colossalai/context/parallel_mode.py
colossalai/context/singleton_meta.py
colossalai/context/process_group_initializer/__init__.py
colossalai/context/process_group_initializer/initializer_1d.py
colossalai/context/process_group_initializer/initializer_2d.py
colossalai/context/process_group_initializer/initializer_2p5d.py
colossalai/context/process_group_initializer/initializer_3d.py
colossalai/context/process_group_initializer/initializer_data.py
colossalai/context/process_group_initializer/initializer_model.py
colossalai/context/process_group_initializer/initializer_pipeline.py
colossalai/context/process_group_initializer/initializer_sequence.py
colossalai/context/process_group_initializer/initializer_tensor.py
colossalai/context/process_group_initializer/process_group_initializer.py
colossalai/context/random/__init__.py
colossalai/context/random/_helper.py
colossalai/context/random/seed_manager.py
colossalai/device/__init__.py
colossalai/device/alpha_beta_profiler.py
colossalai/device/calc_pipeline_strategy.py
colossalai/device/device_mesh.py
colossalai/engine/__init__.py
colossalai/engine/_base_engine.py
colossalai/engine/gradient_accumulation/__init__.py
colossalai/engine/gradient_accumulation/_gradient_accumulation.py
colossalai/engine/gradient_handler/__init__.py
colossalai/engine/gradient_handler/_base_gradient_handler.py
colossalai/engine/gradient_handler/_data_parallel_gradient_handler.py
colossalai/engine/gradient_handler/_moe_gradient_handler.py
colossalai/engine/gradient_handler/_pipeline_parallel_gradient_handler.py
colossalai/engine/gradient_handler/_sequence_parallel_gradient_handler.py
colossalai/engine/gradient_handler/_zero_gradient_handler.py
colossalai/engine/gradient_handler/utils.py
colossalai/engine/schedule/__init__.py
colossalai/engine/schedule/_base_schedule.py
colossalai/engine/schedule/_non_pipeline_schedule.py
colossalai/engine/schedule/_pipeline_schedule.py
colossalai/engine/schedule/_pipeline_schedule_v2.py
colossalai/fx/__init__.py
colossalai/fx/_compatibility.py
colossalai/fx/_meta_registrations.py
colossalai/fx/graph_module.py
colossalai/fx/proxy.py
colossalai/fx/codegen/__init__.py
colossalai/fx/codegen/activation_checkpoint_codegen.py
colossalai/fx/passes/__init__.py
colossalai/fx/passes/adding_split_node_pass.py
colossalai/fx/passes/concrete_info_prop.py
colossalai/fx/passes/meta_info_prop.py
colossalai/fx/passes/passes_for_gpt2_test.py
colossalai/fx/passes/shard_1d_pass.py
colossalai/fx/passes/split_module.py
colossalai/fx/passes/utils.py
colossalai/fx/passes/algorithms/__init__.py
colossalai/fx/passes/algorithms/build_c_ext.py
colossalai/fx/passes/algorithms/ckpt_solver_chen.py
colossalai/fx/passes/algorithms/ckpt_solver_pofo.py
colossalai/fx/passes/algorithms/ckpt_solver_rotor.py
colossalai/fx/passes/algorithms/linearize.py
colossalai/fx/passes/algorithms/operation.py
colossalai/fx/profiler/__init__.py
colossalai/fx/profiler/constants.py
colossalai/fx/profiler/dataflow.py
colossalai/fx/profiler/memory_utils.py
colossalai/fx/profiler/opcount.py
colossalai/fx/profiler/profiler.py
colossalai/fx/profiler/shard_utils.py
colossalai/fx/profiler/tensor.py
colossalai/fx/profiler/experimental/__init__.py
colossalai/fx/profiler/experimental/constants.py
colossalai/fx/profiler/experimental/profiler.py
colossalai/fx/profiler/experimental/registry.py
colossalai/fx/profiler/experimental/shard_utils.py
colossalai/fx/profiler/experimental/profiler_function/__init__.py
colossalai/fx/profiler/experimental/profiler_function/activation_function.py
colossalai/fx/profiler/experimental/profiler_function/arithmetic.py
colossalai/fx/profiler/experimental/profiler_function/embedding.py
colossalai/fx/profiler/experimental/profiler_function/linear.py
colossalai/fx/profiler/experimental/profiler_function/normalization.py
colossalai/fx/profiler/experimental/profiler_function/pooling.py
colossalai/fx/profiler/experimental/profiler_function/python_ops.py
colossalai/fx/profiler/experimental/profiler_function/torch_ops.py
colossalai/fx/profiler/experimental/profiler_module/__init__.py
colossalai/fx/profiler/experimental/profiler_module/activation_function.py
colossalai/fx/profiler/experimental/profiler_module/attention.py
colossalai/fx/profiler/experimental/profiler_module/convolution.py
colossalai/fx/profiler/experimental/profiler_module/dropout.py
colossalai/fx/profiler/experimental/profiler_module/embedding.py
colossalai/fx/profiler/experimental/profiler_module/linear.py
colossalai/fx/profiler/experimental/profiler_module/normalization.py
colossalai/fx/profiler/experimental/profiler_module/pooling.py
colossalai/fx/profiler/experimental/profiler_module/rnn.py
colossalai/fx/profiler/experimental/profiler_module/torch_op.py
colossalai/fx/tracer/__init__.py
colossalai/fx/tracer/_meta_trace.py
colossalai/fx/tracer/_symbolic_trace.py
colossalai/fx/tracer/_tracer_utils.py
colossalai/fx/tracer/experimental.py
colossalai/fx/tracer/registry.py
colossalai/fx/tracer/tracer.py
colossalai/fx/tracer/bias_addition_patch/__init__.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/__init__.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/addbmm.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/addmm.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/bias_addition_function.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_function/linear.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_module/__init__.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_module/bias_addition_module.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_module/conv.py
colossalai/fx/tracer/bias_addition_patch/patched_bias_addition_module/linear.py
colossalai/fx/tracer/meta_patch/__init__.py
colossalai/fx/tracer/meta_patch/patched_function/__init__.py
colossalai/fx/tracer/meta_patch/patched_function/activation_function.py
colossalai/fx/tracer/meta_patch/patched_function/arithmetic.py
colossalai/fx/tracer/meta_patch/patched_function/convolution.py
colossalai/fx/tracer/meta_patch/patched_function/embedding.py
colossalai/fx/tracer/meta_patch/patched_function/normalization.py
colossalai/fx/tracer/meta_patch/patched_function/python_ops.py
colossalai/fx/tracer/meta_patch/patched_function/torch_ops.py
colossalai/fx/tracer/meta_patch/patched_module/__init__.py
colossalai/fx/tracer/meta_patch/patched_module/activation_function.py
colossalai/fx/tracer/meta_patch/patched_module/convolution.py
colossalai/fx/tracer/meta_patch/patched_module/embedding.py
colossalai/fx/tracer/meta_patch/patched_module/linear.py
colossalai/fx/tracer/meta_patch/patched_module/normalization.py
colossalai/fx/tracer/meta_patch/patched_module/pooling.py
colossalai/fx/tracer/meta_patch/patched_module/rnn.py
colossalai/gemini/__init__.py
colossalai/gemini/gemini_context.py
colossalai/gemini/gemini_mgr.py
colossalai/gemini/placement_policy.py
colossalai/gemini/stateful_tensor.py
colossalai/gemini/stateful_tensor_mgr.py
colossalai/gemini/tensor_placement_policy.py
colossalai/gemini/tensor_utils.py
colossalai/gemini/chunk/__init__.py
colossalai/gemini/chunk/chunk.py
colossalai/gemini/chunk/manager.py
colossalai/gemini/chunk/search_utils.py
colossalai/gemini/chunk/utils.py
colossalai/gemini/memory_tracer/__init__.py
colossalai/gemini/memory_tracer/chunk_memstats_collector.py
colossalai/gemini/memory_tracer/memory_monitor.py
colossalai/gemini/memory_tracer/memory_stats.py
colossalai/gemini/memory_tracer/memstats_collector.py
colossalai/gemini/memory_tracer/param_runtime_order.py
colossalai/gemini/memory_tracer/runtime_mem_tracer.py
colossalai/gemini/memory_tracer/static_memstats_collector.py
colossalai/gemini/memory_tracer/utils.py
colossalai/gemini/ophooks/__init__.py
colossalai/gemini/ophooks/_shard_grad_ophook.py
colossalai/gemini/ophooks/_shard_param_ophook.py
colossalai/gemini/ophooks/runtime_mem_tracer_hook.py
colossalai/gemini/ophooks/utils.py
colossalai/gemini/paramhooks/__init__.py
colossalai/gemini/paramhooks/_param_hookmgr.py
colossalai/kernel/__init__.py
colossalai/kernel/cuda_native/__init__.py
colossalai/kernel/cuda_native/flash_attention.py
colossalai/kernel/cuda_native/layer_norm.py
colossalai/kernel/cuda_native/multihead_attention.py
colossalai/kernel/cuda_native/scaled_softmax.py
colossalai/kernel/cuda_native/csrc/colossal_C_frontend.cpp
colossalai/kernel/cuda_native/csrc/compat.h
colossalai/kernel/cuda_native/csrc/cpu_adam.cpp
colossalai/kernel/cuda_native/csrc/cpu_adam.h
colossalai/kernel/cuda_native/csrc/layer_norm_cuda.cpp
colossalai/kernel/cuda_native/csrc/layer_norm_cuda_kernel.cu
colossalai/kernel/cuda_native/csrc/moe_cuda.cpp
colossalai/kernel/cuda_native/csrc/moe_cuda_kernel.cu
colossalai/kernel/cuda_native/csrc/multi_tensor_adam.cu
colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh
colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu
colossalai/kernel/cuda_native/csrc/multi_tensor_lamb.cu
colossalai/kernel/cuda_native/csrc/multi_tensor_scale_kernel.cu
colossalai/kernel/cuda_native/csrc/multi_tensor_sgd_kernel.cu
colossalai/kernel/cuda_native/csrc/multihead_attention_1d.cpp
colossalai/kernel/cuda_native/csrc/multihead_attention_1d.h
colossalai/kernel/cuda_native/csrc/scaled_masked_softmax.cpp
colossalai/kernel/cuda_native/csrc/scaled_masked_softmax.h
colossalai/kernel/cuda_native/csrc/scaled_masked_softmax_cuda.cu
colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax.cpp
colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax.h
colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax_cuda.cu
colossalai/kernel/cuda_native/csrc/type_shim.h
colossalai/kernel/cuda_native/csrc/kernels/cross_entropy.cu
colossalai/kernel/cuda_native/csrc/kernels/cublas_wrappers.cu
colossalai/kernel/cuda_native/csrc/kernels/cuda_util.cu
colossalai/kernel/cuda_native/csrc/kernels/dropout_kernels.cu
colossalai/kernel/cuda_native/csrc/kernels/general_kernels.cu
colossalai/kernel/cuda_native/csrc/kernels/normalize_kernels.cu
colossalai/kernel/cuda_native/csrc/kernels/softmax_kernels.cu
colossalai/kernel/cuda_native/csrc/kernels/transform_kernels.cu
colossalai/kernel/cuda_native/csrc/kernels/include/block_reduce.h
colossalai/kernel/cuda_native/csrc/kernels/include/context.h
colossalai/kernel/cuda_native/csrc/kernels/include/cross_entropy_layer.h
colossalai/kernel/cuda_native/csrc/kernels/include/cublas_wrappers.h
colossalai/kernel/cuda_native/csrc/kernels/include/cuda_util.h
colossalai/kernel/cuda_native/csrc/kernels/include/dropout.h
colossalai/kernel/cuda_native/csrc/kernels/include/feed_forward.h
colossalai/kernel/cuda_native/csrc/kernels/include/kernels.h
colossalai/kernel/cuda_native/csrc/kernels/include/ls_cub.cuh
colossalai/kernel/cuda_native/csrc/kernels/include/normalize_layer.h
colossalai/kernel/cuda_native/csrc/kernels/include/softmax.h
colossalai/kernel/cuda_native/csrc/kernels/include/strided_batch_gemm.h
colossalai/kernel/jit/__init__.py
colossalai/kernel/jit/bias_dropout_add.py
colossalai/kernel/jit/bias_gelu.py
colossalai/kernel/jit/option.py
colossalai/kernel/op_builder/__init__.py
colossalai/kernel/op_builder/builder.py
colossalai/kernel/op_builder/cpu_adam.py
colossalai/kernel/op_builder/fused_optim.py
colossalai/kernel/op_builder/layernorm.py
colossalai/kernel/op_builder/moe.py
colossalai/kernel/op_builder/multi_head_attn.py
colossalai/kernel/op_builder/scaled_masked_softmax.py
colossalai/kernel/op_builder/scaled_upper_triangle_masked_softmax.py
colossalai/kernel/op_builder/utils.py
colossalai/logging/__init__.py
colossalai/logging/logger.py
colossalai/nn/__init__.py
colossalai/nn/init.py
colossalai/nn/_ops/__init__.py
colossalai/nn/_ops/_utils.py
colossalai/nn/_ops/addmm.py
colossalai/nn/_ops/batch_norm.py
colossalai/nn/_ops/element_wise.py
colossalai/nn/_ops/embedding.py
colossalai/nn/_ops/embedding_bag.py
colossalai/nn/_ops/layernorm.py
colossalai/nn/_ops/linear.py
colossalai/nn/_ops/loss.py
colossalai/nn/_ops/view.py
colossalai/nn/layer/__init__.py
colossalai/nn/layer/base_layer.py
colossalai/nn/layer/colossalai_layer/__init__.py
colossalai/nn/layer/colossalai_layer/_utils.py
colossalai/nn/layer/colossalai_layer/dropout.py
colossalai/nn/layer/colossalai_layer/embedding.py
colossalai/nn/layer/colossalai_layer/linear.py
colossalai/nn/layer/colossalai_layer/normalization.py
colossalai/nn/layer/moe/__init__.py
colossalai/nn/layer/moe/_operation.py
colossalai/nn/layer/moe/experts.py
colossalai/nn/layer/moe/layers.py
colossalai/nn/layer/moe/routers.py
colossalai/nn/layer/moe/utils.py
colossalai/nn/layer/parallel_1d/__init__.py
colossalai/nn/layer/parallel_1d/_operation.py
colossalai/nn/layer/parallel_1d/_utils.py
colossalai/nn/layer/parallel_1d/layers.py
colossalai/nn/layer/parallel_2d/__init__.py
colossalai/nn/layer/parallel_2d/_operation.py
colossalai/nn/layer/parallel_2d/_utils.py
colossalai/nn/layer/parallel_2d/layers.py
colossalai/nn/layer/parallel_2p5d/__init__.py
colossalai/nn/layer/parallel_2p5d/_operation.py
colossalai/nn/layer/parallel_2p5d/_utils.py
colossalai/nn/layer/parallel_2p5d/layers.py
colossalai/nn/layer/parallel_3d/__init__.py
colossalai/nn/layer/parallel_3d/_operation.py
colossalai/nn/layer/parallel_3d/_utils.py
colossalai/nn/layer/parallel_3d/layers.py
colossalai/nn/layer/parallel_sequence/__init__.py
colossalai/nn/layer/parallel_sequence/_operation.py
colossalai/nn/layer/parallel_sequence/_utils.py
colossalai/nn/layer/parallel_sequence/layers.py
colossalai/nn/layer/utils/__init__.py
colossalai/nn/layer/utils/common.py
colossalai/nn/layer/vanilla/__init__.py
colossalai/nn/layer/vanilla/layers.py
colossalai/nn/layer/wrapper/__init__.py
colossalai/nn/layer/wrapper/pipeline_wrapper.py
colossalai/nn/loss/__init__.py
colossalai/nn/loss/loss_1d.py
colossalai/nn/loss/loss_2d.py
colossalai/nn/loss/loss_2p5d.py
colossalai/nn/loss/loss_3d.py
colossalai/nn/loss/loss_moe.py
colossalai/nn/lr_scheduler/__init__.py
colossalai/nn/lr_scheduler/cosine.py
colossalai/nn/lr_scheduler/delayed.py
colossalai/nn/lr_scheduler/linear.py
colossalai/nn/lr_scheduler/multistep.py
colossalai/nn/lr_scheduler/onecycle.py
colossalai/nn/lr_scheduler/poly.py
colossalai/nn/lr_scheduler/torch.py
colossalai/nn/metric/__init__.py
colossalai/nn/metric/_utils.py
colossalai/nn/metric/accuracy_2d.py
colossalai/nn/metric/accuracy_2p5d.py
colossalai/nn/metric/accuracy_3d.py
colossalai/nn/optimizer/__init__.py
colossalai/nn/optimizer/colossalai_optimizer.py
colossalai/nn/optimizer/cpu_adam.py
colossalai/nn/optimizer/fused_adam.py
colossalai/nn/optimizer/fused_lamb.py
colossalai/nn/optimizer/fused_sgd.py
colossalai/nn/optimizer/gemini_optimizer.py
colossalai/nn/optimizer/hybrid_adam.py
colossalai/nn/optimizer/lamb.py
colossalai/nn/optimizer/lars.py
colossalai/nn/optimizer/nvme_optimizer.py
colossalai/nn/optimizer/zero_optimizer.py
colossalai/nn/parallel/__init__.py
colossalai/nn/parallel/data_parallel.py
colossalai/nn/parallel/gemini_parallel.py
colossalai/nn/parallel/reducer.py
colossalai/nn/parallel/utils.py
colossalai/nn/parallel/zero_wrapper.py
colossalai/nn/parallel/layers/__init__.py
colossalai/nn/parallel/layers/colo_module.py
colossalai/nn/parallel/layers/embedding.py
colossalai/nn/parallel/layers/linear.py
colossalai/nn/parallel/layers/module_utils.py
colossalai/nn/parallel/layers/cache_embedding/__init__.py
colossalai/nn/parallel/layers/cache_embedding/base_embedding.py
colossalai/nn/parallel/layers/cache_embedding/cache_mgr.py
colossalai/nn/parallel/layers/cache_embedding/cached_embedding.py
colossalai/nn/parallel/layers/cache_embedding/copyer.py
colossalai/nn/parallel/layers/cache_embedding/embedding_config.py
colossalai/nn/parallel/layers/cache_embedding/parallel_cached_embedding.py
colossalai/nn/parallel/layers/cache_embedding/parallel_cached_embedding_tablewise.py
colossalai/nn/parallel/layers/cache_embedding/parallel_cached_embedding_tablewise_split_cache.py
colossalai/pipeline/__init__.py
colossalai/pipeline/layer_spec.py
colossalai/pipeline/pipelinable.py
colossalai/pipeline/pipeline_process_group.py
colossalai/pipeline/utils.py
colossalai/pipeline/middleware/__init__.py
colossalai/pipeline/middleware/topo.py
colossalai/pipeline/middleware/adaptor/__init__.py
colossalai/pipeline/middleware/adaptor/fx.py
colossalai/pipeline/rpc/__init__.py
colossalai/pipeline/rpc/_pipeline_base.py
colossalai/pipeline/rpc/_pipeline_schedule.py
colossalai/pipeline/rpc/utils.py
colossalai/registry/__init__.py
colossalai/registry/registry.py
colossalai/tensor/__init__.py
colossalai/tensor/colo_parameter.py
colossalai/tensor/colo_tensor.py
colossalai/tensor/comm_spec.py
colossalai/tensor/compute_spec.py
colossalai/tensor/const.py
colossalai/tensor/dist_spec_mgr.py
colossalai/tensor/distspec.py
colossalai/tensor/op_wrapper.py
colossalai/tensor/param_op_hook.py
colossalai/tensor/process_group.py
colossalai/tensor/shape_consistency.py
colossalai/tensor/sharding_spec.py
colossalai/tensor/tensor_spec.py
colossalai/tensor/utils.py
colossalai/testing/__init__.py
colossalai/testing/comparison.py
colossalai/testing/pytest_wrapper.py
colossalai/testing/random.py
colossalai/testing/utils.py
colossalai/trainer/__init__.py
colossalai/trainer/_trainer.py
colossalai/trainer/hooks/__init__.py
colossalai/trainer/hooks/_base_hook.py
colossalai/trainer/hooks/_checkpoint_hook.py
colossalai/trainer/hooks/_commons_.py
colossalai/trainer/hooks/_log_hook.py
colossalai/trainer/hooks/_lr_scheduler_hook.py
colossalai/trainer/hooks/_metric_hook.py
colossalai/utils/__init__.py
colossalai/utils/activation_checkpoint.py
colossalai/utils/checkpointing.py
colossalai/utils/common.py
colossalai/utils/cuda.py
colossalai/utils/memory.py
colossalai/utils/moe.py
colossalai/utils/timer.py
colossalai/utils/checkpoint/__init__.py
colossalai/utils/checkpoint/module_checkpoint.py
colossalai/utils/checkpoint/utils.py
colossalai/utils/checkpoint_io/__init__.py
colossalai/utils/checkpoint_io/backend.py
colossalai/utils/checkpoint_io/constant.py
colossalai/utils/checkpoint_io/convertor.py
colossalai/utils/checkpoint_io/distributed.py
colossalai/utils/checkpoint_io/io.py
colossalai/utils/checkpoint_io/meta.py
colossalai/utils/checkpoint_io/reader.py
colossalai/utils/checkpoint_io/utils.py
colossalai/utils/checkpoint_io/writer.py
colossalai/utils/data_sampler/__init__.py
colossalai/utils/data_sampler/base_sampler.py
colossalai/utils/data_sampler/data_parallel_sampler.py
colossalai/utils/model/__init__.py
colossalai/utils/model/colo_init_context.py
colossalai/utils/model/experimental.py
colossalai/utils/model/lazy_init_context.py
colossalai/utils/model/utils.py
colossalai/utils/multi_tensor_apply/__init__.py
colossalai/utils/multi_tensor_apply/multi_tensor_apply.py
colossalai/utils/profiler/__init__.py
colossalai/utils/profiler/extention.py
colossalai/utils/profiler/profiler.py
colossalai/utils/profiler/stateful_tensor_mem_extention.py
colossalai/utils/profiler/legacy/__init__.py
colossalai/utils/profiler/legacy/comm_profiler.py
colossalai/utils/profiler/legacy/pcie_profiler.py
colossalai/utils/profiler/legacy/prof_utils.py
colossalai/utils/rank_recorder/__init__.py
colossalai/utils/rank_recorder/rank_recorder.py
colossalai/utils/tensor_detector/__init__.py
colossalai/utils/tensor_detector/tensor_detector.py
colossalai/zero/__init__.py
colossalai/zero/init_ctx/__init__.py
colossalai/zero/init_ctx/init_context.py
colossalai/zero/shard_utils/__init__.py
colossalai/zero/shard_utils/base_shard_strategy.py
colossalai/zero/shard_utils/bucket_tensor_shard_strategy.py
colossalai/zero/shard_utils/commons.py
colossalai/zero/shard_utils/tensor_shard_strategy.py
colossalai/zero/sharded_model/__init__.py
colossalai/zero/sharded_model/_utils.py
colossalai/zero/sharded_model/reduce_scatter.py
colossalai/zero/sharded_model/sharded_model_v2.py
colossalai/zero/sharded_model/utils.py
colossalai/zero/sharded_optim/__init__.py
colossalai/zero/sharded_optim/_utils.py
colossalai/zero/sharded_optim/low_level_optim.py
colossalai/zero/sharded_optim/sharded_optim_v2.py
colossalai/zero/sharded_optim/bookkeeping/__init__.py
colossalai/zero/sharded_optim/bookkeeping/base_store.py
colossalai/zero/sharded_optim/bookkeeping/bucket_store.py
colossalai/zero/sharded_optim/bookkeeping/gradient_store.py
colossalai/zero/sharded_optim/bookkeeping/parameter_store.py
colossalai/zero/sharded_optim/bookkeeping/tensor_bucket.py
colossalai/zero/sharded_param/__init__.py
colossalai/zero/sharded_param/sharded_param.py
colossalai/zero/sharded_param/sharded_tensor.py
colossalai/zero/utils/__init__.py
colossalai/zero/utils/gemini_hook.py
colossalai/zero/utils/zero_hook.py
op_builder/__init__.py
op_builder/builder.py
op_builder/cpu_adam.py
op_builder/fused_optim.py
op_builder/layernorm.py
op_builder/moe.py
op_builder/multi_head_attn.py
op_builder/scaled_masked_softmax.py
op_builder/scaled_upper_triangle_masked_softmax.py
op_builder/utils.py
requirements/requirements-test.txt
requirements/requirements.txt
tests/components_to_test/__init__.py
tests/components_to_test/albert.py
tests/components_to_test/beit.py
tests/components_to_test/bert.py
tests/components_to_test/gpt2.py
tests/components_to_test/hanging_param_model.py
tests/components_to_test/inline_op_model.py
tests/components_to_test/nested_model.py
tests/components_to_test/registry.py
tests/components_to_test/repeated_computed_layers.py
tests/components_to_test/resnet.py
tests/components_to_test/simple_net.py
tests/components_to_test/utils/__init__.py
tests/components_to_test/utils/dummy_data_generator.py
tests/components_to_test/utils/executor.py
tests/test_auto_parallel/__init__.py
tests/test_auto_parallel/test_tensor_shard/__init__.py
tests/test_auto_parallel/test_tensor_shard/test_bias_addition_forward.py
tests/test_auto_parallel/test_tensor_shard/test_broadcast.py
tests/test_auto_parallel/test_tensor_shard/test_checkpoint.py
tests/test_auto_parallel/test_tensor_shard/test_liveness_analysis.py
tests/test_auto_parallel/test_tensor_shard/test_param_resharding_cost.py
tests/test_auto_parallel/test_tensor_shard/test_shape_consistency_pass.py
tests/test_auto_parallel/test_tensor_shard/test_solver_with_resnet_v2.py
tests/test_auto_parallel/test_tensor_shard/test_gpt/__init__.py
tests/test_auto_parallel/test_tensor_shard/test_gpt/gpt_modules.py
tests/test_auto_parallel/test_tensor_shard/test_gpt/test_runtime_with_gpt_modules.py
tests/test_auto_parallel/test_tensor_shard/test_gpt/test_solver_with_gpt_module.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/__init__.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_addbmm_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_addmm_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_batch_norm_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bias_linear_function_node.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bias_linear_module_node.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_binary_elementwise_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bmm_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_conv_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_default_reshape_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_embedding_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_getattr_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_getitem_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_layer_norm_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_linear_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_matmul_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_norm_pooling_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_output_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_permute_and_transpose_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_placeholder_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_shard_option.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_softmax_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_split_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_sum_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_tensor_constructor.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_unary_element_wise_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_view_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_where_handler.py
tests/test_auto_parallel/test_tensor_shard/test_node_handler/utils.py