LICENSE
MANIFEST.in
README.md
pyproject.toml
setup.py
GPTQModel.egg-info/PKG-INFO
GPTQModel.egg-info/SOURCES.txt
GPTQModel.egg-info/dependency_links.txt
GPTQModel.egg-info/entry_points.txt
GPTQModel.egg-info/requires.txt
GPTQModel.egg-info/top_level.txt
gptqmodel/__init__.py
gptqmodel/version.py
gptqmodel/adapter/__init__.py
gptqmodel/adapter/adapter.py
gptqmodel/adapter/peft.py
gptqmodel/adapter/remote.py
gptqmodel/cli/__init__.py
gptqmodel/cli/env.py
gptqmodel/cli/gptqmodel.py
gptqmodel/eora/__init__.py
gptqmodel/eora/eora.py
gptqmodel/hf_minimax_m2/__init__.py
gptqmodel/hf_minimax_m2/configuration_minimax_m2.py
gptqmodel/hf_minimax_m2/modeling_minimax_m2.py
gptqmodel/hf_minimax_m2/test_minimax_m2_hf.py
gptqmodel/looper/__init__.py
gptqmodel/looper/awq_processor.py
gptqmodel/looper/dequantize_processor.py
gptqmodel/looper/eora_processor.py
gptqmodel/looper/gptq_processor.py
gptqmodel/looper/input_cache.py
gptqmodel/looper/loop_processor.py
gptqmodel/looper/module_looper.py
gptqmodel/looper/named_module.py
gptqmodel/looper/native_processor.py
gptqmodel/looper/qqq_processor.py
gptqmodel/looper/stage_inputs_capture.py
gptqmodel/looper/stage_layer.py
gptqmodel/looper/stage_subset.py
gptqmodel/looper/tensorparallel_weight_processor.py
gptqmodel/models/__init__.py
gptqmodel/models/_const.py
gptqmodel/models/auto.py
gptqmodel/models/base.py
gptqmodel/models/loader.py
gptqmodel/models/writer.py
gptqmodel/models/definitions/__init__.py
gptqmodel/models/definitions/apertus.py
gptqmodel/models/definitions/baichuan.py
gptqmodel/models/definitions/bailing_moe.py
gptqmodel/models/definitions/base_qwen2_5_omni.py
gptqmodel/models/definitions/base_qwen2_vl.py
gptqmodel/models/definitions/base_qwen3_vl.py
gptqmodel/models/definitions/bloom.py
gptqmodel/models/definitions/brumby.py
gptqmodel/models/definitions/chatglm.py
gptqmodel/models/definitions/codegen.py
gptqmodel/models/definitions/dbrx.py
gptqmodel/models/definitions/dbrx_converted.py
gptqmodel/models/definitions/decilm.py
gptqmodel/models/definitions/deepseek_v2.py
gptqmodel/models/definitions/deepseek_v3.py
gptqmodel/models/definitions/dream.py
gptqmodel/models/definitions/ernie4_5.py
gptqmodel/models/definitions/ernie4_5_moe.py
gptqmodel/models/definitions/exaone.py
gptqmodel/models/definitions/falcon_h1.py
gptqmodel/models/definitions/gemma2.py
gptqmodel/models/definitions/gemma3.py
gptqmodel/models/definitions/glm.py
gptqmodel/models/definitions/glm4_moe.py
gptqmodel/models/definitions/gpt2.py
gptqmodel/models/definitions/gpt_bigcode.py
gptqmodel/models/definitions/gpt_neo.py
gptqmodel/models/definitions/gpt_neox.py
gptqmodel/models/definitions/gpt_oss.py
gptqmodel/models/definitions/gptj.py
gptqmodel/models/definitions/granitemoehybrid.py
gptqmodel/models/definitions/grinmoe.py
gptqmodel/models/definitions/hymba.py
gptqmodel/models/definitions/instella.py
gptqmodel/models/definitions/internlm.py
gptqmodel/models/definitions/internlm2.py
gptqmodel/models/definitions/klear.py
gptqmodel/models/definitions/lfm2_moe.py
gptqmodel/models/definitions/llama.py
gptqmodel/models/definitions/llama4.py
gptqmodel/models/definitions/llava_qwen2.py
gptqmodel/models/definitions/longcat_flash.py
gptqmodel/models/definitions/mimo.py
gptqmodel/models/definitions/minicpm.py
gptqmodel/models/definitions/minicpm3.py
gptqmodel/models/definitions/minimax_m2.py
gptqmodel/models/definitions/mixtral.py
gptqmodel/models/definitions/mllama.py
gptqmodel/models/definitions/mobilellm.py
gptqmodel/models/definitions/moss.py
gptqmodel/models/definitions/mpt.py
gptqmodel/models/definitions/nemotron_h.py
gptqmodel/models/definitions/olmoe.py
gptqmodel/models/definitions/opt.py
gptqmodel/models/definitions/ovis.py
gptqmodel/models/definitions/ovis2.py
gptqmodel/models/definitions/pangu_alpha.py
gptqmodel/models/definitions/phi.py
gptqmodel/models/definitions/phi3.py
gptqmodel/models/definitions/phi4.py
gptqmodel/models/definitions/qwen.py
gptqmodel/models/definitions/qwen2.py
gptqmodel/models/definitions/qwen2_5_omni.py
gptqmodel/models/definitions/qwen2_5_vl.py
gptqmodel/models/definitions/qwen2_moe.py
gptqmodel/models/definitions/qwen2_vl.py
gptqmodel/models/definitions/qwen3.py
gptqmodel/models/definitions/qwen3_moe.py
gptqmodel/models/definitions/qwen3_next.py
gptqmodel/models/definitions/qwen3_omni_moe.py
gptqmodel/models/definitions/qwen3_vl.py
gptqmodel/models/definitions/rw.py
gptqmodel/models/definitions/starcoder2.py
gptqmodel/models/definitions/telechat2.py
gptqmodel/models/definitions/xverse.py
gptqmodel/nn_modules/__init__.py
gptqmodel/nn_modules/converter.py
gptqmodel/nn_modules/hooked_linear.py
gptqmodel/nn_modules/qlinear/__init__.py
gptqmodel/nn_modules/qlinear/awq_exllama.py
gptqmodel/nn_modules/qlinear/awq_exllamav2.py
gptqmodel/nn_modules/qlinear/awq_gemm.py
gptqmodel/nn_modules/qlinear/awq_gemm_ipex.py
gptqmodel/nn_modules/qlinear/awq_gemv.py
gptqmodel/nn_modules/qlinear/awq_gemv_fast.py
gptqmodel/nn_modules/qlinear/awq_machete.py
gptqmodel/nn_modules/qlinear/awq_marlin.py
gptqmodel/nn_modules/qlinear/awq_torch.py
gptqmodel/nn_modules/qlinear/bitblas.py
gptqmodel/nn_modules/qlinear/bitblas_target_detector.py
gptqmodel/nn_modules/qlinear/exllama.py
gptqmodel/nn_modules/qlinear/exllama_eora.py
gptqmodel/nn_modules/qlinear/exllamav2.py
gptqmodel/nn_modules/qlinear/lookahead.py
gptqmodel/nn_modules/qlinear/machete.py
gptqmodel/nn_modules/qlinear/marlin.py
gptqmodel/nn_modules/qlinear/pack_block_ext.py
gptqmodel/nn_modules/qlinear/qqq.py
gptqmodel/nn_modules/qlinear/torch.py
gptqmodel/nn_modules/qlinear/torch_fused.py
gptqmodel/nn_modules/qlinear/torch_fused_awq.py
gptqmodel/nn_modules/qlinear/tritonv2.py
gptqmodel/nn_modules/qlinear/utils.py
gptqmodel/nn_modules/triton_utils/__init__.py
gptqmodel/nn_modules/triton_utils/custom_autotune.py
gptqmodel/nn_modules/triton_utils/dequant.py
gptqmodel/nn_modules/triton_utils/kernels.py
gptqmodel/nn_modules/triton_utils/mixin.py
gptqmodel/quantization/__init__.py
gptqmodel/quantization/config.py
gptqmodel/quantization/dtype.py
gptqmodel/quantization/gar.py
gptqmodel/quantization/gar_ref.py
gptqmodel/quantization/gptq.py
gptqmodel/quantization/gptqv2.py
gptqmodel/quantization/qqq.py
gptqmodel/quantization/quantizer.py
gptqmodel/quantization/awq/__init__.py
gptqmodel/quantization/awq/_config.py
gptqmodel/quantization/awq/modules/__init__.py
gptqmodel/quantization/awq/modules/act.py
gptqmodel/quantization/awq/modules/linear/__init__.py
gptqmodel/quantization/awq/modules/linear/exllama.py
gptqmodel/quantization/awq/modules/linear/exllamav2.py
gptqmodel/quantization/awq/modules/linear/gemm.py
gptqmodel/quantization/awq/modules/linear/gemm_ipex.py
gptqmodel/quantization/awq/modules/linear/gemv.py
gptqmodel/quantization/awq/modules/linear/gemv_fast.py
gptqmodel/quantization/awq/modules/linear/marlin.py
gptqmodel/quantization/awq/modules/triton/__init__.py
gptqmodel/quantization/awq/modules/triton/gemm.py
gptqmodel/quantization/awq/quantize/__init__.py
gptqmodel/quantization/awq/quantize/scale.py
gptqmodel/quantization/awq/utils/__init__.py
gptqmodel/quantization/awq/utils/calib_data.py
gptqmodel/quantization/awq/utils/module.py
gptqmodel/quantization/awq/utils/packing_utils.py
gptqmodel/quantization/awq/utils/utils.py
gptqmodel/quantization/rotation/__init__.py
gptqmodel/quantization/rotation/hadamard_utils.py
gptqmodel/quantization/rotation/rotation.py
gptqmodel/utils/__init__.py
gptqmodel/utils/_extension_loader.py
gptqmodel/utils/attn_mask.py
gptqmodel/utils/backend.py
gptqmodel/utils/bitblas.py
gptqmodel/utils/calibration.py
gptqmodel/utils/colors.py
gptqmodel/utils/cpp.py
gptqmodel/utils/ctx.py
gptqmodel/utils/cuda_activation_buffer.py
gptqmodel/utils/data.py
gptqmodel/utils/device.py
gptqmodel/utils/disk.py
gptqmodel/utils/env.py
gptqmodel/utils/eval.py
gptqmodel/utils/evalplus.py
gptqmodel/utils/exllama.py
gptqmodel/utils/exllamav2.py
gptqmodel/utils/gemv.py
gptqmodel/utils/hf.py
gptqmodel/utils/image.py
gptqmodel/utils/importer.py
gptqmodel/utils/inspect.py
gptqmodel/utils/linalg_warmup.py
gptqmodel/utils/logger.py
gptqmodel/utils/looper_helpers.py
gptqmodel/utils/machete.py
gptqmodel/utils/marlin.py
gptqmodel/utils/marlin_scalar_type.py
gptqmodel/utils/memory.py
gptqmodel/utils/mlx.py
gptqmodel/utils/mmlupro.py
gptqmodel/utils/model.py
gptqmodel/utils/model_dequant.py
gptqmodel/utils/modelscope.py
gptqmodel/utils/module_locks.py
gptqmodel/utils/nogil_patcher.py
gptqmodel/utils/offload.py
gptqmodel/utils/openai_server.py
gptqmodel/utils/perplexity.py
gptqmodel/utils/python.py
gptqmodel/utils/rocm.py
gptqmodel/utils/safe.py
gptqmodel/utils/safetensor.py
gptqmodel/utils/sglang.py
gptqmodel/utils/stream.py
gptqmodel/utils/structure.py
gptqmodel/utils/tensor.py
gptqmodel/utils/terminal.py
gptqmodel/utils/threads.py
gptqmodel/utils/threadx.py
gptqmodel/utils/torch.py
gptqmodel/utils/vllm.py
gptqmodel/utils/vram.py
gptqmodel_ext/__init__.py
gptqmodel_ext/pack_block_cpu.cpp
gptqmodel_ext/awq/pybind_awq.cpp
gptqmodel_ext/awq/pybind_awq_v2.cpp
gptqmodel_ext/awq/quantization/dequantize.cuh
gptqmodel_ext/awq/quantization/gemm_cuda.h
gptqmodel_ext/awq/quantization/gemm_cuda_gen.cu
gptqmodel_ext/awq/quantization/gemv_cuda.cu
gptqmodel_ext/awq/quantization/gemv_cuda.h
gptqmodel_ext/awq/quantization_new/dequantize.cuh
gptqmodel_ext/awq/quantization_new/gemm/gemm_cuda.cu
gptqmodel_ext/awq/quantization_new/gemm/gemm_cuda.h
gptqmodel_ext/awq/quantization_new/gemm/semaphore.h
gptqmodel_ext/awq/quantization_new/gemv/gemv_cuda.cu
gptqmodel_ext/awq/quantization_new/gemv/gemv_cuda.h
gptqmodel_ext/cutlass_extensions/__init__.py
gptqmodel_ext/cutlass_extensions/common.cpp
gptqmodel_ext/cutlass_extensions/common.hpp
gptqmodel_ext/cutlass_extensions/cute_utils.cuh
gptqmodel_ext/cutlass_extensions/torch_utils.hpp
gptqmodel_ext/cutlass_extensions/vllm_collective_builder.cuh
gptqmodel_ext/cutlass_extensions/vllm_custom_types.cuh
gptqmodel_ext/cutlass_extensions/vllm_cutlass_library_extension.py
gptqmodel_ext/cutlass_extensions/vllm_numeric_conversion.cuh
gptqmodel_ext/cutlass_extensions/vllm_type_utils.cuh
gptqmodel_ext/cutlass_extensions/epilogue/broadcast_load_epilogue_array_c3x.hpp
gptqmodel_ext/cutlass_extensions/epilogue/broadcast_load_epilogue_c2x.hpp
gptqmodel_ext/cutlass_extensions/epilogue/broadcast_load_epilogue_c3x.hpp
gptqmodel_ext/cutlass_extensions/epilogue/scaled_mm_epilogues_c2x.hpp
gptqmodel_ext/cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp
gptqmodel_ext/exllama/cu_compat.cuh
gptqmodel_ext/exllama/cuda_buffers.cu
gptqmodel_ext/exllama/cuda_buffers.cuh
gptqmodel_ext/exllama/exllama_ext.cpp
gptqmodel_ext/exllama/hip_compat.cuh
gptqmodel_ext/exllama/matrix.cuh
gptqmodel_ext/exllama/tuning.h
gptqmodel_ext/exllama/util.cuh
gptqmodel_ext/exllama/cuda_func/column_remap.cu
gptqmodel_ext/exllama/cuda_func/column_remap.cuh
gptqmodel_ext/exllama/cuda_func/q4_matmul.cu
gptqmodel_ext/exllama/cuda_func/q4_matmul.cuh
gptqmodel_ext/exllama/cuda_func/q4_matrix.cu
gptqmodel_ext/exllama/cuda_func/q4_matrix.cuh
gptqmodel_ext/exllama_eora/benchmark.py
gptqmodel_ext/exllama_eora/setup.py
gptqmodel_ext/exllama_eora/test_actual_value.py
gptqmodel_ext/exllama_eora/test_eora.py
gptqmodel_ext/exllama_eora/test_eora_sweep.py
gptqmodel_ext/exllama_eora/eora/__init__.py
gptqmodel_ext/exllama_eora/eora/compat.cuh
gptqmodel_ext/exllama_eora/eora/matrix_view.cuh
gptqmodel_ext/exllama_eora/eora/ops.h
gptqmodel_ext/exllama_eora/eora/pybind.cu
gptqmodel_ext/exllama_eora/eora/q_gemm.cu
gptqmodel_ext/exllama_eora/eora/q_gemm_original.cu
gptqmodel_ext/exllama_eora/eora/qdq_2.cuh
gptqmodel_ext/exllama_eora/eora/qdq_3.cuh
gptqmodel_ext/exllama_eora/eora/qdq_4.cuh
gptqmodel_ext/exllama_eora/eora/qdq_8.cuh
gptqmodel_ext/exllama_eora/eora/qdq_util.cuh
gptqmodel_ext/exllamav2/config.h
gptqmodel_ext/exllamav2/ext.cpp
gptqmodel_ext/exllamav2/cpp/util.h
gptqmodel_ext/exllamav2/cuda/compat.cuh
gptqmodel_ext/exllamav2/cuda/compat_gemm.cuh
gptqmodel_ext/exllamav2/cuda/matrix_view.cuh
gptqmodel_ext/exllamav2/cuda/q_gemm.cu
gptqmodel_ext/exllamav2/cuda/q_gemm.cuh
gptqmodel_ext/exllamav2/cuda/q_gemm_kernel.cuh
gptqmodel_ext/exllamav2/cuda/q_gemm_kernel_gptq.cuh
gptqmodel_ext/exllamav2/cuda/q_matrix.cu
gptqmodel_ext/exllamav2/cuda/q_matrix.cuh
gptqmodel_ext/exllamav2/cuda/util.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_2.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_3.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_4.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_5.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_6.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_8.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_util.cuh
gptqmodel_ext/machete/generate.py
gptqmodel_ext/machete/machete_collective_builder.cuh
gptqmodel_ext/machete/machete_interleaving_utils.cuh
gptqmodel_ext/machete/machete_mainloop.cuh
gptqmodel_ext/machete/machete_mm_kernel.cuh
gptqmodel_ext/machete/machete_mm_launcher.cuh
gptqmodel_ext/machete/machete_prepack_kernel.cuh
gptqmodel_ext/machete/machete_prepack_launcher.cuh
gptqmodel_ext/machete/machete_prepacked_layout.cuh
gptqmodel_ext/machete/machete_pytorch.cu
gptqmodel_ext/machete/core/registration.h
gptqmodel_ext/machete/core/scalar_type.hpp
gptqmodel_ext/marlin/awq_marlin_repack.cu
gptqmodel_ext/marlin/awq_marlin_repack.cuh
gptqmodel_ext/marlin/dequant.h
gptqmodel_ext/marlin/generate_kernels.py
gptqmodel_ext/marlin/gptq_marlin.cu
gptqmodel_ext/marlin/gptq_marlin.cuh
gptqmodel_ext/marlin/gptq_marlin_repack.cu
gptqmodel_ext/marlin/gptq_marlin_repack.cuh
gptqmodel_ext/marlin/kernel.h
gptqmodel_ext/marlin/kernel_bf16_kfe2m1f.cu
gptqmodel_ext/marlin/kernel_bf16_kfe4m3fn.cu
gptqmodel_ext/marlin/kernel_bf16_ku4.cu
gptqmodel_ext/marlin/kernel_bf16_ku4b8.cu
gptqmodel_ext/marlin/kernel_bf16_ku8b128.cu
gptqmodel_ext/marlin/kernel_fp16_kfe2m1f.cu
gptqmodel_ext/marlin/kernel_fp16_kfe4m3fn.cu
gptqmodel_ext/marlin/kernel_fp16_ku4.cu
gptqmodel_ext/marlin/kernel_fp16_ku4b8.cu
gptqmodel_ext/marlin/kernel_fp16_ku8b128.cu
gptqmodel_ext/marlin/marlin.cuh
gptqmodel_ext/marlin/marlin_cuda.cpp
gptqmodel_ext/marlin/marlin_dtypes.cuh
gptqmodel_ext/marlin/marlin_template.h
gptqmodel_ext/marlin/core/registration.h
gptqmodel_ext/marlin/core/scalar_type.hpp
gptqmodel_ext/qqq/qqq.cpp
gptqmodel_ext/qqq/qqq_gemm.cu
gptqmodel_ext/qqq/qqq_gemm.h
licenses/LICENSE.apache