LICENSE
MANIFEST.in
README.md
requirements.txt
setup.py
gptqmodel/__init__.py
gptqmodel/version.py
gptqmodel.egg-info/PKG-INFO
gptqmodel.egg-info/SOURCES.txt
gptqmodel.egg-info/dependency_links.txt
gptqmodel.egg-info/requires.txt
gptqmodel.egg-info/top_level.txt
gptqmodel/integration/__init__.py
gptqmodel/integration/integration_vllm.py
gptqmodel/integration/src/__init__.py
gptqmodel/integration/src/vllm/__init__.py
gptqmodel/integration/src/vllm/gptq_marlin.py
gptqmodel/models/__init__.py
gptqmodel/models/_const.py
gptqmodel/models/auto.py
gptqmodel/models/base.py
gptqmodel/models/loader.py
gptqmodel/models/writer.py
gptqmodel/models/definitions/__init__.py
gptqmodel/models/definitions/baichuan.py
gptqmodel/models/definitions/bloom.py
gptqmodel/models/definitions/chatglm.py
gptqmodel/models/definitions/codegen.py
gptqmodel/models/definitions/cohere.py
gptqmodel/models/definitions/cohere2.py
gptqmodel/models/definitions/dbrx.py
gptqmodel/models/definitions/dbrx_converted.py
gptqmodel/models/definitions/decilm.py
gptqmodel/models/definitions/deepseek_v2.py
gptqmodel/models/definitions/deepseek_v3.py
gptqmodel/models/definitions/exaone.py
gptqmodel/models/definitions/gemma.py
gptqmodel/models/definitions/gemma2.py
gptqmodel/models/definitions/glm.py
gptqmodel/models/definitions/gpt2.py
gptqmodel/models/definitions/gpt_bigcode.py
gptqmodel/models/definitions/gpt_neox.py
gptqmodel/models/definitions/gptj.py
gptqmodel/models/definitions/granite.py
gptqmodel/models/definitions/grinmoe.py
gptqmodel/models/definitions/hymba.py
gptqmodel/models/definitions/internlm.py
gptqmodel/models/definitions/internlm2.py
gptqmodel/models/definitions/llama.py
gptqmodel/models/definitions/longllama.py
gptqmodel/models/definitions/minicpm.py
gptqmodel/models/definitions/minicpm3.py
gptqmodel/models/definitions/mistral.py
gptqmodel/models/definitions/mixtral.py
gptqmodel/models/definitions/mllama.py
gptqmodel/models/definitions/mobilellm.py
gptqmodel/models/definitions/moss.py
gptqmodel/models/definitions/mpt.py
gptqmodel/models/definitions/olmo2.py
gptqmodel/models/definitions/opt.py
gptqmodel/models/definitions/ovis.py
gptqmodel/models/definitions/phi.py
gptqmodel/models/definitions/phi3.py
gptqmodel/models/definitions/qwen.py
gptqmodel/models/definitions/qwen2.py
gptqmodel/models/definitions/qwen2_moe.py
gptqmodel/models/definitions/qwen2_vl.py
gptqmodel/models/definitions/rw.py
gptqmodel/models/definitions/stablelmepoch.py
gptqmodel/models/definitions/starcoder2.py
gptqmodel/models/definitions/telechat2.py
gptqmodel/models/definitions/xverse.py
gptqmodel/models/definitions/yi.py
gptqmodel/nn_modules/__init__.py
gptqmodel/nn_modules/hooked_linear.py
gptqmodel/nn_modules/qlinear/__init__.py
gptqmodel/nn_modules/qlinear/bitblas.py
gptqmodel/nn_modules/qlinear/bitblas_target_detector.py
gptqmodel/nn_modules/qlinear/dynamic_cuda.py
gptqmodel/nn_modules/qlinear/exllama.py
gptqmodel/nn_modules/qlinear/exllamav2.py
gptqmodel/nn_modules/qlinear/ipex.py
gptqmodel/nn_modules/qlinear/marlin.py
gptqmodel/nn_modules/qlinear/torch.py
gptqmodel/nn_modules/qlinear/tritonv2.py
gptqmodel/nn_modules/qlinear/utils.py
gptqmodel/nn_modules/triton_utils/__init__.py
gptqmodel/nn_modules/triton_utils/custom_autotune.py
gptqmodel/nn_modules/triton_utils/dequant.py
gptqmodel/nn_modules/triton_utils/kernels.py
gptqmodel/nn_modules/triton_utils/mixin.py
gptqmodel/quantization/__init__.py
gptqmodel/quantization/config.py
gptqmodel/quantization/gptq.py
gptqmodel/quantization/quantizer.py
gptqmodel/utils/__init__.py
gptqmodel/utils/backend.py
gptqmodel/utils/bitblas.py
gptqmodel/utils/calibration.py
gptqmodel/utils/data.py
gptqmodel/utils/device.py
gptqmodel/utils/eval.py
gptqmodel/utils/exllama.py
gptqmodel/utils/image.py
gptqmodel/utils/importer.py
gptqmodel/utils/logger.py
gptqmodel/utils/marlin.py
gptqmodel/utils/mlx.py
gptqmodel/utils/model.py
gptqmodel/utils/openai_server.py
gptqmodel/utils/perplexity.py
gptqmodel/utils/plotly.py
gptqmodel/utils/progress.py
gptqmodel/utils/rocm.py
gptqmodel/utils/safetensor.py
gptqmodel/utils/sglang.py
gptqmodel/utils/tensor.py
gptqmodel/utils/torch.py
gptqmodel/utils/vllm.py
gptqmodel/utils/vram.py
gptqmodel_ext/cuda_256/gptqmodel_cuda_256.cpp
gptqmodel_ext/cuda_256/gptqmodel_cuda_kernel_256.cu
gptqmodel_ext/cuda_64/gptqmodel_cuda_64.cpp
gptqmodel_ext/cuda_64/gptqmodel_cuda_kernel_64.cu
gptqmodel_ext/exllama/cu_compat.cuh
gptqmodel_ext/exllama/cuda_buffers.cu
gptqmodel_ext/exllama/cuda_buffers.cuh
gptqmodel_ext/exllama/exllama_ext.cpp
gptqmodel_ext/exllama/hip_compat.cuh
gptqmodel_ext/exllama/matrix.cuh
gptqmodel_ext/exllama/tuning.h
gptqmodel_ext/exllama/util.cuh
gptqmodel_ext/exllama/cuda_func/column_remap.cu
gptqmodel_ext/exllama/cuda_func/column_remap.cuh
gptqmodel_ext/exllama/cuda_func/q4_matmul.cu
gptqmodel_ext/exllama/cuda_func/q4_matmul.cuh
gptqmodel_ext/exllama/cuda_func/q4_matrix.cu
gptqmodel_ext/exllama/cuda_func/q4_matrix.cuh
gptqmodel_ext/exllamav2/config.h
gptqmodel_ext/exllamav2/ext.cpp
gptqmodel_ext/exllamav2/cpp/util.h
gptqmodel_ext/exllamav2/cuda/compat.cuh
gptqmodel_ext/exllamav2/cuda/compat_gemm.cuh
gptqmodel_ext/exllamav2/cuda/matrix_view.cuh
gptqmodel_ext/exllamav2/cuda/q_gemm.cu
gptqmodel_ext/exllamav2/cuda/q_gemm.cuh
gptqmodel_ext/exllamav2/cuda/q_gemm_kernel.cuh
gptqmodel_ext/exllamav2/cuda/q_gemm_kernel_gptq.cuh
gptqmodel_ext/exllamav2/cuda/q_matrix.cu
gptqmodel_ext/exllamav2/cuda/q_matrix.cuh
gptqmodel_ext/exllamav2/cuda/util.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_2.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_3.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_4.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_5.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_6.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_8.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_util.cuh
gptqmodel_ext/marlin/marlin.cuh
gptqmodel_ext/marlin/marlin_cuda.cpp
gptqmodel_ext/marlin/marlin_cuda_kernel.cu
gptqmodel_ext/marlin/marlin_cuda_kernel.cuh
gptqmodel_ext/marlin/marlin_dtypes.cuh
gptqmodel_ext/marlin/marlin_repack.cu
gptqmodel_ext/marlin/marlin_repack.cuh
tests/test_asym_gptq_v1.py
tests/test_bits.py
tests/test_dynamic.py
tests/test_estimate_vram.py
tests/test_eval.py
tests/test_evalplus.py
tests/test_flash_attention.py
tests/test_group_size.py
tests/test_inference_speed.py
tests/test_inference_speed_ipex.py
tests/test_ipex_xpu.py
tests/test_lm_eval.py
tests/test_lm_head.py
tests/test_mlx.py
tests/test_mlx_generate.py
tests/test_openai_server.py
tests/test_packing.py
tests/test_packing_speed.py
tests/test_parameter_count.py
tests/test_perplexity.py
tests/test_phi_3_moe.py
tests/test_q4_bitblas.py
tests/test_q4_cuda.py
tests/test_q4_exllama_v1.py
tests/test_q4_exllama_v2.py
tests/test_q4_ipex.py
tests/test_q4_marlin.py
tests/test_q4_torch.py
tests/test_q4_torch_apple.py
tests/test_q4_triton.py
tests/test_quant_batch.py
tests/test_quant_formats.py
tests/test_quant_formats_auto_round.py
tests/test_quant_time.py
tests/test_quant_trust_remote.py
tests/test_save_loaded_quantized_model.py
tests/test_serialization.py
tests/test_sglang.py
tests/test_sharded.py
tests/test_tgi.py
tests/test_tokenicer.py
tests/test_transformers.py
tests/test_triton.py
tests/test_triton_xpu.py
tests/test_verify_hash.py
tests/test_vllm.py