.gitignore
CONTRIBUTING.md
LICENSE
Makefile
README.md
pyproject.toml
setup.py
setup.sh
.github/CODEOWNERS
.github/PULL_REQUEST_TEMPLATE.md
.github/workflows/conventional-commits.yml
.github/workflows/linux-cpu-tests.yml
.github/workflows/linux-cuda-tests.yml
.github/workflows/python-examples.yml
.github/workflows/python-quality.yml
.github/workflows/stale.yml
bench/generation/README.md
bench/generation/evaluate_configurations.py
bench/generation/evaluate_many_models.sh
bench/generation/evaluate_model.py
bench/generation/gen_barchart.py
bench/generation/charts/EleutherAI-pythia-1b_Accuracy.png
bench/generation/charts/EleutherAI-pythia-1b_Latency__ms_.png
bench/generation/charts/EleutherAI-pythia-1b_Perplexity.png
bench/generation/charts/HuggingFaceH4-zephyr-7b-beta_Accuracy.png
bench/generation/charts/HuggingFaceH4-zephyr-7b-beta_Latency__ms_.png
bench/generation/charts/HuggingFaceH4-zephyr-7b-beta_Perplexity.png
bench/generation/charts/NousResearch-Llama-2-7b-hf_Accuracy.png
bench/generation/charts/NousResearch-Llama-2-7b-hf_Latency__ms_.png
bench/generation/charts/NousResearch-Llama-2-7b-hf_Perplexity.png
bench/generation/charts/facebook-opt-1.3b_Accuracy.png
bench/generation/charts/facebook-opt-1.3b_Latency__ms_.png
bench/generation/charts/facebook-opt-1.3b_Perplexity.png
bench/generation/charts/facebook-opt-125m_Accuracy.png
bench/generation/charts/facebook-opt-125m_Latency__ms_.png
bench/generation/charts/facebook-opt-125m_Perplexity.png
bench/generation/charts/facebook-opt-350m_Accuracy.png
bench/generation/charts/facebook-opt-350m_Latency__ms_.png
bench/generation/charts/facebook-opt-350m_Perplexity.png
bench/generation/charts/google-gemma-2b_Accuracy.png
bench/generation/charts/google-gemma-2b_Latency__ms_.png
bench/generation/charts/google-gemma-2b_Perplexity.png
bench/generation/charts/mistralai-Mistral-7B-v0.1_Accuracy.png
bench/generation/charts/mistralai-Mistral-7B-v0.1_Latency__ms_.png
bench/generation/charts/mistralai-Mistral-7B-v0.1_Perplexity.png
bench/generation/charts/princeton-nlp-Sheared-LLaMA-1.3B_Accuracy.png
bench/generation/charts/princeton-nlp-Sheared-LLaMA-1.3B_Latency__ms_.png
bench/generation/charts/princeton-nlp-Sheared-LLaMA-1.3B_Perplexity.png
bench/generation/metrics/__init__.py
bench/generation/metrics/latency.py
bench/generation/metrics/perplexity.py
bench/generation/metrics/prediction.py
bench/generation/setup/__init__.py
bench/generation/setup/awq.py
bench/generation/setup/bnb.py
bench/generation/setup/hqq.py
bench/generation/setup/quanto.py
bench/kernels/README.md
bench/kernels/test_int_mm.py
bench/kernels/test_int_mm_inductor.py
bench/kernels/test_int_mm_torch_int.py
bench/library/benchmark.py
examples/nlp/text-classification/sst2/quantize_sst2_model.py
examples/nlp/text-generation/quantize_causal_lm_model.py
examples/vision/StableDiffusion/READMD.md
examples/vision/StableDiffusion/quantize_StableDiffusion.py
examples/vision/StableDiffusion/requirements.txt
examples/vision/image-classification/mnist/quantize_mnist_model.py
external/awq/conftest.py
external/awq/pack_intweight.py
external/awq/packing_utils.py
external/awq/test_awq_kernels.py
external/awq/test_awq_packing.py
external/awq/test_awq_quantize.py
external/smoothquant/README.md
external/smoothquant/smoothquant.py
quanto/__init__.py
quanto/calibrate.py
quanto/quantize.py
quanto/serialization.py
quanto.egg-info/PKG-INFO
quanto.egg-info/SOURCES.txt
quanto.egg-info/dependency_links.txt
quanto.egg-info/not-zip-safe
quanto.egg-info/requires.txt
quanto.egg-info/top_level.txt
quanto/library/README.md
quanto/library/__init__.py
quanto/library/ops.py
quanto/library/ext/README.md
quanto/library/ext/__init__.py
quanto/library/ext/cpp/README.md
quanto/library/ext/cpp/__init__.py
quanto/library/ext/cpp/mm.cpp
quanto/library/ext/cpp/mm.h
quanto/library/ext/cpp/pybind_module.cpp
quanto/library/ext/cpp/unpack.cpp
quanto/library/ext/cpp/unpack.h
quanto/library/ext/cuda/README.md
quanto/library/ext/cuda/__init__.py
quanto/library/ext/cuda/pybind_module.cpp
quanto/library/ext/cuda/unpack.cu
quanto/library/ext/cuda/unpack.h
quanto/library/ext/cuda/awq/dequantize.cuh
quanto/library/ext/cuda/awq/v2/gemm_cuda.cu
quanto/library/ext/cuda/awq/v2/gemm_cuda.h
quanto/library/ext/cuda/awq/v2/gemv_cuda.cu
quanto/library/ext/cuda/awq/v2/gemv_cuda.h
quanto/library/ext/cuda/awq/v2/semaphore.h
quanto/library/ext/mps/README.md
quanto/library/ext/mps/__init__.py
quanto/library/ext/mps/pybind_module.cpp
quanto/library/ext/mps/unpack.h
quanto/library/ext/mps/unpack.mm
quanto/library/python/README.md
quanto/library/python/__init__.py
quanto/library/python/mm.py
quanto/library/python/unpack.py
quanto/nn/__init__.py
quanto/nn/qconv2d.py
quanto/nn/qlayernorm.py
quanto/nn/qlinear.py
quanto/nn/qmodule.py
quanto/tensor/__init__.py
quanto/tensor/core.py
quanto/tensor/qactivation.py
quanto/tensor/qbytes.py
quanto/tensor/qbytes_ops.py
quanto/tensor/qtensor.py
quanto/tensor/qtensor_func.py
quanto/tensor/qtype.py
quanto/tensor/qweight.py
quanto/tensor/optimizers/__init__.py
quanto/tensor/optimizers/absmax_optimizer.py
quanto/tensor/optimizers/affine_optimizer.py
quanto/tensor/optimizers/max_optimizer.py
quanto/tensor/optimizers/optimizer.py
quanto/tensor/optimizers/symmetric_optimizer.py
quanto/tensor/qbits/__init__.py
quanto/tensor/qbits/group.py
quanto/tensor/qbits/packed.py
quanto/tensor/qbits/qbits.py
quanto/tensor/qbits/qbits_ops.py
quanto/tensor/qbits/awq/__init__.py
quanto/tensor/qbits/awq/packed.py
quanto/tensor/qbits/awq/qbits.py
quanto/tensor/quantizers/__init__.py
quanto/tensor/quantizers/affine.py
quanto/tensor/quantizers/symmetric.py
test/conftest.py
test/helpers.py
test/library/test_mm.py
test/library/test_unpack.py
test/model/test_quantize_mlp.py
test/model/test_requantize_mlp.py
test/nn/test_calibrate.py
test/nn/test_qattention.py
test/nn/test_qconv2d.py
test/nn/test_qlayernorm.py
test/nn/test_qlinear.py
test/nn/test_qmodule.py
test/tensor/test_absmax.py
test/tensor/test_compile.py
test/tensor/test_packed_tensor.py
test/tensor/test_qbitstensor.py
test/tensor/test_qbytestensor.py
test/tensor/awq/test_awq_tensor.py
test/tensor/ops/test_linear_dispatch.py
test/tensor/ops/test_mm_dispatch.py
test/tensor/ops/test_qactivation_dispatch.py
test/tensor/quantizers/test_affine.py
test/tensor/quantizers/test_symmetric.py