.gitignore
CONTRIBUTING.md
LICENSE
Makefile
README.md
pyproject.toml
setup.py
setup.sh
.github/CODEOWNERS
.github/workflows/python-quality.yml
.github/workflows/python-tests.yml
bench/generation/README.md
bench/generation/evaluate_many_models.sh
bench/generation/evaluate_model.py
bench/generation/gen_barchart.py
bench/generation/latency.py
bench/generation/perplexity.py
bench/generation/prediction.py
bench/generation/charts/EleutherAI-pythia-1b_Accuracy.png
bench/generation/charts/EleutherAI-pythia-1b_Latency__ms_.png
bench/generation/charts/EleutherAI-pythia-1b_Perplexity.png
bench/generation/charts/HuggingFaceH4-zephyr-7b-beta_Accuracy.png
bench/generation/charts/HuggingFaceH4-zephyr-7b-beta_Latency__ms_.png
bench/generation/charts/HuggingFaceH4-zephyr-7b-beta_Perplexity.png
bench/generation/charts/NousResearch-Llama-2-7b-hf_Accuracy.png
bench/generation/charts/NousResearch-Llama-2-7b-hf_Latency__ms_.png
bench/generation/charts/NousResearch-Llama-2-7b-hf_Perplexity.png
bench/generation/charts/facebook-opt-1.3b_Accuracy.png
bench/generation/charts/facebook-opt-1.3b_Latency__ms_.png
bench/generation/charts/facebook-opt-1.3b_Perplexity.png
bench/generation/charts/facebook-opt-125m_Accuracy.png
bench/generation/charts/facebook-opt-125m_Latency__ms_.png
bench/generation/charts/facebook-opt-125m_Perplexity.png
bench/generation/charts/facebook-opt-350m_Accuracy.png
bench/generation/charts/facebook-opt-350m_Latency__ms_.png
bench/generation/charts/facebook-opt-350m_Perplexity.png
bench/generation/charts/mistralai-Mistral-7B-v0.1_Accuracy.png
bench/generation/charts/mistralai-Mistral-7B-v0.1_Latency__ms_.png
bench/generation/charts/mistralai-Mistral-7B-v0.1_Perplexity.png
bench/generation/charts/princeton-nlp-Sheared-LLaMA-1.3B_Accuracy.png
bench/generation/charts/princeton-nlp-Sheared-LLaMA-1.3B_Perplexity.png
bench/kernels/README.md
bench/kernels/test_int_mm.py
bench/kernels/test_int_mm_inductor.py
bench/kernels/test_int_mm_torch_int.py
bench/library/benchmark.py
examples/nlp/text-classification/sst2/quantize_sst2_model.py
examples/nlp/text-generation/quantize_causal_lm_model.py
examples/vision/image-classification/mnist/quantize_mnist_model.py
external/smoothquant/README.md
external/smoothquant/smoothquant.py
quanto/__init__.py
quanto/calibrate.py
quanto/quantize.py
quanto/serialization.py
quanto.egg-info/PKG-INFO
quanto.egg-info/SOURCES.txt
quanto.egg-info/dependency_links.txt
quanto.egg-info/not-zip-safe
quanto.egg-info/requires.txt
quanto.egg-info/top_level.txt
quanto/library/README.md
quanto/library/__init__.py
quanto/library/ops.py
quanto/library/ext/README.md
quanto/library/ext/__init__.py
quanto/library/ext/cpp/README.md
quanto/library/ext/cpp/__init__.py
quanto/library/ext/cpp/mm.cpp
quanto/library/ext/cpp/mm.h
quanto/library/ext/cpp/pybind_module.cpp
quanto/library/ext/cpp/quantize.cpp
quanto/library/ext/cpp/quantize.h
quanto/library/ext/cpp/unpack.cpp
quanto/library/ext/cpp/unpack.h
quanto/library/ext/mps/README.md
quanto/library/ext/mps/__init__.py
quanto/library/ext/mps/pybind_module.cpp
quanto/library/ext/mps/unpack.h
quanto/library/ext/mps/unpack.mm
quanto/library/python/README.md
quanto/library/python/__init__.py
quanto/library/python/mm.py
quanto/library/python/quantize.py
quanto/library/python/unpack.py
quanto/nn/__init__.py
quanto/nn/qconv2d.py
quanto/nn/qlayernorm.py
quanto/nn/qlinear.py
quanto/nn/qmodule.py
quanto/tensor/__init__.py
quanto/tensor/core.py
quanto/tensor/func.py
quanto/tensor/ops.py
quanto/tensor/packed.py
quanto/tensor/qbitstensor.py
quanto/tensor/qtensor.py
quanto/tensor/qtype.py
test/conftest.py
test/helpers.py
test/library/test_mm.py
test/library/test_quantize.py
test/library/test_unpack.py
test/model/test_quantize_mlp.py
test/nn/test_calibrate.py
test/nn/test_qattention.py
test/nn/test_qconv2d.py
test/nn/test_qlayernorm.py
test/nn/test_qlinear.py
test/nn/test_qmodule.py
test/tensor/test_absmax.py
test/tensor/test_compile.py
test/tensor/test_packed_tensor.py
test/tensor/test_qbitstensor.py
test/tensor/test_qtensor.py
test/tensor/ops/test_linear_dispatch.py
test/tensor/ops/test_mm_dispatch.py
test/tensor/ops/test_qbitstensor_dispatch.py
test/tensor/ops/test_qtensor_dispatch.py