.gitignore
.readthedocs.yaml
CMakeLists.txt
CODE_OF_CONDUCT.md
DCO
Dockerfile
Dockerfile.openEuler
LICENSE
README.md
README.zh.md
format.sh
mypy.ini
packages.txt
pyproject.toml
pytest.ini
requirements-dev.txt
requirements-lint.txt
requirements.txt
setup.py
.github/PULL_REQUEST_TEMPLATE.md
.github/dependabot.yml
.github/labeler.yml
.github/ISSUE_TEMPLATE/100-documentation.yml
.github/ISSUE_TEMPLATE/200-installation.yml
.github/ISSUE_TEMPLATE/300-usage.yml
.github/ISSUE_TEMPLATE/400-bug-report.yml
.github/ISSUE_TEMPLATE/500-feature-request.yml
.github/ISSUE_TEMPLATE/600-new-model.yml
.github/ISSUE_TEMPLATE/700-performance-discussion.yml
.github/ISSUE_TEMPLATE/750-RFC.yml
.github/ISSUE_TEMPLATE/800-others.yml
.github/ISSUE_TEMPLATE/config.yml
.github/workflows/actionlint.yml
.github/workflows/image_openeuler.yml
.github/workflows/image_ubuntu.yml
.github/workflows/labeler.yml
.github/workflows/mypy.yaml
.github/workflows/ruff.yml
.github/workflows/shellcheck.yml
.github/workflows/vllm_ascend_test.yaml
.github/workflows/yapf.yml
.github/workflows/matchers/actionlint.json
.github/workflows/matchers/mypy.json
.github/workflows/matchers/ruff.json
benchmarks/README.md
benchmarks/requirements-bench.txt
benchmarks/scripts/run-performance-benchmarks.sh
benchmarks/tests/latency-tests.json
benchmarks/tests/serving-tests.json
benchmarks/tests/throughput-tests.json
cmake/utils.cmake
cmq_test.egg-info/PKG-INFO
cmq_test.egg-info/SOURCES.txt
cmq_test.egg-info/dependency_links.txt
cmq_test.egg-info/entry_points.txt
cmq_test.egg-info/requires.txt
cmq_test.egg-info/top_level.txt
csrc/camem_allocator.cpp
csrc/ops.h
csrc/torch_binding.cpp
csrc/utils.h
csrc/kernels/pos_encoding_kernels.cpp
csrc/kernels/types.h
csrc/kernels/utils.h
docs/Makefile
docs/README.md
docs/requirements-docs.txt
docs/requirements-test.txt
docs/source/conf.py
docs/source/faqs.md
docs/source/index.md
docs/source/installation.md
docs/source/quick_start.md
docs/source/developer_guide/contributing.md
docs/source/developer_guide/contributing.zh.md
docs/source/developer_guide/versioning_policy.md
docs/source/developer_guide/versioning_policy.zh.md
docs/source/developer_guide/evaluation/index.md
docs/source/developer_guide/evaluation/using_lm_eval.md
docs/source/developer_guide/evaluation/using_opencompass.md
docs/source/logos/vllm-ascend-logo-text-dark.png
docs/source/logos/vllm-ascend-logo-text-light.png
docs/source/tutorials/index.md
docs/source/tutorials/multi_node.md
docs/source/tutorials/multi_npu.md
docs/source/tutorials/single_npu.md
docs/source/tutorials/single_npu_multimodal.md
docs/source/user_guide/release.template.md
docs/source/user_guide/release_notes.md
docs/source/user_guide/supported_models.md
docs/source/user_guide/suppoted_features.md
examples/offline_disaggregated_prefill_npu.py
examples/offline_distributed_inference_npu.py
examples/offline_inference_audio_language.py
examples/offline_inference_npu.py
tests/__init__.py
tests/conftest.py
tests/model_utils.py
tests/test_chunk_prefill.py
tests/test_chunked_prefill_scheduler.py
tests/test_offline_inference.py
tests/test_scheduler.py
tests/utils.py
tests/ops/test_rotary_embedding.py
tests/spec_decode/__init__.py
tests/spec_decode/conftest.py
tests/spec_decode/test_batch_expansion.py
tests/spec_decode/test_dynamic_spec_decode.py
tests/spec_decode/test_metrics.py
tests/spec_decode/test_multi_step_worker.py
tests/spec_decode/test_ngram_worker.py
tests/spec_decode/test_spec_decode_worker.py
tests/spec_decode/test_utils.py
tests/spec_decode/utils.py
tests/spec_decode/e2e/__init__.py
tests/spec_decode/e2e/conftest.py
tests/spec_decode/e2e/test_compatibility.py
tests/spec_decode/e2e/test_eagle_correctness.py
tests/spec_decode/e2e/test_integration.py
tests/spec_decode/e2e/test_integration_dist_tp2.py
tests/spec_decode/e2e/test_integration_dist_tp4.py
tests/spec_decode/e2e/test_logprobs.py
tests/spec_decode/e2e/test_medusa_correctness.py
tests/spec_decode/e2e/test_mlp_correctness.py
tests/spec_decode/e2e/test_mtp_correctness.py
tests/spec_decode/e2e/test_multistep_correctness.py
tests/spec_decode/e2e/test_ngram_correctness.py
tests/spec_decode/e2e/test_seed.py
tools/actionlint.sh
tools/check_repo.sh
tools/mypy.sh
tools/png-lint.sh
tools/shellcheck.sh
tools/sphinx-lint.sh
vllm_ascend/__init__.py
vllm_ascend/_version.py
vllm_ascend/envs.py
vllm_ascend/patch_config.py
vllm_ascend/patch_module.py
vllm_ascend/patch_outputs.py
vllm_ascend/platform.py
vllm_ascend/utils.py
vllm_ascend/attention/__init__.py
vllm_ascend/attention/attention.py
vllm_ascend/attention/attention_v1.py
vllm_ascend/core/__init__.py
vllm_ascend/core/schedule_config.py
vllm_ascend/core/scheduler.py
vllm_ascend/core/v1_engine_core_init.py
vllm_ascend/device_allocator/__init__.py
vllm_ascend/device_allocator/camem.py
vllm_ascend/distributed/__init__.py
vllm_ascend/distributed/communicator.py
vllm_ascend/distributed/llmdatadist_connector.py
vllm_ascend/lora/__init__.py
vllm_ascend/lora/punica_wrapper/__init__.py
vllm_ascend/lora/punica_wrapper/punica_npu.py
vllm_ascend/models/__init__.py
vllm_ascend/models/deepseek_mtp.py
vllm_ascend/models/deepseek_v2.py
vllm_ascend/models/qwen2_5_vl.py
vllm_ascend/models/qwen2_vl.py
vllm_ascend/ops/__init__.py
vllm_ascend/ops/activation.py
vllm_ascend/ops/fused_moe.py
vllm_ascend/ops/layernorm.py
vllm_ascend/ops/rotary_embedding.py
vllm_ascend/patch/__init__.py
vllm_ascend/patch/patch_cache_dtype.py
vllm_ascend/patch/patch_metrics.py
vllm_ascend/patch/patch_minicpm.py
vllm_ascend/patch/patch_multi_step_worker.py
vllm_ascend/patch/patch_rejection_sampler.py
vllm_ascend/patch/patch_spec_decode_worker.py
vllm_ascend/patch/ray_patch.py
vllm_ascend/quantization/__init__.py
vllm_ascend/quantization/quant_config.py
vllm_ascend/quantization/quantizer.py
vllm_ascend/sample/__init__.py
vllm_ascend/sample/sampler.py
vllm_ascend/sample/sampler_v1.py
vllm_ascend/sample/ops/__init__.py
vllm_ascend/sample/ops/ascend_topk_topp_sampler.py
vllm_ascend/sample/ops/penalties.py
vllm_ascend/worker/__init__.py
vllm_ascend/worker/draft_model_runner.py
vllm_ascend/worker/model_runner.py
vllm_ascend/worker/model_runner_v1.py
vllm_ascend/worker/multi_step_runner.py
vllm_ascend/worker/multi_step_worker.py
vllm_ascend/worker/pooling_model_runner.py
vllm_ascend/worker/worker.py
vllm_ascend/worker/worker_v1.py