[Speculative Decoding] Add speculators config support (#21345)

This commit is contained in:
Dipika Sikka
2025-08-01 08:25:18 -04:00
committed by GitHub
parent 87c94bc879
commit dfbc1f8880
9 changed files with 232 additions and 11 deletions

View File

@ -0,0 +1,16 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import torch
@pytest.mark.parametrize(
    "model_path",
    [
        "nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717",
        "nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717-quantized",
    ])
def test_llama(vllm_runner, example_prompts, model_path):
    """Smoke-test greedy generation on speculators-converted Llama checkpoints.

    Loads each checkpoint in bfloat16 via the ``vllm_runner`` fixture and
    asserts that greedy decoding produces non-empty output.
    """
    with vllm_runner(model_path, dtype=torch.bfloat16) as vllm_model:
        outputs = vllm_model.generate_greedy(example_prompts, max_tokens=20)
        # Echo outputs for debugging when the test runs under -s.
        print(outputs)
        # Any non-empty result counts as success for this smoke test.
        assert outputs