Encoder model support for the Transformers backend (#25174)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-09-19 19:15:22 +01:00
committed by GitHub
parent d90e212a3a
commit 12aed7e453
4 changed files with 111 additions and 24 deletions

View File

@ -9,7 +9,7 @@ from vllm.platforms import current_platform
from ..conftest import HfRunner, VllmRunner
from ..utils import multi_gpu_test, prep_prompts
from .utils import check_logprobs_close
from .utils import check_embeddings_close, check_logprobs_close
def check_implementation(
@ -165,6 +165,40 @@ def test_embed_loading(vllm_runner, model):
assert model_config.using_transformers_backend()
@pytest.mark.parametrize(
"model",
[
# Encoder model
"BAAI/bge-base-en-v1.5",
])
def test_embed_correctness(hf_runner, vllm_runner, example_prompts, model):
import transformers
from packaging.version import Version
installed = Version(transformers.__version__)
required = Version("4.57.0.dev0")
if installed < required:
pytest.skip("Encoder models with the Transformers backend require "
f"transformers>={required}, but got {installed}")
with vllm_runner(model, max_model_len=512,
model_impl="transformers") as vllm_model:
model_config = vllm_model.llm.llm_engine.model_config
assert model_config.using_transformers_backend()
vllm_outputs = vllm_model.embed(example_prompts)
with hf_runner(model, is_sentence_transformer=True) as hf_model:
hf_outputs = hf_model.encode(example_prompts)
check_embeddings_close(
embeddings_0_lst=hf_outputs,
embeddings_1_lst=vllm_outputs,
name_0="hf",
name_1="vllm",
tol=1e-2,
)
@pytest.mark.parametrize(
"model",
["jason9693/Qwen2.5-1.5B-apeach"],