[Model] Pooling model activation supports per request control by PoolingParams (#20538)

Signed-off-by: wang.yuqi <noooop@126.com>
2025-08-05 15:37:00 +08:00
parent 811ac13d03
commit 586f286789
21 changed files with 948 additions and 173 deletions
--- a/tests/entrypoints/openai/test_embedding.py
+++ b/tests/entrypoints/openai/test_embedding.py
@@ -8,6 +8,8 @@ import openai
 import pytest
 import pytest_asyncio
 import requests
+import torch
+import torch.nn.functional as F

 from vllm.entrypoints.openai.protocol import EmbeddingResponse
 from vllm.transformers_utils.tokenizer import get_tokenizer
@@ -369,3 +371,35 @@ async def test_invocations_conversation(server: RemoteOpenAIServer):
                               embeddings_1_lst=[invocation_data["embedding"]],
                               name_0="chat",
                               name_1="invocation")
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_normalize(server: RemoteOpenAIServer, model_name: str):
+    input_text = ["The chef prepared a delicious meal."]
+
+    async def get_outputs(normalize):
+        request_args = {
+            "model": MODEL_NAME,
+            "input": input_text,
+            "encoding_format": "float",
+            "normalize": normalize
+        }
+
+        response = requests.post(server.url_for("v1/embeddings"),
+                                 json=request_args)
+        outputs = response.json()
+
+        return torch.tensor([x['embedding'] for x in outputs["data"]])
+
+    default = await get_outputs(normalize=None)
+    w_normal = await get_outputs(normalize=True)
+    wo_normal = await get_outputs(normalize=False)
+
+    assert torch.allclose(default, w_normal,
+                          atol=1e-2), "Default should use normal."
+    assert not torch.allclose(w_normal, wo_normal,
+                              atol=1e-2), "wo_normal should not use normal."
+    assert torch.allclose(
+        w_normal, F.normalize(wo_normal, p=2, dim=-1),
+        atol=1e-2), "w_normal should be close to normal(wo_normal)."