diff --git a/tests/entrypoints/llm/test_encode.py b/tests/entrypoints/llm/test_encode.py index ebec8baba3..a65235ccdf 100644 --- a/tests/entrypoints/llm/test_encode.py +++ b/tests/entrypoints/llm/test_encode.py @@ -8,7 +8,7 @@ import pytest from vllm import LLM, PoolingParams, PoolingRequestOutput from vllm.distributed import cleanup_dist_env_and_memory -MODEL_NAME = "intfloat/e5-mistral-7b-instruct" +MODEL_NAME = "intfloat/multilingual-e5-small" PROMPTS = [ "Hello, my name is", diff --git a/tests/entrypoints/openai/test_embedding.py b/tests/entrypoints/openai/test_embedding.py index e86ea87dd6..8d00564351 100644 --- a/tests/entrypoints/openai/test_embedding.py +++ b/tests/entrypoints/openai/test_embedding.py @@ -13,7 +13,7 @@ from vllm.transformers_utils.tokenizer import get_tokenizer from ...utils import RemoteOpenAIServer -MODEL_NAME = "intfloat/e5-mistral-7b-instruct" +MODEL_NAME = "intfloat/multilingual-e5-small" DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}""" # noqa: E501 diff --git a/tests/entrypoints/openai/test_metrics.py b/tests/entrypoints/openai/test_metrics.py index 5aa259a4f3..39ce4ba235 100644 --- a/tests/entrypoints/openai/test_metrics.py +++ b/tests/entrypoints/openai/test_metrics.py @@ -282,7 +282,7 @@ async def test_metrics_exist(server: RemoteOpenAIServer, def test_metrics_exist_run_batch(use_v1: bool): if use_v1: pytest.skip("Skipping test on vllm V1") - input_batch = """{"custom_id": "request-0", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are a helpful assistant."}}""" # noqa: E501 + input_batch = """{"custom_id": "request-0", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "You are a helpful assistant."}}""" # noqa: E501 base_url = "0.0.0.0" port = "8001" @@ -302,7 +302,7 @@ def test_metrics_exist_run_batch(use_v1: bool): "-o", output_file.name, "--model", - "intfloat/e5-mistral-7b-instruct", + "intfloat/multilingual-e5-small", "--enable-metrics", "--url", base_url, diff --git a/tests/entrypoints/openai/test_run_batch.py b/tests/entrypoints/openai/test_run_batch.py index db049ee2bf..643d0d06ab 100644 --- a/tests/entrypoints/openai/test_run_batch.py +++ b/tests/entrypoints/openai/test_run_batch.py @@ -18,10 +18,10 @@ INPUT_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/c INVALID_INPUT_BATCH = """{"invalid_field": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}} {"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}""" -INPUT_EMBEDDING_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are a helpful assistant."}} -{"custom_id": "request-2", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are an unhelpful assistant."}} +INPUT_EMBEDDING_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "You are a helpful assistant."}} +{"custom_id": "request-2", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "You are an unhelpful assistant."}} -{"custom_id": "request-3", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "Hello world!"}} +{"custom_id": "request-3", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "Hello world!"}} {"custom_id": "request-4", "method": "POST", "url": "/v1/embeddings", "body": {"model": "NonExistModel", "input": "Hello world!"}}""" INPUT_SCORE_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "text_1": "What is the capital of France?", "text_2": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}} @@ -37,7 +37,7 @@ def test_empty_file(): proc = subprocess.Popen([ sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i", input_file.name, "-o", output_file.name, "--model", - "intfloat/e5-mistral-7b-instruct" + "intfloat/multilingual-e5-small" ], ) proc.communicate() proc.wait() @@ -97,7 +97,7 @@ def test_embeddings(): proc = subprocess.Popen([ sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i", input_file.name, "-o", output_file.name, "--model", - "intfloat/e5-mistral-7b-instruct" + "intfloat/multilingual-e5-small" ], ) proc.communicate() proc.wait() diff --git a/tests/model_executor/test_model_load_with_params.py b/tests/model_executor/test_model_load_with_params.py index 760a119935..f8efa2eff8 100644 --- a/tests/model_executor/test_model_load_with_params.py +++ b/tests/model_executor/test_model_load_with_params.py @@ -14,7 +14,7 @@ MODEL_NAME = os.environ.get("MODEL_NAME", "BAAI/bge-base-en-v1.5") REVISION = os.environ.get("REVISION", "main") MODEL_NAME_ROBERTA = os.environ.get("MODEL_NAME", - "intfloat/multilingual-e5-large") + "intfloat/multilingual-e5-small") REVISION_ROBERTA = os.environ.get("REVISION", "main") @@ -83,7 +83,7 @@ def test_roberta_model_loading_with_params(vllm_runner): assert model_config.pooler_config.pooling_norm # asserts on the tokenizer loaded - assert model_tokenizer.tokenizer_id == "intfloat/multilingual-e5-large" + assert model_tokenizer.tokenizer_id == "intfloat/multilingual-e5-small" assert not model_tokenizer.tokenizer_config["do_lower_case"] def check_model(model): diff --git a/tests/models/embedding/language/test_embedding.py b/tests/models/embedding/language/test_embedding.py index ad6385376d..4b9926860f 100644 --- a/tests/models/embedding/language/test_embedding.py +++ b/tests/models/embedding/language/test_embedding.py @@ -17,7 +17,7 @@ from ..utils import check_embeddings_close pytest.param("BAAI/bge-base-en-v1.5", marks=[pytest.mark.core_model, pytest.mark.cpu_model]), pytest.param("sentence-transformers/all-MiniLM-L12-v2"), - pytest.param("intfloat/multilingual-e5-large"), + pytest.param("intfloat/multilingual-e5-small"), # [Decoder-only] pytest.param("BAAI/bge-multilingual-gemma2", marks=[pytest.mark.core_model]), diff --git a/tests/models/registry.py b/tests/models/registry.py index 95bda02934..78a65b9387 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -211,7 +211,7 @@ _EMBEDDING_EXAMPLE_MODELS = { "Qwen2ForSequenceClassification": _HfExamplesInfo("jason9693/Qwen2.5-1.5B-apeach"), # noqa: E501 "RobertaModel": _HfExamplesInfo("sentence-transformers/stsb-roberta-base-v2"), # noqa: E501 "RobertaForMaskedLM": _HfExamplesInfo("sentence-transformers/all-roberta-large-v1"), # noqa: E501 - "XLMRobertaModel": _HfExamplesInfo("intfloat/multilingual-e5-large"), + "XLMRobertaModel": _HfExamplesInfo("intfloat/multilingual-e5-small"), # [Multimodal] "LlavaNextForConditionalGeneration": _HfExamplesInfo("royokong/e5-v"), "Phi3VForCausalLM": _HfExamplesInfo("TIGER-Lab/VLM2Vec-Full", diff --git a/tests/test_config.py b/tests/test_config.py index 8927a14d79..709d60b836 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -13,7 +13,7 @@ from vllm.platforms import current_platform ("model_id", "expected_runner_type", "expected_task"), [ ("distilbert/distilgpt2", "generate", "generate"), - ("intfloat/e5-mistral-7b-instruct", "pooling", "embed"), + ("intfloat/multilingual-e5-small", "pooling", "embed"), ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify"), ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "score"), ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "reward"), diff --git a/vllm/test_utils.py b/vllm/test_utils.py index eb9a4d80a2..8611a25922 100644 --- a/vllm/test_utils.py +++ b/vllm/test_utils.py @@ -28,7 +28,7 @@ MODELS_ON_S3 = [ "HuggingFaceM4/Idefics3-8B-Llama3", "internlm/internlm2-1_8b-reward", "intfloat/e5-mistral-7b-instruct", - "intfloat/multilingual-e5-large", + "intfloat/multilingual-e5-small", "jason9693/Qwen2.5-1.5B-apeach", "llava-hf/llava-1.5-7b-hf", "llava-hf/llava-onevision-qwen2-0.5b-ov-hf",