[CI] Accelerate mteb test by setting SentenceTransformers mteb score to a constant (#24088)
Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
@ -37,4 +37,6 @@ def test_mteb_embed(server):
|
||||
print("SentenceTransformer main score: ", st_main_score)
|
||||
print("Difference: ", st_main_score - vllm_main_score)
|
||||
|
||||
assert st_main_score == pytest.approx(vllm_main_score, abs=MTEB_EMBED_TOL)
|
||||
# We are not concerned that the vllm mteb results are better
|
||||
# than SentenceTransformers, so we only perform one-sided testing.
|
||||
assert st_main_score - vllm_main_score < MTEB_EMBED_TOL
|
||||
|
||||
@ -6,16 +6,19 @@ import pytest
|
||||
|
||||
# yapf conflicts with isort for this block
|
||||
# yapf: disable
|
||||
from tests.models.language.pooling.mteb_utils import (
|
||||
MTEB_RERANK_LANGS, MTEB_RERANK_TASKS, MTEB_RERANK_TOL,
|
||||
RerankClientMtebEncoder, ScoreClientMtebEncoder,
|
||||
mteb_test_rerank_models_hf, run_mteb_rerank)
|
||||
from tests.models.language.pooling.mteb_utils import (MTEB_RERANK_LANGS,
|
||||
MTEB_RERANK_TASKS,
|
||||
MTEB_RERANK_TOL,
|
||||
RerankClientMtebEncoder,
|
||||
ScoreClientMtebEncoder,
|
||||
run_mteb_rerank)
|
||||
# yapf: enable
|
||||
from tests.utils import RemoteOpenAIServer
|
||||
|
||||
os.environ["VLLM_LOGGING_LEVEL"] = "WARNING"
|
||||
|
||||
MODEL_NAME = "cross-encoder/ms-marco-MiniLM-L-6-v2"
|
||||
st_main_score = 0.33457
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
@ -29,15 +32,7 @@ def server():
|
||||
yield remote_server
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def st_main_score(hf_runner):
|
||||
# The main score related to the version of the dependency.
|
||||
# So we need to recalculate every time.
|
||||
main_score, st_dtype = mteb_test_rerank_models_hf(hf_runner, MODEL_NAME)
|
||||
return main_score
|
||||
|
||||
|
||||
def test_mteb_score(server, st_main_score):
|
||||
def test_mteb_score(server):
|
||||
url = server.url_for("score")
|
||||
encoder = ScoreClientMtebEncoder(MODEL_NAME, url)
|
||||
vllm_main_score = run_mteb_rerank(encoder, MTEB_RERANK_TASKS,
|
||||
@ -47,10 +42,12 @@ def test_mteb_score(server, st_main_score):
|
||||
print("SentenceTransformer main score: ", st_main_score)
|
||||
print("Difference: ", st_main_score - vllm_main_score)
|
||||
|
||||
assert st_main_score == pytest.approx(vllm_main_score, abs=MTEB_RERANK_TOL)
|
||||
# We are not concerned that the vllm mteb results are better
|
||||
# than SentenceTransformers, so we only perform one-sided testing.
|
||||
assert st_main_score - vllm_main_score < MTEB_RERANK_TOL
|
||||
|
||||
|
||||
def test_mteb_rerank(server, st_main_score):
|
||||
def test_mteb_rerank(server):
|
||||
url = server.url_for("rerank")
|
||||
encoder = RerankClientMtebEncoder(MODEL_NAME, url)
|
||||
vllm_main_score = run_mteb_rerank(encoder, MTEB_RERANK_TASKS,
|
||||
@ -60,4 +57,6 @@ def test_mteb_rerank(server, st_main_score):
|
||||
print("SentenceTransformer main score: ", st_main_score)
|
||||
print("Difference: ", st_main_score - vllm_main_score)
|
||||
|
||||
assert st_main_score == pytest.approx(vllm_main_score, abs=MTEB_RERANK_TOL)
|
||||
# We are not concerned that the vllm mteb results are better
|
||||
# than SentenceTransformers, so we only perform one-sided testing.
|
||||
assert st_main_score - vllm_main_score < MTEB_RERANK_TOL
|
||||
|
||||
Reference in New Issue
Block a user