[Model] Add classification Task with Qwen2ForSequenceClassification (#9704)

Signed-off-by: Kevin-Yang <ykcha9@gmail.com> Co-authored-by: Kevin-Yang <ykcha9@gmail.com>
2024-10-27 02:53:35 +09:00
parent 07e981fdf4
commit 6650e6a930
6 changed files with 211 additions and 1 deletions
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -343,6 +343,17 @@ class HfRunner:

        return all_inputs

+    def classify(self, prompts: List[str]) -> List[str]:
+        # output is final logits
+        all_inputs = self.get_inputs(prompts)
+        outputs = []
+        for inputs in all_inputs:
+            output = self.model(**self.wrap_device(inputs))
+            logits = output.logits.softmax(dim=-1)[0].tolist()
+            outputs.append(logits)
+
+        return outputs
+
    def generate(
        self,
        prompts: List[str],
@ -688,6 +699,14 @@ class VllmRunner:

        return inputs

+    def classify(self, prompts: List[str]) -> List[str]:
+        req_outputs = self.model.encode(prompts)
+        outputs = []
+        for req_output in req_outputs:
+            embedding = req_output.outputs.embedding
+            outputs.append(embedding)
+        return outputs
+
    def generate(
        self,
        prompts: List[str],
--- a/tests/models/embedding/language/test_cls_models.py
+++ b/tests/models/embedding/language/test_cls_models.py
@ -0,0 +1,53 @@
+"""Compare the classification outputs of HF and vLLM.
+
+This test only tests small models. Big models such as 7B should be tested from
+test_big_models.py because it could use a larger instance to run tests.
+
+Run `pytest tests/models/embedding/language/test_cls_models.py`.
+"""
+import pytest
+import torch
+from transformers import AutoModelForSequenceClassification
+
+CLASSIFICATION_MODELS = ["jason9693/Qwen2.5-1.5B-apeach"]
+
+
+@pytest.mark.parametrize("model", CLASSIFICATION_MODELS)
+@pytest.mark.parametrize("dtype", ["float"])
+def test_classification_models(
+    hf_runner,
+    vllm_runner,
+    example_prompts,
+    model: str,
+    dtype: str,
+) -> None:
+    with hf_runner(model,
+                   dtype=dtype,
+                   auto_cls=AutoModelForSequenceClassification) as hf_model:
+        hf_outputs = hf_model.classify(example_prompts)
+
+    with vllm_runner(model, dtype=dtype) as vllm_model:
+        vllm_outputs = vllm_model.classify(example_prompts)
+
+    print(hf_outputs, vllm_outputs)
+
+    # check logits difference
+    for hf_output, vllm_output in zip(hf_outputs, vllm_outputs):
+        hf_output = torch.tensor(hf_output)
+        vllm_output = torch.tensor(vllm_output)
+
+        assert torch.allclose(hf_output, vllm_output, 1e-3)
+
+
+@pytest.mark.parametrize("model", CLASSIFICATION_MODELS)
+@pytest.mark.parametrize("dtype", ["float"])
+def test_classification_model_print(
+    vllm_runner,
+    model: str,
+    dtype: str,
+) -> None:
+    with vllm_runner(model, dtype=dtype) as vllm_model:
+        # This test is for verifying whether the model's extra_repr
+        # can be printed correctly.
+        print(vllm_model.model.llm_engine.model_executor.driver_worker.
+              model_runner.model)