Compare commits

...

3 Commits

36ccdcad2c  updated  (2025-08-14 03:34:37 +00:00)
    Signed-off-by: Robert Shaw <robshaw@redhat.com>

1d20c34717  [CI] Fix tests/distributed/test_ca_buffer_sharing.py (#22849)  (2025-08-13 20:09:30 -07:00)
    Signed-off-by: ilmarkov <imarkov@redhat.com>
    Co-authored-by: ilmarkov <imarkov@redhat.com>
    Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>

b6af24fba7  [CI][Entrypoints]: add filter to generation to filter out invalid tool calls (#22826)  (2025-08-13 20:09:07 -07:00)
    Signed-off-by: Will Eaton <weaton@redhat.com>
4 changed files with 33 additions and 105 deletions

View File

@@ -227,16 +227,6 @@ steps:
 ##### fast check tests #####
 ##### 1 GPU test #####
 
-- label: Regression Test # 5min
-  mirror_hardwares: [amdexperimental]
-  source_file_dependencies:
-  - vllm/
-  - tests/test_regression
-  commands:
-  - pip install modelscope
-  - pytest -v -s test_regression.py
-  working_dir: "/vllm-workspace/tests" # optional
-
 - label: Engine Test # 10min
   mirror_hardwares: [amdexperimental]
   source_file_dependencies:
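
The step removed above did nothing beyond installing modelscope and running pytest on the regression file. A minimal local equivalent, assuming a checkout that still contains tests/test_regression.py and an environment with modelscope installed, would be:

# Hedged sketch: invoking the regression suite the way the removed CI step
# did, via pytest's Python entry point rather than a Buildkite command.
import pytest

raise SystemExit(pytest.main(["-v", "-s", "tests/test_regression.py"]))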

View File

@@ -54,38 +54,54 @@ def before_generate_case(context: schemathesis.hooks.HookContext, strategy):
     op = context.operation
     assert op is not None
 
-    def no_file_type(case: schemathesis.models.Case):
+    def no_invalid_types(case: schemathesis.models.Case):
         """
-        This filter skips test cases for the `POST /tokenize` endpoint where the
-        HTTP request body uses `"type": "file"` in any message's content.
-        We expect these cases to fail because that type isn't implemented here
-        https://github.com/vllm-project/vllm/blob/0b34593017953051b3225b1483ce0f4670e3eb0e/vllm/entrypoints/chat_utils.py#L1038-L1095
+        This filter skips test cases with invalid data that schemathesis
+        incorrectly generates due to permissive schema configurations.
+
+        1. Skips `POST /tokenize` endpoint cases with `"type": "file"` in
+           message content, which isn't implemented.
+
+        2. Skips tool_calls with `"type": "custom"` which schemathesis
+           incorrectly generates instead of the valid `"type": "function"`.
 
         Example test cases that are skipped:
         curl -X POST -H 'Content-Type: application/json' \
-            -d '{"messages": [{"role": "assistant"}, {"content": [{"file": {}, "type": "file"}], "role": "user"}]}' \
+            -d '{"messages": [{"content": [{"file": {}, "type": "file"}], "role": "user"}]}' \
            http://localhost:8000/tokenize
 
         curl -X POST -H 'Content-Type: application/json' \
-            -d '{"messages": [{"content": [{"file": {}, "type": "file"}], "role": "user"}]}' \
-            http://localhost:8000/tokenize
+            -d '{"messages": [{"role": "assistant", "tool_calls": [{"custom": {"input": "", "name": ""}, "id": "", "type": "custom"}]}]}' \
+            http://localhost:8000/v1/chat/completions
         """ # noqa: E501
-        if (op.method.lower() == "post" and op.path == "/tokenize"
-                and hasattr(case, "body") and isinstance(case.body, dict)
+        if (hasattr(case, "body") and isinstance(case.body, dict)
                 and "messages" in case.body
                 and isinstance(case.body["messages"], list)
                 and len(case.body["messages"]) > 0):
             for message in case.body["messages"]:
                 if not isinstance(message, dict):
                     continue
-                content = message.get("content", [])
-                if not isinstance(content, list) or len(content) == 0:
-                    continue
-                if any(item.get("type") == "file" for item in content):
-                    return False
+
+                # Check for invalid file type in tokenize endpoint
+                if op.method.lower() == "post" and op.path == "/tokenize":
+                    content = message.get("content", [])
+                    if (isinstance(content, list) and len(content) > 0 and any(
+                            item.get("type") == "file" for item in content)):
+                        return False
+
+                # Check for invalid tool_calls with non-function types
+                tool_calls = message.get("tool_calls", [])
+                if isinstance(tool_calls, list):
+                    for tool_call in tool_calls:
+                        if isinstance(tool_call, dict):
+                            if tool_call.get("type") != "function":
+                                return False
+                            if "custom" in tool_call:
+                                return False
         return True
 
-    return strategy.filter(no_file_type)
+    return strategy.filter(no_invalid_types)
 
 
 @schema.parametrize()
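
The hook above uses schemathesis' before_generate_case extension point, which receives the hypothesis strategy for an operation and may return a filtered strategy. A minimal standalone sketch of the same pattern, assuming schemathesis 3.x; the target URL and the predicate body are illustrative, not part of this diff:

# Illustrative sketch of a before_generate_case hook registered globally.
import schemathesis

schema = schemathesis.from_uri("http://localhost:8000/openapi.json")


@schemathesis.hook
def before_generate_case(context: schemathesis.hooks.HookContext, strategy):

    def acceptable(case) -> bool:
        # Drop generated cases whose body contains a "messages" key; a real
        # filter would inspect the payload more carefully, as above.
        return not (isinstance(case.body, dict) and "messages" in case.body)

    return strategy.filter(acceptable)


@schema.parametrize()
def test_api(case):
    case.call_and_validate()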

View File

@@ -1,78 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""Containing tests that check for regressions in vLLM's behavior.
-
-It should include tests that are reported by users and making sure they
-will never happen again.
-"""
-
-import gc
-
-import pytest
-import torch
-
-from vllm import LLM, SamplingParams
-
-
-@pytest.mark.skip(reason="In V1, we reject tokens > max_seq_len")
-def test_duplicated_ignored_sequence_group():
-    """https://github.com/vllm-project/vllm/issues/1655"""
-
-    sampling_params = SamplingParams(temperature=0.01,
-                                     top_p=0.1,
-                                     max_tokens=256)
-    llm = LLM(model="distilbert/distilgpt2",
-              max_num_batched_tokens=4096,
-              tensor_parallel_size=1)
-    prompts = ["This is a short prompt", "This is a very long prompt " * 1000]
-    outputs = llm.generate(prompts, sampling_params=sampling_params)
-
-    assert len(prompts) == len(outputs)
-
-
-def test_max_tokens_none():
-    sampling_params = SamplingParams(temperature=0.01,
-                                     top_p=0.1,
-                                     max_tokens=None)
-    llm = LLM(model="distilbert/distilgpt2",
-              max_num_batched_tokens=4096,
-              tensor_parallel_size=1)
-    prompts = ["Just say hello!"]
-    outputs = llm.generate(prompts, sampling_params=sampling_params)
-
-    assert len(prompts) == len(outputs)
-
-
-def test_gc():
-    llm = LLM(model="distilbert/distilgpt2", enforce_eager=True)
-    del llm
-
-    gc.collect()
-    torch.cuda.empty_cache()
-
-    # The memory allocated for model and KV cache should be released.
-    # The memory allocated for PyTorch and others should be less than 50MB.
-    # Usually, it's around 10MB.
-    allocated = torch.cuda.memory_allocated()
-    assert allocated < 50 * 1024 * 1024
-
-
-def test_model_from_modelscope(monkeypatch: pytest.MonkeyPatch):
-    # model: https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary
-    with monkeypatch.context() as m:
-        m.setenv("VLLM_USE_MODELSCOPE", "True")
-        # Don't use HF_TOKEN for ModelScope repos, otherwise it will fail
-        # with 400 Client Error: Bad Request.
-        m.setenv("HF_TOKEN", "")
-        llm = LLM(model="qwen/Qwen1.5-0.5B-Chat")
-
-        prompts = [
-            "Hello, my name is",
-            "The president of the United States is",
-            "The capital of France is",
-            "The future of AI is",
-        ]
-        sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
-        outputs = llm.generate(prompts, sampling_params)
-
-        assert len(outputs) == 4

View File

@@ -297,7 +297,7 @@ class CustomAllreduce:
     @staticmethod
     def free_shared_buffer(pointers: list[int],
                            group: Optional[ProcessGroup] = None,
-                           rank: Optional[int] = 0) -> None:
+                           rank: Optional[int] = None) -> None:
         if rank is None:
             rank = dist.get_rank(group=group)
         if ops is not None:
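
This one-line change matters because a default of rank=0 made the `rank is None` branch dead code: every process that omitted the argument acted as rank 0, presumably freeing the wrong per-rank pointer in multi-rank runs. A minimal sketch of the None-sentinel pattern the fix restores, assuming an initialized torch.distributed process group (the function name is illustrative, not vLLM's API):

# None as a sentinel: each process resolves its own rank lazily instead of
# silently defaulting to 0. Assumes dist.init_process_group() has run.
from typing import Optional

import torch.distributed as dist
from torch.distributed import ProcessGroup


def resolve_rank(group: Optional[ProcessGroup] = None,
                 rank: Optional[int] = None) -> int:
    if rank is None:
        rank = dist.get_rank(group=group)
    return rank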