[V1] Multiprocessing Tensor Parallel Support for v1 (#9856)

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
2024-12-10 01:28:14 -05:00
parent bc192a2b09
commit 28b3a1c7e5
21 changed files with 732 additions and 145 deletions
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -5,7 +5,6 @@ from collections import UserList
 from enum import Enum
 from typing import (Any, Callable, Dict, List, Optional, Tuple, Type,
                    TypedDict, TypeVar, Union)
-from unittest.mock import patch

 import numpy as np
 import pytest
@@ -110,7 +109,7 @@ VIDEO_ASSETS = _VideoAssets()


@pytest.fixture(params=[True, False])
-def run_with_both_engines(request):
+def run_with_both_engines(request, monkeypatch):
    # Automatically runs tests twice, once with V1 and once without
    use_v1 = request.param
    # Tests decorated with `@skip_v1` are only run without v1
@@ -119,11 +118,11 @@ def run_with_both_engines(request):
    if use_v1:
        if skip_v1:
            pytest.skip("Skipping test on vllm V1")
-        with patch('vllm.envs.VLLM_USE_V1', True):
-            yield
+        monkeypatch.setenv('VLLM_USE_V1', '1')
    else:
-        with patch('vllm.envs.VLLM_USE_V1', False):
-            yield
+        monkeypatch.setenv('VLLM_USE_V1', '0')
+
+    yield


@pytest.fixture(autouse=True)