[V1] Multiprocessing Tensor Parallel Support for v1 (#9856)

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
Tyler Michael Smith
2024-12-10 01:28:14 -05:00
committed by GitHub
parent bc192a2b09
commit 28b3a1c7e5
21 changed files with 732 additions and 145 deletions

View File

@@ -5,7 +5,6 @@ from collections import UserList
from enum import Enum
from typing import (Any, Callable, Dict, List, Optional, Tuple, Type,
TypedDict, TypeVar, Union)
from unittest.mock import patch
import numpy as np
import pytest
@@ -110,7 +109,7 @@ VIDEO_ASSETS = _VideoAssets()
@pytest.fixture(params=[True, False])
def run_with_both_engines(request):
def run_with_both_engines(request, monkeypatch):
# Automatically runs tests twice, once with V1 and once without
use_v1 = request.param
# Tests decorated with `@skip_v1` are only run without v1
@@ -119,11 +118,11 @@ def run_with_both_engines(request):
if use_v1:
if skip_v1:
pytest.skip("Skipping test on vllm V1")
with patch('vllm.envs.VLLM_USE_V1', True):
yield
monkeypatch.setenv('VLLM_USE_V1', '1')
else:
with patch('vllm.envs.VLLM_USE_V1', False):
yield
monkeypatch.setenv('VLLM_USE_V1', '0')
yield
@pytest.fixture(autouse=True)