[V0 Deprecation] Remove V0 executors (#27142)

Signed-off-by: Nick Hill <nhill@redhat.com>
Nick Hill authored on 2025-10-21 11:09:37 -07:00, committed by GitHub
parent ddeec11ba9
commit 647214f3d5
31 changed files with 425 additions and 1043 deletions

@@ -157,11 +157,9 @@ def test_models_distributed(
         and distributed_executor_backend == "ray"
         and attention_backend == ""
         and test_suite == "L4"
+        and enable_prompt_embeds
     ):  # noqa
-        if enable_prompt_embeds:
-            pytest.skip("enable_prompt_embeds does not work with ray compiled dag.")
-        monkeypatch_context.setenv("VLLM_USE_RAY_SPMD_WORKER", "1")
-        monkeypatch_context.setenv("VLLM_USE_RAY_COMPILED_DAG", "1")
+        pytest.skip("enable_prompt_embeds does not work with ray compiled dag.")
 
     if attention_backend:
         monkeypatch_context.setenv(

@@ -18,8 +18,8 @@ from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
 from vllm import initialize_ray_cluster
 from vllm.config import ParallelConfig
-from vllm.executor.ray_utils import _wait_until_pg_removed
 from vllm.utils.network_utils import get_ip
+from vllm.v1.executor.ray_utils import _wait_until_pg_removed
 
 VLLM_MULTI_NODE = os.getenv("VLLM_MULTI_NODE", "0") == "1"

@@ -305,10 +305,8 @@ def _compare_tp(
     common_args.extend(["--max-num-seqs", f"{max_num_seqs}"])
 
     if distributed_backend == "ray":
-        # For V1, test Ray Compiled Graph for all the tests
+        # Test Ray Compiled Graph for all the tests
         pp_env = {
-            "VLLM_USE_RAY_COMPILED_DAG": "1",
-            "VLLM_USE_RAY_SPMD_WORKER": "1",
             "VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL": "1",
         }
         # Temporary. Currently when zeromq + SPMD is used, it does not properly

@@ -9,7 +9,7 @@ from vllm.distributed import cleanup_dist_env_and_memory
 from vllm.model_executor.model_loader import tensorizer as tensorizer_mod
 from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
 from vllm.utils.network_utils import get_distributed_init_method, get_ip, get_open_port
-from vllm.v1.executor.abstract import UniProcExecutor
+from vllm.v1.executor import UniProcExecutor
 from vllm.v1.worker.worker_base import WorkerWrapperBase
 
 MODEL_REF = "facebook/opt-125m"

@@ -15,7 +15,8 @@ from vllm.platforms import current_platform
 from vllm.utils.torch_utils import set_default_torch_num_threads
 from vllm.v1.engine import EngineCoreRequest
 from vllm.v1.engine.core import EngineCore
-from vllm.v1.executor.abstract import Executor, UniProcExecutor
+from vllm.v1.executor.abstract import Executor
+from vllm.v1.executor.uniproc_executor import UniProcExecutor
 from vllm.v1.kv_cache_interface import KVCacheConfig
 from vllm.v1.outputs import ModelRunnerOutput