correct LWS deployment yaml (#23104)

Signed-off-by: cberge908 <42270330+cberge908@users.noreply.github.com>
2025-09-02 14:04:59 +02:00
parent ce30dca5c4
commit 8bd5844989
2 changed files with 3 additions and 5 deletions
--- a/examples/online_serving/multi-node-serving.sh
+++ b/examples/online_serving/multi-node-serving.sh
@@ -11,7 +11,7 @@
 # Example usage:
 # On the head node machine, start the Ray head node process and run a vLLM server.
 #   ./multi-node-serving.sh leader --ray_port=6379 --ray_cluster_size=<SIZE> [<extra ray args>]  && \
-#   python3 -m vllm.entrypoints.openai.api_server --port 8080 --model meta-llama/Meta-Llama-3.1-405B-Instruct --tensor-parallel-size 8 --pipeline_parallel_size 2
+#   vllm serve meta-llama/Meta-Llama-3.1-405B-Instruct --port 8080 --tensor-parallel-size 8 --pipeline_parallel_size 2
 # 
 # On each worker node, start the Ray worker node process.
 #   ./multi-node-serving.sh worker --ray_address=<HEAD_NODE_IP> --ray_port=6379 [<extra ray args>]