[Doc] Support --stream arg in openai_completion_client.py script (#18388)

Signed-off-by: googs1025 <googs1025@gmail.com>
2025-05-22 21:20:17 +08:00
parent ca86a7cf6e
commit 71075029f2
3 changed files with 25 additions and 9 deletions
--- a/examples/online_serving/openai_chat_completion_structured_outputs.py
+++ b/examples/online_serving/openai_chat_completion_structured_outputs.py
@@ -12,6 +12,9 @@ from enum import Enum
 from openai import BadRequestError, OpenAI
 from pydantic import BaseModel

+openai_api_key = "EMPTY"
+openai_api_base = "http://localhost:8000/v1"
+

 # Guided decoding by Choice (list of possible options)
 def guided_choice_completion(client: OpenAI, model: str):
@@ -134,8 +137,8 @@ def extra_backend_options_completion(client: OpenAI, model: str):

 def main():
    client: OpenAI = OpenAI(
-        base_url="http://localhost:8000/v1",
-        api_key="-",
+        base_url=openai_api_base,
+        api_key=openai_api_key,
    )

    model = client.models.list().data[0].id
--- a/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py
+++ b/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py
@@ -7,11 +7,14 @@ from openai import OpenAI
 # to enforce the format of a tool call response, but it could be used for
 # any structured output within a subset of the response.

+openai_api_key = "EMPTY"
+openai_api_base = "http://localhost:8000/v1"
+

 def main():
    client = OpenAI(
-        base_url="http://localhost:8000/v1",
-        api_key="-",
+        base_url=openai_api_base,
+        api_key=openai_api_key,
    )

    messages = [{
--- a/examples/online_serving/openai_completion_client.py
+++ b/examples/online_serving/openai_completion_client.py
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0

+import argparse
+
 from openai import OpenAI

 # Modify OpenAI's API key and API base to use vLLM's API server.
@@ -7,7 +9,15 @@ openai_api_key = "EMPTY"
 openai_api_base = "http://localhost:8000/v1"


-def main():
+def parse_args():
+    parser = argparse.ArgumentParser(description="Client for vLLM API server")
+    parser.add_argument("--stream",
+                        action="store_true",
+                        help="Enable streaming response")
+    return parser.parse_args()
+
+
+def main(args):
    client = OpenAI(
        # defaults to os.environ.get("OPENAI_API_KEY")
        api_key=openai_api_key,
@@ -18,18 +28,17 @@ def main():
    model = models.data[0].id

    # Completion API
-    stream = False
    completion = client.completions.create(
        model=model,
        prompt="A robot may not injure a human being",
        echo=False,
        n=2,
-        stream=stream,
+        stream=args.stream,
        logprobs=3)

    print("-" * 50)
    print("Completion results:")
-    if stream:
+    if args.stream:
        for c in completion:
            print(c)
    else:
@@ -38,4 +47,5 @@ def main():


 if __name__ == "__main__":
-    main()
+    args = parse_args()
+    main(args)