[Core] Consolidate prompt arguments to LLM engines (#4328)
Co-authored-by: Roger Wang <ywang@roblox.com>
@@ -25,7 +25,7 @@ class MockEngine:
         return [RequestOutput(
             request_id=self.request_id)] if self.request_id else []
 
-    async def encode_request_async(self, *args, **kwargs):
+    async def process_model_inputs_async(self, *args, **kwargs):
         pass
 
     def generate(self, request_id):
@@ -29,7 +29,7 @@ def server():
     ray.shutdown()
 
 
-@pytest.fixture(scope="session")
+@pytest.fixture(scope="module")
 def client():
     client = openai.AsyncOpenAI(
         base_url="http://localhost:8000/v1",
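Context for the first hunk: per the commit title, this change consolidates the previously separate prompt arguments (text prompts vs. pre-tokenized IDs) into a single input that flows through the engine's process_model_inputs path, which is why the test's MockEngine renames its stub coroutine to match. Below is a minimal sketch of what a call site might look like after the consolidation; it assumes the post-change vLLM API where LLM.generate accepts one prompt-inputs argument, and the model name and dict keys ("prompt", "prompt_token_ids") are illustrative assumptions, not taken from this diff.

    from vllm import LLM, SamplingParams

    llm = LLM(model="facebook/opt-125m")  # assumed example model
    params = SamplingParams(temperature=0.0, max_tokens=16)

    # Both forms travel through the same consolidated argument instead of
    # separate `prompts` / `prompt_token_ids` parameters (assumed API shape).
    text_out = llm.generate({"prompt": "Hello, my name is"}, params)
    token_out = llm.generate({"prompt_token_ids": [1, 15043, 29892]}, params)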