Add renderer-based prompt processing for embedding and classification endpoints (#24356)
Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
@ -73,17 +73,11 @@ async def test_zero_truncation_size(client: openai.AsyncOpenAI):
|
||||
"truncate_prompt_tokens": truncation_size
|
||||
}
|
||||
|
||||
with pytest.raises(openai.BadRequestError) as err:
|
||||
await client.post(path="embeddings", cast_to=object, body={**kwargs})
|
||||
response = await client.post(path="embeddings",
|
||||
cast_to=object,
|
||||
body={**kwargs})
|
||||
|
||||
assert err.value.status_code == 400
|
||||
error_details = err.value.response.json()["error"]
|
||||
|
||||
assert error_details["type"] == "BadRequestError"
|
||||
assert "This model's maximum context length is" in error_details["message"]
|
||||
assert "tokens in the input for embedding generation" in error_details[
|
||||
"message"]
|
||||
assert "Please reduce the length of the input" in error_details["message"]
|
||||
assert response["usage"]["prompt_tokens"] == truncation_size
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@ -130,6 +130,23 @@ class TestRenderPrompt:
|
||||
assert call_args.kwargs["truncation"] is True
|
||||
assert call_args.kwargs["max_length"] == 50
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_truncation_negative(self, renderer, mock_async_tokenizer):
|
||||
# Test that negative truncation uses model's max_model_len
|
||||
mock_async_tokenizer.return_value = MockTokenizerResult(
|
||||
[101, 7592, 2088]) # Truncated to max_model_len
|
||||
renderer.async_tokenizer_pool[
|
||||
renderer.tokenizer] = mock_async_tokenizer
|
||||
|
||||
results = await renderer.render_prompt(prompt_or_prompts="Hello world",
|
||||
max_length=200,
|
||||
truncate_prompt_tokens=-1)
|
||||
|
||||
assert len(results) == 1
|
||||
call_args = mock_async_tokenizer.call_args
|
||||
assert call_args.kwargs["truncation"] is True
|
||||
assert call_args.kwargs["max_length"] == 100 # model's max_model_len
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_token_truncation_last_elements(self, renderer):
|
||||
# Test that token truncation keeps the last N elements
|
||||
|
||||
Reference in New Issue
Block a user