diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py index 224b68412e..8c7e626830 100644 --- a/tests/entrypoints/test_chat_utils.py +++ b/tests/entrypoints/test_chat_utils.py @@ -1810,6 +1810,7 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa "unsed_kwargs_2": "abc", # should not appear "chat_template": "{% Hello world! %}", + "tokenize": True, # used by tokenizer "continue_final_message": True, "tools": tools, @@ -1846,10 +1847,21 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa tools=tools, model_config=model_config, ) + with pytest.raises( + ValueError, match="Found unexpected chat template kwargs from request" + ): + # should raise error if `chat_template_kwargs` contains + # `chat_template` or `tokenize` + resolve_chat_template_kwargs( + tokenizer, + chat_template=chat_template, + chat_template_kwargs=chat_template_kwargs, + ) resolved_chat_template_kwargs = resolve_chat_template_kwargs( tokenizer, chat_template=chat_template, chat_template_kwargs=chat_template_kwargs, + raise_on_unexpected=False, ) assert set(resolved_chat_template_kwargs.keys()) == expected_kwargs diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 881447cb20..9bed327723 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -1499,18 +1499,25 @@ def resolve_chat_template_kwargs( tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, chat_template: str, chat_template_kwargs: dict[str, Any], + raise_on_unexpected: bool = True, ) -> dict[str, Any]: + # We exclude chat_template from kwargs here, because + # chat template has been already resolved at this stage + unexpected_vars = {"chat_template", "tokenize"} + if raise_on_unexpected and ( + unexpected_in_kwargs := unexpected_vars & chat_template_kwargs.keys() + ): + raise ValueError( + "Found unexpected chat template kwargs from request: " + f"{unexpected_in_kwargs}" + ) + fn_kw = { k for k in chat_template_kwargs if supports_kw(tokenizer.apply_chat_template, k, allow_var_kwargs=False) } - template_vars = _cached_resolve_chat_template_kwargs(chat_template) - - # We exclude chat_template from kwargs here, because - # chat template has been already resolved at this stage - unexpected_vars = {"chat_template"} accept_vars = (fn_kw | template_vars) - unexpected_vars return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars} @@ -1522,7 +1529,6 @@ def apply_hf_chat_template( tools: list[dict[str, Any]] | None, *, model_config: ModelConfig, - tokenize: bool = False, # Different from HF's default **kwargs: Any, ) -> str: hf_chat_template = resolve_hf_chat_template( @@ -1539,17 +1545,18 @@ def apply_hf_chat_template( "does not define one." ) + resolved_kwargs = resolve_chat_template_kwargs( + tokenizer=tokenizer, + chat_template=hf_chat_template, + chat_template_kwargs=kwargs, + ) + try: - resolved_kwargs = resolve_chat_template_kwargs( - tokenizer=tokenizer, - chat_template=hf_chat_template, - chat_template_kwargs=kwargs, - ) return tokenizer.apply_chat_template( conversation=conversation, # type: ignore[arg-type] tools=tools, # type: ignore[arg-type] chat_template=hf_chat_template, - tokenize=tokenize, + tokenize=False, **resolved_kwargs, )