Simplify (and fix) passing of guided decoding backend options (#17008)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-04-29 20:02:23 +01:00
committed by GitHub
parent 2fa2a50bf9
commit a6977dbd15
17 changed files with 309 additions and 217 deletions

View File

@@ -112,8 +112,8 @@ def extra_backend_options_completion(client: OpenAI, model: str):
"alan.turing@enigma.com\n")
try:
-    # The no-fallback option forces vLLM to use xgrammar, so when it fails
-    # you get a 400 with the reason why
+    # The guided_decoding_disable_fallback option forces vLLM to use
+    # xgrammar, so when it fails you get a 400 with the reason why
completion = client.chat.completions.create(
model=model,
messages=[{
@@ -123,7 +123,8 @@ def extra_backend_options_completion(client: OpenAI, model: str):
extra_body={
"guided_regex": r"\w+@\w+\.com\n",
"stop": ["\n"],
"guided_decoding_backend": "xgrammar:no-fallback"
"guided_decoding_backend": "xgrammar",
"guided_decoding_disable_fallback": True,
},
)
return completion.choices[0].message.content