Simplify (and fix) passing of guided decoding backend options (#17008)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -112,8 +112,8 @@ def extra_backend_options_completion(client: OpenAI, model: str):
                  "alan.turing@enigma.com\n")

     try:
-        # The no-fallback option forces vLLM to use xgrammar, so when it fails
-        # you get a 400 with the reason why
+        # The guided_decoding_disable_fallback option forces vLLM to use
+        # xgrammar, so when it fails you get a 400 with the reason why
         completion = client.chat.completions.create(
             model=model,
             messages=[{
@@ -123,7 +123,8 @@ def extra_backend_options_completion(client: OpenAI, model: str):
             extra_body={
                 "guided_regex": r"\w+@\w+\.com\n",
                 "stop": ["\n"],
-                "guided_decoding_backend": "xgrammar:no-fallback"
+                "guided_decoding_backend": "xgrammar",
+                "guided_decoding_disable_fallback": True,
             },
         )
         return completion.choices[0].message.content
Reference in New Issue
Block a user