[Frontend][Core] Move guided decoding params into sampling params (#8252)

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
Co-authored-by: Nick Hill <nickhill@us.ibm.com>
Author: Joe Runde <Joseph.Runde@ibm.com> (committed by GitHub)
Date:   2024-09-30 19:34:25 -06:00
Parent: bce324487a
Commit: 062c89e7c9
16 changed files with 441 additions and 281 deletions
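At a glance, the user-facing API change this commit makes, as a minimal before/after sketch. The names all come from the diff below; the model choice and regex are placeholders for illustration, not part of this commit:

from vllm.entrypoints.llm import LLM
from vllm.sampling_params import GuidedDecodingParams, SamplingParams

llm = LLM(model="HuggingFaceH4/zephyr-7b-beta")  # placeholder model choice
regex = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}"    # placeholder pattern

# Before: guided decoding was a separate argument to generate().
# outputs = llm.generate(
#     prompts=["Give an example IPv4 address: "],
#     sampling_params=SamplingParams(temperature=0.8),
#     guided_options_request=dict(guided_regex=regex))

# After: guided decoding is carried inside SamplingParams.
sampling_params = SamplingParams(
    temperature=0.8,
    guided_decoding=GuidedDecodingParams(regex=regex))
outputs = llm.generate(prompts=["Give an example IPv4 address: "],
                       sampling_params=sampling_params)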


@@ -7,7 +7,7 @@ import pytest

 from vllm.entrypoints.llm import LLM
 from vllm.outputs import RequestOutput
-from vllm.sampling_params import SamplingParams
+from vllm.sampling_params import GuidedDecodingParams, SamplingParams

 from ...conftest import cleanup
@@ -31,14 +31,12 @@ def test_guided_regex(sample_regex, llm):
     sampling_params = SamplingParams(
         temperature=0.8,
         top_p=0.95,
-    )
-    outputs = llm.generate(
-        prompts=[
-            f"Give an example IPv4 address with this regex: {sample_regex}"
-        ] * 2,
-        sampling_params=sampling_params,
-        use_tqdm=True,
-        guided_options_request=dict(guided_regex=sample_regex))
+        guided_decoding=GuidedDecodingParams(regex=sample_regex))
+    outputs = llm.generate(prompts=[
+        f"Give an example IPv4 address with this regex: {sample_regex}"
+    ] * 2,
+                           sampling_params=sampling_params,
+                           use_tqdm=True)
     assert outputs is not None
     for output in outputs:
@@ -57,15 +55,13 @@ def test_guided_json_completion(sample_json_schema, llm):
     sampling_params = SamplingParams(
         temperature=1.0,
         max_tokens=1000,
-    )
-    outputs = llm.generate(
-        prompts=[
-            f"Give an example JSON for an employee profile "
-            f"that fits this schema: {sample_json_schema}"
-        ] * 2,
-        sampling_params=sampling_params,
-        use_tqdm=True,
-        guided_options_request=dict(guided_json=sample_json_schema))
+        guided_decoding=GuidedDecodingParams(json=sample_json_schema))
+    outputs = llm.generate(prompts=[
+        f"Give an example JSON for an employee profile "
+        f"that fits this schema: {sample_json_schema}"
+    ] * 2,
+                           sampling_params=sampling_params,
+                           use_tqdm=True)
     assert outputs is not None
@@ -86,12 +82,11 @@ def test_guided_choice_completion(sample_guided_choice, llm):
     sampling_params = SamplingParams(
         temperature=0.8,
         top_p=0.95,
-    )
+        guided_decoding=GuidedDecodingParams(choice=sample_guided_choice))
     outputs = llm.generate(
         prompts="The best language for type-safe systems programming is ",
         sampling_params=sampling_params,
-        use_tqdm=True,
-        guided_options_request=dict(guided_choice=sample_guided_choice))
+        use_tqdm=True)
     assert outputs is not None
     for output in outputs:
@@ -112,13 +107,13 @@ def test_guided_grammar(sample_sql_statements, llm):
         temperature=0.8,
         top_p=0.95,
         max_tokens=1000,
-    )
+        guided_decoding=GuidedDecodingParams(grammar=sample_sql_statements))
     outputs = llm.generate(
         prompts=("Generate a sql state that select col_1 from "
                  "table_1 where it is equals to 1"),
         sampling_params=sampling_params,
         use_tqdm=True,
-        guided_options_request=dict(guided_grammar=sample_sql_statements))
+    )
     assert outputs is not None
     for output in outputs:
@@ -140,3 +135,28 @@ def test_guided_grammar(sample_sql_statements, llm):
         assert generated_text.strip() == ground_truth
         print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+
+
+@pytest.mark.skip_global_cleanup
+def test_guided_options_request_deprecation_warning(sample_regex, llm):
+    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+
+    with pytest.warns(DeprecationWarning, match="guided_options_request"):
+        llm.generate(prompts="This should fail",
+                     sampling_params=sampling_params,
+                     use_tqdm=True,
+                     guided_options_request=dict(guided_regex=sample_regex))
+
+
+@pytest.mark.skip_global_cleanup
+def test_validation_against_both_guided_decoding_options(sample_regex, llm):
+    sampling_params = SamplingParams(
+        temperature=0.8,
+        top_p=0.95,
+        guided_decoding=GuidedDecodingParams(regex=sample_regex))
+
+    with pytest.raises(ValueError, match="Cannot set both"):
+        llm.generate(prompts="This should fail",
+                     sampling_params=sampling_params,
+                     use_tqdm=True,
+                     guided_options_request=dict(guided_regex=sample_regex))
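The two new tests above pin down how the deprecated path interacts with the new one: the old `guided_options_request` kwarg alone emits a DeprecationWarning, and passing it together with `SamplingParams.guided_decoding` raises a ValueError. A minimal sketch of such a mutual-exclusion check, for illustration only; `resolve_guided_decoding` is a hypothetical helper, not vLLM's actual implementation:

import warnings
from typing import Any, Optional


def resolve_guided_decoding(
        sampling_guided: Optional[Any],
        guided_options_request: Optional[dict]) -> Optional[Any]:
    """Pick the effective guided-decoding config, enforcing exclusivity."""
    if guided_options_request is not None:
        if sampling_guided is not None:
            # The behavior asserted by
            # test_validation_against_both_guided_decoding_options.
            raise ValueError(
                "Cannot set both guided_options_request and "
                "SamplingParams.guided_decoding")
        # The behavior asserted by
        # test_guided_options_request_deprecation_warning.
        warnings.warn(
            "guided_options_request is deprecated; use "
            "SamplingParams.guided_decoding instead",
            DeprecationWarning,
            stacklevel=2)
        return guided_options_request
    return sampling_guided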


@@ -1,72 +0,0 @@
-# This unit test should be moved to a new
-# tests/test_guided_decoding directory.
-
-import pytest
-import torch
-from transformers import AutoTokenizer
-
-from vllm.entrypoints.openai.protocol import CompletionRequest
-from vllm.model_executor.guided_decoding import (
-    get_guided_decoding_logits_processor)
-from vllm.model_executor.guided_decoding.outlines_logits_processors import (
-    JSONLogitsProcessor, RegexLogitsProcessor)
-
-
-def test_guided_logits_processors(sample_regex, sample_json_schema):
-    """Basic unit test for RegexLogitsProcessor and JSONLogitsProcessor."""
-    tokenizer = AutoTokenizer.from_pretrained('HuggingFaceH4/zephyr-7b-beta')
-    regex_LP = RegexLogitsProcessor(sample_regex, tokenizer)
-    json_LP = JSONLogitsProcessor(sample_json_schema,
-                                  tokenizer,
-                                  whitespace_pattern=None)
-
-    token_ids = tokenizer.encode(
-        f"Give an example IPv4 address with this regex: {sample_regex}")
-    tensor = torch.rand(32000)
-    original_tensor = torch.clone(tensor)
-    regex_LP(token_ids, tensor)
-    assert tensor.shape == original_tensor.shape
-    assert not torch.allclose(tensor, original_tensor)
-
-    token_ids = tokenizer.encode(
-        f"Give an employee profile that fits this schema: {sample_json_schema}"
-    )
-    tensor = torch.rand(32000)
-    original_tensor = torch.clone(tensor)
-    json_LP(token_ids, tensor)
-    assert tensor.shape == original_tensor.shape
-    assert not torch.allclose(tensor, original_tensor)
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize("backend", ["outlines", "lm-format-enforcer"])
-async def test_guided_logits_processor_black_box(backend: str, sample_regex,
-                                                 sample_json_schema):
-    tokenizer = AutoTokenizer.from_pretrained('HuggingFaceH4/zephyr-7b-beta')
-    token_ids = tokenizer.encode(
-        f"Give an example IPv4 address with this regex: {sample_regex}")
-    regex_request = CompletionRequest(model='test',
-                                      prompt=token_ids,
-                                      guided_regex=sample_regex)
-    regex_lp = await get_guided_decoding_logits_processor(
-        backend, regex_request, tokenizer)
-    assert regex_lp is not None
-    tensor = torch.rand(32000)
-    original_tensor = torch.clone(tensor)
-    tensor = regex_lp(token_ids, tensor)
-    assert tensor.shape == original_tensor.shape
-    assert not torch.allclose(tensor, original_tensor)
-
-    token_ids = tokenizer.encode(
-        f"Give an employee profile that fits this schema: {sample_json_schema}"
-    )
-    json_request = CompletionRequest(model='test',
-                                     prompt=token_ids,
-                                     guided_json=sample_json_schema)
-    json_lp = await get_guided_decoding_logits_processor(
-        backend, json_request, tokenizer)
-    assert json_lp is not None
-    tensor = torch.rand(32000)
-    original_tensor = torch.clone(tensor)
-    tensor = json_lp(token_ids, tensor)
-    assert tensor.shape == original_tensor.shape
-    assert not torch.allclose(tensor, original_tensor)
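For reference, the contract these removed tests asserted is that a logits processor is a callable taking the generated token ids and a logits tensor and returning a same-shape tensor whose values differ where tokens are disallowed. A toy stand-in honoring that contract, with assumed names; this is not the outlines-based implementation being tested above:

from typing import List

import torch


class AllowedTokenLogitsProcessor:
    """Toy processor: only tokens in `allowed_token_ids` stay viable."""

    def __init__(self, allowed_token_ids: List[int], vocab_size: int):
        # Additive mask: 0 for allowed tokens, -inf for everything else.
        self.mask = torch.full((vocab_size, ), float("-inf"))
        self.mask[allowed_token_ids] = 0.0

    def __call__(self, token_ids: List[int],
                 logits: torch.Tensor) -> torch.Tensor:
        # Same shape out as in, with banned tokens' values changed:
        # exactly the two properties the deleted tests asserted.
        return logits + self.mask


# Usage mirroring the removed tests' shape/inequality checks:
proc = AllowedTokenLogitsProcessor(allowed_token_ids=[1, 2, 3],
                                   vocab_size=32000)
tensor = torch.rand(32000)
original_tensor = torch.clone(tensor)
out = proc([0, 1, 2], tensor)
assert out.shape == original_tensor.shape
assert not torch.allclose(out, original_tensor)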