[CI] Expand test_guided_generate to test all backends (#11313)

Signed-off-by: mgoin <michael@neuralmagic.com>
2024-12-18 23:00:38 -05:00
parent 17ca964273
commit a30482f054
3 changed files with 128 additions and 50 deletions
--- a/tests/entrypoints/llm/test_guided_generate.py
+++ b/tests/entrypoints/llm/test_guided_generate.py
@ -10,7 +10,8 @@ from vllm.entrypoints.llm import LLM
 from vllm.outputs import RequestOutput
 from vllm.sampling_params import GuidedDecodingParams, SamplingParams

-MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
+MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
+GUIDED_DECODING_BACKENDS = ["outlines", "lm-format-enforcer", "xgrammar"]


@pytest.fixture(scope="module")
@ -26,11 +27,13 @@ def llm():


@pytest.mark.skip_global_cleanup
-def test_guided_regex(sample_regex, llm):
-    sampling_params = SamplingParams(
-        temperature=0.8,
-        top_p=0.95,
-        guided_decoding=GuidedDecodingParams(regex=sample_regex))
+@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
+def test_guided_regex(sample_regex, llm, guided_decoding_backend: str):
+    sampling_params = SamplingParams(temperature=0.8,
+                                     top_p=0.95,
+                                     guided_decoding=GuidedDecodingParams(
+                                         regex=sample_regex,
+                                         backend=guided_decoding_backend))
    outputs = llm.generate(prompts=[
        f"Give an example IPv4 address with this regex: {sample_regex}"
    ] * 2,
@ -50,11 +53,14 @@ def test_guided_regex(sample_regex, llm):


@pytest.mark.skip_global_cleanup
-def test_guided_json_completion(sample_json_schema, llm):
-    sampling_params = SamplingParams(
-        temperature=1.0,
-        max_tokens=1000,
-        guided_decoding=GuidedDecodingParams(json=sample_json_schema))
+@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
+def test_guided_json_completion(sample_json_schema, llm,
+                                guided_decoding_backend: str):
+    sampling_params = SamplingParams(temperature=1.0,
+                                     max_tokens=1000,
+                                     guided_decoding=GuidedDecodingParams(
+                                         json=sample_json_schema,
+                                         backend=guided_decoding_backend))
    outputs = llm.generate(prompts=[
        f"Give an example JSON for an employee profile "
        f"that fits this schema: {sample_json_schema}"
@ -77,11 +83,14 @@ def test_guided_json_completion(sample_json_schema, llm):


@pytest.mark.skip_global_cleanup
-def test_guided_complex_json_completion(sample_complex_json_schema, llm):
-    sampling_params = SamplingParams(
-        temperature=1.0,
-        max_tokens=1000,
-        guided_decoding=GuidedDecodingParams(json=sample_complex_json_schema))
+@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
+def test_guided_complex_json_completion(sample_complex_json_schema, llm,
+                                        guided_decoding_backend: str):
+    sampling_params = SamplingParams(temperature=1.0,
+                                     max_tokens=1000,
+                                     guided_decoding=GuidedDecodingParams(
+                                         json=sample_complex_json_schema,
+                                         backend=guided_decoding_backend))
    outputs = llm.generate(prompts=[
        f"Give an example JSON for an assignment grade "
        f"that fits this schema: {sample_complex_json_schema}"
@ -105,11 +114,14 @@ def test_guided_complex_json_completion(sample_complex_json_schema, llm):


@pytest.mark.skip_global_cleanup
-def test_guided_definition_json_completion(sample_definition_json_schema, llm):
+@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
+def test_guided_definition_json_completion(sample_definition_json_schema, llm,
+                                           guided_decoding_backend: str):
    sampling_params = SamplingParams(temperature=1.0,
                                     max_tokens=1000,
                                     guided_decoding=GuidedDecodingParams(
-                                         json=sample_definition_json_schema))
+                                         json=sample_definition_json_schema,
+                                         backend=guided_decoding_backend))
    outputs = llm.generate(prompts=[
        f"Give an example JSON for solving 8x + 7 = -23 "
        f"that fits this schema: {sample_definition_json_schema}"
@ -133,11 +145,14 @@ def test_guided_definition_json_completion(sample_definition_json_schema, llm):


@pytest.mark.skip_global_cleanup
-def test_guided_choice_completion(sample_guided_choice, llm):
-    sampling_params = SamplingParams(
-        temperature=0.8,
-        top_p=0.95,
-        guided_decoding=GuidedDecodingParams(choice=sample_guided_choice))
+@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
+def test_guided_choice_completion(sample_guided_choice, llm,
+                                  guided_decoding_backend: str):
+    sampling_params = SamplingParams(temperature=0.8,
+                                     top_p=0.95,
+                                     guided_decoding=GuidedDecodingParams(
+                                         choice=sample_guided_choice,
+                                         backend=guided_decoding_backend))
    outputs = llm.generate(
        prompts="The best language for type-safe systems programming is ",
        sampling_params=sampling_params,
@ -156,13 +171,20 @@ def test_guided_choice_completion(sample_guided_choice, llm):


@pytest.mark.skip_global_cleanup
-def test_guided_grammar(sample_sql_statements, llm):
+@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
+def test_guided_grammar(sample_sql_statements, llm,
+                        guided_decoding_backend: str):
+    if guided_decoding_backend == "outlines":
+        pytest.skip("Outlines backend fails in this test case with:\n"
+                    "AttributeError: Error in model execution: 'ParserConf' "
+                    "object has no attribute 'deterministic'")

-    sampling_params = SamplingParams(
-        temperature=0.8,
-        top_p=0.95,
-        max_tokens=1000,
-        guided_decoding=GuidedDecodingParams(grammar=sample_sql_statements))
+    sampling_params = SamplingParams(temperature=0.8,
+                                     top_p=0.95,
+                                     max_tokens=1000,
+                                     guided_decoding=GuidedDecodingParams(
+                                         grammar=sample_sql_statements,
+                                         backend=guided_decoding_backend))
    outputs = llm.generate(
        prompts=("Generate a sql state that select col_1 from "
                 "table_1 where it is equals to 1"),
@ -218,15 +240,18 @@ def test_validation_against_both_guided_decoding_options(sample_regex, llm):


@pytest.mark.skip_global_cleanup
-def test_guided_json_object(llm):
-    sampling_params = SamplingParams(
-        temperature=1.0,
-        max_tokens=100,
-        guided_decoding=GuidedDecodingParams(json_object=True))
+@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS)
+def test_guided_json_object(llm, guided_decoding_backend: str):
+    sampling_params = SamplingParams(temperature=1.0,
+                                     max_tokens=100,
+                                     n=2,
+                                     guided_decoding=GuidedDecodingParams(
+                                         json_object=True,
+                                         backend=guided_decoding_backend))

    outputs = llm.generate(
-        prompts=("Generate a JSON object describing a person with name "
-                 "and age for John Smith who is 31 years old."),
+        prompts=("Generate a JSON object with curly braces for a person with "
+                 "name and age fields for John Smith who is 31 years old."),
        sampling_params=sampling_params,
        use_tqdm=True)

@ -235,10 +260,11 @@ def test_guided_json_object(llm):
        assert output is not None
        assert isinstance(output, RequestOutput)

-        generated_text = output.outputs[0].text
-        print(generated_text)
-        assert generated_text is not None
+        for i in range(2):
+            generated_text = output.outputs[i].text
+            print(generated_text)
+            assert generated_text is not None

-        # Parse to verify it is valid JSON
-        parsed_json = json.loads(generated_text)
-        assert isinstance(parsed_json, dict)
+            # Parse to verify it is valid JSON
+            parsed_json = json.loads(generated_text)
+            assert isinstance(parsed_json, dict)