Revert "[V1] Scatter and gather placeholders in the model runner" (#16075)
@@ -55,10 +55,7 @@ def server(request, audio_assets):
         for key, value in request.param.items()
     ]

-    with RemoteOpenAIServer(MODEL_NAME,
-                            args,
-                            env_dict={"VLLM_AUDIO_FETCH_TIMEOUT":
-                                      "30"}) as remote_server:
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
         yield remote_server

@@ -167,7 +167,7 @@ VLM_TEST_SETTINGS = {
             "cherry_blossom": "<image>What is the season?", # noqa: E501
         }),
         multi_image_prompt="<image><image>Describe the two images in detail.", # noqa: E501
-        max_model_len=4096,
+        max_model_len=8192,
         max_num_seqs=2,
         auto_cls=AutoModelForImageTextToText,
         vllm_runner_kwargs={"mm_processor_kwargs": {"crop_to_patches": True}}

@@ -176,8 +176,6 @@ def test_chat(
             model,
             dtype=dtype,
             tokenizer_mode="mistral",
-            load_format="mistral",
-            config_format="mistral",
             max_model_len=max_model_len,
             limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
     ) as vllm_model:

@@ -200,14 +198,22 @@ def test_chat(


 @large_gpu_test(min_gb=48)
-@pytest.mark.parametrize("prompt,expected_ranges",
-                         [(_create_engine_inputs_hf(IMG_URLS[:1]),
-                           [PlaceholderRange(offset=11, length=494)]),
-                          (_create_engine_inputs_hf(IMG_URLS[1:4]), [
-                              PlaceholderRange(offset=11, length=266),
-                              PlaceholderRange(offset=277, length=1056),
-                              PlaceholderRange(offset=1333, length=418)
-                          ])])
+@pytest.mark.parametrize(
+    "prompt,expected_ranges",
+    [(_create_engine_inputs_hf(IMG_URLS[:1]), [{
+        "offset": 11,
+        "length": 494
+    }]),
+     (_create_engine_inputs_hf(IMG_URLS[1:4]), [{
+         "offset": 11,
+         "length": 266
+     }, {
+         "offset": 277,
+         "length": 1056
+     }, {
+         "offset": 1333,
+         "length": 418
+     }])])
 def test_multi_modal_placeholders(vllm_runner, prompt,
                                   expected_ranges: list[PlaceholderRange],
                                   monkeypatch) -> None:

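As a quick sanity check on the restored parametrization above (illustration only, not part of the diff): the three expected ranges for IMG_URLS[1:4] are contiguous and start right after an 11-token prefix, since each offset equals the previous offset plus its length (11 + 266 = 277, 277 + 1056 = 1333):

    # Illustration only: the expected ranges are back-to-back in the token sequence.
    expected = [
        {"offset": 11, "length": 266},
        {"offset": 277, "length": 1056},
        {"offset": 1333, "length": 418},
    ]
    for prev, cur in zip(expected, expected[1:]):
        assert prev["offset"] + prev["length"] == cur["offset"]
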
@@ -92,8 +92,8 @@ def _validate_image_prompt_replacements_one(
         first_placeholder = image_placeholders[0]

         # NOTE: There is a BOS token
-        assert first_placeholder.offset == 1
-        assert first_placeholder.length == (
+        assert first_placeholder["offset"] == 1
+        assert first_placeholder["length"] == (
             len(processed_inputs["prompt_token_ids"]) - 1) // num_imgs

     except Exception as exc:

@@ -92,8 +92,8 @@ def _validate_image_prompt_replacements_one(

         first_placeholder = image_placeholders[0]

-        assert first_placeholder.offset == 0
-        assert first_placeholder.length == len(
+        assert first_placeholder["offset"] == 0
+        assert first_placeholder["length"] == len(
             processed_inputs["prompt_token_ids"]) // num_imgs
     except Exception as exc:
         failed_size_excs.append((image_size, exc))

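The two assertion hunks above encode the same invariant, once with and once without a leading BOS token: when the processed prompt consists (apart from an optional BOS token) only of num_imgs equally sized image placeholder runs, each placeholder covers an equal share of the non-BOS prompt tokens. A minimal sketch of that invariant with hypothetical numbers (the placeholder token id and size below are made up for illustration):

    # Hypothetical values, chosen only to illustrate the assertion being restored.
    num_imgs = 2
    tokens_per_image = 576                       # assumed placeholder size
    prompt_token_ids = [1] + [32000] * (tokens_per_image * num_imgs)  # BOS + placeholders

    first_placeholder = {"offset": 1, "length": tokens_per_image}
    assert first_placeholder["offset"] == 1      # placeholders start right after BOS
    assert first_placeholder["length"] == (len(prompt_token_ids) - 1) // num_imgs
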
@@ -277,9 +277,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
                                         trust_remote_code=True,
                                         hf_overrides={"architectures": ["GLM4VForCausalLM"]}), # noqa: E501
     "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",
-                                      extras={"2b": "h2oai/h2ovl-mississippi-2b"}, # noqa: E501
-                                      max_transformers_version="4.48", # noqa: E501
-                                      transformers_version_reason="HF model is not compatible."), # noqa: E501
+                                      extras={"2b": "h2oai/h2ovl-mississippi-2b"}), # noqa: E501
     "InternVLChatModel": _HfExamplesInfo("OpenGVLab/InternVL2-1B",
                                          extras={"2B": "OpenGVLab/InternVL2-2B"}, # noqa: E501
                                          trust_remote_code=True),