[V1] Scatter and gather placeholders in the model runner (#16076)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: mgoin <mgoin64@gmail.com>
Signed-off-by: Roger Wang <ywang@roblox.com>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: mgoin <mgoin64@gmail.com>
Co-authored-by: Jennifer Zhao <ai.jenniferzhao@gmail.com>
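For context on the test churn below: multimodal placeholder positions that were previously passed around as plain dicts are now typed PlaceholderRange objects, accessed by attribute rather than by key. A minimal stand-in sketch of that change (the real class lives in vllm.multimodal.inputs; this local dataclass only mirrors the two fields the tests exercise):

from dataclasses import dataclass


@dataclass
class PlaceholderRange:
    offset: int  # index of the first placeholder token in the prompt
    length: int  # number of placeholder tokens in the range


# Before this commit: plain dicts, accessed by key.
old_style = {"offset": 11, "length": 494}
assert old_style["offset"] == 11

# After this commit: a typed range, accessed by attribute.
new_style = PlaceholderRange(offset=11, length=494)
assert new_style.offset == 11 and new_style.length == 494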
@@ -330,9 +330,8 @@ VLM_TEST_SETTINGS = {
         max_num_seqs=4,
         dtype="bfloat16",
         auto_cls=AutoModelForImageTextToText,
-        tensor_parallel_size=8,
-        vllm_runner_kwargs={"gpu_memory_utilization": 0.8},
-        marks=multi_gpu_marks(num_gpus=8),
+        tensor_parallel_size=4,
+        marks=multi_gpu_marks(num_gpus=4),
     ),
     "llava_next": VLMTestInfo(
         models=["llava-hf/llava-v1.6-mistral-7b-hf"],
@@ -200,22 +200,14 @@ def test_chat(


 @large_gpu_test(min_gb=48)
-@pytest.mark.parametrize(
-    "prompt,expected_ranges",
-    [(_create_engine_inputs_hf(IMG_URLS[:1]), [{
-        "offset": 11,
-        "length": 494
-    }]),
-     (_create_engine_inputs_hf(IMG_URLS[1:4]), [{
-         "offset": 11,
-         "length": 266
-     }, {
-         "offset": 277,
-         "length": 1056
-     }, {
-         "offset": 1333,
-         "length": 418
-     }])])
+@pytest.mark.parametrize("prompt,expected_ranges",
+                         [(_create_engine_inputs_hf(IMG_URLS[:1]),
+                           [PlaceholderRange(offset=11, length=494)]),
+                          (_create_engine_inputs_hf(IMG_URLS[1:4]), [
+                              PlaceholderRange(offset=11, length=266),
+                              PlaceholderRange(offset=277, length=1056),
+                              PlaceholderRange(offset=1333, length=418)
+                          ])])
 def test_multi_modal_placeholders(vllm_runner, prompt,
                                   expected_ranges: list[PlaceholderRange],
                                   monkeypatch) -> None:
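Since the stand-in dataclass above compares field-wise, expectations like these can be checked with plain equality instead of per-key dict lookups. A hypothetical check in the spirit of this test (the real test compares the ranges reported by the engine against expected_ranges):

def check_ranges(actual: list[PlaceholderRange],
                 expected: list[PlaceholderRange]) -> None:
    # Dataclass equality compares offset and length field-wise,
    # so no per-key dict comparison is needed anymore.
    assert len(actual) == len(expected)
    for got, want in zip(actual, expected):
        assert got == want, f"expected {want}, got {got}"


check_ranges([PlaceholderRange(offset=11, length=494)],
             [PlaceholderRange(offset=11, length=494)])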
@@ -71,13 +71,11 @@ def test_processor_override(
     # image token offsets
     img_locs = processed_inputs["mm_placeholders"].get("image", [])
     assert len(img_locs) == num_imgs
-    assert [img_loc["offset"] for img_loc in img_locs] == \
+    assert [img_loc.offset for img_loc in img_locs] == \
         [i for i, v in enumerate(prompt_token_ids) \
          if v == config.boi_token_index]

     # patch sizes and masks
-    assert prompt_token_ids.count(config.image_token_index) \
-        == sum(img_patch.sum() for img_patch in mm_kwargs["embed_is_patch"])
+    patch_token_id = vocab[hf_processor.img_patch_token]
+    num_patches = processed_inputs["prompt_token_ids"].count(patch_token_id)
     mm_counts = {"image": num_imgs}
@@ -89,11 +87,3 @@ def test_processor_override(
         == mm_kwargs["patches_per_image"].sum() * num_patches_per_chunk
     assert mm_kwargs["pixel_values"].shape[0] \
         == mm_kwargs["patches_per_image"].sum()
-
-    for embed_is_patch, aspect_ratio in zip(mm_kwargs["embed_is_patch"],
-                                            mm_kwargs["aspect_ratios"]):
-        assert embed_is_patch.shape[0] == \
-            len(tokenizer.encode(
-                hf_processor._prompt_split_image(
-                    aspect_ratio, num_patches_per_chunk),
-                add_special_tokens=False))
@@ -92,8 +92,8 @@ def _validate_image_prompt_replacements_one(
         first_placeholder = image_placeholders[0]

         # NOTE: There is a BOS token
-        assert first_placeholder["offset"] == 1
-        assert first_placeholder["length"] == (
+        assert first_placeholder.offset == 1
+        assert first_placeholder.length == (
             len(processed_inputs["prompt_token_ids"]) - 1) // num_imgs

     except Exception as exc:
@@ -92,8 +92,8 @@ def _validate_image_prompt_replacements_one(

         first_placeholder = image_placeholders[0]

-        assert first_placeholder["offset"] == 0
-        assert first_placeholder["length"] == len(
+        assert first_placeholder.offset == 0
+        assert first_placeholder.length == len(
             processed_inputs["prompt_token_ids"]) // num_imgs
     except Exception as exc:
         failed_size_excs.append((image_size, exc))
@@ -785,6 +785,7 @@ def test_find_update_tokens(
                 item_idx=0,
                 start_idx=6,
                 tokens=[32000, 32000],
+                is_embed=None,
             ),
         ],
         "pattern_4": [
@@ -793,6 +794,7 @@ def test_find_update_tokens(
                 item_idx=0,
                 start_idx=3,
                 tokens=[32000],
+                is_embed=None,
             ),
         ],
     }
@@ -807,12 +809,14 @@ def test_find_update_tokens(
                 item_idx=0,
                 start_idx=1,
                 tokens=[32000, 32000],
+                is_embed=None,
             ),
             PlaceholderFeaturesInfo(
                 modality="pattern_1",
                 item_idx=1,
                 start_idx=5,
                 tokens=[32000, 32000],
+                is_embed=None,
             ),
         ],
         "pattern_3": [
@@ -821,6 +825,7 @@ def test_find_update_tokens(
                 item_idx=0,
                 start_idx=7,
                 tokens=[1550, 918, 1550],
+                is_embed=None,
             ),
         ],
         # No match for pattern_4 as it has lower priority than pattern_1
@@ -835,12 +840,14 @@ def test_find_update_tokens(
                 item_idx=0,
                 start_idx=1,
                 tokens=[32000, 32000],
+                is_embed=None,
             ),
             PlaceholderFeaturesInfo(
                 modality="pattern_1",
                 item_idx=1,
                 start_idx=3,
                 tokens=[32000, 32000],
+                is_embed=None,
             ),
         ],
         "pattern_4": [
@@ -849,6 +856,7 @@ def test_find_update_tokens(
                 item_idx=0,
                 start_idx=5,
                 tokens=[32000],
+                is_embed=None,
             ),
         ],
         "pattern_3": [
@@ -857,6 +865,7 @@ def test_find_update_tokens(
                 item_idx=0,
                 start_idx=6,
                 tokens=[1550, 918, 1550],
+                is_embed=None,
             ),
         ],
     }
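The new is_embed field (None in every expectation above) is what gives the commit its title: a placeholder range may carry a boolean mask marking which positions within it actually receive multimodal embeddings, and the model runner scatters embeddings into exactly those slots. A minimal sketch under that assumption; the function name and shapes are illustrative, not vLLM's actual model-runner code:

import torch


def scatter_mm_embeddings(
    inputs_embeds: torch.Tensor,  # [seq_len, hidden_size]
    mm_embeds: torch.Tensor,  # [num_mm_tokens, hidden_size]
    offset: int,
    length: int,
    is_embed: torch.Tensor | None,  # [length] bool mask, or None
) -> torch.Tensor:
    # View of the placeholder window inside the full embedding tensor;
    # writes through the view mutate inputs_embeds in place.
    window = inputs_embeds[offset:offset + length]
    if is_embed is None:
        # is_embed=None means every token in the range is a placeholder,
        # matching the expectations in the tests above.
        window[:] = mm_embeds
    else:
        # Only masked positions get multimodal embeddings; the rest keep
        # their text embeddings (e.g. separator tokens inside an image).
        window[is_embed] = mm_embeds
    return inputs_embeds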
@@ -3,7 +3,7 @@
 import pytest
 import torch

-from vllm.multimodal.inputs import MultiModalKwargs
+from vllm.multimodal.inputs import MultiModalKwargs, PlaceholderRange
 from vllm.sampling_params import SamplingParams
 from vllm.utils import sha256

 # disable yapf here as it formats differently than isort such that both fail
@@ -158,13 +158,10 @@ def test_generate_block_hash_extra_keys():
     request = make_request(
         request_id=0,
         prompt_token_ids=[_ for _ in range(20)],
-        mm_positions=[{
-            "offset": 0,
-            "length": 5
-        }, {
-            "offset": 10,
-            "length": 5
-        }],
+        mm_positions=[
+            PlaceholderRange(offset=0, length=5),
+            PlaceholderRange(offset=10, length=5),
+        ],
         mm_hashes=["hash1", "hash2"],
     )
@@ -222,13 +219,10 @@ def test_hash_request_tokens(hash_fn):
     request = make_request(
         request_id=0,
         prompt_token_ids=[_ for _ in range(6)],
-        mm_positions=[{
-            "offset": 0,
-            "length": 3
-        }, {
-            "offset": 3,
-            "length": 3
-        }],
+        mm_positions=[
+            PlaceholderRange(offset=0, length=3),
+            PlaceholderRange(offset=3, length=3),
+        ],
         mm_hashes=["hash1", "hash2"],
     )
@@ -253,25 +247,19 @@ def test_hash_tokens_different_mm_input(hash_fn):
     request1 = make_request(
         request_id=0,
         prompt_token_ids=[_ for _ in range(6)],
-        mm_positions=[{
-            "offset": 0,
-            "length": 3
-        }, {
-            "offset": 3,
-            "length": 3
-        }],
+        mm_positions=[
+            PlaceholderRange(offset=0, length=3),
+            PlaceholderRange(offset=3, length=3),
+        ],
         mm_hashes=["hash1", "hash2"],
     )
     request2 = make_request(
         request_id=1,
         prompt_token_ids=[_ for _ in range(6)],
-        mm_positions=[{
-            "offset": 0,
-            "length": 3
-        }, {
-            "offset": 3,
-            "length": 3
-        }],
+        mm_positions=[
+            PlaceholderRange(offset=0, length=3),
+            PlaceholderRange(offset=3, length=3),
+        ],
         mm_hashes=["hash3", "hash2"],
     )
     block_size = 3
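These prefix-caching tests show why mm_positions and mm_hashes travel together: two requests with identical prompt token IDs but different image content ("hash1" vs "hash3" above) must not share cache blocks. A sketch of the overlap logic with illustrative names (not vLLM's internals), again using the stand-in PlaceholderRange from the first sketch:

def mm_extra_keys(block_start: int, block_end: int,
                  mm_positions: list[PlaceholderRange],
                  mm_hashes: list[str]) -> tuple[str, ...]:
    # A block's hash folds in the hash of every multimodal item whose
    # placeholder range overlaps the block's half-open token range.
    keys = []
    for rng, mm_hash in zip(mm_positions, mm_hashes):
        if rng.offset < block_end and block_start < rng.offset + rng.length:
            keys.append(mm_hash)
    return tuple(keys)


# The first block of 3 tokens overlaps only the first placeholder range,
# so only that item's hash contributes to the block hash.
assert mm_extra_keys(0, 3,
                     [PlaceholderRange(offset=0, length=3),
                      PlaceholderRange(offset=3, length=3)],
                     ["hash1", "hash2"]) == ("hash1",)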