[Model] Add LFM2 architecture (#22845)
Signed-off-by: Paul Pak <paulpak58@gmail.com>
@@ -31,6 +31,7 @@ HYBRID_MODELS = [
     "hmellor/tiny-random-BambaForCausalLM",
     "ibm-granite/granite-4.0-tiny-preview",
     "tiiuae/Falcon-H1-0.5B-Base",
+    "LiquidAI/LFM2-1.2B",
 ]
 
 HF_UNSUPPORTED_MODELS = [
@@ -52,6 +53,7 @@ V1_SUPPORTED_MODELS = [
     "hmellor/tiny-random-BambaForCausalLM",
     "ibm-granite/granite-4.0-tiny-preview",
     "tiiuae/Falcon-H1-0.5B-Base",
+    "LiquidAI/LFM2-1.2B",
 ]
 
 FULL_CUDA_GRAPH_MODELS = [
@@ -59,6 +61,10 @@ FULL_CUDA_GRAPH_MODELS = [
     "Zyphra/Zamba2-1.2B-instruct",
 ]
 
+V0_UNSUPPORTED_MODELS = [
+    "LiquidAI/LFM2-1.2B",
+]
+
 # Avoid OOM
 MAX_NUM_SEQS = 4
 
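Note: a model placed in V0_UNSUPPORTED_MODELS is only exercised through the V1 engine path below, so it should also appear in V1_SUPPORTED_MODELS. A minimal sanity check along those lines (not part of this patch, using only the list names defined above) would be:

# Hypothetical consistency check, not in the patch: a V1-only model that is
# missing from V1_SUPPORTED_MODELS would never be tested at all.
assert set(V0_UNSUPPORTED_MODELS) <= set(V1_SUPPORTED_MODELS)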
@@ -94,9 +100,12 @@ def test_models(
     else:
         hf_outputs = None
 
-    with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model:
-        vllm_v0_outputs = vllm_model.generate_greedy_logprobs(
-            example_prompts, max_tokens, num_logprobs)
+    if model not in V0_UNSUPPORTED_MODELS:
+        with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model:
+            vllm_v0_outputs = vllm_model.generate_greedy_logprobs(
+                example_prompts, max_tokens, num_logprobs)
+    else:
+        vllm_v0_outputs = None
 
     if model in V1_SUPPORTED_MODELS:
         with monkeypatch.context() as m:
@@ -112,7 +121,7 @@ def test_models(
     else:
         vllm_v1_outputs = None
 
-    if hf_outputs is not None:
+    if hf_outputs is not None and vllm_v0_outputs is not None:
         check_logprobs_close(
             outputs_0_lst=hf_outputs,
             outputs_1_lst=vllm_v0_outputs,
@@ -122,6 +131,7 @@ def test_models(
 
     if model in V1_SUPPORTED_MODELS:
         ref_outputs = hf_outputs if hf_outputs is not None else vllm_v0_outputs
+        assert ref_outputs is not None
         check_logprobs_close(
             outputs_0_lst=ref_outputs,
             outputs_1_lst=vllm_v1_outputs,
@@ -140,6 +150,9 @@ def test_batching(
     max_tokens: int,
     num_logprobs: int,
 ) -> None:
+    if model in V0_UNSUPPORTED_MODELS:
+        pytest.skip(
+            f"Unsupported V0 Engine. Skipping `test_batching` on {model}.")
 
     try:
         model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
@@ -392,9 +405,12 @@ def test_full_cuda_graph(
     else:
         hf_outputs = None
 
-    with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model:
-        vllm_v0_outputs = vllm_model.generate_greedy_logprobs(
-            example_prompts, max_tokens, num_logprobs)
+    if model not in V0_UNSUPPORTED_MODELS:
+        with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model:
+            vllm_v0_outputs = vllm_model.generate_greedy_logprobs(
+                example_prompts, max_tokens, num_logprobs)
+    else:
+        vllm_v0_outputs = None
 
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_V1", "1")
@@ -408,7 +424,7 @@ def test_full_cuda_graph(
             vllm_v1_outputs = vllm_model.generate_greedy_logprobs(
                 example_prompts, max_tokens, num_logprobs)
 
-    if hf_outputs is not None:
+    if hf_outputs is not None and vllm_v0_outputs is not None:
         check_logprobs_close(
             outputs_0_lst=hf_outputs,
             outputs_1_lst=vllm_v0_outputs,
@@ -417,6 +433,7 @@ def test_full_cuda_graph(
     )
 
     ref_outputs = hf_outputs if hf_outputs is not None else vllm_v0_outputs
+    assert ref_outputs is not None
     check_logprobs_close(
         outputs_0_lst=ref_outputs,
         outputs_1_lst=vllm_v1_outputs,
@@ -230,6 +230,8 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                        "tiny": "ai21labs/Jamba-tiny-dev",
                                        "random": "ai21labs/Jamba-tiny-random",  # noqa: E501
                                    }),
+    "Lfm2ForCausalLM": _HfExamplesInfo("LiquidAI/LFM2-1.2B",
+                                       min_transformers_version="4.54"),
     "LlamaForCausalLM": _HfExamplesInfo("meta-llama/Llama-3.2-1B-Instruct",
                                         extras={"guard": "meta-llama/Llama-Guard-3-1B",  # noqa: E501
                                                 "hermes": "NousResearch/Hermes-3-Llama-3.1-8B",  # noqa: E501
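For context, here is a minimal offline-inference sketch for the newly registered architecture. It assumes vLLM's public LLM/SamplingParams API and the V1-only status this patch gives LFM2; it is illustrative, not code from the PR:

import os

# LFM2 is listed in V0_UNSUPPORTED_MODELS above, so enable the V1 engine
# before importing vLLM (same flag the tests set via monkeypatch).
# Hypothetical usage sketch.
os.environ["VLLM_USE_V1"] = "1"

from vllm import LLM, SamplingParams

# Small batch to avoid OOM, mirroring MAX_NUM_SEQS in the tests.
llm = LLM(model="LiquidAI/LFM2-1.2B", max_num_seqs=4)
outputs = llm.generate(["Hello, my name is"],
                       SamplingParams(temperature=0.0, max_tokens=32))
print(outputs[0].outputs[0].text)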
@@ -95,6 +95,8 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
 
 @pytest.mark.parametrize("model_arch", HF_EXAMPLE_MODELS.get_supported_archs())
 def test_can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch):
+    if model_arch == "Lfm2ForCausalLM":
+        pytest.skip("Skipping until test supports V1-only models")
     can_initialize(model_arch, monkeypatch, HF_EXAMPLE_MODELS)
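To exercise only the new model through the touched suite, a pytest invocation along these lines should work; the test file path is an assumption based on where these hunks appear to live, not something stated in the diff:

import pytest

# Hypothetical invocation; adjust the path to the actual test file in the repo.
pytest.main([
    "tests/models/language/generation/test_hybrid.py",
    "-k", "LFM2",
    "-v",
])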