[DCP] Support Decode Context Parallel (DCP) for GQA with FlashAttention (#24864)
Signed-off-by: yuanyongjie.yyj <yuanyongjie.yyj@antgroup.com>
Signed-off-by: FENP <32334296+FENP@users.noreply.github.com>
Signed-off-by: Jaya Yuan <yuanyongjie.yyj@antgroup.com>
This commit is contained in:
@@ -204,17 +204,21 @@ def _compare_cp_with_tp(

CP_TEXT_GENERATION_MODELS = {
    # [MLA attention only]
    "deepseek-ai/DeepSeek-V2-Lite-Chat": [
        CPTestSettings.detailed(),
        CPTestSettings.detailed(tp_base=2),
    ],
    "bigcode/gpt_bigcode-santacoder": [
        CPTestSettings.detailed(),
        CPTestSettings.detailed(tp_base=2),
    ],
}

CP_TEST_MODELS = [
    # TODO support other models
    # [LANGUAGE GENERATION]
    "deepseek-ai/DeepSeek-V2-Lite-Chat",
    "bigcode/gpt_bigcode-santacoder",
]

@@ -262,7 +262,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {

     "GPT2LMHeadModel": _HfExamplesInfo("openai-community/gpt2", {"alias": "gpt2"}),
     "GPTBigCodeForCausalLM": _HfExamplesInfo(
         "bigcode/starcoder",
-        extras={"tiny": "bigcode/tiny_starcoder_py"},
+        extras={
+            "tiny": "bigcode/tiny_starcoder_py",
+            "santacoder": "bigcode/gpt_bigcode-santacoder",
+        },
         min_transformers_version="4.55.1",
         transformers_version_reason="HF model broken in 4.55.0",
     ),

|
||||
Reference in New Issue
Block a user