[DCP] Support Decode Context Parallel (DCP) for GQA with FlashAttention (#24864)

Signed-off-by: yuanyongjie.yyj <yuanyongjie.yyj@antgroup.com>
Signed-off-by: FENP <32334296+FENP@users.noreply.github.com>
Signed-off-by: Jaya Yuan <yuanyongjie.yyj@antgroup.com>
2025-10-14 21:07:50 +08:00
parent fdd32750f0
commit ea97940d6c
7 changed files with 209 additions and 33 deletions
--- a/tests/distributed/test_context_parallel.py
+++ b/tests/distributed/test_context_parallel.py
@@ -204,17 +204,21 @@ def _compare_cp_with_tp(


 CP_TEXT_GENERATION_MODELS = {
-    # [MLA attention only]
    "deepseek-ai/DeepSeek-V2-Lite-Chat": [
        CPTestSettings.detailed(),
        CPTestSettings.detailed(tp_base=2),
    ],
+    "bigcode/gpt_bigcode-santacoder": [
+        CPTestSettings.detailed(),
+        CPTestSettings.detailed(tp_base=2),
+    ],
 }

 CP_TEST_MODELS = [
    # TODO support other models
    # [LANGUAGE GENERATION]
    "deepseek-ai/DeepSeek-V2-Lite-Chat",
+    "bigcode/gpt_bigcode-santacoder",
 ]


--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -262,7 +262,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
    "GPT2LMHeadModel": _HfExamplesInfo("openai-community/gpt2", {"alias": "gpt2"}),
    "GPTBigCodeForCausalLM": _HfExamplesInfo(
        "bigcode/starcoder",
-        extras={"tiny": "bigcode/tiny_starcoder_py"},
+        extras={
+            "tiny": "bigcode/tiny_starcoder_py",
+            "santacoder": "bigcode/gpt_bigcode-santacoder",
+        },
        min_transformers_version="4.55.1",
        transformers_version_reason="HF model broken in 4.55.0",
    ),