Test: add scenario for embedding_model update when chunk_count > 0 (#13351)

### What problem does this PR solve?

Guard against an embedding_model change when the dataset has existing chunks. The API must return code 102 with the message 'When chunk_num (N) > 0, embedding_model must remain <current_model>' to prevent silent embedding drift.

### Type of change

- [x] Add Testcases

Co-authored-by: Liu An <asiro@qq.com>
2026-03-26 08:49:56 +08:00 · 2026-03-04 04:41:35 -05:00
parent f47c47df99
commit 8a7272f423
1 changed file with 24 additions and 1 deletion
--- a/test/testcases/test_http_api/test_dataset_management/test_update_dataset.py
+++ b/test/testcases/test_http_api/test_dataset_management/test_update_dataset.py
@@ -26,7 +26,6 @@ from utils import encode_avatar
 from utils.file_utils import create_image_file
 from utils.hypothesis_utils import valid_names
 from configs import DEFAULT_PARSER_CONFIG
-# TODO: Missing scenario for updating embedding_model with chunk_count != 0


 class TestAuthorization:
@@ -275,6 +274,30 @@ class TestDatasetUpdate:
        assert res["code"] == 0, res
        assert res["data"][0]["embedding_model"] == embedding_model, res

+    @pytest.mark.p1
+    def test_embedding_model_with_existing_chunks(self, HttpApiAuth, add_chunks):
+        """Guard: embedding_model cannot change when dataset has chunks (chunk_count > 0)."""
+        dataset_id, _, _ = add_chunks  # fixture unpacks to three values; only the first (dataset id) is used
+
+        res = list_datasets(HttpApiAuth, {"id": dataset_id})  # fetch the dataset's current state, selecting it by id
+        assert res["code"] == 0, res
+        assert res["data"], res
+        dataset = res["data"][0]
+        assert dataset.get("chunk_count", 0) > 0, res  # precondition for this scenario: the dataset already holds chunks
+
+        current_embedding = dataset["embedding_model"]
+        candidates = ["BAAI/bge-small-en-v1.5@Builtin", "embedding-3@ZHIPU-AI"]
+        new_embedding = candidates[0] if current_embedding != candidates[0] else candidates[1]  # pick whichever candidate differs from the current model
+
+        payload = {"embedding_model": new_embedding}
+        res = update_dataset(HttpApiAuth, dataset_id, payload)
+        assert res["code"] == 102, res  # the update must be rejected with code 102
+        expected_message = (  # rejection text echoes the listed chunk count and the unchanged model name
+            f"When chunk_num ({dataset['chunk_count']}) > 0, "
+            f"embedding_model must remain {current_embedding}"
+        )
+        assert res["message"] == expected_message, res
+
    @pytest.mark.p2
    @pytest.mark.parametrize(
        "name, embedding_model",