Refactor dataset / kb API to RESTful style (#13263)

### What problem does this PR solve? 1. Split the dataset API into gateway and service, and modify the web UI to use the RESTful HTTP API. 2. Old KB-related APIs are commented out. ### Type of change - [x] Refactoring
2026-05-05 01:37:46 +08:00 · 2026-03-13 20:02:35 +08:00
parent 8b984c9d5f
commit 7c32e206be
53 changed files with 1721 additions and 1207 deletions
--- a/test/testcases/test_web_api/test_kb_app/conftest.py
+++ b/test/testcases/test_web_api/test_kb_app/conftest.py
@ -14,7 +14,7 @@
 #  limitations under the License.
 #
 import pytest
-from common import batch_create_datasets, list_kbs, rm_kb
+from common import batch_create_datasets, list_datasets, delete_datasets
 from libs.auth import RAGFlowWebApiAuth
 from pytest import FixtureRequest
 from ragflow_sdk import RAGFlow
@ -26,11 +26,10 @@ def add_datasets(request: FixtureRequest, client: RAGFlow, WebApiAuth: RAGFlowWe

    def cleanup():
        # Web KB cleanup cannot call SDK dataset bulk delete with empty ids; deletion must stay explicit.
-        res = list_kbs(WebApiAuth, params={"page_size": 1000})
-        existing_ids = {kb["id"] for kb in res["data"]["kbs"]}
-        for dataset_id in dataset_ids:
-            if dataset_id in existing_ids:
-                rm_kb(WebApiAuth, {"kb_id": dataset_id})
+        res = list_datasets(WebApiAuth, params={"page_size": 1000})
+        existing_ids = {kb["id"] for kb in res["data"]}
+        ids_to_delete = list({dataset_id for dataset_id in dataset_ids if dataset_id in existing_ids})
+        delete_datasets(WebApiAuth, {"ids": ids_to_delete})

    request.addfinalizer(cleanup)
    return dataset_ids
@ -42,11 +41,10 @@ def add_datasets_func(request: FixtureRequest, client: RAGFlow, WebApiAuth: RAGF

    def cleanup():
        # Web KB cleanup cannot call SDK dataset bulk delete with empty ids; deletion must stay explicit.
-        res = list_kbs(WebApiAuth, params={"page_size": 1000})
-        existing_ids = {kb["id"] for kb in res["data"]["kbs"]}
-        for dataset_id in dataset_ids:
-            if dataset_id in existing_ids:
-                rm_kb(WebApiAuth, {"kb_id": dataset_id})
+        res = list_datasets(WebApiAuth, params={"page_size": 1000})
+        existing_ids = {kb["id"] for kb in res["data"]}
+        ids_to_delete = list({dataset_id for dataset_id in dataset_ids if dataset_id in existing_ids})
+        delete_datasets(WebApiAuth, {"ids": ids_to_delete})

    request.addfinalizer(cleanup)
    return dataset_ids
--- a/test/testcases/test_web_api/test_kb_app/test_create_kb.py
+++ b/test/testcases/test_web_api/test_kb_app/test_create_kb.py
@ -16,7 +16,7 @@
 from concurrent.futures import ThreadPoolExecutor, as_completed

 import pytest
-from common import create_kb
+from common import create_dataset
 from configs import DATASET_NAME_LIMIT, INVALID_API_TOKEN
 from hypothesis import example, given, settings
 from libs.auth import RAGFlowWebApiAuth
@ -35,7 +35,7 @@ class TestAuthorization:
        ids=["empty_auth", "invalid_api_token"],
    )
    def test_auth_invalid(self, invalid_auth, expected_code, expected_message):
-        res = create_kb(invalid_auth, {"name": "auth_test"})
+        res = create_dataset(invalid_auth, {"name": "auth_test"})
        assert res["code"] == expected_code, res
        assert res["message"] == expected_message, res

@ -46,14 +46,14 @@ class TestCapability:
    def test_create_kb_1k(self, WebApiAuth):
        for i in range(1_000):
            payload = {"name": f"dataset_{i}"}
-            res = create_kb(WebApiAuth, payload)
+            res = create_dataset(WebApiAuth, payload)
            assert res["code"] == 0, f"Failed to create dataset {i}"

    @pytest.mark.p3
    def test_create_kb_concurrent(self, WebApiAuth):
        count = 100
        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(create_kb, WebApiAuth, {"name": f"dataset_{i}"}) for i in range(count)]
+            futures = [executor.submit(create_dataset, WebApiAuth, {"name": f"dataset_{i}"}) for i in range(count)]
        responses = list(as_completed(futures))
        assert len(responses) == count, responses
        assert all(future.result()["code"] == 0 for future in futures)
@ -66,44 +66,44 @@ class TestDatasetCreate:
    @example("a" * 128)
    @settings(max_examples=20)
    def test_name(self, WebApiAuth, name):
-        res = create_kb(WebApiAuth, {"name": name})
+        res = create_dataset(WebApiAuth, {"name": name})
        assert res["code"] == 0, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "name, expected_message",
        [
-            ("", "Dataset name can't be empty."),
-            (" ", "Dataset name can't be empty."),
-            ("a" * (DATASET_NAME_LIMIT + 1), "Dataset name length is 129 which is large than 128"),
-            (0, "Dataset name must be string."),
-            (None, "Dataset name must be string."),
+            ("", "Field: <name> - Message: <String should have at least 1 character>"),
+            (" ", "Field: <name> - Message: <String should have at least 1 character>"),
+            ("a" * (DATASET_NAME_LIMIT + 1), "Field: <name> - Message: <String should have at most 128 characters>"),
+            (0, "Field: <name> - Message: <Input should be a valid string>"),
+            (None, "Field: <name> - Message: <Input should be a valid string>"),
        ],
        ids=["empty_name", "space_name", "too_long_name", "invalid_name", "None_name"],
    )
    def test_name_invalid(self, WebApiAuth, name, expected_message):
        payload = {"name": name}
-        res = create_kb(WebApiAuth, payload)
-        assert res["code"] == 102, res
+        res = create_dataset(WebApiAuth, payload)
+        assert res["code"] == 101, res
        assert expected_message in res["message"], res

    @pytest.mark.p3
    def test_name_duplicated(self, WebApiAuth):
        name = "duplicated_name"
        payload = {"name": name}
-        res = create_kb(WebApiAuth, payload)
+        res = create_dataset(WebApiAuth, payload)
        assert res["code"] == 0, res

-        res = create_kb(WebApiAuth, payload)
+        res = create_dataset(WebApiAuth, payload)
        assert res["code"] == 0, res

    @pytest.mark.p3
    def test_name_case_insensitive(self, WebApiAuth):
        name = "CaseInsensitive"
        payload = {"name": name.upper()}
-        res = create_kb(WebApiAuth, payload)
+        res = create_dataset(WebApiAuth, payload)
        assert res["code"] == 0, res

        payload = {"name": name.lower()}
-        res = create_kb(WebApiAuth, payload)
+        res = create_dataset(WebApiAuth, payload)
        assert res["code"] == 0, res
--- a/test/testcases/test_web_api/test_kb_app/test_kb_pipeline_tasks.py
+++ b/test/testcases/test_web_api/test_kb_app/test_kb_pipeline_tasks.py
@ -14,17 +14,17 @@
 #  limitations under the License.
 #
 import pytest
-from common import (
+from test_web_api.common import (
    kb_delete_pipeline_logs,
    kb_list_pipeline_dataset_logs,
    kb_list_pipeline_logs,
    kb_pipeline_log_detail,
-    kb_run_graphrag,
+    run_graphrag,
+    trace_graphrag,
+    run_raptor,
+    trace_raptor,
    kb_run_mindmap,
-    kb_run_raptor,
-    kb_trace_graphrag,
    kb_trace_mindmap,
-    kb_trace_raptor,
    list_documents,
    parse_documents,
 )
@ -101,13 +101,13 @@ class TestKbPipelineTasks:
    @pytest.mark.p3
    def test_graphrag_run_and_trace(self, WebApiAuth, add_chunks):
        kb_id, _, _ = add_chunks
-        run_res = kb_run_graphrag(WebApiAuth, {"kb_id": kb_id})
+        run_res = run_graphrag(WebApiAuth, kb_id)
        assert run_res["code"] == 0, run_res
        task_id = run_res["data"]["graphrag_task_id"]
        assert task_id, run_res

-        _wait_for_task(kb_trace_graphrag, WebApiAuth, kb_id, task_id)
-        trace_res = kb_trace_graphrag(WebApiAuth, {"kb_id": kb_id})
+        _wait_for_task(trace_graphrag, WebApiAuth, kb_id, task_id)
+        trace_res = trace_graphrag(WebApiAuth, kb_id)
        assert trace_res["code"] == 0, trace_res
        task = _find_task(trace_res["data"], task_id)
        assert task, trace_res
@ -118,13 +118,13 @@ class TestKbPipelineTasks:
    @pytest.mark.p3
    def test_raptor_run_and_trace(self, WebApiAuth, add_chunks):
        kb_id, _, _ = add_chunks
-        run_res = kb_run_raptor(WebApiAuth, {"kb_id": kb_id})
+        run_res = run_raptor(WebApiAuth, kb_id)
        assert run_res["code"] == 0, run_res
        task_id = run_res["data"]["raptor_task_id"]
        assert task_id, run_res

-        _wait_for_task(kb_trace_raptor, WebApiAuth, kb_id, task_id)
-        trace_res = kb_trace_raptor(WebApiAuth, {"kb_id": kb_id})
+        _wait_for_task(trace_raptor, WebApiAuth, kb_id, task_id)
+        trace_res = trace_raptor(WebApiAuth, kb_id)
        assert trace_res["code"] == 0, trace_res
        task = _find_task(trace_res["data"], task_id)
        assert task, trace_res
--- a/test/testcases/test_web_api/test_kb_app/test_kb_routes_unit.py
+++ b/test/testcases/test_web_api/test_kb_app/test_kb_routes_unit.py
@ -181,7 +181,7 @@ def set_tenant_info():
    return None


-@pytest.mark.p2
+@pytest.mark.p3
 def test_create_branches(monkeypatch):
    module = _load_kb_module(monkeypatch)

@ -211,7 +211,7 @@ def test_create_branches(monkeypatch):
    assert "save boom" in res["message"], res


-@pytest.mark.p2
+@pytest.mark.p3
 def test_update_branches(monkeypatch):
    module = _load_kb_module(monkeypatch)
    update_route = _unwrap_route(module.update)
@ -326,7 +326,7 @@ def test_update_branches(monkeypatch):
    assert "update boom" in res["message"], res


-@pytest.mark.p2
+@pytest.mark.p3
 def test_update_metadata_setting_not_found(monkeypatch):
    module = _load_kb_module(monkeypatch)
    _set_request_json(monkeypatch, module, {"kb_id": "missing-kb", "metadata": {}})
@ -336,7 +336,7 @@ def test_update_metadata_setting_not_found(monkeypatch):
    assert "Database error" in res["message"], res


-@pytest.mark.p2
+@pytest.mark.p3
 def test_detail_branches(monkeypatch):
    module = _load_kb_module(monkeypatch)

@ -380,7 +380,7 @@ def test_detail_branches(monkeypatch):
    assert "detail boom" in res["message"], res


-@pytest.mark.p2
+@pytest.mark.p3
 def test_list_kbs_owner_ids_and_desc(monkeypatch):
    module = _load_kb_module(monkeypatch)

@ -414,7 +414,7 @@ def test_list_kbs_owner_ids_and_desc(monkeypatch):
    assert "list boom" in res["message"], res


-@pytest.mark.p2
+@pytest.mark.p3
 def test_rm_and_rm_sync_branches(monkeypatch):
    module = _load_kb_module(monkeypatch)

@ -491,7 +491,7 @@ def test_rm_and_rm_sync_branches(monkeypatch):
    assert "rm boom" in res["message"], res


-@pytest.mark.p2
+@pytest.mark.p3
 def test_tags_and_meta_branches(monkeypatch):
    module = _load_kb_module(monkeypatch)

@ -560,7 +560,7 @@ def test_tags_and_meta_branches(monkeypatch):
    assert res["data"]["finished"] == 1, res


-@pytest.mark.p2
+@pytest.mark.p3
 def test_knowledge_graph_branches(monkeypatch):
    module = _load_kb_module(monkeypatch)

@ -636,7 +636,7 @@ def test_knowledge_graph_branches(monkeypatch):
    assert res["data"] is True, res


-@pytest.mark.p2
+@pytest.mark.p3
 def test_list_pipeline_logs_validation_branches(monkeypatch):
    module = _load_kb_module(monkeypatch)

@ -681,7 +681,7 @@ def test_list_pipeline_logs_validation_branches(monkeypatch):
    assert "Create data filter is abnormal." in res["message"], res


-@pytest.mark.p2
+@pytest.mark.p3
 def test_list_pipeline_logs_filter_and_exception_branches(monkeypatch):
    module = _load_kb_module(monkeypatch)

@ -718,7 +718,7 @@ def test_list_pipeline_logs_filter_and_exception_branches(monkeypatch):
    assert "logs boom" in res["message"], res


-@pytest.mark.p2
+@pytest.mark.p3
 def test_list_pipeline_dataset_logs_branches(monkeypatch):
    module = _load_kb_module(monkeypatch)

@ -792,7 +792,7 @@ def test_list_pipeline_dataset_logs_branches(monkeypatch):
    assert "dataset logs boom" in res["message"], res


-@pytest.mark.p2
+@pytest.mark.p3
 def test_pipeline_log_detail_and_delete_routes_branches(monkeypatch):
    module = _load_kb_module(monkeypatch)

@ -841,7 +841,7 @@ def test_pipeline_log_detail_and_delete_routes_branches(monkeypatch):
    assert res["data"]["id"] == "log-1", res


-@pytest.mark.p2
+@pytest.mark.p3
@pytest.mark.parametrize(
    "route_name,task_attr,response_key,task_type",
    [
@ -914,7 +914,7 @@ def test_run_pipeline_task_routes_branch_matrix(monkeypatch, route_name, task_at
    assert queue_calls["doc_ids"] == ["doc-1", "doc-2"], queue_calls


-@pytest.mark.p2
+@pytest.mark.p3
@pytest.mark.parametrize(
    "route_name,task_attr,empty_on_missing_task,error_text",
    [
@ -970,7 +970,7 @@ def test_trace_pipeline_task_routes_branch_matrix(monkeypatch, route_name, task_
    assert res["data"]["id"] == "task-1", res


-@pytest.mark.p2
+@pytest.mark.p3
 def test_unbind_task_branch_matrix(monkeypatch):
    module = _load_kb_module(monkeypatch)
    route = inspect.unwrap(module.delete_kb_task)
@ -1060,7 +1060,7 @@ def test_unbind_task_branch_matrix(monkeypatch):
    assert "cannot delete task" in res["message"], res


-@pytest.mark.p2
+@pytest.mark.p3
 def test_check_embedding_similarity_threshold_matrix_unit(monkeypatch):
    module = _load_kb_module(monkeypatch)
    route = inspect.unwrap(module.check_embedding)
@ -1229,7 +1229,7 @@ def test_check_embedding_similarity_threshold_matrix_unit(monkeypatch):
    assert res["data"]["summary"]["avg_cos_sim"] > 0.9, res


-@pytest.mark.p2
+@pytest.mark.p3
 def test_check_embedding_error_and_empty_sample_paths_unit(monkeypatch):
    module = _load_kb_module(monkeypatch)
    route = inspect.unwrap(module.check_embedding)
--- a/test/testcases/test_web_api/test_kb_app/test_kb_tags_meta.py
+++ b/test/testcases/test_web_api/test_kb_app/test_kb_tags_meta.py
@ -16,7 +16,7 @@
 import uuid

 import pytest
-from common import (
+from test_web_api.common import (
    delete_knowledge_graph,
    kb_basic_info,
    kb_get_meta,
--- a/test/testcases/test_web_api/test_kb_app/test_list_kbs.py
+++ b/test/testcases/test_web_api/test_kb_app/test_list_kbs.py
@ -13,10 +13,11 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
+import json
 from concurrent.futures import ThreadPoolExecutor, as_completed

 import pytest
-from common import list_kbs
+from common import list_datasets
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth
 from utils import is_sorted
@ -32,7 +33,7 @@ class TestAuthorization:
        ],
    )
    def test_auth_invalid(self, invalid_auth, expected_code, expected_message):
-        res = list_kbs(invalid_auth)
+        res = list_datasets(invalid_auth)
        assert res["code"] == expected_code, res
        assert res["message"] == expected_message, res

@ -42,7 +43,7 @@ class TestCapability:
    def test_concurrent_list(self, WebApiAuth):
        count = 100
        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(list_kbs, WebApiAuth) for i in range(count)]
+            futures = [executor.submit(list_datasets, WebApiAuth) for i in range(count)]
        responses = list(as_completed(futures))
        assert len(responses) == count, responses
        assert all(future.result()["code"] == 0 for future in futures)
@ -52,15 +53,15 @@ class TestCapability:
 class TestDatasetsList:
    @pytest.mark.p2
    def test_params_unset(self, WebApiAuth):
-        res = list_kbs(WebApiAuth, None)
+        res = list_datasets(WebApiAuth, None)
        assert res["code"] == 0, res
-        assert len(res["data"]["kbs"]) == 5, res
+        assert len(res["data"]) == 5, res

    @pytest.mark.p2
    def test_params_empty(self, WebApiAuth):
-        res = list_kbs(WebApiAuth, {})
+        res = list_datasets(WebApiAuth, {})
        assert res["code"] == 0, res
-        assert len(res["data"]["kbs"]) == 5, res
+        assert len(res["data"]) == 5, res

    @pytest.mark.p1
    @pytest.mark.parametrize(
@ -75,9 +76,9 @@ class TestDatasetsList:
        ids=["normal_middle_page", "normal_last_partial_page", "beyond_max_page", "string_page_number", "full_data_single_page"],
    )
    def test_page(self, WebApiAuth, params, expected_page_size):
-        res = list_kbs(WebApiAuth, params)
+        res = list_datasets(WebApiAuth, params)
        assert res["code"] == 0, res
-        assert len(res["data"]["kbs"]) == expected_page_size, res
+        assert len(res["data"]) == expected_page_size, res

    @pytest.mark.skip
    @pytest.mark.p2
@ -90,16 +91,16 @@ class TestDatasetsList:
        ids=["page_0", "page_a"],
    )
    def test_page_invalid(self, WebApiAuth, params, expected_code, expected_message):
-        res = list_kbs(WebApiAuth, params=params)
+        res = list_datasets(WebApiAuth, params=params)
        assert res["code"] == expected_code, res
        assert expected_message in res["message"], res

    @pytest.mark.p2
    def test_page_none(self, WebApiAuth):
        params = {"page": None}
-        res = list_kbs(WebApiAuth, params)
+        res = list_datasets(WebApiAuth, params)
        assert res["code"] == 0, res
-        assert len(res["data"]["kbs"]) == 5, res
+        assert len(res["data"]) == 5, res

    @pytest.mark.p1
    @pytest.mark.parametrize(
@ -114,9 +115,9 @@ class TestDatasetsList:
        ids=["min_valid_page_size", "medium_page_size", "page_size_equals_total", "page_size_exceeds_total", "string_type_page_size"],
    )
    def test_page_size(self, WebApiAuth, params, expected_page_size):
-        res = list_kbs(WebApiAuth, params)
+        res = list_datasets(WebApiAuth, params)
        assert res["code"] == 0, res
-        assert len(res["data"]["kbs"]) == expected_page_size, res
+        assert len(res["data"]) == expected_page_size, res

    @pytest.mark.skip
    @pytest.mark.p2
@ -128,27 +129,27 @@ class TestDatasetsList:
        ],
    )
    def test_page_size_invalid(self, WebApiAuth, params, expected_code, expected_message):
-        res = list_kbs(WebApiAuth, params)
+        res = list_datasets(WebApiAuth, params)
        assert res["code"] == expected_code, res
        assert expected_message in res["message"], res

    @pytest.mark.p2
    def test_page_size_none(self, WebApiAuth):
        params = {"page_size": None}
-        res = list_kbs(WebApiAuth, params)
+        res = list_datasets(WebApiAuth, params)
        assert res["code"] == 0, res
-        assert len(res["data"]["kbs"]) == 5, res
+        assert len(res["data"]) == 5, res

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "params, assertions",
        [
-            ({"orderby": "update_time"}, lambda r: (is_sorted(r["data"]["kbs"], "update_time", True))),
+            ({"orderby": "update_time"}, lambda r: (is_sorted(r["data"], "update_time", True))),
        ],
        ids=["orderby_update_time"],
    )
    def test_orderby(self, WebApiAuth, params, assertions):
-        res = list_kbs(WebApiAuth, params)
+        res = list_datasets(WebApiAuth, params)
        assert res["code"] == 0, res
        if callable(assertions):
            assert assertions(res), res
@ -157,13 +158,13 @@ class TestDatasetsList:
    @pytest.mark.parametrize(
        "params, assertions",
        [
-            ({"desc": "True"}, lambda r: (is_sorted(r["data"]["kbs"], "update_time", True))),
-            ({"desc": "False"}, lambda r: (is_sorted(r["data"]["kbs"], "update_time", False))),
+            ({"desc": "True"}, lambda r: (is_sorted(r["data"], "update_time", True))),
+            ({"desc": "False"}, lambda r: (is_sorted(r["data"], "update_time", False))),
        ],
        ids=["desc=True", "desc=False"],
    )
    def test_desc(self, WebApiAuth, params, assertions):
-        res = list_kbs(WebApiAuth, params)
+        res = list_datasets(WebApiAuth, params)

        assert res["code"] == 0, res
        if callable(assertions):
@ -173,29 +174,28 @@ class TestDatasetsList:
    @pytest.mark.parametrize(
        "params, expected_page_size",
        [
-            ({"parser_id": "naive"}, 5),
-            ({"parser_id": "qa"}, 0),
+            ({"ext": json.dumps({"parser_id": "naive"})}, 5),
+            ({"ext": json.dumps({"parser_id": "qa"})}, 0),
        ],
        ids=["naive", "dqa"],
    )
    def test_parser_id(self, WebApiAuth, params, expected_page_size):
-        res = list_kbs(WebApiAuth, params)
+        res = list_datasets(WebApiAuth, params)
        assert res["code"] == 0, res
-        assert len(res["data"]["kbs"]) == expected_page_size, res
+        assert len(res["data"]) == expected_page_size, res

    @pytest.mark.p2
    def test_owner_ids_payload_mode(self, WebApiAuth):
-        base_res = list_kbs(WebApiAuth, {"page_size": 10})
+        base_res = list_datasets(WebApiAuth, {"page_size": 10})
        assert base_res["code"] == 0, base_res
-        assert base_res["data"]["kbs"], base_res
-        owner_id = base_res["data"]["kbs"][0]["tenant_id"]
+        assert base_res["data"], base_res
+        owner_id = base_res["data"][0]["tenant_id"]

-        res = list_kbs(
+        res = list_datasets(
            WebApiAuth,
-            params={"page": 1, "page_size": 2, "desc": "false"},
-            payload={"owner_ids": [owner_id]},
+            params={"page": 1, "page_size": 2, "desc": "false", "ext": json.dumps({"owner_ids": [owner_id]})},
        )
        assert res["code"] == 0, res
-        assert res["data"]["total"] >= len(res["data"]["kbs"]), res
-        assert len(res["data"]["kbs"]) <= 2, res
-        assert all(kb["tenant_id"] == owner_id for kb in res["data"]["kbs"]), res
+        assert res["total_datasets"] >= len(res["data"]), res
+        assert len(res["data"]) <= 2, res
+        assert all(kb["tenant_id"] == owner_id for kb in res["data"]), res
--- a/test/testcases/test_web_api/test_kb_app/test_rm_kb.py
+++ b/test/testcases/test_web_api/test_kb_app/test_rm_kb.py
@ -16,8 +16,8 @@

 import pytest
 from common import (
-    list_kbs,
-    rm_kb,
+    list_datasets,
+    delete_datasets,
 )
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth
@ -33,7 +33,7 @@ class TestAuthorization:
        ],
    )
    def test_auth_invalid(self, invalid_auth, expected_code, expected_message):
-        res = rm_kb(invalid_auth)
+        res = delete_datasets(invalid_auth)
        assert res["code"] == expected_code, res
        assert res["message"] == expected_message, res

@ -42,20 +42,20 @@ class TestDatasetsDelete:
    @pytest.mark.p1
    def test_kb_id(self, WebApiAuth, add_datasets_func):
        kb_ids = add_datasets_func
-        payload = {"kb_id": kb_ids[0]}
-        res = rm_kb(WebApiAuth, payload)
+        payload = {"ids": [kb_ids[0]]}
+        res = delete_datasets(WebApiAuth, payload)
        assert res["code"] == 0, res

-        res = list_kbs(WebApiAuth)
-        assert len(res["data"]["kbs"]) == 2, res
+        res = list_datasets(WebApiAuth)
+        assert len(res["data"]) == 2, res

    @pytest.mark.p2
    @pytest.mark.usefixtures("add_dataset_func")
    def test_id_wrong_uuid(self, WebApiAuth):
-        payload = {"kb_id": "d94a8dc02c9711f0930f7fbc369eab6d"}
-        res = rm_kb(WebApiAuth, payload)
-        assert res["code"] == 109, res
-        assert "No authorization." in res["message"], res
+        payload = {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"]}
+        res = delete_datasets(WebApiAuth, payload)
+        assert res["code"] == 102, res
+        assert "lacks permission" in res["message"], res

-        res = list_kbs(WebApiAuth)
-        assert len(res["data"]["kbs"]) == 1, res
+        res = list_datasets(WebApiAuth)
+        assert len(res["data"]) == 1, res
--- a/test/testcases/test_web_api/test_kb_app/test_update_kb.py
+++ b/test/testcases/test_web_api/test_kb_app/test_update_kb.py
@ -17,7 +17,7 @@ import os
 from concurrent.futures import ThreadPoolExecutor, as_completed

 import pytest
-from common import update_kb
+from test_web_api.common import update_dataset
 from configs import DATASET_NAME_LIMIT, INVALID_API_TOKEN
 from hypothesis import HealthCheck, example, given, settings
 from libs.auth import RAGFlowWebApiAuth
@ -37,7 +37,7 @@ class TestAuthorization:
        ids=["empty_auth", "invalid_api_token"],
    )
    def test_auth_invalid(self, invalid_auth, expected_code, expected_message):
-        res = update_kb(invalid_auth, "dataset_id")
+        res = update_dataset(invalid_auth, "dataset_id")
        assert res["code"] == expected_code, res
        assert res["message"] == expected_message, res

@ -50,13 +50,13 @@ class TestCapability:
        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [
                executor.submit(
-                    update_kb,
+                    update_dataset,
                    WebApiAuth,
+                    dataset_id,
                    {
-                        "kb_id": dataset_id,
                        "name": f"dataset_{i}",
                        "description": "",
-                        "parser_id": "naive",
+                        "chunk_method": "naive",
                    },
                )
                for i in range(count)
@ -69,8 +69,8 @@ class TestCapability:
 class TestDatasetUpdate:
    @pytest.mark.p3
    def test_dataset_id_not_uuid(self, WebApiAuth):
-        payload = {"name": "not uuid", "description": "", "parser_id": "naive", "kb_id": "not_uuid"}
-        res = update_kb(WebApiAuth, payload)
+        payload = {"name": "not uuid", "description": "", "chunk_method": "naive"}
+        res = update_dataset(WebApiAuth, "not_uuid", payload)
        assert res["code"] == 109, res
        assert "No authorization." in res["message"], res

@ -81,8 +81,8 @@ class TestDatasetUpdate:
    @settings(max_examples=20, suppress_health_check=[HealthCheck.function_scoped_fixture], deadline=None)
    def test_name(self, WebApiAuth, add_dataset_func, name):
        dataset_id = add_dataset_func
-        payload = {"name": name, "description": "", "parser_id": "naive", "kb_id": dataset_id}
-        res = update_kb(WebApiAuth, payload)
+        payload = {"name": name, "description": "", "chunk_method": "naive"}
+        res = update_dataset(WebApiAuth, dataset_id, payload)
        assert res["code"] == 0, res
        assert res["data"]["name"] == name, res

@ -90,27 +90,27 @@ class TestDatasetUpdate:
    @pytest.mark.parametrize(
        "name, expected_message",
        [
-            ("", "Dataset name can't be empty."),
-            (" ", "Dataset name can't be empty."),
-            ("a" * (DATASET_NAME_LIMIT + 1), "Dataset name length is 129 which is large than 128"),
-            (0, "Dataset name must be string."),
-            (None, "Dataset name must be string."),
+            ("", "Field: <name> - Message: <String should have at least 1 character>"),
+            (" ", "Field: <name> - Message: <String should have at least 1 character>"),
+            ("a" * (DATASET_NAME_LIMIT + 1), "Field: <name> - Message: <String should have at most 128 characters>"),
+            (0, "Field: <name> - Message: <Input should be a valid string>"),
+            (None, "Field: <name> - Message: <Input should be a valid string>"),
        ],
        ids=["empty_name", "space_name", "too_long_name", "invalid_name", "None_name"],
    )
    def test_name_invalid(self, WebApiAuth, add_dataset_func, name, expected_message):
        kb_id = add_dataset_func
-        payload = {"name": name, "description": "", "parser_id": "naive", "kb_id": kb_id}
-        res = update_kb(WebApiAuth, payload)
-        assert res["code"] == 102, res
+        payload = {"name": name, "description": "", "chunk_method": "naive"}
+        res = update_dataset(WebApiAuth, kb_id, payload)
+        assert res["code"] == 101, res
        assert expected_message in res["message"], res

    @pytest.mark.p3
    def test_name_duplicated(self, WebApiAuth, add_datasets_func):
        kb_id = add_datasets_func[0]
        name = "kb_1"
-        payload = {"name": name, "description": "", "parser_id": "naive", "kb_id": kb_id}
-        res = update_kb(WebApiAuth, payload)
+        payload = {"name": name, "description": "", "chunk_method": "naive"}
+        res = update_dataset(WebApiAuth, kb_id, payload)
        assert res["code"] == 102, res
        assert res["message"] == "Duplicated dataset name.", res

@ -118,8 +118,8 @@ class TestDatasetUpdate:
    def test_name_case_insensitive(self, WebApiAuth, add_datasets_func):
        kb_id = add_datasets_func[0]
        name = "KB_1"
-        payload = {"name": name, "description": "", "parser_id": "naive", "kb_id": kb_id}
-        res = update_kb(WebApiAuth, payload)
+        payload = {"name": name, "description": "", "chunk_method": "naive"}
+        res = update_dataset(WebApiAuth, kb_id, payload)
        assert res["code"] == 102, res
        assert res["message"] == "Duplicated dataset name.", res

@ -130,19 +130,18 @@ class TestDatasetUpdate:
        payload = {
            "name": "avatar",
            "description": "",
-            "parser_id": "naive",
-            "kb_id": kb_id,
+            "chunk_method": "naive",
            "avatar": f"data:image/png;base64,{encode_avatar(fn)}",
        }
-        res = update_kb(WebApiAuth, payload)
+        res = update_dataset(WebApiAuth, kb_id, payload)
        assert res["code"] == 0, res
        assert res["data"]["avatar"] == f"data:image/png;base64,{encode_avatar(fn)}", res

    @pytest.mark.p2
    def test_description(self, WebApiAuth, add_dataset_func):
        kb_id = add_dataset_func
-        payload = {"name": "description", "description": "description", "parser_id": "naive", "kb_id": kb_id}
-        res = update_kb(WebApiAuth, payload)
+        payload = {"name": "description", "description": "description", "chunk_method": "naive"}
+        res = update_dataset(WebApiAuth, kb_id, payload)
        assert res["code"] == 0, res
        assert res["data"]["description"] == "description", res

@ -157,10 +156,10 @@ class TestDatasetUpdate:
    )
    def test_embedding_model(self, WebApiAuth, add_dataset_func, embedding_model):
        kb_id = add_dataset_func
-        payload = {"name": "embedding_model", "description": "", "parser_id": "naive", "kb_id": kb_id, "embd_id": embedding_model}
-        res = update_kb(WebApiAuth, payload)
+        payload = {"name": "embedding_model", "description": "", "chunk_method": "naive", "embedding_model": embedding_model}
+        res = update_dataset(WebApiAuth, kb_id, payload)
        assert res["code"] == 0, res
-        assert res["data"]["embd_id"] == embedding_model, res
+        assert res["data"]["embedding_model"] == embedding_model, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
@ -173,8 +172,8 @@ class TestDatasetUpdate:
    )
    def test_permission(self, WebApiAuth, add_dataset_func, permission):
        kb_id = add_dataset_func
-        payload = {"name": "permission", "description": "", "parser_id": "naive", "kb_id": kb_id, "permission": permission}
-        res = update_kb(WebApiAuth, payload)
+        payload = {"name": "permission", "description": "", "chunk_method": "naive", "permission": permission}
+        res = update_dataset(WebApiAuth, kb_id, payload)
        assert res["code"] == 0, res
        assert res["data"]["permission"] == permission.lower().strip(), res

@@ -199,17 +198,17 @@ class TestDatasetUpdate:
    )
    def test_chunk_method(self, WebApiAuth, add_dataset_func, chunk_method):
        kb_id = add_dataset_func
-        payload = {"name": "chunk_method", "description": "", "parser_id": chunk_method, "kb_id": kb_id}
-        res = update_kb(WebApiAuth, payload)
+        payload = {"name": "chunk_method", "description": "", "chunk_method": chunk_method}
+        res = update_dataset(WebApiAuth, kb_id, payload)
        assert res["code"] == 0, res
-        assert res["data"]["parser_id"] == chunk_method, res
+        assert res["data"]["chunk_method"] == chunk_method, res

    @pytest.mark.p1
    @pytest.mark.skipif(os.getenv("DOC_ENGINE") != "infinity", reason="Infinity does not support parser_id=tag")
    def test_chunk_method_tag_with_infinity(self, WebApiAuth, add_dataset_func):
        kb_id = add_dataset_func
-        payload = {"name": "chunk_method", "description": "", "parser_id": "tag", "kb_id": kb_id}
-        res = update_kb(WebApiAuth, payload)
+        payload = {"name": "chunk_method", "description": "", "chunk_method": "tag"}
+        res = update_dataset(WebApiAuth, kb_id, payload)
        assert res["code"] == 103, res
        assert res["message"] == "The chunking method Tag has not been supported by Infinity yet.", res

@@ -218,8 +217,8 @@ class TestDatasetUpdate:
    @pytest.mark.parametrize("pagerank", [0, 50, 100], ids=["min", "mid", "max"])
    def test_pagerank(self, WebApiAuth, add_dataset_func, pagerank):
        kb_id = add_dataset_func
-        payload = {"name": "pagerank", "description": "", "parser_id": "naive", "kb_id": kb_id, "pagerank": pagerank}
-        res = update_kb(WebApiAuth, payload)
+        payload = {"name": "pagerank", "description": "", "chunk_method": "naive", "pagerank": pagerank}
+        res = update_dataset(WebApiAuth, kb_id, payload)
        assert res["code"] == 0, res
        assert res["data"]["pagerank"] == pagerank, res

@@ -227,13 +226,13 @@ class TestDatasetUpdate:
    @pytest.mark.p2
    def test_pagerank_set_to_0(self, WebApiAuth, add_dataset_func):
        kb_id = add_dataset_func
-        payload = {"name": "pagerank", "description": "", "parser_id": "naive", "kb_id": kb_id, "pagerank": 50}
-        res = update_kb(WebApiAuth, payload)
+        payload = {"name": "pagerank", "description": "", "chunk_method": "naive", "pagerank": 50}
+        res = update_dataset(WebApiAuth, kb_id, payload)
        assert res["code"] == 0, res
        assert res["data"]["pagerank"] == 50, res

-        payload = {"name": "pagerank", "description": "", "parser_id": "naive", "kb_id": kb_id, "pagerank": 0}
-        res = update_kb(WebApiAuth, payload)
+        payload = {"name": "pagerank", "description": "", "chunk_method": "naive", "pagerank": 0}
+        res = update_dataset(WebApiAuth, kb_id, payload)
        assert res["code"] == 0, res
        assert res["data"]["pagerank"] == 0, res

@@ -241,8 +240,8 @@ class TestDatasetUpdate:
    @pytest.mark.p2
    def test_pagerank_infinity(self, WebApiAuth, add_dataset_func):
        kb_id = add_dataset_func
-        payload = {"name": "pagerank", "description": "", "parser_id": "naive", "kb_id": kb_id, "pagerank": 50}
-        res = update_kb(WebApiAuth, payload)
+        payload = {"name": "pagerank", "description": "", "chunk_method": "naive", "pagerank": 50}
+        res = update_dataset(WebApiAuth, kb_id, payload)
        assert res["code"] == 102, res
        assert res["message"] == "'pagerank' can only be set when doc_engine is elasticsearch", res

@@ -352,10 +351,15 @@ class TestDatasetUpdate:
    )
    def test_parser_config(self, WebApiAuth, add_dataset_func, parser_config):
        kb_id = add_dataset_func
-        payload = {"name": "parser_config", "description": "", "parser_id": "naive", "kb_id": kb_id, "parser_config": parser_config}
-        res = update_kb(WebApiAuth, payload)
+        payload = {"name": "parser_config", "description": "", "chunk_method": "naive", "parser_config": parser_config}
+        res = update_dataset(WebApiAuth, kb_id, payload)
        assert res["code"] == 0, res
-        assert res["data"]["parser_config"] == parser_config, res
+        for key, value in parser_config.items():
+            if not isinstance(value, dict):
+                assert res["data"]["parser_config"].get(key) == value, res
+            else:
+                for sub_key, sub_value in value.items():
+                    assert res["data"]["parser_config"].get(key, {}).get(sub_key) == sub_value, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
@@ -372,7 +376,7 @@ class TestDatasetUpdate:
    )
    def test_field_unsupported(self, WebApiAuth, add_dataset_func, payload):
        kb_id = add_dataset_func
-        full_payload = {"name": "field_unsupported", "description": "", "parser_id": "naive", "kb_id": kb_id, **payload}
-        res = update_kb(WebApiAuth, full_payload)
+        full_payload = {"name": "field_unsupported", "description": "", "chunk_method": "naive", **payload}
+        res = update_dataset(WebApiAuth, kb_id, full_payload)
        assert res["code"] == 101, res
-        assert "isn't allowed" in res["message"], res
+        assert "are not permitted" in res["message"], res