# Helper used to run the blocking switch logic off the request coroutine.
from common.misc_utils import thread_pool_exec


@manager.route(  # noqa: F821
    "/datasets/<dataset_id>/documents/<document_id>/chunks/switch", methods=["POST"]
)
@token_required
async def switch_chunks(tenant_id, dataset_id, document_id):
    """
    Switch availability of specified chunks (same as chunk_app switch).
    ---
    tags:
      - Chunks
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document.
      - in: body
        name: body
        required: true
        schema:
          type: object
          properties:
            chunk_ids:
              type: array
              items:
                type: string
              description: List of chunk IDs to switch.
            available_int:
              type: integer
              description: 1 for available, 0 for unavailable.
            available:
              type: boolean
              description: Availability status (alternative to available_int).
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Chunks availability switched successfully.
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")

    req = await get_request_json()

    chunk_ids = req.get("chunk_ids")
    if not chunk_ids:
        return get_error_data_result(message="`chunk_ids` is required.")
    # A bare string would otherwise be iterated character by character below.
    if not isinstance(chunk_ids, list):
        return get_error_data_result(message="`chunk_ids` must be a list of chunk IDs.")

    if "available_int" not in req and "available" not in req:
        return get_error_data_result(message="`available_int` or `available` is required.")

    # `available_int` takes precedence when both fields are supplied.
    if "available_int" in req:
        try:
            available_int = int(req["available_int"])
        except (TypeError, ValueError):
            # Report malformed input as an API error instead of letting the
            # conversion failure escape the handler.
            return get_error_data_result(message="`available_int` must be 0 or 1.")
        if available_int not in (0, 1):
            return get_error_data_result(message="`available_int` must be 0 or 1.")
    else:
        available_int = 1 if req.get("available") else 0

    def _switch_sync():
        """Validate the document and update each chunk's `available_int` flag."""
        found, doc = DocumentService.get_by_id(document_id)
        # The document must exist and belong to the dataset named in the URL.
        if not found or not doc or str(doc.kb_id) != str(dataset_id):
            return get_error_data_result(message="Document not found!")

        index_name = search.index_name(tenant_id)  # loop-invariant
        # NOTE(review): the update condition matches on chunk id only, so this
        # does not check that each chunk belongs to `document_id` - confirm
        # whether the doc store should also be constrained by document.
        # NOTE(review): the loop stops at the first failed update; chunks
        # updated before the failure are not reverted.
        for chunk_id in chunk_ids:
            updated = settings.docStoreConn.update(
                {"id": chunk_id},
                {"available_int": available_int},
                index_name,
                doc.kb_id,
            )
            if not updated:
                return get_error_data_result(message="Index updating failure")
        return get_result(data=True)

    try:
        # Presumably runs the synchronous DB / doc-store calls on a worker
        # thread so they do not block the event loop - verify against
        # common.misc_utils.thread_pool_exec.
        return await thread_pool_exec(_switch_sync)
    except Exception as exc:
        return server_error_response(exc)
a/docs/references/http_api_reference.md +++ b/docs/references/http_api_reference.md @@ -2220,6 +2220,101 @@ Failure: --- +### Switch chunks availability + +**POST** `/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks/switch` + +Switches the availability of specified chunks (enable or disable chunks for retrieval). + +#### Request + +- Method: POST +- URL: `/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks/switch` +- Headers: + - `'Content-Type: application/json'` + - `'Authorization: Bearer <YOUR_API_KEY>'` +- Body: + - `"chunk_ids"`: `list[string]` (*Required*) List of chunk IDs to switch. + - `"available_int"`: `integer` (*Optional*) `1` for available, `0` for unavailable. Mutually exclusive with `"available"`. + - `"available"`: `boolean` (*Optional*) Availability status. Mutually exclusive with `"available_int"`. Must provide either `available_int` or `available`. + +##### Request example + +```bash +curl --request POST \ + --url http://{address}/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks/switch \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer <YOUR_API_KEY>' \ + --data ' + { + "chunk_ids": ["chunk_id_1", "chunk_id_2"], + "available_int": 1 + }' +``` + +##### Request parameters + +- `dataset_id`: (*Path parameter*) + The ID of the dataset. +- `document_id`: (*Path parameter*) + The ID of the document. +- `"chunk_ids"`: (*Body parameter*), `list[string]`, *Required* + List of chunk IDs whose availability is to be switched. +- `"available_int"`: (*Body parameter*), `integer` + `1` for available (chunk participates in retrieval), `0` for unavailable. Either this or `"available"` must be provided. +- `"available"`: (*Body parameter*), `boolean` + Availability status. `true` for available, `false` for unavailable. Alternative to `"available_int"`. + +#### Response + +Success: + +```json +{ + "code": 0, + "data": true +} +``` + +Failure: + +```json +{ + "code": 101, + "message": "You don't own the dataset {dataset_id}." 
+} +``` + +```json +{ + "code": 101, + "message": "`chunk_ids` is required." +} +``` + +```json +{ + "code": 101, + "message": "`available_int` or `available` is required." +} +``` + +```json +{ + "code": 101, + "message": "Document not found!" +} +``` + +```json +{ + "code": 101, + "message": "Index updating failure" +} +``` + +--- + ### Retrieve a metadata summary from a dataset **GET** `/api/v1/datasets/{dataset_id}/metadata/summary`