Feat: add switch_chunks endpoint to manage chunk availability (#13435)

### What problem does this commit solve? This commit introduces a new API endpoint `/datasets/<dataset_id>/documents/<document_id>/chunks/switch` that allows users to switch the availability status of specified chunks in a document, in the same way as the switch endpoint in chunk_app.py. ### Type of change - [x] New Feature (non-breaking change which adds functionality)
2026-04-25 13:05:58 +08:00 · 2026-03-09 12:36:45 +08:00
parent 32d31284cc
commit 3ce236c4e3
2 changed files with 176 additions and 0 deletions
--- a/api/apps/sdk/doc.py
+++ b/api/apps/sdk/doc.py
@ -45,6 +45,7 @@ from rag.app.tag import label_question
 from rag.nlp import rag_tokenizer, search
 from rag.prompts.generator import cross_languages, keyword_extraction
 from common.string_utils import remove_redundant_spaces
+from common.misc_utils import thread_pool_exec
 from common.constants import RetCode, LLMType, ParserType, TaskStatus, FileSource
 from common import settings

@ -1477,6 +1478,86 @@ async def update_chunk(tenant_id, dataset_id, document_id, chunk_id):
    return get_result()


+@manager.route(  # noqa: F821
+    "/datasets/<dataset_id>/documents/<document_id>/chunks/switch", methods=["POST"]
+)
+@token_required
+async def switch_chunks(tenant_id, dataset_id, document_id):
+    """
+    Switch availability of specified chunks (same as chunk_app switch).
+    ---
+    tags:
+      - Chunks
+    security:
+      - ApiKeyAuth: []
+    parameters:
+      - in: path
+        name: dataset_id
+        type: string
+        required: true
+        description: ID of the dataset.
+      - in: path
+        name: document_id
+        type: string
+        required: true
+        description: ID of the document.
+      - in: body
+        name: body
+        required: true
+        schema:
+          type: object
+          properties:
+            chunk_ids:
+              type: array
+              items:
+                type: string
+              description: List of chunk IDs to switch.
+            available_int:
+              type: integer
+              description: 1 for available, 0 for unavailable.
+            available:
+              type: boolean
+              description: Availability status (alternative to available_int).
+      - in: header
+        name: Authorization
+        type: string
+        required: true
+        description: Bearer token for authentication.
+    responses:
+      200:
+        description: Chunks availability switched successfully.
+    """
+    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
+        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
+    req = await get_request_json()
+    if not req.get("chunk_ids"):
+        return get_error_data_result(message="`chunk_ids` is required.")
+    if "available_int" not in req and "available" not in req:
+        return get_error_data_result(message="`available_int` or `available` is required.")
+    available_int = int(req["available_int"]) if "available_int" in req else (1 if req.get("available") else 0)
+    try:
+
+        def _switch_sync():
+            e, doc = DocumentService.get_by_id(document_id)
+            if not e:
+                return get_error_data_result(message="Document not found!")
+            if not doc or str(doc.kb_id) != str(dataset_id):
+                return get_error_data_result(message="Document not found!")
+            for cid in req["chunk_ids"]:
+                if not settings.docStoreConn.update(
+                    {"id": cid},
+                    {"available_int": available_int},
+                    search.index_name(tenant_id),
+                    doc.kb_id,
+                ):
+                    return get_error_data_result(message="Index updating failure")
+            return get_result(data=True)
+
+        return await thread_pool_exec(_switch_sync)
+    except Exception as e:
+        return server_error_response(e)
+
+
@manager.route("/retrieval", methods=["POST"])  # noqa: F821
@token_required
 async def retrieval_test(tenant_id):