mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-03-11 02:07:57 +08:00
Feat: add switch_chunks endpoint to manage chunk availability (#13435)
### What problem does this commit solve? This commit introduces a new API endpoint `/datasets/<dataset_id>/documents/<document_id>/chunks/switch` that allows users to switch the availability status of specified chunks in a document, in the same way as the existing switch endpoint in chunk_app.py. ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -45,6 +45,7 @@ from rag.app.tag import label_question
|
||||
from rag.nlp import rag_tokenizer, search
|
||||
from rag.prompts.generator import cross_languages, keyword_extraction
|
||||
from common.string_utils import remove_redundant_spaces
|
||||
from common.misc_utils import thread_pool_exec
|
||||
from common.constants import RetCode, LLMType, ParserType, TaskStatus, FileSource
|
||||
from common import settings
|
||||
|
||||
@ -1477,6 +1478,86 @@ async def update_chunk(tenant_id, dataset_id, document_id, chunk_id):
|
||||
return get_result()
|
||||
|
||||
|
||||
@manager.route(  # noqa: F821
    "/datasets/<dataset_id>/documents/<document_id>/chunks/switch", methods=["POST"]
)
@token_required
async def switch_chunks(tenant_id, dataset_id, document_id):
    """
    Switch availability of specified chunks (same as chunk_app switch).
    ---
    tags:
      - Chunks
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document.
      - in: body
        name: body
        required: true
        schema:
          type: object
          properties:
            chunk_ids:
              type: array
              items:
                type: string
              description: List of chunk IDs to switch.
            available_int:
              type: integer
              description: 1 for available, 0 for unavailable.
            available:
              type: boolean
              description: Availability status (alternative to available_int).
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Chunks availability switched successfully.
    """
    # Request validation happens up front so that malformed input is reported
    # as a regular error result instead of surfacing as an unhandled exception.
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")

    req = await get_request_json()

    chunk_ids = req.get("chunk_ids")
    if not chunk_ids:
        return get_error_data_result(message="`chunk_ids` is required.")
    # A bare string would otherwise be iterated character by character in the
    # update loop, issuing one bogus update per character.
    if not isinstance(chunk_ids, list):
        return get_error_data_result(message="`chunk_ids` must be a list of chunk IDs.")

    if "available_int" not in req and "available" not in req:
        return get_error_data_result(message="`available_int` or `available` is required.")

    # `available_int` takes precedence over `available` when both are supplied.
    if "available_int" in req:
        try:
            available_int = int(req["available_int"])
        except (TypeError, ValueError):
            # e.g. null, "abc", or a nested object
            return get_error_data_result(message="`available_int` must be 0 or 1.")
        if available_int not in (0, 1):
            return get_error_data_result(message="`available_int` must be 0 or 1.")
    else:
        available_int = 1 if req.get("available") else 0

    try:

        def _switch_sync():
            # Blocking DB / doc-store work; executed off the event loop below.
            found, doc = DocumentService.get_by_id(document_id)
            # The document must exist and belong to the dataset named in the URL.
            if not found or not doc or str(doc.kb_id) != str(dataset_id):
                return get_error_data_result(message="Document not found!")

            index_name = search.index_name(tenant_id)
            for cid in chunk_ids:
                # Stop at the first failed update; earlier updates are not rolled back.
                if not settings.docStoreConn.update(
                    {"id": cid},
                    {"available_int": available_int},
                    index_name,
                    doc.kb_id,
                ):
                    return get_error_data_result(message="Index updating failure")
            return get_result(data=True)

        return await thread_pool_exec(_switch_sync)
    except Exception as e:
        return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route("/retrieval", methods=["POST"]) # noqa: F821
|
||||
@token_required
|
||||
async def retrieval_test(tenant_id):
|
||||
|
||||
@ -2220,6 +2220,101 @@ Failure:
|
||||
|
||||
---
|
||||
|
||||
### Switch chunks availability
|
||||
|
||||
**POST** `/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks/switch`
|
||||
|
||||
Switches the availability of specified chunks (enable or disable chunks for retrieval).
|
||||
|
||||
#### Request
|
||||
|
||||
- Method: POST
|
||||
- URL: `/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks/switch`
|
||||
- Headers:
|
||||
- `'Content-Type: application/json'`
|
||||
- `'Authorization: Bearer <YOUR_API_KEY>'`
|
||||
- Body:
|
||||
- `"chunk_ids"`: `list[string]` (*Required*) List of chunk IDs to switch.
|
||||
- `"available_int"`: `integer` (*Optional*) `1` for available, `0` for unavailable. Takes precedence over `"available"` when both are provided.
|
||||
- `"available"`: `boolean` (*Optional*) Availability status. Used only when `"available_int"` is not provided. Either `available_int` or `available` must be provided.
|
||||
|
||||
##### Request example
|
||||
|
||||
```bash
|
||||
curl --request POST \
|
||||
--url http://{address}/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks/switch \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer <YOUR_API_KEY>' \
|
||||
--data '
|
||||
{
|
||||
"chunk_ids": ["chunk_id_1", "chunk_id_2"],
|
||||
"available_int": 1
|
||||
}'
|
||||
```
|
||||
|
||||
##### Request parameters
|
||||
|
||||
- `dataset_id`: (*Path parameter*)
|
||||
The ID of the dataset.
|
||||
- `document_id`: (*Path parameter*)
|
||||
The ID of the document.
|
||||
- `"chunk_ids"`: (*Body parameter*), `list[string]`, *Required*
|
||||
List of chunk IDs whose availability is to be switched.
|
||||
- `"available_int"`: (*Body parameter*), `integer`
|
||||
`1` for available (chunk participates in retrieval), `0` for unavailable. Either this or `"available"` must be provided.
|
||||
- `"available"`: (*Body parameter*), `boolean`
|
||||
Availability status. `true` for available, `false` for unavailable. Alternative to `"available_int"`.
|
||||
|
||||
#### Response
|
||||
|
||||
Success:
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 0,
|
||||
"data": true
|
||||
}
|
||||
```
|
||||
|
||||
Failure:
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 101,
|
||||
"message": "You don't own the dataset {dataset_id}."
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 101,
|
||||
"message": "`chunk_ids` is required."
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 101,
|
||||
"message": "`available_int` or `available` is required."
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 101,
|
||||
"message": "Document not found!"
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 101,
|
||||
"message": "Index updating failure"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Retrieve a metadata summary from a dataset
|
||||
|
||||
**GET** `/api/v1/datasets/{dataset_id}/metadata/summary`
|
||||
|
||||
Reference in New Issue
Block a user