Refactor dataset / kb API to RESTFul style (#13263)

### What problem does this PR solve?

1. Split dataset api to gateway and service, and modify web UI to use
restful http api.
2. Old KB releated APIs are commented.

### Type of change

- [x] Refactoring
This commit is contained in:
Lynn
2026-03-13 20:02:35 +08:00
committed by GitHub
parent 8b984c9d5f
commit 7c32e206be
53 changed files with 1721 additions and 1207 deletions

View File

@ -27,6 +27,7 @@ from utils.file_utils import create_txt_file
HEADERS = {"Content-Type": "application/json"}
KB_APP_URL = f"/{VERSION}/kb"
DATASETS_URL = f"/api/{VERSION}/datasets"
DOCUMENT_APP_URL = f"/{VERSION}/document"
CHUNK_API_URL = f"/{VERSION}/chunk"
DIALOG_APP_URL = f"/{VERSION}/dialog"
@ -168,25 +169,28 @@ def search_rm(auth, payload=None, *, headers=HEADERS, data=None):
# KB APP
def create_kb(auth, payload=None, *, headers=HEADERS, data=None):
res = requests.post(url=f"{HOST_ADDRESS}{KB_APP_URL}/create", headers=headers, auth=auth, json=payload, data=data)
def create_dataset(auth, payload=None, *, headers=HEADERS, data=None):
res = requests.post(url=f"{HOST_ADDRESS}{DATASETS_URL}", headers=headers, auth=auth, json=payload, data=data)
return res.json()
def list_kbs(auth, params=None, payload=None, *, headers=HEADERS, data=None):
if payload is None:
payload = {}
res = requests.post(url=f"{HOST_ADDRESS}{KB_APP_URL}/list", headers=headers, auth=auth, params=params, json=payload, data=data)
def list_datasets(auth, params=None, *, headers=HEADERS):
res = requests.get(url=f"{HOST_ADDRESS}{DATASETS_URL}", headers=headers, auth=auth, params=params)
return res.json()
def update_kb(auth, payload=None, *, headers=HEADERS, data=None):
res = requests.post(url=f"{HOST_ADDRESS}{KB_APP_URL}/update", headers=headers, auth=auth, json=payload, data=data)
def update_dataset(auth, dataset_id, payload=None, *, headers=HEADERS, data=None):
res = requests.put(url=f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}", headers=headers, auth=auth, json=payload, data=data)
return res.json()
def rm_kb(auth, payload=None, *, headers=HEADERS, data=None):
res = requests.post(url=f"{HOST_ADDRESS}{KB_APP_URL}/rm", headers=headers, auth=auth, json=payload, data=data)
def delete_datasets(auth, payload=None, *, headers=HEADERS, data=None):
"""
Delete datasets.
The endpoint is DELETE /api/{VERSION}/datasets with payload {"ids": [...]}
This is the standard SDK REST API endpoint for dataset deletion.
"""
res = requests.delete(url=f"{HOST_ADDRESS}{DATASETS_URL}", headers=headers, auth=auth, json=payload, data=data)
return res.json()
@ -236,23 +240,43 @@ def kb_pipeline_log_detail(auth, params=None, *, headers=HEADERS):
return res.json()
def kb_run_graphrag(auth, payload=None, *, headers=HEADERS, data=None):
res = requests.post(url=f"{HOST_ADDRESS}{KB_APP_URL}/run_graphrag", headers=headers, auth=auth, json=payload, data=data)
# DATASET GRAPH AND TASKS
def knowledge_graph(auth, dataset_id, params=None):
url = f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}/knowledge_graph"
res = requests.get(url=url, headers=HEADERS, auth=auth, params=params)
return res.json()
def kb_trace_graphrag(auth, params=None, *, headers=HEADERS):
res = requests.get(url=f"{HOST_ADDRESS}{KB_APP_URL}/trace_graphrag", headers=headers, auth=auth, params=params)
def delete_knowledge_graph(auth, dataset_id, payload=None):
url = f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}/knowledge_graph"
if payload is None:
res = requests.delete(url=url, headers=HEADERS, auth=auth)
else:
res = requests.delete(url=url, headers=HEADERS, auth=auth, json=payload)
return res.json()
def kb_run_raptor(auth, payload=None, *, headers=HEADERS, data=None):
res = requests.post(url=f"{HOST_ADDRESS}{KB_APP_URL}/run_raptor", headers=headers, auth=auth, json=payload, data=data)
def run_graphrag(auth, dataset_id, payload=None):
url = f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}/run_graphrag"
res = requests.post(url=url, headers=HEADERS, auth=auth, json=payload)
return res.json()
def kb_trace_raptor(auth, params=None, *, headers=HEADERS):
res = requests.get(url=f"{HOST_ADDRESS}{KB_APP_URL}/trace_raptor", headers=headers, auth=auth, params=params)
def trace_graphrag(auth, dataset_id, params=None):
url = f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}/trace_graphrag"
res = requests.get(url=url, headers=HEADERS, auth=auth, params=params)
return res.json()
def run_raptor(auth, dataset_id, payload=None):
url = f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}/run_raptor"
res = requests.post(url=url, headers=HEADERS, auth=auth, json=payload)
return res.json()
def trace_raptor(auth, dataset_id, params=None):
url = f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}/trace_raptor"
res = requests.get(url=url, headers=HEADERS, auth=auth, params=params)
return res.json()
@ -286,21 +310,11 @@ def rename_tags(auth, dataset_id, payload=None, *, headers=HEADERS, data=None):
return res.json()
def knowledge_graph(auth, dataset_id, params=None, *, headers=HEADERS):
res = requests.get(url=f"{HOST_ADDRESS}{KB_APP_URL}/{dataset_id}/knowledge_graph", headers=headers, auth=auth, params=params)
return res.json()
def delete_knowledge_graph(auth, dataset_id, payload=None, *, headers=HEADERS, data=None):
res = requests.delete(url=f"{HOST_ADDRESS}{KB_APP_URL}/{dataset_id}/knowledge_graph", headers=headers, auth=auth, json=payload, data=data)
return res.json()
def batch_create_datasets(auth, num):
ids = []
for i in range(num):
res = create_kb(auth, {"name": f"kb_{i}"})
ids.append(res["data"]["kb_id"])
res = create_dataset(auth, {"name": f"kb_{i}"})
ids.append(res["data"]["id"])
return ids