Merge branch 'feat/rag-2' into feat/add-dataset-service-api-enable

# Conflicts:
#	api/controllers/console/datasets/datasets.py
#	api/controllers/service_api/wraps.py
#	api/services/dataset_service.py
This commit is contained in:
jyong
2025-09-16 15:21:23 +08:00
843 changed files with 25061 additions and 16010 deletions

View File

@ -10,14 +10,50 @@ api = ExternalApi(
version="1.0",
title="Service API",
description="API for application services",
doc="/docs", # Enable Swagger UI at /v1/docs
)
service_api_ns = Namespace("service_api", description="Service operations", path="/")
from . import index
from .app import annotation, app, audio, completion, conversation, file, file_preview, message, site, workflow
from .dataset import dataset, document, hit_testing, metadata, segment, upload_file
from .app import (
annotation,
app,
audio,
completion,
conversation,
file,
file_preview,
message,
site,
workflow,
)
from .dataset import (
dataset,
document,
hit_testing,
metadata,
segment,
)
from .workspace import models
__all__ = [
"annotation",
"app",
"audio",
"completion",
"conversation",
"dataset",
"document",
"file",
"file_preview",
"hit_testing",
"index",
"message",
"metadata",
"models",
"segment",
"site",
"workflow",
]
api.add_namespace(service_api_ns)

View File

@ -165,7 +165,7 @@ class AnnotationUpdateDeleteApi(Resource):
def put(self, app_model: App, annotation_id):
"""Update an existing annotation."""
assert isinstance(current_user, Account)
if not current_user.is_editor:
if not current_user.has_edit_permission:
raise Forbidden()
annotation_id = str(annotation_id)
@ -189,7 +189,7 @@ class AnnotationUpdateDeleteApi(Resource):
"""Delete an annotation."""
assert isinstance(current_user, Account)
if not current_user.is_editor:
if not current_user.has_edit_permission:
raise Forbidden()
annotation_id = str(annotation_id)

View File

@ -29,7 +29,7 @@ class AppParameterApi(Resource):
Returns the input form parameters and configuration for the application.
"""
if app_model.mode in {AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value}:
if app_model.mode in {AppMode.ADVANCED_CHAT, AppMode.WORKFLOW}:
workflow = app_model.workflow
if workflow is None:
raise AppUnavailableError()

View File

@ -1,4 +1,5 @@
from flask_restx import Resource, reqparse
from flask_restx._http import HTTPStatus
from flask_restx.inputs import int_range
from sqlalchemy.orm import Session
from werkzeug.exceptions import BadRequest, NotFound
@ -121,7 +122,7 @@ class ConversationDetailApi(Resource):
}
)
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON))
@service_api_ns.marshal_with(build_conversation_delete_model(service_api_ns), code=204)
@service_api_ns.marshal_with(build_conversation_delete_model(service_api_ns), code=HTTPStatus.NO_CONTENT)
def delete(self, app_model: App, end_user: EndUser, c_id):
"""Delete a specific conversation."""
app_mode = AppMode.value_of(app_model.mode)

View File

@ -340,6 +340,9 @@ class DatasetApi(DatasetApiResource):
else:
data["embedding_available"] = True
# force update search method to keyword_search if indexing_technique is economic
data["retrieval_model_dict"]["search_method"] = "keyword_search"
if data.get("permission") == "partial_members":
part_users_list = DatasetPermissionService.get_dataset_partial_member_list(dataset_id_str)
data.update({"partial_member_list": part_users_list})
@ -559,7 +562,7 @@ class DatasetTagsApi(DatasetApiResource):
def post(self, _, dataset_id):
"""Add a knowledge type tag."""
assert isinstance(current_user, Account)
if not (current_user.is_editor or current_user.is_dataset_editor):
if not (current_user.has_edit_permission or current_user.is_dataset_editor):
raise Forbidden()
args = tag_create_parser.parse_args()
@ -583,7 +586,7 @@ class DatasetTagsApi(DatasetApiResource):
@validate_dataset_token
def patch(self, _, dataset_id):
assert isinstance(current_user, Account)
if not (current_user.is_editor or current_user.is_dataset_editor):
if not (current_user.has_edit_permission or current_user.is_dataset_editor):
raise Forbidden()
args = tag_update_parser.parse_args()
@ -610,7 +613,7 @@ class DatasetTagsApi(DatasetApiResource):
def delete(self, _, dataset_id):
"""Delete a knowledge type tag."""
assert isinstance(current_user, Account)
if not current_user.is_editor:
if not current_user.has_edit_permission:
raise Forbidden()
args = tag_delete_parser.parse_args()
TagService.delete_tag(args.get("tag_id"))
@ -634,7 +637,7 @@ class DatasetTagBindingApi(DatasetApiResource):
def post(self, _, dataset_id):
# The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
assert isinstance(current_user, Account)
if not (current_user.is_editor or current_user.is_dataset_editor):
if not (current_user.has_edit_permission or current_user.is_dataset_editor):
raise Forbidden()
args = tag_binding_parser.parse_args()
@ -660,7 +663,7 @@ class DatasetTagUnbindingApi(DatasetApiResource):
def post(self, _, dataset_id):
# The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
assert isinstance(current_user, Account)
if not (current_user.is_editor or current_user.is_dataset_editor):
if not (current_user.has_edit_permission or current_user.is_dataset_editor):
raise Forbidden()
args = tag_unbinding_parser.parse_args()

View File

@ -30,6 +30,7 @@ from extensions.ext_database import db
from fields.document_fields import document_fields, document_status_fields
from libs.login import current_user
from models.dataset import Dataset, Document, DocumentSegment
from models.model import EndUser
from services.dataset_service import DatasetService, DocumentService
from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig
from services.file_service import FileService
@ -303,6 +304,8 @@ class DocumentAddByFileApi(DatasetApiResource):
if not file.filename:
raise FilenameNotExistsError
if not isinstance(current_user, EndUser):
raise ValueError("Invalid user account")
upload_file = FileService(db.engine).upload_file(
filename=file.filename,
content=file.read(),
@ -391,6 +394,8 @@ class DocumentUpdateByFileApi(DatasetApiResource):
if not file.filename:
raise FilenameNotExistsError
if not isinstance(current_user, EndUser):
raise ValueError("Invalid user account")
try:
upload_file = FileService(db.engine).upload_file(
filename=file.filename,

View File

@ -1,65 +0,0 @@
from werkzeug.exceptions import NotFound
from controllers.service_api import service_api_ns
from controllers.service_api.wraps import (
DatasetApiResource,
)
from core.file import helpers as file_helpers
from extensions.ext_database import db
from models.dataset import Dataset
from models.model import UploadFile
from services.dataset_service import DocumentService
@service_api_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/upload-file")
class UploadFileApi(DatasetApiResource):
@service_api_ns.doc("get_upload_file")
@service_api_ns.doc(description="Get upload file information and download URL")
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
@service_api_ns.doc(
responses={
200: "Upload file information retrieved successfully",
401: "Unauthorized - invalid API token",
404: "Dataset, document, or upload file not found",
}
)
def get(self, tenant_id, dataset_id, document_id):
"""Get upload file information and download URL.
Returns information about an uploaded file including its download URL.
"""
# check dataset
dataset_id = str(dataset_id)
tenant_id = str(tenant_id)
dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
if not dataset:
raise NotFound("Dataset not found.")
# check document
document_id = str(document_id)
document = DocumentService.get_document(dataset.id, document_id)
if not document:
raise NotFound("Document not found.")
# check upload file
if document.data_source_type != "upload_file":
raise ValueError(f"Document data source type ({document.data_source_type}) is not upload_file.")
data_source_info = document.data_source_info_dict
if data_source_info and "upload_file_id" in data_source_info:
file_id = data_source_info["upload_file_id"]
upload_file = db.session.query(UploadFile).where(UploadFile.id == file_id).first()
if not upload_file:
raise NotFound("UploadFile not found.")
else:
raise ValueError("Upload file id not found in document data source info.")
url = file_helpers.get_signed_file_url(upload_file_id=upload_file.id)
return {
"id": upload_file.id,
"name": upload_file.name,
"size": upload_file.size,
"extension": upload_file.extension,
"url": url,
"download_url": f"{url}&as_attachment=true",
"mime_type": upload_file.mime_type,
"created_by": upload_file.created_by,
"created_at": upload_file.created_at.timestamp(),
}, 200

View File

@ -19,7 +19,7 @@ class ModelProviderAvailableModelApi(Resource):
}
)
@validate_dataset_token
def get(self, _, model_type):
def get(self, _, model_type: str):
"""Get available models by model type.
Returns a list of available models for the specified model type.

View File

@ -3,7 +3,7 @@ from collections.abc import Callable
from datetime import timedelta
from enum import StrEnum, auto
from functools import wraps
from typing import Optional, ParamSpec, TypeVar
from typing import Concatenate, ParamSpec, TypeVar
from flask import current_app, request
from flask_login import user_logged_in
@ -13,18 +13,18 @@ from sqlalchemy import select, update
from sqlalchemy.orm import Session
from werkzeug.exceptions import Forbidden, NotFound, Unauthorized
from core.file.constants import DEFAULT_SERVICE_API_USER_ID
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from libs.datetime_utils import naive_utc_now
from libs.login import _get_user
from libs.login import current_user
from models.account import Account, Tenant, TenantAccountJoin, TenantStatus
from models.dataset import Dataset, RateLimitLog
from models.model import ApiToken, App, EndUser
from models.model import ApiToken, App, DefaultEndUserSessionID, EndUser
from services.feature_service import FeatureService
P = ParamSpec("P")
R = TypeVar("R")
T = TypeVar("T")
class WhereisUserArg(StrEnum):
@ -42,10 +42,10 @@ class FetchUserArg(BaseModel):
required: bool = False
def validate_app_token(view: Optional[Callable] = None, *, fetch_user_arg: Optional[FetchUserArg] = None):
def decorator(view_func):
def validate_app_token(view: Callable[P, R] | None = None, *, fetch_user_arg: FetchUserArg | None = None):
def decorator(view_func: Callable[P, R]):
@wraps(view_func)
def decorated_view(*args, **kwargs):
def decorated_view(*args: P.args, **kwargs: P.kwargs):
api_token = validate_and_get_api_token("app")
app_model = db.session.query(App).where(App.id == api_token.app_id).first()
@ -189,17 +189,17 @@ def cloud_edition_billing_rate_limit_check(resource: str, api_token_type: str):
return interceptor
def validate_dataset_token(view=None):
def decorator(view):
def validate_dataset_token(view: Callable[Concatenate[T, P], R] | None = None):
def decorator(view: Callable[Concatenate[T, P], R]):
@wraps(view)
def decorated(*args, **kwargs):
def decorated(*args: P.args, **kwargs: P.kwargs):
# get url path dataset_id from positional args or kwargs
# Flask passes URL path parameters as positional arguments
dataset_id = None
# First try to get from kwargs (explicit parameter)
dataset_id = kwargs.get("dataset_id")
# If not in kwargs, try to extract from positional args
if not dataset_id and args:
# For class methods: args[0] is self, args[1] is dataset_id (if exists)
@ -225,7 +225,7 @@ def validate_dataset_token(view=None):
dataset_id = str_id
except:
pass
# Validate dataset if dataset_id is provided
if dataset_id:
dataset_id = str(dataset_id)
@ -250,7 +250,7 @@ def validate_dataset_token(view=None):
if account:
account.current_tenant = tenant
current_app.login_manager._update_request_context_with_user(account) # type: ignore
user_logged_in.send(current_app._get_current_object(), user=_get_user()) # type: ignore
user_logged_in.send(current_app._get_current_object(), user=current_user) # type: ignore
else:
raise Unauthorized("Tenant owner account does not exist.")
else:
@ -308,12 +308,12 @@ def validate_and_get_api_token(scope: str | None = None):
return api_token
def create_or_update_end_user_for_user_id(app_model: App, user_id: Optional[str] = None) -> EndUser:
def create_or_update_end_user_for_user_id(app_model: App, user_id: str | None = None) -> EndUser:
"""
Create or update session terminal based on user ID.
"""
if not user_id:
user_id = DEFAULT_SERVICE_API_USER_ID
user_id = DefaultEndUserSessionID.DEFAULT_SESSION_ID.value
with Session(db.engine, expire_on_commit=False) as session:
end_user = (
@ -332,7 +332,7 @@ def create_or_update_end_user_for_user_id(app_model: App, user_id: Optional[str]
tenant_id=app_model.tenant_id,
app_id=app_model.id,
type="service_api",
is_anonymous=user_id == DEFAULT_SERVICE_API_USER_ID,
is_anonymous=user_id == DefaultEndUserSessionID.DEFAULT_SESSION_ID.value,
session_id=user_id,
)
session.add(end_user)