Mirror of https://github.com/langgenius/dify.git (synced 2026-01-20 20:19:28 +08:00)

Compare commits: feat/suppo...deploy/dev (133 commits)
| SHA1 |
|---|
| cd85bc7379 |
| 63d33fe93f |
| bfc88ffaef |
| b9fc549a33 |
| ee49d09c8d |
| c878094407 |
| 51e4756b64 |
| 38c72dca0d |
| d9ada31927 |
| da1f0ef7d1 |
| def861bb53 |
| 3ce371a63f |
| 8eb88eb2fa |
| b6ecfe6398 |
| 749984fcc9 |
| a1482c5587 |
| 3c22d22835 |
| 008a5f361d |
| 67efac5994 |
| eaa6f2759d |
| 08caa4fce3 |
| 5293fbe8ba |
| ed555c5fe7 |
| 22974ea6b0 |
| 754b01366a |
| 8af626092e |
| 49b3bad26b |
| 50616c25d4 |
| 3b4b5b332c |
| 62c3f14570 |
| 41c3b1c57c |
| 994357d8b5 |
| 5fb9fe3c94 |
| 4fb08ae7d2 |
| 7481762acb |
| fcb2fe55e7 |
| a0aa8cdb45 |
| ae8618877b |
| 1c55602445 |
| a3f1220d23 |
| 4d7384731e |
| d62e16b9bb |
| 13f2a43ccc |
| 553dd3266b |
| 5b0590d58e |
| d97f2df85c |
| d3c09f16a9 |
| fde8efa4a2 |
| 5f6d1297b0 |
| 869e70964f |
| 1f313eb15c |
| f02adc26e5 |
| 73027eab0a |
| 74245fea8e |
| 5bc4bba668 |
| 1126a2aa95 |
| 2107a3c32c |
| 22d0c55363 |
| 7c3ce7b1e6 |
| f4d20a02aa |
| 7eb65b07c8 |
| 830a7fb034 |
| 0421a6ac53 |
| 9b7e807690 |
| af86f8de6f |
| 5186ae54e9 |
| 1c3200058b |
| eb4212aacc |
| 086662b79c |
| fc81f11a02 |
| ca668fffa4 |
| adbfbebfc3 |
| ec78676949 |
| 01a7dbcee8 |
| 4fe8d2491e |
| 76da8b4ff3 |
| 25bfc1cc3b |
| 5c2ae922bc |
| 13eec13a14 |
| 425a0f9095 |
| 3d050f449c |
| 905a5b348d |
| 3e082e6976 |
| ce309bd008 |
| 5a2924e13a |
| ae3c71a91a |
| e7b84b88ba |
| 993731e6be |
| e153be61c5 |
| a01f2267fe |
| 3562452ee6 |
| 95761051f8 |
| 1fcf6e4943 |
| f4a7efde3d |
| 38d4f0fd96 |
| ec4f885dad |
| 3781c2a025 |
| 3782f17dc7 |
| 29698aeed2 |
| 15ff8efb15 |
| 407e1c8276 |
| e368825c21 |
| 8dad6b6a6d |
| 2f54965a72 |
| a1a3fa0283 |
| ff7344f3d3 |
| bcd33be22a |
| 754f1a3cfa |
| b22c28b099 |
| cd9e28dbf4 |
| 04f9637b6f |
| b8a29bfb35 |
| 5e2b0d7b39 |
| b483d5fad5 |
| 04196288f8 |
| cc349e70b1 |
| 50bdbfae69 |
| 2f45673694 |
| b5fb55069b |
| 7ba9d30775 |
| e69b588bad |
| aadac22ce4 |
| d12015c722 |
| 2641326432 |
| 20109553b9 |
| 0e1444d17c |
| 65d376bdae |
| e3c1310afa |
| 38da19a729 |
| 91110499dd |
| 4dca9a12a8 |
| 3e448f0102 |
| ca75a1c9a3 |
```diff
@@ -965,6 +965,16 @@ class MailConfig(BaseSettings):
        default=None,
    )

    ENABLE_TRIAL_APP: bool = Field(
        description="Enable trial app",
        default=False,
    )

    ENABLE_EXPLORE_BANNER: bool = Field(
        description="Enable explore banner",
        default=False,
    )


class RagEtlConfig(BaseSettings):
    """
```
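The two new flags ride on the existing pydantic-settings pattern, so they can be flipped per deployment through environment variables. A minimal sketch, assuming `pydantic-settings` is installed; the `FeatureFlags` class name here is hypothetical:

```python
# Minimal sketch of how a BaseSettings feature flag like ENABLE_TRIAL_APP
# behaves; field names mirror the diff, the class name is hypothetical.
import os

from pydantic import Field
from pydantic_settings import BaseSettings


class FeatureFlags(BaseSettings):
    ENABLE_TRIAL_APP: bool = Field(description="Enable trial app", default=False)
    ENABLE_EXPLORE_BANNER: bool = Field(description="Enable explore banner", default=False)


os.environ["ENABLE_TRIAL_APP"] = "true"  # pydantic-settings reads env vars by field name
flags = FeatureFlags()
print(flags.ENABLE_TRIAL_APP, flags.ENABLE_EXPLORE_BANNER)  # True False
```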
```diff
@@ -107,10 +107,12 @@ from .datasets.rag_pipeline import (

# Import explore controllers
from .explore import (
    banner,
    installed_app,
    parameter,
    recommended_app,
    saved_message,
    trial,
)

# Import tag controllers
@@ -145,6 +147,7 @@ __all__ = [
    "apikey",
    "app",
    "audio",
    "banner",
    "billing",
    "bp",
    "completion",
@@ -198,6 +201,7 @@ __all__ = [
    "statistic",
    "tags",
    "tool_providers",
    "trial",
    "trigger_providers",
    "version",
    "website",
```
```diff
@@ -15,7 +15,7 @@ from controllers.console.wraps import only_edition_cloud
from core.db.session_factory import session_factory
from extensions.ext_database import db
from libs.token import extract_access_token
-from models.model import App, InstalledApp, RecommendedApp
+from models.model import App, ExporleBanner, InstalledApp, RecommendedApp, TrialApp

P = ParamSpec("P")
R = TypeVar("R")
@@ -32,6 +32,8 @@ class InsertExploreAppPayload(BaseModel):
    language: str = Field(...)
    category: str = Field(...)
    position: int = Field(...)
    can_trial: bool = Field(default=False)
    trial_limit: int = Field(default=0)

    @field_validator("language")
    @classmethod
@@ -39,11 +41,33 @@ class InsertExploreAppPayload(BaseModel):
        return supported_language(value)


class InsertExploreBannerPayload(BaseModel):
    category: str = Field(...)
    title: str = Field(...)
    description: str = Field(...)
    img_src: str = Field(..., alias="img-src")
    language: str = Field(default="en-US")
    link: str = Field(...)
    sort: int = Field(...)

    @field_validator("language")
    @classmethod
    def validate_language(cls, value: str) -> str:
        return supported_language(value)

    model_config = {"populate_by_name": True}


console_ns.schema_model(
    InsertExploreAppPayload.__name__,
    InsertExploreAppPayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
)

console_ns.schema_model(
    InsertExploreBannerPayload.__name__,
    InsertExploreBannerPayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
)


def admin_required(view: Callable[P, R]):
    @wraps(view)
@@ -109,6 +133,20 @@ class InsertExploreAppListApi(Resource):
        )

        db.session.add(recommended_app)
        if payload.can_trial:
            trial_app = db.session.execute(
                select(TrialApp).where(TrialApp.app_id == payload.app_id)
            ).scalar_one_or_none()
            if not trial_app:
                db.session.add(
                    TrialApp(
                        app_id=payload.app_id,
                        tenant_id=app.tenant_id,
                        trial_limit=payload.trial_limit,
                    )
                )
            else:
                trial_app.trial_limit = payload.trial_limit

        app.is_public = True
        db.session.commit()
@@ -123,6 +161,20 @@ class InsertExploreAppListApi(Resource):
        recommended_app.category = payload.category
        recommended_app.position = payload.position

        if payload.can_trial:
            trial_app = db.session.execute(
                select(TrialApp).where(TrialApp.app_id == payload.app_id)
            ).scalar_one_or_none()
            if not trial_app:
                db.session.add(
                    TrialApp(
                        app_id=payload.app_id,
                        tenant_id=app.tenant_id,
                        trial_limit=payload.trial_limit,
                    )
                )
            else:
                trial_app.trial_limit = payload.trial_limit
        app.is_public = True

        db.session.commit()
@@ -168,7 +220,62 @@ class InsertExploreAppApi(Resource):
            for installed_app in installed_apps:
                session.delete(installed_app)

            trial_app = session.execute(
                select(TrialApp).where(TrialApp.app_id == recommended_app.app_id)
            ).scalar_one_or_none()
            if trial_app:
                session.delete(trial_app)

        db.session.delete(recommended_app)
        db.session.commit()

        return {"result": "success"}, 204


@console_ns.route("/admin/insert-explore-banner")
class InsertExploreBannerApi(Resource):
    @console_ns.doc("insert_explore_banner")
    @console_ns.doc(description="Insert an explore banner")
    @console_ns.expect(console_ns.models[InsertExploreBannerPayload.__name__])
    @console_ns.response(201, "Banner inserted successfully")
    @only_edition_cloud
    @admin_required
    def post(self):
        payload = InsertExploreBannerPayload.model_validate(console_ns.payload)

        content = {
            "category": payload.category,
            "title": payload.title,
            "description": payload.description,
            "img-src": payload.img_src,
        }

        banner = ExporleBanner(
            content=content,
            link=payload.link,
            sort=payload.sort,
            language=payload.language,
        )
        db.session.add(banner)
        db.session.commit()

        return {"result": "success"}, 201


@console_ns.route("/admin/delete-explore-banner/<uuid:banner_id>")
class DeleteExploreBannerApi(Resource):
    @console_ns.doc("delete_explore_banner")
    @console_ns.doc(description="Delete an explore banner")
    @console_ns.doc(params={"banner_id": "Banner ID to delete"})
    @console_ns.response(204, "Banner deleted successfully")
    @only_edition_cloud
    @admin_required
    def delete(self, banner_id):
        banner = db.session.execute(select(ExporleBanner).where(ExporleBanner.id == banner_id)).scalar_one_or_none()
        if not banner:
            raise NotFound(f"Banner '{banner_id}' is not found")

        db.session.delete(banner)
        db.session.commit()

        return {"result": "success"}, 204
```
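The banner payload accepts the HTTP-style key `img-src` while the model keeps a snake_case attribute; `populate_by_name` lets either spelling validate. A self-contained sketch of that alias behavior (the real model additionally runs `supported_language()` on `language`):

```python
# Standalone sketch of the "img-src" alias on InsertExploreBannerPayload;
# the language validator from the real model is omitted here.
from pydantic import BaseModel, Field


class InsertExploreBannerPayload(BaseModel):
    model_config = {"populate_by_name": True}

    category: str
    title: str
    description: str
    img_src: str = Field(..., alias="img-src")
    language: str = "en-US"
    link: str
    sort: int


payload = InsertExploreBannerPayload.model_validate(
    {
        "category": "news",
        "title": "Hello",
        "description": "First banner",
        "img-src": "https://example.com/banner.png",  # alias spelling
        "link": "https://example.com",
        "sort": 1,
    }
)
print(payload.img_src)  # the alias maps back to the snake_case field
```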
```diff
@@ -115,3 +115,9 @@ class InvokeRateLimitError(BaseHTTPException):
    error_code = "rate_limit_error"
    description = "Rate Limit Error"
    code = 429


class NeedAddIdsError(BaseHTTPException):
    error_code = "need_add_ids"
    description = "Need to add ids."
    code = 400
```
```diff
@@ -23,6 +23,11 @@ def _load_app_model(app_id: str) -> App | None:
    return app_model


def _load_app_model_with_trial(app_id: str) -> App | None:
    app_model = db.session.query(App).where(App.id == app_id, App.status == "normal").first()
    return app_model


def get_app_model(view: Callable[P, R] | None = None, *, mode: Union[AppMode, list[AppMode], None] = None):
    def decorator(view_func: Callable[P1, R1]):
        @wraps(view_func)
@@ -62,3 +67,44 @@ def get_app_model(view: Callable[P, R] | None = None, *, mode: Union[AppMode, li
        return decorator
    else:
        return decorator(view)


def get_app_model_with_trial(view: Callable[P, R] | None = None, *, mode: Union[AppMode, list[AppMode], None] = None):
    def decorator(view_func: Callable[P, R]):
        @wraps(view_func)
        def decorated_view(*args: P.args, **kwargs: P.kwargs):
            if not kwargs.get("app_id"):
                raise ValueError("missing app_id in path parameters")

            app_id = kwargs.get("app_id")
            app_id = str(app_id)

            del kwargs["app_id"]

            app_model = _load_app_model_with_trial(app_id)

            if not app_model:
                raise AppNotFoundError()

            app_mode = AppMode.value_of(app_model.mode)

            if mode is not None:
                if isinstance(mode, list):
                    modes = mode
                else:
                    modes = [mode]

                if app_mode not in modes:
                    mode_values = {m.value for m in modes}
                    raise AppNotFoundError(f"App mode is not in the supported list: {mode_values}")

            kwargs["app_model"] = app_model

            return view_func(*args, **kwargs)

        return decorated_view

    if view is None:
        return decorator
    else:
        return decorator(view)
```
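`get_app_model_with_trial` follows the decorator-with-optional-arguments shape: it can be applied bare or called with `mode=...` first. A stripped-down sketch of that shape, with the model loading stubbed out and all names illustrative:

```python
# Sketch of the optional-argument decorator pattern above: usable both as
# @load_model and as @load_model(mode="workflow"). Names are illustrative.
from functools import wraps


def load_model(view=None, *, mode=None):
    def decorator(view_func):
        @wraps(view_func)
        def decorated_view(*args, **kwargs):
            app_id = kwargs.pop("app_id", None)
            if app_id is None:
                raise ValueError("missing app_id in path parameters")
            # a real implementation would load the model here and
            # validate its mode against `mode`
            kwargs["app_model"] = {"id": str(app_id), "mode": mode or "chat"}
            return view_func(*args, **kwargs)

        return decorated_view

    return decorator if view is None else decorator(view)


@load_model
def show(app_model):
    return app_model["id"]


print(show(app_id=123))  # "123"
```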
```diff
@@ -146,6 +146,7 @@ class DatasetUpdatePayload(BaseModel):
    embedding_model: str | None = None
    embedding_model_provider: str | None = None
    retrieval_model: dict[str, Any] | None = None
    summary_index_setting: dict[str, Any] | None = None
    partial_member_list: list[dict[str, str]] | None = None
    external_retrieval_model: dict[str, Any] | None = None
    external_knowledge_id: str | None = None
```
```diff
@@ -41,10 +41,11 @@ from fields.document_fields import (
from libs.datetime_utils import naive_utc_now
from libs.login import current_account_with_tenant, login_required
from models import DatasetProcessRule, Document, DocumentSegment, UploadFile
-from models.dataset import DocumentPipelineExecutionLog
+from models.dataset import DocumentPipelineExecutionLog, DocumentSegmentSummary
from services.dataset_service import DatasetService, DocumentService
from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig, ProcessRule, RetrievalModel
from services.file_service import FileService
from tasks.generate_summary_index_task import generate_summary_index_task

from ..app.error import (
    ProviderModelCurrentlyNotSupportError,
@@ -110,6 +111,10 @@ class DocumentRenamePayload(BaseModel):
    name: str


class GenerateSummaryPayload(BaseModel):
    document_list: list[str]


class DocumentBatchDownloadZipPayload(BaseModel):
    """Request payload for bulk downloading documents as a zip archive."""

@@ -132,6 +137,7 @@ register_schema_models(
    RetrievalModel,
    DocumentRetryPayload,
    DocumentRenamePayload,
    GenerateSummaryPayload,
    DocumentBatchDownloadZipPayload,
)

@@ -319,6 +325,89 @@ class DatasetDocumentListApi(Resource):

        paginated_documents = db.paginate(select=query, page=page, per_page=limit, max_per_page=100, error_out=False)
        documents = paginated_documents.items

        # Check if dataset has summary index enabled
        has_summary_index = dataset.summary_index_setting and dataset.summary_index_setting.get("enable") is True

        # Filter documents that need summary calculation
        documents_need_summary = [doc for doc in documents if doc.need_summary is True]
        document_ids_need_summary = [str(doc.id) for doc in documents_need_summary]

        # Calculate summary_index_status for documents that need summary (only if dataset summary index is enabled)
        summary_status_map = {}
        if has_summary_index and document_ids_need_summary:
            # Get all segments for these documents (excluding qa_model and re_segment)
            segments = (
                db.session.query(DocumentSegment.id, DocumentSegment.document_id)
                .where(
                    DocumentSegment.document_id.in_(document_ids_need_summary),
                    DocumentSegment.status != "re_segment",
                    DocumentSegment.tenant_id == current_tenant_id,
                )
                .all()
            )

            # Group segments by document_id
            document_segments_map = {}
            for segment in segments:
                doc_id = str(segment.document_id)
                if doc_id not in document_segments_map:
                    document_segments_map[doc_id] = []
                document_segments_map[doc_id].append(segment.id)

            # Get all summary records for these segments
            all_segment_ids = [seg.id for seg in segments]
            summaries = {}
            if all_segment_ids:
                summary_records = (
                    db.session.query(DocumentSegmentSummary)
                    .where(
                        DocumentSegmentSummary.chunk_id.in_(all_segment_ids),
                        DocumentSegmentSummary.dataset_id == dataset_id,
                        DocumentSegmentSummary.enabled == True,  # Only count enabled summaries
                    )
                    .all()
                )
                summaries = {summary.chunk_id: summary.status for summary in summary_records}

            # Calculate summary_index_status for each document
            for doc_id in document_ids_need_summary:
                segment_ids = document_segments_map.get(doc_id, [])
                if not segment_ids:
                    # No segments, status is None (not started)
                    summary_status_map[doc_id] = None
                    continue

                # Count summary statuses for this document's segments
                status_counts = {"completed": 0, "generating": 0, "error": 0, "not_started": 0}
                for segment_id in segment_ids:
                    status = summaries.get(segment_id, "not_started")
                    if status in status_counts:
                        status_counts[status] += 1
                    else:
                        status_counts["not_started"] += 1

                generating_count = status_counts["generating"]

                # Determine overall status:
                # - "SUMMARIZING" only when task is queued and at least one summary is generating
                # - None (empty) for all other cases (not queued, all completed/error)
                if generating_count > 0:
                    # Task is queued and at least one summary is still generating
                    summary_status_map[doc_id] = "SUMMARIZING"
                else:
                    # Task not queued yet, or all summaries are completed/error (task finished)
                    summary_status_map[doc_id] = None

        # Add summary_index_status to each document
        for document in documents:
            if has_summary_index and document.need_summary is True:
                # Get status from map, default to None (not queued yet)
                document.summary_index_status = summary_status_map.get(str(document.id))
            else:
                # Return null if summary index is not enabled or document doesn't need summary
                document.summary_index_status = None

        if fetch:
            for document in documents:
                completed_segments = (
```
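The loop above collapses many segment-level summary statuses into one per-document flag. The same reduction in isolation:

```python
# Standalone sketch of the per-document rollup used above: a document shows
# "SUMMARIZING" while any of its segments is still generating, else no status.
def rollup_status(segment_statuses: list[str]) -> str | None:
    if not segment_statuses:
        return None  # no segments: generation has not started
    if any(s == "generating" for s in segment_statuses):
        return "SUMMARIZING"
    return None  # all completed/error (or never queued): nothing in flight


print(rollup_status(["completed", "generating"]))  # SUMMARIZING
print(rollup_status(["completed", "error"]))       # None
```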
```diff
@@ -804,6 +893,7 @@ class DocumentApi(DocumentResource):
                "display_status": document.display_status,
                "doc_form": document.doc_form,
                "doc_language": document.doc_language,
                "need_summary": document.need_summary if document.need_summary is not None else False,
            }
        else:
            dataset_process_rules = DatasetService.get_process_rules(dataset_id)
@@ -839,6 +929,7 @@ class DocumentApi(DocumentResource):
                "display_status": document.display_status,
                "doc_form": document.doc_form,
                "doc_language": document.doc_language,
                "need_summary": document.need_summary if document.need_summary is not None else False,
            }

        return response, 200
@@ -1262,3 +1353,216 @@ class DocumentPipelineExecutionLogApi(DocumentResource):
            "input_data": log.input_data,
            "datasource_node_id": log.datasource_node_id,
        }, 200


@console_ns.route("/datasets/<uuid:dataset_id>/documents/generate-summary")
class DocumentGenerateSummaryApi(Resource):
    @console_ns.doc("generate_summary_for_documents")
    @console_ns.doc(description="Generate summary index for documents")
    @console_ns.doc(params={"dataset_id": "Dataset ID"})
    @console_ns.expect(console_ns.models[GenerateSummaryPayload.__name__])
    @console_ns.response(200, "Summary generation started successfully")
    @console_ns.response(400, "Invalid request or dataset configuration")
    @console_ns.response(403, "Permission denied")
    @console_ns.response(404, "Dataset not found")
    @setup_required
    @login_required
    @account_initialization_required
    @cloud_edition_billing_rate_limit_check("knowledge")
    def post(self, dataset_id):
        """
        Generate summary index for specified documents.

        This endpoint checks if the dataset configuration supports summary generation
        (indexing_technique must be 'high_quality' and summary_index_setting.enable must be true),
        then asynchronously generates summary indexes for the provided documents.
        """
        current_user, _ = current_account_with_tenant()
        dataset_id = str(dataset_id)

        # Get dataset
        dataset = DatasetService.get_dataset(dataset_id)
        if not dataset:
            raise NotFound("Dataset not found.")

        # Check permissions
        if not current_user.is_dataset_editor:
            raise Forbidden()

        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))

        # Validate request payload
        payload = GenerateSummaryPayload.model_validate(console_ns.payload or {})
        document_list = payload.document_list

        if not document_list:
            raise ValueError("document_list cannot be empty.")

        # Check if dataset configuration supports summary generation
        if dataset.indexing_technique != "high_quality":
            raise ValueError(
                f"Summary generation is only available for 'high_quality' indexing technique. "
                f"Current indexing technique: {dataset.indexing_technique}"
            )

        summary_index_setting = dataset.summary_index_setting
        if not summary_index_setting or not summary_index_setting.get("enable"):
            raise ValueError("Summary index is not enabled for this dataset. Please enable it in the dataset settings.")

        # Verify all documents exist and belong to the dataset
        documents = (
            db.session.query(Document)
            .filter(
                Document.id.in_(document_list),
                Document.dataset_id == dataset_id,
            )
            .all()
        )

        if len(documents) != len(document_list):
            found_ids = {doc.id for doc in documents}
            missing_ids = set(document_list) - found_ids
            raise NotFound(f"Some documents not found: {list(missing_ids)}")

        # Dispatch async tasks for each document
        for document in documents:
            # Skip qa_model documents as they don't generate summaries
            if document.doc_form == "qa_model":
                logger.info("Skipping summary generation for qa_model document %s", document.id)
                continue

            # Dispatch async task
            generate_summary_index_task(dataset_id, document.id)
            logger.info(
                "Dispatched summary generation task for document %s in dataset %s",
                document.id,
                dataset_id,
            )

        return {"result": "success"}, 200
```
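A hedged client sketch for the new generate-summary route; the base URL and bearer token are placeholders, since the console API expects an authenticated session:

```python
# Minimal client sketch for the endpoint above. The base URL and token are
# placeholders; the console API normally requires a signed-in session.
import requests

BASE = "http://localhost:5001/console/api"  # hypothetical deployment
dataset_id = "00000000-0000-0000-0000-000000000000"

resp = requests.post(
    f"{BASE}/datasets/{dataset_id}/documents/generate-summary",
    headers={"Authorization": "Bearer <console-access-token>"},
    json={"document_list": ["<document-uuid>"]},
)
print(resp.status_code, resp.json())  # {"result": "success"} on 200
```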
```diff
@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/summary-status")
class DocumentSummaryStatusApi(DocumentResource):
    @console_ns.doc("get_document_summary_status")
    @console_ns.doc(description="Get summary index generation status for a document")
    @console_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
    @console_ns.response(200, "Summary status retrieved successfully")
    @console_ns.response(404, "Document not found")
    @setup_required
    @login_required
    @account_initialization_required
    def get(self, dataset_id, document_id):
        """
        Get summary index generation status for a document.

        Returns:
            - total_segments: Total number of segments in the document
            - summary_status: Dictionary with status counts
                - completed: Number of summaries completed
                - generating: Number of summaries being generated
                - error: Number of summaries with errors
                - not_started: Number of segments without summary records
            - summaries: List of summary records with status and content preview
        """
        current_user, _ = current_account_with_tenant()
        dataset_id = str(dataset_id)
        document_id = str(document_id)

        # Get document
        document = self.get_document(dataset_id, document_id)

        # Get dataset
        dataset = DatasetService.get_dataset(dataset_id)
        if not dataset:
            raise NotFound("Dataset not found.")

        # Check permissions
        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))

        # Get all segments for this document
        segments = (
            db.session.query(DocumentSegment)
            .filter(
                DocumentSegment.document_id == document_id,
                DocumentSegment.dataset_id == dataset_id,
                DocumentSegment.status == "completed",
                DocumentSegment.enabled == True,
            )
            .all()
        )

        total_segments = len(segments)

        # Get all summary records for these segments
        segment_ids = [segment.id for segment in segments]
        summaries = []
        if segment_ids:
            summaries = (
                db.session.query(DocumentSegmentSummary)
                .filter(
                    DocumentSegmentSummary.document_id == document_id,
                    DocumentSegmentSummary.dataset_id == dataset_id,
                    DocumentSegmentSummary.chunk_id.in_(segment_ids),
                    DocumentSegmentSummary.enabled == True,  # Only return enabled summaries
                )
                .all()
            )

        # Create a mapping of chunk_id to summary
        summary_map = {summary.chunk_id: summary for summary in summaries}

        # Count statuses
        status_counts = {
            "completed": 0,
            "generating": 0,
            "error": 0,
            "not_started": 0,
        }

        summary_list = []
        for segment in segments:
            summary = summary_map.get(segment.id)
            if summary:
                status = summary.status
                status_counts[status] = status_counts.get(status, 0) + 1
                summary_list.append(
                    {
                        "segment_id": segment.id,
                        "segment_position": segment.position,
                        "status": summary.status,
                        "summary_preview": (
                            summary.summary_content[:100] + "..."
                            if summary.summary_content and len(summary.summary_content) > 100
                            else summary.summary_content
                        ),
                        "error": summary.error,
                        "created_at": int(summary.created_at.timestamp()) if summary.created_at else None,
                        "updated_at": int(summary.updated_at.timestamp()) if summary.updated_at else None,
                    }
                )
            else:
                status_counts["not_started"] += 1
                summary_list.append(
                    {
                        "segment_id": segment.id,
                        "segment_position": segment.position,
                        "status": "not_started",
                        "summary_preview": None,
                        "error": None,
                        "created_at": None,
                        "updated_at": None,
                    }
                )

        return {
            "total_segments": total_segments,
            "summary_status": status_counts,
            "summaries": summary_list,
        }, 200
```
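Because generation is dispatched asynchronously, callers are expected to poll the summary-status route until nothing is left generating. A hedged polling sketch under the same placeholder assumptions:

```python
# Hedged polling sketch against the summary-status endpoint above; URL and
# token are placeholders, and the response shape follows the docstring.
import time

import requests

BASE = "http://localhost:5001/console/api"  # hypothetical deployment
url = f"{BASE}/datasets/<dataset-uuid>/documents/<document-uuid>/summary-status"
headers = {"Authorization": "Bearer <console-access-token>"}

while True:
    data = requests.get(url, headers=headers).json()
    counts = data["summary_status"]
    if counts["generating"] == 0:
        print("done:", counts)
        break
    time.sleep(5)  # back off between polls
```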
```diff
@@ -32,7 +32,7 @@ from extensions.ext_redis import redis_client
from fields.segment_fields import child_chunk_fields, segment_fields
from libs.helper import escape_like_pattern
from libs.login import current_account_with_tenant, login_required
-from models.dataset import ChildChunk, DocumentSegment
+from models.dataset import ChildChunk, DocumentSegment, DocumentSegmentSummary
from models.model import UploadFile
from services.dataset_service import DatasetService, DocumentService, SegmentService
from services.entities.knowledge_entities.knowledge_entities import ChildChunkUpdateArgs, SegmentUpdateArgs
@@ -41,6 +41,23 @@ from services.errors.chunk import ChildChunkIndexingError as ChildChunkIndexingS
from tasks.batch_create_segment_to_index_task import batch_create_segment_to_index_task


def _get_segment_with_summary(segment, dataset_id):
    """Helper function to marshal segment and add summary information."""
    segment_dict = marshal(segment, segment_fields)
    # Query summary for this segment (only enabled summaries)
    summary = (
        db.session.query(DocumentSegmentSummary)
        .where(
            DocumentSegmentSummary.chunk_id == segment.id,
            DocumentSegmentSummary.dataset_id == dataset_id,
            DocumentSegmentSummary.enabled == True,  # Only return enabled summaries
        )
        .first()
    )
    segment_dict["summary"] = summary.summary_content if summary else None
    return segment_dict


class SegmentListQuery(BaseModel):
    limit: int = Field(default=20, ge=1, le=100)
    status: list[str] = Field(default_factory=list)
@@ -63,6 +80,7 @@ class SegmentUpdatePayload(BaseModel):
    keywords: list[str] | None = None
    regenerate_child_chunks: bool = False
    attachment_ids: list[str] | None = None
    summary: str | None = None  # Summary content for summary index


class BatchImportPayload(BaseModel):
@@ -180,8 +198,32 @@ class DatasetDocumentSegmentListApi(Resource):

        segments = db.paginate(select=query, page=page, per_page=limit, max_per_page=100, error_out=False)

        # Query summaries for all segments in this page (batch query for efficiency)
        segment_ids = [segment.id for segment in segments.items]
        summaries = {}
        if segment_ids:
            summary_records = (
                db.session.query(DocumentSegmentSummary)
                .where(
                    DocumentSegmentSummary.chunk_id.in_(segment_ids),
                    DocumentSegmentSummary.dataset_id == dataset_id,
                )
                .all()
            )
            # Only include enabled summaries
            summaries = {
                summary.chunk_id: summary.summary_content for summary in summary_records if summary.enabled is True
            }

        # Add summary to each segment
        segments_with_summary = []
        for segment in segments.items:
            segment_dict = marshal(segment, segment_fields)
            segment_dict["summary"] = summaries.get(segment.id)
            segments_with_summary.append(segment_dict)

        response = {
-            "data": marshal(segments.items, segment_fields),
+            "data": segments_with_summary,
            "limit": limit,
            "total": segments.total,
            "total_pages": segments.pages,
@@ -327,7 +369,7 @@ class DatasetDocumentSegmentAddApi(Resource):
        payload_dict = payload.model_dump(exclude_none=True)
        SegmentService.segment_create_args_validate(payload_dict, document)
        segment = SegmentService.create_segment(payload_dict, document, dataset)
-        return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200
+        return {"data": _get_segment_with_summary(segment, dataset_id), "doc_form": document.doc_form}, 200


@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>")
@@ -389,10 +431,12 @@ class DatasetDocumentSegmentUpdateApi(Resource):
        payload = SegmentUpdatePayload.model_validate(console_ns.payload or {})
        payload_dict = payload.model_dump(exclude_none=True)
        SegmentService.segment_create_args_validate(payload_dict, document)

        # Update segment (summary update with change detection is handled in SegmentService.update_segment)
        segment = SegmentService.update_segment(
            SegmentUpdateArgs.model_validate(payload.model_dump(exclude_none=True)), segment, document, dataset
        )
-        return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200
+        return {"data": _get_segment_with_summary(segment, dataset_id), "doc_form": document.doc_form}, 200

    @setup_required
    @login_required
```
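The segment list endpoint deliberately fetches summaries for the whole page in one query rather than once per segment. The join-in-memory pattern in isolation, with a namedtuple standing in for the `DocumentSegmentSummary` rows:

```python
# Sketch of the batch-lookup pattern above: fetch all summaries for a page of
# segments in one query, then join in memory instead of issuing N+1 queries.
from collections import namedtuple

Summary = namedtuple("Summary", "chunk_id summary_content enabled")


def attach_summaries(segment_ids, summary_rows):
    # one pass builds the lookup table; disabled rows are skipped
    lookup = {s.chunk_id: s.summary_content for s in summary_rows if s.enabled}
    return {sid: lookup.get(sid) for sid in segment_ids}


rows = [Summary("a", "short summary", True), Summary("b", "hidden", False)]
print(attach_summaries(["a", "b", "c"], rows))
# {'a': 'short summary', 'b': None, 'c': None}
```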
```diff
@@ -1,6 +1,13 @@
-from flask_restx import Resource
+from flask_restx import Resource, fields

from controllers.common.schema import register_schema_model
from fields.hit_testing_fields import (
    child_chunk_fields,
    document_fields,
    files_fields,
    hit_testing_record_fields,
    segment_fields,
)
from libs.login import login_required

from .. import console_ns
@@ -14,13 +21,45 @@ from ..wraps import (
register_schema_model(console_ns, HitTestingPayload)


def _get_or_create_model(model_name: str, field_def):
    """Get or create a flask_restx model to avoid dict type issues in Swagger."""
    existing = console_ns.models.get(model_name)
    if existing is None:
        existing = console_ns.model(model_name, field_def)
    return existing


# Register models for flask_restx to avoid dict type issues in Swagger
document_model = _get_or_create_model("HitTestingDocument", document_fields)

segment_fields_copy = segment_fields.copy()
segment_fields_copy["document"] = fields.Nested(document_model)
segment_model = _get_or_create_model("HitTestingSegment", segment_fields_copy)

child_chunk_model = _get_or_create_model("HitTestingChildChunk", child_chunk_fields)
files_model = _get_or_create_model("HitTestingFile", files_fields)

hit_testing_record_fields_copy = hit_testing_record_fields.copy()
hit_testing_record_fields_copy["segment"] = fields.Nested(segment_model)
hit_testing_record_fields_copy["child_chunks"] = fields.List(fields.Nested(child_chunk_model))
hit_testing_record_fields_copy["files"] = fields.List(fields.Nested(files_model))
hit_testing_record_model = _get_or_create_model("HitTestingRecord", hit_testing_record_fields_copy)

# Response model for hit testing API
hit_testing_response_fields = {
    "query": fields.String,
    "records": fields.List(fields.Nested(hit_testing_record_model)),
}
hit_testing_response_model = _get_or_create_model("HitTestingResponse", hit_testing_response_fields)


@console_ns.route("/datasets/<uuid:dataset_id>/hit-testing")
class HitTestingApi(Resource, DatasetsHitTestingBase):
    @console_ns.doc("test_dataset_retrieval")
    @console_ns.doc(description="Test dataset knowledge retrieval")
    @console_ns.doc(params={"dataset_id": "Dataset ID"})
    @console_ns.expect(console_ns.models[HitTestingPayload.__name__])
-    @console_ns.response(200, "Hit testing completed successfully")
+    @console_ns.response(200, "Hit testing completed successfully", model=hit_testing_response_model)
    @console_ns.response(404, "Dataset not found")
    @console_ns.response(400, "Invalid parameters")
    @setup_required
```
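The hit-testing module wraps model registration in a get-or-create helper so repeated imports don't re-register the same Swagger model. A self-contained flask-restx sketch of that pattern, with illustrative field sets:

```python
# Hedged sketch of nesting flask-restx models as done above; the namespace
# and field sets here are illustrative, not the real hit-testing fields.
from flask import Flask
from flask_restx import Api, Namespace, fields

app = Flask(__name__)
api = Api(app)
ns = Namespace("demo")
api.add_namespace(ns)


def get_or_create_model(name, field_def):
    # registering the same name twice can clobber the Swagger spec, hence the lookup
    existing = ns.models.get(name)
    return existing if existing is not None else ns.model(name, field_def)


doc_model = get_or_create_model("Doc", {"id": fields.String})
record_model = get_or_create_model(
    "Record", {"score": fields.Float, "document": fields.Nested(doc_model)}
)
response_model = get_or_create_model(
    "Response", {"records": fields.List(fields.Nested(record_model))}
)
```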
api/controllers/console/explore/banner.py (new file, 43 lines)

```diff
@@ -0,0 +1,43 @@
from flask import request
from flask_restx import Resource

from controllers.console import api
from controllers.console.explore.wraps import explore_banner_enabled
from extensions.ext_database import db
from models.model import ExporleBanner


class BannerApi(Resource):
    """Resource for banner list."""

    @explore_banner_enabled
    def get(self):
        """Get banner list."""
        language = request.args.get("language", "en-US")

        # Build base query for enabled banners
        base_query = db.session.query(ExporleBanner).where(ExporleBanner.status == "enabled")

        # Try to get banners in the requested language
        banners = base_query.where(ExporleBanner.language == language).order_by(ExporleBanner.sort).all()

        # Fallback to en-US if no banners found and language is not en-US
        if not banners and language != "en-US":
            banners = base_query.where(ExporleBanner.language == "en-US").order_by(ExporleBanner.sort).all()
        # Convert banners to serializable format
        result = []
        for banner in banners:
            banner_data = {
                "id": banner.id,
                "content": banner.content,  # Already parsed as JSON by SQLAlchemy
                "link": banner.link,
                "sort": banner.sort,
                "status": banner.status,
                "created_at": banner.created_at.isoformat() if banner.created_at else None,
            }
            result.append(banner_data)

        return result


api.add_resource(BannerApi, "/explore/banners")
```
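The banner list falls back to `en-US` whenever the requested language has no enabled banners. The same fallback in isolation:

```python
# Standalone sketch of the language fallback above, over an in-memory list
# instead of the ExporleBanner table.
def pick_banners(banners, language, default="en-US"):
    chosen = [b for b in banners if b["language"] == language]
    if not chosen and language != default:
        chosen = [b for b in banners if b["language"] == default]
    return sorted(chosen, key=lambda b: b["sort"])


data = [{"language": "en-US", "sort": 2}, {"language": "en-US", "sort": 1}]
print(pick_banners(data, "ja-JP"))  # falls back to the en-US banners, sorted
```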
```diff
@@ -29,3 +29,25 @@ class AppAccessDeniedError(BaseHTTPException):
    error_code = "access_denied"
    description = "App access denied."
    code = 403


class TrialAppNotAllowed(BaseHTTPException):
    """*403* `Trial App Not Allowed`

    Raise if the app is not allowed to be trialed.
    """

    error_code = "trial_app_not_allowed"
    code = 403
    description = "The app is not allowed to be trialed."


class TrialAppLimitExceeded(BaseHTTPException):
    """*403* `Trial App Limit Exceeded`

    Raise if the user has exceeded the trial app limit.
    """

    error_code = "trial_app_limit_exceeded"
    code = 403
    description = "The user has exceeded the trial app limit."
```
```diff
@@ -29,6 +29,7 @@ recommended_app_fields = {
    "category": fields.String,
    "position": fields.Integer,
    "is_listed": fields.Boolean,
    "can_trial": fields.Boolean,
}

recommended_app_list_fields = {
```
api/controllers/console/explore/trial.py (new file, 512 lines)

```diff
@@ -0,0 +1,512 @@
import logging
from typing import Any, cast

from flask import request
from flask_restx import Resource, marshal, marshal_with, reqparse
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound

import services
from controllers.common.fields import Parameters as ParametersResponse
from controllers.common.fields import Site as SiteResponse
from controllers.console import api
from controllers.console.app.error import (
    AppUnavailableError,
    AudioTooLargeError,
    CompletionRequestError,
    ConversationCompletedError,
    NeedAddIdsError,
    NoAudioUploadedError,
    ProviderModelCurrentlyNotSupportError,
    ProviderNotInitializeError,
    ProviderNotSupportSpeechToTextError,
    ProviderQuotaExceededError,
    UnsupportedAudioTypeError,
)
from controllers.console.app.wraps import get_app_model_with_trial
from controllers.console.explore.error import (
    AppSuggestedQuestionsAfterAnswerDisabledError,
    NotChatAppError,
    NotCompletionAppError,
    NotWorkflowAppError,
)
from controllers.console.explore.wraps import TrialAppResource, trial_feature_enable
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
from core.app.app_config.common.parameters_mapping import get_parameters_from_feature_dict
from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.entities.app_invoke_entities import InvokeFrom
from core.errors.error import (
    ModelCurrentlyNotSupportError,
    ProviderTokenNotInitError,
    QuotaExceededError,
)
from core.model_runtime.errors.invoke import InvokeError
from core.workflow.graph_engine.manager import GraphEngineManager
from extensions.ext_database import db
from fields.app_fields import app_detail_fields_with_site
from fields.dataset_fields import dataset_fields
from fields.workflow_fields import workflow_fields
from libs import helper
from libs.helper import uuid_value
from libs.login import current_user
from models import Account
from models.account import TenantStatus
from models.model import AppMode, Site
from models.workflow import Workflow
from services.app_generate_service import AppGenerateService
from services.app_service import AppService
from services.audio_service import AudioService
from services.dataset_service import DatasetService
from services.errors.audio import (
    AudioTooLargeServiceError,
    NoAudioUploadedServiceError,
    ProviderNotSupportSpeechToTextServiceError,
    UnsupportedAudioTypeServiceError,
)
from services.errors.conversation import ConversationNotExistsError
from services.errors.llm import InvokeRateLimitError
from services.errors.message import (
    MessageNotExistsError,
    SuggestedQuestionsAfterAnswerDisabledError,
)
from services.message_service import MessageService
from services.recommended_app_service import RecommendedAppService

logger = logging.getLogger(__name__)


class TrialAppWorkflowRunApi(TrialAppResource):
    def post(self, trial_app):
        """
        Run workflow
        """
        app_model = trial_app
        if not app_model:
            raise NotWorkflowAppError()
        app_mode = AppMode.value_of(app_model.mode)
        if app_mode != AppMode.WORKFLOW:
            raise NotWorkflowAppError()

        parser = reqparse.RequestParser()
        parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
        parser.add_argument("files", type=list, required=False, location="json")
        args = parser.parse_args()
        assert current_user is not None
        try:
            app_id = app_model.id
            user_id = current_user.id
            response = AppGenerateService.generate(
                app_model=app_model, user=current_user, args=args, invoke_from=InvokeFrom.EXPLORE, streaming=True
            )
            RecommendedAppService.add_trial_app_record(app_id, user_id)
            return helper.compact_generate_response(response)
        except ProviderTokenNotInitError as ex:
            raise ProviderNotInitializeError(ex.description)
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except InvokeError as e:
            raise CompletionRequestError(e.description)
        except InvokeRateLimitError as ex:
            raise InvokeRateLimitHttpError(ex.description)
        except ValueError as e:
            raise e
        except Exception:
            logger.exception("internal server error.")
            raise InternalServerError()


class TrialAppWorkflowTaskStopApi(TrialAppResource):
    def post(self, trial_app, task_id: str):
        """
        Stop workflow task
        """
        app_model = trial_app
        if not app_model:
            raise NotWorkflowAppError()
        app_mode = AppMode.value_of(app_model.mode)
        if app_mode != AppMode.WORKFLOW:
            raise NotWorkflowAppError()
        assert current_user is not None

        # Stop using both mechanisms for backward compatibility
        # Legacy stop flag mechanism (without user check)
        AppQueueManager.set_stop_flag_no_user_check(task_id)

        # New graph engine command channel mechanism
        GraphEngineManager.send_stop_command(task_id)

        return {"result": "success"}


class TrialChatApi(TrialAppResource):
    @trial_feature_enable
    def post(self, trial_app):
        app_model = trial_app
        app_mode = AppMode.value_of(app_model.mode)
        if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}:
            raise NotChatAppError()

        parser = reqparse.RequestParser()
        parser.add_argument("inputs", type=dict, required=True, location="json")
        parser.add_argument("query", type=str, required=True, location="json")
        parser.add_argument("files", type=list, required=False, location="json")
        parser.add_argument("conversation_id", type=uuid_value, location="json")
        parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json")
        parser.add_argument("retriever_from", type=str, required=False, default="explore_app", location="json")
        args = parser.parse_args()

        args["auto_generate_name"] = False

        try:
            if not isinstance(current_user, Account):
                raise ValueError("current_user must be an Account instance")

            # Get IDs before they might be detached from session
            app_id = app_model.id
            user_id = current_user.id

            response = AppGenerateService.generate(
                app_model=app_model, user=current_user, args=args, invoke_from=InvokeFrom.EXPLORE, streaming=True
            )
            RecommendedAppService.add_trial_app_record(app_id, user_id)
            return helper.compact_generate_response(response)
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
        except services.errors.conversation.ConversationCompletedError:
            raise ConversationCompletedError()
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logger.exception("App model config broken.")
            raise AppUnavailableError()
        except ProviderTokenNotInitError as ex:
            raise ProviderNotInitializeError(ex.description)
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except InvokeError as e:
            raise CompletionRequestError(e.description)
        except InvokeRateLimitError as ex:
            raise InvokeRateLimitHttpError(ex.description)
        except ValueError as e:
            raise e
        except Exception:
            logger.exception("internal server error.")
            raise InternalServerError()
```
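The repeated comment "Get IDs before they might be detached from session" guards against a SQLAlchemy pitfall: attribute access on an expired or detached instance can trigger a database round-trip or fail outright, so scalar copies are taken while the object is still attached. A small illustrative sketch (in-memory SQLite, hypothetical model):

```python
# Sketch of why scalar IDs are copied out early: commit() expires loaded
# attributes, and once the session closes, further refreshes on the detached
# instance are no longer possible. The App model here is illustrative.
from sqlalchemy import Column, String, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class App(Base):
    __tablename__ = "apps"
    id = Column(String, primary_key=True)


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(App(id="app-1"))
    session.commit()          # commit expires loaded attributes by default
    app = session.get(App, "app-1")
    app_id = app.id           # safe: still attached, value refreshed from the DB
# outside the session the instance is detached; any attribute that later needs
# a refresh would raise, while the plain string copy stays usable
print(app_id)
```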
class TrialMessageSuggestedQuestionApi(TrialAppResource):
|
||||
@trial_feature_enable
|
||||
def get(self, trial_app, message_id):
|
||||
app_model = trial_app
|
||||
app_mode = AppMode.value_of(app_model.mode)
|
||||
if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}:
|
||||
raise NotChatAppError()
|
||||
|
||||
message_id = str(message_id)
|
||||
|
||||
try:
|
||||
if not isinstance(current_user, Account):
|
||||
raise ValueError("current_user must be an Account instance")
|
||||
questions = MessageService.get_suggested_questions_after_answer(
|
||||
app_model=app_model, user=current_user, message_id=message_id, invoke_from=InvokeFrom.EXPLORE
|
||||
)
|
||||
except MessageNotExistsError:
|
||||
raise NotFound("Message not found")
|
||||
except ConversationNotExistsError:
|
||||
raise NotFound("Conversation not found")
|
||||
except SuggestedQuestionsAfterAnswerDisabledError:
|
||||
raise AppSuggestedQuestionsAfterAnswerDisabledError()
|
||||
except ProviderTokenNotInitError as ex:
|
||||
raise ProviderNotInitializeError(ex.description)
|
||||
except QuotaExceededError:
|
||||
raise ProviderQuotaExceededError()
|
||||
except ModelCurrentlyNotSupportError:
|
||||
raise ProviderModelCurrentlyNotSupportError()
|
||||
except InvokeError as e:
|
||||
raise CompletionRequestError(e.description)
|
||||
except Exception:
|
||||
logger.exception("internal server error.")
|
||||
raise InternalServerError()
|
||||
|
||||
return {"data": questions}
|
||||
|
||||
|
||||
class TrialChatAudioApi(TrialAppResource):
|
||||
@trial_feature_enable
|
||||
def post(self, trial_app):
|
||||
app_model = trial_app
|
||||
|
||||
file = request.files["file"]
|
||||
|
||||
try:
|
||||
if not isinstance(current_user, Account):
|
||||
raise ValueError("current_user must be an Account instance")
|
||||
|
||||
# Get IDs before they might be detached from session
|
||||
app_id = app_model.id
|
||||
user_id = current_user.id
|
||||
|
||||
response = AudioService.transcript_asr(app_model=app_model, file=file, end_user=None)
|
||||
RecommendedAppService.add_trial_app_record(app_id, user_id)
|
||||
return response
|
||||
except services.errors.app_model_config.AppModelConfigBrokenError:
|
||||
logger.exception("App model config broken.")
|
||||
raise AppUnavailableError()
|
||||
except NoAudioUploadedServiceError:
|
||||
raise NoAudioUploadedError()
|
||||
except AudioTooLargeServiceError as e:
|
||||
raise AudioTooLargeError(str(e))
|
||||
except UnsupportedAudioTypeServiceError:
|
||||
raise UnsupportedAudioTypeError()
|
||||
except ProviderNotSupportSpeechToTextServiceError:
|
||||
raise ProviderNotSupportSpeechToTextError()
|
||||
except ProviderTokenNotInitError as ex:
|
||||
raise ProviderNotInitializeError(ex.description)
|
||||
except QuotaExceededError:
|
||||
raise ProviderQuotaExceededError()
|
||||
except ModelCurrentlyNotSupportError:
|
||||
raise ProviderModelCurrentlyNotSupportError()
|
||||
except InvokeError as e:
|
||||
raise CompletionRequestError(e.description)
|
||||
except ValueError as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
logger.exception("internal server error.")
|
||||
raise InternalServerError()
|
||||
|
||||
|
||||
class TrialChatTextApi(TrialAppResource):
|
||||
@trial_feature_enable
|
||||
def post(self, trial_app):
|
||||
app_model = trial_app
|
||||
try:
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("message_id", type=str, required=False, location="json")
|
||||
parser.add_argument("voice", type=str, location="json")
|
||||
parser.add_argument("text", type=str, location="json")
|
||||
parser.add_argument("streaming", type=bool, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
message_id = args.get("message_id", None)
|
||||
text = args.get("text", None)
|
||||
voice = args.get("voice", None)
|
||||
if not isinstance(current_user, Account):
|
||||
raise ValueError("current_user must be an Account instance")
|
||||
|
||||
# Get IDs before they might be detached from session
|
||||
app_id = app_model.id
|
||||
user_id = current_user.id
|
||||
|
||||
response = AudioService.transcript_tts(app_model=app_model, text=text, voice=voice, message_id=message_id)
|
||||
RecommendedAppService.add_trial_app_record(app_id, user_id)
|
||||
return response
|
||||
except services.errors.app_model_config.AppModelConfigBrokenError:
|
||||
logger.exception("App model config broken.")
|
||||
raise AppUnavailableError()
|
||||
except NoAudioUploadedServiceError:
|
||||
raise NoAudioUploadedError()
|
||||
except AudioTooLargeServiceError as e:
|
||||
raise AudioTooLargeError(str(e))
|
||||
except UnsupportedAudioTypeServiceError:
|
||||
raise UnsupportedAudioTypeError()
|
||||
except ProviderNotSupportSpeechToTextServiceError:
|
||||
raise ProviderNotSupportSpeechToTextError()
|
||||
except ProviderTokenNotInitError as ex:
|
||||
raise ProviderNotInitializeError(ex.description)
|
||||
except QuotaExceededError:
|
||||
raise ProviderQuotaExceededError()
|
||||
except ModelCurrentlyNotSupportError:
|
||||
raise ProviderModelCurrentlyNotSupportError()
|
||||
except InvokeError as e:
|
||||
raise CompletionRequestError(e.description)
|
||||
except ValueError as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
logger.exception("internal server error.")
|
||||
raise InternalServerError()
|
||||
|
||||
|
||||
class TrialCompletionApi(TrialAppResource):
|
||||
@trial_feature_enable
|
||||
def post(self, trial_app):
|
||||
app_model = trial_app
|
||||
if app_model.mode != "completion":
|
||||
raise NotCompletionAppError()
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("inputs", type=dict, required=True, location="json")
|
||||
parser.add_argument("query", type=str, location="json", default="")
|
||||
parser.add_argument("files", type=list, required=False, location="json")
|
||||
parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json")
|
||||
parser.add_argument("retriever_from", type=str, required=False, default="explore_app", location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
streaming = args["response_mode"] == "streaming"
|
||||
args["auto_generate_name"] = False
|
||||
|
||||
try:
|
||||
if not isinstance(current_user, Account):
|
||||
raise ValueError("current_user must be an Account instance")
|
||||
|
||||
# Get IDs before they might be detached from session
|
||||
app_id = app_model.id
|
||||
user_id = current_user.id
|
||||
|
||||
response = AppGenerateService.generate(
|
||||
app_model=app_model, user=current_user, args=args, invoke_from=InvokeFrom.EXPLORE, streaming=streaming
|
||||
)
|
||||
|
||||
RecommendedAppService.add_trial_app_record(app_id, user_id)
|
||||
return helper.compact_generate_response(response)
|
||||
except services.errors.conversation.ConversationNotExistsError:
|
||||
raise NotFound("Conversation Not Exists.")
|
||||
except services.errors.conversation.ConversationCompletedError:
|
||||
raise ConversationCompletedError()
|
||||
except services.errors.app_model_config.AppModelConfigBrokenError:
|
||||
logger.exception("App model config broken.")
|
||||
raise AppUnavailableError()
|
||||
except ProviderTokenNotInitError as ex:
|
||||
raise ProviderNotInitializeError(ex.description)
|
||||
except QuotaExceededError:
|
||||
raise ProviderQuotaExceededError()
|
||||
except ModelCurrentlyNotSupportError:
|
||||
raise ProviderModelCurrentlyNotSupportError()
|
||||
except InvokeError as e:
|
||||
raise CompletionRequestError(e.description)
|
||||
except ValueError as e:
|
||||
raise e
|
||||
except Exception:
|
||||
logger.exception("internal server error.")
|
||||
raise InternalServerError()
|
||||
|
||||
|
||||
class TrialSitApi(Resource):
|
||||
"""Resource for trial app sites."""
|
||||
|
||||
@trial_feature_enable
|
||||
@get_app_model_with_trial
|
||||
def get(self, app_model):
|
||||
"""Retrieve app site info.
|
||||
|
||||
Returns the site configuration for the application including theme, icons, and text.
|
||||
"""
|
||||
site = db.session.query(Site).where(Site.app_id == app_model.id).first()
|
||||
|
||||
if not site:
|
||||
raise Forbidden()
|
||||
|
||||
assert app_model.tenant
|
||||
if app_model.tenant.status == TenantStatus.ARCHIVE:
|
||||
raise Forbidden()
|
||||
|
||||
return SiteResponse.model_validate(site).model_dump(mode="json")
|
||||
|
||||
|
||||
class TrialAppParameterApi(Resource):
|
||||
"""Resource for app variables."""
|
||||
|
||||
@trial_feature_enable
|
||||
@get_app_model_with_trial
|
||||
def get(self, app_model):
|
||||
"""Retrieve app parameters."""
|
||||
|
||||
if app_model is None:
|
||||
raise AppUnavailableError()
|
||||
|
        if app_model.mode in {AppMode.ADVANCED_CHAT, AppMode.WORKFLOW}:
            workflow = app_model.workflow
            if workflow is None:
                raise AppUnavailableError()

            features_dict = workflow.features_dict
            user_input_form = workflow.user_input_form(to_old_structure=True)
        else:
            app_model_config = app_model.app_model_config
            if app_model_config is None:
                raise AppUnavailableError()

            features_dict = app_model_config.to_dict()
            user_input_form = features_dict.get("user_input_form", [])

        parameters = get_parameters_from_feature_dict(features_dict=features_dict, user_input_form=user_input_form)
        return ParametersResponse.model_validate(parameters).model_dump(mode="json")


class AppApi(Resource):
    @trial_feature_enable
    @get_app_model_with_trial
    @marshal_with(app_detail_fields_with_site)
    def get(self, app_model):
        """Get app detail"""
        app_service = AppService()
        app_model = app_service.get_app(app_model)
        return app_model


class AppWorkflowApi(Resource):
    @trial_feature_enable
    @get_app_model_with_trial
    @marshal_with(workflow_fields)
    def get(self, app_model):
        """Get workflow detail"""
        if not app_model.workflow_id:
            raise AppUnavailableError()

        workflow = (
            db.session.query(Workflow)
            .where(
                Workflow.id == app_model.workflow_id,
            )
            .first()
        )
        return workflow


class DatasetListApi(Resource):
    @trial_feature_enable
    @get_app_model_with_trial
    def get(self, app_model):
        page = request.args.get("page", default=1, type=int)
        limit = request.args.get("limit", default=20, type=int)
        ids = request.args.getlist("ids")

        tenant_id = app_model.tenant_id
        if ids:
            datasets, total = DatasetService.get_datasets_by_ids(ids, tenant_id)
        else:
            raise NeedAddIdsError()

        data = cast(list[dict[str, Any]], marshal(datasets, dataset_fields))
        response = {"data": data, "has_more": len(datasets) == limit, "limit": limit, "total": total, "page": page}
        return response


api.add_resource(TrialChatApi, "/trial-apps/<uuid:app_id>/chat-messages", endpoint="trial_app_chat_completion")
api.add_resource(
    TrialMessageSuggestedQuestionApi,
    "/trial-apps/<uuid:app_id>/messages/<uuid:message_id>/suggested-questions",
    endpoint="trial_app_suggested_question",
)
api.add_resource(TrialChatAudioApi, "/trial-apps/<uuid:app_id>/audio-to-text", endpoint="trial_app_audio")
api.add_resource(TrialChatTextApi, "/trial-apps/<uuid:app_id>/text-to-audio", endpoint="trial_app_text")
api.add_resource(TrialCompletionApi, "/trial-apps/<uuid:app_id>/completion-messages", endpoint="trial_app_completion")
api.add_resource(TrialSitApi, "/trial-apps/<uuid:app_id>/site")
api.add_resource(TrialAppParameterApi, "/trial-apps/<uuid:app_id>/parameters", endpoint="trial_app_parameters")
api.add_resource(AppApi, "/trial-apps/<uuid:app_id>", endpoint="trial_app")
api.add_resource(TrialAppWorkflowRunApi, "/trial-apps/<uuid:app_id>/workflows/run", endpoint="trial_app_workflow_run")
api.add_resource(TrialAppWorkflowTaskStopApi, "/trial-apps/<uuid:app_id>/workflows/tasks/<string:task_id>/stop")
api.add_resource(AppWorkflowApi, "/trial-apps/<uuid:app_id>/workflows", endpoint="trial_app_workflow")
api.add_resource(DatasetListApi, "/trial-apps/<uuid:app_id>/datasets", endpoint="trial_app_datasets")
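For orientation, a minimal client sketch against the routes registered above (the base URL, auth header, and IDs are assumptions; only the paths come from the code):

import requests

BASE = "http://localhost:5001/console/api"  # assumed console API prefix
APP_ID = "00000000-0000-0000-0000-000000000000"  # placeholder trial app id

session = requests.Session()
session.headers["Authorization"] = "Bearer <console-access-token>"  # assumed auth scheme

params = session.get(f"{BASE}/trial-apps/{APP_ID}/parameters").json()  # TrialAppParameterApi
detail = session.get(f"{BASE}/trial-apps/{APP_ID}").json()             # AppApi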
@@ -2,14 +2,15 @@ from collections.abc import Callable
from functools import wraps
from typing import Concatenate, ParamSpec, TypeVar

from flask import abort
from flask_restx import Resource
from werkzeug.exceptions import NotFound

from controllers.console.explore.error import AppAccessDeniedError, TrialAppLimitExceeded, TrialAppNotAllowed
from controllers.console.wraps import account_initialization_required
from extensions.ext_database import db
from libs.login import current_account_with_tenant, login_required
from models import AccountTrialAppRecord, App, InstalledApp, TrialApp
from services.enterprise.enterprise_service import EnterpriseService
from services.feature_service import FeatureService

@@ -71,6 +72,61 @@ def user_allowed_to_access_app(view: Callable[Concatenate[InstalledApp, P], R] |
    return decorator


def trial_app_required(view: Callable[Concatenate[App, P], R] | None = None):
    def decorator(view: Callable[Concatenate[App, P], R]):
        @wraps(view)
        def decorated(app_id: str, *args: P.args, **kwargs: P.kwargs):
            current_user, _ = current_account_with_tenant()

            trial_app = db.session.query(TrialApp).where(TrialApp.app_id == str(app_id)).first()
            if trial_app is None:
                raise TrialAppNotAllowed()

            app = trial_app.app
            if app is None:
                raise TrialAppNotAllowed()

            account_trial_app_record = (
                db.session.query(AccountTrialAppRecord)
                .where(AccountTrialAppRecord.account_id == current_user.id, AccountTrialAppRecord.app_id == app_id)
                .first()
            )
            if account_trial_app_record:
                if account_trial_app_record.count >= trial_app.trial_limit:
                    raise TrialAppLimitExceeded()

            return view(app, *args, **kwargs)

        return decorated

    if view:
        return decorator(view)
    return decorator


def trial_feature_enable(view: Callable[..., R]) -> Callable[..., R]:
    @wraps(view)
    def decorated(*args, **kwargs):
        features = FeatureService.get_system_features()
        if not features.enable_trial_app:
            abort(403, "Trial app feature is not enabled.")
        return view(*args, **kwargs)

    return decorated


def explore_banner_enabled(view: Callable[..., R]) -> Callable[..., R]:
    @wraps(view)
    def decorated(*args, **kwargs):
        features = FeatureService.get_system_features()
        if not features.enable_explore_banner:
            abort(403, "Explore banner feature is not enabled.")
        return view(*args, **kwargs)

    return decorated


class InstalledAppResource(Resource):
    # must be reversed if there are multiple decorators

@@ -80,3 +136,13 @@ class InstalledAppResource(Resource):
        account_initialization_required,
        login_required,
    ]


class TrialAppResource(Resource):
    # must be reversed if there are multiple decorators
    method_decorators = [
        trial_app_required,
        account_initialization_required,
        login_required,
    ]
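The "must be reversed" comment refers to how Flask-RESTX applies method_decorators: each entry wraps the result of the previous one, so the last entry ends up outermost and runs first. A framework-free sketch of that composition (names illustrative):

def trace(name):
    def deco(f):
        def wrapper(*args, **kwargs):
            print(f"enter {name}")
            return f(*args, **kwargs)
        return wrapper
    return deco

def compose(decorators, func):
    # Mirrors the loop flask_restx runs over method_decorators: later entries wrap earlier ones.
    for d in decorators:
        func = d(func)
    return func

view = compose(
    [trace("trial_app_required"), trace("account_initialization_required"), trace("login_required")],
    lambda: "ok",
)
view()  # prints: enter login_required, enter account_initialization_required, enter trial_app_required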
@@ -3,6 +3,7 @@ from pydantic import BaseModel, Field, field_validator

class PreviewDetail(BaseModel):
    content: str
    summary: str | None = None
    child_chunks: list[str] | None = None
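For reference, the new field in use (values are made up; the generate_summary_preview implementations below are what actually populate it):

preview = PreviewDetail(content="Chapter 1: Overview of ...", child_chunks=["first paragraph", "second paragraph"])
preview.summary = "One-sentence summary of chapter 1."  # filled in by generate_summary_preview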
@@ -311,14 +311,18 @@ class IndexingRunner:
        qa_preview_texts: list[QAPreviewDetail] = []

        total_segments = 0
        # doc_form represents the segmentation method (general, parent-child, QA)
        index_type = doc_form
        index_processor = IndexProcessorFactory(index_type).init_index_processor()
        # one extract_setting is one source document
        for extract_setting in extract_settings:
            # extract
            processing_rule = DatasetProcessRule(
                mode=tmp_processing_rule["mode"], rules=json.dumps(tmp_processing_rule["rules"])
            )
            # Extract document content
            text_docs = index_processor.extract(extract_setting, process_rule_mode=tmp_processing_rule["mode"])
            # Cleaning and segmentation
            documents = index_processor.transform(
                text_docs,
                current_user=None,

@@ -361,6 +365,12 @@ class IndexingRunner:

        if doc_form == "qa_model":
            return IndexingEstimate(total_segments=total_segments * 20, qa_preview=qa_preview_texts, preview=[])

        # Generate summary preview
        summary_index_setting = tmp_processing_rule.get("summary_index_setting")
        if summary_index_setting and summary_index_setting.get("enable") and preview_texts:
            preview_texts = index_processor.generate_summary_preview(tenant_id, preview_texts, summary_index_setting)

        return IndexingEstimate(total_segments=total_segments, preview=preview_texts)

    def _extract(
@@ -72,7 +72,7 @@ class LLMGenerator:
            prompt_messages=list(prompts), model_parameters={"max_tokens": 500, "temperature": 1}, stream=False
        )
        answer = response.message.get_text_content()
        if answer is None:
            return ""
        try:
            result_dict = json.loads(answer)

@@ -113,9 +113,11 @@ class LLMGenerator:
        output_parser = SuggestedQuestionsAfterAnswerOutputParser()
        format_instructions = output_parser.get_format_instructions()

        prompt_template = PromptTemplateParser(template="{{histories}}\n{{format_instructions}}\nquestions:\n")

        prompt = prompt_template.format({"histories": histories, "format_instructions": format_instructions})

        try:
            model_manager = ModelManager()

@@ -141,11 +143,13 @@ class LLMGenerator:
            )

            text_content = response.message.get_text_content()
            questions = output_parser.parse(text_content) if text_content else []
        except InvokeError:
            questions = []
        except Exception:
            logger.exception("Failed to generate suggested questions after answer")
            questions = []

        return questions

@@ -156,10 +160,12 @@ class LLMGenerator:

        error = ""
        error_step = ""
        rule_config = {"prompt": "", "variables": [], "opening_statement": "", "error": ""}
        model_parameters = model_config.get("completion_params", {})
        if no_variable:
            prompt_template = PromptTemplateParser(WORKFLOW_RULE_CONFIG_PROMPT_GENERATE_TEMPLATE)

            prompt_generate = prompt_template.format(
                inputs={

@@ -190,7 +196,8 @@ class LLMGenerator:
            error = str(e)
            error_step = "generate rule config"
        except Exception as e:
            logger.exception("Failed to generate rule config, model: %s", model_config.get("name"))
            rule_config["error"] = str(e)

        rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else ""

@@ -245,7 +252,8 @@ class LLMGenerator:
            },
            remove_template_variables=False,
        )
        parameter_messages = [UserPromptMessage(content=parameter_generate_prompt)]

        # the second step to generate the task_parameter and task_statement
        statement_generate_prompt = statement_template.format(

@@ -255,13 +263,15 @@ class LLMGenerator:
            },
            remove_template_variables=False,
        )
        statement_messages = [UserPromptMessage(content=statement_generate_prompt)]

        try:
            parameter_content: LLMResult = model_instance.invoke_llm(
                prompt_messages=list(parameter_messages), model_parameters=model_parameters, stream=False
            )
            rule_config["variables"] = re.findall(r'"\s*([^"]+)\s*"', parameter_content.message.get_text_content())
        except InvokeError as e:
            error = str(e)
            error_step = "generate variables"

@@ -270,13 +280,15 @@ class LLMGenerator:
            statement_content: LLMResult = model_instance.invoke_llm(
                prompt_messages=list(statement_messages), model_parameters=model_parameters, stream=False
            )
            rule_config["opening_statement"] = statement_content.message.get_text_content()
        except InvokeError as e:
            error = str(e)
            error_step = "generate conversation opener"

        except Exception as e:
            logger.exception("Failed to generate rule config, model: %s", model_config.get("name"))
            rule_config["error"] = str(e)

        rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else ""

@@ -286,9 +298,11 @@ class LLMGenerator:
    @classmethod
    def generate_code(cls, tenant_id: str, instruction: str, model_config: dict, code_language: str = "javascript"):
        if code_language == "python":
            prompt_template = PromptTemplateParser(PYTHON_CODE_GENERATOR_PROMPT_TEMPLATE)
        else:
            prompt_template = PromptTemplateParser(JAVASCRIPT_CODE_GENERATOR_PROMPT_TEMPLATE)

        prompt = prompt_template.format(
            inputs={

@@ -321,7 +335,8 @@ class LLMGenerator:
            return {"code": "", "language": code_language, "error": f"Failed to generate code. Error: {error}"}
        except Exception as e:
            logger.exception(
                "Failed to invoke LLM model, model: %s, language: %s", model_config.get("name"), code_language
            )
            return {"code": "", "language": code_language, "error": f"An unexpected error occurred: {str(e)}"}

@@ -335,7 +350,8 @@ class LLMGenerator:
            model_type=ModelType.LLM,
        )

        prompt_messages: list[PromptMessage] = [SystemPromptMessage(content=prompt), UserPromptMessage(content=query)]

        # Explicitly use the non-streaming overload
        result = model_instance.invoke_llm(

@@ -381,16 +397,19 @@ class LLMGenerator:
            parsed_content = json_repair.loads(raw_content)

            if not isinstance(parsed_content, dict | list):
                raise ValueError(f"Failed to parse structured output from llm: {raw_content}")

            generated_json_schema = json.dumps(parsed_content, indent=2, ensure_ascii=False)
            return {"output": generated_json_schema, "error": ""}

        except InvokeError as e:
            error = str(e)
            return {"output": "", "error": f"Failed to generate JSON Schema. Error: {error}"}
        except Exception as e:
            logger.exception("Failed to invoke LLM model, model: %s", model_config.get("name"))
            return {"output": "", "error": f"An unexpected error occurred: {str(e)}"}

    @staticmethod
@@ -398,7 +417,8 @@ class LLMGenerator:
        tenant_id: str, flow_id: str, current: str, instruction: str, model_config: dict, ideal_output: str | None
    ):
        last_run: Message | None = (
            db.session.query(Message).where(Message.app_id == flow_id).order_by(Message.created_at.desc()).first()
        )
        if not last_run:
            return LLMGenerator.__instruction_modify_common(

@@ -446,7 +466,8 @@ class LLMGenerator:
        workflow = workflow_service.get_draft_workflow(app_model=app)
        if not workflow:
            raise ValueError("Workflow not found for the given app model.")
        last_run = workflow_service.get_node_last_run(app_model=app, workflow=workflow, node_id=node_id)
        try:
            node_type = cast(WorkflowNodeExecutionModel, last_run).node_type
        except Exception:

@@ -470,7 +491,8 @@ class LLMGenerator:
        )

        def agent_log_of(node_execution: WorkflowNodeExecutionModel) -> Sequence:
            raw_agent_log = node_execution.execution_metadata_dict.get(WorkflowNodeExecutionMetadataKey.AGENT_LOG, [])
            if not raw_agent_log:
                return []

@@ -518,11 +540,14 @@ class LLMGenerator:
        ERROR_MESSAGE = "{{#error_message#}}"
        injected_instruction = instruction
        if LAST_RUN in injected_instruction:
            injected_instruction = injected_instruction.replace(LAST_RUN, json.dumps(last_run))
        if CURRENT in injected_instruction:
            injected_instruction = injected_instruction.replace(CURRENT, current or "null")
        if ERROR_MESSAGE in injected_instruction:
            injected_instruction = injected_instruction.replace(ERROR_MESSAGE, error_message or "null")
        model_instance = ModelManager().get_model_instance(
            tenant_id=tenant_id,
            model_type=ModelType.LLM,

@@ -560,11 +585,13 @@ class LLMGenerator:
            first_brace = generated_raw.find("{")
            last_brace = generated_raw.rfind("}")
            if first_brace == -1 or last_brace == -1 or last_brace < first_brace:
                raise ValueError(f"Could not find a valid JSON object in response: {generated_raw}")
            json_str = generated_raw[first_brace : last_brace + 1]
            data = json_repair.loads(json_str)
            if not isinstance(data, dict):
                raise TypeError(f"Expected a JSON object, but got {type(data).__name__}")
            return data
        except InvokeError as e:
            error = str(e)
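The brace-slicing used above is a common salvage step for LLM output that wraps JSON in prose; a standalone sketch (json_repair is the same third-party package this module imports):

import json_repair

def parse_llm_json(raw: str) -> dict:
    # Trim any prose around the outermost JSON object before repairing it.
    first, last = raw.find("{"), raw.rfind("}")
    if first == -1 or last < first:
        raise ValueError(f"No JSON object in: {raw!r}")
    data = json_repair.loads(raw[first : last + 1])
    if not isinstance(data, dict):
        raise TypeError(f"Expected a JSON object, got {type(data).__name__}")
    return data

# e.g. parse_llm_json('Sure! {"questions": ["a", "b"]} Hope this helps.')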
@@ -434,3 +434,20 @@ INSTRUCTION_GENERATE_TEMPLATE_PROMPT = """The output of this prompt is not as ex
You should edit the prompt according to the IDEAL OUTPUT."""

INSTRUCTION_GENERATE_TEMPLATE_CODE = """Please fix the errors in the {{#error_message#}}."""

DEFAULT_GENERATOR_SUMMARY_PROMPT = (
    """Summarize the following content. Extract only the key information and main points. """
    """Remove redundant details.

Requirements:
1. Write a concise summary in plain text
2. Use the same language as the input content
3. Focus on important facts, concepts, and details
4. If images are included, describe their key information
5. Do not use words like "好的", "ok", "I understand", "This text discusses", "The content mentions"
6. Write directly without extra words

Output only the summary text. Start summarizing now:

"""
)
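The prompt is consumed by plain concatenation; the index processors further down prepend it to the chunk text, along the lines of this sketch (chunk text is made up):

chunk_text = "Q3 revenue grew 12% year over year, driven by ..."  # hypothetical chunk
prompt = f"{DEFAULT_GENERATOR_SUMMARY_PROMPT}\n{chunk_text}"
# `prompt` is then sent as a single UserPromptMessage to the configured LLM.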
@@ -389,15 +389,14 @@ class RetrievalService:
            .all()
        }

        valid_dataset_documents = {}
        image_doc_ids: list[Any] = []
        child_index_node_ids = []
        index_node_ids = []
        doc_to_document_map = {}
        summary_segment_ids = set()  # Track segments retrieved via summary

        # First pass: collect all document IDs and identify summary documents
        for document in documents:
            document_id = document.metadata.get("document_id")
            if document_id not in dataset_documents:

@@ -408,16 +407,24 @@ class RetrievalService:
                continue
            valid_dataset_documents[document_id] = dataset_document

            doc_id = document.metadata.get("doc_id") or ""
            doc_to_document_map[doc_id] = document

            # Check if this is a summary document
            is_summary = document.metadata.get("is_summary", False)
            if is_summary:
                # For summary documents, find the original chunk via original_chunk_id
                original_chunk_id = document.metadata.get("original_chunk_id")
                if original_chunk_id:
                    summary_segment_ids.add(original_chunk_id)
                continue  # Skip adding to other lists for summary documents

            if dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
                if document.metadata.get("doc_type") == DocType.IMAGE:
                    image_doc_ids.append(doc_id)
                else:
                    child_index_node_ids.append(doc_id)
            else:
                if document.metadata.get("doc_type") == DocType.IMAGE:
                    image_doc_ids.append(doc_id)
                else:

@@ -433,6 +440,7 @@ class RetrievalService:
        attachment_map: dict[str, list[dict[str, Any]]] = {}
        child_chunk_map: dict[str, list[ChildChunk]] = {}
        doc_segment_map: dict[str, list[str]] = {}
        segment_summary_map: dict[str, str] = {}  # Map segment_id to summary content

        with session_factory.create_session() as session:
            attachments = cls.get_segment_attachment_infos(image_doc_ids, session)

@@ -447,6 +455,7 @@ class RetrievalService:
                    doc_segment_map[attachment["segment_id"]].append(attachment["attachment_id"])
                else:
                    doc_segment_map[attachment["segment_id"]] = [attachment["attachment_id"]]

            child_chunk_stmt = select(ChildChunk).where(ChildChunk.index_node_id.in_(child_index_node_ids))
            child_index_nodes = session.execute(child_chunk_stmt).scalars().all()

@@ -470,6 +479,7 @@ class RetrievalService:
            index_node_segments = session.execute(document_segment_stmt).scalars().all()  # type: ignore
            for index_node_segment in index_node_segments:
                doc_segment_map[index_node_segment.id] = [index_node_segment.index_node_id]

            if segment_ids:
                document_segment_stmt = select(DocumentSegment).where(
                    DocumentSegment.enabled == True,

@@ -481,6 +491,42 @@ class RetrievalService:
                if index_node_segments:
                    segments.extend(index_node_segments)

            # Handle summary documents: query segments by original_chunk_id
            if summary_segment_ids:
                summary_segment_ids_list = list(summary_segment_ids)
                summary_segment_stmt = select(DocumentSegment).where(
                    DocumentSegment.enabled == True,
                    DocumentSegment.status == "completed",
                    DocumentSegment.id.in_(summary_segment_ids_list),
                )
                summary_segments = session.execute(summary_segment_stmt).scalars().all()  # type: ignore
                segments.extend(summary_segments)
                # Add summary segment IDs to segment_ids for the summary query
                for seg in summary_segments:
                    if seg.id not in segment_ids:
                        segment_ids.append(seg.id)

            # Batch query summaries for segments retrieved via summary (only enabled summaries)
            if summary_segment_ids:
                from models.dataset import DocumentSegmentSummary

                summaries = (
                    session.query(DocumentSegmentSummary)
                    .filter(
                        DocumentSegmentSummary.chunk_id.in_(list(summary_segment_ids)),
                        DocumentSegmentSummary.status == "completed",
                        DocumentSegmentSummary.enabled == True,  # Only retrieve enabled summaries
                    )
                    .all()
                )
                for summary in summaries:
                    if summary.summary_content:
                        segment_summary_map[summary.chunk_id] = summary.summary_content

        include_segment_ids = set()
        segment_child_map: dict[str, dict[str, Any]] = {}
        records: list[dict[str, Any]] = []

        for segment in segments:
            child_chunks: list[ChildChunk] = child_chunk_map.get(segment.id, [])
            attachment_infos: list[dict[str, Any]] = attachment_map.get(segment.id, [])

@@ -493,7 +539,7 @@ class RetrievalService:
                child_chunk_details = []
                max_score = 0.0
                for child_chunk in child_chunks:
                    document = doc_to_document_map.get(child_chunk.index_node_id)
                    child_chunk_detail = {
                        "id": child_chunk.id,
                        "content": child_chunk.content,

@@ -503,7 +549,7 @@ class RetrievalService:
                    child_chunk_details.append(child_chunk_detail)
                    max_score = max(max_score, document.metadata.get("score", 0.0) if document else 0.0)
                for attachment_info in attachment_infos:
                    file_document = doc_to_document_map.get(attachment_info["id"])
                    max_score = max(
                        max_score, file_document.metadata.get("score", 0.0) if file_document else 0.0
                    )

@@ -576,9 +622,16 @@ class RetrievalService:
                    else None
                )

                # Extract the summary if this segment was retrieved via summary
                summary_content = segment_summary_map.get(segment.id)

                # Create the RetrievalSegments object
                retrieval_segment = RetrievalSegments(
                    segment=segment,
                    child_chunks=child_chunks_list,
                    score=score,
                    files=files,
                    summary=summary_content,
                )
                result.append(retrieval_segment)
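The key convention in the code above: a hit on a summary vector carries is_summary and original_chunk_id in its metadata, and attribution is redirected to the original segment rather than the summary document itself. A toy illustration of that redirection, using plain dicts in place of retrieval documents:

# Toy data; real documents come back from the vector store with this metadata.
hits = [
    {"metadata": {"doc_id": "n1", "is_summary": False}},
    {"metadata": {"doc_id": "s1", "is_summary": True, "original_chunk_id": "seg-42"}},
]
summary_segment_ids = {
    h["metadata"]["original_chunk_id"] for h in hits if h["metadata"].get("is_summary")
}
assert summary_segment_ids == {"seg-42"}  # seg-42 is fetched instead of the summary doc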
@@ -20,3 +20,4 @@ class RetrievalSegments(BaseModel):
    child_chunks: list[RetrievalChildChunk] | None = None
    score: float | None = None
    files: list[dict[str, str | int]] | None = None
    summary: str | None = None  # Summary content if retrieved via summary index
@@ -13,6 +13,7 @@ from urllib.parse import unquote, urlparse
import httpx

from configs import dify_config
from core.entities.knowledge_entities import PreviewDetail
from core.helper import ssrf_proxy
from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.index_processor.constant.doc_type import DocType

@@ -45,6 +46,17 @@ class BaseIndexProcessor(ABC):
    def transform(self, documents: list[Document], current_user: Account | None = None, **kwargs) -> list[Document]:
        raise NotImplementedError

    @abstractmethod
    def generate_summary_preview(
        self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict
    ) -> list[PreviewDetail]:
        """
        For each segment in preview_texts, generate a summary using the LLM and attach it to the segment.
        The summary can be stored in a new attribute, e.g., summary.
        This method should be implemented by subclasses.
        """
        raise NotImplementedError

    @abstractmethod
    def load(
        self,
@@ -1,9 +1,25 @@
"""Paragraph index processor."""

import logging
import re
import uuid
from collections.abc import Mapping
from typing import Any

from core.entities.knowledge_entities import PreviewDetail
from core.file import File, FileTransferMethod, FileType, file_manager
from core.llm_generator.prompts import DEFAULT_GENERATOR_SUMMARY_PROMPT
from core.model_manager import ModelInstance
from core.model_runtime.entities.message_entities import (
    ImagePromptMessageContent,
    PromptMessageContentUnionTypes,
    TextPromptMessageContent,
    UserPromptMessage,
)
from core.model_runtime.entities.model_entities import ModelFeature, ModelType
from core.provider_manager import ProviderManager
from core.rag.cleaner.clean_processor import CleanProcessor
from core.rag.datasource.keyword.keyword_factory import Keyword
from core.rag.datasource.retrieval_service import RetrievalService

@@ -17,12 +33,16 @@ from core.rag.index_processor.index_processor_base import BaseIndexProcessor
from core.rag.models.document import AttachmentDocument, Document, MultimodalGeneralStructureChunk
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from core.tools.utils.text_processing_utils import remove_leading_symbols
from extensions.ext_database import db
from factories.file_factory import build_from_mapping
from libs import helper
from models import UploadFile
from models.account import Account
from models.dataset import Dataset, DatasetProcessRule, DocumentSegment, SegmentAttachmentBinding
from models.dataset import Document as DatasetDocument
from services.account_service import AccountService
from services.entities.knowledge_entities.knowledge_entities import Rule
from services.summary_index_service import SummaryIndexService

logger = logging.getLogger(__name__)
class ParagraphIndexProcessor(BaseIndexProcessor):

@@ -108,6 +128,29 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
            keyword.add_texts(documents)

    def clean(self, dataset: Dataset, node_ids: list[str] | None, with_keywords: bool = True, **kwargs):
        # Note: summary indexes are disabled (not deleted) when segments are disabled.
        # This method is called for actual deletion scenarios (e.g., when a segment is deleted);
        # for disable operations, disable_summaries_for_segments is called directly in the task.
        # Summaries are therefore only deleted here when explicitly requested.
        delete_summaries = kwargs.get("delete_summaries", False)
        if delete_summaries:
            if node_ids:
                # Find segments by index_node_id
                segments = (
                    db.session.query(DocumentSegment)
                    .filter(
                        DocumentSegment.dataset_id == dataset.id,
                        DocumentSegment.index_node_id.in_(node_ids),
                    )
                    .all()
                )
                segment_ids = [segment.id for segment in segments]
                if segment_ids:
                    SummaryIndexService.delete_summaries_for_segments(dataset, segment_ids)
            else:
                # Delete all summaries for the dataset
                SummaryIndexService.delete_summaries_for_segments(dataset, None)

        if dataset.indexing_technique == "high_quality":
            vector = Vector(dataset)
            if node_ids:
@@ -227,3 +270,303 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
            }
        else:
            raise ValueError("Chunks is not a list")

    def generate_summary_preview(
        self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict
    ) -> list[PreviewDetail]:
        """
        For each segment, concurrently call generate_summary to generate a summary
        and write it to the summary attribute of PreviewDetail.
        In preview mode (indexing-estimate), any summary-generation failure raises an exception.
        """
        import concurrent.futures

        from flask import current_app

        # Capture the Flask app context for worker threads
        flask_app = None
        try:
            flask_app = current_app._get_current_object()  # type: ignore
        except RuntimeError:
            logger.warning("No Flask application context available, summary generation may fail")

        def process(preview: PreviewDetail) -> None:
            """Generate a summary for a single preview item."""
            if flask_app:
                # Ensure the Flask app context exists in the worker thread
                with flask_app.app_context():
                    preview.summary = self.generate_summary(tenant_id, preview.content, summary_index_setting)
            else:
                # Fallback: try without an app context (may fail)
                preview.summary = self.generate_summary(tenant_id, preview.content, summary_index_setting)

        # Generate summaries concurrently using a ThreadPoolExecutor.
        # Set a reasonable timeout to prevent hanging (60 seconds per chunk, max 5 minutes total).
        timeout_seconds = min(300, 60 * len(preview_texts))
        errors: list[Exception] = []

        with concurrent.futures.ThreadPoolExecutor(max_workers=min(10, len(preview_texts))) as executor:
            futures = [executor.submit(process, preview) for preview in preview_texts]
            # Wait for all tasks to complete, with a timeout
            done, not_done = concurrent.futures.wait(futures, timeout=timeout_seconds)

            # Cancel tasks that did not complete in time
            if not_done:
                timeout_error_msg = (
                    f"Summary generation timeout: {len(not_done)} chunks did not complete within {timeout_seconds}s"
                )
                logger.warning("%s. Cancelling remaining tasks...", timeout_error_msg)
                # In preview mode, a timeout is also an error
                errors.append(TimeoutError(timeout_error_msg))
                for future in not_done:
                    future.cancel()
                # Wait briefly for the cancellation to take effect
                concurrent.futures.wait(not_done, timeout=5)

            # Collect exceptions from completed futures
            for future in done:
                try:
                    future.result()  # re-raises any exception that occurred
                except Exception as e:
                    logger.exception("Error in summary generation future")
                    errors.append(e)

        # In preview mode (indexing-estimate), any error fails the request
        if errors:
            error_messages = [str(e) for e in errors]
            error_summary = (
                f"Failed to generate summaries for {len(errors)} chunk(s). "
                f"Errors: {'; '.join(error_messages[:3])}"  # show the first 3 errors
            )
            if len(errors) > 3:
                error_summary += f" (and {len(errors) - 3} more)"
            logger.error("Summary generation failed in preview mode: %s", error_summary)
            raise ValueError(error_summary)

        return preview_texts
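The wait/cancel pattern above, isolated. Note the budget is min(300, 60 * n) seconds: 3 chunks get 180 s, and anything from 5 chunks up hits the 300 s cap. A runnable toy version (sleep stands in for an LLM call):

import concurrent.futures
import time

def summarize(i: int) -> str:
    time.sleep(0.1)  # stand-in for one summary LLM call
    return f"summary-{i}"

items = list(range(3))
timeout_seconds = min(300, 60 * len(items))  # 180s for 3 items

with concurrent.futures.ThreadPoolExecutor(max_workers=min(10, len(items))) as executor:
    futures = [executor.submit(summarize, i) for i in items]
    done, not_done = concurrent.futures.wait(futures, timeout=timeout_seconds)
    for f in not_done:
        f.cancel()  # best effort: a future already running cannot be interrupted
    results = [f.result() for f in done]  # .result() re-raises worker exceptions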
    @staticmethod
    def generate_summary(
        tenant_id: str,
        text: str,
        summary_index_setting: dict | None = None,
        segment_id: str | None = None,
    ) -> str:
        """
        Generate a summary for the given text using ModelInstance.invoke_llm with the default or a custom
        summary prompt. Supports vision models by including images from the segment attachments or text content.

        Args:
            tenant_id: Tenant ID
            text: Text content to summarize
            summary_index_setting: Summary index configuration
            segment_id: Optional segment ID to fetch attachments from the SegmentAttachmentBinding table
        """
        if not summary_index_setting or not summary_index_setting.get("enable"):
            raise ValueError("summary_index_setting is required and must be enabled to generate summary.")

        model_name = summary_index_setting.get("model_name")
        model_provider_name = summary_index_setting.get("model_provider_name")
        summary_prompt = summary_index_setting.get("summary_prompt")

        # Fall back to the default summary prompt
        if not summary_prompt:
            summary_prompt = DEFAULT_GENERATOR_SUMMARY_PROMPT

        provider_manager = ProviderManager()
        provider_model_bundle = provider_manager.get_provider_model_bundle(
            tenant_id, model_provider_name, ModelType.LLM
        )
        model_instance = ModelInstance(provider_model_bundle, model_name)

        # Get the model schema to check whether vision is supported
        model_schema = model_instance.model_type_instance.get_model_schema(model_name, model_instance.credentials)
        supports_vision = model_schema and model_schema.features and ModelFeature.VISION in model_schema.features

        # Extract images if the model supports vision
        image_files = []
        if supports_vision:
            # First, try to get images from SegmentAttachmentBinding (preferred method)
            if segment_id:
                image_files = ParagraphIndexProcessor._extract_images_from_segment_attachments(tenant_id, segment_id)

            # If no images come from attachments, fall back to extracting them from the text
            if not image_files:
                image_files = ParagraphIndexProcessor._extract_images_from_text(tenant_id, text)

        # Build prompt messages
        prompt_messages = []

        if image_files:
            # If we have images, create a UserPromptMessage with both text and images
            prompt_message_contents: list[PromptMessageContentUnionTypes] = []

            # Add images first
            for file in image_files:
                try:
                    file_content = file_manager.to_prompt_message_content(
                        file, image_detail_config=ImagePromptMessageContent.DETAIL.LOW
                    )
                    prompt_message_contents.append(file_content)
                except Exception as e:
                    logger.warning("Failed to convert image file to prompt message content: %s", str(e))
                    continue

            # Add the text content
            if prompt_message_contents:  # only add text if we successfully added images
                prompt_message_contents.append(TextPromptMessageContent(data=f"{summary_prompt}\n{text}"))
                prompt_messages.append(UserPromptMessage(content=prompt_message_contents))
            else:
                # If image conversion failed, fall back to text-only
                prompt = f"{summary_prompt}\n{text}"
                prompt_messages.append(UserPromptMessage(content=prompt))
        else:
            # No images, use a simple text prompt
            prompt = f"{summary_prompt}\n{text}"
            prompt_messages.append(UserPromptMessage(content=prompt))

        result = model_instance.invoke_llm(prompt_messages=prompt_messages, model_parameters={}, stream=False)

        return getattr(result.message, "content", "")
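A hedged usage sketch of the static helper above; the provider and model names are placeholders, and a Flask app context plus a configured model provider for the tenant are assumed:

summary_index_setting = {
    "enable": True,
    "model_provider_name": "openai",  # placeholder provider
    "model_name": "gpt-4o-mini",      # placeholder model
    "summary_prompt": None,           # None falls back to DEFAULT_GENERATOR_SUMMARY_PROMPT
}
summary = ParagraphIndexProcessor.generate_summary(
    tenant_id="tenant-uuid",          # placeholder tenant
    text="Long chunk text to condense ...",
    summary_index_setting=summary_index_setting,
)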
    @staticmethod
    def _extract_images_from_text(tenant_id: str, text: str) -> list[File]:
        """
        Extract images from markdown text and convert them to File objects.

        Args:
            tenant_id: Tenant ID
            text: Text content that may contain markdown image links

        Returns:
            List of File objects representing images found in the text
        """
        # Extract markdown images using a regex pattern
        pattern = r"!\[.*?\]\((.*?)\)"
        images = re.findall(pattern, text)

        if not images:
            return []

        upload_file_id_list = []

        for image in images:
            # For data before v0.10.0
            pattern = r"/files/([a-f0-9\-]+)/image-preview(?:\?.*?)?"
            match = re.search(pattern, image)
            if match:
                upload_file_id = match.group(1)
                upload_file_id_list.append(upload_file_id)
                continue

            # For data after v0.10.0
            pattern = r"/files/([a-f0-9\-]+)/file-preview(?:\?.*?)?"
            match = re.search(pattern, image)
            if match:
                upload_file_id = match.group(1)
                upload_file_id_list.append(upload_file_id)
                continue

            # For the tools directory - direct file formats (e.g., .png, .jpg, etc.)
            pattern = r"/files/tools/([a-f0-9\-]+)\.([a-zA-Z0-9]+)(?:\?[^\s\)\"\']*)?"
            match = re.search(pattern, image)
            if match:
                # Tool files are handled differently, skip for now
                continue

        if not upload_file_id_list:
            return []

        # Get unique IDs for the database query
        unique_upload_file_ids = list(set(upload_file_id_list))
        upload_files = (
            db.session.query(UploadFile)
            .where(UploadFile.id.in_(unique_upload_file_ids), UploadFile.tenant_id == tenant_id)
            .all()
        )

        # Create File objects from UploadFile records
        file_objects = []
        for upload_file in upload_files:
            # Only process image files
            if not upload_file.mime_type or "image" not in upload_file.mime_type:
                continue

            mapping = {
                "upload_file_id": upload_file.id,
                "transfer_method": FileTransferMethod.LOCAL_FILE.value,
                "type": FileType.IMAGE.value,
            }

            try:
                file_obj = build_from_mapping(
                    mapping=mapping,
                    tenant_id=tenant_id,
                )
                file_objects.append(file_obj)
            except Exception as e:
                logger.warning("Failed to create File object from UploadFile %s: %s", upload_file.id, str(e))
                continue

        return file_objects
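The three patterns above cover the historical upload URL formats; for example, the post-v0.10.0 form (hypothetical id):

import re

text = "![chart](/files/1a2b3c4d-0000-0000-0000-000000000000/file-preview?timestamp=1)"
m = re.search(r"/files/([a-f0-9\-]+)/file-preview(?:\?.*?)?", text)
assert m and m.group(1) == "1a2b3c4d-0000-0000-0000-000000000000"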
    @staticmethod
    def _extract_images_from_segment_attachments(tenant_id: str, segment_id: str) -> list[File]:
        """
        Extract images from the SegmentAttachmentBinding table (preferred method).
        This matches how DatasetRetrieval gets segment attachments.

        Args:
            tenant_id: Tenant ID
            segment_id: Segment ID to fetch attachments for

        Returns:
            List of File objects representing images found in segment attachments
        """
        from sqlalchemy import select

        # Query attachments from the SegmentAttachmentBinding table
        attachments_with_bindings = db.session.execute(
            select(SegmentAttachmentBinding, UploadFile)
            .join(UploadFile, UploadFile.id == SegmentAttachmentBinding.attachment_id)
            .where(
                SegmentAttachmentBinding.segment_id == segment_id,
                SegmentAttachmentBinding.tenant_id == tenant_id,
            )
        ).all()

        if not attachments_with_bindings:
            return []

        file_objects = []
        for _, upload_file in attachments_with_bindings:
            # Only process image files
            if not upload_file.mime_type or "image" not in upload_file.mime_type:
                continue

            try:
                # Create the File object directly (similar to DatasetRetrieval)
                file_obj = File(
                    id=upload_file.id,
                    filename=upload_file.name,
                    extension="." + upload_file.extension,
                    mime_type=upload_file.mime_type,
                    tenant_id=tenant_id,
                    type=FileType.IMAGE,
                    transfer_method=FileTransferMethod.LOCAL_FILE,
                    remote_url=upload_file.source_url,
                    related_id=upload_file.id,
                    size=upload_file.size,
                    storage_key=upload_file.key,
                )
                file_objects.append(file_obj)
            except Exception as e:
                logger.warning("Failed to create File object from UploadFile %s: %s", upload_file.id, str(e))
                continue

        return file_objects
@@ -1,11 +1,13 @@
"""Parent-child index processor."""

import json
import logging
import uuid
from collections.abc import Mapping
from typing import Any

from configs import dify_config
from core.entities.knowledge_entities import PreviewDetail
from core.model_manager import ModelInstance
from core.rag.cleaner.clean_processor import CleanProcessor
from core.rag.datasource.retrieval_service import RetrievalService

@@ -25,6 +27,9 @@ from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegm
from models.dataset import Document as DatasetDocument
from services.account_service import AccountService
from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule
from services.summary_index_service import SummaryIndexService

logger = logging.getLogger(__name__)
class ParentChildIndexProcessor(BaseIndexProcessor):

@@ -135,6 +140,29 @@ class ParentChildIndexProcessor(BaseIndexProcessor):

    def clean(self, dataset: Dataset, node_ids: list[str] | None, with_keywords: bool = True, **kwargs):
        # node_ids are the segments' node_ids.
        # Note: summary indexes are disabled (not deleted) when segments are disabled.
        # This method is called for actual deletion scenarios (e.g., when a segment is deleted);
        # for disable operations, disable_summaries_for_segments is called directly in the task.
        # Summaries are therefore only deleted here when explicitly requested.
        delete_summaries = kwargs.get("delete_summaries", False)
        if delete_summaries:
            if node_ids:
                # Find segments by index_node_id
                segments = (
                    db.session.query(DocumentSegment)
                    .filter(
                        DocumentSegment.dataset_id == dataset.id,
                        DocumentSegment.index_node_id.in_(node_ids),
                    )
                    .all()
                )
                segment_ids = [segment.id for segment in segments]
                if segment_ids:
                    SummaryIndexService.delete_summaries_for_segments(dataset, segment_ids)
            else:
                # Delete all summaries for the dataset
                SummaryIndexService.delete_summaries_for_segments(dataset, None)

        if dataset.indexing_technique == "high_quality":
            delete_child_chunks = kwargs.get("delete_child_chunks") or False
            precomputed_child_node_ids = kwargs.get("precomputed_child_node_ids")
@@ -326,3 +354,93 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
            "preview": preview,
            "total_segments": len(parent_childs.parent_child_chunks),
        }

    def generate_summary_preview(
        self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict
    ) -> list[PreviewDetail]:
        """
        For each parent chunk in preview_texts, concurrently call generate_summary to generate a summary
        and write it to the summary attribute of PreviewDetail.
        In preview mode (indexing-estimate), any summary-generation failure raises an exception.

        Note: for the parent-child structure, summaries are only generated for parent chunks.
        """
        import concurrent.futures

        from flask import current_app

        # Capture the Flask app context for worker threads
        flask_app = None
        try:
            flask_app = current_app._get_current_object()  # type: ignore
        except RuntimeError:
            logger.warning("No Flask application context available, summary generation may fail")

        def process(preview: PreviewDetail) -> None:
            """Generate a summary for a single preview item (parent chunk)."""
            from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor

            if flask_app:
                # Ensure the Flask app context exists in the worker thread
                with flask_app.app_context():
                    preview.summary = ParagraphIndexProcessor.generate_summary(
                        tenant_id=tenant_id,
                        text=preview.content,
                        summary_index_setting=summary_index_setting,
                    )
            else:
                # Fallback: try without an app context (may fail)
                preview.summary = ParagraphIndexProcessor.generate_summary(
                    tenant_id=tenant_id,
                    text=preview.content,
                    summary_index_setting=summary_index_setting,
                )

        # Generate summaries concurrently using a ThreadPoolExecutor.
        # Set a reasonable timeout to prevent hanging (60 seconds per chunk, max 5 minutes total).
        timeout_seconds = min(300, 60 * len(preview_texts))
        errors: list[Exception] = []

        with concurrent.futures.ThreadPoolExecutor(max_workers=min(10, len(preview_texts))) as executor:
            futures = [executor.submit(process, preview) for preview in preview_texts]
            # Wait for all tasks to complete, with a timeout
            done, not_done = concurrent.futures.wait(futures, timeout=timeout_seconds)

            # Cancel tasks that did not complete in time
            if not_done:
                timeout_error_msg = (
                    f"Summary generation timeout: {len(not_done)} chunks did not complete within {timeout_seconds}s"
                )
                logger.warning("%s. Cancelling remaining tasks...", timeout_error_msg)
                # In preview mode, a timeout is also an error
                errors.append(TimeoutError(timeout_error_msg))
                for future in not_done:
                    future.cancel()
                # Wait briefly for the cancellation to take effect
                concurrent.futures.wait(not_done, timeout=5)

            # Collect exceptions from completed futures
            for future in done:
                try:
                    future.result()  # re-raises any exception that occurred
                except Exception as e:
                    logger.exception("Error in summary generation future")
                    errors.append(e)

        # In preview mode (indexing-estimate), any error fails the request
        if errors:
            error_messages = [str(e) for e in errors]
            error_summary = (
                f"Failed to generate summaries for {len(errors)} chunk(s). "
                f"Errors: {'; '.join(error_messages[:3])}"  # show the first 3 errors
            )
            if len(errors) > 3:
                error_summary += f" (and {len(errors) - 3} more)"
            logger.error("Summary generation failed in preview mode: %s", error_summary)
            raise ValueError(error_summary)

        return preview_texts
@@ -11,6 +11,7 @@ import pandas as pd
from flask import Flask, current_app
from werkzeug.datastructures import FileStorage

from core.entities.knowledge_entities import PreviewDetail
from core.llm_generator.llm_generator import LLMGenerator
from core.rag.cleaner.clean_processor import CleanProcessor
from core.rag.datasource.retrieval_service import RetrievalService

@@ -25,9 +26,10 @@ from core.rag.retrieval.retrieval_methods import RetrievalMethod
from core.tools.utils.text_processing_utils import remove_leading_symbols
from libs import helper
from models.account import Account
from models.dataset import Dataset, DocumentSegment
from models.dataset import Document as DatasetDocument
from services.entities.knowledge_entities.knowledge_entities import Rule
from services.summary_index_service import SummaryIndexService

logger = logging.getLogger(__name__)

@@ -144,6 +146,30 @@ class QAIndexProcessor(BaseIndexProcessor):
            vector.create_multimodal(multimodal_documents)

    def clean(self, dataset: Dataset, node_ids: list[str] | None, with_keywords: bool = True, **kwargs):
        # Note: summary indexes are disabled (not deleted) when segments are disabled.
        # This method is called for actual deletion scenarios (e.g., when a segment is deleted);
        # for disable operations, disable_summaries_for_segments is called directly in the task.
        # qa_model does not generate summaries, but we clean them up for completeness,
        # and only when deletion is explicitly requested.
        delete_summaries = kwargs.get("delete_summaries", False)
        if delete_summaries:
            if node_ids:
                # Find segments by index_node_id
                segments = (
                    db.session.query(DocumentSegment)
                    .filter(
                        DocumentSegment.dataset_id == dataset.id,
                        DocumentSegment.index_node_id.in_(node_ids),
                    )
                    .all()
                )
                segment_ids = [segment.id for segment in segments]
                if segment_ids:
                    SummaryIndexService.delete_summaries_for_segments(dataset, segment_ids)
            else:
                # Delete all summaries for the dataset
                SummaryIndexService.delete_summaries_for_segments(dataset, None)

        vector = Vector(dataset)
        if node_ids:
            vector.delete_by_ids(node_ids)

@@ -212,6 +238,17 @@ class QAIndexProcessor(BaseIndexProcessor):
            "total_segments": len(qa_chunks.qa_chunks),
        }

    def generate_summary_preview(
        self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict
    ) -> list[PreviewDetail]:
        """
        The QA model does not generate summaries, so this method returns preview_texts unchanged.

        Note: the QA model uses question-answer pairs, which do not require summary generation.
        """
        return preview_texts

    def _format_qa_document(self, flask_app: Flask, tenant_id: str, document_node, all_qa_documents, document_language):
        format_documents = []
        if document_node.page_content is None or not document_node.page_content.strip():
@@ -62,6 +62,21 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
        inputs = {"variable_selector": variable_selector}
        process_data = {"documents": value if isinstance(value, list) else [value]}

        # Ensure storage_key is loaded for File objects
        files_to_check = value if isinstance(value, list) else [value]
        files_needing_storage_key = [
            f for f in files_to_check if isinstance(f, File) and not f.storage_key and f.related_id
        ]
        if files_needing_storage_key:
            from sqlalchemy.orm import Session

            from extensions.ext_database import db
            from factories.file_factory import StorageKeyLoader

            with Session(bind=db.engine) as session:
                storage_key_loader = StorageKeyLoader(session, tenant_id=self.tenant_id)
                storage_key_loader.load_storage_keys(files_needing_storage_key)

        try:
            if isinstance(value, list):
                extracted_text_list = list(map(_extract_text_from_file, value))

@@ -415,6 +430,16 @@ def _download_file_content(file: File) -> bytes:
            response.raise_for_status()
            return response.content
        else:
            # Check that storage_key is set
            if not file.storage_key:
                raise FileDownloadError(f"File storage_key is missing for file: {file.filename}")

            # Check that the file exists before downloading
            from extensions.ext_storage import storage

            if not storage.exists(file.storage_key):
                raise FileDownloadError(f"File not found in storage: {file.storage_key}")

            return file_manager.download(file)
    except Exception as e:
        raise FileDownloadError(f"Error downloading file: {str(e)}") from e
@@ -158,3 +158,5 @@ class KnowledgeIndexNodeData(BaseNodeData):
    type: str = "knowledge-index"
    chunk_structure: str
    index_chunk_variable_selector: list[str]
    indexing_technique: str | None = None
    summary_index_setting: dict | None = None
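For orientation, a sketch of the node configuration this schema now accepts (all values illustrative; other BaseNodeData fields such as the node title are omitted):

node_config = {
    "type": "knowledge-index",
    "chunk_structure": "parent_child_index",
    "index_chunk_variable_selector": ["sys", "chunks"],  # assumed selector path
    "indexing_technique": "high_quality",
    "summary_index_setting": {                           # keys mirror the settings read elsewhere
        "enable": True,
        "model_provider_name": "openai",                 # placeholder
        "model_name": "gpt-4o-mini",                     # placeholder
    },
}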
@@ -1,9 +1,11 @@
import concurrent.futures
import datetime
import logging
import time
from collections.abc import Mapping
from typing import Any

from flask import current_app
from sqlalchemy import func, select

from core.app.entities.app_invoke_entities import InvokeFrom

@@ -16,7 +18,9 @@ from core.workflow.nodes.base.node import Node
from core.workflow.nodes.base.template import Template
from core.workflow.runtime import VariablePool
from extensions.ext_database import db
from models.dataset import Dataset, Document, DocumentSegment, DocumentSegmentSummary
from services.summary_index_service import SummaryIndexService
from tasks.generate_summary_index_task import generate_summary_index_task

from .entities import KnowledgeIndexNodeData
from .exc import (
@@ -67,7 +71,20 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
        # index knowledge
        try:
            if is_preview:
                # Preview mode: format the preview and generate summaries on the fly,
                # without saving anything to the database.
                # Get indexing_technique and summary_index_setting from node_data (workflow graph config),
                # falling back to the dataset when they are not set on the node.
                indexing_technique = node_data.indexing_technique or dataset.indexing_technique
                summary_index_setting = node_data.summary_index_setting or dataset.summary_index_setting

                outputs = self._get_preview_output_with_summaries(
                    node_data.chunk_structure,
                    chunks,
                    dataset=dataset,
                    indexing_technique=indexing_technique,
                    summary_index_setting=summary_index_setting,
                )
                return NodeRunResult(
                    status=WorkflowNodeExecutionStatus.SUCCEEDED,
                    inputs=variables,

@@ -163,6 +180,9 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):

        db.session.commit()

        # Generate the summary index if enabled
        self._handle_summary_index_generation(dataset, document, variable_pool)

        return {
            "dataset_id": ds_id_value,
            "dataset_name": dataset_name_value,
@@ -173,9 +193,307 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
            "display_status": "completed",
        }

    def _handle_summary_index_generation(
        self,
        dataset: Dataset,
        document: Document,
        variable_pool: VariablePool,
    ) -> None:
        """
        Handle summary index generation based on mode (debug/preview or production).

        Args:
            dataset: Dataset containing the document
            document: Document to generate summaries for
            variable_pool: Variable pool used to check invoke_from
        """
        # Only generate a summary index for the high_quality indexing technique
        if dataset.indexing_technique != "high_quality":
            return

        # Check whether the summary index is enabled
        summary_index_setting = dataset.summary_index_setting
        if not summary_index_setting or not summary_index_setting.get("enable"):
            return

        # Skip qa_model documents
        if document.doc_form == "qa_model":
            return

        # Determine whether we are in preview/debug mode
        invoke_from = variable_pool.get(["sys", SystemVariableKey.INVOKE_FROM])
        is_preview = invoke_from and invoke_from.value == InvokeFrom.DEBUGGER

        # Determine whether only parent chunks should be processed
        only_parent_chunks = dataset.chunk_structure == "parent_child_index"

        if is_preview:
            try:
                # Query segments that need summary generation
                query = db.session.query(DocumentSegment).filter_by(
                    dataset_id=dataset.id,
                    document_id=document.id,
                    status="completed",
                    enabled=True,
                )
                segments = query.all()

                if not segments:
                    logger.info("No segments found for document %s", document.id)
                    return

                # Filter out segments that already have a completed summary
                segments_to_process = []
                for segment in segments:
                    existing_summary = (
                        db.session.query(DocumentSegmentSummary)
                        .filter_by(chunk_id=segment.id, dataset_id=dataset.id, status="completed")
                        .first()
                    )
                    if existing_summary:
                        continue

                    # In parent-child mode all segments are parent chunks, so process them all
                    segments_to_process.append(segment)

                if not segments_to_process:
                    logger.info("No segments need summary generation for document %s", document.id)
                    return

                # Use a ThreadPoolExecutor for concurrent generation
                flask_app = current_app._get_current_object()  # type: ignore
                max_workers = min(10, len(segments_to_process))  # limit to 10 workers

                def process_segment(segment: DocumentSegment) -> None:
                    """Process a single segment in a thread with a Flask app context."""
                    with flask_app.app_context():
                        try:
                            SummaryIndexService.generate_and_vectorize_summary(segment, dataset, summary_index_setting)
                        except Exception:
                            logger.exception(
                                "Failed to generate summary for segment %s",
                                segment.id,
                            )
                            # Continue processing other segments

                with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
                    futures = [executor.submit(process_segment, segment) for segment in segments_to_process]
                    # Wait for all tasks to complete
                    concurrent.futures.wait(futures)

                logger.info(
                    "Successfully generated summary index for %s segments in document %s",
                    len(segments_to_process),
                    document.id,
                )
            except Exception:
                logger.exception("Failed to generate summary index for document %s", document.id)
                # Don't fail the entire indexing process if summary generation fails
        else:
            # Production mode: asynchronous generation
            logger.info(
                "Queuing summary index generation task for document %s (production mode)",
                document.id,
            )
            try:
                generate_summary_index_task.delay(dataset.id, document.id, None)
                logger.info("Summary index generation task queued for document %s", document.id)
            except Exception:
                logger.exception(
                    "Failed to queue summary index generation task for document %s",
                    document.id,
                )
                # Don't fail the entire indexing process if task queuing fails

    def _get_preview_output_with_summaries(
        self,
        chunk_structure: str,
        chunks: Any,
        dataset: Dataset,
        indexing_technique: str | None = None,
        summary_index_setting: dict | None = None,
    ) -> Mapping[str, Any]:
        """
        Generate preview output with summaries for chunks in preview mode.
        This method generates summaries on the fly without saving them to the database.

        Args:
            chunk_structure: Chunk structure type
            chunks: Chunks to generate the preview for
            dataset: Dataset object (for tenant_id)
            indexing_technique: Indexing technique from the node config or the dataset
            summary_index_setting: Summary index setting from the node config or the dataset
        """
        index_processor = IndexProcessorFactory(chunk_structure).init_index_processor()
        preview_output = index_processor.format_preview(chunks)

        # Check whether the summary index is enabled
        if indexing_technique != "high_quality":
            return preview_output

        if not summary_index_setting or not summary_index_setting.get("enable"):
            return preview_output

        # Generate summaries for chunks
        if "preview" in preview_output and isinstance(preview_output["preview"], list):
|
||||
chunk_count = len(preview_output["preview"])
|
||||
logger.info(
|
||||
"Generating summaries for %s chunks in preview mode (dataset: %s)",
|
||||
chunk_count,
|
||||
dataset.id,
|
||||
)
|
||||
# Use ParagraphIndexProcessor's generate_summary method
|
||||
from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor
|
||||
|
||||
# Get Flask app for application context in worker threads
|
||||
flask_app = None
|
||||
try:
|
||||
flask_app = current_app._get_current_object() # type: ignore
|
||||
except RuntimeError:
|
||||
logger.warning("No Flask application context available, summary generation may fail")
|
||||
|
||||
def generate_summary_for_chunk(preview_item: dict) -> None:
|
||||
"""Generate summary for a single chunk."""
|
||||
if "content" in preview_item:
|
||||
# Set Flask application context in worker thread
|
||||
if flask_app:
|
||||
with flask_app.app_context():
|
||||
summary = ParagraphIndexProcessor.generate_summary(
|
||||
tenant_id=dataset.tenant_id,
|
||||
text=preview_item["content"],
|
||||
summary_index_setting=summary_index_setting,
|
||||
)
|
||||
if summary:
|
||||
preview_item["summary"] = summary
|
||||
else:
|
||||
# Fallback: try without app context (may fail)
|
||||
summary = ParagraphIndexProcessor.generate_summary(
|
||||
tenant_id=dataset.tenant_id,
|
||||
text=preview_item["content"],
|
||||
summary_index_setting=summary_index_setting,
|
||||
)
|
||||
if summary:
|
||||
preview_item["summary"] = summary
|
||||
|
||||
# Generate summaries concurrently using ThreadPoolExecutor
|
||||
# Set a reasonable timeout to prevent hanging (60 seconds per chunk, max 5 minutes total)
|
||||
timeout_seconds = min(300, 60 * len(preview_output["preview"]))
|
||||
errors: list[Exception] = []
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=min(10, len(preview_output["preview"]))) as executor:
|
||||
futures = [
|
||||
executor.submit(generate_summary_for_chunk, preview_item)
|
||||
for preview_item in preview_output["preview"]
|
||||
]
|
||||
# Wait for all tasks to complete with timeout
|
||||
done, not_done = concurrent.futures.wait(futures, timeout=timeout_seconds)
|
||||
|
||||
# Cancel tasks that didn't complete in time
|
||||
if not_done:
|
||||
timeout_error_msg = (
|
||||
f"Summary generation timeout: {len(not_done)} chunks did not complete within {timeout_seconds}s"
|
||||
)
|
||||
logger.warning("%s. Cancelling remaining tasks...", timeout_error_msg)
|
||||
# In preview mode, timeout is also an error
|
||||
errors.append(TimeoutError(timeout_error_msg))
|
||||
for future in not_done:
|
||||
future.cancel()
|
||||
# Wait a bit for cancellation to take effect
|
||||
concurrent.futures.wait(not_done, timeout=5)
|
||||
|
||||
# Collect exceptions from completed futures
|
||||
for future in done:
|
||||
try:
|
||||
future.result() # This will raise any exception that occurred
|
||||
except Exception as e:
|
||||
logger.exception("Error in summary generation future")
|
||||
errors.append(e)
|
||||
|
||||
# In preview mode, if there are any errors, fail the request
|
||||
if errors:
|
||||
error_messages = [str(e) for e in errors]
|
||||
error_summary = (
|
||||
f"Failed to generate summaries for {len(errors)} chunk(s). "
|
||||
f"Errors: {'; '.join(error_messages[:3])}" # Show first 3 errors
|
||||
)
|
||||
if len(errors) > 3:
|
||||
error_summary += f" (and {len(errors) - 3} more)"
|
||||
logger.error("Summary generation failed in preview mode: %s", error_summary)
|
||||
raise KnowledgeIndexNodeError(error_summary)
|
||||
|
||||
completed_count = sum(1 for item in preview_output["preview"] if item.get("summary") is not None)
|
||||
logger.info(
|
||||
"Completed summary generation for preview chunks: %s/%s succeeded",
|
||||
completed_count,
|
||||
len(preview_output["preview"]),
|
||||
)
|
||||
|
||||
return preview_output
|
||||
|
||||
def _get_preview_output(
|
||||
self,
|
||||
chunk_structure: str,
|
||||
chunks: Any,
|
||||
dataset: Dataset | None = None,
|
||||
variable_pool: VariablePool | None = None,
|
||||
) -> Mapping[str, Any]:
|
||||
index_processor = IndexProcessorFactory(chunk_structure).init_index_processor()
|
||||
preview_output = index_processor.format_preview(chunks)
|
||||
|
||||
# If dataset is provided, try to enrich preview with summaries
|
||||
if dataset and variable_pool:
|
||||
document_id = variable_pool.get(["sys", SystemVariableKey.DOCUMENT_ID])
|
||||
if document_id:
|
||||
document = db.session.query(Document).filter_by(id=document_id.value).first()
|
||||
if document:
|
||||
# Query summaries for this document
|
||||
summaries = (
|
||||
db.session.query(DocumentSegmentSummary)
|
||||
.filter_by(
|
||||
dataset_id=dataset.id,
|
||||
document_id=document.id,
|
||||
status="completed",
|
||||
enabled=True,
|
||||
)
|
||||
.all()
|
||||
)
|
||||
|
||||
if summaries:
|
||||
# Create a map of segment content to summary for matching
|
||||
# Use content matching as chunks in preview might not be indexed yet
|
||||
summary_by_content = {}
|
||||
for summary in summaries:
|
||||
segment = (
|
||||
db.session.query(DocumentSegment)
|
||||
.filter_by(id=summary.chunk_id, dataset_id=dataset.id)
|
||||
.first()
|
||||
)
|
||||
if segment:
|
||||
# Normalize content for matching (strip whitespace)
|
||||
normalized_content = segment.content.strip()
|
||||
summary_by_content[normalized_content] = summary.summary_content
|
||||
|
||||
# Enrich preview with summaries by content matching
|
||||
if "preview" in preview_output and isinstance(preview_output["preview"], list):
|
||||
matched_count = 0
|
||||
for preview_item in preview_output["preview"]:
|
||||
if "content" in preview_item:
|
||||
# Normalize content for matching
|
||||
normalized_chunk_content = preview_item["content"].strip()
|
||||
if normalized_chunk_content in summary_by_content:
|
||||
preview_item["summary"] = summary_by_content[normalized_chunk_content]
|
||||
matched_count += 1
|
||||
|
||||
if matched_count > 0:
|
||||
logger.info(
|
||||
"Enriched preview with %s existing summaries (dataset: %s, document: %s)",
|
||||
matched_count,
|
||||
dataset.id,
|
||||
document.id,
|
||||
)
|
||||
|
||||
return preview_output
|
||||
|
||||
@classmethod
|
||||
def version(cls) -> str:
|
||||
|
||||
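Note: the worker-thread pattern above only works because each thread pushes its own Flask application context. A minimal, self-contained sketch of that pattern (the app object and work function are illustrative stand-ins, not Dify code):

import concurrent.futures

from flask import Flask, current_app

app = Flask(__name__)

with app.app_context():
    # Unwrap the proxy so worker threads hold the real app object.
    flask_app = current_app._get_current_object()


def work(item: int) -> None:
    # Each thread must push its own app context before touching Flask state.
    with flask_app.app_context():
        current_app.logger.info("processed item %s", item)


with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    concurrent.futures.wait([executor.submit(work, i) for i in range(8)])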
@@ -102,6 +102,8 @@ def init_app(app: DifyApp) -> Celery:
    imports = [
        "tasks.async_workflow_tasks",  # trigger workers
        "tasks.trigger_processing_tasks",  # async trigger processing
        "tasks.generate_summary_index_task",  # summary index generation
        "tasks.regenerate_summary_index_task",  # summary index regeneration
    ]
    day = dify_config.CELERY_BEAT_SCHEDULER_TIME

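Note: modules listed in `imports` are loaded by the Celery worker so that their tasks get registered. A hedged sketch of what such a task module looks like; the signature is inferred from the generate_summary_index_task.delay(dataset.id, document.id, None) call elsewhere in this diff, and the body is purely illustrative:

from celery import shared_task


@shared_task
def generate_summary_index_task(dataset_id: str, document_id: str, segment_ids: list[str] | None = None):
    # Illustrative body only: the real task loads the dataset and document,
    # then calls SummaryIndexService.generate_summaries_for_document(...).
    ...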
@@ -39,6 +39,14 @@ dataset_retrieval_model_fields = {
    "score_threshold_enabled": fields.Boolean,
    "score_threshold": fields.Float,
}

dataset_summary_index_fields = {
    "enable": fields.Boolean,
    "model_name": fields.String,
    "model_provider_name": fields.String,
    "summary_prompt": fields.String,
}

external_retrieval_model_fields = {
    "top_k": fields.Integer,
    "score_threshold": fields.Float,
@@ -83,6 +91,7 @@ dataset_detail_fields = {
    "embedding_model_provider": fields.String,
    "embedding_available": fields.Boolean,
    "retrieval_model_dict": fields.Nested(dataset_retrieval_model_fields),
    "summary_index_setting": fields.Nested(dataset_summary_index_fields),
    "tags": fields.List(fields.Nested(tag_fields)),
    "doc_form": fields.String,
    "external_knowledge_info": fields.Nested(external_knowledge_info_fields),

@@ -33,6 +33,11 @@ document_fields = {
    "hit_count": fields.Integer,
    "doc_form": fields.String,
    "doc_metadata": fields.List(fields.Nested(document_metadata_fields), attribute="doc_metadata_details"),
    # Summary index generation status:
    # "SUMMARIZING" (while the task is queued and generating)
    "summary_index_status": fields.String,
    # Whether this document needs summary index generation
    "need_summary": fields.Boolean,
}

document_with_segments_fields = {
@@ -60,6 +65,10 @@ document_with_segments_fields = {
    "completed_segments": fields.Integer,
    "total_segments": fields.Integer,
    "doc_metadata": fields.List(fields.Nested(document_metadata_fields), attribute="doc_metadata_details"),
    # Summary index generation status:
    # "SUMMARIZING" (while the task is queued and generating)
    "summary_index_status": fields.String,
    "need_summary": fields.Boolean,  # Whether this document needs summary index generation
}

dataset_and_document_fields = {

@@ -58,4 +58,5 @@ hit_testing_record_fields = {
    "score": fields.Float,
    "tsne_position": fields.Raw,
    "files": fields.List(fields.Nested(files_fields)),
    "summary": fields.String,  # Summary content if retrieved via the summary index
}

@@ -49,4 +49,5 @@ segment_fields = {
    "stopped_at": TimestampField,
    "child_chunks": fields.List(fields.Nested(child_chunk_fields)),
    "attachments": fields.List(fields.Nested(attachment_fields)),
    "summary": fields.String,  # Summary content for the segment
}

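Note: these dicts are flask_restful marshalling maps. A small sketch of how the new dataset_summary_index_fields shapes a response (the payload values here are invented):

from flask_restful import fields, marshal

dataset_summary_index_fields = {
    "enable": fields.Boolean,
    "model_name": fields.String,
    "model_provider_name": fields.String,
    "summary_prompt": fields.String,
}

setting = {
    "enable": True,
    "model_name": "gpt-4o-mini",  # invented value
    "model_provider_name": "openai",  # invented value
    "summary_prompt": "Summarize this chunk in two sentences.",
}

print(marshal(setting, dataset_summary_index_fields))
# {'enable': True, 'model_name': 'gpt-4o-mini', 'model_provider_name': 'openai', ...}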
@@ -0,0 +1,69 @@
"""add SummaryIndex feature

Revision ID: 562dcce7d77c
Revises: 03ea244985ce
Create Date: 2026-01-12 13:58:40.584802

"""
from alembic import op
import models as models
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = '562dcce7d77c'
down_revision = '03ea244985ce'
branch_labels = None
depends_on = None


def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table('document_segment_summary',
    sa.Column('id', models.types.StringUUID(), nullable=False),
    sa.Column('dataset_id', models.types.StringUUID(), nullable=False),
    sa.Column('document_id', models.types.StringUUID(), nullable=False),
    sa.Column('chunk_id', models.types.StringUUID(), nullable=False),
    sa.Column('summary_content', models.types.LongText(), nullable=True),
    sa.Column('summary_index_node_id', sa.String(length=255), nullable=True),
    sa.Column('summary_index_node_hash', sa.String(length=255), nullable=True),
    sa.Column('status', sa.String(length=32), server_default=sa.text("'generating'"), nullable=False),
    sa.Column('error', models.types.LongText(), nullable=True),
    sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False),
    sa.Column('disabled_at', sa.DateTime(), nullable=True),
    sa.Column('disabled_by', models.types.StringUUID(), nullable=True),
    sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
    sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
    sa.PrimaryKeyConstraint('id', name='document_segment_summary_pkey')
    )
    with op.batch_alter_table('document_segment_summary', schema=None) as batch_op:
        batch_op.create_index('document_segment_summary_chunk_id_idx', ['chunk_id'], unique=False)
        batch_op.create_index('document_segment_summary_dataset_id_idx', ['dataset_id'], unique=False)
        batch_op.create_index('document_segment_summary_document_id_idx', ['document_id'], unique=False)
        batch_op.create_index('document_segment_summary_status_idx', ['status'], unique=False)

    with op.batch_alter_table('datasets', schema=None) as batch_op:
        batch_op.add_column(sa.Column('summary_index_setting', models.types.AdjustedJSON(), nullable=True))

    with op.batch_alter_table('documents', schema=None) as batch_op:
        batch_op.add_column(sa.Column('need_summary', sa.Boolean(), server_default=sa.text('false'), nullable=True))

    # ### end Alembic commands ###


def downgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('documents', schema=None) as batch_op:
        batch_op.drop_column('need_summary')

    with op.batch_alter_table('datasets', schema=None) as batch_op:
        batch_op.drop_column('summary_index_setting')

    with op.batch_alter_table('document_segment_summary', schema=None) as batch_op:
        batch_op.drop_index('document_segment_summary_status_idx')
        batch_op.drop_index('document_segment_summary_document_id_idx')
        batch_op.drop_index('document_segment_summary_dataset_id_idx')
        batch_op.drop_index('document_segment_summary_chunk_id_idx')

    op.drop_table('document_segment_summary')
    # ### end Alembic commands ###
@@ -0,0 +1,73 @@
"""add table explore banner and trial

Revision ID: f9f6d18a37f9
Revises: 288345cd01d1
Create Date: 2026-01-17 11:10:18.079355

"""
from alembic import op
import models as models
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = 'f9f6d18a37f9'
down_revision = '288345cd01d1'
branch_labels = None
depends_on = None


def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table('account_trial_app_records',
    sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
    sa.Column('account_id', models.types.StringUUID(), nullable=False),
    sa.Column('app_id', models.types.StringUUID(), nullable=False),
    sa.Column('count', sa.Integer(), nullable=False),
    sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
    sa.PrimaryKeyConstraint('id', name='user_trial_app_pkey'),
    sa.UniqueConstraint('account_id', 'app_id', name='unique_account_trial_app_record')
    )
    with op.batch_alter_table('account_trial_app_records', schema=None) as batch_op:
        batch_op.create_index('account_trial_app_record_account_id_idx', ['account_id'], unique=False)
        batch_op.create_index('account_trial_app_record_app_id_idx', ['app_id'], unique=False)

    op.create_table('exporle_banners',
    sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
    sa.Column('content', sa.JSON(), nullable=False),
    sa.Column('link', sa.String(length=255), nullable=False),
    sa.Column('sort', sa.Integer(), nullable=False),
    sa.Column('status', sa.String(length=255), server_default=sa.text("'enabled'::character varying"), nullable=False),
    sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
    sa.Column('language', sa.String(length=255), server_default=sa.text("'en-US'::character varying"), nullable=False),
    sa.PrimaryKeyConstraint('id', name='exporler_banner_pkey')
    )
    op.create_table('trial_apps',
    sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
    sa.Column('app_id', models.types.StringUUID(), nullable=False),
    sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
    sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
    sa.Column('trial_limit', sa.Integer(), nullable=False),
    sa.PrimaryKeyConstraint('id', name='trial_app_pkey'),
    sa.UniqueConstraint('app_id', name='unique_trail_app_id')
    )
    with op.batch_alter_table('trial_apps', schema=None) as batch_op:
        batch_op.create_index('trial_app_app_id_idx', ['app_id'], unique=False)
        batch_op.create_index('trial_app_tenant_id_idx', ['tenant_id'], unique=False)
    # ### end Alembic commands ###


def downgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('trial_apps', schema=None) as batch_op:
        batch_op.drop_index('trial_app_tenant_id_idx')
        batch_op.drop_index('trial_app_app_id_idx')

    op.drop_table('trial_apps')
    op.drop_table('exporle_banners')
    with op.batch_alter_table('account_trial_app_records', schema=None) as batch_op:
        batch_op.drop_index('account_trial_app_record_app_id_idx')
        batch_op.drop_index('account_trial_app_record_account_id_idx')

    op.drop_table('account_trial_app_records')
    # ### end Alembic commands ###
@@ -35,6 +35,7 @@ from .enums import (
    WorkflowTriggerStatus,
)
from .model import (
    AccountTrialAppRecord,
    ApiRequest,
    ApiToken,
    App,
@@ -47,6 +48,7 @@ from .model import (
    DatasetRetrieverResource,
    DifySetup,
    EndUser,
    ExporleBanner,
    IconType,
    InstalledApp,
    Message,
@@ -62,6 +64,7 @@ from .model import (
    TagBinding,
    TenantCreditPool,
    TraceAppConfig,
    TrialApp,
    UploadFile,
)
from .oauth import DatasourceOauthParamConfig, DatasourceProvider
@@ -114,6 +117,7 @@ __all__ = [
    "Account",
    "AccountIntegrate",
    "AccountStatus",
    "AccountTrialAppRecord",
    "ApiRequest",
    "ApiToken",
    "ApiToolProvider",
@@ -150,6 +154,7 @@ __all__ = [
    "DocumentSegment",
    "Embedding",
    "EndUser",
    "ExporleBanner",
    "ExternalKnowledgeApis",
    "ExternalKnowledgeBindings",
    "IconType",
@@ -188,6 +193,7 @@ __all__ = [
    "ToolLabelBinding",
    "ToolModelInvoke",
    "TraceAppConfig",
    "TrialApp",
    "TriggerOAuthSystemClient",
    "TriggerOAuthTenantClient",
    "TriggerSubscription",

@@ -72,6 +72,7 @@ class Dataset(Base):
    keyword_number = mapped_column(sa.Integer, nullable=True, server_default=sa.text("10"))
    collection_binding_id = mapped_column(StringUUID, nullable=True)
    retrieval_model = mapped_column(AdjustedJSON, nullable=True)
    summary_index_setting = mapped_column(AdjustedJSON, nullable=True)
    built_in_field_enabled = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false"))
    icon_info = mapped_column(AdjustedJSON, nullable=True)
    runtime_mode = mapped_column(sa.String(255), nullable=True, server_default=sa.text("'general'"))
@@ -419,6 +420,7 @@ class Document(Base):
    doc_metadata = mapped_column(AdjustedJSON, nullable=True)
    doc_form = mapped_column(String(255), nullable=False, server_default=sa.text("'text_model'"))
    doc_language = mapped_column(String(255), nullable=True)
    need_summary: Mapped[bool | None] = mapped_column(sa.Boolean, nullable=True, server_default=sa.text("false"))

    DATA_SOURCES = ["upload_file", "notion_import", "website_crawl"]

@@ -1575,3 +1577,35 @@ class SegmentAttachmentBinding(Base):
    segment_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    attachment_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    created_at: Mapped[datetime] = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp())


class DocumentSegmentSummary(Base):
    __tablename__ = "document_segment_summary"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="document_segment_summary_pkey"),
        sa.Index("document_segment_summary_dataset_id_idx", "dataset_id"),
        sa.Index("document_segment_summary_document_id_idx", "document_id"),
        sa.Index("document_segment_summary_chunk_id_idx", "chunk_id"),
        sa.Index("document_segment_summary_status_idx", "status"),
    )

    id: Mapped[str] = mapped_column(StringUUID, nullable=False, default=lambda: str(uuid4()))
    dataset_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    document_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    # corresponds to DocumentSegment.id or a parent chunk id
    chunk_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    summary_content: Mapped[str] = mapped_column(LongText, nullable=True)
    summary_index_node_id: Mapped[str] = mapped_column(String(255), nullable=True)
    summary_index_node_hash: Mapped[str] = mapped_column(String(255), nullable=True)
    status: Mapped[str] = mapped_column(String(32), nullable=False, server_default=sa.text("'generating'"))
    error: Mapped[str] = mapped_column(LongText, nullable=True)
    enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"))
    disabled_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
    disabled_by = mapped_column(StringUUID, nullable=True)
    created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp())
    updated_at: Mapped[datetime] = mapped_column(
        DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp()
    )

    def __repr__(self):
        return f"<DocumentSegmentSummary id={self.id} chunk_id={self.chunk_id} status={self.status}>"

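Note: a hedged sketch of writing a row through the new model, assuming the usual db.session from extensions.ext_database and an already-loaded DocumentSegment; the helper name is invented for illustration:

from extensions.ext_database import db
from models.dataset import DocumentSegment, DocumentSegmentSummary


def save_completed_summary(segment: DocumentSegment, summary_text: str) -> DocumentSegmentSummary:
    # One summary row per chunk; chunk_id links the summary back to its segment.
    record = DocumentSegmentSummary(
        dataset_id=segment.dataset_id,
        document_id=segment.document_id,
        chunk_id=segment.id,
        summary_content=summary_text,
        status="completed",
    )
    db.session.add(record)
    db.session.commit()
    return record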
@@ -603,6 +603,64 @@ class InstalledApp(TypeBase):
        return tenant


class TrialApp(Base):
    __tablename__ = "trial_apps"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="trial_app_pkey"),
        sa.Index("trial_app_app_id_idx", "app_id"),
        sa.Index("trial_app_tenant_id_idx", "tenant_id"),
        sa.UniqueConstraint("app_id", name="unique_trail_app_id"),
    )

    id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
    app_id = mapped_column(StringUUID, nullable=False)
    tenant_id = mapped_column(StringUUID, nullable=False)
    created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp())
    trial_limit = mapped_column(sa.Integer, nullable=False, default=3)

    @property
    def app(self) -> App | None:
        app = db.session.query(App).where(App.id == self.app_id).first()
        return app


class AccountTrialAppRecord(Base):
    __tablename__ = "account_trial_app_records"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="user_trial_app_pkey"),
        sa.Index("account_trial_app_record_account_id_idx", "account_id"),
        sa.Index("account_trial_app_record_app_id_idx", "app_id"),
        sa.UniqueConstraint("account_id", "app_id", name="unique_account_trial_app_record"),
    )
    id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
    account_id = mapped_column(StringUUID, nullable=False)
    app_id = mapped_column(StringUUID, nullable=False)
    count = mapped_column(sa.Integer, nullable=False, default=0)
    created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp())

    @property
    def app(self) -> App | None:
        app = db.session.query(App).where(App.id == self.app_id).first()
        return app

    @property
    def user(self) -> Account | None:
        user = db.session.query(Account).where(Account.id == self.account_id).first()
        return user


class ExporleBanner(Base):
    __tablename__ = "exporle_banners"
    __table_args__ = (sa.PrimaryKeyConstraint("id", name="exporler_banner_pkey"),)
    id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
    content = mapped_column(sa.JSON, nullable=False)
    link = mapped_column(String(255), nullable=False)
    sort = mapped_column(sa.Integer, nullable=False)
    status = mapped_column(sa.String(255), nullable=False, server_default=sa.text("'enabled'::character varying"))
    created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp())
    language = mapped_column(String(255), nullable=False, server_default=sa.text("'en-US'::character varying"))


class OAuthProviderApp(TypeBase):
    """
    Globally shared OAuth provider app information.

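Note: a hedged sketch of how the two trial tables combine into a remaining-quota check; the helper name and return contract are invented for illustration:

from extensions.ext_database import db
from models.model import AccountTrialAppRecord, TrialApp


def trial_runs_remaining(account_id: str, app_id: str) -> int:
    trial_app = db.session.query(TrialApp).where(TrialApp.app_id == app_id).first()
    if not trial_app:
        return 0  # the app is not enrolled in the trial program
    record = (
        db.session.query(AccountTrialAppRecord)
        .where(AccountTrialAppRecord.app_id == app_id, AccountTrialAppRecord.account_id == account_id)
        .first()
    )
    used = record.count if record else 0
    return max(trial_app.trial_limit - used, 0)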
@@ -89,6 +89,7 @@ from tasks.disable_segments_from_index_task import disable_segments_from_index_t
from tasks.document_indexing_update_task import document_indexing_update_task
from tasks.enable_segments_to_index_task import enable_segments_to_index_task
from tasks.recover_document_indexing_task import recover_document_indexing_task
from tasks.regenerate_summary_index_task import regenerate_summary_index_task
from tasks.remove_document_from_index_task import remove_document_from_index_task
from tasks.retry_document_indexing_task import retry_document_indexing_task
from tasks.sync_website_document_indexing_task import sync_website_document_indexing_task
@@ -476,6 +477,11 @@ class DatasetService:
        if external_retrieval_model:
            dataset.retrieval_model = external_retrieval_model

        # Update the summary index setting if provided
        summary_index_setting = data.get("summary_index_setting", None)
        if summary_index_setting is not None:
            dataset.summary_index_setting = summary_index_setting

        # Update basic dataset properties
        dataset.name = data.get("name", dataset.name)
        dataset.description = data.get("description", dataset.description)
@@ -558,12 +564,18 @@ class DatasetService:
        # Handle indexing technique changes and embedding model updates
        action = DatasetService._handle_indexing_technique_change(dataset, data, filtered_data)

        # Check if the summary_index_setting model changed (before updating the database)
        summary_model_changed = DatasetService._check_summary_index_setting_model_changed(dataset, data)

        # Add metadata fields
        filtered_data["updated_by"] = user.id
        filtered_data["updated_at"] = naive_utc_now()
        # update retrieval model
        if data.get("retrieval_model"):
            filtered_data["retrieval_model"] = data["retrieval_model"]
        # update summary index setting
        if data.get("summary_index_setting"):
            filtered_data["summary_index_setting"] = data.get("summary_index_setting")
        # update icon info
        if data.get("icon_info"):
            filtered_data["icon_info"] = data.get("icon_info")
@@ -572,12 +584,30 @@ class DatasetService:
        db.session.query(Dataset).filter_by(id=dataset.id).update(filtered_data)
        db.session.commit()

        # Reload the dataset to get the updated values
        db.session.refresh(dataset)

        # update pipeline knowledge base node data
        DatasetService._update_pipeline_knowledge_base_node_data(dataset, user.id)

        # Trigger the vector index task if the indexing technique changed
        if action:
            deal_dataset_vector_index_task.delay(dataset.id, action)
            # If the embedding model changed, also regenerate summary vectors
            if action == "update":
                regenerate_summary_index_task.delay(
                    dataset.id,
                    regenerate_reason="embedding_model_changed",
                    regenerate_vectors_only=True,
                )

        # Trigger summary index regeneration if the summary model changed
        if summary_model_changed:
            regenerate_summary_index_task.delay(
                dataset.id,
                regenerate_reason="summary_model_changed",
                regenerate_vectors_only=False,
            )

        return dataset

@@ -616,6 +646,7 @@ class DatasetService:
                knowledge_index_node_data["chunk_structure"] = dataset.chunk_structure
                knowledge_index_node_data["indexing_technique"] = dataset.indexing_technique  # pyright: ignore[reportAttributeAccessIssue]
                knowledge_index_node_data["keyword_number"] = dataset.keyword_number
                knowledge_index_node_data["summary_index_setting"] = dataset.summary_index_setting
                node["data"] = knowledge_index_node_data
                updated = True
        except Exception:
@@ -854,6 +885,53 @@ class DatasetService:
            )
            filtered_data["collection_binding_id"] = dataset_collection_binding.id

    @staticmethod
    def _check_summary_index_setting_model_changed(dataset: Dataset, data: dict[str, Any]) -> bool:
        """
        Check if the summary_index_setting model (model_name or model_provider_name) has changed.

        Args:
            dataset: Current dataset object
            data: Update data dictionary

        Returns:
            bool: True if the summary model changed, False otherwise
        """
        # Check if summary_index_setting is being updated
        if "summary_index_setting" not in data or data.get("summary_index_setting") is None:
            return False

        new_summary_setting = data.get("summary_index_setting")
        old_summary_setting = dataset.summary_index_setting

        # If the old setting doesn't exist or is disabled, there is no need to regenerate
        if not old_summary_setting or not old_summary_setting.get("enable"):
            return False

        # If the new setting is disabled, there is no need to regenerate
        if not new_summary_setting or not new_summary_setting.get("enable"):
            return False

        # Compare model_name and model_provider_name
        old_model_name = old_summary_setting.get("model_name")
        old_model_provider = old_summary_setting.get("model_provider_name")
        new_model_name = new_summary_setting.get("model_name")
        new_model_provider = new_summary_setting.get("model_provider_name")

        # Check if the model changed
        if old_model_name != new_model_name or old_model_provider != new_model_provider:
            logger.info(
                "Summary index setting model changed for dataset %s: old=%s/%s, new=%s/%s",
                dataset.id,
                old_model_provider,
                old_model_name,
                new_model_provider,
                new_model_name,
            )
            return True

        return False

    @staticmethod
    def update_rag_pipeline_dataset_settings(
        session: Session, dataset: Dataset, knowledge_configuration: KnowledgeConfiguration, has_published: bool = False
@@ -889,6 +967,9 @@ class DatasetService:
            else:
                raise ValueError("Invalid index method")
            dataset.retrieval_model = knowledge_configuration.retrieval_model.model_dump()
            # Update summary_index_setting if provided
            if knowledge_configuration.summary_index_setting is not None:
                dataset.summary_index_setting = knowledge_configuration.summary_index_setting
            session.add(dataset)
        else:
            if dataset.chunk_structure and dataset.chunk_structure != knowledge_configuration.chunk_structure:
@@ -994,6 +1075,9 @@ class DatasetService:
            if dataset.keyword_number != knowledge_configuration.keyword_number:
                dataset.keyword_number = knowledge_configuration.keyword_number
            dataset.retrieval_model = knowledge_configuration.retrieval_model.model_dump()
            # Update summary_index_setting if provided
            if knowledge_configuration.summary_index_setting is not None:
                dataset.summary_index_setting = knowledge_configuration.summary_index_setting
            session.add(dataset)
        session.commit()
        if action:
@@ -1964,6 +2048,8 @@ class DocumentService:
                    DuplicateDocumentIndexingTaskProxy(
                        dataset.tenant_id, dataset.id, duplicate_document_ids
                    ).delay()
                # Note: Summary index generation is triggered in document_indexing_task after indexing completes,
                # to ensure segments are available. See tasks/document_indexing_task.py.
            except LockNotOwnedError:
                pass

@@ -2268,6 +2354,11 @@ class DocumentService:
        name: str,
        batch: str,
    ):
        # Set need_summary based on the dataset's summary_index_setting
        need_summary = False
        if dataset.summary_index_setting and dataset.summary_index_setting.get("enable") is True:
            need_summary = True

        document = Document(
            tenant_id=dataset.tenant_id,
            dataset_id=dataset.id,
@@ -2281,6 +2372,7 @@ class DocumentService:
            created_by=account.id,
            doc_form=document_form,
            doc_language=document_language,
            need_summary=need_summary,
        )
        doc_metadata = {}
        if dataset.built_in_field_enabled:
@@ -2505,6 +2597,7 @@ class DocumentService:
            embedding_model_provider=knowledge_config.embedding_model_provider,
            collection_binding_id=dataset_collection_binding_id,
            retrieval_model=retrieval_model.model_dump() if retrieval_model else None,
            summary_index_setting=knowledge_config.summary_index_setting,
            is_multimodal=knowledge_config.is_multimodal,
        )

@@ -2686,6 +2779,14 @@ class DocumentService:
        if not isinstance(args["process_rule"]["rules"]["segmentation"]["max_tokens"], int):
            raise ValueError("Process rule segmentation max_tokens is invalid")

        # Validate the summary index setting
        summary_index_setting = args["process_rule"].get("summary_index_setting")
        if summary_index_setting and summary_index_setting.get("enable"):
            if "model_name" not in summary_index_setting or not summary_index_setting["model_name"]:
                raise ValueError("Summary index model name is required")
            if "model_provider_name" not in summary_index_setting or not summary_index_setting["model_provider_name"]:
                raise ValueError("Summary index model provider name is required")

    @staticmethod
    def batch_update_document_status(
        dataset: Dataset, document_ids: list[str], action: Literal["enable", "disable", "archive", "un_archive"], user
@@ -3154,6 +3255,39 @@ class SegmentService:
                if args.enabled or keyword_changed:
                    # update segment vector index
                    VectorService.update_segment_vector(args.keywords, segment, dataset)
                    # Update the summary index if a summary is provided and has changed
                    if args.summary is not None:
                        # Check if the summary index is enabled
                        has_summary_index = (
                            dataset.indexing_technique == "high_quality"
                            and dataset.summary_index_setting
                            and dataset.summary_index_setting.get("enable") is True
                        )

                        if has_summary_index:
                            # Query the existing summary from the database
                            from models.dataset import DocumentSegmentSummary

                            existing_summary = (
                                db.session.query(DocumentSegmentSummary)
                                .where(
                                    DocumentSegmentSummary.chunk_id == segment.id,
                                    DocumentSegmentSummary.dataset_id == dataset.id,
                                )
                                .first()
                            )

                            # Check if the summary has changed
                            existing_summary_content = existing_summary.summary_content if existing_summary else None
                            if existing_summary_content != args.summary:
                                # The summary has changed; update it
                                from services.summary_index_service import SummaryIndexService

                                try:
                                    SummaryIndexService.update_summary_for_segment(segment, dataset, args.summary)
                                except Exception:
                                    logger.exception("Failed to update summary for segment %s", segment.id)
                                    # Don't fail the entire update if the summary update fails
            else:
                segment_hash = helper.generate_text_hash(content)
                tokens = 0
@@ -3228,6 +3362,15 @@ class SegmentService:
                elif document.doc_form in (IndexStructureType.PARAGRAPH_INDEX, IndexStructureType.QA_INDEX):
                    # update segment vector index
                    VectorService.update_segment_vector(args.keywords, segment, dataset)
                    # Update the summary index if a summary is provided
                    if args.summary is not None:
                        from services.summary_index_service import SummaryIndexService

                        try:
                            SummaryIndexService.update_summary_for_segment(segment, dataset, args.summary)
                        except Exception:
                            logger.exception("Failed to update summary for segment %s", segment.id)
                            # Don't fail the entire update if the summary update fails
                    # update multimodal vector index
                    VectorService.update_multimodel_vector(segment, args.attachment_ids or [], dataset)
        except Exception as e:

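Note: the regeneration trigger above reduces to a small pure decision. A sketch of the same logic detached from the ORM (the helper name is invented):

def summary_model_changed(old: dict | None, new: dict | None) -> bool:
    # Regenerate only when summaries were enabled both before and after the
    # update, and the configured model or provider actually differs.
    if not old or not old.get("enable"):
        return False
    if not new or not new.get("enable"):
        return False
    return (old.get("model_name"), old.get("model_provider_name")) != (
        new.get("model_name"),
        new.get("model_provider_name"),
    )


assert summary_model_changed(
    {"enable": True, "model_name": "a", "model_provider_name": "p"},
    {"enable": True, "model_name": "b", "model_provider_name": "p"},
)
assert not summary_model_changed(None, {"enable": True, "model_name": "b", "model_provider_name": "p"})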
@@ -119,6 +119,7 @@ class KnowledgeConfig(BaseModel):
    data_source: DataSource | None = None
    process_rule: ProcessRule | None = None
    retrieval_model: RetrievalModel | None = None
    summary_index_setting: dict | None = None
    doc_form: str = "text_model"
    doc_language: str = "English"
    embedding_model: str | None = None
@@ -141,6 +142,7 @@ class SegmentUpdateArgs(BaseModel):
    regenerate_child_chunks: bool = False
    enabled: bool | None = None
    attachment_ids: list[str] | None = None
    summary: str | None = None  # Summary content for the summary index


class ChildChunkUpdateArgs(BaseModel):

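Note: because the new summary field defaults to None, existing callers are untouched; only requests that actually send a summary enter the summary-index path. A sketch with an abbreviated stand-in model:

from pydantic import BaseModel


class SegmentUpdateArgsDemo(BaseModel):  # abbreviated stand-in, not the real class
    content: str | None = None
    enabled: bool | None = None
    summary: str | None = None  # Summary content for the summary index


args = SegmentUpdateArgsDemo(content="updated chunk text", summary="short recap")
print(args.summary is not None)  # True -> the summary index update path is taken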
@@ -116,6 +116,8 @@ class KnowledgeConfiguration(BaseModel):
    embedding_model: str = ""
    keyword_number: int | None = 10
    retrieval_model: RetrievalSetting
    # add summary index setting
    summary_index_setting: dict | None = None

    @field_validator("embedding_model_provider", mode="before")
    @classmethod

@@ -170,6 +170,8 @@ class SystemFeatureModel(BaseModel):
    plugin_installation_permission: PluginInstallationPermissionModel = PluginInstallationPermissionModel()
    enable_change_email: bool = True
    plugin_manager: PluginManagerModel = PluginManagerModel()
    enable_trial_app: bool = False
    enable_explore_banner: bool = False


class FeatureService:
@@ -225,6 +227,8 @@ class FeatureService:
        system_features.is_allow_register = dify_config.ALLOW_REGISTER
        system_features.is_allow_create_workspace = dify_config.ALLOW_CREATE_WORKSPACE
        system_features.is_email_setup = dify_config.MAIL_TYPE is not None and dify_config.MAIL_TYPE != ""
        system_features.enable_trial_app = dify_config.ENABLE_TRIAL_APP
        system_features.enable_explore_banner = dify_config.ENABLE_EXPLORE_BANNER

    @classmethod
    def _fulfill_params_from_env(cls, features: FeatureModel):

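Note: these system-feature flags flow straight from environment variables through pydantic-settings. A self-contained sketch mirroring the ENABLE_TRIAL_APP field (the DemoConfig class is illustrative only):

import os

from pydantic import Field
from pydantic_settings import BaseSettings


class DemoConfig(BaseSettings):
    ENABLE_TRIAL_APP: bool = Field(description="Enable trial app", default=False)


os.environ["ENABLE_TRIAL_APP"] = "true"  # typically set via .env or docker-compose
print(DemoConfig().ENABLE_TRIAL_APP)  # True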
@@ -1,4 +1,7 @@
from configs import dify_config
from extensions.ext_database import db
from models.model import AccountTrialAppRecord, TrialApp
from services.feature_service import FeatureService
from services.recommend_app.recommend_app_factory import RecommendAppRetrievalFactory


@@ -20,6 +23,15 @@ class RecommendedAppService:
            )
        )

        if FeatureService.get_system_features().enable_trial_app:
            apps = result["recommended_apps"]
            for app in apps:
                app_id = app["app_id"]
                trial_app_model = db.session.query(TrialApp).where(TrialApp.app_id == app_id).first()
                if trial_app_model:
                    app["can_trial"] = True
                else:
                    app["can_trial"] = False
        return result

    @classmethod
@@ -32,4 +44,30 @@ class RecommendedAppService:
        mode = dify_config.HOSTED_FETCH_APP_TEMPLATES_MODE
        retrieval_instance = RecommendAppRetrievalFactory.get_recommend_app_factory(mode)()
        result: dict = retrieval_instance.get_recommend_app_detail(app_id)
        if FeatureService.get_system_features().enable_trial_app:
            app_id = result["id"]
            trial_app_model = db.session.query(TrialApp).where(TrialApp.app_id == app_id).first()
            if trial_app_model:
                result["can_trial"] = True
            else:
                result["can_trial"] = False
        return result

    @classmethod
    def add_trial_app_record(cls, app_id: str, account_id: str):
        """
        Add a trial app record.
        :param app_id: app id
        :param account_id: account id
        :return:
        """
        account_trial_app_record = (
            db.session.query(AccountTrialAppRecord)
            .where(AccountTrialAppRecord.app_id == app_id, AccountTrialAppRecord.account_id == account_id)
            .first()
        )
        if account_trial_app_record:
            account_trial_app_record.count += 1
            db.session.commit()
        else:
            db.session.add(AccountTrialAppRecord(app_id=app_id, count=1, account_id=account_id))
            db.session.commit()

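Note: add_trial_app_record does a check-then-insert, so two concurrent first trials could both take the insert branch; the unique_account_trial_app_record constraint would then reject one of them. A hedged sketch of a constraint-tolerant variant (not the code above; the helper name is invented):

from sqlalchemy.exc import IntegrityError

from extensions.ext_database import db
from models.model import AccountTrialAppRecord


def record_trial_use(app_id: str, account_id: str) -> None:
    record = (
        db.session.query(AccountTrialAppRecord)
        .where(AccountTrialAppRecord.app_id == app_id, AccountTrialAppRecord.account_id == account_id)
        .first()
    )
    if record:
        record.count += 1
        db.session.commit()
        return
    try:
        db.session.add(AccountTrialAppRecord(app_id=app_id, count=1, account_id=account_id))
        db.session.commit()
    except IntegrityError:
        # Another request created the row first; fall back to incrementing it.
        db.session.rollback()
        record = (
            db.session.query(AccountTrialAppRecord)
            .where(AccountTrialAppRecord.app_id == app_id, AccountTrialAppRecord.account_id == account_id)
            .first()
        )
        if record:
            record.count += 1
            db.session.commit()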
api/services/summary_index_service.py (new file, 626 lines)
@@ -0,0 +1,626 @@
"""Summary index service for generating and managing document segment summaries."""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from core.rag.datasource.vdb.vector_factory import Vector
|
||||
from core.rag.index_processor.constant.doc_type import DocType
|
||||
from core.rag.models.document import Document
|
||||
from extensions.ext_database import db
|
||||
from libs import helper
|
||||
from models.dataset import Dataset, DocumentSegment, DocumentSegmentSummary
|
||||
from models.dataset import Document as DatasetDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SummaryIndexService:
|
||||
"""Service for generating and managing summary indexes."""
|
||||
|
||||
@staticmethod
|
||||
def generate_summary_for_segment(
|
||||
segment: DocumentSegment,
|
||||
dataset: Dataset,
|
||||
summary_index_setting: dict,
|
||||
) -> str:
|
||||
"""
|
||||
Generate summary for a single segment.
|
||||
|
||||
Args:
|
||||
segment: DocumentSegment to generate summary for
|
||||
dataset: Dataset containing the segment
|
||||
summary_index_setting: Summary index configuration
|
||||
|
||||
Returns:
|
||||
Generated summary text
|
||||
|
||||
Raises:
|
||||
ValueError: If summary_index_setting is invalid or generation fails
|
||||
"""
|
||||
# Reuse the existing generate_summary method from ParagraphIndexProcessor
|
||||
# Use lazy import to avoid circular import
|
||||
from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor
|
||||
|
||||
summary_content = ParagraphIndexProcessor.generate_summary(
|
||||
tenant_id=dataset.tenant_id,
|
||||
text=segment.content,
|
||||
summary_index_setting=summary_index_setting,
|
||||
segment_id=segment.id,
|
||||
)
|
||||
|
||||
if not summary_content:
|
||||
raise ValueError("Generated summary is empty")
|
||||
|
||||
return summary_content
|
||||
|
||||
@staticmethod
|
||||
def create_summary_record(
|
||||
segment: DocumentSegment,
|
||||
dataset: Dataset,
|
||||
summary_content: str,
|
||||
status: str = "generating",
|
||||
) -> DocumentSegmentSummary:
|
||||
"""
|
||||
Create or update a DocumentSegmentSummary record.
|
||||
If a summary record already exists for this segment, it will be updated instead of creating a new one.
|
||||
|
||||
Args:
|
||||
segment: DocumentSegment to create summary for
|
||||
dataset: Dataset containing the segment
|
||||
summary_content: Generated summary content
|
||||
status: Summary status (default: "generating")
|
||||
|
||||
Returns:
|
||||
Created or updated DocumentSegmentSummary instance
|
||||
"""
|
||||
# Check if summary record already exists
|
||||
existing_summary = (
|
||||
db.session.query(DocumentSegmentSummary).filter_by(chunk_id=segment.id, dataset_id=dataset.id).first()
|
||||
)
|
||||
|
||||
if existing_summary:
|
||||
# Update existing record
|
||||
existing_summary.summary_content = summary_content
|
||||
existing_summary.status = status
|
||||
existing_summary.error = None # Clear any previous errors
|
||||
# Re-enable if it was disabled
|
||||
if not existing_summary.enabled:
|
||||
existing_summary.enabled = True
|
||||
existing_summary.disabled_at = None
|
||||
existing_summary.disabled_by = None
|
||||
db.session.add(existing_summary)
|
||||
db.session.flush()
|
||||
return existing_summary
|
||||
else:
|
||||
# Create new record (enabled by default)
|
||||
summary_record = DocumentSegmentSummary(
|
||||
dataset_id=dataset.id,
|
||||
document_id=segment.document_id,
|
||||
chunk_id=segment.id,
|
||||
summary_content=summary_content,
|
||||
status=status,
|
||||
enabled=True, # Explicitly set enabled to True
|
||||
)
|
||||
db.session.add(summary_record)
|
||||
db.session.flush()
|
||||
return summary_record
|
||||
|
||||
@staticmethod
|
||||
def vectorize_summary(
|
||||
summary_record: DocumentSegmentSummary,
|
||||
segment: DocumentSegment,
|
||||
dataset: Dataset,
|
||||
) -> None:
|
||||
"""
|
||||
Vectorize summary and store in vector database.
|
||||
|
||||
Args:
|
||||
summary_record: DocumentSegmentSummary record
|
||||
segment: Original DocumentSegment
|
||||
dataset: Dataset containing the segment
|
||||
"""
|
||||
if dataset.indexing_technique != "high_quality":
|
||||
logger.warning(
|
||||
"Summary vectorization skipped for dataset %s: indexing_technique is not high_quality",
|
||||
dataset.id,
|
||||
)
|
||||
return
|
||||
|
||||
# Reuse existing index_node_id if available (like segment does), otherwise generate new one
|
||||
old_summary_node_id = summary_record.summary_index_node_id
|
||||
if old_summary_node_id:
|
||||
# Reuse existing index_node_id (like segment behavior)
|
||||
summary_index_node_id = old_summary_node_id
|
||||
else:
|
||||
# Generate new index node ID only for new summaries
|
||||
summary_index_node_id = str(uuid.uuid4())
|
||||
|
||||
# Always regenerate hash (in case summary content changed)
|
||||
summary_hash = helper.generate_text_hash(summary_record.summary_content)
|
||||
|
||||
# Delete old vector only if we're reusing the same index_node_id (to overwrite)
|
||||
# If index_node_id changed, the old vector should have been deleted elsewhere
|
||||
if old_summary_node_id and old_summary_node_id == summary_index_node_id:
|
||||
try:
|
||||
vector = Vector(dataset)
|
||||
vector.delete_by_ids([old_summary_node_id])
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to delete old summary vector for segment %s: %s. Continuing with new vectorization.",
|
||||
segment.id,
|
||||
str(e),
|
||||
)
|
||||
|
||||
# Create document with summary content and metadata
|
||||
summary_document = Document(
|
||||
page_content=summary_record.summary_content,
|
||||
metadata={
|
||||
"doc_id": summary_index_node_id,
|
||||
"doc_hash": summary_hash,
|
||||
"dataset_id": dataset.id,
|
||||
"document_id": segment.document_id,
|
||||
"original_chunk_id": segment.id, # Key: link to original chunk
|
||||
"doc_type": DocType.TEXT,
|
||||
"is_summary": True, # Identifier for summary documents
|
||||
},
|
||||
)
|
||||
|
||||
# Vectorize and store with retry mechanism for connection errors
|
||||
max_retries = 3
|
||||
retry_delay = 2.0
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
vector = Vector(dataset)
|
||||
vector.add_texts([summary_document], duplicate_check=True)
|
||||
|
||||
# Success - update summary record with index node info
|
||||
summary_record.summary_index_node_id = summary_index_node_id
|
||||
summary_record.summary_index_node_hash = summary_hash
|
||||
summary_record.status = "completed"
|
||||
db.session.add(summary_record)
|
||||
db.session.flush()
|
||||
return # Success, exit function
|
||||
|
||||
except (ConnectionError, Exception) as e:
|
||||
error_str = str(e).lower()
|
||||
# Check if it's a connection-related error that might be transient
|
||||
is_connection_error = any(
|
||||
keyword in error_str
|
||||
for keyword in [
|
||||
"connection",
|
||||
"disconnected",
|
||||
"timeout",
|
||||
"network",
|
||||
"could not connect",
|
||||
"server disconnected",
|
||||
"weaviate",
|
||||
]
|
||||
)
|
||||
|
||||
if is_connection_error and attempt < max_retries - 1:
|
||||
# Retry for connection errors
|
||||
wait_time = retry_delay * (2**attempt) # Exponential backoff
|
||||
logger.warning(
|
||||
"Vectorization attempt %s/%s failed for segment %s: %s. Retrying in %.1f seconds...",
|
||||
attempt + 1,
|
||||
max_retries,
|
||||
segment.id,
|
||||
str(e),
|
||||
wait_time,
|
||||
)
|
||||
time.sleep(wait_time)
|
||||
continue
|
||||
else:
|
||||
# Final attempt failed or non-connection error - log and update status
|
||||
logger.error(
|
||||
"Failed to vectorize summary for segment %s after %s attempts: %s",
|
||||
segment.id,
|
||||
attempt + 1,
|
||||
str(e),
|
||||
exc_info=True,
|
||||
)
|
||||
summary_record.status = "error"
|
||||
summary_record.error = f"Vectorization failed: {str(e)}"
|
||||
db.session.add(summary_record)
|
||||
db.session.flush()
|
||||
raise
|
||||
|
||||
@staticmethod
|
||||
def generate_and_vectorize_summary(
|
||||
segment: DocumentSegment,
|
||||
dataset: Dataset,
|
||||
summary_index_setting: dict,
|
||||
) -> DocumentSegmentSummary:
|
||||
"""
|
||||
Generate summary for a segment and vectorize it.
|
||||
|
||||
Args:
|
||||
segment: DocumentSegment to generate summary for
|
||||
dataset: Dataset containing the segment
|
||||
summary_index_setting: Summary index configuration
|
||||
|
||||
Returns:
|
||||
Created DocumentSegmentSummary instance
|
||||
|
||||
Raises:
|
||||
ValueError: If summary generation fails
|
||||
"""
|
||||
try:
|
||||
# Generate summary
|
||||
summary_content = SummaryIndexService.generate_summary_for_segment(segment, dataset, summary_index_setting)
|
||||
|
||||
# Create or update summary record (will handle overwrite internally)
|
||||
summary_record = SummaryIndexService.create_summary_record(
|
||||
segment, dataset, summary_content, status="generating"
|
||||
)
|
||||
|
||||
# Vectorize summary (will delete old vector if exists before creating new one)
|
||||
SummaryIndexService.vectorize_summary(summary_record, segment, dataset)
|
||||
|
||||
db.session.commit()
|
||||
logger.info("Successfully generated and vectorized summary for segment %s", segment.id)
|
||||
return summary_record
|
||||
|
||||
except Exception:
|
||||
logger.exception("Failed to generate summary for segment %s", segment.id)
|
||||
# Update summary record with error status if it exists
|
||||
summary_record = (
|
||||
db.session.query(DocumentSegmentSummary).filter_by(chunk_id=segment.id, dataset_id=dataset.id).first()
|
||||
)
|
||||
if summary_record:
|
||||
summary_record.status = "error"
|
||||
summary_record.error = str(e)
|
||||
db.session.add(summary_record)
|
||||
db.session.commit()
|
||||
raise
|
||||
|
||||
    @staticmethod
    def generate_summaries_for_document(
        dataset: Dataset,
        document: DatasetDocument,
        summary_index_setting: dict,
        segment_ids: list[str] | None = None,
        only_parent_chunks: bool = False,
    ) -> list[DocumentSegmentSummary]:
        """
        Generate summaries for all segments in a document, including vectorization.

        Args:
            dataset: Dataset containing the document
            document: DatasetDocument to generate summaries for
            summary_index_setting: Summary index configuration
            segment_ids: Optional list of specific segment IDs to process
            only_parent_chunks: If True, only process parent chunks (for parent-child mode)

        Returns:
            List of created DocumentSegmentSummary instances
        """
        # Only generate summary index for high_quality indexing technique
        if dataset.indexing_technique != "high_quality":
            logger.info(
                "Skipping summary generation for dataset %s: indexing_technique is %s, not 'high_quality'",
                dataset.id,
                dataset.indexing_technique,
            )
            return []

        if not summary_index_setting or not summary_index_setting.get("enable"):
            logger.info("Summary index is disabled for dataset %s", dataset.id)
            return []

        # Skip qa_model documents
        if document.doc_form == "qa_model":
            logger.info("Skipping summary generation for qa_model document %s", document.id)
            return []

        logger.info(
            "Starting summary generation for document %s in dataset %s, segment_ids: %s, only_parent_chunks: %s",
            document.id,
            dataset.id,
            len(segment_ids) if segment_ids else "all",
            only_parent_chunks,
        )

        # Query segments (only enabled segments)
        query = db.session.query(DocumentSegment).filter_by(
            dataset_id=dataset.id,
            document_id=document.id,
            status="completed",
            enabled=True,  # Only generate summaries for enabled segments
        )

        if segment_ids:
            query = query.filter(DocumentSegment.id.in_(segment_ids))

        segments = query.all()

        if not segments:
            logger.info("No segments found for document %s", document.id)
            return []

        summary_records = []

        for segment in segments:
            # In parent-child mode, every DocumentSegment is a parent chunk:
            # child chunks live in the ChildChunk table and never appear in
            # this query, so there is nothing extra to filter out here. The
            # flag is kept for clarity and future-proofing.
            if only_parent_chunks:
                pass

            try:
                summary_record = SummaryIndexService.generate_and_vectorize_summary(
                    segment, dataset, summary_index_setting
                )
                summary_records.append(summary_record)
            except Exception:
                logger.exception("Failed to generate summary for segment %s", segment.id)
                # Continue with other segments
                continue

        logger.info(
            "Completed summary generation for document %s: %s summaries generated and vectorized",
            document.id,
            len(summary_records),
        )
        return summary_records

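Editor's note: the service methods above are plain staticmethods over the shared SQLAlchemy session, so they can be exercised directly. A minimal sketch, assuming an app context and that `dataset_id`/`document_id` identify an already-indexed high-quality dataset (both variables are placeholders, not code from this PR):

```python
# Hypothetical driver; dataset_id and document_id are assumed to be known.
from extensions.ext_database import db
from models.dataset import Dataset, Document
from services.summary_index_service import SummaryIndexService

dataset = db.session.query(Dataset).filter_by(id=dataset_id).first()
document = db.session.query(Document).filter_by(id=document_id).first()

records = SummaryIndexService.generate_summaries_for_document(
    dataset=dataset,
    document=document,
    summary_index_setting=dataset.summary_index_setting,
    segment_ids=None,          # None -> all completed, enabled segments
    only_parent_chunks=False,  # True only for parent_child_index datasets
)
print(f"{len(records)} summaries generated")
```
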
    @staticmethod
    def disable_summaries_for_segments(
        dataset: Dataset,
        segment_ids: list[str] | None = None,
        disabled_by: str | None = None,
    ) -> None:
        """
        Disable summary records and remove their vectors from the vector database.
        Unlike delete, this preserves the summary records but marks them as disabled.

        Args:
            dataset: Dataset containing the segments
            segment_ids: List of segment IDs to disable summaries for. If None, disable all.
            disabled_by: ID of the user who disabled the summaries
        """
        from libs.datetime_utils import naive_utc_now

        query = db.session.query(DocumentSegmentSummary).filter_by(
            dataset_id=dataset.id,
            enabled=True,  # Only disable enabled summaries
        )

        if segment_ids:
            query = query.filter(DocumentSegmentSummary.chunk_id.in_(segment_ids))

        summaries = query.all()

        if not summaries:
            return

        logger.info(
            "Disabling %s summary records for dataset %s, segment_ids: %s",
            len(summaries),
            dataset.id,
            len(segment_ids) if segment_ids else "all",
        )

        # Remove from vector database (but keep records)
        if dataset.indexing_technique == "high_quality":
            summary_node_ids = [s.summary_index_node_id for s in summaries if s.summary_index_node_id]
            if summary_node_ids:
                try:
                    vector = Vector(dataset)
                    vector.delete_by_ids(summary_node_ids)
                except Exception as e:
                    logger.warning("Failed to remove summary vectors: %s", str(e))

        # Disable summary records (don't delete)
        now = naive_utc_now()
        for summary in summaries:
            summary.enabled = False
            summary.disabled_at = now
            summary.disabled_by = disabled_by
            db.session.add(summary)

        db.session.commit()
        logger.info("Disabled %s summary records for dataset %s", len(summaries), dataset.id)

    @staticmethod
    def enable_summaries_for_segments(
        dataset: Dataset,
        segment_ids: list[str] | None = None,
    ) -> None:
        """
        Enable summary records and re-add their vectors to the vector database.

        Note: This method enables summaries based on chunk status, not summary_index_setting.enable.
        The summary_index_setting.enable flag only controls automatic generation,
        not whether existing summaries can be used.
        Summary.enabled should always be kept in sync with chunk.enabled.

        Args:
            dataset: Dataset containing the segments
            segment_ids: List of segment IDs to enable summaries for. If None, enable all.
        """
        # Only enable summary index for high_quality indexing technique
        if dataset.indexing_technique != "high_quality":
            return

        query = db.session.query(DocumentSegmentSummary).filter_by(
            dataset_id=dataset.id,
            enabled=False,  # Only enable disabled summaries
        )

        if segment_ids:
            query = query.filter(DocumentSegmentSummary.chunk_id.in_(segment_ids))

        summaries = query.all()

        if not summaries:
            return

        logger.info(
            "Enabling %s summary records for dataset %s, segment_ids: %s",
            len(summaries),
            dataset.id,
            len(segment_ids) if segment_ids else "all",
        )

        # Re-vectorize and re-add to vector database
        enabled_count = 0
        for summary in summaries:
            # Get the original segment
            segment = (
                db.session.query(DocumentSegment)
                .filter_by(
                    id=summary.chunk_id,
                    dataset_id=dataset.id,
                )
                .first()
            )

            # Summary.enabled stays in sync with chunk.enabled: only enable a summary if its chunk is enabled.
            if not segment or not segment.enabled or segment.status != "completed":
                continue

            if not summary.summary_content:
                continue

            try:
                # Re-vectorize summary
                SummaryIndexService.vectorize_summary(summary, segment, dataset)

                # Enable summary record
                summary.enabled = True
                summary.disabled_at = None
                summary.disabled_by = None
                db.session.add(summary)
                enabled_count += 1
            except Exception:
                logger.exception("Failed to re-vectorize summary %s", summary.id)
                # Keep it disabled if vectorization fails
                continue

        db.session.commit()
        logger.info("Enabled %s summary records for dataset %s", enabled_count, dataset.id)

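These two methods are designed to be called as a pair from the enable/disable tasks later in this diff. A minimal sketch, assuming `dataset`, `segment_ids`, and `current_user_id` are already in scope (all three are placeholders):

```python
from services.summary_index_service import SummaryIndexService

# Segment disabled in the UI -> drop summary vectors but keep the rows:
SummaryIndexService.disable_summaries_for_segments(
    dataset=dataset,
    segment_ids=segment_ids,
    disabled_by=current_user_id,  # hypothetical user ID string
)

# Segment re-enabled -> re-vectorize and flip enabled back on. Summaries
# whose segment is still disabled or incomplete are skipped by the service.
SummaryIndexService.enable_summaries_for_segments(
    dataset=dataset,
    segment_ids=segment_ids,
)
```
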
    @staticmethod
    def delete_summaries_for_segments(
        dataset: Dataset,
        segment_ids: list[str] | None = None,
    ) -> None:
        """
        Delete summary records and vectors for segments (used only for actual deletion scenarios).
        For disable/enable operations, use disable_summaries_for_segments/enable_summaries_for_segments.

        Args:
            dataset: Dataset containing the segments
            segment_ids: List of segment IDs to delete summaries for. If None, delete all.
        """
        query = db.session.query(DocumentSegmentSummary).filter_by(dataset_id=dataset.id)

        if segment_ids:
            query = query.filter(DocumentSegmentSummary.chunk_id.in_(segment_ids))

        summaries = query.all()

        if not summaries:
            return

        # Delete from vector database
        if dataset.indexing_technique == "high_quality":
            summary_node_ids = [s.summary_index_node_id for s in summaries if s.summary_index_node_id]
            if summary_node_ids:
                vector = Vector(dataset)
                vector.delete_by_ids(summary_node_ids)

        # Delete summary records
        for summary in summaries:
            db.session.delete(summary)

        db.session.commit()
        logger.info("Deleted %s summary records for dataset %s", len(summaries), dataset.id)

    @staticmethod
    def update_summary_for_segment(
        segment: DocumentSegment,
        dataset: Dataset,
        summary_content: str,
    ) -> DocumentSegmentSummary | None:
        """
        Update the summary for a segment and re-vectorize it.

        Args:
            segment: DocumentSegment to update the summary for
            dataset: Dataset containing the segment
            summary_content: New summary content

        Returns:
            Updated DocumentSegmentSummary instance, or None if summary index is not enabled
        """
        # Only update summary index for high_quality indexing technique
        if dataset.indexing_technique != "high_quality":
            return None

        # Check if summary index is enabled
        summary_index_setting = dataset.summary_index_setting
        if not summary_index_setting or not summary_index_setting.get("enable"):
            return None

        # Skip qa_model documents
        if segment.document and segment.document.doc_form == "qa_model":
            return None

        try:
            # Find existing summary record
            summary_record = (
                db.session.query(DocumentSegmentSummary).filter_by(chunk_id=segment.id, dataset_id=dataset.id).first()
            )

            if summary_record:
                # Update existing summary
                old_summary_node_id = summary_record.summary_index_node_id

                # Update summary content
                summary_record.summary_content = summary_content
                summary_record.status = "generating"
                db.session.add(summary_record)
                db.session.flush()

                # Delete old vector if it exists
                if old_summary_node_id:
                    vector = Vector(dataset)
                    vector.delete_by_ids([old_summary_node_id])

                # Re-vectorize summary
                SummaryIndexService.vectorize_summary(summary_record, segment, dataset)

                db.session.commit()
                logger.info("Successfully updated and re-vectorized summary for segment %s", segment.id)
                return summary_record
            else:
                # Create a new summary record if one doesn't exist
                summary_record = SummaryIndexService.create_summary_record(
                    segment, dataset, summary_content, status="generating"
                )
                SummaryIndexService.vectorize_summary(summary_record, segment, dataset)
                db.session.commit()
                logger.info("Successfully created and vectorized summary for segment %s", segment.id)
                return summary_record

        except Exception as e:
            logger.exception("Failed to update summary for segment %s", segment.id)
            # Update summary record with error status if it exists
            summary_record = (
                db.session.query(DocumentSegmentSummary).filter_by(chunk_id=segment.id, dataset_id=dataset.id).first()
            )
            if summary_record:
                summary_record.status = "error"
                summary_record.error = str(e)
                db.session.add(summary_record)
                db.session.commit()
            raise

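A minimal sketch of the manual-edit path this method serves, assuming `segment` and `dataset` are already-loaded ORM objects (the summary text is invented):

```python
record = SummaryIndexService.update_summary_for_segment(
    segment=segment,
    dataset=dataset,
    summary_content="A tighter, human-written summary of this chunk.",
)
# record is None when the dataset is not high_quality, summary indexing is
# disabled, or the document is a qa_model; otherwise the new text has been
# re-embedded and committed.
```
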
@ -117,6 +117,19 @@ def add_document_to_index_task(dataset_document_id: str):
        )
        db.session.commit()

        # Enable summary indexes for all segments in this document
        from services.summary_index_service import SummaryIndexService

        segment_ids_list = [segment.id for segment in segments]
        if segment_ids_list:
            try:
                SummaryIndexService.enable_summaries_for_segments(
                    dataset=dataset,
                    segment_ids=segment_ids_list,
                )
            except Exception as e:
                logger.warning("Failed to enable summaries for document %s: %s", dataset_document.id, str(e))

        end_at = time.perf_counter()
        logger.info(
            click.style(f"Document added to index: {dataset_document.id} latency: {end_at - start_at}", fg="green")
@ -42,6 +42,7 @@ def delete_segment_from_index_task(
    doc_form = dataset_document.doc_form

    # Proceed with index cleanup using the index_node_ids directly
    # For actual deletion, we should delete summaries (not just disable them)
    index_processor = IndexProcessorFactory(doc_form).init_index_processor()
    index_processor.clean(
        dataset,
@ -49,6 +50,7 @@ def delete_segment_from_index_task(
        with_keywords=True,
        delete_child_chunks=True,
        precomputed_child_node_ids=child_node_ids,
        delete_summaries=True,  # Actually delete summaries when the segment is deleted
    )
    if dataset.is_multimodal:
        # delete segment attachment binding
@ -53,6 +53,18 @@ def disable_segment_from_index_task(segment_id: str):
            logger.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan"))
            return

        # Disable summary index for this segment
        from services.summary_index_service import SummaryIndexService

        try:
            SummaryIndexService.disable_summaries_for_segments(
                dataset=dataset,
                segment_ids=[segment.id],
                disabled_by=segment.disabled_by,
            )
        except Exception as e:
            logger.warning("Failed to disable summary for segment %s: %s", segment.id, str(e))

        index_type = dataset_document.doc_form
        index_processor = IndexProcessorFactory(index_type).init_index_processor()
        index_processor.clean(dataset, [segment.index_node_id])
@ -58,12 +58,26 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen
        return

    try:
        # Disable summary indexes for these segments
        from services.summary_index_service import SummaryIndexService

        segment_ids_list = [segment.id for segment in segments]
        try:
            # Get disabled_by from first segment (they should all have the same disabled_by)
            disabled_by = segments[0].disabled_by if segments else None
            SummaryIndexService.disable_summaries_for_segments(
                dataset=dataset,
                segment_ids=segment_ids_list,
                disabled_by=disabled_by,
            )
        except Exception as e:
            logger.warning("Failed to disable summaries for segments: %s", str(e))

        index_node_ids = [segment.index_node_id for segment in segments]
        if dataset.is_multimodal:
            segment_ids = [segment.id for segment in segments]
            segment_attachment_bindings = (
                db.session.query(SegmentAttachmentBinding)
-               .where(SegmentAttachmentBinding.segment_id.in_(segment_ids))
+               .where(SegmentAttachmentBinding.segment_id.in_(segment_ids_list))
                .all()
            )
            if segment_attachment_bindings:
@ -14,6 +14,7 @@ from extensions.ext_database import db
from libs.datetime_utils import naive_utc_now
from models.dataset import Dataset, Document
from services.feature_service import FeatureService
from tasks.generate_summary_index_task import generate_summary_index_task

logger = logging.getLogger(__name__)
@ -100,6 +101,69 @@ def _document_indexing(dataset_id: str, document_ids: Sequence[str]):
        indexing_runner.run(documents)
        end_at = time.perf_counter()
        logger.info(click.style(f"Processed dataset: {dataset_id} latency: {end_at - start_at}", fg="green"))

        # Trigger summary index generation for completed documents if enabled
        # Only generate for high_quality indexing technique and when summary_index_setting is enabled
        # Re-query dataset to get latest summary_index_setting (in case it was updated)
        dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first()
        if not dataset:
            logger.warning("Dataset %s not found after indexing", dataset_id)
            return

        if dataset.indexing_technique == "high_quality":
            summary_index_setting = dataset.summary_index_setting
            if summary_index_setting and summary_index_setting.get("enable"):
                # Check each document's indexing status and trigger summary generation if completed
                for document_id in document_ids:
                    # Re-query document to get latest status (IndexingRunner may have updated it)
                    document = (
                        db.session.query(Document)
                        .where(Document.id == document_id, Document.dataset_id == dataset_id)
                        .first()
                    )
                    if document:
                        logger.info(
                            "Checking document %s for summary generation: status=%s, doc_form=%s",
                            document_id,
                            document.indexing_status,
                            document.doc_form,
                        )
                        if document.indexing_status == "completed" and document.doc_form != "qa_model":
                            try:
                                generate_summary_index_task.delay(dataset.id, document_id, None)
                                logger.info(
                                    "Queued summary index generation task for document %s in dataset %s "
                                    "after indexing completed",
                                    document_id,
                                    dataset.id,
                                )
                            except Exception:
                                logger.exception(
                                    "Failed to queue summary index generation task for document %s",
                                    document_id,
                                )
                                # Don't fail the entire indexing process if summary task queuing fails
                        else:
                            logger.info(
                                "Skipping summary generation for document %s: status=%s, doc_form=%s",
                                document_id,
                                document.indexing_status,
                                document.doc_form,
                            )
                    else:
                        logger.warning("Document %s not found after indexing", document_id)
            else:
                logger.info(
                    "Summary index generation skipped for dataset %s: summary_index_setting.enable=%s",
                    dataset.id,
                    summary_index_setting.get("enable") if summary_index_setting else None,
                )
        else:
            logger.info(
                "Summary index generation skipped for dataset %s: indexing_technique=%s (not 'high_quality')",
                dataset.id,
                dataset.indexing_technique,
            )
    except DocumentIsPausedError as ex:
        logger.info(click.style(str(ex), fg="yellow"))
    except Exception:
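The nesting above encodes four eligibility checks. As a hedged distillation (this helper does not exist in the codebase; it merely restates the conditions):

```python
def should_queue_summary_generation(dataset, document) -> bool:
    # Mirrors the inline checks in _document_indexing above.
    setting = dataset.summary_index_setting
    return (
        dataset.indexing_technique == "high_quality"
        and bool(setting and setting.get("enable"))
        and document.indexing_status == "completed"
        and document.doc_form != "qa_model"
    )
```
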
@ -103,6 +103,17 @@ def enable_segment_to_index_task(segment_id: str):
        # save vector index
        index_processor.load(dataset, [document], multimodal_documents=multimodel_documents)

        # Enable summary index for this segment
        from services.summary_index_service import SummaryIndexService

        try:
            SummaryIndexService.enable_summaries_for_segments(
                dataset=dataset,
                segment_ids=[segment.id],
            )
        except Exception as e:
            logger.warning("Failed to enable summary for segment %s: %s", segment.id, str(e))

        end_at = time.perf_counter()
        logger.info(click.style(f"Segment enabled to index: {segment.id} latency: {end_at - start_at}", fg="green"))
    except Exception as e:
@ -108,6 +108,18 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i
        # save vector index
        index_processor.load(dataset, documents, multimodal_documents=multimodal_documents)

        # Enable summary indexes for these segments
        from services.summary_index_service import SummaryIndexService

        segment_ids_list = [segment.id for segment in segments]
        try:
            SummaryIndexService.enable_summaries_for_segments(
                dataset=dataset,
                segment_ids=segment_ids_list,
            )
        except Exception as e:
            logger.warning("Failed to enable summaries for segments: %s", str(e))

        end_at = time.perf_counter()
        logger.info(click.style(f"Segments enabled to index latency: {end_at - start_at}", fg="green"))
    except Exception as e:
api/tasks/generate_summary_index_task.py (new file, 112 lines)
@ -0,0 +1,112 @@
"""Async task for generating summary indexes."""

import logging
import time

import click
from celery import shared_task

from extensions.ext_database import db
from models.dataset import Dataset, DocumentSegment
from models.dataset import Document as DatasetDocument
from services.summary_index_service import SummaryIndexService

logger = logging.getLogger(__name__)


@shared_task(queue="dataset")
def generate_summary_index_task(dataset_id: str, document_id: str, segment_ids: list[str] | None = None):
    """
    Async generate summary index for document segments.

    Args:
        dataset_id: Dataset ID
        document_id: Document ID
        segment_ids: Optional list of specific segment IDs to process. If None, process all segments.

    Usage:
        generate_summary_index_task.delay(dataset_id, document_id)
        generate_summary_index_task.delay(dataset_id, document_id, segment_ids)
    """
    logger.info(
        click.style(
            f"Start generating summary index for document {document_id} in dataset {dataset_id}",
            fg="green",
        )
    )
    start_at = time.perf_counter()

    try:
        dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first()
        if not dataset:
            logger.error(click.style(f"Dataset not found: {dataset_id}", fg="red"))
            db.session.close()
            return

        document = db.session.query(DatasetDocument).where(DatasetDocument.id == document_id).first()
        if not document:
            logger.error(click.style(f"Document not found: {document_id}", fg="red"))
            db.session.close()
            return

        # Only generate summary index for high_quality indexing technique
        if dataset.indexing_technique != "high_quality":
            logger.info(
                click.style(
                    f"Skipping summary generation for dataset {dataset_id}: "
                    f"indexing_technique is {dataset.indexing_technique}, not 'high_quality'",
                    fg="cyan",
                )
            )
            db.session.close()
            return

        # Check if summary index is enabled
        summary_index_setting = dataset.summary_index_setting
        if not summary_index_setting or not summary_index_setting.get("enable"):
            logger.info(
                click.style(
                    f"Summary index is disabled for dataset {dataset_id}",
                    fg="cyan",
                )
            )
            db.session.close()
            return

        # Determine if only parent chunks should be processed
        only_parent_chunks = dataset.chunk_structure == "parent_child_index"

        # Generate summaries
        summary_records = SummaryIndexService.generate_summaries_for_document(
            dataset=dataset,
            document=document,
            summary_index_setting=summary_index_setting,
            segment_ids=segment_ids,
            only_parent_chunks=only_parent_chunks,
        )

        end_at = time.perf_counter()
        logger.info(
            click.style(
                f"Summary index generation completed for document {document_id}: "
                f"{len(summary_records)} summaries generated, latency: {end_at - start_at}",
                fg="green",
            )
        )

    except Exception as e:
        logger.exception("Failed to generate summary index for document %s", document_id)
        # Update document segments with error status if needed
        if segment_ids:
            db.session.query(DocumentSegment).filter(
                DocumentSegment.id.in_(segment_ids),
                DocumentSegment.dataset_id == dataset_id,
            ).update(
                {
                    DocumentSegment.error: f"Summary generation failed: {str(e)}",
                },
                synchronize_session=False,
            )
            db.session.commit()
    finally:
        db.session.close()
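Queuing follows standard Celery calling conventions: `.delay()` appears in the docstring above, and `apply_async()` is the equivalent long form. A short sketch (`dataset`, `document`, `seg_a`, and `seg_b` are placeholders):

```python
# All-segments run (the same call the indexing pipeline makes):
generate_summary_index_task.delay(dataset.id, document.id, None)

# Subset run, e.g. after a user re-enables two chunks:
generate_summary_index_task.delay(dataset.id, document.id, [seg_a.id, seg_b.id])

# Long form with explicit routing options:
generate_summary_index_task.apply_async(
    args=[dataset.id, document.id, None],
    queue="dataset",  # matches the @shared_task(queue="dataset") declaration
)
```
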
api/tasks/regenerate_summary_index_task.py (new file, 221 lines)
@ -0,0 +1,221 @@
"""Task for regenerating summary indexes when dataset settings change."""

import logging
import time

import click
from celery import shared_task
from sqlalchemy import select

from extensions.ext_database import db
from models.dataset import Dataset, DocumentSegment, DocumentSegmentSummary
from models.dataset import Document as DatasetDocument
from services.summary_index_service import SummaryIndexService

logger = logging.getLogger(__name__)


@shared_task(queue="dataset")
def regenerate_summary_index_task(
    dataset_id: str,
    regenerate_reason: str = "summary_model_changed",
    regenerate_vectors_only: bool = False,
):
    """
    Regenerate summary indexes for all documents in a dataset.

    This task is triggered when:
    1. summary_index_setting model changes (regenerate_reason="summary_model_changed")
       - Regenerates summary content and vectors for all existing summaries
    2. embedding_model changes (regenerate_reason="embedding_model_changed")
       - Only regenerates vectors for existing summaries (keeps summary content)

    Args:
        dataset_id: Dataset ID
        regenerate_reason: Reason for regeneration ("summary_model_changed" or "embedding_model_changed")
        regenerate_vectors_only: If True, only regenerate vectors without regenerating summary content
    """
    logger.info(
        click.style(
            f"Start regenerate summary index for dataset {dataset_id}, reason: {regenerate_reason}",
            fg="green",
        )
    )
    start_at = time.perf_counter()

    try:
        dataset = db.session.query(Dataset).filter_by(id=dataset_id).first()
        if not dataset:
            logger.error(click.style(f"Dataset not found: {dataset_id}", fg="red"))
            db.session.close()
            return

        # Only regenerate summary index for high_quality indexing technique
        if dataset.indexing_technique != "high_quality":
            logger.info(
                click.style(
                    f"Skipping summary regeneration for dataset {dataset_id}: "
                    f"indexing_technique is {dataset.indexing_technique}, not 'high_quality'",
                    fg="cyan",
                )
            )
            db.session.close()
            return

        # Check if summary index is enabled
        summary_index_setting = dataset.summary_index_setting
        if not summary_index_setting or not summary_index_setting.get("enable"):
            logger.info(
                click.style(
                    f"Summary index is disabled for dataset {dataset_id}",
                    fg="cyan",
                )
            )
            db.session.close()
            return

        # Get all documents with completed indexing status
        dataset_documents = db.session.scalars(
            select(DatasetDocument).where(
                DatasetDocument.dataset_id == dataset_id,
                DatasetDocument.indexing_status == "completed",
                DatasetDocument.enabled == True,
                DatasetDocument.archived == False,
            )
        ).all()

        if not dataset_documents:
            logger.info(
                click.style(
                    f"No documents found for summary regeneration in dataset {dataset_id}",
                    fg="cyan",
                )
            )
            db.session.close()
            return

        logger.info(
            "Found %s documents for summary regeneration in dataset %s",
            len(dataset_documents),
            dataset_id,
        )

        total_segments_processed = 0
        total_segments_failed = 0

        for dataset_document in dataset_documents:
            # Skip qa_model documents
            if dataset_document.doc_form == "qa_model":
                continue

            try:
                # Get all segments with existing summaries
                segments = (
                    db.session.query(DocumentSegment)
                    .join(
                        DocumentSegmentSummary,
                        DocumentSegment.id == DocumentSegmentSummary.chunk_id,
                    )
                    .where(
                        DocumentSegment.document_id == dataset_document.id,
                        DocumentSegment.dataset_id == dataset_id,
                        DocumentSegment.status == "completed",
                        DocumentSegment.enabled == True,
                        DocumentSegmentSummary.dataset_id == dataset_id,
                    )
                    .order_by(DocumentSegment.position.asc())
                    .all()
                )

                if not segments:
                    continue

                logger.info(
                    "Regenerating summaries for %s segments in document %s",
                    len(segments),
                    dataset_document.id,
                )

                for segment in segments:
                    summary_record = None  # keep in scope for the error handler below
                    try:
                        # Get existing summary record
                        summary_record = (
                            db.session.query(DocumentSegmentSummary)
                            .filter_by(
                                chunk_id=segment.id,
                                dataset_id=dataset_id,
                            )
                            .first()
                        )

                        if not summary_record:
                            logger.warning("Summary record not found for segment %s, skipping", segment.id)
                            continue

                        if regenerate_vectors_only:
                            # Only regenerate vectors (for embedding_model change)
                            # Delete old vector
                            if summary_record.summary_index_node_id:
                                try:
                                    from core.rag.datasource.vdb.vector_factory import Vector

                                    vector = Vector(dataset)
                                    vector.delete_by_ids([summary_record.summary_index_node_id])
                                except Exception as e:
                                    logger.warning(
                                        "Failed to delete old summary vector for segment %s: %s",
                                        segment.id,
                                        str(e),
                                    )

                            # Re-vectorize with new embedding model
                            SummaryIndexService.vectorize_summary(summary_record, segment, dataset)
                            db.session.commit()
                        else:
                            # Regenerate both summary content and vectors (for summary_model change)
                            SummaryIndexService.generate_and_vectorize_summary(segment, dataset, summary_index_setting)
                            db.session.commit()

                        total_segments_processed += 1

                    except Exception as e:
                        logger.error(
                            "Failed to regenerate summary for segment %s: %s",
                            segment.id,
                            str(e),
                            exc_info=True,
                        )
                        total_segments_failed += 1
                        # Update summary record with error status
                        if summary_record:
                            summary_record.status = "error"
                            summary_record.error = f"Regeneration failed: {str(e)}"
                            db.session.add(summary_record)
                            db.session.commit()
                        continue

            except Exception as e:
                logger.error(
                    "Failed to process document %s for summary regeneration: %s",
                    dataset_document.id,
                    str(e),
                    exc_info=True,
                )
                continue

        end_at = time.perf_counter()
        logger.info(
            click.style(
                f"Summary index regeneration completed for dataset {dataset_id}: "
                f"{total_segments_processed} segments processed successfully, "
                f"{total_segments_failed} segments failed, "
                f"total documents: {len(dataset_documents)}, "
                f"latency: {end_at - start_at:.2f}s",
                fg="green",
            )
        )

    except Exception:
        logger.exception("Regenerate summary index failed for dataset %s", dataset_id)
    finally:
        db.session.close()
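A sketch of how a settings-update handler might dispatch the two modes; the `old_setting`/`new_setting` comparison is an assumption about the caller, not code from this PR:

```python
# Hypothetical caller; only the regenerate_summary_index_task.delay(...)
# calls below use the real API from this file.
if old_setting.get("model") != new_setting.get("model"):
    # Summary model changed: regenerate both text and vectors.
    regenerate_summary_index_task.delay(dataset.id, "summary_model_changed", False)
elif old_embedding_model != new_embedding_model:
    # Embedding model changed: keep the text, re-embed only.
    regenerate_summary_index_task.delay(
        dataset.id,
        regenerate_reason="embedding_model_changed",
        regenerate_vectors_only=True,
    )
```
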
@ -47,6 +47,21 @@ def remove_document_from_index_task(document_id: str):
        index_processor = IndexProcessorFactory(document.doc_form).init_index_processor()

        segments = db.session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document.id)).all()

        # Disable summary indexes for all segments in this document
        from services.summary_index_service import SummaryIndexService

        segment_ids_list = [segment.id for segment in segments]
        if segment_ids_list:
            try:
                SummaryIndexService.disable_summaries_for_segments(
                    dataset=dataset,
                    segment_ids=segment_ids_list,
                    disabled_by=document.disabled_by,
                )
            except Exception as e:
                logger.warning("Failed to disable summaries for document %s: %s", document.id, str(e))

        index_node_ids = [segment.index_node_id for segment in segments]
        if index_node_ids:
            try:
@ -1,4 +1,3 @@
-import type { UnsafeUnwrappedHeaders } from 'next/headers'
import type { FC } from 'react'
import { headers } from 'next/headers'
import Script from 'next/script'

@ -26,14 +25,14 @@ const extractNonceFromCSP = (cspHeader: string | null): string | undefined => {
  return nonceMatch ? nonceMatch[1] : undefined
}

-const GA: FC<IGAProps> = ({
+const GA: FC<IGAProps> = async ({
  gaType,
}) => {
  if (IS_CE_EDITION)
    return null

  const cspHeader = IS_PROD
-    ? (headers() as unknown as UnsafeUnwrappedHeaders).get('content-security-policy')
+    ? (await headers()).get('content-security-policy')
    : null
  const nonce = extractNonceFromCSP(cspHeader)
@ -0,0 +1,6 @@ (new SVG asset, 16x16, 1.7 KiB)
<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M12 7.33337V2.66671H4.00002V13.3334H8.00002C8.36821 13.3334 8.66669 13.6319 8.66669 14C8.66669 14.3682 8.36821 14.6667 8.00002 14.6667H3.33335C2.96516 14.6667 2.66669 14.3682 2.66669 14V2.00004C2.66669 1.63185 2.96516 1.33337 3.33335 1.33337H12.6667C13.0349 1.33337 13.3334 1.63185 13.3334 2.00004V7.33337C13.3334 7.70156 13.0349 8.00004 12.6667 8.00004C12.2985 8.00004 12 7.70156 12 7.33337Z" fill="#354052"/>
<path d="M10 4.00004C10.3682 4.00004 10.6667 4.29852 10.6667 4.66671C10.6667 5.0349 10.3682 5.33337 10 5.33337H6.00002C5.63183 5.33337 5.33335 5.0349 5.33335 4.66671C5.33335 4.29852 5.63183 4.00004 6.00002 4.00004H10Z" fill="#354052"/>
<path d="M8.00002 6.66671C8.36821 6.66671 8.66669 6.96518 8.66669 7.33337C8.66669 7.70156 8.36821 8.00004 8.00002 8.00004H6.00002C5.63183 8.00004 5.33335 7.70156 5.33335 7.33337C5.33335 6.96518 5.63183 6.66671 6.00002 6.66671H8.00002Z" fill="#354052"/>
<path d="M12.827 10.7902L12.3624 9.58224C12.3048 9.43231 12.1607 9.33337 12 9.33337C11.8394 9.33337 11.6953 9.43231 11.6376 9.58224L11.173 10.7902C11.1054 10.9662 10.9662 11.1054 10.7902 11.173L9.58222 11.6376C9.43229 11.6953 9.33335 11.8394 9.33335 12C9.33335 12.1607 9.43229 12.3048 9.58222 12.3624L10.7902 12.827C10.9662 12.8947 11.1054 13.0338 11.173 13.2099L11.6376 14.4178C11.6953 14.5678 11.8394 14.6667 12 14.6667C12.1607 14.6667 12.3048 14.5678 12.3624 14.4178L12.827 13.2099C12.8947 13.0338 13.0338 12.8947 13.2099 12.827L14.4178 12.3624C14.5678 12.3048 14.6667 12.1607 14.6667 12C14.6667 11.8394 14.5678 11.6953 14.4178 11.6376L13.2099 11.173C13.0338 11.1054 12.8947 10.9662 12.827 10.7902Z" fill="#354052"/>
</svg>
@ -0,0 +1,53 @@
{
  "icon": {
    "type": "element",
    "isRootNode": true,
    "name": "svg",
    "attributes": {
      "width": "16",
      "height": "16",
      "viewBox": "0 0 16 16",
      "fill": "none",
      "xmlns": "http://www.w3.org/2000/svg"
    },
    "children": [
      {
        "type": "element",
        "name": "path",
        "attributes": {
          "d": "M12 7.33337V2.66671H4.00002V13.3334H8.00002C8.36821 13.3334 8.66669 13.6319 8.66669 14C8.66669 14.3682 8.36821 14.6667 8.00002 14.6667H3.33335C2.96516 14.6667 2.66669 14.3682 2.66669 14V2.00004C2.66669 1.63185 2.96516 1.33337 3.33335 1.33337H12.6667C13.0349 1.33337 13.3334 1.63185 13.3334 2.00004V7.33337C13.3334 7.70156 13.0349 8.00004 12.6667 8.00004C12.2985 8.00004 12 7.70156 12 7.33337Z",
          "fill": "currentColor"
        },
        "children": []
      },
      {
        "type": "element",
        "name": "path",
        "attributes": {
          "d": "M10 4.00004C10.3682 4.00004 10.6667 4.29852 10.6667 4.66671C10.6667 5.0349 10.3682 5.33337 10 5.33337H6.00002C5.63183 5.33337 5.33335 5.0349 5.33335 4.66671C5.33335 4.29852 5.63183 4.00004 6.00002 4.00004H10Z",
          "fill": "currentColor"
        },
        "children": []
      },
      {
        "type": "element",
        "name": "path",
        "attributes": {
          "d": "M8.00002 6.66671C8.36821 6.66671 8.66669 6.96518 8.66669 7.33337C8.66669 7.70156 8.36821 8.00004 8.00002 8.00004H6.00002C5.63183 8.00004 5.33335 7.70156 5.33335 7.33337C5.33335 6.96518 5.63183 6.66671 6.00002 6.66671H8.00002Z",
          "fill": "currentColor"
        },
        "children": []
      },
      {
        "type": "element",
        "name": "path",
        "attributes": {
          "d": "M12.827 10.7902L12.3624 9.58224C12.3048 9.43231 12.1607 9.33337 12 9.33337C11.8394 9.33337 11.6953 9.43231 11.6376 9.58224L11.173 10.7902C11.1054 10.9662 10.9662 11.1054 10.7902 11.173L9.58222 11.6376C9.43229 11.6953 9.33335 11.8394 9.33335 12C9.33335 12.1607 9.43229 12.3048 9.58222 12.3624L10.7902 12.827C10.9662 12.8947 11.1054 13.0338 11.173 13.2099L11.6376 14.4178C11.6953 14.5678 11.8394 14.6667 12 14.6667C12.1607 14.6667 12.3048 14.5678 12.3624 14.4178L12.827 13.2099C12.8947 13.0338 13.0338 12.8947 13.2099 12.827L14.4178 12.3624C14.5678 12.3048 14.6667 12.1607 14.6667 12C14.6667 11.8394 14.5678 11.6953 14.4178 11.6376L13.2099 11.173C13.0338 11.1054 12.8947 10.9662 12.827 10.7902Z",
          "fill": "currentColor"
        },
        "children": []
      }
    ]
  },
  "name": "SearchLinesSparkle"
}
@ -0,0 +1,20 @@
// GENERATED BY script
// DO NOT EDIT IT MANUALLY

import type { IconData } from '@/app/components/base/icons/IconBase'
import * as React from 'react'
import IconBase from '@/app/components/base/icons/IconBase'
import data from './SearchLinesSparkle.json'

const Icon = (
  {
    ref,
    ...props
  }: React.SVGProps<SVGSVGElement> & {
    ref?: React.RefObject<React.RefObject<HTMLOrSVGElement>>
  },
) => <IconBase {...props} ref={ref} data={data as IconData} />

Icon.displayName = 'SearchLinesSparkle'

export default Icon
@ -11,5 +11,6 @@ export { default as HighQuality } from './HighQuality'
export { default as HybridSearch } from './HybridSearch'
export { default as ParentChildChunk } from './ParentChildChunk'
export { default as QuestionAndAnswer } from './QuestionAndAnswer'
export { default as SearchLinesSparkle } from './SearchLinesSparkle'
export { default as SearchMenu } from './SearchMenu'
export { default as VectorSearch } from './VectorSearch'
@ -16,6 +16,7 @@ import { Theme } from '@/types/app'
import SVGRenderer from '../svg-gallery' // Assumes svg-gallery.tsx is in /base directory

const Flowchart = dynamic(() => import('@/app/components/base/mermaid'), { ssr: false })
const QuadrantMatrix = dynamic(() => import('@/app/components/base/quadrant-matrix'), { ssr: false })

// Available language https://github.com/react-syntax-highlighter/react-syntax-highlighter/blob/master/AVAILABLE_LANGUAGES_HLJS.MD
const capitalizationLanguageNameMap: Record<string, string> = {

@ -40,6 +41,7 @@ const capitalizationLanguageNameMap: Record<string, string> = {
  latex: 'Latex',
  svg: 'SVG',
  abc: 'ABC',
  quadrant: 'Quadrant',
}
const getCorrectCapitalizationLanguageName = (language: string) => {
  if (!language)

@ -409,6 +411,12 @@ const CodeBlock: any = memo(({ inline, className, children = '', ...props }: any
          <MarkdownMusic children={content} />
        </ErrorBoundary>
      )
    case 'quadrant':
      return (
        <ErrorBoundary>
          <QuadrantMatrix content={content} />
        </ErrorBoundary>
      )
    default:
      return (
        <SyntaxHighlighter
web/app/components/base/quadrant-matrix/index.tsx (new file, 153 lines)
@ -0,0 +1,153 @@
'use client'
import type { FC } from 'react'
import type { QuadrantData } from './types'
import { RiExpandDiagonalLine } from '@remixicon/react'
import { useCallback, useMemo, useState } from 'react'
import { useTranslation } from 'react-i18next'
import ActionButton from '@/app/components/base/action-button'
import FullScreenModal from '@/app/components/base/fullscreen-modal'
import QuadrantCard from './quadrant-card'
import { isValidQuadrantData, QUADRANT_CONFIGS } from './types'

type QuadrantMatrixProps = {
  content: string
}

const QuadrantMatrix: FC<QuadrantMatrixProps> = ({ content }) => {
  const { t } = useTranslation()
  const [isExpanded, setIsExpanded] = useState(false)

  const parsedData = useMemo<QuadrantData | null>(() => {
    try {
      const trimmed = content.trim()
      const data = JSON.parse(trimmed)

      if (!isValidQuadrantData(data))
        return null

      return data
    }
    catch {
      return null
    }
  }, [content])

  const handleExpand = useCallback(() => {
    setIsExpanded(true)
  }, [])

  const handleClose = useCallback(() => {
    setIsExpanded(false)
  }, [])

  if (!parsedData) {
    return (
      <div className="flex items-center justify-center rounded-xl bg-components-panel-bg-blur p-8">
        <div className="text-center text-text-secondary">
          <div className="system-md-semibold mb-2">{t('quadrantMatrix.invalidData', { ns: 'app' })}</div>
          <div className="text-sm text-text-tertiary">
            {t('quadrantMatrix.invalidDataDesc', { ns: 'app' })}
          </div>
        </div>
      </div>
    )
  }

  const totalTasks
    = parsedData.q1.length
    + parsedData.q2.length
    + parsedData.q3.length
    + parsedData.q4.length

  // Shared grid content
  const renderGrid = (expanded: boolean) => (
    <div className="grid grid-cols-2 gap-3">
      {/* Row 1: Q1 (Do First), Q2 (Schedule) */}
      <QuadrantCard
        config={QUADRANT_CONFIGS.q1}
        tasks={parsedData.q1}
        expanded={expanded}
      />
      <QuadrantCard
        config={QUADRANT_CONFIGS.q2}
        tasks={parsedData.q2}
        expanded={expanded}
      />

      {/* Row 2: Q3 (Delegate), Q4 (Don't Do) */}
      <QuadrantCard
        config={QUADRANT_CONFIGS.q3}
        tasks={parsedData.q3}
        expanded={expanded}
      />
      <QuadrantCard
        config={QUADRANT_CONFIGS.q4}
        tasks={parsedData.q4}
        expanded={expanded}
      />
    </div>
  )

  return (
    <>
      <div className="w-full overflow-hidden rounded-xl bg-components-panel-bg-blur p-4">
        {/* Header */}
        <div className="mb-4 flex items-center justify-between">
          <div>
            <div className="system-md-semibold text-text-primary">
              {t('quadrantMatrix.title', { ns: 'app' })}
            </div>
            <div className="text-xs text-text-tertiary">
              {t('quadrantMatrix.taskCount', { ns: 'app', count: totalTasks })}
            </div>
          </div>
          {/* Legend + Expand Button */}
          <div className="flex items-center gap-3">
            <div className="flex items-center gap-3 text-[11px] text-text-quaternary">
              <span>{t('quadrantMatrix.legend.importance', { ns: 'app' })}</span>
              <span>{t('quadrantMatrix.legend.urgency', { ns: 'app' })}</span>
            </div>
            <ActionButton onClick={handleExpand}>
              <RiExpandDiagonalLine className="h-4 w-4" />
            </ActionButton>
          </div>
        </div>

        {/* 2x2 Grid */}
        {renderGrid(false)}
      </div>

      {/* Fullscreen Modal */}
      <FullScreenModal
        open={isExpanded}
        onClose={handleClose}
        closable
      >
        <div className="flex h-full flex-col p-6">
          {/* Modal Header */}
          <div className="mb-6 flex items-center justify-between">
            <div>
              <div className="text-xl font-semibold text-text-primary">
                {t('quadrantMatrix.title', { ns: 'app' })}
              </div>
              <div className="text-sm text-text-tertiary">
                {t('quadrantMatrix.taskCount', { ns: 'app', count: totalTasks })}
              </div>
            </div>
            <div className="flex items-center gap-3 text-sm text-text-quaternary">
              <span>{t('quadrantMatrix.legend.importance', { ns: 'app' })}</span>
              <span>{t('quadrantMatrix.legend.urgency', { ns: 'app' })}</span>
            </div>
          </div>

          {/* Expanded Grid */}
          <div className="min-h-0 flex-1">
            {renderGrid(true)}
          </div>
        </div>
      </FullScreenModal>
    </>
  )
}

export default QuadrantMatrix
web/app/components/base/quadrant-matrix/quadrant-card.tsx (new file, 102 lines)
@ -0,0 +1,102 @@
'use client'
import type { FC } from 'react'
import type { QuadrantConfig, Task } from './types'
import { useTranslation } from 'react-i18next'
import { cn } from '@/utils/classnames'
import TaskItem from './task-item'

type QuadrantCardProps = {
  config: QuadrantConfig
  tasks: Task[]
  expanded?: boolean
  maxDisplay?: number
}

const QuadrantCard: FC<QuadrantCardProps> = ({
  config,
  tasks,
  expanded = false,
  maxDisplay = 3,
}) => {
  const { t } = useTranslation()
  const { number, titleKey, subtitleKey, bgClass, borderClass, titleClass } = config
  const displayLimit = expanded ? Infinity : maxDisplay
  const displayTasks = tasks.slice(0, displayLimit)
  const remainingCount = Math.max(0, tasks.length - displayLimit)

  return (
    <div
      className={cn(
        'flex min-w-0 flex-col rounded-xl border p-3',
        bgClass,
        borderClass,
        expanded ? 'min-h-[280px]' : 'min-h-[200px]',
      )}
    >
      {/* Header with numbered circle */}
      <div className="mb-2 shrink-0">
        <div className="flex items-center gap-2">
          {/* Numbered circle */}
          <span className={cn(
            'flex h-5 w-5 items-center justify-center rounded-full border text-xs font-semibold',
            borderClass,
            titleClass,
          )}
          >
            {number}
          </span>
          <span className={cn('system-sm-semibold', titleClass)}>{t(titleKey, { ns: 'app' })}</span>
          {tasks.length > 0 && (
            <span className="bg-components-badge-bg-gray rounded-full px-1.5 py-0.5 text-[10px] font-medium text-text-tertiary">
              {tasks.length}
            </span>
          )}
        </div>
        <div className="text-[11px] text-text-tertiary">{t(subtitleKey, { ns: 'app' })}</div>
      </div>

      {/* Task List */}
      <div className={cn(
        'flex min-h-0 flex-1 flex-col gap-2',
        expanded && 'overflow-y-auto',
      )}
      >
        {displayTasks.length > 0
          ? (
            displayTasks.map((task) => {
              const taskKey = [
                task.name,
                task.deadline ?? 'no-deadline',
                task.importance_score,
                task.urgency_score,
                task.description ?? '',
                task.action_advice ?? '',
              ].join('|')

              return (
                <TaskItem
                  key={taskKey}
                  task={task}
                  expanded={expanded}
                />
              )
            })
          )
          : (
            <div className="flex flex-1 items-center justify-center text-xs text-text-quaternary">
              {t('quadrantMatrix.noTasks', { ns: 'app' })}
            </div>
          )}
      </div>

      {/* More indicator (only in non-expanded mode) */}
      {!expanded && remainingCount > 0 && (
        <div className="mt-2 shrink-0 text-center text-[11px] text-text-tertiary">
          {t('quadrantMatrix.more', { ns: 'app', count: remainingCount })}
        </div>
      )}
    </div>
  )
}

export default QuadrantCard
web/app/components/base/quadrant-matrix/task-item.tsx (new file, 88 lines)
@ -0,0 +1,88 @@
'use client'
import type { FC } from 'react'
import type { Task } from './types'
import { RiCalendarLine } from '@remixicon/react'
import { useTranslation } from 'react-i18next'
import { cn } from '@/utils/classnames'

type TaskItemProps = {
  task: Task
  expanded?: boolean
  showScores?: boolean
}

const TaskItem: FC<TaskItemProps> = ({ task, expanded = false, showScores = true }) => {
  const { t } = useTranslation()
  const { name, description, deadline, importance_score, urgency_score, action_advice } = task

  return (
    <div className="group min-w-0 rounded-lg bg-components-panel-bg p-2.5 shadow-xs transition-all hover:shadow-sm">
      {/* Header: Task Name + Scores */}
      <div className="flex items-start justify-between gap-2">
        <div
          className={cn(
            'system-sm-medium min-w-0 flex-1 text-text-primary',
            !expanded && 'truncate',
          )}
          title={name}
        >
          {name}
        </div>
        {showScores && (
          <div className="flex shrink-0 items-center gap-1 text-[10px] font-medium">
            <span className="text-text-accent">
              I:
              {importance_score}
            </span>
            <span className="text-text-warning">
              U:
              {urgency_score}
            </span>
          </div>
        )}
      </div>

      {/* Description */}
      {description && (
        <div className={cn(
          'mt-1 text-xs text-text-tertiary',
          !expanded && 'line-clamp-2',
        )}
        >
          {description}
        </div>
      )}

      {/* Deadline Badge */}
      {deadline && (
        <div className="mt-1.5">
          <span className="bg-components-badge-bg-gray inline-flex items-center gap-1 rounded px-1.5 py-0.5 text-[10px] text-text-tertiary">
            <RiCalendarLine className="h-3 w-3" />
            <span>
              {t('quadrantMatrix.deadline', { ns: 'app' })}
              {' '}
              {deadline}
            </span>
          </span>
        </div>
      )}

      {/* Action Advice */}
      {action_advice && (
        <div className="mt-2 border-t border-divider-subtle pt-2">
          <p
            className={cn(
              'text-xs italic text-text-quaternary',
              !expanded && 'line-clamp-2',
            )}
            title={!expanded ? action_advice : undefined}
          >
            {action_advice}
          </p>
        </div>
      )}
    </div>
  )
}

export default TaskItem
web/app/components/base/quadrant-matrix/types.ts (new file, 92 lines)
@ -0,0 +1,92 @@
/**
 * Type definitions for Eisenhower Matrix (Task Quadrant) visualization
 */
import type { I18nKeysWithPrefix } from '@/types/i18n'

export type Task = {
  name: string
  description?: string
  deadline?: string // YYYY-MM-DD format
  importance_score: number // 0-100, based on goal alignment and long-term value
  urgency_score: number // 0-100, based on deadline pressure and delay penalty
  action_advice?: string // Suggested action for this task
}

export type QuadrantData = {
  q1: Task[] // Urgent & Important - Do First
  q2: Task[] // Not Urgent & Important - Schedule
  q3: Task[] // Urgent & Not Important - Delegate
  q4: Task[] // Not Urgent & Not Important - Don't Do
}

type QuadrantKeyBase = I18nKeysWithPrefix<'app', 'quadrantMatrix.q'>
type QuadrantTitleKey = Extract<QuadrantKeyBase, `${string}.title`>
type QuadrantSubtitleKey = Extract<QuadrantKeyBase, `${string}.subtitle`>

export type QuadrantConfig = {
  key: 'q1' | 'q2' | 'q3' | 'q4'
  number: number
  titleKey: QuadrantTitleKey // i18n key for title
  subtitleKey: QuadrantSubtitleKey // i18n key for subtitle
  bgClass: string
  borderClass: string
  titleClass: string
}

// Layout based on Eisenhower Matrix:
// Q1 (Do First) - top-left, Q2 (Schedule) - top-right
// Q3 (Delegate) - bottom-left, Q4 (Don't Do) - bottom-right
export const QUADRANT_CONFIGS: Record<string, QuadrantConfig> = {
  q1: {
    key: 'q1',
    number: 1,
    titleKey: 'quadrantMatrix.q1.title',
    subtitleKey: 'quadrantMatrix.q1.subtitle',
    bgClass: 'bg-state-destructive-hover',
    borderClass: 'border-state-destructive-border',
    titleClass: 'text-text-destructive',
  },
  q2: {
    key: 'q2',
    number: 2,
    titleKey: 'quadrantMatrix.q2.title',
    subtitleKey: 'quadrantMatrix.q2.subtitle',
    bgClass: 'bg-state-accent-hover',
    borderClass: 'border-state-accent-border',
    titleClass: 'text-text-accent',
  },
  q3: {
    key: 'q3',
    number: 3,
    titleKey: 'quadrantMatrix.q3.title',
    subtitleKey: 'quadrantMatrix.q3.subtitle',
    bgClass: 'bg-state-warning-hover',
    borderClass: 'border-state-warning-border',
    titleClass: 'text-text-warning',
  },
  q4: {
    key: 'q4',
    number: 4,
    titleKey: 'quadrantMatrix.q4.title',
    subtitleKey: 'quadrantMatrix.q4.subtitle',
    bgClass: 'bg-components-panel-on-panel-item-bg',
    borderClass: 'border-divider-regular',
    titleClass: 'text-text-tertiary',
  },
}

/**
 * Validates if the data structure matches the QuadrantData interface
 */
export function isValidQuadrantData(data: unknown): data is QuadrantData {
  if (typeof data !== 'object' || data === null)
    return false

  const d = data as Record<string, unknown>
  return (
    Array.isArray(d.q1)
    && Array.isArray(d.q2)
    && Array.isArray(d.q3)
    && Array.isArray(d.q4)
  )
}
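For context, the payload that `isValidQuadrantData` accepts is what a model is expected to emit inside a fenced `quadrant` code block. A minimal Python sketch of a valid payload (field names come straight from the `Task` type above; the task values themselves are invented):

```python
import json

payload = {
    "q1": [{
        "name": "Fix production outage",
        "importance_score": 95,
        "urgency_score": 98,
        "deadline": "2025-01-20",
        "action_advice": "Drop everything and resolve today.",
    }],
    "q2": [{"name": "Draft Q2 roadmap", "importance_score": 85, "urgency_score": 30}],
    "q3": [],
    "q4": [],
}
print(json.dumps(payload))  # emit inside a fenced "quadrant" code block
```
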
@ -3,12 +3,30 @@ import { cn } from '@/utils/classnames'
type ProgressBarProps = {
  percent: number
  color: string
  indeterminate?: boolean
  indeterminateFull?: boolean // For Sandbox users: full width stripe
}

const ProgressBar = ({
  percent = 0,
  color = '#2970FF',
  indeterminate = false,
  indeterminateFull = false,
}: ProgressBarProps) => {
  if (indeterminate) {
    return (
      <div className="overflow-hidden rounded-[6px] bg-components-progress-bar-bg">
        <div
          data-testid="billing-progress-bar-indeterminate"
          className={cn('h-1 rounded-[6px]', indeterminateFull ? 'w-full' : 'w-[30px]')}
          style={{
            background: 'repeating-linear-gradient(-55deg, #D0D5DD, #D0D5DD 2px, transparent 2px, transparent 5px)',
          }}
        />
      </div>
    )
  }

  return (
    <div className="overflow-hidden rounded-[6px] bg-components-progress-bar-bg">
      <div
@ -1,5 +1,5 @@
|
||||
'use client'

import type { FC } from 'react'
import type { ComponentType, FC } from 'react'
import * as React from 'react'
import { useTranslation } from 'react-i18next'
import Tooltip from '@/app/components/base/tooltip'
@@ -9,7 +9,7 @@ import ProgressBar from '../progress-bar'

type Props = {
className?: string
Icon: any
Icon: ComponentType<{ className?: string }>
name: string
tooltip?: string
usage: number
@@ -19,6 +19,12 @@ type Props = {
resetHint?: string
resetInDays?: number
hideIcon?: boolean
// Props for the 50MB threshold display logic
storageMode?: boolean
storageThreshold?: number
storageTooltip?: string
storageTotalDisplay?: string // e.g., "5GB" or "50MB" for formatted display
isSandboxPlan?: boolean
}

const WARNING_THRESHOLD = 80
@@ -35,30 +41,164 @@ const UsageInfo: FC<Props> = ({
resetHint,
resetInDays,
hideIcon = false,
storageMode = false,
storageThreshold = 50,
storageTooltip,
storageTotalDisplay,
isSandboxPlan = false,
}) => {
const { t } = useTranslation()

// Special display logic for usage below threshold (only in storage mode)
const isBelowThreshold = storageMode && usage < storageThreshold
// Sandbox at full capacity (usage >= threshold and it's sandbox plan)
const isSandboxFull = storageMode && isSandboxPlan && usage >= storageThreshold

const percent = usage / total * 100
const color = percent >= 100
? 'bg-components-progress-error-progress'
: (percent >= WARNING_THRESHOLD ? 'bg-components-progress-warning-progress' : 'bg-components-progress-bar-progress-solid')
const getProgressColor = () => {
if (percent >= 100)
return 'bg-components-progress-error-progress'
if (percent >= WARNING_THRESHOLD)
return 'bg-components-progress-warning-progress'
return 'bg-components-progress-bar-progress-solid'
}
const color = getProgressColor()
const isUnlimited = total === NUM_INFINITE
let totalDisplay: string | number = isUnlimited ? t('plansCommon.unlimited', { ns: 'billing' }) : total
if (!isUnlimited && unit && unitPosition === 'inline')
totalDisplay = `${total}${unit}`
const showUnit = !!unit && !isUnlimited && unitPosition === 'suffix'
const resetText = resetHint ?? (typeof resetInDays === 'number' ? t('usagePage.resetsIn', { ns: 'billing', count: resetInDays }) : undefined)
const rightInfo = resetText
? (

const renderRightInfo = () => {
if (resetText) {
return (
<div className="system-xs-regular ml-auto flex-1 text-right text-text-tertiary">
{resetText}
</div>
)
: (showUnit && (
}
if (showUnit) {
return (
<div className="system-xs-medium ml-auto text-text-tertiary">
{unit}
</div>
))
)
}
return null
}

// Render usage display
const renderUsageDisplay = () => {
// Storage mode: special display logic
if (storageMode) {
// Sandbox user at full capacity
if (isSandboxFull) {
return (
<div className="flex items-center gap-1">
<span>
{storageThreshold}
</span>
<span className="system-md-regular text-text-quaternary">/</span>
<span>
{storageThreshold}
{' '}
{unit}
</span>
</div>
)
}
// Usage below threshold - show "< 50 MB" or "< 50 / 5GB"
if (isBelowThreshold) {
const totalText = storageTotalDisplay || totalDisplay
return (
<div className="flex items-center gap-1">
<span>
&lt;
{' '}
{storageThreshold}
</span>
{!isSandboxPlan && (
<>
<span className="system-md-regular text-text-quaternary">/</span>
<span>{totalText}</span>
</>
)}
{isSandboxPlan && <span>{unit}</span>}
</div>
)
}
// Pro/Team users with usage >= threshold - show actual usage
const totalText = storageTotalDisplay || totalDisplay
return (
<div className="flex items-center gap-1">
<span>{usage}</span>
<span className="system-md-regular text-text-quaternary">/</span>
<span>{totalText}</span>
</div>
)
}

// Default display (storageMode = false)
return (
<div className="flex items-center gap-1">
<span>{usage}</span>
<span className="system-md-regular text-text-quaternary">/</span>
<span>{totalDisplay}</span>
</div>
)
}

// Render progress bar with optional tooltip wrapper
const renderProgressBar = () => {
const progressBar = (
<ProgressBar
percent={isBelowThreshold ? 0 : percent}
color={isSandboxFull ? 'bg-components-progress-error-progress' : color}
indeterminate={isBelowThreshold}
indeterminateFull={isBelowThreshold && isSandboxPlan}
/>
)

if (storageMode && storageTooltip) {
return (
<Tooltip
popupContent={(
<div className="w-[200px]">
{storageTooltip}
</div>
)}
asChild={false}
>
<div className="cursor-default">{progressBar}</div>
</Tooltip>
)
}

return progressBar
}

// Render usage text with optional tooltip wrapper
const renderUsageWithTooltip = () => {
const usageDisplay = renderUsageDisplay()

if (storageMode && storageTooltip) {
return (
<Tooltip
popupContent={(
<div className="w-[200px]">
{storageTooltip}
</div>
)}
asChild={false}
>
<div className="cursor-default">{usageDisplay}</div>
</Tooltip>
)
}

return usageDisplay
}

return (
<div className={cn('flex flex-col gap-2 rounded-xl bg-components-panel-bg p-4', className)}>
@@ -78,17 +218,10 @@ const UsageInfo: FC<Props> = ({
)}
</div>
<div className="system-md-semibold flex items-center gap-1 text-text-primary">
<div className="flex items-center gap-1">
{usage}
<div className="system-md-regular text-text-quaternary">/</div>
<div>{totalDisplay}</div>
</div>
{rightInfo}
{renderUsageWithTooltip()}
{renderRightInfo()}
</div>
<ProgressBar
percent={percent}
color={color}
/>
{renderProgressBar()}
</div>
)
}
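For reference, the storage-mode branches above reduce to a small pure function. A minimal sketch of the same display rules, with formatStorageUsage as a hypothetical name (the prop semantics are taken from the hunk):

// Reviewer sketch, not part of the diff: UsageInfo's storage-mode display rules as a pure function.
// 'formatStorageUsage' is a made-up helper; inputs mirror the props added above.
type StorageDisplayInput = {
  usage: number
  threshold: number // storageThreshold, e.g. 50 (MB)
  totalDisplay: string // storageTotalDisplay, e.g. '5120MB'
  unit: string // e.g. 'MB'
  isSandboxPlan: boolean
}

const formatStorageUsage = ({ usage, threshold, totalDisplay, unit, isSandboxPlan }: StorageDisplayInput): string => {
  if (isSandboxPlan && usage >= threshold)
    return `${threshold} / ${threshold} ${unit}` // sandbox at full capacity
  if (usage < threshold)
    return isSandboxPlan ? `< ${threshold} ${unit}` : `< ${threshold} / ${totalDisplay}` // below threshold
  return `${usage} / ${totalDisplay}` // pro/team at or above the threshold
}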
@@ -6,21 +6,44 @@ import {
import * as React from 'react'
import { useTranslation } from 'react-i18next'
import { useProviderContext } from '@/context/provider-context'
import { Plan } from '../type'
import UsageInfo from '../usage-info'

type Props = {
className?: string
}

// Storage threshold in MB - usage below this shows as "< 50 MB"
const STORAGE_THRESHOLD_MB = 50

const VectorSpaceInfo: FC<Props> = ({
className,
}) => {
const { t } = useTranslation()
const { plan } = useProviderContext()
const {
type,
usage,
total,
} = plan

// Determine total based on plan type (in MB)
const getTotalInMB = () => {
switch (type) {
case Plan.sandbox:
return STORAGE_THRESHOLD_MB // 50 MB
case Plan.professional:
return 5 * 1024 // 5 GB = 5120 MB
case Plan.team:
return 20 * 1024 // 20 GB = 20480 MB
default:
return total.vectorSpace
}
}

const totalInMB = getTotalInMB()
const isSandbox = type === Plan.sandbox

return (
<UsageInfo
className={className}
@@ -28,9 +51,14 @@ const VectorSpaceInfo: FC<Props> = ({
name={t('usagePage.vectorSpace', { ns: 'billing' })}
tooltip={t('usagePage.vectorSpaceTooltip', { ns: 'billing' }) as string}
usage={usage.vectorSpace}
total={total.vectorSpace}
total={totalInMB}
unit="MB"
unitPosition="inline"
storageMode
storageThreshold={STORAGE_THRESHOLD_MB}
storageTooltip={t('usagePage.storageThresholdTooltip', { ns: 'billing' }) as string}
storageTotalDisplay={`${totalInMB}MB`}
isSandboxPlan={isSandbox}
/>
)
}
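One thing worth noting about the hard-coded quotas: storageTotalDisplay is built as `${totalInMB}MB`, so the professional quota renders as "5120MB" rather than "5GB". A quick sketch of the values derived from getTotalInMB() above (a human-readable formatter would be a new, hypothetical addition):

// Reviewer sketch, not part of the diff: totals produced by getTotalInMB() per plan.
const expectedTotalDisplay = {
  sandbox: '50MB', // STORAGE_THRESHOLD_MB
  professional: '5120MB', // 5 * 1024, i.e. 5 GB shown in MB
  team: '20480MB', // 20 * 1024, i.e. 20 GB shown in MB
} as const
// If '5GB' / '20GB' is the intended label, a formatter such as the hypothetical
// formatStorageSize(totalInMB) would need to be added; nothing in this diff does that.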
@@ -1,7 +1,7 @@
'use client'

import type { FC } from 'react'
import type { PreProcessingRule } from '@/models/datasets'
import type { PreProcessingRule, SummaryIndexSetting as SummaryIndexSettingType } from '@/models/datasets'
import {
RiAlertFill,
RiSearchEyeLine,
@@ -12,6 +12,7 @@ import Button from '@/app/components/base/button'
import Checkbox from '@/app/components/base/checkbox'
import Divider from '@/app/components/base/divider'
import Tooltip from '@/app/components/base/tooltip'
import SummaryIndexSetting from '@/app/components/datasets/settings/summary-index-setting'
import { IS_CE_EDITION } from '@/config'
import { ChunkingMode } from '@/models/datasets'
import SettingCog from '../../assets/setting-gear-mod.svg'
@@ -52,6 +53,8 @@ type GeneralChunkingOptionsProps = {
onReset: () => void
// Locale
locale: string
summaryIndexSetting?: SummaryIndexSettingType
onSummaryIndexSettingChange?: (payload: SummaryIndexSettingType) => void
}

export const GeneralChunkingOptions: FC<GeneralChunkingOptionsProps> = ({
@@ -74,6 +77,8 @@ export const GeneralChunkingOptions: FC<GeneralChunkingOptionsProps> = ({
onPreview,
onReset,
locale,
summaryIndexSetting,
onSummaryIndexSettingChange,
}) => {
const { t } = useTranslation()

@@ -146,6 +151,13 @@ export const GeneralChunkingOptions: FC<GeneralChunkingOptionsProps> = ({
</label>
</div>
))}
<div className="mt-3">
<SummaryIndexSetting
entry="create-document"
summaryIndexSetting={summaryIndexSetting}
onSummaryIndexSettingChange={onSummaryIndexSettingChange}
/>
</div>
{IS_CE_EDITION && (
<>
<Divider type="horizontal" className="my-4 bg-divider-subtle" />
@@ -2,7 +2,7 @@

import type { FC } from 'react'
import type { ParentChildConfig } from '../hooks'
import type { ParentMode, PreProcessingRule } from '@/models/datasets'
import type { ParentMode, PreProcessingRule, SummaryIndexSetting as SummaryIndexSettingType } from '@/models/datasets'
import { RiSearchEyeLine } from '@remixicon/react'
import Image from 'next/image'
import { useTranslation } from 'react-i18next'
@@ -11,6 +12,7 @@ import Checkbox from '@/app/components/base/checkbox'
import Divider from '@/app/components/base/divider'
import { ParentChildChunk } from '@/app/components/base/icons/src/vender/knowledge'
import RadioCard from '@/app/components/base/radio-card'
import SummaryIndexSetting from '@/app/components/datasets/settings/summary-index-setting'
import { ChunkingMode } from '@/models/datasets'
import FileList from '../../assets/file-list-3-fill.svg'
import Note from '../../assets/note-mod.svg'
@@ -31,6 +32,8 @@ type ParentChildOptionsProps = {
// State
parentChildConfig: ParentChildConfig
rules: PreProcessingRule[]
summaryIndexSetting?: SummaryIndexSettingType
onSummaryIndexSettingChange?: (payload: SummaryIndexSettingType) => void
currentDocForm: ChunkingMode
// Flags
isActive: boolean
@@ -51,6 +54,7 @@ type ParentChildOptionsProps = {
export const ParentChildOptions: FC<ParentChildOptionsProps> = ({
parentChildConfig,
rules,
summaryIndexSetting,
currentDocForm: _currentDocForm,
isActive,
isInUpload,
@@ -62,6 +66,7 @@ export const ParentChildOptions: FC<ParentChildOptionsProps> = ({
onChildDelimiterChange,
onChildMaxLengthChange,
onRuleToggle,
onSummaryIndexSettingChange,
onPreview,
onReset,
}) => {
@@ -183,6 +188,13 @@ export const ParentChildOptions: FC<ParentChildOptionsProps> = ({
</label>
</div>
))}
<div className="mt-3">
<SummaryIndexSetting
entry="create-document"
summaryIndexSetting={summaryIndexSetting}
onSummaryIndexSettingChange={onSummaryIndexSettingChange}
/>
</div>
</div>
</div>
</div>
@@ -14,6 +14,7 @@ import { ChunkingMode } from '@/models/datasets'
import { cn } from '@/utils/classnames'
import { ChunkContainer, QAPreview } from '../../../chunk'
import PreviewDocumentPicker from '../../../common/document-picker/preview-document-picker'
import SummaryLabel from '../../../documents/detail/completed/common/summary-label'
import { PreviewSlice } from '../../../formatted-text/flavours/preview-slice'
import { FormattedText } from '../../../formatted-text/formatted'
import PreviewContainer from '../../../preview/container'
@@ -99,6 +100,7 @@ export const PreviewPanel: FC<PreviewPanelProps> = ({
characterCount={item.content.length}
>
{item.content}
{item.summary && <SummaryLabel summary={item.summary} />}
</ChunkContainer>
))
)}
@@ -131,6 +133,7 @@ export const PreviewPanel: FC<PreviewPanelProps> = ({
)
})}
</FormattedText>
{item.summary && <SummaryLabel summary={item.summary} />}
</ChunkContainer>
)
})
@@ -9,6 +9,7 @@ import type {
CustomFile,
FullDocumentDetail,
ProcessRule,
SummaryIndexSetting as SummaryIndexSettingType,
} from '@/models/datasets'
import type { RetrievalConfig, RETRIEVE_METHOD } from '@/types/app'
import { useCallback } from 'react'
@@ -141,6 +142,7 @@ export const useDocumentCreation = (options: UseDocumentCreationOptions) => {
retrievalConfig: RetrievalConfig,
embeddingModel: DefaultModel,
indexingTechnique: string,
summaryIndexSetting?: SummaryIndexSettingType,
): CreateDocumentReq | null => {
if (isSetting) {
return {
@@ -148,6 +150,7 @@ export const useDocumentCreation = (options: UseDocumentCreationOptions) => {
doc_form: currentDocForm,
doc_language: docLanguage,
process_rule: processRule,
summary_index_setting: summaryIndexSetting,
retrieval_model: retrievalConfig,
embedding_model: embeddingModel.model,
embedding_model_provider: embeddingModel.provider,
@@ -164,6 +167,7 @@ export const useDocumentCreation = (options: UseDocumentCreationOptions) => {
},
indexing_technique: indexingTechnique,
process_rule: processRule,
summary_index_setting: summaryIndexSetting,
doc_form: currentDocForm,
doc_language: docLanguage,
retrieval_model: retrievalConfig,
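Because summary_index_setting is optional in both request branches, existing callers that omit the new argument produce an unchanged payload. A minimal sketch, under the assumption that undefined fields are dropped at JSON serialization:

// Reviewer sketch, not part of the diff. Field names follow the hunk above;
// the payload here is illustrative, not a real request.
const payload = {
  doc_form: 'text_model',
  doc_language: 'en',
  process_rule: { mode: 'automatic' },
  summary_index_setting: undefined, // caller omitted the new optional argument
}

// JSON.stringify drops undefined properties, so the wire format is unchanged:
console.log(JSON.stringify(payload))
// {"doc_form":"text_model","doc_language":"en","process_rule":{"mode":"automatic"}}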
@@ -1,5 +1,5 @@
import type { ParentMode, PreProcessingRule, ProcessRule, Rules } from '@/models/datasets'
import { useCallback, useState } from 'react'
import type { ParentMode, PreProcessingRule, ProcessRule, Rules, SummaryIndexSetting as SummaryIndexSettingType } from '@/models/datasets'
import { useCallback, useRef, useState } from 'react'
import { ChunkingMode, ProcessMode } from '@/models/datasets'
import escape from './escape'
import unescape from './unescape'
@@ -39,6 +39,7 @@ export const defaultParentChildConfig: ParentChildConfig = {

export type UseSegmentationStateOptions = {
initialSegmentationType?: ProcessMode
initialSummaryIndexSetting?: SummaryIndexSettingType
}

export const useSegmentationState = (options: UseSegmentationStateOptions = {}) => {
@@ -58,6 +59,12 @@ export const useSegmentationState = (options: UseSegmentationStateOptions = {})
// Pre-processing rules
const [rules, setRules] = useState<PreProcessingRule[]>([])
const [defaultConfig, setDefaultConfig] = useState<Rules>()
const [summaryIndexSetting, setSummaryIndexSetting] = useState<SummaryIndexSettingType | undefined>()
const summaryIndexSettingRef = useRef<SummaryIndexSettingType | undefined>(summaryIndexSetting)
const handleSummaryIndexSettingChange = useCallback((payload: SummaryIndexSettingType) => {
setSummaryIndexSetting({ ...summaryIndexSettingRef.current, ...payload })
summaryIndexSettingRef.current = { ...summaryIndexSettingRef.current, ...payload }
}, [])

// Parent-child config
const [parentChildConfig, setParentChildConfig] = useState<ParentChildConfig>(defaultParentChildConfig)
@@ -134,6 +141,7 @@ export const useSegmentationState = (options: UseSegmentationStateOptions = {})
},
},
mode: 'hierarchical',
summary_index_setting: summaryIndexSettingRef.current,
} as ProcessRule
}

@@ -147,6 +155,7 @@ export const useSegmentationState = (options: UseSegmentationStateOptions = {})
},
},
mode: segmentationType,
summary_index_setting: summaryIndexSettingRef.current,
} as ProcessRule
}, [rules, parentChildConfig, segmentIdentifier, maxChunkLength, overlap, segmentationType])

@@ -204,6 +213,8 @@ export const useSegmentationState = (options: UseSegmentationStateOptions = {})
defaultConfig,
setDefaultConfig,
toggleRule,
summaryIndexSetting,
handleSummaryIndexSettingChange,

// Parent-child config
parentChildConfig,
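The summaryIndexSettingRef / setSummaryIndexSetting pair above is the usual trick for keeping a stable callback identity while still merging into the latest value. The same pattern generalized, as a sketch assuming nothing beyond React itself (useMergedSetting is a made-up name):

// Reviewer sketch, not part of the diff: a generic hook showing the ref-mirroring
// pattern used for summaryIndexSetting above.
import { useCallback, useRef, useState } from 'react'

function useMergedSetting<T extends object>(initial?: T) {
  const [value, setValue] = useState<T | undefined>(initial)
  const ref = useRef<T | undefined>(initial)
  // Stable identity: the callback reads the latest value via the ref, so it can be
  // passed to memoized children without being re-created on every change.
  const merge = useCallback((patch: Partial<T>) => {
    const next = { ...ref.current, ...patch } as T
    ref.current = next
    setValue(next)
  }, [])
  return [value, merge] as const
}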
@@ -65,6 +65,7 @@ const StepTwo: FC<StepTwoProps> = ({
// Custom hooks
const segmentation = useSegmentationState({
initialSegmentationType: currentDataset?.doc_form === ChunkingMode.parentChild ? ProcessMode.parentChild : ProcessMode.general,
initialSummaryIndexSetting: currentDataset?.summary_index_setting,
})
const indexing = useIndexingConfig({
initialIndexType: propsIndexingType,
@@ -156,7 +157,7 @@ const StepTwo: FC<StepTwoProps> = ({
})
if (!isValid)
return
const params = creation.buildCreationParams(currentDocForm, docLanguage, segmentation.getProcessRule(currentDocForm), indexing.retrievalConfig, indexing.embeddingModel, indexing.getIndexingTechnique())
const params = creation.buildCreationParams(currentDocForm, docLanguage, segmentation.getProcessRule(currentDocForm), indexing.retrievalConfig, indexing.embeddingModel, indexing.getIndexingTechnique(), segmentation.summaryIndexSetting)
if (!params)
return
await creation.executeCreation(params, indexing.indexType, indexing.retrievalConfig)
@@ -217,6 +218,8 @@ const StepTwo: FC<StepTwoProps> = ({
onPreview={updatePreview}
onReset={segmentation.resetToDefaults}
locale={locale}
summaryIndexSetting={segmentation.summaryIndexSetting}
onSummaryIndexSettingChange={segmentation.handleSummaryIndexSettingChange}
/>
)}
{showParentChildOption && (
@@ -236,6 +239,8 @@ const StepTwo: FC<StepTwoProps> = ({
onRuleToggle={segmentation.toggleRule}
onPreview={updatePreview}
onReset={segmentation.resetToDefaults}
summaryIndexSetting={segmentation.summaryIndexSetting}
onSummaryIndexSettingChange={segmentation.handleSummaryIndexSettingChange}
/>
)}
<Divider className="my-5" />
@@ -30,12 +30,13 @@ import { useDatasetDetailContextWithSelector as useDatasetDetailContext } from '
import useTimestamp from '@/hooks/use-timestamp'
import { ChunkingMode, DataSourceType, DocumentActionType } from '@/models/datasets'
import { DatasourceType } from '@/models/pipeline'
import { useDocumentArchive, useDocumentBatchRetryIndex, useDocumentDelete, useDocumentDisable, useDocumentDownloadZip, useDocumentEnable } from '@/service/knowledge/use-document'
import { useDocumentArchive, useDocumentBatchRetryIndex, useDocumentDelete, useDocumentDisable, useDocumentDownloadZip, useDocumentEnable, useDocumentSummary } from '@/service/knowledge/use-document'
import { asyncRunSafe } from '@/utils'
import { cn } from '@/utils/classnames'
import { downloadBlob } from '@/utils/download'
import { formatNumber } from '@/utils/format'
import BatchAction from '../detail/completed/common/batch-action'
import SummaryStatus from '../detail/completed/common/summary-status'
import StatusItem from '../status-item'
import s from '../style.module.css'
import Operations from './operations'
@@ -219,6 +220,7 @@ const DocumentList: FC<IDocumentListProps> = ({
onSelectedIdChange(uniq([...selectedIds, ...localDocs.map(doc => doc.id)]))
}, [isAllSelected, localDocs, onSelectedIdChange, selectedIds])
const { mutateAsync: archiveDocument } = useDocumentArchive()
const { mutateAsync: generateSummary } = useDocumentSummary()
const { mutateAsync: enableDocument } = useDocumentEnable()
const { mutateAsync: disableDocument } = useDocumentDisable()
const { mutateAsync: deleteDocument } = useDocumentDelete()
@@ -232,6 +234,9 @@ const DocumentList: FC<IDocumentListProps> = ({
case DocumentActionType.archive:
opApi = archiveDocument
break
case DocumentActionType.summary:
opApi = generateSummary
break
case DocumentActionType.enable:
opApi = enableDocument
break
@@ -444,6 +449,13 @@ const DocumentList: FC<IDocumentListProps> = ({
>
<span className="grow-1 truncate text-sm">{doc.name}</span>
</Tooltip>
{
doc.summary_index_status && (
<div className="ml-1 hidden shrink-0 group-hover:flex">
<SummaryStatus status={doc.summary_index_status} />
</div>
)
}
<div className="hidden shrink-0 group-hover:ml-auto group-hover:flex">
<Tooltip
popupContent={t('list.table.rename', { ns: 'datasetDocuments' })}
@@ -496,6 +508,7 @@
className="absolute bottom-16 left-0 z-20"
selectedIds={selectedIds}
onArchive={handleAction(DocumentActionType.archive)}
onBatchSummary={handleAction(DocumentActionType.summary)}
onBatchEnable={handleAction(DocumentActionType.enable)}
onBatchDisable={handleAction(DocumentActionType.disable)}
onBatchDownload={downloadableSelectedIds.length > 0 ? handleBatchDownload : undefined}
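The switch that assigns opApi grows by one case per DocumentActionType; a lookup table is a possible alternative. A sketch, with the caveat that it assumes every mutateAsync handle shares one argument shape, which the diff itself does not state:

// Reviewer sketch, not part of the diff: replacing the opApi switch with a record.
// 'MutateFn' and its argument shape are assumptions, not taken from the codebase.
type MutateFn = (args: { datasetId: string, documentIds: string[] }) => Promise<unknown>

type ActionKey = 'archive' | 'summary' | 'enable' | 'disable' | 'delete'

const makeOpApiMap = (apis: Record<ActionKey, MutateFn>) => apis

// Inside the component, with the mutateAsync handles bound above:
// const opApi = makeOpApiMap({
//   archive: archiveDocument,
//   summary: generateSummary,
//   enable: enableDocument,
//   disable: disableDocument,
//   delete: deleteDocument,
// })[action]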
@@ -21,6 +21,7 @@ import { useTranslation } from 'react-i18next'
import { useContext } from 'use-context-selector'
import Confirm from '@/app/components/base/confirm'
import Divider from '@/app/components/base/divider'
import { SearchLinesSparkle } from '@/app/components/base/icons/src/vender/knowledge'
import CustomPopover from '@/app/components/base/popover'
import Switch from '@/app/components/base/switch'
import { ToastContext } from '@/app/components/base/toast'
@@ -34,6 +35,7 @@ import {
useDocumentEnable,
useDocumentPause,
useDocumentResume,
useDocumentSummary,
useDocumentUnArchive,
useSyncDocument,
useSyncWebsite,
@@ -87,6 +89,7 @@ const Operations = ({
const { mutateAsync: downloadDocument, isPending: isDownloading } = useDocumentDownload()
const { mutateAsync: syncDocument } = useSyncDocument()
const { mutateAsync: syncWebsite } = useSyncWebsite()
const { mutateAsync: generateSummary } = useDocumentSummary()
const { mutateAsync: pauseDocument } = useDocumentPause()
const { mutateAsync: resumeDocument } = useDocumentResume()
const isListScene = scene === 'list'
@@ -112,6 +115,9 @@ const Operations = ({
else
opApi = syncWebsite
break
case 'summary':
opApi = generateSummary
break
case 'pause':
opApi = pauseDocument
break
@@ -257,6 +263,10 @@ const Operations = ({
<span className={s.actionName}>{t('list.action.sync', { ns: 'datasetDocuments' })}</span>
</div>
)}
<div className={s.actionItem} onClick={() => onOperate('summary')}>
<SearchLinesSparkle className="h-4 w-4 text-text-tertiary" />
<span className={s.actionName}>{t('list.action.summary', { ns: 'datasetDocuments' })}</span>
</div>
<Divider className="my-1" />
</>
)}
@@ -6,6 +6,7 @@ import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
import Confirm from '@/app/components/base/confirm'
import Divider from '@/app/components/base/divider'
import { SearchLinesSparkle } from '@/app/components/base/icons/src/vender/knowledge'
import { cn } from '@/utils/classnames'

const i18nPrefix = 'batchAction'
@@ -16,6 +17,7 @@ type IBatchActionProps = {
onBatchDisable: () => void
onBatchDownload?: () => void
onBatchDelete: () => Promise<void>
onBatchSummary?: () => void
onArchive?: () => void
onEditMetadata?: () => void
onBatchReIndex?: () => void
@@ -27,6 +29,7 @@ const BatchAction: FC<IBatchActionProps> = ({
selectedIds,
onBatchEnable,
onBatchDisable,
onBatchSummary,
onBatchDownload,
onArchive,
onBatchDelete,
@@ -84,7 +87,16 @@ const BatchAction: FC<IBatchActionProps> = ({
<span className="px-0.5">{t('metadata.metadata', { ns: 'dataset' })}</span>
</Button>
)}

{onBatchSummary && (
<Button
variant="ghost"
className="gap-x-0.5 px-3"
onClick={onBatchSummary}
>
<SearchLinesSparkle className="size-4" />
<span className="px-0.5">{t('list.action.summary', { ns: 'datasetDocuments' })}</span>
</Button>
)}
{onArchive && (
<Button
variant="ghost"
@@ -0,0 +1,26 @@
import { memo } from 'react'
import { useTranslation } from 'react-i18next'
import { cn } from '@/utils/classnames'

type SummaryLabelProps = {
summary?: string
className?: string
}
const SummaryLabel = ({
summary,
className,
}: SummaryLabelProps) => {
const { t } = useTranslation()

return (
<div className={cn('space-y-1', className)}>
<div className="system-xs-medium-uppercase mt-2 flex items-center justify-between text-text-tertiary">
{t('segment.summary', { ns: 'datasetDocuments' })}
<div className="ml-2 h-px grow bg-divider-regular"></div>
</div>
<div className="body-xs-regular text-text-tertiary">{summary}</div>
</div>
)
}

export default memo(SummaryLabel)
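Note that summary is typed as optional yet rendered unconditionally, so the call sites in this change guard with a truthiness check; passing undefined would render an empty label body. A typical guarded call site (the summary text is illustrative):

// Reviewer sketch, not part of the diff: guarded SummaryLabel usage, mirroring
// the call sites elsewhere in this change.
import SummaryLabel from '@/app/components/datasets/documents/detail/completed/common/summary-label'

const Example = ({ summary }: { summary?: string }) => (
  <>
    {summary && <SummaryLabel summary={summary} className="mt-2" />}
  </>
)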
@@ -0,0 +1,47 @@
import { memo, useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import Badge from '@/app/components/base/badge'
import { SearchLinesSparkle } from '@/app/components/base/icons/src/vender/knowledge'
import Tooltip from '@/app/components/base/tooltip'

type SummaryStatusProps = {
status: string
}

const SummaryStatus = ({ status }: SummaryStatusProps) => {
const { t } = useTranslation()

const tip = useMemo(() => {
if (status === 'COMPLETED') {
return t('list.summary.ready', { ns: 'datasetDocuments' })
}
if (status === 'GENERATING') {
return t('list.summary.generatingSummary', { ns: 'datasetDocuments' })
}
return ''
}, [status, t])

return (
<Tooltip
popupContent={tip}
>
{
status === 'COMPLETED' && (
<Badge>
<SearchLinesSparkle className="h-3 w-3" />
</Badge>
)
}
{
status === 'GENERATING' && (
<Badge className="border-text-accent-secondary text-text-accent-secondary">
<SearchLinesSparkle className="mr-0.5 h-3 w-3" />
<span>{t('list.summary.generating', { ns: 'datasetDocuments' })}</span>
</Badge>
)
}
</Tooltip>
)
}

export default memo(SummaryStatus)
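status is typed as a plain string, and the component silently renders nothing for any value other than 'COMPLETED' or 'GENERATING'. A string-literal union would let the compiler catch typos at call sites; a sketch, where only the two literals are confirmed by this diff and any further states would be assumptions:

// Reviewer sketch, not part of the diff: narrowing the prop type.
type SummaryIndexStatus = 'COMPLETED' | 'GENERATING'

type SummaryStatusProps = {
  status: SummaryIndexStatus
}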
@@ -0,0 +1,35 @@
import { memo } from 'react'
import { useTranslation } from 'react-i18next'
import Textarea from 'react-textarea-autosize'
import { cn } from '@/utils/classnames'

type SummaryTextProps = {
value?: string
onChange?: (value: string) => void
disabled?: boolean
}
const SummaryText = ({
value,
onChange,
disabled,
}: SummaryTextProps) => {
const { t } = useTranslation()

return (
<div className="space-y-1">
<div className="system-xs-medium-uppercase text-text-tertiary">{t('segment.summary', { ns: 'datasetDocuments' })}</div>
<Textarea
className={cn(
'body-sm-regular w-full resize-none bg-transparent leading-6 text-text-secondary outline-none',
)}
placeholder={t('segment.summaryPlaceholder', { ns: 'datasetDocuments' })}
minRows={1}
value={value ?? ''}
onChange={e => onChange?.(e.target.value)}
disabled={disabled}
/>
</div>
)
}

export default memo(SummaryText)
@@ -322,6 +322,7 @@ const Completed: FC<ICompletedProps> = ({
answer: string,
keywords: string[],
attachments: FileEntity[],
summary?: string,
needRegenerate = false,
) => {
const params: SegmentUpdater = { content: '', attachment_ids: [] }
@@ -351,6 +352,9 @@ const Completed: FC<ICompletedProps> = ({
params.attachment_ids = attachments.map(item => item.uploadedId!)
}

if (summary)
params.summary = summary

if (needRegenerate)
params.regenerate_child_chunks = needRegenerate

@@ -364,6 +368,7 @@ const Completed: FC<ICompletedProps> = ({
if (seg.id === segmentId) {
seg.answer = res.data.answer
seg.content = res.data.content
seg.summary = res.data.summary
seg.sign_content = res.data.sign_content
seg.keywords = res.data.keywords
seg.attachments = res.data.attachments
@@ -19,13 +19,14 @@ import { useDocumentContext } from '../../context'
import ChildSegmentList from '../child-segment-list'
import Dot from '../common/dot'
import { SegmentIndexTag } from '../common/segment-index-tag'
import SummaryLabel from '../common/summary-label'
import Tag from '../common/tag'
import ParentChunkCardSkeleton from '../skeleton/parent-chunk-card-skeleton'
import ChunkContent from './chunk-content'

type ISegmentCardProps = {
loading: boolean
detail?: SegmentDetailModel & { document?: { name: string } }
detail?: SegmentDetailModel & { document?: { name: string }, status?: string }
onClick?: () => void
onChangeSwitch?: (enabled: boolean, segId?: string) => Promise<void>
onDelete?: (segId: string) => Promise<void>
@@ -43,7 +44,7 @@ type ISegmentCardProps = {
}

const SegmentCard: FC<ISegmentCardProps> = ({
detail = {},
detail = { status: '' },
onClick,
onChangeSwitch,
onDelete,
@@ -67,6 +68,7 @@ const SegmentCard: FC<ISegmentCardProps> = ({
word_count,
hit_count,
answer,
summary,
keywords,
child_chunks = [],
created_at,
@@ -237,6 +239,11 @@ const SegmentCard: FC<ISegmentCardProps> = ({
className={contentOpacity}
/>
{images.length > 0 && <ImageList images={images} size="md" className="py-1" />}
{
summary && (
<SummaryLabel summary={summary} className="mt-2" />
)
}
{isGeneralMode && (
<div className={cn('flex flex-wrap items-center gap-2 py-1.5', contentOpacity)}>
{keywords?.map(keyword => <Tag key={keyword} text={keyword} />)}
@@ -25,6 +25,7 @@ import Dot from './common/dot'
import Keywords from './common/keywords'
import RegenerationModal from './common/regeneration-modal'
import { SegmentIndexTag } from './common/segment-index-tag'
import SummaryText from './common/summary-text'
import { useSegmentListContext } from './index'

type ISegmentDetailProps = {
@@ -35,6 +36,7 @@ type ISegmentDetailProps = {
a: string,
k: string[],
attachments: FileEntity[],
summary?: string,
needRegenerate?: boolean,
) => void
onCancel: () => void
@@ -57,6 +59,7 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
const { t } = useTranslation()
const [question, setQuestion] = useState(isEditMode ? segInfo?.content || '' : segInfo?.sign_content || '')
const [answer, setAnswer] = useState(segInfo?.answer || '')
const [summary, setSummary] = useState(segInfo?.summary || '')
const [attachments, setAttachments] = useState<FileEntity[]>(() => {
return segInfo?.attachments?.map(item => ({
id: uuid4(),
@@ -91,8 +94,8 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
}, [onCancel])

const handleSave = useCallback(() => {
onUpdate(segInfo?.id || '', question, answer, keywords, attachments)
}, [onUpdate, segInfo?.id, question, answer, keywords, attachments])
onUpdate(segInfo?.id || '', question, answer, keywords, attachments, summary, false)
}, [onUpdate, segInfo?.id, question, answer, keywords, attachments, summary])

const handleRegeneration = useCallback(() => {
setShowRegenerationModal(true)
@@ -111,8 +114,8 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
}, [onCancel, onModalStateChange])

const onConfirmRegeneration = useCallback(() => {
onUpdate(segInfo?.id || '', question, answer, keywords, attachments, true)
}, [onUpdate, segInfo?.id, question, answer, keywords, attachments])
onUpdate(segInfo?.id || '', question, answer, keywords, attachments, summary, true)
}, [onUpdate, segInfo?.id, question, answer, keywords, attachments, summary])

const onAttachmentsChange = useCallback((attachments: FileEntity[]) => {
setAttachments(attachments)
@@ -197,6 +200,11 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
value={attachments}
onChange={onAttachmentsChange}
/>
<SummaryText
value={summary}
onChange={summary => setSummary(summary)}
disabled={!isEditMode}
/>
{isECOIndexing && (
<Keywords
className="w-full"
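With the new summary parameter, onUpdate now takes seven positional arguments, two of them optional, and call sites read like onUpdate(id, q, a, k, attachments, summary, true). An options object would make these calls self-describing; a sketch, with names suggested rather than taken from the change:

// Reviewer sketch, not part of the diff: an options-object signature for onUpdate.
// FileEntity is stubbed here only to keep the sketch self-contained.
type FileEntity = { id: string, uploadedId?: string }

type SegmentUpdatePayload = {
  segmentId: string
  question: string
  answer: string
  keywords: string[]
  attachments: FileEntity[]
  summary?: string
  needRegenerate?: boolean
}

declare function onUpdate(payload: SegmentUpdatePayload): void

// Call sites then name every field:
// onUpdate({ segmentId, question, answer, keywords, attachments, summary, needRegenerate: true })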
@@ -1 +1 @@
export type OperationName = 'delete' | 'archive' | 'enable' | 'disable' | 'sync' | 'un_archive' | 'pause' | 'resume'
export type OperationName = 'delete' | 'archive' | 'enable' | 'disable' | 'sync' | 'un_archive' | 'pause' | 'resume' | 'summary'
@@ -12,6 +12,7 @@ import { cn } from '@/utils/classnames'
import ImageList from '../../common/image-list'
import Dot from '../../documents/detail/completed/common/dot'
import { SegmentIndexTag } from '../../documents/detail/completed/common/segment-index-tag'
import SummaryText from '../../documents/detail/completed/common/summary-text'
import ChildChunksItem from './child-chunks-item'
import Mask from './mask'
import Score from './score'
@@ -28,7 +29,7 @@ const ChunkDetailModal = ({
onHide,
}: ChunkDetailModalProps) => {
const { t } = useTranslation()
const { segment, score, child_chunks, files } = payload
const { segment, score, child_chunks, files, summary } = payload
const { position, content, sign_content, keywords, document, answer } = segment
const isParentChildRetrieval = !!(child_chunks && child_chunks.length > 0)
const extension = document.name.split('.').slice(-1)[0] as FileAppearanceTypeEnum
@@ -104,11 +105,14 @@ const ChunkDetailModal = ({
{/* Mask */}
<Mask className="absolute inset-x-0 bottom-0" />
</div>
{(showImages || showKeywords) && (
{(showImages || showKeywords || !!summary) && (
<div className="flex flex-col gap-y-3 pt-3">
{showImages && (
<ImageList images={images} size="md" className="py-1" />
)}
{!!summary && (
<SummaryText value={summary} disabled />
)}
{showKeywords && (
<div className="flex flex-col gap-y-1">
<div className="text-xs font-medium uppercase text-text-tertiary">{t(`${i18nPrefix}keyword`, { ns: 'datasetHitTesting' })}</div>
@@ -7,6 +7,7 @@ import * as React from 'react'
import { useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import { Markdown } from '@/app/components/base/markdown'
import SummaryLabel from '@/app/components/datasets/documents/detail/completed/common/summary-label'
import Tag from '@/app/components/datasets/documents/detail/completed/common/tag'
import { extensionToFileType } from '@/app/components/datasets/hit-testing/utils/extension-to-file-type'
import { cn } from '@/utils/classnames'
@@ -25,7 +26,7 @@ const ResultItem = ({
payload,
}: ResultItemProps) => {
const { t } = useTranslation()
const { segment, score, child_chunks, files } = payload
const { segment, score, child_chunks, files, summary } = payload
const data = segment
const { position, word_count, content, sign_content, keywords, document } = data
const isParentChildRetrieval = !!(child_chunks && child_chunks.length > 0)
@@ -98,6 +99,9 @@ const ResultItem = ({
))}
</div>
)}
{summary && (
<SummaryLabel summary={summary} className="mt-2" />
)}
</div>
{/* Foot */}
<ResultItemFooter docType={fileType} docTitle={document.name} showDetailModal={showDetailModal} />
@@ -2,7 +2,7 @@
import type { AppIconSelection } from '@/app/components/base/app-icon-picker'
import type { DefaultModel } from '@/app/components/header/account-setting/model-provider-page/declarations'
import type { Member } from '@/models/common'
import type { IconInfo } from '@/models/datasets'
import type { IconInfo, SummaryIndexSetting as SummaryIndexSettingType } from '@/models/datasets'
import type { AppIconType, RetrievalConfig } from '@/types/app'
import { RiAlertFill } from '@remixicon/react'
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
@@ -33,6 +33,7 @@ import RetrievalSettings from '../../external-knowledge-base/create/RetrievalSet
import ChunkStructure from '../chunk-structure'
import IndexMethod from '../index-method'
import PermissionSelector from '../permission-selector'
import SummaryIndexSetting from '../summary-index-setting'
import { checkShowMultiModalTip } from '../utils'

const rowClass = 'flex gap-x-1'
@@ -76,6 +77,12 @@ const Form = () => {
model: '',
},
)
const [summaryIndexSetting, setSummaryIndexSetting] = useState(currentDataset?.summary_index_setting)
const summaryIndexSettingRef = useRef(currentDataset?.summary_index_setting)
const handleSummaryIndexSettingChange = useCallback((payload: SummaryIndexSettingType) => {
setSummaryIndexSetting({ ...summaryIndexSettingRef.current, ...payload })
summaryIndexSettingRef.current = { ...summaryIndexSettingRef.current, ...payload }
}, [])
const { data: rerankModelList } = useModelList(ModelTypeEnum.rerank)
const { data: embeddingModelList } = useModelList(ModelTypeEnum.textEmbedding)
const { data: membersData } = useMembers()
@@ -167,6 +174,7 @@ const Form = () => {
},
}),
keyword_number: keywordNumber,
summary_index_setting: summaryIndexSetting,
},
} as any
if (permission === DatasetPermission.partialMembers) {
@@ -348,6 +356,23 @@ const Form = () => {
</div>
</div>
)}
{
indexMethod === IndexingType.QUALIFIED
&& [ChunkingMode.text, ChunkingMode.parentChild].includes(currentDataset?.doc_form as ChunkingMode)
&& (
<>
<Divider
type="horizontal"
className="my-1 h-px bg-divider-subtle"
/>
<SummaryIndexSetting
entry="dataset-settings"
summaryIndexSetting={summaryIndexSetting}
onSummaryIndexSettingChange={handleSummaryIndexSettingChange}
/>
</>
)
}
{/* Retrieval Method Config */}
{currentDataset?.provider === 'external'
? (
web/app/components/datasets/settings/summary-index-setting.tsx (new file, 228 lines)
@@ -0,0 +1,228 @@
import type { ChangeEvent } from 'react'
import type { DefaultModel } from '@/app/components/header/account-setting/model-provider-page/declarations'
import type { SummaryIndexSetting as SummaryIndexSettingType } from '@/models/datasets'
import {
memo,
useCallback,
useMemo,
} from 'react'
import { useTranslation } from 'react-i18next'
import Switch from '@/app/components/base/switch'
import Textarea from '@/app/components/base/textarea'
import Tooltip from '@/app/components/base/tooltip'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import { useModelList } from '@/app/components/header/account-setting/model-provider-page/hooks'
import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector'

type SummaryIndexSettingProps = {
entry?: 'knowledge-base' | 'dataset-settings' | 'create-document'
summaryIndexSetting?: SummaryIndexSettingType
onSummaryIndexSettingChange?: (payload: SummaryIndexSettingType) => void
readonly?: boolean
}
const SummaryIndexSetting = ({
entry = 'knowledge-base',
summaryIndexSetting,
onSummaryIndexSettingChange,
readonly = false,
}: SummaryIndexSettingProps) => {
const { t } = useTranslation()
const {
data: textGenerationModelList,
} = useModelList(ModelTypeEnum.textGeneration)
const summaryIndexModelConfig = useMemo(() => {
if (!summaryIndexSetting?.model_name || !summaryIndexSetting?.model_provider_name)
return undefined

return {
providerName: summaryIndexSetting?.model_provider_name,
modelName: summaryIndexSetting?.model_name,
}
}, [summaryIndexSetting?.model_name, summaryIndexSetting?.model_provider_name])

const handleSummaryIndexEnableChange = useCallback((value: boolean) => {
onSummaryIndexSettingChange?.({
enable: value,
})
}, [onSummaryIndexSettingChange])

const handleSummaryIndexModelChange = useCallback((model: DefaultModel) => {
onSummaryIndexSettingChange?.({
model_provider_name: model.provider,
model_name: model.model,
})
}, [onSummaryIndexSettingChange])

const handleSummaryIndexPromptChange = useCallback((e: ChangeEvent<HTMLTextAreaElement>) => {
onSummaryIndexSettingChange?.({
summary_prompt: e.target.value,
})
}, [onSummaryIndexSettingChange])

if (entry === 'knowledge-base') {
return (
<div>
<div className="flex h-6 items-center justify-between">
<div className="system-sm-semibold-uppercase flex items-center text-text-secondary">
{t('form.summaryAutoGen', { ns: 'datasetSettings' })}
<Tooltip
triggerClassName="ml-1 h-4 w-4 shrink-0"
popupContent={t('form.summaryAutoGenTip', { ns: 'datasetSettings' })}
>
</Tooltip>
</div>
<Switch
defaultValue={summaryIndexSetting?.enable ?? false}
onChange={handleSummaryIndexEnableChange}
size="md"
/>
</div>
{
summaryIndexSetting?.enable && (
<div>
<div className="system-xs-medium-uppercase mb-1.5 mt-2 flex h-6 items-center text-text-tertiary">
{t('form.summaryModel', { ns: 'datasetSettings' })}
</div>
<ModelSelector
defaultModel={summaryIndexModelConfig && { provider: summaryIndexModelConfig.providerName, model: summaryIndexModelConfig.modelName }}
modelList={textGenerationModelList}
onSelect={handleSummaryIndexModelChange}
readonly={readonly}
showDeprecatedWarnIcon
/>
<div className="system-xs-medium-uppercase mt-3 flex h-6 items-center text-text-tertiary">
{t('form.summaryInstructions', { ns: 'datasetSettings' })}
</div>
<Textarea
value={summaryIndexSetting?.summary_prompt ?? ''}
onChange={handleSummaryIndexPromptChange}
disabled={readonly}
placeholder={t('form.summaryInstructionsPlaceholder', { ns: 'datasetSettings' })}
/>
</div>
)
}
</div>
)
}

if (entry === 'dataset-settings') {
return (
<div className="space-y-4">
<div className="flex gap-x-1">
<div className="flex h-7 w-[180px] shrink-0 items-center pt-1">
<div className="system-sm-semibold text-text-secondary">
{t('form.summaryAutoGen', { ns: 'datasetSettings' })}
</div>
</div>
<div className="py-1.5">
<div className="system-sm-semibold flex items-center text-text-secondary">
<Switch
className="mr-2"
defaultValue={summaryIndexSetting?.enable ?? false}
onChange={handleSummaryIndexEnableChange}
size="md"
/>
{
summaryIndexSetting?.enable ? t('list.status.enabled', { ns: 'datasetDocuments' }) : t('list.status.disabled', { ns: 'datasetDocuments' })
}
</div>
<div className="system-sm-regular mt-2 text-text-tertiary">
{
summaryIndexSetting?.enable && t('form.summaryAutoGenTip', { ns: 'datasetSettings' })
}
{
!summaryIndexSetting?.enable && t('form.summaryAutoGenEnableTip', { ns: 'datasetSettings' })
}
</div>
</div>
</div>
{
summaryIndexSetting?.enable && (
<>
<div className="flex gap-x-1">
<div className="flex h-7 w-[180px] shrink-0 items-center pt-1">
<div className="system-sm-medium text-text-tertiary">
{t('form.summaryModel', { ns: 'datasetSettings' })}
</div>
</div>
<div className="grow">
<ModelSelector
defaultModel={summaryIndexModelConfig && { provider: summaryIndexModelConfig.providerName, model: summaryIndexModelConfig.modelName }}
modelList={textGenerationModelList}
onSelect={handleSummaryIndexModelChange}
readonly={readonly}
showDeprecatedWarnIcon
triggerClassName="h-8"
/>
</div>
</div>
<div className="flex">
<div className="flex h-7 w-[180px] shrink-0 items-center pt-1">
<div className="system-sm-medium text-text-tertiary">
{t('form.summaryInstructions', { ns: 'datasetSettings' })}
</div>
</div>
<div className="grow">
<Textarea
value={summaryIndexSetting?.summary_prompt ?? ''}
onChange={handleSummaryIndexPromptChange}
disabled={readonly}
placeholder={t('form.summaryInstructionsPlaceholder', { ns: 'datasetSettings' })}
/>
</div>
</div>
</>
)
}
</div>
)
}

return (
<div className="space-y-3">
<div className="flex h-6 items-center">
<Switch
className="mr-2"
defaultValue={summaryIndexSetting?.enable ?? false}
onChange={handleSummaryIndexEnableChange}
size="md"
/>
<div className="system-sm-semibold text-text-secondary">
{t('form.summaryAutoGen', { ns: 'datasetSettings' })}
</div>
</div>
{
summaryIndexSetting?.enable && (
<>
<div>
<div className="system-sm-medium mb-1.5 flex h-6 items-center text-text-secondary">
{t('form.summaryModel', { ns: 'datasetSettings' })}
</div>
<ModelSelector
defaultModel={summaryIndexModelConfig && { provider: summaryIndexModelConfig.providerName, model: summaryIndexModelConfig.modelName }}
modelList={textGenerationModelList}
onSelect={handleSummaryIndexModelChange}
readonly={readonly}
showDeprecatedWarnIcon
triggerClassName="h-8"
/>
</div>
<div>
<div className="system-sm-medium mb-1.5 flex h-6 items-center text-text-secondary">
{t('form.summaryInstructions', { ns: 'datasetSettings' })}
</div>
<Textarea
value={summaryIndexSetting?.summary_prompt ?? ''}
onChange={handleSummaryIndexPromptChange}
disabled={readonly}
placeholder={t('form.summaryInstructionsPlaceholder', { ns: 'datasetSettings' })}
/>
</div>
</>
)
}
</div>
)
}
export default memo(SummaryIndexSetting)
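All three entry variants share one contract: each handler reports only the field that changed (enable, the model pair, or summary_prompt), so the parent is responsible for merging, as the ref-based handlers in the settings form do. A sketch of a minimal parent wiring, assuming only the field names visible in this file:

// Reviewer sketch, not part of the diff: minimal parent wiring for SummaryIndexSetting.
// The patch argument is partial, so the parent merges it into its own copy.
// 'Setting' is a local stand-in for SummaryIndexSettingType from '@/models/datasets'.
import { useState } from 'react'
import SummaryIndexSetting from '@/app/components/datasets/settings/summary-index-setting'

type Setting = {
  enable?: boolean
  model_provider_name?: string
  model_name?: string
  summary_prompt?: string
}

const Example = () => {
  const [setting, setSetting] = useState<Setting | undefined>()
  return (
    <SummaryIndexSetting
      entry="create-document" // or 'knowledge-base' | 'dataset-settings'
      summaryIndexSetting={setting}
      onSummaryIndexSettingChange={patch => setSetting(prev => ({ ...prev, ...patch }))}
    />
  )
}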
@@ -1,10 +1,11 @@
import type { QAChunk } from './types'
import type { GeneralChunk, ParentChildChunk, QAChunk } from './types'
import type { ParentMode } from '@/models/datasets'
import * as React from 'react'
import { useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import Dot from '@/app/components/datasets/documents/detail/completed/common/dot'
import SegmentIndexTag from '@/app/components/datasets/documents/detail/completed/common/segment-index-tag'
import SummaryLabel from '@/app/components/datasets/documents/detail/completed/common/summary-label'
import { PreviewSlice } from '@/app/components/datasets/formatted-text/flavours/preview-slice'
import { ChunkingMode } from '@/models/datasets'
import { formatNumber } from '@/utils/format'
@@ -14,7 +15,7 @@ import { QAItemType } from './types'
type ChunkCardProps = {
chunkType: ChunkingMode
parentMode?: ParentMode
content: string | string[] | QAChunk
content: ParentChildChunk | QAChunk | GeneralChunk
positionId?: string | number
wordCount: number
}
@@ -33,7 +34,7 @@ const ChunkCard = (props: ChunkCardProps) => {

const contentElement = useMemo(() => {
if (chunkType === ChunkingMode.parentChild) {
return (content as string[]).map((child, index) => {
return (content as ParentChildChunk).child_contents.map((child, index) => {
const indexForLabel = index + 1
return (
<PreviewSlice
@@ -57,7 +58,17 @@ const ChunkCard = (props: ChunkCardProps) => {
)
}

return content as string
return (content as GeneralChunk).content
}, [content, chunkType])

const summaryElement = useMemo(() => {
if (chunkType === ChunkingMode.parentChild) {
return (content as ParentChildChunk).parent_summary
}
if (chunkType === ChunkingMode.text) {
return (content as GeneralChunk).summary
}
return null
}, [content, chunkType])

return (
@@ -73,6 +84,7 @@ const ChunkCard = (props: ChunkCardProps) => {
</div>
)}
<div className="body-md-regular text-text-secondary">{contentElement}</div>
{summaryElement && <SummaryLabel summary={summaryElement} />}
</div>
)
}
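The casts above (content as ParentChildChunk, content as GeneralChunk) rely on chunkType staying in sync with the payload; nothing stops a caller from pairing a QAChunk with ChunkingMode.text. A discriminated union would let TypeScript enforce the pairing; a sketch, with field names taken from this hunk and the kind tag invented for illustration:

// Reviewer sketch, not part of the diff: a tagged union removes the 'as' casts.
// Field names (content, summary, child_contents, parent_summary) come from the
// hunk above; the 'kind' discriminant is hypothetical.
type GeneralChunk = { content: string, summary?: string }
type ParentChildChunk = { child_contents: string[], parent_summary?: string }

type TaggedChunk =
  | ({ kind: 'text' } & GeneralChunk)
  | ({ kind: 'parentChild' } & ParentChildChunk)

const summaryOf = (chunk: TaggedChunk): string | undefined =>
  chunk.kind === 'text' ? chunk.summary : chunk.parent_summary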
@@ -10,13 +10,13 @@ import { QAItemType } from './types'
// Test Data Factories
// =============================================================================

const createGeneralChunks = (overrides: string[] = []): GeneralChunks => {
const createGeneralChunks = (overrides: GeneralChunks = []): GeneralChunks => {
if (overrides.length > 0)
return overrides
return [
'This is the first chunk of text content.',
'This is the second chunk with different content.',
'Third chunk here with more text.',
{ content: 'This is the first chunk of text content.' },
{ content: 'This is the second chunk with different content.' },
{ content: 'Third chunk here with more text.' },
]
}

@@ -152,7 +152,7 @@ describe('ChunkCard', () => {
render(
<ChunkCard
chunkType={ChunkingMode.text}
content="This is plain text content."
content={createGeneralChunks()[0]}
wordCount={27}
positionId={1}
/>,
@@ -196,7 +196,7 @@ describe('ChunkCard', () => {
<ChunkCard
chunkType={ChunkingMode.parentChild}
parentMode="paragraph"
content={childContents}
content={createParentChildChunk({ child_contents: childContents })}
wordCount={50}
positionId={1}
/>,
@@ -218,7 +218,7 @@ describe('ChunkCard', () => {
<ChunkCard
chunkType={ChunkingMode.parentChild}
parentMode="paragraph"
content={['Child content']}
content={createParentChildChunk({ child_contents: ['Child content'] })}
wordCount={13}
positionId={1}
/>,
@@ -234,7 +234,7 @@ describe('ChunkCard', () => {
<ChunkCard
chunkType={ChunkingMode.parentChild}
parentMode="full-doc"
content={['Child content']}
content={createParentChildChunk({ child_contents: ['Child content'] })}
wordCount={13}
positionId={1}
/>,
@@ -250,7 +250,7 @@ describe('ChunkCard', () => {
render(
<ChunkCard
chunkType={ChunkingMode.text}
content="Text content"
content={createGeneralChunks()[0]}
wordCount={12}
positionId={5}
/>,
@@ -268,7 +268,7 @@ describe('ChunkCard', () => {
render(
<ChunkCard
chunkType={ChunkingMode.text}
content="Some content"
content={createGeneralChunks()[0]}
wordCount={1234}
positionId={1}
/>,
@@ -283,7 +283,7 @@ describe('ChunkCard', () => {
render(
<ChunkCard
chunkType={ChunkingMode.text}
content="Some content"
content={createGeneralChunks()[0]}
wordCount={100}
positionId={1}
/>,
@@ -299,7 +299,7 @@ describe('ChunkCard', () => {
<ChunkCard
chunkType={ChunkingMode.parentChild}
parentMode="full-doc"
content={['Child']}
content={createParentChildChunk({ child_contents: ['Child'] })}
wordCount={500}
positionId={1}
/>,
@@ -317,7 +317,7 @@ describe('ChunkCard', () => {
render(
<ChunkCard
chunkType={ChunkingMode.text}
content="Content"
content={createGeneralChunks()[0]}
wordCount={7}
positionId={42}
/>,
@@ -332,7 +332,7 @@ describe('ChunkCard', () => {
render(
<ChunkCard
chunkType={ChunkingMode.text}
content="Content"
content={createGeneralChunks()[0]}
wordCount={7}
positionId="99"
/>,
@@ -347,7 +347,7 @@ describe('ChunkCard', () => {
render(
<ChunkCard
chunkType={ChunkingMode.text}
content="Content"
content={createGeneralChunks()[0]}
wordCount={7}
positionId={3}
/>,
@@ -366,7 +366,7 @@ describe('ChunkCard', () => {
<ChunkCard
chunkType={ChunkingMode.parentChild}
parentMode="paragraph"
content={['Child']}
content={createParentChildChunk({ child_contents: ['Child'] })}
wordCount={5}
positionId={1}
/>,
@@ -380,7 +380,7 @@ describe('ChunkCard', () => {
<ChunkCard
chunkType={ChunkingMode.parentChild}
parentMode="full-doc"
content={['Child']}
content={createParentChildChunk({ child_contents: ['Child'] })}
wordCount={5}
positionId={1}
/>,
@@ -395,7 +395,7 @@ describe('ChunkCard', () => {
const { rerender } = render(
<ChunkCard
chunkType={ChunkingMode.text}
content="Initial content"
content={createGeneralChunks()[0]}
wordCount={15}
positionId={1}
/>,
@@ -408,7 +408,7 @@ describe('ChunkCard', () => {
rerender(
<ChunkCard
chunkType={ChunkingMode.text}
content="Updated content"
content={createGeneralChunks()[0]}
wordCount={15}
positionId={1}
/>,
@@ -424,7 +424,7 @@ describe('ChunkCard', () => {
const { rerender } = render(
<ChunkCard
chunkType={ChunkingMode.text}
content="Text content"
content={createGeneralChunks()[0]}
wordCount={12}
positionId={1}
/>,
@@ -458,7 +458,7 @@ describe('ChunkCard', () => {
<ChunkCard
chunkType={ChunkingMode.parentChild}
parentMode="paragraph"
content={[]}
content={createParentChildChunk({ child_contents: [] })}
wordCount={0}
positionId={1}
/>,
@@ -495,7 +495,7 @@ describe('ChunkCard', () => {
render(
<ChunkCard
chunkType={ChunkingMode.text}
content={longContent}
content={createGeneralChunks()[0]}
wordCount={10000}
positionId={1}
/>,
@@ -510,7 +510,7 @@ describe('ChunkCard', () => {
render(
<ChunkCard
chunkType={ChunkingMode.text}
content=""
content={createGeneralChunks()[0]}
wordCount={0}
positionId={1}
/>,
@@ -546,9 +546,9 @@ describe('ChunkCardList', () => {
)

// Assert
expect(screen.getByText(chunks[0])).toBeInTheDocument()
expect(screen.getByText(chunks[1])).toBeInTheDocument()
expect(screen.getByText(chunks[2])).toBeInTheDocument()
expect(screen.getByText(chunks[0].content)).toBeInTheDocument()
expect(screen.getByText(chunks[1].content)).toBeInTheDocument()
expect(screen.getByText(chunks[2].content)).toBeInTheDocument()
})

it('should render parent-child chunks correctly', () => {
@@ -594,7 +594,10 @@ describe('ChunkCardList', () => {
describe('Memoization - chunkList', () => {
it('should extract chunks from GeneralChunks for text mode', () => {
// Arrange
const chunks: GeneralChunks = ['Chunk 1', 'Chunk 2']
const chunks: GeneralChunks = [
{ content: 'Chunk 1' },
{ content: 'Chunk 2' },
]

// Act
render(
@@ -653,7 +656,7 @@ describe('ChunkCardList', () => {

it('should update chunkList when chunkInfo changes', () => {
// Arrange
const initialChunks = createGeneralChunks(['Initial chunk'])
const initialChunks = createGeneralChunks([{ content: 'Initial chunk' }])

const { rerender } = render(
<ChunkCardList
@@ -666,7 +669,7 @@ describe('ChunkCardList', () => {
expect(screen.getByText('Initial chunk')).toBeInTheDocument()

// Act - update chunks
const updatedChunks = createGeneralChunks(['Updated chunk'])
const updatedChunks = createGeneralChunks([{ content: 'Updated chunk' }])
rerender(
<ChunkCardList
chunkType={ChunkingMode.text}
@@ -684,7 +687,7 @@ describe('ChunkCardList', () => {
describe('Word Count Calculation', () => {
it('should calculate word count for text chunks using string length', () => {
// Arrange - "Hello" has 5 characters
const chunks = createGeneralChunks(['Hello'])
const chunks = createGeneralChunks([{ content: 'Hello' }])

// Act
render(
@@ -747,7 +750,11 @@ describe('ChunkCardList', () => {
describe('Position ID', () => {
it('should assign 1-based position IDs to chunks', () => {
// Arrange
const chunks = createGeneralChunks(['First', 'Second', 'Third'])
const chunks = createGeneralChunks([
{ content: 'First' },
{ content: 'Second' },
{ content: 'Third' },
])

// Act
render(
@@ -768,7 +775,7 @@ describe('ChunkCardList', () => {
describe('Custom className', () => {
it('should apply custom className to container', () => {
// Arrange
const chunks = createGeneralChunks(['Test'])
const chunks = createGeneralChunks([{ content: 'Test' }])

// Act
const { container } = render(
@@ -785,7 +792,7 @@ describe('ChunkCardList', () => {

it('should merge custom className with default classes', () => {
// Arrange
const chunks = createGeneralChunks(['Test'])
const chunks = createGeneralChunks([{ content: 'Test' }])

// Act
const { container } = render(
@@ -805,7 +812,7 @@ describe('ChunkCardList', () => {

it('should render without className prop', () => {
// Arrange
const chunks = createGeneralChunks(['Test'])
const chunks = createGeneralChunks([{ content: 'Test' }])

// Act
const { container } = render(
@@ -860,7 +867,7 @@ describe('ChunkCardList', () => {

it('should not use parentMode for text type', () => {
// Arrange
const chunks = createGeneralChunks(['Text'])
const chunks = createGeneralChunks([{ content: 'Text' }])

// Act
render(
@@ -937,7 +944,7 @@ describe('ChunkCardList', () => {

it('should handle single item in chunks', () => {
// Arrange
const chunks = createGeneralChunks(['Single chunk'])
const chunks = createGeneralChunks([{ content: 'Single chunk' }])

// Act
render(

it('should handle large number of chunks', () => {
// Arrange
const chunks = Array.from({ length: 100 }, (_, i) => `Chunk number ${i + 1}`)
const chunks = Array.from({ length: 100 }, (_, i) => ({ content: `Chunk number ${i + 1}` }))

// Act
render(

@@ -975,8 +982,11 @@
describe('Key Generation', () => {
it('should generate unique keys for chunks', () => {
// Arrange - chunks with same content
const chunks = createGeneralChunks(['Same content', 'Same content', 'Same content'])

const chunks = createGeneralChunks([
{ content: 'Same content' },
{ content: 'Same content' },
{ content: 'Same content' },
])
// Act
const { container } = render(
<ChunkCardList
@@ -1006,9 +1016,9 @@ describe('ChunkCardList Integration', () => {
|
||||
it('should render complete text chunking workflow', () => {
|
||||
// Arrange
|
||||
const textChunks = createGeneralChunks([
|
||||
'First paragraph of the document.',
|
||||
'Second paragraph with more information.',
|
||||
'Final paragraph concluding the content.',
|
||||
{ content: 'First paragraph of the document.' },
|
||||
{ content: 'Second paragraph with more information.' },
|
||||
{ content: 'Final paragraph concluding the content.' },
|
||||
])
|
||||
|
||||
// Act
|
||||
@ -1104,7 +1114,7 @@ describe('ChunkCardList Integration', () => {
|
||||
describe('Type Switching', () => {
|
||||
it('should handle switching from text to QA type', () => {
|
||||
// Arrange
|
||||
const textChunks = createGeneralChunks(['Text content'])
|
||||
const textChunks = createGeneralChunks([{ content: 'Text content' }])
|
||||
const qaChunks = createQAChunks()
|
||||
|
||||
const { rerender } = render(
|
||||
@ -1132,7 +1142,7 @@ describe('ChunkCardList Integration', () => {
|
||||
|
||||
it('should handle switching from text to parent-child type', () => {
|
||||
// Arrange
|
||||
const textChunks = createGeneralChunks(['Simple text'])
|
||||
const textChunks = createGeneralChunks([{ content: 'Simple text' }])
|
||||
const parentChildChunks = createParentChildChunks()
|
||||
|
||||
const { rerender } = render(
|
||||
|
||||
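The spec updates above all follow one migration: GeneralChunks changed from string[] to an array of { content, summary? } objects, so fixtures now wrap each string in an object and assertions read chunks[i].content. Below is a minimal sketch of the factory these tests call; the real createGeneralChunks lives in the spec's test helpers and is not shown in this diff, so its body here is an assumption.

// Sketch only: the actual helper is defined in the spec's test utilities.
type GeneralChunk = {
  content: string
  summary?: string
}
type GeneralChunks = GeneralChunk[]

// Assumed default fixture; pass explicit chunk objects to override it.
const createGeneralChunks = (
  chunks: GeneralChunks = [{ content: 'Default chunk' }],
): GeneralChunks => chunks.map(chunk => ({ ...chunk }))

// Mirrors the updated assertions: content now sits one property deep.
const chunks = createGeneralChunks([{ content: 'Hello' }])
console.log(chunks[0].content.length) // 5, the wordCount the tests expect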
@@ -1,4 +1,4 @@
-import type { ChunkInfo, GeneralChunks, ParentChildChunk, ParentChildChunks, QAChunk, QAChunks } from './types'
+import type { ChunkInfo, GeneralChunk, GeneralChunks, ParentChildChunk, ParentChildChunks, QAChunk, QAChunks } from './types'
 import type { ParentMode } from '@/models/datasets'
 import { useMemo } from 'react'
 import { ChunkingMode } from '@/models/datasets'
@@ -21,13 +21,13 @@ export const ChunkCardList = (props: ChunkCardListProps) => {
     if (chunkType === ChunkingMode.parentChild)
       return (chunkInfo as ParentChildChunks).parent_child_chunks
     return (chunkInfo as QAChunks).qa_chunks
-  }, [chunkInfo])
+  }, [chunkInfo, chunkType])

-  const getWordCount = (seg: string | ParentChildChunk | QAChunk) => {
+  const getWordCount = (seg: GeneralChunk | ParentChildChunk | QAChunk) => {
     if (chunkType === ChunkingMode.parentChild)
-      return (seg as ParentChildChunk).parent_content.length
+      return (seg as ParentChildChunk).parent_content?.length
     if (chunkType === ChunkingMode.text)
-      return (seg as string).length
+      return (seg as GeneralChunk).content.length
     return (seg as QAChunk).question.length + (seg as QAChunk).answer.length
   }

@@ -41,7 +41,7 @@ export const ChunkCardList = (props: ChunkCardListProps) => {
         key={`${chunkType}-${index}`}
         chunkType={chunkType}
         parentMode={parentMode}
-        content={chunkType === ChunkingMode.parentChild ? (seg as ParentChildChunk).child_contents : (seg as string | QAChunk)}
+        content={seg}
         wordCount={wordCount}
         positionId={index + 1}
       />
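The refactored word-count logic above now discriminates on chunk type instead of treating text chunks as bare strings, and guards parent_content with optional chaining. A self-contained sketch follows; the ChunkingMode values are illustrative stand-ins for the enum imported from '@/models/datasets'.

// Stand-in for the imported ChunkingMode enum; values are illustrative.
enum ChunkingMode {
  text = 'text',
  qa = 'qa',
  parentChild = 'parent-child',
}

type GeneralChunk = { content: string, summary?: string }
type ParentChildChunk = { child_contents: string[], parent_content: string, parent_summary?: string, parent_mode: string }
type QAChunk = { question: string, answer: string }

// Same shape as the component's getWordCount after the diff: the optional
// chaining tolerates runtime parent chunks whose parent_content is missing.
const getWordCount = (chunkType: ChunkingMode, seg: GeneralChunk | ParentChildChunk | QAChunk) => {
  if (chunkType === ChunkingMode.parentChild)
    return (seg as ParentChildChunk).parent_content?.length
  if (chunkType === ChunkingMode.text)
    return (seg as GeneralChunk).content.length
  return (seg as QAChunk).question.length + (seg as QAChunk).answer.length
}

console.log(getWordCount(ChunkingMode.text, { content: 'Hello' })) // 5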
@@ -1,8 +1,12 @@
-export type GeneralChunks = string[]
+export type GeneralChunk = {
+  content: string
+  summary?: string
+}
+export type GeneralChunks = GeneralChunk[]

 export type ParentChildChunk = {
   child_contents: string[]
   parent_content: string
+  parent_summary?: string
   parent_mode: string
 }
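Since all three chunk shapes are now objects, they can also be distinguished structurally. The type guards below are hypothetical helpers, not part of the diff; the component itself keeps switching on chunkType. GeneralChunk and ParentChildChunk follow the types diff above, while QAChunk is reduced to the fields the word-count logic uses.

type GeneralChunk = { content: string, summary?: string }
type ParentChildChunk = { child_contents: string[], parent_content: string, parent_summary?: string, parent_mode: string }
type QAChunk = { question: string, answer: string }

// Hypothetical structural guards, not part of the diff.
const isParentChildChunk = (seg: GeneralChunk | ParentChildChunk | QAChunk): seg is ParentChildChunk =>
  'parent_content' in seg && 'child_contents' in seg

const isQAChunk = (seg: GeneralChunk | ParentChildChunk | QAChunk): seg is QAChunk =>
  'question' in seg && 'answer' in seg

const isGeneralChunk = (seg: GeneralChunk | ParentChildChunk | QAChunk): seg is GeneralChunk =>
  !isParentChildChunk(seg) && !isQAChunk(seg)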
@@ -1,8 +1,8 @@
+import type { GeneralChunks } from '@/app/components/rag-pipeline/components/chunk-card-list/types'
 import type { WorkflowRunningData } from '@/app/components/workflow/types'
 import { fireEvent, render, screen, waitFor } from '@testing-library/react'
 import { WorkflowRunningStatus } from '@/app/components/workflow/types'
 import { ChunkingMode } from '@/models/datasets'
-
 import Header from './header'
 // Import components after mocks
 import TestRunPanel from './index'
@@ -836,7 +836,7 @@ describe('formatPreviewChunks', () => {
   it('should limit to RAG_PIPELINE_PREVIEW_CHUNK_NUM chunks', () => {
     const manyChunks = Array.from({ length: 10 }, (_, i) => `chunk${i}`)
     const outputs = createMockGeneralOutputs(manyChunks)
-    const result = formatPreviewChunks(outputs) as string[]
+    const result = formatPreviewChunks(outputs) as GeneralChunks

     // RAG_PIPELINE_PREVIEW_CHUNK_NUM is mocked to 5
     expect(result).toHaveLength(5)
@@ -5,13 +5,17 @@ import { ChunkingMode } from '@/models/datasets'

 type GeneralChunkPreview = {
   content: string
+  summary?: string
 }

 const formatGeneralChunks = (outputs: any) => {
   const chunkInfo: GeneralChunks = []
   const chunks = outputs.preview as GeneralChunkPreview[]
   chunks.slice(0, RAG_PIPELINE_PREVIEW_CHUNK_NUM).forEach((chunk) => {
-    chunkInfo.push(chunk.content)
+    chunkInfo.push({
+      content: chunk.content,
+      summary: chunk.summary,
+    })
   })

   return chunkInfo
@@ -20,6 +24,7 @@ const formatGeneralChunks = (outputs: any) => {
 type ParentChildChunkPreview = {
   content: string
   child_chunks: string[]
+  summary?: string
 }

 const formatParentChildChunks = (outputs: any, parentMode: ParentMode) => {
@@ -32,6 +37,7 @@ const formatParentChildChunks = (outputs: any, parentMode: ParentMode) => {
   chunks.slice(0, RAG_PIPELINE_PREVIEW_CHUNK_NUM).forEach((chunk) => {
     chunkInfo.parent_child_chunks?.push({
       parent_content: chunk.content,
+      parent_summary: chunk.summary,
       child_contents: chunk.child_chunks,
       parent_mode: parentMode,
     })
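With summary threaded through, the preview formatter keeps each chunk's optional summary instead of flattening previews to strings. A runnable sketch of the general-chunk path; the constant value 5 is an assumption taken from the spec above, which mocks RAG_PIPELINE_PREVIEW_CHUNK_NUM to 5.

type GeneralChunk = { content: string, summary?: string }
type GeneralChunks = GeneralChunk[]
type GeneralChunkPreview = { content: string, summary?: string }

const RAG_PIPELINE_PREVIEW_CHUNK_NUM = 5 // assumption: mocked to 5 in the spec

// Mirrors formatGeneralChunks after the diff: cap the preview length and
// carry content plus the optional summary into the chunk list.
const formatGeneralChunks = (outputs: { preview: GeneralChunkPreview[] }): GeneralChunks =>
  outputs.preview
    .slice(0, RAG_PIPELINE_PREVIEW_CHUNK_NUM)
    .map(chunk => ({ content: chunk.content, summary: chunk.summary }))

const preview = Array.from({ length: 10 }, (_, i) => ({ content: `chunk${i}` }))
console.log(formatGeneralChunks({ preview }).length) // 5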
@@ -1,6 +1,7 @@
 import type {
   KnowledgeBaseNodeType,
   RerankingModel,
+  SummaryIndexSetting,
 } from '../types'
 import type { ValueSelector } from '@/app/components/workflow/types'
 import { produce } from 'immer'
@@ -246,6 +247,16 @@ export const useConfig = (id: string) => {
     })
   }, [handleNodeDataUpdate])

+  const handleSummaryIndexSettingChange = useCallback((summaryIndexSetting: SummaryIndexSetting) => {
+    const nodeData = getNodeData()
+    handleNodeDataUpdate({
+      summary_index_setting: {
+        ...nodeData?.data.summary_index_setting,
+        ...summaryIndexSetting,
+      },
+    })
+  }, [handleNodeDataUpdate, getNodeData])
+
   return {
     handleChunkStructureChange,
     handleIndexMethodChange,
@@ -260,5 +271,6 @@ export const useConfig = (id: string) => {
     handleScoreThresholdChange,
     handleScoreThresholdEnabledChange,
     handleInputVariableChange,
+    handleSummaryIndexSettingChange,
   }
 }
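The new handleSummaryIndexSettingChange spreads the existing summary_index_setting before the incoming patch, so partial updates never wipe fields the user already set. The sketch below isolates that merge; getNodeData and handleNodeDataUpdate are workflow-store helpers not shown in this diff, and the model names are illustrative.

type SummaryIndexSetting = {
  enable?: boolean
  model_name?: string
  model_provider_name?: string
  summary_prompt?: string
}

// Incoming fields overwrite; untouched fields survive.
const mergeSummaryIndexSetting = (
  current: SummaryIndexSetting | undefined,
  patch: SummaryIndexSetting,
): SummaryIndexSetting => ({ ...current, ...patch })

// Enabling summaries keeps a previously chosen (illustrative) model:
console.log(mergeSummaryIndexSetting(
  { model_name: 'some-summary-model', model_provider_name: 'some-provider' },
  { enable: true },
))
// -> { model_name: 'some-summary-model', model_provider_name: 'some-provider', enable: true }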
@@ -7,6 +7,7 @@ import {
   useMemo,
 } from 'react'
 import { useTranslation } from 'react-i18next'
+import SummaryIndexSetting from '@/app/components/datasets/settings/summary-index-setting'
 import { checkShowMultiModalTip } from '@/app/components/datasets/settings/utils'
 import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
 import { useModelList } from '@/app/components/header/account-setting/model-provider-page/hooks'
@@ -51,6 +52,7 @@ const Panel: FC<NodePanelProps<KnowledgeBaseNodeType>> = ({
     handleScoreThresholdChange,
     handleScoreThresholdEnabledChange,
     handleInputVariableChange,
+    handleSummaryIndexSettingChange,
   } = useConfig(id)

   const filterVar = useCallback((variable: Var) => {
@@ -167,6 +169,22 @@ const Panel: FC<NodePanelProps<KnowledgeBaseNodeType>> = ({
       <div className="pt-1">
         <Split className="h-[1px]" />
       </div>
+      {
+        data.indexing_technique === IndexMethodEnum.QUALIFIED
+        && [ChunkStructureEnum.general, ChunkStructureEnum.parent_child].includes(data.chunk_structure)
+        && (
+          <>
+            <SummaryIndexSetting
+              summaryIndexSetting={data.summary_index_setting}
+              onSummaryIndexSettingChange={handleSummaryIndexSettingChange}
+              readonly={nodesReadOnly}
+            />
+            <div className="pt-1">
+              <Split className="h-[1px]" />
+            </div>
+          </>
+        )
+      }
       <RetrievalSetting
         indexMethod={data.indexing_technique}
         searchMethod={data.retrieval_model.search_method}
@@ -42,6 +42,12 @@ export type RetrievalSetting = {
   score_threshold: number
   reranking_mode?: RerankingModeEnum
 }
+export type SummaryIndexSetting = {
+  enable?: boolean
+  model_name?: string
+  model_provider_name?: string
+  summary_prompt?: string
+}
 export type KnowledgeBaseNodeType = CommonNodeType & {
   index_chunk_variable_selector: string[]
   chunk_structure?: ChunkStructureEnum
@@ -52,4 +58,5 @@ export type KnowledgeBaseNodeType = CommonNodeType & {
   retrieval_model: RetrievalSetting
   _embeddingModelList?: Model[]
   _rerankModelList?: Model[]
+  summary_index_setting?: SummaryIndexSetting
 }
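Taken together with the panel hunk above, summary indexing is only offered for qualified (high-quality) indexing combined with a general or parent-child chunk structure. Expressed as a pure predicate; the enum objects below are stand-ins, since the real IndexMethodEnum and ChunkStructureEnum values are not shown in this diff.

// Stand-ins; the real enums are imported in the panel and their string
// values are not visible here.
const IndexMethodEnum = { QUALIFIED: 'QUALIFIED', ECONOMICAL: 'ECONOMICAL' } as const
const ChunkStructureEnum = { general: 'general', parent_child: 'parent_child' } as const

type IndexMethod = typeof IndexMethodEnum[keyof typeof IndexMethodEnum]
type ChunkStructure = typeof ChunkStructureEnum[keyof typeof ChunkStructureEnum]

const shouldShowSummaryIndexSetting = (
  indexingTechnique?: IndexMethod,
  chunkStructure?: ChunkStructure,
): boolean =>
  indexingTechnique === IndexMethodEnum.QUALIFIED
  && chunkStructure !== undefined
  && ([ChunkStructureEnum.general, ChunkStructureEnum.parent_child] as ChunkStructure[]).includes(chunkStructure)

console.log(shouldShowSummaryIndexSetting(IndexMethodEnum.QUALIFIED, ChunkStructureEnum.general)) // true
console.log(shouldShowSummaryIndexSetting(IndexMethodEnum.ECONOMICAL, ChunkStructureEnum.general)) // false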
@@ -644,6 +644,9 @@
     }
   },
   "app/components/apps/new-app-card.tsx": {
+    "react-hooks-extra/no-direct-set-state-in-use-effect": {
+      "count": 1
+    },
     "ts/no-explicit-any": {
       "count": 1
     }
@@ -738,6 +741,11 @@
       "count": 1
     }
   },
+  "app/components/base/carousel/index.tsx": {
+    "react-hooks-extra/no-direct-set-state-in-use-effect": {
+      "count": 3
+    }
+  },
   "app/components/base/chat/chat-with-history/chat-wrapper.tsx": {
     "ts/no-explicit-any": {
       "count": 6
@@ -858,7 +866,7 @@
       "count": 6
     },
     "ts/no-explicit-any": {
-      "count": 16
+      "count": 17
     }
   },
   "app/components/base/chat/embedded-chatbot/inputs-form/content.tsx": {
@@ -1559,11 +1567,6 @@
       "count": 3
     }
   },
-  "app/components/billing/usage-info/index.tsx": {
-    "ts/no-explicit-any": {
-      "count": 1
-    }
-  },
   "app/components/custom/custom-web-app-brand/index.spec.tsx": {
     "ts/no-explicit-any": {
       "count": 7
@@ -1953,6 +1956,16 @@
       "count": 1
     }
   },
+  "app/components/explore/banner/banner-item.tsx": {
+    "react-hooks-extra/no-direct-set-state-in-use-effect": {
+      "count": 3
+    }
+  },
+  "app/components/explore/banner/indicator-button.tsx": {
+    "react-hooks-extra/no-direct-set-state-in-use-effect": {
+      "count": 2
+    }
+  },
   "app/components/explore/create-app-modal/index.tsx": {
     "ts/no-explicit-any": {
       "count": 1
@@ -1971,9 +1984,40 @@
       "count": 1
     }
   },
+  "app/components/explore/sidebar/index.spec.tsx": {
+    "ts/no-explicit-any": {
+      "count": 1
+    }
+  },
   "app/components/explore/sidebar/index.tsx": {
     "ts/no-explicit-any": {
       "count": 1
-    }
+    },
+    "unused-imports/no-unused-vars": {
+      "count": 2
+    }
   },
+  "app/components/explore/try-app/app/chat.tsx": {
+    "ts/no-explicit-any": {
+      "count": 1
+    }
+  },
+  "app/components/explore/try-app/app/text-generation.tsx": {
+    "ts/no-explicit-any": {
+      "count": 6
+    }
+  },
+  "app/components/explore/try-app/index.tsx": {
+    "style/multiline-ternary": {
+      "count": 2
+    }
+  },
+  "app/components/explore/try-app/preview/basic-app-preview.tsx": {
+    "style/multiline-ternary": {
+      "count": 2
+    },
+    "ts/no-explicit-any": {
+      "count": 12
+    }
+  },
   "app/components/goto-anything/actions/commands/command-bus.ts": {
@@ -2658,6 +2702,11 @@
       "count": 8
     }
   },
+  "app/components/share/text-generation/types.ts": {
+    "ts/no-explicit-any": {
+      "count": 1
+    }
+  },
   "app/components/share/utils.ts": {
     "ts/no-explicit-any": {
       "count": 2
@@ -4263,11 +4312,6 @@
       "count": 1
     }
   },
-  "middleware.ts": {
-    "node/prefer-global/buffer": {
-      "count": 1
-    }
-  },
   "models/common.ts": {
     "ts/no-explicit-any": {
       "count": 3
@@ -4343,12 +4387,12 @@
   },
   "service/debug.ts": {
     "ts/no-explicit-any": {
-      "count": 7
+      "count": 6
     }
   },
   "service/explore.ts": {
     "ts/no-explicit-any": {
-      "count": 1
+      "count": 2
     }
   },
   "service/fetch.ts": {
@@ -4361,7 +4405,7 @@
   },
   "service/share.ts": {
     "ts/no-explicit-any": {
-      "count": 4
+      "count": 3
     }
   },
   "service/tools.ts": {
@@ -4369,6 +4413,11 @@
       "count": 2
     }
   },
+  "service/try-app.ts": {
+    "ts/no-explicit-any": {
+      "count": 1
+    }
+  },
   "service/use-apps.ts": {
     "ts/no-explicit-any": {
       "count": 1
@@ -196,6 +196,24 @@
   "publishApp.notSet": "Not set",
   "publishApp.notSetDesc": "Currently nobody can access the web app. Please set permissions.",
   "publishApp.title": "Who can access web app",
+  "quadrantMatrix.deadline": "DDL:",
+  "quadrantMatrix.invalidData": "Invalid Quadrant Data",
+  "quadrantMatrix.invalidDataDesc": "Expected JSON format with q1, q2, q3, q4 arrays",
+  "quadrantMatrix.legend.importance": "I = Importance",
+  "quadrantMatrix.legend.urgency": "U = Urgency",
+  "quadrantMatrix.more": "+{{count}} more",
+  "quadrantMatrix.noTasks": "No tasks",
+  "quadrantMatrix.q1.subtitle": "Urgent & Important",
+  "quadrantMatrix.q1.title": "Do First",
+  "quadrantMatrix.q2.subtitle": "Important & Not Urgent",
+  "quadrantMatrix.q2.title": "Schedule",
+  "quadrantMatrix.q3.subtitle": "Urgent & Not Important",
+  "quadrantMatrix.q3.title": "Delegate",
+  "quadrantMatrix.q4.subtitle": "Not Urgent & Not Important",
+  "quadrantMatrix.q4.title": "Don't Do",
+  "quadrantMatrix.taskCount_one": "{{count}} task prioritized",
+  "quadrantMatrix.taskCount_other": "{{count}} tasks prioritized",
+  "quadrantMatrix.title": "Eisenhower Matrix",
   "removeOriginal": "Delete the original app",
   "roadmap": "See our roadmap",
   "showMyCreatedAppsOnly": "Created by me",
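The taskCount_one and taskCount_other keys follow i18next's plural-suffix convention, which fits the react-i18next usage visible elsewhere in this diff: t() appends a CLDR plural category to the key based on the count option. A minimal sketch with inlined resources; the real app loads these strings from locale files.

import i18next from 'i18next'

i18next.init({
  lng: 'en',
  keySeparator: false, // keys here are flat strings that contain dots
  resources: {
    en: {
      translation: {
        'quadrantMatrix.taskCount_one': '{{count}} task prioritized',
        'quadrantMatrix.taskCount_other': '{{count}} tasks prioritized',
      },
    },
  },
})

console.log(i18next.t('quadrantMatrix.taskCount', { count: 1 })) // '1 task prioritized'
console.log(i18next.t('quadrantMatrix.taskCount', { count: 4 })) // '4 tasks prioritized'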
@@ -172,6 +172,7 @@
   "usagePage.documentsUploadQuota": "Documents Upload Quota",
   "usagePage.perMonth": "per month",
+  "usagePage.resetsIn": "Resets in {{count,number}} days",
   "usagePage.storageThresholdTooltip": "Detailed usage is shown once storage exceeds 50 MB.",
   "usagePage.teamMembers": "Team Members",
   "usagePage.triggerEvents": "Trigger Events",
   "usagePage.vectorSpace": "Knowledge Data Storage",
@@ -31,6 +31,7 @@
   "list.action.pause": "Pause",
   "list.action.resume": "Resume",
   "list.action.settings": "Chunking Settings",
+  "list.action.summary": "Generate summary",
   "list.action.sync": "Sync",
   "list.action.unarchive": "Unarchive",
   "list.action.uploadFile": "Upload new file",
@@ -75,6 +76,9 @@
   "list.status.indexing": "Indexing",
   "list.status.paused": "Paused",
   "list.status.queuing": "Queuing",
+  "list.summary.generating": "Generating...",
+  "list.summary.generatingSummary": "Generating summary",
+  "list.summary.ready": "Summary ready",
   "list.table.header.action": "ACTION",
   "list.table.header.chunkingMode": "CHUNKING MODE",
   "list.table.header.fileName": "NAME",
@@ -329,5 +333,7 @@
   "segment.searchResults_one": "RESULT",
   "segment.searchResults_other": "RESULTS",
   "segment.searchResults_zero": "RESULT",
+  "segment.summary": "SUMMARY",
+  "segment.summaryPlaceholder": "Write a brief summary for better retrieval…",
   "segment.vectorHash": "Vector hash: "
 }
@@ -39,6 +39,12 @@
   "form.retrievalSettings": "Retrieval Settings",
   "form.save": "Save",
   "form.searchModel": "Search model",
+  "form.summaryAutoGen": "Summary Auto-Gen",
+  "form.summaryAutoGenEnableTip": "Once enabled, summaries will be generated automatically for newly added documents. Existing documents can still be summarized manually.",
+  "form.summaryAutoGenTip": "Summaries are automatically generated for newly added documents. Existing documents can still be summarized manually.",
+  "form.summaryInstructions": "Instructions",
+  "form.summaryInstructionsPlaceholder": "Describe the rules or style for auto-generated summaries…",
+  "form.summaryModel": "Summary Model",
   "form.upgradeHighQualityTip": "Once upgrading to High Quality mode, reverting to Economical mode is not available",
   "title": "Knowledge settings"
 }
Some files were not shown because too many files have changed in this diff.