mirror of https://github.com/langgenius/dify.git
synced 2026-02-06 03:35:36 +08:00

Compare commits
7 Commits
refactor/m... ... inject-con...
| Author | SHA1 | Date |
|---|---|---|
|  | c2a11ffa97 |  |
|  | 5eaf535a0d |  |
|  | 9fb72c151c |  |
|  | 603a896c49 |  |
|  | 41177757e6 |  |
|  | 4f826b4641 |  |
|  | 3216b67bfa |  |
.github/CODEOWNERS (vendored, 3 lines changed)
@@ -9,6 +9,9 @@
 # CODEOWNERS file
 /.github/CODEOWNERS @laipz8200 @crazywoola

+# Agents
+/.agents/skills/ @hyoban
+
 # Docs
 /docs/ @crazywoola
@@ -112,7 +112,6 @@ ignore_imports =
     core.workflow.nodes.datasource.datasource_node -> models.model
     core.workflow.nodes.datasource.datasource_node -> models.tools
    core.workflow.nodes.datasource.datasource_node -> services.datasource_provider_service
-    core.workflow.nodes.document_extractor.node -> configs
     core.workflow.nodes.document_extractor.node -> core.file.file_manager
     core.workflow.nodes.document_extractor.node -> core.helper.ssrf_proxy
     core.workflow.nodes.http_request.entities -> configs
@@ -107,10 +107,11 @@ class AnnotationReplyActionApi(Resource):
     def post(self, app_id, action: Literal["enable", "disable"]):
         app_id = str(app_id)
         args = AnnotationReplyPayload.model_validate(console_ns.payload)
-        if action == "enable":
-            result = AppAnnotationService.enable_app_annotation(args.model_dump(), app_id)
-        elif action == "disable":
-            result = AppAnnotationService.disable_app_annotation(app_id)
+        match action:
+            case "enable":
+                result = AppAnnotationService.enable_app_annotation(args.model_dump(), app_id)
+            case "disable":
+                result = AppAnnotationService.disable_app_annotation(app_id)
         return result, 200
@@ -155,43 +155,43 @@ class OAuthServerUserTokenApi(Resource):
             grant_type = OAuthGrantType(payload.grant_type)
         except ValueError:
             raise BadRequest("invalid grant_type")
-        if grant_type == OAuthGrantType.AUTHORIZATION_CODE:
-            if not payload.code:
-                raise BadRequest("code is required")
-
-            if payload.client_secret != oauth_provider_app.client_secret:
-                raise BadRequest("client_secret is invalid")
-
-            if payload.redirect_uri not in oauth_provider_app.redirect_uris:
-                raise BadRequest("redirect_uri is invalid")
-
-            access_token, refresh_token = OAuthServerService.sign_oauth_access_token(
-                grant_type, code=payload.code, client_id=oauth_provider_app.client_id
-            )
-            return jsonable_encoder(
-                {
-                    "access_token": access_token,
-                    "token_type": "Bearer",
-                    "expires_in": OAUTH_ACCESS_TOKEN_EXPIRES_IN,
-                    "refresh_token": refresh_token,
-                }
-            )
-        elif grant_type == OAuthGrantType.REFRESH_TOKEN:
-            if not payload.refresh_token:
-                raise BadRequest("refresh_token is required")
-
-            access_token, refresh_token = OAuthServerService.sign_oauth_access_token(
-                grant_type, refresh_token=payload.refresh_token, client_id=oauth_provider_app.client_id
-            )
-            return jsonable_encoder(
-                {
-                    "access_token": access_token,
-                    "token_type": "Bearer",
-                    "expires_in": OAUTH_ACCESS_TOKEN_EXPIRES_IN,
-                    "refresh_token": refresh_token,
-                }
-            )
+        match grant_type:
+            case OAuthGrantType.AUTHORIZATION_CODE:
+                if not payload.code:
+                    raise BadRequest("code is required")
+
+                if payload.client_secret != oauth_provider_app.client_secret:
+                    raise BadRequest("client_secret is invalid")
+
+                if payload.redirect_uri not in oauth_provider_app.redirect_uris:
+                    raise BadRequest("redirect_uri is invalid")
+
+                access_token, refresh_token = OAuthServerService.sign_oauth_access_token(
+                    grant_type, code=payload.code, client_id=oauth_provider_app.client_id
+                )
+                return jsonable_encoder(
+                    {
+                        "access_token": access_token,
+                        "token_type": "Bearer",
+                        "expires_in": OAUTH_ACCESS_TOKEN_EXPIRES_IN,
+                        "refresh_token": refresh_token,
+                    }
+                )
+            case OAuthGrantType.REFRESH_TOKEN:
+                if not payload.refresh_token:
+                    raise BadRequest("refresh_token is required")
+
+                access_token, refresh_token = OAuthServerService.sign_oauth_access_token(
+                    grant_type, refresh_token=payload.refresh_token, client_id=oauth_provider_app.client_id
+                )
+                return jsonable_encoder(
+                    {
+                        "access_token": access_token,
+                        "token_type": "Bearer",
+                        "expires_in": OAUTH_ACCESS_TOKEN_EXPIRES_IN,
+                        "refresh_token": refresh_token,
+                    }
+                )


 @console_ns.route("/oauth/provider/account")
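The two hunks above replace if/elif chains keyed on an enum with structural pattern matching. A minimal standalone sketch of the same pattern (the enum and handler bodies here are illustrative, not the Dify code):

```python
from enum import Enum


class GrantType(Enum):  # illustrative stand-in for OAuthGrantType
    AUTHORIZATION_CODE = "authorization_code"
    REFRESH_TOKEN = "refresh_token"


def handle(grant_type: GrantType) -> str:
    match grant_type:
        case GrantType.AUTHORIZATION_CODE:  # dotted names are value patterns
            return "exchange code for tokens"
        case GrantType.REFRESH_TOKEN:
            return "rotate the refresh token"
        case _:
            # Unlike if/elif, an unmatched value falls through silently,
            # so a catch-all case keeps the failure explicit.
            raise ValueError(f"unsupported grant_type: {grant_type}")


print(handle(GrantType.REFRESH_TOKEN))
```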
@@ -1339,6 +1339,18 @@ class DocumentGenerateSummaryApi(Resource):
             missing_ids = set(document_list) - found_ids
             raise NotFound(f"Some documents not found: {list(missing_ids)}")

+        # Update need_summary to True for documents that don't have it set
+        # This handles the case where documents were created when summary_index_setting was disabled
+        documents_to_update = [doc for doc in documents if not doc.need_summary and doc.doc_form != "qa_model"]
+
+        if documents_to_update:
+            document_ids_to_update = [str(doc.id) for doc in documents_to_update]
+            DocumentService.update_documents_need_summary(
+                dataset_id=dataset_id,
+                document_ids=document_ids_to_update,
+                need_summary=True,
+            )
+
         # Dispatch async tasks for each document
         for document in documents:
             # Skip qa_model documents as they don't generate summaries
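The missing-ids check in the context lines above relies on set difference. A small self-contained sketch of that validation shape (names are illustrative):

```python
def validate_document_ids(requested: list[str], found_ids: set[str]) -> None:
    # Set difference yields exactly the ids that were requested
    # but never returned by the database query.
    missing_ids = set(requested) - found_ids
    if missing_ids:
        raise LookupError(f"Some documents not found: {sorted(missing_ids)}")


validate_document_ids(["d1", "d2", "d3"], {"d1", "d3"})  # raises LookupError: ['d2']
```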
@@ -250,7 +250,7 @@ class WorkflowResponseConverter:
             data=WorkflowFinishStreamResponse.Data(
                 id=run_id,
                 workflow_id=workflow_id,
-                status=status.value,
+                status=status,
                 outputs=encoded_outputs,
                 error=error,
                 elapsed_time=elapsed_time,
@@ -340,13 +340,13 @@ class WorkflowResponseConverter:
         metadata = self._merge_metadata(event.execution_metadata, snapshot)

         if isinstance(event, QueueNodeSucceededEvent):
-            status = WorkflowNodeExecutionStatus.SUCCEEDED.value
+            status = WorkflowNodeExecutionStatus.SUCCEEDED
             error_message = event.error
         elif isinstance(event, QueueNodeFailedEvent):
-            status = WorkflowNodeExecutionStatus.FAILED.value
+            status = WorkflowNodeExecutionStatus.FAILED
             error_message = event.error
         else:
-            status = WorkflowNodeExecutionStatus.EXCEPTION.value
+            status = WorkflowNodeExecutionStatus.EXCEPTION
             error_message = event.error

         return NodeFinishStreamResponse(
@@ -413,7 +413,7 @@ class WorkflowResponseConverter:
             process_data_truncated=process_data_truncated,
             outputs=outputs,
             outputs_truncated=outputs_truncated,
-            status=WorkflowNodeExecutionStatus.RETRY.value,
+            status=WorkflowNodeExecutionStatus.RETRY,
             error=event.error,
             elapsed_time=elapsed_time,
             execution_metadata=metadata,
@@ -7,7 +7,7 @@ from pydantic import BaseModel, ConfigDict, Field
 from core.model_runtime.entities.llm_entities import LLMResult, LLMUsage
 from core.rag.entities.citation_metadata import RetrievalSourceMetadata
 from core.workflow.entities import AgentNodeStrategyInit
-from core.workflow.enums import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus
+from core.workflow.enums import WorkflowExecutionStatus, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus


 class AnnotationReplyAccount(BaseModel):
@@ -223,7 +223,7 @@ class WorkflowFinishStreamResponse(StreamResponse):

     id: str
     workflow_id: str
-    status: str
+    status: WorkflowExecutionStatus
     outputs: Mapping[str, Any] | None = None
     error: str | None = None
     elapsed_time: float
@@ -311,7 +311,7 @@ class NodeFinishStreamResponse(StreamResponse):
     process_data_truncated: bool = False
     outputs: Mapping[str, Any] | None = None
     outputs_truncated: bool = True
-    status: str
+    status: WorkflowNodeExecutionStatus
     error: str | None = None
     elapsed_time: float
     execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] | None = None
@@ -375,7 +375,7 @@ class NodeRetryStreamResponse(StreamResponse):
     process_data_truncated: bool = False
     outputs: Mapping[str, Any] | None = None
     outputs_truncated: bool = False
-    status: str
+    status: WorkflowNodeExecutionStatus
     error: str | None = None
     elapsed_time: float
     execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] | None = None
@@ -719,7 +719,7 @@ class WorkflowAppBlockingResponse(AppBlockingResponse):

     id: str
     workflow_id: str
-    status: str
+    status: WorkflowExecutionStatus
     outputs: Mapping[str, Any] | None = None
     error: str | None = None
     elapsed_time: float
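The hunks above drop `.value` at the call sites and type the `status` fields with the enums directly. Assuming the workflow status enums are str-based, Pydantic validates raw strings into the enum and serializes the member back to its value, so passing the member itself is enough. A minimal sketch:

```python
from enum import StrEnum

from pydantic import BaseModel


class WorkflowExecutionStatus(StrEnum):  # stand-in for core.workflow.enums
    SUCCEEDED = "succeeded"
    FAILED = "failed"


class FinishData(BaseModel):
    status: WorkflowExecutionStatus


data = FinishData(status=WorkflowExecutionStatus.SUCCEEDED)
print(data.model_dump(mode="json"))        # {'status': 'succeeded'}
print(FinishData(status="failed").status)  # plain strings still validate into the enum
```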
@@ -1,5 +1,5 @@
 from collections.abc import Callable, Sequence
-from typing import TYPE_CHECKING, final
+from typing import TYPE_CHECKING, Any, cast, final

 from typing_extensions import override
@@ -15,6 +15,7 @@ from core.workflow.graph.graph import NodeFactory
 from core.workflow.nodes.base.node import Node
 from core.workflow.nodes.code.code_node import CodeNode
 from core.workflow.nodes.code.limits import CodeNodeLimits
+from core.workflow.nodes.document_extractor import DocumentExtractorNode, UnstructuredApiConfig
 from core.workflow.nodes.http_request.node import HttpRequestNode
 from core.workflow.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING
 from core.workflow.nodes.protocols import FileManagerProtocol, HttpClientProtocol
@@ -50,6 +51,7 @@ class DifyNodeFactory(NodeFactory):
         http_request_http_client: HttpClientProtocol | None = None,
         http_request_tool_file_manager_factory: Callable[[], ToolFileManager] = ToolFileManager,
         http_request_file_manager: FileManagerProtocol | None = None,
+        document_extractor_unstructured_api_config: UnstructuredApiConfig | None = None,
     ) -> None:
         self.graph_init_params = graph_init_params
         self.graph_runtime_state = graph_runtime_state
@@ -71,6 +73,13 @@ class DifyNodeFactory(NodeFactory):
         self._http_request_http_client = http_request_http_client or ssrf_proxy
         self._http_request_tool_file_manager_factory = http_request_tool_file_manager_factory
         self._http_request_file_manager = http_request_file_manager or file_manager
+        self._document_extractor_unstructured_api_config = (
+            document_extractor_unstructured_api_config
+            or UnstructuredApiConfig(
+                api_url=dify_config.UNSTRUCTURED_API_URL,
+                api_key=dify_config.UNSTRUCTURED_API_KEY or "",
+            )
+        )

     @override
     def create_node(self, node_config: NodeConfigDict) -> Node:
@@ -103,13 +112,17 @@ class DifyNodeFactory(NodeFactory):
         if not node_class:
             raise ValueError(f"No latest version class found for node type: {node_type}")

+        common_kwargs: dict[str, Any] = {
+            "id": node_id,
+            "config": node_config,
+            "graph_init_params": self.graph_init_params,
+            "graph_runtime_state": self.graph_runtime_state,
+        }
+
         # Create node instance
         if node_type == NodeType.CODE:
             return CodeNode(
-                id=node_id,
-                config=node_config,
-                graph_init_params=self.graph_init_params,
-                graph_runtime_state=self.graph_runtime_state,
+                **common_kwargs,
                 code_executor=self._code_executor,
                 code_providers=self._code_providers,
                 code_limits=self._code_limits,
@@ -117,27 +130,23 @@ class DifyNodeFactory(NodeFactory):

         if node_type == NodeType.TEMPLATE_TRANSFORM:
             return TemplateTransformNode(
-                id=node_id,
-                config=node_config,
-                graph_init_params=self.graph_init_params,
-                graph_runtime_state=self.graph_runtime_state,
+                **common_kwargs,
                 template_renderer=self._template_renderer,
             )

         if node_type == NodeType.HTTP_REQUEST:
             return HttpRequestNode(
-                id=node_id,
-                config=node_config,
-                graph_init_params=self.graph_init_params,
-                graph_runtime_state=self.graph_runtime_state,
+                **common_kwargs,
                 http_client=self._http_request_http_client,
                 tool_file_manager_factory=self._http_request_tool_file_manager_factory,
                 file_manager=self._http_request_file_manager,
             )

-        return node_class(
-            id=node_id,
-            config=node_config,
-            graph_init_params=self.graph_init_params,
-            graph_runtime_state=self.graph_runtime_state,
-        )
+        if node_type == NodeType.DOCUMENT_EXTRACTOR:
+            document_extractor_class = cast(type[DocumentExtractorNode], node_class)
+            return document_extractor_class(
+                **common_kwargs,
+                unstructured_api_config=self._document_extractor_unstructured_api_config,
+            )
+
+        return node_class(**common_kwargs)
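The factory change above hoists the four shared constructor arguments into a `common_kwargs` dict and uses `typing.cast` to narrow `node_class` before passing an extra keyword. A minimal sketch of that pattern (the class names are illustrative, not the Dify code):

```python
from typing import Any, cast


class BaseNode:
    def __init__(self, id: str, config: dict[str, Any]) -> None:
        self.id = id
        self.config = config


class ExtractorNode(BaseNode):
    def __init__(self, id: str, config: dict[str, Any], *, api_url: str | None = None) -> None:
        super().__init__(id, config)
        self.api_url = api_url


def create_node(node_class: type[BaseNode], wants_extractor: bool) -> BaseNode:
    common_kwargs: dict[str, Any] = {"id": "n1", "config": {}}
    if wants_extractor:
        # cast() is a type-checker hint with no runtime effect; the branch
        # must guarantee node_class really is the extractor subclass here.
        extractor_class = cast(type[ExtractorNode], node_class)
        return extractor_class(**common_kwargs, api_url="https://example.invalid")
    return node_class(**common_kwargs)


node = create_node(ExtractorNode, wants_extractor=True)
```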
@@ -369,7 +369,9 @@ class IndexingRunner:
             # Generate summary preview
             summary_index_setting = tmp_processing_rule.get("summary_index_setting")
             if summary_index_setting and summary_index_setting.get("enable") and preview_texts:
-                preview_texts = index_processor.generate_summary_preview(tenant_id, preview_texts, summary_index_setting)
+                preview_texts = index_processor.generate_summary_preview(
+                    tenant_id, preview_texts, summary_index_setting, doc_language
+                )

         return IndexingEstimate(total_segments=total_segments, preview=preview_texts)
@@ -441,11 +441,13 @@ DEFAULT_GENERATOR_SUMMARY_PROMPT = (

 Requirements:
 1. Write a concise summary in plain text
-2. Use the same language as the input content
+2. You must write in {language}. No language other than {language} should be used.
 3. Focus on important facts, concepts, and details
 4. If images are included, describe their key information
 5. Do not use words like "好的", "ok", "I understand", "This text discusses", "The content mentions"
 6. Write directly without extra words
 7. If there is not enough content to generate a meaningful summary,
 return an empty string without any explanation or prompt

 Output only the summary text. Start summarizing now:
@@ -48,12 +48,22 @@ class BaseIndexProcessor(ABC):

     @abstractmethod
     def generate_summary_preview(
-        self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict
+        self,
+        tenant_id: str,
+        preview_texts: list[PreviewDetail],
+        summary_index_setting: dict,
+        doc_language: str | None = None,
     ) -> list[PreviewDetail]:
         """
         For each segment in preview_texts, generate a summary using LLM and attach it to the segment.
         The summary can be stored in a new attribute, e.g., summary.
         This method should be implemented by subclasses.

+        Args:
+            tenant_id: Tenant ID
+            preview_texts: List of preview details to generate summaries for
+            summary_index_setting: Summary index configuration
+            doc_language: Optional document language to ensure summary is generated in the correct language
         """
         raise NotImplementedError
@@ -275,7 +275,11 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
             raise ValueError("Chunks is not a list")

     def generate_summary_preview(
-        self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict
+        self,
+        tenant_id: str,
+        preview_texts: list[PreviewDetail],
+        summary_index_setting: dict,
+        doc_language: str | None = None,
     ) -> list[PreviewDetail]:
         """
         For each segment, concurrently call generate_summary to generate a summary
@@ -298,11 +302,15 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
             if flask_app:
                 # Ensure Flask app context in worker thread
                 with flask_app.app_context():
-                    summary, _ = self.generate_summary(tenant_id, preview.content, summary_index_setting)
+                    summary, _ = self.generate_summary(
+                        tenant_id, preview.content, summary_index_setting, document_language=doc_language
+                    )
                     preview.summary = summary
             else:
                 # Fallback: try without app context (may fail)
-                summary, _ = self.generate_summary(tenant_id, preview.content, summary_index_setting)
+                summary, _ = self.generate_summary(
+                    tenant_id, preview.content, summary_index_setting, document_language=doc_language
+                )
                 preview.summary = summary

         # Generate summaries concurrently using ThreadPoolExecutor
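The worker above pushes a Flask app context before generating each summary, because threads spawned by a ThreadPoolExecutor do not inherit the context of the request thread. A runnable sketch of the pattern (the summarizer body is a placeholder for the LLM call):

```python
from concurrent.futures import ThreadPoolExecutor

from flask import Flask, current_app

app = Flask(__name__)


def summarize(flask_app: Flask, text: str) -> str:
    # Without app_context(), current_app (and anything app-bound, such as
    # database sessions) would raise a RuntimeError in this worker thread.
    with flask_app.app_context():
        return f"[{current_app.name}] summary of {text!r}"


with ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(summarize, app, t) for t in ("chunk a", "chunk b")]
    print([f.result() for f in futures])
```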
@@ -356,6 +364,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
         text: str,
         summary_index_setting: dict | None = None,
         segment_id: str | None = None,
+        document_language: str | None = None,
     ) -> tuple[str, LLMUsage]:
         """
         Generate summary for the given text using ModelInstance.invoke_llm and the default or custom summary prompt,

@@ -366,6 +375,8 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
             text: Text content to summarize
             summary_index_setting: Summary index configuration
             segment_id: Optional segment ID to fetch attachments from SegmentAttachmentBinding table
+            document_language: Optional document language (e.g., "Chinese", "English")
+                to ensure summary is generated in the correct language

         Returns:
             Tuple of (summary_content, llm_usage) where llm_usage is LLMUsage object
@@ -381,8 +392,22 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
             raise ValueError("model_name and model_provider_name are required in summary_index_setting")

         # Import default summary prompt
+        is_default_prompt = False
         if not summary_prompt:
             summary_prompt = DEFAULT_GENERATOR_SUMMARY_PROMPT
+            is_default_prompt = True
+
+        # Format prompt with document language only for default prompt
+        # Custom prompts are used as-is to avoid interfering with user-defined templates
+        # If document_language is provided, use it; otherwise, use "the same language as the input content"
+        # This is especially important for image-only chunks where text is empty or minimal
+        if is_default_prompt:
+            language_for_prompt = document_language or "the same language as the input content"
+            try:
+                summary_prompt = summary_prompt.format(language=language_for_prompt)
+            except KeyError:
+                # If default prompt doesn't have {language} placeholder, use it as-is
+                pass

         provider_manager = ProviderManager()
         provider_model_bundle = provider_manager.get_provider_model_bundle(
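The language substitution above only touches the built-in template; custom prompts pass through untouched. A standalone sketch of the guarded `str.format` call (template text abbreviated):

```python
DEFAULT_PROMPT = "Write a concise summary. You must write in {language}."


def render_summary_prompt(template: str, document_language: str | None) -> str:
    language = document_language or "the same language as the input content"
    try:
        return template.format(language=language)
    except KeyError:
        # A template containing other brace fields (e.g. "{topic}") raises
        # KeyError here; fall back to using the template verbatim.
        return template


print(render_summary_prompt(DEFAULT_PROMPT, "Chinese"))
print(render_summary_prompt("Summarize {topic} briefly.", None))  # returned as-is
```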
@@ -358,7 +358,11 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
         }

     def generate_summary_preview(
-        self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict
+        self,
+        tenant_id: str,
+        preview_texts: list[PreviewDetail],
+        summary_index_setting: dict,
+        doc_language: str | None = None,
     ) -> list[PreviewDetail]:
         """
         For each parent chunk in preview_texts, concurrently call generate_summary to generate a summary

@@ -389,6 +393,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
                         tenant_id=tenant_id,
                         text=preview.content,
                         summary_index_setting=summary_index_setting,
+                        document_language=doc_language,
                     )
                     preview.summary = summary
             else:

@@ -397,6 +402,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
                     tenant_id=tenant_id,
                     text=preview.content,
                     summary_index_setting=summary_index_setting,
+                    document_language=doc_language,
                 )
                 preview.summary = summary
@@ -241,7 +241,11 @@ class QAIndexProcessor(BaseIndexProcessor):
         }

     def generate_summary_preview(
-        self, tenant_id: str, preview_texts: list[PreviewDetail], summary_index_setting: dict
+        self,
+        tenant_id: str,
+        preview_texts: list[PreviewDetail],
+        summary_index_setting: dict,
+        doc_language: str | None = None,
     ) -> list[PreviewDetail]:
         """
         QA model doesn't generate summaries, so this method returns preview_texts unchanged.
@@ -1,4 +1,4 @@
-from .entities import DocumentExtractorNodeData
+from .entities import DocumentExtractorNodeData, UnstructuredApiConfig
 from .node import DocumentExtractorNode

-__all__ = ["DocumentExtractorNode", "DocumentExtractorNodeData"]
+__all__ = ["DocumentExtractorNode", "DocumentExtractorNodeData", "UnstructuredApiConfig"]
@@ -1,7 +1,14 @@
 from collections.abc import Sequence
+from dataclasses import dataclass

 from core.workflow.nodes.base import BaseNodeData


 class DocumentExtractorNodeData(BaseNodeData):
     variable_selector: Sequence[str]
+
+
+@dataclass(frozen=True)
+class UnstructuredApiConfig:
+    api_url: str | None = None
+    api_key: str = ""
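`UnstructuredApiConfig` is declared `@dataclass(frozen=True)`, so a config object cannot be mutated after it is injected into a node. A quick sketch of how that behaves (the URL and key are placeholders):

```python
from dataclasses import FrozenInstanceError, dataclass


@dataclass(frozen=True)
class UnstructuredApiConfig:
    api_url: str | None = None
    api_key: str = ""


cfg = UnstructuredApiConfig(api_url="https://unstructured.example.invalid", api_key="key")
try:
    cfg.api_key = "other"  # frozen dataclasses raise on attribute assignment
except FrozenInstanceError:
    print("config is immutable")
```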
@@ -5,7 +5,7 @@ import logging
 import os
 import tempfile
 from collections.abc import Mapping, Sequence
-from typing import Any
+from typing import TYPE_CHECKING, Any

 import charset_normalizer
 import docx
@@ -20,7 +20,6 @@ from docx.oxml.text.paragraph import CT_P
 from docx.table import Table
 from docx.text.paragraph import Paragraph

-from configs import dify_config
 from core.file import File, FileTransferMethod, file_manager
 from core.helper import ssrf_proxy
 from core.variables import ArrayFileSegment
@@ -29,11 +28,15 @@ from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus
 from core.workflow.node_events import NodeRunResult
 from core.workflow.nodes.base.node import Node

-from .entities import DocumentExtractorNodeData
+from .entities import DocumentExtractorNodeData, UnstructuredApiConfig
 from .exc import DocumentExtractorError, FileDownloadError, TextExtractionError, UnsupportedFileTypeError

 logger = logging.getLogger(__name__)

+if TYPE_CHECKING:
+    from core.workflow.entities import GraphInitParams
+    from core.workflow.runtime import GraphRuntimeState
+

 class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
     """
@@ -47,6 +50,23 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
     def version(cls) -> str:
         return "1"

+    def __init__(
+        self,
+        id: str,
+        config: Mapping[str, Any],
+        graph_init_params: "GraphInitParams",
+        graph_runtime_state: "GraphRuntimeState",
+        *,
+        unstructured_api_config: UnstructuredApiConfig | None = None,
+    ) -> None:
+        super().__init__(
+            id=id,
+            config=config,
+            graph_init_params=graph_init_params,
+            graph_runtime_state=graph_runtime_state,
+        )
+        self._unstructured_api_config = unstructured_api_config or UnstructuredApiConfig()
+
     def _run(self):
         variable_selector = self.node_data.variable_selector
         variable = self.graph_runtime_state.variable_pool.get(variable_selector)
@@ -64,7 +84,10 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):

         try:
             if isinstance(value, list):
-                extracted_text_list = list(map(_extract_text_from_file, value))
+                extracted_text_list = [
+                    _extract_text_from_file(file, unstructured_api_config=self._unstructured_api_config)
+                    for file in value
+                ]
                 return NodeRunResult(
                     status=WorkflowNodeExecutionStatus.SUCCEEDED,
                     inputs=inputs,
@@ -72,7 +95,7 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
                     outputs={"text": ArrayStringSegment(value=extracted_text_list)},
                 )
             elif isinstance(value, File):
-                extracted_text = _extract_text_from_file(value)
+                extracted_text = _extract_text_from_file(value, unstructured_api_config=self._unstructured_api_config)
                 return NodeRunResult(
                     status=WorkflowNodeExecutionStatus.SUCCEEDED,
                     inputs=inputs,
@@ -103,7 +126,12 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]):
         return {node_id + ".files": typed_node_data.variable_selector}


-def _extract_text_by_mime_type(*, file_content: bytes, mime_type: str) -> str:
+def _extract_text_by_mime_type(
+    *,
+    file_content: bytes,
+    mime_type: str,
+    unstructured_api_config: UnstructuredApiConfig,
+) -> str:
     """Extract text from a file based on its MIME type."""
     match mime_type:
         case "text/plain" | "text/html" | "text/htm" | "text/markdown" | "text/xml":
@@ -111,7 +139,7 @@ def _extract_text_by_mime_type(
         case "application/pdf":
             return _extract_text_from_pdf(file_content)
         case "application/msword":
-            return _extract_text_from_doc(file_content)
+            return _extract_text_from_doc(file_content, unstructured_api_config=unstructured_api_config)
         case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
             return _extract_text_from_docx(file_content)
         case "text/csv":

@@ -119,11 +147,11 @@ def _extract_text_by_mime_type(
         case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" | "application/vnd.ms-excel":
             return _extract_text_from_excel(file_content)
         case "application/vnd.ms-powerpoint":
-            return _extract_text_from_ppt(file_content)
+            return _extract_text_from_ppt(file_content, unstructured_api_config=unstructured_api_config)
         case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
-            return _extract_text_from_pptx(file_content)
+            return _extract_text_from_pptx(file_content, unstructured_api_config=unstructured_api_config)
         case "application/epub+zip":
-            return _extract_text_from_epub(file_content)
+            return _extract_text_from_epub(file_content, unstructured_api_config=unstructured_api_config)
         case "message/rfc822":
             return _extract_text_from_eml(file_content)
         case "application/vnd.ms-outlook":
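The dispatch above leans on match statements with or-patterns, which group several literals into a single case. A condensed sketch of the same shape (handlers reduced to strings):

```python
def dispatch(mime_type: str) -> str:
    match mime_type:
        case "text/plain" | "text/html" | "text/markdown":
            return "decode as text"
        case "application/pdf":
            return "pdf extractor"
        case "application/msword":
            return "unstructured API extractor"
        case _:
            raise ValueError(f"Unsupported MIME type: {mime_type}")


print(dispatch("text/html"))  # decode as text
```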
@@ -140,7 +168,12 @@ def _extract_text_by_mime_type(
             raise UnsupportedFileTypeError(f"Unsupported MIME type: {mime_type}")


-def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str) -> str:
+def _extract_text_by_file_extension(
+    *,
+    file_content: bytes,
+    file_extension: str,
+    unstructured_api_config: UnstructuredApiConfig,
+) -> str:
     """Extract text from a file based on its file extension."""
     match file_extension:
         case (
@@ -203,7 +236,7 @@ def _extract_text_by_file_extension(
         case ".pdf":
             return _extract_text_from_pdf(file_content)
         case ".doc":
-            return _extract_text_from_doc(file_content)
+            return _extract_text_from_doc(file_content, unstructured_api_config=unstructured_api_config)
         case ".docx":
             return _extract_text_from_docx(file_content)
         case ".csv":

@@ -211,11 +244,11 @@ def _extract_text_by_file_extension(
         case ".xls" | ".xlsx":
             return _extract_text_from_excel(file_content)
         case ".ppt":
-            return _extract_text_from_ppt(file_content)
+            return _extract_text_from_ppt(file_content, unstructured_api_config=unstructured_api_config)
         case ".pptx":
-            return _extract_text_from_pptx(file_content)
+            return _extract_text_from_pptx(file_content, unstructured_api_config=unstructured_api_config)
         case ".epub":
-            return _extract_text_from_epub(file_content)
+            return _extract_text_from_epub(file_content, unstructured_api_config=unstructured_api_config)
         case ".eml":
             return _extract_text_from_eml(file_content)
         case ".msg":
@@ -312,14 +345,14 @@ def _extract_text_from_pdf(file_content: bytes) -> str:
         raise TextExtractionError(f"Failed to extract text from PDF: {str(e)}") from e


-def _extract_text_from_doc(file_content: bytes) -> str:
+def _extract_text_from_doc(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str:
     """
     Extract text from a DOC file.
     """
     from unstructured.partition.api import partition_via_api

-    if not dify_config.UNSTRUCTURED_API_URL:
-        raise TextExtractionError("UNSTRUCTURED_API_URL must be set")
+    if not unstructured_api_config.api_url:
+        raise TextExtractionError("Unstructured API URL is not configured for DOC file processing.")

     try:
         with tempfile.NamedTemporaryFile(suffix=".doc", delete=False) as temp_file:

@@ -329,8 +362,8 @@ def _extract_text_from_doc(
             elements = partition_via_api(
                 file=file,
                 metadata_filename=temp_file.name,
-                api_url=dify_config.UNSTRUCTURED_API_URL,
-                api_key=dify_config.UNSTRUCTURED_API_KEY,  # type: ignore
+                api_url=unstructured_api_config.api_url,
+                api_key=unstructured_api_config.api_key,
             )
         os.unlink(temp_file.name)
         return "\n".join([getattr(element, "text", "") for element in elements])
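`_extract_text_from_doc` writes the payload to a named temporary file with `delete=False` so the unstructured API call can reopen it by path, then removes it manually with `os.unlink`. A self-contained sketch of that lifecycle (the reader is a plain `open` standing in for the API call):

```python
import os
import tempfile


def process_via_named_file(payload: bytes, suffix: str) -> int:
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as temp_file:
        temp_file.write(payload)
        temp_file.flush()
        path = temp_file.name
    try:
        # The file persists after the with-block because delete=False,
        # so another reader can access it by name.
        with open(path, "rb") as file:
            return len(file.read())
    finally:
        os.unlink(path)  # manual cleanup mirrors the os.unlink() in the diff


print(process_via_named_file(b"fake doc bytes", ".doc"))
```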
@@ -420,12 +453,20 @@ def _download_file_content(file: File) -> bytes:
         raise FileDownloadError(f"Error downloading file: {str(e)}") from e


-def _extract_text_from_file(file: File):
+def _extract_text_from_file(file: File, *, unstructured_api_config: UnstructuredApiConfig) -> str:
     file_content = _download_file_content(file)
     if file.extension:
-        extracted_text = _extract_text_by_file_extension(file_content=file_content, file_extension=file.extension)
+        extracted_text = _extract_text_by_file_extension(
+            file_content=file_content,
+            file_extension=file.extension,
+            unstructured_api_config=unstructured_api_config,
+        )
     elif file.mime_type:
-        extracted_text = _extract_text_by_mime_type(file_content=file_content, mime_type=file.mime_type)
+        extracted_text = _extract_text_by_mime_type(
+            file_content=file_content,
+            mime_type=file.mime_type,
+            unstructured_api_config=unstructured_api_config,
+        )
     else:
         raise UnsupportedFileTypeError("Unable to determine file type: MIME type or file extension is missing")
     return extracted_text
@@ -517,12 +558,12 @@ def _extract_text_from_excel(file_content: bytes) -> str:
         raise TextExtractionError(f"Failed to extract text from Excel file: {str(e)}") from e


-def _extract_text_from_ppt(file_content: bytes) -> str:
+def _extract_text_from_ppt(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str:
     from unstructured.partition.api import partition_via_api
     from unstructured.partition.ppt import partition_ppt

     try:
-        if dify_config.UNSTRUCTURED_API_URL:
+        if unstructured_api_config.api_url:
             with tempfile.NamedTemporaryFile(suffix=".ppt", delete=False) as temp_file:
                 temp_file.write(file_content)
                 temp_file.flush()

@@ -530,8 +571,8 @@ def _extract_text_from_ppt(
                 elements = partition_via_api(
                     file=file,
                     metadata_filename=temp_file.name,
-                    api_url=dify_config.UNSTRUCTURED_API_URL,
-                    api_key=dify_config.UNSTRUCTURED_API_KEY,  # type: ignore
+                    api_url=unstructured_api_config.api_url,
+                    api_key=unstructured_api_config.api_key,
                 )
             os.unlink(temp_file.name)
         else:
@@ -543,12 +584,12 @@ def _extract_text_from_ppt(
         raise TextExtractionError(f"Failed to extract text from PPTX: {str(e)}") from e


-def _extract_text_from_pptx(file_content: bytes) -> str:
+def _extract_text_from_pptx(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str:
     from unstructured.partition.api import partition_via_api
     from unstructured.partition.pptx import partition_pptx

     try:
-        if dify_config.UNSTRUCTURED_API_URL:
+        if unstructured_api_config.api_url:
             with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as temp_file:
                 temp_file.write(file_content)
                 temp_file.flush()

@@ -556,8 +597,8 @@ def _extract_text_from_pptx(
                 elements = partition_via_api(
                     file=file,
                     metadata_filename=temp_file.name,
-                    api_url=dify_config.UNSTRUCTURED_API_URL,
-                    api_key=dify_config.UNSTRUCTURED_API_KEY,  # type: ignore
+                    api_url=unstructured_api_config.api_url,
+                    api_key=unstructured_api_config.api_key,
                 )
             os.unlink(temp_file.name)
         else:
@@ -568,12 +609,12 @@ def _extract_text_from_pptx(
         raise TextExtractionError(f"Failed to extract text from PPTX: {str(e)}") from e


-def _extract_text_from_epub(file_content: bytes) -> str:
+def _extract_text_from_epub(file_content: bytes, *, unstructured_api_config: UnstructuredApiConfig) -> str:
     from unstructured.partition.api import partition_via_api
     from unstructured.partition.epub import partition_epub

     try:
-        if dify_config.UNSTRUCTURED_API_URL:
+        if unstructured_api_config.api_url:
             with tempfile.NamedTemporaryFile(suffix=".epub", delete=False) as temp_file:
                 temp_file.write(file_content)
                 temp_file.flush()

@@ -581,8 +622,8 @@ def _extract_text_from_epub(
                 elements = partition_via_api(
                     file=file,
                     metadata_filename=temp_file.name,
-                    api_url=dify_config.UNSTRUCTURED_API_URL,
-                    api_key=dify_config.UNSTRUCTURED_API_KEY,  # type: ignore
+                    api_url=unstructured_api_config.api_url,
+                    api_key=unstructured_api_config.api_key,
                 )
             os.unlink(temp_file.name)
         else:
@@ -78,12 +78,21 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
         indexing_technique = node_data.indexing_technique or dataset.indexing_technique
         summary_index_setting = node_data.summary_index_setting or dataset.summary_index_setting

+        # Try to get document language if document_id is available
+        doc_language = None
+        document_id = variable_pool.get(["sys", SystemVariableKey.DOCUMENT_ID])
+        if document_id:
+            document = db.session.query(Document).filter_by(id=document_id.value).first()
+            if document and document.doc_language:
+                doc_language = document.doc_language
+
         outputs = self._get_preview_output_with_summaries(
             node_data.chunk_structure,
             chunks,
             dataset=dataset,
             indexing_technique=indexing_technique,
             summary_index_setting=summary_index_setting,
+            doc_language=doc_language,
         )
         return NodeRunResult(
             status=WorkflowNodeExecutionStatus.SUCCEEDED,
@@ -315,6 +324,7 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
         dataset: Dataset,
         indexing_technique: str | None = None,
         summary_index_setting: dict | None = None,
+        doc_language: str | None = None,
     ) -> Mapping[str, Any]:
         """
         Generate preview output with summaries for chunks in preview mode.

@@ -326,6 +336,7 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
             dataset: Dataset object (for tenant_id)
             indexing_technique: Indexing technique from node config or dataset
             summary_index_setting: Summary index setting from node config or dataset
+            doc_language: Optional document language to ensure summary is generated in the correct language
         """
         index_processor = IndexProcessorFactory(chunk_structure).init_index_processor()
         preview_output = index_processor.format_preview(chunks)
@@ -365,6 +376,7 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
                         tenant_id=dataset.tenant_id,
                         text=preview_item["content"],
                         summary_index_setting=summary_index_setting,
+                        document_language=doc_language,
                     )
                     if summary:
                         preview_item["summary"] = summary

@@ -374,6 +386,7 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
                     tenant_id=dataset.tenant_id,
                     text=preview_item["content"],
                     summary_index_setting=summary_index_setting,
+                    document_language=doc_language,
                 )
                 if summary:
                     preview_item["summary"] = summary
@@ -1,6 +1,6 @@
 [project]
 name = "dify-api"
-version = "1.12.0"
+version = "1.11.4"
 requires-python = ">=3.11,<3.13"

 dependencies = [
@@ -16,6 +16,7 @@ from sqlalchemy.orm import Session
 from werkzeug.exceptions import Forbidden, NotFound

 from configs import dify_config
+from core.db.session_factory import session_factory
 from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
 from core.file import helpers as file_helpers
 from core.helper.name_generator import generate_incremental_name
@@ -1388,6 +1389,46 @@ class DocumentService:
         ).all()
         return documents

+    @staticmethod
+    def update_documents_need_summary(dataset_id: str, document_ids: Sequence[str], need_summary: bool = True) -> int:
+        """
+        Update need_summary field for multiple documents.
+
+        This method handles the case where documents were created when summary_index_setting was disabled,
+        and need to be updated when summary_index_setting is later enabled.
+
+        Args:
+            dataset_id: Dataset ID
+            document_ids: List of document IDs to update
+            need_summary: Value to set for need_summary field (default: True)
+
+        Returns:
+            Number of documents updated
+        """
+        if not document_ids:
+            return 0
+
+        document_id_list: list[str] = [str(document_id) for document_id in document_ids]
+
+        with session_factory.create_session() as session:
+            updated_count = (
+                session.query(Document)
+                .filter(
+                    Document.id.in_(document_id_list),
+                    Document.dataset_id == dataset_id,
+                    Document.doc_form != "qa_model",  # Skip qa_model documents
+                )
+                .update({Document.need_summary: need_summary}, synchronize_session=False)
+            )
+            session.commit()
+            logger.info(
+                "Updated need_summary to %s for %d documents in dataset %s",
+                need_summary,
+                updated_count,
+                dataset_id,
+            )
+            return updated_count
+
     @staticmethod
     def get_document_download_url(document: Document) -> str:
         """
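The new `update_documents_need_summary` issues one bulk UPDATE rather than loading and saving each row. A runnable sketch of the same SQLAlchemy pattern against an in-memory database (the model here is a minimal stand-in, not Dify's Document):

```python
from sqlalchemy import Boolean, Column, String, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class Document(Base):
    __tablename__ = "documents"
    id = Column(String, primary_key=True)
    dataset_id = Column(String)
    doc_form = Column(String)
    need_summary = Column(Boolean, default=False)


engine = create_engine("sqlite://")  # in-memory stand-in for the real database
Base.metadata.create_all(engine)

with Session(engine) as session:
    updated = (
        session.query(Document)
        .filter(
            Document.id.in_(["d1", "d2"]),
            Document.dataset_id == "ds1",
            Document.doc_form != "qa_model",
        )
        # synchronize_session=False skips reconciling in-memory objects,
        # which is safe because nothing from this query is kept around.
        .update({Document.need_summary: True}, synchronize_session=False)
    )
    session.commit()
    print(f"updated {updated} rows")
```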
@@ -174,6 +174,10 @@ class RagPipelineTransformService:
         else:
             dataset.retrieval_model = knowledge_configuration.retrieval_model.model_dump()

+        # Copy summary_index_setting from dataset to knowledge_index node configuration
+        if dataset.summary_index_setting:
+            knowledge_configuration.summary_index_setting = dataset.summary_index_setting
+
         knowledge_configuration_dict.update(knowledge_configuration.model_dump())
         node["data"] = knowledge_configuration_dict
         return node
@@ -49,11 +49,18 @@ class SummaryIndexService:
         # Use lazy import to avoid circular import
         from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor

+        # Get document language to ensure summary is generated in the correct language
+        # This is especially important for image-only chunks where text is empty or minimal
+        document_language = None
+        if segment.document and segment.document.doc_language:
+            document_language = segment.document.doc_language
+
         summary_content, usage = ParagraphIndexProcessor.generate_summary(
             tenant_id=dataset.tenant_id,
             text=segment.content,
             summary_index_setting=summary_index_setting,
             segment_id=segment.id,
+            document_language=document_language,
         )

         if not summary_content:
@@ -558,6 +565,9 @@ class SummaryIndexService:
             )
             session.add(summary_record)

+        # Commit the batch created records
+        session.commit()
+
     @staticmethod
     def update_summary_record_error(
         segment: DocumentSegment,
|
||||
dataset=dataset,
|
||||
status="not_started",
|
||||
)
|
||||
session.commit() # Commit initial records
|
||||
|
||||
summary_records = []
|
||||
|
||||
|
||||
api/uv.lock (generated, 2 lines changed)
@@ -1368,7 +1368,7 @@ wheels = [

 [[package]]
 name = "dify-api"
-version = "1.12.0"
+version = "1.11.4"
 source = { virtual = "." }
 dependencies = [
     { name = "aliyun-log-python-sdk" },
@@ -21,7 +21,7 @@ services:

   # API service
   api:
-    image: langgenius/dify-api:1.12.0
+    image: langgenius/dify-api:1.11.4
     restart: always
     environment:
       # Use the shared environment variables.

@@ -63,7 +63,7 @@ services:
   # worker service
   # The Celery worker for processing all queues (dataset, workflow, mail, etc.)
   worker:
-    image: langgenius/dify-api:1.12.0
+    image: langgenius/dify-api:1.11.4
     restart: always
     environment:
       # Use the shared environment variables.

@@ -102,7 +102,7 @@ services:
   # worker_beat service
   # Celery beat for scheduling periodic tasks.
   worker_beat:
-    image: langgenius/dify-api:1.12.0
+    image: langgenius/dify-api:1.11.4
     restart: always
     environment:
       # Use the shared environment variables.

@@ -132,7 +132,7 @@ services:

   # Frontend web application.
   web:
-    image: langgenius/dify-web:1.12.0
+    image: langgenius/dify-web:1.11.4
     restart: always
     environment:
       CONSOLE_API_URL: ${CONSOLE_API_URL:-}

@@ -707,7 +707,7 @@ services:

   # API service
   api:
-    image: langgenius/dify-api:1.12.0
+    image: langgenius/dify-api:1.11.4
     restart: always
     environment:
       # Use the shared environment variables.

@@ -749,7 +749,7 @@ services:
   # worker service
   # The Celery worker for processing all queues (dataset, workflow, mail, etc.)
   worker:
-    image: langgenius/dify-api:1.12.0
+    image: langgenius/dify-api:1.11.4
     restart: always
     environment:
       # Use the shared environment variables.

@@ -788,7 +788,7 @@ services:
   # worker_beat service
   # Celery beat for scheduling periodic tasks.
   worker_beat:
-    image: langgenius/dify-api:1.12.0
+    image: langgenius/dify-api:1.11.4
     restart: always
     environment:
       # Use the shared environment variables.

@@ -818,7 +818,7 @@ services:

   # Frontend web application.
   web:
-    image: langgenius/dify-web:1.12.0
+    image: langgenius/dify-web:1.11.4
     restart: always
     environment:
       CONSOLE_API_URL: ${CONSOLE_API_URL:-}
@ -1,8 +1,9 @@
|
||||
import type { ListChildComponentProps } from 'react-window'
|
||||
import type { DataSourceNotionPage, DataSourceNotionPageMap } from '@/models/common'
|
||||
import { RiArrowDownSLine, RiArrowRightSLine } from '@remixicon/react'
|
||||
import { useVirtualizer } from '@tanstack/react-virtual'
|
||||
import { memo, useCallback, useMemo, useRef, useState } from 'react'
|
||||
import { memo, useEffect, useMemo, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { areEqual, FixedSizeList as List } from 'react-window'
|
||||
import { cn } from '@/utils/classnames'
|
||||
import Checkbox from '../../checkbox'
|
||||
import NotionIcon from '../../notion-icon'
|
||||
@ -31,22 +32,6 @@ type NotionPageItem = {
|
||||
depth: number
|
||||
} & DataSourceNotionPage
|
||||
|
||||
type ItemProps = {
|
||||
virtualStart: number
|
||||
virtualSize: number
|
||||
current: NotionPageItem
|
||||
onToggle: (pageId: string) => void
|
||||
checkedIds: Set<string>
|
||||
disabledCheckedIds: Set<string>
|
||||
onCheck: (pageId: string) => void
|
||||
canPreview?: boolean
|
||||
onPreview: (pageId: string) => void
|
||||
listMapWithChildrenAndDescendants: NotionPageTreeMap
|
||||
searchValue: string
|
||||
previewPageId: string
|
||||
pagesMap: DataSourceNotionPageMap
|
||||
}
|
||||
|
||||
const recursivePushInParentDescendants = (
|
||||
pagesMap: DataSourceNotionPageMap,
|
||||
listTreeMap: NotionPageTreeMap,
|
||||
@ -84,22 +69,34 @@ const recursivePushInParentDescendants = (
|
||||
}
|
||||
}
|
||||
|
||||
const ItemComponent = ({
|
||||
virtualStart,
|
||||
virtualSize,
|
||||
current,
|
||||
onToggle,
|
||||
checkedIds,
|
||||
disabledCheckedIds,
|
||||
onCheck,
|
||||
canPreview,
|
||||
onPreview,
|
||||
listMapWithChildrenAndDescendants,
|
||||
searchValue,
|
||||
previewPageId,
|
||||
pagesMap,
|
||||
}: ItemProps) => {
|
||||
const ItemComponent = ({ index, style, data }: ListChildComponentProps<{
|
||||
dataList: NotionPageItem[]
|
||||
handleToggle: (index: number) => void
|
||||
checkedIds: Set<string>
|
||||
disabledCheckedIds: Set<string>
|
||||
handleCheck: (index: number) => void
|
||||
canPreview?: boolean
|
||||
handlePreview: (index: number) => void
|
||||
listMapWithChildrenAndDescendants: NotionPageTreeMap
|
||||
searchValue: string
|
||||
previewPageId: string
|
||||
pagesMap: DataSourceNotionPageMap
|
||||
}>) => {
|
||||
const { t } = useTranslation()
|
||||
const {
|
||||
dataList,
|
||||
handleToggle,
|
||||
checkedIds,
|
||||
disabledCheckedIds,
|
||||
handleCheck,
|
||||
canPreview,
|
||||
handlePreview,
|
||||
listMapWithChildrenAndDescendants,
|
||||
searchValue,
|
||||
previewPageId,
|
||||
pagesMap,
|
||||
} = data
|
||||
const current = dataList[index]
|
||||
const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[current.page_id]
|
||||
const hasChild = currentWithChildrenAndDescendants.descendants.size > 0
|
||||
const ancestors = currentWithChildrenAndDescendants.ancestors
|
||||
@ -112,7 +109,7 @@ const ItemComponent = ({
|
||||
<div
|
||||
className="mr-1 flex h-5 w-5 shrink-0 items-center justify-center rounded-md hover:bg-components-button-ghost-bg-hover"
|
||||
style={{ marginLeft: current.depth * 8 }}
|
||||
onClick={() => onToggle(current.page_id)}
|
||||
onClick={() => handleToggle(index)}
|
||||
>
|
||||
{
|
||||
current.expand
|
||||
@ -135,21 +132,15 @@ const ItemComponent = ({
|
||||
return (
|
||||
<div
|
||||
className={cn('group flex cursor-pointer items-center rounded-md pl-2 pr-[2px] hover:bg-state-base-hover', previewPageId === current.page_id && 'bg-state-base-hover')}
|
||||
style={{
|
||||
position: 'absolute',
|
||||
top: 0,
|
||||
left: 8,
|
||||
right: 8,
|
||||
width: 'calc(100% - 16px)',
|
||||
height: virtualSize,
|
||||
transform: `translateY(${virtualStart + 8}px)`,
|
||||
}}
|
||||
style={{ ...style, top: style.top as number + 8, left: 8, right: 8, width: 'calc(100% - 16px)' }}
|
||||
>
|
||||
<Checkbox
|
||||
className="mr-2 shrink-0"
|
||||
checked={checkedIds.has(current.page_id)}
|
||||
disabled={disabled}
|
||||
onCheck={() => onCheck(current.page_id)}
|
||||
onCheck={() => {
|
||||
handleCheck(index)
|
||||
}}
|
||||
/>
|
||||
{!searchValue && renderArrow()}
|
||||
<NotionIcon
|
||||
@ -169,7 +160,7 @@ const ItemComponent = ({
|
||||
className="ml-1 hidden h-6 shrink-0 cursor-pointer items-center rounded-md border-[0.5px] border-components-button-secondary-border bg-components-button-secondary-bg px-2 text-xs
|
||||
font-medium leading-4 text-components-button-secondary-text shadow-xs shadow-shadow-shadow-3 backdrop-blur-[10px]
|
||||
hover:border-components-button-secondary-border-hover hover:bg-components-button-secondary-bg-hover group-hover:flex"
|
||||
onClick={() => onPreview(current.page_id)}
|
||||
onClick={() => handlePreview(index)}
|
||||
>
|
||||
{t('dataSource.notion.selector.preview', { ns: 'common' })}
|
||||
</div>
|
||||
@ -188,7 +179,7 @@ const ItemComponent = ({
|
||||
</div>
|
||||
)
|
||||
}
|
||||
const Item = memo(ItemComponent)
|
||||
const Item = memo(ItemComponent, areEqual)
|
||||
|
||||
const PageSelector = ({
|
||||
value,
|
||||
@ -202,10 +193,31 @@ const PageSelector = ({
|
||||
onPreview,
|
||||
}: PageSelectorProps) => {
|
||||
const { t } = useTranslation()
|
||||
const parentRef = useRef<HTMLDivElement>(null)
|
||||
const [expandedIds, setExpandedIds] = useState<Set<string>>(() => new Set())
|
||||
const [dataList, setDataList] = useState<NotionPageItem[]>([])
|
||||
const [localPreviewPageId, setLocalPreviewPageId] = useState('')
|
||||
|
||||
useEffect(() => {
|
||||
setDataList(list.filter(item => item.parent_id === 'root' || !pagesMap[item.parent_id]).map((item) => {
|
||||
return {
|
||||
...item,
|
||||
expand: false,
|
||||
depth: 0,
|
||||
}
|
||||
}))
|
||||
}, [list])
|
||||
|
||||
const searchDataList = list.filter((item) => {
|
||||
return item.page_name.includes(searchValue)
|
||||
}).map((item) => {
|
||||
return {
|
||||
...item,
|
||||
expand: false,
|
||||
depth: 0,
|
||||
}
|
||||
})
|
||||
const currentDataList = searchValue ? searchDataList : dataList
|
||||
const currentPreviewPageId = previewPageId === undefined ? localPreviewPageId : previewPageId
|
||||
|
||||
const listMapWithChildrenAndDescendants = useMemo(() => {
|
||||
return list.reduce((prev: NotionPageTreeMap, next: DataSourceNotionPage) => {
|
||||
const pageId = next.page_id
|
||||
@ -217,89 +229,47 @@ const PageSelector = ({
|
||||
}, {})
|
||||
}, [list, pagesMap])
|
||||
|
||||
const childrenByParent = useMemo(() => {
|
||||
const map = new Map<string | null, DataSourceNotionPage[]>()
|
||||
for (const item of list) {
|
||||
const isRoot = item.parent_id === 'root' || !pagesMap[item.parent_id]
|
||||
const parentKey = isRoot ? null : item.parent_id
|
||||
const children = map.get(parentKey) || []
|
||||
children.push(item)
|
||||
map.set(parentKey, children)
|
||||
}
|
||||
return map
|
||||
}, [list, pagesMap])
|
||||
|
||||
const dataList = useMemo(() => {
|
||||
const result: NotionPageItem[] = []
|
||||
|
||||
const buildVisibleList = (parentId: string | null, depth: number) => {
|
||||
const items = childrenByParent.get(parentId) || []
|
||||
|
||||
for (const item of items) {
|
||||
const isExpanded = expandedIds.has(item.page_id)
|
||||
result.push({
|
||||
...item,
|
||||
expand: isExpanded,
|
||||
depth,
|
||||
})
|
||||
if (isExpanded) {
|
||||
buildVisibleList(item.page_id, depth + 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
buildVisibleList(null, 0)
|
||||
return result
|
||||
}, [childrenByParent, expandedIds])
|
||||
|
||||
const searchDataList = useMemo(() => list.filter((item) => {
|
||||
return item.page_name.includes(searchValue)
|
||||
}).map((item) => {
|
||||
return {
|
||||
...item,
|
||||
expand: false,
|
||||
depth: 0,
|
||||
}
|
||||
}), [list, searchValue])
|
||||
|
||||
const currentDataList = searchValue ? searchDataList : dataList
|
||||
const currentPreviewPageId = previewPageId === undefined ? localPreviewPageId : previewPageId
|
||||
|
||||
const virtualizer = useVirtualizer({
|
||||
count: currentDataList.length,
|
||||
getScrollElement: () => parentRef.current,
|
||||
estimateSize: () => 28,
|
||||
overscan: 5,
|
||||
getItemKey: index => currentDataList[index].page_id,
|
||||
})
|
||||
|
||||
const handleToggle = useCallback((pageId: string) => {
|
||||
setExpandedIds((prev) => {
|
||||
const next = new Set(prev)
|
||||
if (prev.has(pageId)) {
|
||||
next.delete(pageId)
|
||||
const descendants = listMapWithChildrenAndDescendants[pageId]?.descendants
|
||||
if (descendants) {
|
||||
for (const descendantId of descendants)
|
||||
next.delete(descendantId)
|
||||
}
|
||||
}
|
||||
else {
|
||||
next.add(pageId)
|
||||
}
|
||||
return next
|
||||
})
|
||||
}, [listMapWithChildrenAndDescendants])
|
||||
|
||||
const handleCheck = useCallback((pageId: string) => {
|
||||
const handleToggle = (index: number) => {
|
||||
const current = dataList[index]
|
||||
const pageId = current.page_id
|
||||
const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[pageId]
|
||||
const descendantsIds = Array.from(currentWithChildrenAndDescendants.descendants)
|
||||
const childrenIds = Array.from(currentWithChildrenAndDescendants.children)
|
||||
let newDataList = []
|
||||
|
||||
if (current.expand) {
|
||||
current.expand = false
|
||||
|
||||
newDataList = dataList.filter(item => !descendantsIds.includes(item.page_id))
|
||||
}
|
||||
else {
|
||||
current.expand = true
|
||||
|
||||
newDataList = [
|
||||
...dataList.slice(0, index + 1),
|
||||
...childrenIds.map(item => ({
|
||||
...pagesMap[item],
|
||||
expand: false,
|
||||
depth: listMapWithChildrenAndDescendants[item].depth,
|
||||
})),
|
||||
...dataList.slice(index + 1),
|
||||
]
|
||||
}
|
||||
setDataList(newDataList)
|
||||
}
|
||||
|
||||
const copyValue = new Set(value)
|
||||
const handleCheck = (index: number) => {
|
||||
const current = currentDataList[index]
|
||||
const pageId = current.page_id
|
||||
const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[pageId]
|
||||
const copyValue = new Set(value)
|
||||
|
||||
if (copyValue.has(pageId)) {
|
||||
if (!searchValue) {
|
||||
for (const item of currentWithChildrenAndDescendants.descendants)
|
||||
copyValue.delete(item)
|
||||
}
|
||||
|
||||
copyValue.delete(pageId)
|
||||
}
|
||||
else {
|
||||
@ -307,17 +277,22 @@ const PageSelector = ({
|
||||
for (const item of currentWithChildrenAndDescendants.descendants)
|
||||
copyValue.add(item)
|
||||
}
|
||||
|
||||
copyValue.add(pageId)
|
||||
}
|
||||
|
||||
onSelect(copyValue)
|
||||
}, [listMapWithChildrenAndDescendants, onSelect, searchValue, value])
|
||||
onSelect(new Set(copyValue))
|
||||
}
|
||||
|
||||
const handlePreview = (index: number) => {
|
||||
const current = currentDataList[index]
|
||||
const pageId = current.page_id
|
||||
|
||||
const handlePreview = useCallback((pageId: string) => {
|
||||
setLocalPreviewPageId(pageId)
|
||||
|
||||
if (onPreview)
|
||||
onPreview(pageId)
|
||||
}, [onPreview])
|
||||
}
|
||||
|
||||
if (!currentDataList.length) {
|
||||
return (
|
||||
@@ -328,41 +303,29 @@ const PageSelector = ({
  }

  return (
    <div
      ref={parentRef}
    <List
      className="py-2"
      style={{ height: 296, width: '100%', overflow: 'auto' }}
      height={296}
      itemCount={currentDataList.length}
      itemSize={28}
      width="100%"
      itemKey={(index, data) => data.dataList[index].page_id}
      itemData={{
        dataList: currentDataList,
        handleToggle,
        checkedIds: value,
        disabledCheckedIds: disabledValue,
        handleCheck,
        canPreview,
        handlePreview,
        listMapWithChildrenAndDescendants,
        searchValue,
        previewPageId: currentPreviewPageId,
        pagesMap,
      }}
    >
      <div
        style={{
          height: virtualizer.getTotalSize(),
          width: '100%',
          position: 'relative',
        }}
      >
        {virtualizer.getVirtualItems().map((virtualRow) => {
          const current = currentDataList[virtualRow.index]
          return (
            <Item
              key={virtualRow.key}
              virtualStart={virtualRow.start}
              virtualSize={virtualRow.size}
              current={current}
              onToggle={handleToggle}
              checkedIds={value}
              disabledCheckedIds={disabledValue}
              onCheck={handleCheck}
              canPreview={canPreview}
              onPreview={handlePreview}
              listMapWithChildrenAndDescendants={listMapWithChildrenAndDescendants}
              searchValue={searchValue}
              previewPageId={currentPreviewPageId}
              pagesMap={pagesMap}
            />
          )
        })}
      </div>
    </div>
      {Item}
    </List>
  )
}
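For orientation, the react-window side of this diff follows the library's standard contract: FixedSizeList owns scrolling and row positioning, shared state flows into rows through itemData, and itemKey keeps row identity stable across scrolls. A minimal self-contained sketch of that pattern, with illustrative names (Row, PageList, pages) that are not part of this PR:

import type { ListChildComponentProps } from 'react-window'
import { FixedSizeList as List } from 'react-window'

type RowData = { pages: { page_id: string, page_name: string }[] }

// Each row receives its index, an absolute-positioning style it must
// spread onto its root element, and the shared itemData object.
const Row = ({ index, style, data }: ListChildComponentProps<RowData>) => (
  <div style={style}>{data.pages[index].page_name}</div>
)

const PageList = ({ pages }: RowData) => (
  <List
    height={296}
    width="100%"
    itemCount={pages.length}
    itemSize={28}
    itemKey={(index, data) => data.pages[index].page_id}
    itemData={{ pages }}
  >
    {Row}
  </List>
)

export default PageList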
@@ -11,18 +11,21 @@ import { recursivePushInParentDescendants } from './utils'

// Note: react-i18next uses global mock from web/vitest.setup.ts

// Mock @tanstack/react-virtual useVirtualizer hook - renders items directly for testing
vi.mock('@tanstack/react-virtual', () => ({
  useVirtualizer: ({ count, getItemKey }: { count: number, getItemKey?: (index: number) => string }) => ({
    getVirtualItems: () =>
      Array.from({ length: count }).map((_, index) => ({
        index,
        key: getItemKey ? getItemKey(index) : index,
        start: index * 28,
        size: 28,
      })),
    getTotalSize: () => count * 28,
  }),
// Mock react-window FixedSizeList - renders items directly for testing
vi.mock('react-window', () => ({
  FixedSizeList: ({ children: ItemComponent, itemCount, itemData, itemKey }: any) => (
    <div data-testid="virtual-list">
      {Array.from({ length: itemCount }).map((_, index) => (
        <ItemComponent
          key={itemKey?.(index, itemData) || index}
          index={index}
          style={{ top: index * 28, left: 0, right: 0, width: '100%', position: 'absolute' }}
          data={itemData}
        />
      ))}
    </div>
  ),
  areEqual: (prevProps: any, nextProps: any) => prevProps === nextProps,
}))

// Note: NotionIcon from @/app/components/base/ is NOT mocked - using real component per testing guidelines
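One detail worth calling out in the mock above: a vi.mock factory replaces the entire module, so the react-window mock must re-export every named binding the code under test imports, not just FixedSizeList. item.tsx also imports areEqual for its React.memo comparator; if the factory omitted it, the import would silently resolve to undefined and memo would fall back to its default shallow comparison. A trimmed sketch of the required shape (illustrative):

import { vi } from 'vitest'

vi.mock('react-window', () => ({
  // Render every row eagerly so Testing Library queries can see them all.
  FixedSizeList: ({ children: Row, itemCount, itemData }: any) => (
    <div data-testid="virtual-list">
      {Array.from({ length: itemCount }).map((_, index) => (
        <Row key={index} index={index} style={{}} data={itemData} />
      ))}
    </div>
  ),
  // Re-export the named bindings the component under test imports.
  areEqual: (prev: any, next: any) => prev === next,
}))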
@@ -116,7 +119,7 @@ describe('PageSelector', () => {
    render(<PageSelector {...props} />)

    // Assert
    expect(screen.getByText('Test Page')).toBeInTheDocument()
    expect(screen.getByTestId('virtual-list')).toBeInTheDocument()
  })

  it('should render empty state when list is empty', () => {
@@ -131,7 +134,7 @@ describe('PageSelector', () => {

    // Assert
    expect(screen.getByText('common.dataSource.notion.selector.noSearchResult')).toBeInTheDocument()
    expect(screen.queryByText('Test Page')).not.toBeInTheDocument()
    expect(screen.queryByTestId('virtual-list')).not.toBeInTheDocument()
  })

  it('should render items using FixedSizeList', () => {
@@ -1163,7 +1166,7 @@ describe('PageSelector', () => {
    render(<PageSelector {...props} />)

    // Assert
    expect(screen.getByText('Test Page')).toBeInTheDocument()
    expect(screen.getByTestId('virtual-list')).toBeInTheDocument()
  })

  it('should handle special characters in page name', () => {
@@ -1337,7 +1340,7 @@ describe('PageSelector', () => {
    render(<PageSelector {...props} />)

    // Assert
    expect(screen.getByText('Test Page')).toBeInTheDocument()
    expect(screen.getByTestId('virtual-list')).toBeInTheDocument()
    if (propVariation.canPreview)
      expect(screen.getByText('common.dataSource.notion.selector.preview')).toBeInTheDocument()
    else
@@ -1,7 +1,7 @@
import type { DataSourceNotionPage, DataSourceNotionPageMap } from '@/models/common'
import { useVirtualizer } from '@tanstack/react-virtual'
import { useCallback, useMemo, useRef, useState } from 'react'
import { useCallback, useEffect, useMemo, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { FixedSizeList as List } from 'react-window'
import Item from './item'
import { recursivePushInParentDescendants } from './utils'
@@ -45,16 +45,29 @@ const PageSelector = ({
  currentCredentialId,
}: PageSelectorProps) => {
  const { t } = useTranslation()
  const parentRef = useRef<HTMLDivElement>(null)
  const [expandedIds, setExpandedIds] = useState<Set<string>>(() => new Set())
  const [dataList, setDataList] = useState<NotionPageItem[]>([])
  const [currentPreviewPageId, setCurrentPreviewPageId] = useState('')
  const prevCredentialIdRef = useRef(currentCredentialId)

  // Reset expanded state when credential changes (render-time detection)
  if (prevCredentialIdRef.current !== currentCredentialId) {
    prevCredentialIdRef.current = currentCredentialId
    setExpandedIds(new Set())
  }
  useEffect(() => {
    setDataList(list.filter(item => item.parent_id === 'root' || !pagesMap[item.parent_id]).map((item) => {
      return {
        ...item,
        expand: false,
        depth: 0,
      }
    }))
  }, [currentCredentialId])

  const searchDataList = list.filter((item) => {
    return item.page_name.includes(searchValue)
  }).map((item) => {
    return {
      ...item,
      expand: false,
      depth: 0,
    }
  })
  const currentDataList = searchValue ? searchDataList : dataList

  const listMapWithChildrenAndDescendants = useMemo(() => {
    return list.reduce((prev: NotionPageTreeMap, next: DataSourceNotionPage) => {
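The prevCredentialIdRef block above is React's documented render-time reset pattern: compare the incoming prop against the value seen on the previous render and, when it changed, call the state setter during render so React restarts the render with fresh state, with no extra committed frame and no effect. A minimal standalone sketch of the pattern (component and prop names are illustrative):

import { useRef, useState } from 'react'

function ExpandableTree({ credentialId }: { credentialId: string }) {
  const [expandedIds, setExpandedIds] = useState<Set<string>>(() => new Set())
  const prevCredentialId = useRef(credentialId)

  // Render-time detection: when the prop changes, reset dependent state
  // immediately; React re-runs this component before committing.
  if (prevCredentialId.current !== credentialId) {
    prevCredentialId.current = credentialId
    setExpandedIds(new Set())
  }

  return <div>{expandedIds.size} nodes expanded</div>
}

export default ExpandableTree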
@@ -67,86 +80,39 @@ const PageSelector = ({
    }, {})
  }, [list, pagesMap])

  // Pre-build children index for O(1) lookup instead of O(n) filter
  const childrenByParent = useMemo(() => {
    const map = new Map<string | null, DataSourceNotionPage[]>()
    for (const item of list) {
      const isRoot = item.parent_id === 'root' || !pagesMap[item.parent_id]
      const parentKey = isRoot ? null : item.parent_id
      const children = map.get(parentKey) || []
      children.push(item)
      map.set(parentKey, children)
  const handleToggle = useCallback((index: number) => {
    const current = dataList[index]
    const pageId = current.page_id
    const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[pageId]
    const descendantsIds = Array.from(currentWithChildrenAndDescendants.descendants)
    const childrenIds = Array.from(currentWithChildrenAndDescendants.children)
    let newDataList = []

    if (current.expand) {
      current.expand = false

      newDataList = dataList.filter(item => !descendantsIds.includes(item.page_id))
    }
    return map
  }, [list, pagesMap])
    else {
      current.expand = true

  // Compute visible data list based on expanded state
  const dataList = useMemo(() => {
    const result: NotionPageItem[] = []

    const buildVisibleList = (parentId: string | null, depth: number) => {
      const items = childrenByParent.get(parentId) || []

      for (const item of items) {
        const isExpanded = expandedIds.has(item.page_id)
        result.push({
          ...item,
          expand: isExpanded,
          depth,
        })
        if (isExpanded) {
          buildVisibleList(item.page_id, depth + 1)
        }
      }
      newDataList = [
        ...dataList.slice(0, index + 1),
        ...childrenIds.map(item => ({
          ...pagesMap[item],
          expand: false,
          depth: listMapWithChildrenAndDescendants[item].depth,
        })),
        ...dataList.slice(index + 1),
      ]
    }
    setDataList(newDataList)
  }, [dataList, listMapWithChildrenAndDescendants, pagesMap])

    buildVisibleList(null, 0)
    return result
  }, [childrenByParent, expandedIds])

  const searchDataList = useMemo(() => list.filter((item) => {
    return item.page_name.includes(searchValue)
  }).map((item) => {
    return {
      ...item,
      expand: false,
      depth: 0,
    }
  }), [list, searchValue])

  const currentDataList = searchValue ? searchDataList : dataList

  const virtualizer = useVirtualizer({
    count: currentDataList.length,
    getScrollElement: () => parentRef.current,
    estimateSize: () => 28,
    overscan: 5,
    getItemKey: index => currentDataList[index].page_id,
  })

  // Stable callback - no dependencies on dataList
  const handleToggle = useCallback((pageId: string) => {
    setExpandedIds((prev) => {
      const next = new Set(prev)
      if (prev.has(pageId)) {
        // Collapse: remove current and all descendants
        next.delete(pageId)
        const descendants = listMapWithChildrenAndDescendants[pageId]?.descendants
        if (descendants) {
          for (const descendantId of descendants)
            next.delete(descendantId)
        }
      }
      else {
        next.add(pageId)
      }
      return next
    })
  }, [listMapWithChildrenAndDescendants])

  // Stable callback - uses pageId parameter instead of index
  const handleCheck = useCallback((pageId: string) => {
  const handleCheck = useCallback((index: number) => {
    const copyValue = new Set(checkedIds)
    const current = currentDataList[index]
    const pageId = current.page_id
    const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[pageId]

    if (copyValue.has(pageId)) {
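The childrenByParent index plus the recursive buildVisibleList above is a standard way to flatten a tree for a virtual list: index children by parent once, then walk depth-first, emitting one row per node and descending only through expanded nodes. A self-contained sketch of the same idea on plain data (types and names are illustrative, not from this PR):

type Page = { page_id: string, parent_id: string, page_name: string }
type Row = Page & { depth: number, expand: boolean }

function visibleRows(pages: Page[], expanded: Set<string>): Row[] {
  const ids = new Set(pages.map(p => p.page_id))

  // Index children by parent once: O(n), instead of filtering per node.
  const byParent = new Map<string | null, Page[]>()
  for (const page of pages) {
    const key = ids.has(page.parent_id) ? page.parent_id : null
    const bucket = byParent.get(key) ?? []
    bucket.push(page)
    byParent.set(key, bucket)
  }

  // Depth-first walk emitting one row per visible node.
  const rows: Row[] = []
  const walk = (parent: string | null, depth: number) => {
    for (const page of byParent.get(parent) ?? []) {
      const expand = expanded.has(page.page_id)
      rows.push({ ...page, depth, expand })
      if (expand)
        walk(page.page_id, depth + 1)
    }
  }
  walk(null, 0)
  return rows
}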
@@ -154,6 +120,7 @@
      for (const item of currentWithChildrenAndDescendants.descendants)
        copyValue.delete(item)
    }

    copyValue.delete(pageId)
  }
  else {
@@ -171,15 +138,18 @@
      }
    }

    onSelect(copyValue)
  }, [checkedIds, isMultipleChoice, listMapWithChildrenAndDescendants, onSelect, searchValue])
    onSelect(new Set(copyValue))
  }, [currentDataList, isMultipleChoice, listMapWithChildrenAndDescendants, onSelect, searchValue, checkedIds])

  const handlePreview = useCallback((index: number) => {
    const current = currentDataList[index]
    const pageId = current.page_id

  // Stable callback
  const handlePreview = useCallback((pageId: string) => {
    setCurrentPreviewPageId(pageId)

    if (onPreview)
      onPreview(pageId)
  }, [onPreview])
  }, [currentDataList, onPreview])

  if (!currentDataList.length) {
    return (
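Both versions of handleCheck share the same Set discipline: copy the checked-id Set, toggle the page plus (outside of search, where the tree is flattened) all of its descendants, and hand the consumer a fresh Set so reference equality signals the change. A minimal sketch of that propagation on plain data (names are illustrative):

type TreeMap = Record<string, { descendants: Set<string> }>

// Toggle `pageId` in `checked`, cascading to descendants; returns a new Set
// so consumers relying on reference equality re-render.
function toggleChecked(checked: Set<string>, pageId: string, tree: TreeMap, cascade: boolean): Set<string> {
  const next = new Set(checked)
  const descendants = tree[pageId]?.descendants ?? new Set<string>()
  if (next.has(pageId)) {
    next.delete(pageId)
    if (cascade) {
      for (const id of descendants) next.delete(id)
    }
  }
  else {
    next.add(pageId)
    if (cascade) {
      for (const id of descendants) next.add(id)
    }
  }
  return next
}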
@@ -190,42 +160,30 @@
  }

  return (
    <div
      ref={parentRef}
    <List
      className="py-2"
      style={{ height: 296, width: '100%', overflow: 'auto' }}
      height={296}
      itemCount={currentDataList.length}
      itemSize={28}
      width="100%"
      itemKey={(index, data) => data.dataList[index].page_id}
      itemData={{
        dataList: currentDataList,
        handleToggle,
        checkedIds,
        disabledCheckedIds: disabledValue,
        handleCheck,
        canPreview,
        handlePreview,
        listMapWithChildrenAndDescendants,
        searchValue,
        previewPageId: currentPreviewPageId,
        pagesMap,
        isMultipleChoice,
      }}
    >
      <div
        style={{
          height: virtualizer.getTotalSize(),
          width: '100%',
          position: 'relative',
        }}
      >
        {virtualizer.getVirtualItems().map((virtualRow) => {
          const current = currentDataList[virtualRow.index]
          return (
            <Item
              key={virtualRow.key}
              virtualStart={virtualRow.start}
              virtualSize={virtualRow.size}
              current={current}
              onToggle={handleToggle}
              checkedIds={checkedIds}
              disabledCheckedIds={disabledValue}
              onCheck={handleCheck}
              canPreview={canPreview}
              onPreview={handlePreview}
              listMapWithChildrenAndDescendants={listMapWithChildrenAndDescendants}
              searchValue={searchValue}
              previewPageId={currentPreviewPageId}
              pagesMap={pagesMap}
              isMultipleChoice={isMultipleChoice}
            />
          )
        })}
      </div>
    </div>
      {Item}
    </List>
  )
}
@@ -1,7 +1,9 @@
import type { ListChildComponentProps } from 'react-window'
import type { DataSourceNotionPage, DataSourceNotionPageMap } from '@/models/common'
import { RiArrowDownSLine, RiArrowRightSLine } from '@remixicon/react'
import { memo } from 'react'
import * as React from 'react'
import { useTranslation } from 'react-i18next'
import { areEqual } from 'react-window'
import Checkbox from '@/app/components/base/checkbox'
import NotionIcon from '@/app/components/base/notion-icon'
import Radio from '@/app/components/base/radio/ui'
@@ -21,40 +23,36 @@ type NotionPageItem = {
  depth: number
} & DataSourceNotionPage

type ItemProps = {
  virtualStart: number
  virtualSize: number
  current: NotionPageItem
  onToggle: (pageId: string) => void
const Item = ({ index, style, data }: ListChildComponentProps<{
  dataList: NotionPageItem[]
  handleToggle: (index: number) => void
  checkedIds: Set<string>
  disabledCheckedIds: Set<string>
  onCheck: (pageId: string) => void
  handleCheck: (index: number) => void
  canPreview?: boolean
  onPreview: (pageId: string) => void
  handlePreview: (index: number) => void
  listMapWithChildrenAndDescendants: NotionPageTreeMap
  searchValue: string
  previewPageId: string
  pagesMap: DataSourceNotionPageMap
  isMultipleChoice?: boolean
}

const Item = ({
  virtualStart,
  virtualSize,
  current,
  onToggle,
  checkedIds,
  disabledCheckedIds,
  onCheck,
  canPreview,
  onPreview,
  listMapWithChildrenAndDescendants,
  searchValue,
  previewPageId,
  pagesMap,
  isMultipleChoice,
}: ItemProps) => {
}>) => {
  const { t } = useTranslation()
  const {
    dataList,
    handleToggle,
    checkedIds,
    disabledCheckedIds,
    handleCheck,
    canPreview,
    handlePreview,
    listMapWithChildrenAndDescendants,
    searchValue,
    previewPageId,
    pagesMap,
    isMultipleChoice,
  } = data
  const current = dataList[index]
  const currentWithChildrenAndDescendants = listMapWithChildrenAndDescendants[current.page_id]
  const hasChild = currentWithChildrenAndDescendants.descendants.size > 0
  const ancestors = currentWithChildrenAndDescendants.ancestors
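The Item rewrite follows react-window's row contract: the component receives { index, style, data }, pulls the shared props out of data, and is exported through React.memo with react-window's areEqual comparator so a row only re-renders when its slice of itemData actually changes. A minimal sketch with an illustrative data shape (not the PR's real props):

import * as React from 'react'
import type { ListChildComponentProps } from 'react-window'
import { areEqual } from 'react-window'

type RowData = { names: string[], onPick: (index: number) => void }

const Row = ({ index, style, data }: ListChildComponentProps<RowData>) => (
  // `style` positions the row absolutely inside the scroll container;
  // spreading it onto the root element is required for virtualization.
  <div style={style} onClick={() => data.onPick(index)}>
    {data.names[index]}
  </div>
)

// areEqual shallow-compares { index, style, data } between renders, so
// keeping `data` referentially stable avoids re-rendering every row.
export default React.memo(Row, areEqual)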
@@ -67,7 +65,7 @@ const Item = ({
      <div
        className="mr-1 flex h-5 w-5 shrink-0 items-center justify-center rounded-md hover:bg-components-button-ghost-bg-hover"
        style={{ marginLeft: current.depth * 8 }}
        onClick={() => onToggle(current.page_id)}
        onClick={() => handleToggle(index)}
      >
        {
          current.expand
@@ -90,15 +88,7 @@
  return (
    <div
      className={cn('group flex cursor-pointer items-center rounded-md pl-2 pr-[2px] hover:bg-state-base-hover', previewPageId === current.page_id && 'bg-state-base-hover')}
      style={{
        position: 'absolute',
        top: 0,
        left: 8,
        right: 8,
        width: 'calc(100% - 16px)',
        height: virtualSize,
        transform: `translateY(${virtualStart + 8}px)`,
      }}
      style={{ ...style, top: style.top as number + 8, left: 8, right: 8, width: 'calc(100% - 16px)' }}
    >
      {isMultipleChoice
        ? (
@@ -106,7 +96,9 @@
            className="mr-2 shrink-0"
            checked={checkedIds.has(current.page_id)}
            disabled={disabled}
            onCheck={() => onCheck(current.page_id)}
            onCheck={() => {
              handleCheck(index)
            }}
          />
        )
        : (
@@ -114,7 +106,9 @@
            className="mr-2 shrink-0"
            isChecked={checkedIds.has(current.page_id)}
            disabled={disabled}
            onCheck={() => onCheck(current.page_id)}
            onCheck={() => {
              handleCheck(index)
            }}
          />
        )}
      {!searchValue && renderArrow()}
@@ -135,7 +129,7 @@
          className="ml-1 hidden h-6 shrink-0 cursor-pointer items-center rounded-md border-[0.5px] border-components-button-secondary-border bg-components-button-secondary-bg px-2 text-xs
          font-medium leading-4 text-components-button-secondary-text shadow-xs shadow-shadow-shadow-3 backdrop-blur-[10px]
          hover:border-components-button-secondary-border-hover hover:bg-components-button-secondary-bg-hover group-hover:flex"
          onClick={() => onPreview(current.page_id)}
          onClick={() => handlePreview(index)}
        >
          {t('dataSource.notion.selector.preview', { ns: 'common' })}
        </div>
@@ -155,4 +149,4 @@
  )
}

export default memo(Item)
export default React.memo(Item, areEqual)
@@ -1399,6 +1399,11 @@
      "count": 2
    }
  },
  "app/components/base/notion-page-selector/page-selector/index.tsx": {
    "react-hooks-extra/no-direct-set-state-in-use-effect": {
      "count": 1
    }
  },
  "app/components/base/pagination/index.tsx": {
    "unicorn/prefer-number-properties": {
      "count": 1
@@ -1843,7 +1848,12 @@
  },
  "app/components/datasets/documents/create-from-pipeline/data-source/online-documents/page-selector/index.spec.tsx": {
    "ts/no-explicit-any": {
      "count": 2
      "count": 5
    }
  },
  "app/components/datasets/documents/create-from-pipeline/data-source/online-documents/page-selector/index.tsx": {
    "react-hooks-extra/no-direct-set-state-in-use-effect": {
      "count": 1
    }
  },
  "app/components/datasets/documents/create-from-pipeline/data-source/online-drive/connect/index.spec.tsx": {
@@ -1,7 +1,7 @@
{
  "name": "dify-web",
  "type": "module",
  "version": "1.12.0",
  "version": "1.11.4",
  "private": true,
  "packageManager": "pnpm@10.27.0+sha512.72d699da16b1179c14ba9e64dc71c9a40988cbdc65c264cb0e489db7de917f20dcf4d64d8723625f2969ba52d4b7e2a1170682d9ac2a5dcaeaab732b7e16f04a",
  "imports": {
@@ -84,7 +84,6 @@
    "@tailwindcss/typography": "0.5.19",
    "@tanstack/react-form": "1.23.7",
    "@tanstack/react-query": "5.90.5",
    "@tanstack/react-virtual": "3.13.18",
    "abcjs": "6.5.2",
    "ahooks": "3.9.5",
    "class-variance-authority": "0.7.1",
@@ -143,6 +142,7 @@
    "react-sortablejs": "6.1.4",
    "react-syntax-highlighter": "15.6.6",
    "react-textarea-autosize": "8.5.9",
    "react-window": "1.8.11",
    "reactflow": "11.11.4",
    "rehype-katex": "7.0.1",
    "rehype-raw": "7.0.0",
@@ -199,6 +199,7 @@
    "@types/react-dom": "19.2.3",
    "@types/react-slider": "1.3.6",
    "@types/react-syntax-highlighter": "15.5.13",
    "@types/react-window": "1.8.8",
    "@types/semver": "7.7.1",
    "@types/sortablejs": "1.15.8",
    "@types/uuid": "10.0.0",
web/pnpm-lock.yaml (generated, 1905 lines changed): diff suppressed because it is too large.