mirror of
https://github.com/langgenius/dify.git
synced 2026-04-25 05:06:15 +08:00
refactor: move workflow package to dify_graph (#32844)
This commit is contained in:
152
api/dify_graph/nodes/knowledge_index/knowledge_index_node.py
Normal file
152
api/dify_graph/nodes/knowledge_index/knowledge_index_node.py
Normal file
@ -0,0 +1,152 @@
|
||||
import logging
|
||||
from collections.abc import Mapping
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from core.app.entities.app_invoke_entities import InvokeFrom
|
||||
from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus
|
||||
from dify_graph.enums import NodeExecutionType, NodeType, SystemVariableKey
|
||||
from dify_graph.node_events import NodeRunResult
|
||||
from dify_graph.nodes.base.node import Node
|
||||
from dify_graph.nodes.base.template import Template
|
||||
from dify_graph.repositories.index_processor_protocol import IndexProcessorProtocol
|
||||
from dify_graph.repositories.summary_index_service_protocol import SummaryIndexServiceProtocol
|
||||
|
||||
from .entities import KnowledgeIndexNodeData
|
||||
from .exc import (
|
||||
KnowledgeIndexNodeError,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from dify_graph.entities import GraphInitParams
|
||||
from dify_graph.runtime import GraphRuntimeState
|
||||
|
||||
# Module-level logger, named after this module; used by KnowledgeIndexNode to report run failures.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
    """Workflow node that passes chunks from the variable pool to an index processor.

    When the run was invoked from the debugger, the node only asks the index
    processor for a preview output. Otherwise it asks the index processor to
    index the chunks and then asks the summary index service to generate and
    vectorize a summary.
    """

    node_type = NodeType.KNOWLEDGE_INDEX
    execution_type = NodeExecutionType.RESPONSE

    def __init__(
        self,
        id: str,
        config: Mapping[str, Any],
        graph_init_params: "GraphInitParams",
        graph_runtime_state: "GraphRuntimeState",
        index_processor: IndexProcessorProtocol,
        summary_index_service: SummaryIndexServiceProtocol,
    ) -> None:
        """Initialise the base node and keep references to the injected collaborators.

        Args:
            id: Node identifier, forwarded to the base class.
            config: Node configuration mapping, forwarded to the base class.
            graph_init_params: Graph initialisation parameters, forwarded to the base class.
            graph_runtime_state: Runtime state; its ``variable_pool`` is read in ``_run``.
            index_processor: Collaborator used for preview output and for indexing.
            summary_index_service: Collaborator used to generate and vectorize summaries.
        """
        super().__init__(id, config, graph_init_params, graph_runtime_state)
        self.index_processor = index_processor
        self.summary_index_service = summary_index_service

    def _run(self) -> NodeRunResult:  # type: ignore
        """Execute the node and report the outcome as a ``NodeRunResult``.

        Returns:
            A SUCCEEDED result carrying either the preview output or the
            indexing result, or a FAILED result when the chunks are empty or
            when anything inside the indexing section raises.

        Raises:
            KnowledgeIndexNodeError: If the dataset id system variable or the
                index chunk variable cannot be found in the variable pool.
                These checks run before the guarded section, so they propagate
                to the caller instead of becoming a FAILED result.
        """
        data = self.node_data
        pool = self.graph_runtime_state.variable_pool

        # Dataset id is mandatory.
        dataset_segment = pool.get(["sys", SystemVariableKey.DATASET_ID])
        if not dataset_segment:
            raise KnowledgeIndexNodeError("Dataset ID is required.")
        dataset_id: str = dataset_segment.value

        # Document id is optional at this point; an empty string stands for "not provided".
        document_segment = pool.get(["sys", SystemVariableKey.DOCUMENT_ID])
        if document_segment:
            document_id: str = document_segment.value
        else:
            document_id = ""

        # The chunks to index come from the selector configured on the node.
        chunk_segment = pool.get(data.index_chunk_variable_selector)
        if not chunk_segment:
            raise KnowledgeIndexNodeError("Index chunk variable is required.")

        # A run invoked from the debugger is treated as a preview.
        invoke_from_segment = pool.get(["sys", SystemVariableKey.INVOKE_FROM])
        if invoke_from_segment:
            is_preview = invoke_from_segment.value == InvokeFrom.DEBUGGER
        else:
            is_preview = False

        chunks = chunk_segment.value
        run_inputs = {"chunks": chunks}

        if not chunks:
            return NodeRunResult(
                status=WorkflowNodeExecutionStatus.FAILED,
                inputs=run_inputs,
                error="Chunks is required.",
            )

        def failed(exc: Exception) -> NodeRunResult:
            # Shared shape of the FAILED result produced by both exception handlers below.
            return NodeRunResult(
                status=WorkflowNodeExecutionStatus.FAILED,
                inputs=run_inputs,
                error=str(exc),
                error_type=type(exc).__name__,
            )

        try:
            summary_setting = data.summary_index_setting

            if is_preview:
                # Preview: ask the index processor for a preview output and return early.
                preview = self.index_processor.get_preview_output(
                    chunks,
                    dataset_id,
                    document_id,
                    data.chunk_structure,
                    summary_setting,
                )
                return NodeRunResult(
                    status=WorkflowNodeExecutionStatus.SUCCEEDED,
                    inputs=run_inputs,
                    outputs=preview.model_dump(exclude_none=True),
                )

            original_document_segment = pool.get(["sys", SystemVariableKey.ORIGINAL_DOCUMENT_ID])
            batch_segment = pool.get(["sys", SystemVariableKey.BATCH])
            if not batch_segment:
                raise KnowledgeIndexNodeError("Batch is required.")

            if original_document_segment:
                original_document_id = original_document_segment.value
            else:
                original_document_id = ""

            index_result = self._invoke_knowledge_index(
                dataset_id=dataset_id,
                document_id=document_id,
                original_document_id=original_document_id,
                is_preview=is_preview,
                batch=batch_segment.value,
                chunks=chunks,
                summary_index_setting=summary_setting,
            )
            return NodeRunResult(
                status=WorkflowNodeExecutionStatus.SUCCEEDED,
                inputs=run_inputs,
                outputs=index_result,
            )
        except KnowledgeIndexNodeError as node_error:
            # Node-specific errors are logged as warnings with the traceback attached.
            logger.warning("Error when running knowledge index node", exc_info=True)
            return failed(node_error)
        except Exception as unexpected:
            # Anything else is logged at error level with the traceback and reported as FAILED.
            logger.exception(unexpected)
            return failed(unexpected)

    def _invoke_knowledge_index(
        self,
        dataset_id: str,
        document_id: str,
        original_document_id: str,
        is_preview: bool,
        batch: Any,
        chunks: Mapping[str, Any],
        summary_index_setting: dict | None = None,
    ):
        """Index the chunks, then generate and vectorize a summary for the document.

        Args:
            dataset_id: Dataset id forwarded to both collaborators.
            document_id: Document id forwarded to both collaborators; must be non-empty.
            original_document_id: Forwarded to ``index_processor.index_and_clean``.
            is_preview: Forwarded to the summary index service.
            batch: Batch value forwarded to ``index_processor.index_and_clean``.
            chunks: Chunks forwarded to ``index_processor.index_and_clean``.
            summary_index_setting: Optional setting forwarded to both collaborators.

        Returns:
            Whatever ``index_processor.index_and_clean`` returns.

        Raises:
            KnowledgeIndexNodeError: If ``document_id`` is empty.
        """
        if not document_id:
            raise KnowledgeIndexNodeError("document_id is required.")

        index_result = self.index_processor.index_and_clean(
            dataset_id,
            document_id,
            original_document_id,
            chunks,
            batch,
            summary_index_setting,
        )
        # Summary generation runs after indexing; its return value is not used.
        self.summary_index_service.generate_and_vectorize_summary(
            dataset_id,
            document_id,
            is_preview,
            summary_index_setting,
        )
        return index_result

    @classmethod
    def version(cls) -> str:
        """Return the version string of this node implementation."""
        return "1"

    def get_streaming_template(self) -> Template:
        """
        Get the template for streaming.

        Returns:
            A Template with an empty segment list
        """
        return Template(segments=[])
|
||||
Reference in New Issue
Block a user