This commit is contained in:
jyong
2025-05-06 13:56:13 +08:00
parent 3c386c63a6
commit a25cc4e8af
3 changed files with 22 additions and 82 deletions

View File

@ -1,7 +1,7 @@
"""Paragraph index processor."""
import uuid
from typing import Optional
from typing import Any, Mapping, Optional
from core.rag.cleaner.clean_processor import CleanProcessor
from core.rag.datasource.keyword.keyword_factory import Keyword
@ -125,3 +125,8 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
doc = Document(page_content=result.page_content, metadata=metadata)
docs.append(doc)
return docs
def index(self, dataset: Dataset, document: Document, chunks: list[Mapping[str, Any]]) -> None:
    """Index pre-built chunks of ``document`` into ``dataset``.

    This method is an unfinished stub. The original body opened a
    ``GeneralDocument.create(`` call inside a loop over ``chunks`` but never
    supplied its arguments or closed it, which made the module fail to
    import with a ``SyntaxError``.

    Args:
        dataset: Target dataset.
        document: Document the chunks belong to.
        chunks: Chunk payloads; the expected keys are not defined yet.

    Raises:
        NotImplementedError: Always, until the per-chunk persistence is
            written. Failing loudly avoids silently dropping chunks.
    """
    # TODO(review): presumably each chunk should be persisted with
    # GeneralDocument.create(...) - confirm the intended arguments and
    # replace this guard with the real per-chunk loop.
    raise NotImplementedError("ParagraphIndexProcessor.index is not implemented yet.")

View File

@ -43,7 +43,7 @@ from extensions.ext_redis import redis_client
from libs.json_in_md_parser import parse_and_check_json_markdown
from models.dataset import Dataset, DatasetMetadata, Document, RateLimitLog
from models.workflow import WorkflowNodeExecutionStatus
from services.dataset_service import DatasetService
from services.dataset_service import DatasetService, DocumentService
from services.feature_service import FeatureService
from .entities import KnowledgeIndexNodeData, KnowledgeRetrievalNodeData, ModelConfig
@ -139,14 +139,20 @@ class KnowledgeIndexNode(LLMNode):
)
def _invoke_knowledge_index(self, node_data: KnowledgeIndexNodeData, document_id: str, chunks: list[Any]) -> Any:
    """Index ``chunks`` for one document in the node's target dataset.

    Looks up the dataset named by ``node_data.dataset_id`` and the document
    named by ``document_id``, then hands both to
    ``DocumentService.invoke_knowledge_index`` together with the chunking and
    retrieval settings carried on ``node_data``.

    Args:
        node_data: Node configuration; ``dataset_id``, ``chunk_structure``,
            ``index_method`` and ``retrieval_setting`` are read from it.
        document_id: ID of the document the chunks belong to.
        chunks: Chunk payloads, passed through unchanged.

    Raises:
        KnowledgeIndexNodeError: If the dataset or the document is not found.
    """
    dataset = Dataset.query.filter_by(id=node_data.dataset_id).first()
    if not dataset:
        raise KnowledgeIndexNodeError(f"Dataset {node_data.dataset_id} not found.")

    document = Document.query.filter_by(id=document_id).first()
    if not document:
        raise KnowledgeIndexNodeError(f"Document {document_id} not found.")

    # Indexing is delegated to DocumentService; the superseded
    # DatasetService.invoke_knowledge_index call has been dropped.
    DocumentService.invoke_knowledge_index(
        dataset=dataset,
        document=document,
        chunks=chunks,
        chunk_structure=node_data.chunk_structure,
        index_method=node_data.index_method,
        retrieval_setting=node_data.retrieval_setting,
    )