This commit is contained in:
jyong
2025-05-27 00:01:23 +08:00
parent 83ca7f8deb
commit 5fc2bc58a9
10 changed files with 301 additions and 147 deletions

View File

@@ -13,7 +13,7 @@ from core.rag.splitter.fixed_text_splitter import (
FixedRecursiveCharacterTextSplitter,
)
from core.rag.splitter.text_splitter import TextSplitter
-from models.dataset import Dataset, DatasetProcessRule
+from models.dataset import Dataset, Document as DatasetDocument, DatasetProcessRule
class BaseIndexProcessor(ABC):
@@ -35,7 +35,7 @@ class BaseIndexProcessor(ABC):
raise NotImplementedError
@abstractmethod
-def index(self, dataset: Dataset, document: Document, chunks: Mapping[str, Any]):
+def index(self, dataset: Dataset, document: DatasetDocument, chunks: Mapping[str, Any]):
raise NotImplementedError
@abstractmethod

View File

@@ -15,7 +15,7 @@ from core.rag.index_processor.index_processor_base import BaseIndexProcessor
from core.rag.models.document import Document, GeneralStructureChunk
from core.tools.utils.text_processing_utils import remove_leading_symbols
from libs import helper
-from models.dataset import Dataset, DatasetProcessRule
+from models.dataset import Dataset, Document as DatasetDocument, DatasetProcessRule
from services.entities.knowledge_entities.knowledge_entities import Rule
@@ -128,7 +128,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
docs.append(doc)
return docs
-def index(self, dataset: Dataset, document: Document, chunks: Mapping[str, Any]):
+def index(self, dataset: Dataset, document: DatasetDocument, chunks: Mapping[str, Any]):
paragraph = GeneralStructureChunk(**chunks)
documents = []
for content in paragraph.general_chunk:

View File

@@ -16,7 +16,7 @@ from core.rag.index_processor.index_processor_base import BaseIndexProcessor
from core.rag.models.document import ChildDocument, Document, ParentChildStructureChunk
from extensions.ext_database import db
from libs import helper
-from models.dataset import ChildChunk, Dataset, DocumentSegment
+from models.dataset import ChildChunk, Dataset, Document as DatasetDocument, DocumentSegment
from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule
@@ -205,7 +205,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
child_nodes.append(child_document)
return child_nodes
-def index(self, dataset: Dataset, document: Document, chunks: Mapping[str, Any]):
+def index(self, dataset: Dataset, document: DatasetDocument, chunks: Mapping[str, Any]):
parent_childs = ParentChildStructureChunk(**chunks)
documents = []
for parent_child in parent_childs.parent_child_chunks: