use model_validate (#26182)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
This commit is contained in:
Asuka Minato
2025-10-10 17:30:13 +09:00
committed by GitHub
parent aead192743
commit ab2eacb6c1
70 changed files with 260 additions and 241 deletions

View File

@@ -38,11 +38,11 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
raise ValueError("No process rule found.")
if process_rule.get("mode") == "automatic":
automatic_rule = DatasetProcessRule.AUTOMATIC_RULES
rules = Rule(**automatic_rule)
rules = Rule.model_validate(automatic_rule)
else:
if not process_rule.get("rules"):
raise ValueError("No rules found in process rule.")
rules = Rule(**process_rule.get("rules"))
rules = Rule.model_validate(process_rule.get("rules"))
# Split the text documents into nodes.
if not rules.segmentation:
raise ValueError("No segmentation found in rules.")

View File

@@ -40,7 +40,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
raise ValueError("No process rule found.")
if not process_rule.get("rules"):
raise ValueError("No rules found in process rule.")
rules = Rule(**process_rule.get("rules"))
rules = Rule.model_validate(process_rule.get("rules"))
all_documents: list[Document] = []
if rules.parent_mode == ParentMode.PARAGRAPH:
# Split the text documents into nodes.
@@ -110,7 +110,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
child_documents = document.children
if child_documents:
formatted_child_documents = [
Document(**child_document.model_dump()) for child_document in child_documents
Document.model_validate(child_document.model_dump()) for child_document in child_documents
]
vector.create(formatted_child_documents)
@@ -224,7 +224,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
return child_nodes
def index(self, dataset: Dataset, document: DatasetDocument, chunks: Any):
parent_childs = ParentChildStructureChunk(**chunks)
parent_childs = ParentChildStructureChunk.model_validate(chunks)
documents = []
for parent_child in parent_childs.parent_child_chunks:
metadata = {
@@ -274,7 +274,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
vector.create(all_child_documents)
def format_preview(self, chunks: Any) -> Mapping[str, Any]:
parent_childs = ParentChildStructureChunk(**chunks)
parent_childs = ParentChildStructureChunk.model_validate(chunks)
preview = []
for parent_child in parent_childs.parent_child_chunks:
preview.append({"content": parent_child.parent_content, "child_chunks": parent_child.child_contents})

View File

@@ -47,7 +47,7 @@ class QAIndexProcessor(BaseIndexProcessor):
raise ValueError("No process rule found.")
if not process_rule.get("rules"):
raise ValueError("No rules found in process rule.")
rules = Rule(**process_rule.get("rules"))
rules = Rule.model_validate(process_rule.get("rules"))
splitter = self._get_splitter(
processing_rule_mode=process_rule.get("mode"),
max_tokens=rules.segmentation.max_tokens if rules.segmentation else 0,
@@ -168,7 +168,7 @@ class QAIndexProcessor(BaseIndexProcessor):
return docs
def index(self, dataset: Dataset, document: DatasetDocument, chunks: Any):
qa_chunks = QAStructureChunk(**chunks)
qa_chunks = QAStructureChunk.model_validate(chunks)
documents = []
for qa_chunk in qa_chunks.qa_chunks:
metadata = {
@@ -191,7 +191,7 @@ class QAIndexProcessor(BaseIndexProcessor):
raise ValueError("Indexing technique must be high quality.")
def format_preview(self, chunks: Any) -> Mapping[str, Any]:
qa_chunks = QAStructureChunk(**chunks)
qa_chunks = QAStructureChunk.model_validate(chunks)
preview = []
for qa_chunk in qa_chunks.qa_chunks:
preview.append({"question": qa_chunk.question, "answer": qa_chunk.answer})