use model_validate (#26182)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
2026-05-05 18:08:07 +08:00 · 2025-10-10 17:30:13 +09:00
parent aead192743
commit ab2eacb6c1
70 changed files with 260 additions and 241 deletions
--- a/api/core/indexing_runner.py
+++ b/api/core/indexing_runner.py
@@ -20,7 +20,7 @@ from core.rag.cleaner.clean_processor import CleanProcessor
 from core.rag.datasource.keyword.keyword_factory import Keyword
 from core.rag.docstore.dataset_docstore import DatasetDocumentStore
 from core.rag.extractor.entity.datasource_type import DatasourceType
-from core.rag.extractor.entity.extract_setting import ExtractSetting
+from core.rag.extractor.entity.extract_setting import ExtractSetting, NotionInfo, WebsiteInfo
 from core.rag.index_processor.constant.index_type import IndexType
 from core.rag.index_processor.index_processor_base import BaseIndexProcessor
 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
@@ -357,14 +357,16 @@ class IndexingRunner:
                raise ValueError("no notion import info found")
            extract_setting = ExtractSetting(
                datasource_type=DatasourceType.NOTION.value,
-                notion_info={
-                    "credential_id": data_source_info["credential_id"],
-                    "notion_workspace_id": data_source_info["notion_workspace_id"],
-                    "notion_obj_id": data_source_info["notion_page_id"],
-                    "notion_page_type": data_source_info["type"],
-                    "document": dataset_document,
-                    "tenant_id": dataset_document.tenant_id,
-                },
+                notion_info=NotionInfo.model_validate(
+                    {
+                        "credential_id": data_source_info["credential_id"],
+                        "notion_workspace_id": data_source_info["notion_workspace_id"],
+                        "notion_obj_id": data_source_info["notion_page_id"],
+                        "notion_page_type": data_source_info["type"],
+                        "document": dataset_document,
+                        "tenant_id": dataset_document.tenant_id,
+                    }
+                ),
                document_model=dataset_document.doc_form,
            )
            text_docs = index_processor.extract(extract_setting, process_rule_mode=process_rule["mode"])
@@ -378,14 +380,16 @@ class IndexingRunner:
                raise ValueError("no website import info found")
            extract_setting = ExtractSetting(
                datasource_type=DatasourceType.WEBSITE.value,
-                website_info={
-                    "provider": data_source_info["provider"],
-                    "job_id": data_source_info["job_id"],
-                    "tenant_id": dataset_document.tenant_id,
-                    "url": data_source_info["url"],
-                    "mode": data_source_info["mode"],
-                    "only_main_content": data_source_info["only_main_content"],
-                },
+                website_info=WebsiteInfo.model_validate(
+                    {
+                        "provider": data_source_info["provider"],
+                        "job_id": data_source_info["job_id"],
+                        "tenant_id": dataset_document.tenant_id,
+                        "url": data_source_info["url"],
+                        "mode": data_source_info["mode"],
+                        "only_main_content": data_source_info["only_main_content"],
+                    }
+                ),
                document_model=dataset_document.doc_form,
            )
            text_docs = index_processor.extract(extract_setting, process_rule_mode=process_rule["mode"])