refactor: select in console datasets document controller (#34029)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-05-01 07:58:02 +08:00 · 2026-03-25 04:47:25 +01:00
parent 4c32acf857
commit d87263f7c3
55 changed files with 233 additions and 195 deletions
--- a/api/core/rag/docstore/dataset_docstore.py
+++ b/api/core/rag/docstore/dataset_docstore.py
@ -6,6 +6,7 @@ from typing import Any
 from sqlalchemy import func, select

 from core.model_manager import ModelManager
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from core.rag.models.document import AttachmentDocument, Document
 from dify_graph.model_runtime.entities.model_entities import ModelType
 from extensions.ext_database import db
@ -71,7 +72,7 @@ class DatasetDocumentStore:
        if max_position is None:
            max_position = 0
        embedding_model = None
-        if self._dataset.indexing_technique == "high_quality":
+        if self._dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            model_manager = ModelManager()
            embedding_model = model_manager.get_model_instance(
                tenant_id=self._dataset.tenant_id,
--- a/api/core/rag/index_processor/index_processor.py
+++ b/api/core/rag/index_processor/index_processor.py
@ -9,6 +9,7 @@ from flask import current_app
 from sqlalchemy import delete, func, select

 from core.db.session_factory import session_factory
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from core.rag.index_processor.index_processor_base import SummaryIndexSettingDict
 from core.workflow.nodes.knowledge_index.exc import KnowledgeIndexNodeError
 from core.workflow.nodes.knowledge_index.protocols import Preview, PreviewItem, QaPreview
@ -159,7 +160,7 @@ class IndexProcessor:
            tenant_id = dataset.tenant_id

        preview_output = self.format_preview(chunk_structure, chunks)
-        if indexing_technique != "high_quality":
+        if indexing_technique != IndexTechniqueType.HIGH_QUALITY:
            return preview_output

        if not summary_index_setting or not summary_index_setting.get("enable"):
--- a/api/core/rag/index_processor/processor/paragraph_index_processor.py
+++ b/api/core/rag/index_processor/processor/paragraph_index_processor.py
@ -22,7 +22,7 @@ from core.rag.docstore.dataset_docstore import DatasetDocumentStore
 from core.rag.extractor.entity.extract_setting import ExtractSetting
 from core.rag.extractor.extract_processor import ExtractProcessor
 from core.rag.index_processor.constant.doc_type import DocType
-from core.rag.index_processor.constant.index_type import IndexStructureType
+from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
 from core.rag.index_processor.index_processor_base import BaseIndexProcessor, SummaryIndexSettingDict
 from core.rag.models.document import AttachmentDocument, Document, MultimodalGeneralStructureChunk
 from core.rag.retrieval.retrieval_methods import RetrievalMethod
@ -117,7 +117,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
        with_keywords: bool = True,
        **kwargs,
    ) -> None:
-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            vector = Vector(dataset)
            vector.create(documents)
            if multimodal_documents and dataset.is_multimodal:
@ -155,7 +155,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
                # Delete all summaries for the dataset
                SummaryIndexService.delete_summaries_for_segments(dataset, None)

-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            vector = Vector(dataset)
            if node_ids:
                vector.delete_by_ids(node_ids)
@ -253,12 +253,12 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
            doc_store = DatasetDocumentStore(dataset=dataset, user_id=document.created_by, document_id=document.id)
            # add document segments
            doc_store.add_documents(docs=documents, save_child=False)
-            if dataset.indexing_technique == "high_quality":
+            if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
                vector = Vector(dataset)
                vector.create(documents)
                if all_multimodal_documents and dataset.is_multimodal:
                    vector.create_multimodal(all_multimodal_documents)
-            elif dataset.indexing_technique == "economy":
+            elif dataset.indexing_technique == IndexTechniqueType.ECONOMY:
                keyword = Keyword(dataset)
                keyword.add_texts(documents)

--- a/api/core/rag/index_processor/processor/parent_child_index_processor.py
+++ b/api/core/rag/index_processor/processor/parent_child_index_processor.py
@ -18,7 +18,7 @@ from core.rag.docstore.dataset_docstore import DatasetDocumentStore
 from core.rag.extractor.entity.extract_setting import ExtractSetting
 from core.rag.extractor.extract_processor import ExtractProcessor
 from core.rag.index_processor.constant.doc_type import DocType
-from core.rag.index_processor.constant.index_type import IndexStructureType
+from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
 from core.rag.index_processor.index_processor_base import BaseIndexProcessor, SummaryIndexSettingDict
 from core.rag.models.document import AttachmentDocument, ChildDocument, Document, ParentChildStructureChunk
 from core.rag.retrieval.retrieval_methods import RetrievalMethod
@ -128,7 +128,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
        with_keywords: bool = True,
        **kwargs,
    ) -> None:
-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            vector = Vector(dataset)
            for document in documents:
                child_documents = document.children
@ -166,7 +166,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
                # Delete all summaries for the dataset
                SummaryIndexService.delete_summaries_for_segments(dataset, None)

-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            delete_child_chunks = kwargs.get("delete_child_chunks") or False
            precomputed_child_node_ids = kwargs.get("precomputed_child_node_ids")
            vector = Vector(dataset)
@ -332,7 +332,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
            doc_store = DatasetDocumentStore(dataset=dataset, user_id=document.created_by, document_id=document.id)
            # add document segments
            doc_store.add_documents(docs=documents, save_child=True)
-            if dataset.indexing_technique == "high_quality":
+            if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
                all_child_documents = []
                all_multimodal_documents = []
                for doc in documents:
--- a/api/core/rag/index_processor/processor/qa_index_processor.py
+++ b/api/core/rag/index_processor/processor/qa_index_processor.py
@ -21,7 +21,7 @@ from core.rag.datasource.vdb.vector_factory import Vector
 from core.rag.docstore.dataset_docstore import DatasetDocumentStore
 from core.rag.extractor.entity.extract_setting import ExtractSetting
 from core.rag.extractor.extract_processor import ExtractProcessor
-from core.rag.index_processor.constant.index_type import IndexStructureType
+from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
 from core.rag.index_processor.index_processor_base import BaseIndexProcessor, SummaryIndexSettingDict
 from core.rag.models.document import AttachmentDocument, Document, QAStructureChunk
 from core.rag.retrieval.retrieval_methods import RetrievalMethod
@ -141,7 +141,7 @@ class QAIndexProcessor(BaseIndexProcessor):
        with_keywords: bool = True,
        **kwargs,
    ) -> None:
-        if dataset.indexing_technique == "high_quality":
+        if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
            vector = Vector(dataset)
            vector.create(documents)
            if multimodal_documents and dataset.is_multimodal:
@ -224,7 +224,7 @@ class QAIndexProcessor(BaseIndexProcessor):
            # save node to document segment
            doc_store = DatasetDocumentStore(dataset=dataset, user_id=document.created_by, document_id=document.id)
            doc_store.add_documents(docs=documents, save_child=False)
-            if dataset.indexing_technique == "high_quality":
+            if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
                vector = Vector(dataset)
                vector.create(documents)
            else:
--- a/api/core/rag/retrieval/dataset_retrieval.py
+++ b/api/core/rag/retrieval/dataset_retrieval.py
@ -675,7 +675,7 @@ class DatasetRetrieval:
                    # get top k
                    top_k = retrieval_model_config["top_k"]
                    # get retrieval method
-                    if selected_dataset.indexing_technique == "economy":
+                    if selected_dataset.indexing_technique == IndexTechniqueType.ECONOMY:
                        retrieval_method = RetrievalMethod.KEYWORD_SEARCH
                    else:
                        retrieval_method = retrieval_model_config["search_method"]
@ -752,7 +752,7 @@ class DatasetRetrieval:
                "The configured knowledge base list have different indexing technique, please set reranking model."
            )
        index_type = available_datasets[0].indexing_technique
-        if index_type == "high_quality":
+        if index_type == IndexTechniqueType.HIGH_QUALITY:
            embedding_model_check = all(
                item.embedding_model == available_datasets[0].embedding_model for item in available_datasets
            )
@ -1068,7 +1068,7 @@ class DatasetRetrieval:
                    else default_retrieval_model
                )

-                if dataset.indexing_technique == "economy":
+                if dataset.indexing_technique == IndexTechniqueType.ECONOMY:
                    # use keyword table query
                    documents = RetrievalService.retrieve(
                        retrieval_method=RetrievalMethod.KEYWORD_SEARCH,
--- a/api/core/rag/summary_index/summary_index.py
+++ b/api/core/rag/summary_index/summary_index.py
@ -2,6 +2,7 @@ import concurrent.futures
 import logging

 from core.db.session_factory import session_factory
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from core.rag.index_processor.index_processor_base import SummaryIndexSettingDict
 from models.dataset import Dataset, Document, DocumentSegment, DocumentSegmentSummary
 from services.summary_index_service import SummaryIndexService
@ -21,7 +22,7 @@ class SummaryIndex:
        if is_preview:
            with session_factory.create_session() as session:
                dataset = session.query(Dataset).filter_by(id=dataset_id).first()
-                if not dataset or dataset.indexing_technique != "high_quality":
+                if not dataset or dataset.indexing_technique != IndexTechniqueType.HIGH_QUALITY:
                    return

                if summary_index_setting is None: