refactor: select in console datasets document controller (#34029)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
tmimmanuel
2026-03-25 04:47:25 +01:00
committed by GitHub
parent 4c32acf857
commit d87263f7c3
55 changed files with 233 additions and 195 deletions

View File

@@ -6,6 +6,7 @@ from typing import Any
from sqlalchemy import func, select
from core.model_manager import ModelManager
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
from core.rag.models.document import AttachmentDocument, Document
from dify_graph.model_runtime.entities.model_entities import ModelType
from extensions.ext_database import db
@@ -71,7 +72,7 @@ class DatasetDocumentStore:
if max_position is None:
max_position = 0
embedding_model = None
-if self._dataset.indexing_technique == "high_quality":
+if self._dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
model_manager = ModelManager()
embedding_model = model_manager.get_model_instance(
tenant_id=self._dataset.tenant_id,

View File

@@ -9,6 +9,7 @@ from flask import current_app
from sqlalchemy import delete, func, select
from core.db.session_factory import session_factory
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
from core.rag.index_processor.index_processor_base import SummaryIndexSettingDict
from core.workflow.nodes.knowledge_index.exc import KnowledgeIndexNodeError
from core.workflow.nodes.knowledge_index.protocols import Preview, PreviewItem, QaPreview
@@ -159,7 +160,7 @@ class IndexProcessor:
tenant_id = dataset.tenant_id
preview_output = self.format_preview(chunk_structure, chunks)
-if indexing_technique != "high_quality":
+if indexing_technique != IndexTechniqueType.HIGH_QUALITY:
return preview_output
if not summary_index_setting or not summary_index_setting.get("enable"):

View File

@@ -22,7 +22,7 @@ from core.rag.docstore.dataset_docstore import DatasetDocumentStore
from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.extractor.extract_processor import ExtractProcessor
from core.rag.index_processor.constant.doc_type import DocType
-from core.rag.index_processor.constant.index_type import IndexStructureType
+from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
from core.rag.index_processor.index_processor_base import BaseIndexProcessor, SummaryIndexSettingDict
from core.rag.models.document import AttachmentDocument, Document, MultimodalGeneralStructureChunk
from core.rag.retrieval.retrieval_methods import RetrievalMethod
@@ -117,7 +117,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
with_keywords: bool = True,
**kwargs,
) -> None:
-if dataset.indexing_technique == "high_quality":
+if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
vector = Vector(dataset)
vector.create(documents)
if multimodal_documents and dataset.is_multimodal:
@@ -155,7 +155,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
# Delete all summaries for the dataset
SummaryIndexService.delete_summaries_for_segments(dataset, None)
-if dataset.indexing_technique == "high_quality":
+if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
vector = Vector(dataset)
if node_ids:
vector.delete_by_ids(node_ids)
@@ -253,12 +253,12 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
doc_store = DatasetDocumentStore(dataset=dataset, user_id=document.created_by, document_id=document.id)
# add document segments
doc_store.add_documents(docs=documents, save_child=False)
-if dataset.indexing_technique == "high_quality":
+if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
vector = Vector(dataset)
vector.create(documents)
if all_multimodal_documents and dataset.is_multimodal:
vector.create_multimodal(all_multimodal_documents)
-elif dataset.indexing_technique == "economy":
+elif dataset.indexing_technique == IndexTechniqueType.ECONOMY:
keyword = Keyword(dataset)
keyword.add_texts(documents)

View File

@@ -18,7 +18,7 @@ from core.rag.docstore.dataset_docstore import DatasetDocumentStore
from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.extractor.extract_processor import ExtractProcessor
from core.rag.index_processor.constant.doc_type import DocType
-from core.rag.index_processor.constant.index_type import IndexStructureType
+from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
from core.rag.index_processor.index_processor_base import BaseIndexProcessor, SummaryIndexSettingDict
from core.rag.models.document import AttachmentDocument, ChildDocument, Document, ParentChildStructureChunk
from core.rag.retrieval.retrieval_methods import RetrievalMethod
@@ -128,7 +128,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
with_keywords: bool = True,
**kwargs,
) -> None:
-if dataset.indexing_technique == "high_quality":
+if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
vector = Vector(dataset)
for document in documents:
child_documents = document.children
@@ -166,7 +166,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
# Delete all summaries for the dataset
SummaryIndexService.delete_summaries_for_segments(dataset, None)
-if dataset.indexing_technique == "high_quality":
+if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
delete_child_chunks = kwargs.get("delete_child_chunks") or False
precomputed_child_node_ids = kwargs.get("precomputed_child_node_ids")
vector = Vector(dataset)
@@ -332,7 +332,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
doc_store = DatasetDocumentStore(dataset=dataset, user_id=document.created_by, document_id=document.id)
# add document segments
doc_store.add_documents(docs=documents, save_child=True)
-if dataset.indexing_technique == "high_quality":
+if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
all_child_documents = []
all_multimodal_documents = []
for doc in documents:

View File

@@ -21,7 +21,7 @@ from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.extractor.extract_processor import ExtractProcessor
-from core.rag.index_processor.constant.index_type import IndexStructureType
+from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
from core.rag.index_processor.index_processor_base import BaseIndexProcessor, SummaryIndexSettingDict
from core.rag.models.document import AttachmentDocument, Document, QAStructureChunk
from core.rag.retrieval.retrieval_methods import RetrievalMethod
@@ -141,7 +141,7 @@ class QAIndexProcessor(BaseIndexProcessor):
with_keywords: bool = True,
**kwargs,
) -> None:
-if dataset.indexing_technique == "high_quality":
+if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
vector = Vector(dataset)
vector.create(documents)
if multimodal_documents and dataset.is_multimodal:
@@ -224,7 +224,7 @@ class QAIndexProcessor(BaseIndexProcessor):
# save node to document segment
doc_store = DatasetDocumentStore(dataset=dataset, user_id=document.created_by, document_id=document.id)
doc_store.add_documents(docs=documents, save_child=False)
-if dataset.indexing_technique == "high_quality":
+if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
vector = Vector(dataset)
vector.create(documents)
else:

View File

@@ -675,7 +675,7 @@ class DatasetRetrieval:
# get top k
top_k = retrieval_model_config["top_k"]
# get retrieval method
-if selected_dataset.indexing_technique == "economy":
+if selected_dataset.indexing_technique == IndexTechniqueType.ECONOMY:
retrieval_method = RetrievalMethod.KEYWORD_SEARCH
else:
retrieval_method = retrieval_model_config["search_method"]
@@ -752,7 +752,7 @@ class DatasetRetrieval:
"The configured knowledge base list have different indexing technique, please set reranking model."
)
index_type = available_datasets[0].indexing_technique
-if index_type == "high_quality":
+if index_type == IndexTechniqueType.HIGH_QUALITY:
embedding_model_check = all(
item.embedding_model == available_datasets[0].embedding_model for item in available_datasets
)
@@ -1068,7 +1068,7 @@ class DatasetRetrieval:
else default_retrieval_model
)
-if dataset.indexing_technique == "economy":
+if dataset.indexing_technique == IndexTechniqueType.ECONOMY:
# use keyword table query
documents = RetrievalService.retrieve(
retrieval_method=RetrievalMethod.KEYWORD_SEARCH,

View File

@@ -2,6 +2,7 @@ import concurrent.futures
import logging
from core.db.session_factory import session_factory
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
from core.rag.index_processor.index_processor_base import SummaryIndexSettingDict
from models.dataset import Dataset, Document, DocumentSegment, DocumentSegmentSummary
from services.summary_index_service import SummaryIndexService
@@ -21,7 +22,7 @@ class SummaryIndex:
if is_preview:
with session_factory.create_session() as session:
dataset = session.query(Dataset).filter_by(id=dataset_id).first()
-if not dataset or dataset.indexing_technique != "high_quality":
+if not dataset or dataset.indexing_technique != IndexTechniqueType.HIGH_QUALITY:
return
if summary_index_setting is None: