refactor: select in console datasets document controller (#34029)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
tmimmanuel
2026-03-25 04:47:25 +01:00
committed by GitHub
parent 4c32acf857
commit d87263f7c3
55 changed files with 233 additions and 195 deletions

View File

@ -22,6 +22,7 @@ from sqlalchemy.orm import Session
from core.helper import ssrf_proxy
from core.helper.name_generator import generate_incremental_name
from core.plugin.entities.plugin import PluginDependency
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from core.workflow.nodes.datasource.entities import DatasourceNodeData
from core.workflow.nodes.knowledge_index import KNOWLEDGE_INDEX_NODE_TYPE
from core.workflow.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData
@ -311,13 +312,13 @@ class RagPipelineDslService:
"icon_background": icon_background,
"icon_url": icon_url,
},
indexing_technique=knowledge_configuration.indexing_technique,
indexing_technique=IndexTechniqueType(knowledge_configuration.indexing_technique),
created_by=account.id,
retrieval_model=knowledge_configuration.retrieval_model.model_dump(),
runtime_mode=DatasetRuntimeMode.RAG_PIPELINE,
chunk_structure=knowledge_configuration.chunk_structure,
)
if knowledge_configuration.indexing_technique == "high_quality":
if knowledge_configuration.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
dataset_collection_binding = (
self._session.query(DatasetCollectionBinding)
.where(
@ -343,7 +344,7 @@ class RagPipelineDslService:
dataset.collection_binding_id = dataset_collection_binding_id
dataset.embedding_model = knowledge_configuration.embedding_model
dataset.embedding_model_provider = knowledge_configuration.embedding_model_provider
elif knowledge_configuration.indexing_technique == "economy":
elif knowledge_configuration.indexing_technique == IndexTechniqueType.ECONOMY:
dataset.keyword_number = knowledge_configuration.keyword_number
# Update summary_index_setting if provided
if knowledge_configuration.summary_index_setting is not None:
@ -443,18 +444,18 @@ class RagPipelineDslService:
"icon_background": icon_background,
"icon_url": icon_url,
},
indexing_technique=knowledge_configuration.indexing_technique,
indexing_technique=IndexTechniqueType(knowledge_configuration.indexing_technique),
created_by=account.id,
retrieval_model=knowledge_configuration.retrieval_model.model_dump(),
runtime_mode=DatasetRuntimeMode.RAG_PIPELINE,
chunk_structure=knowledge_configuration.chunk_structure,
)
else:
dataset.indexing_technique = knowledge_configuration.indexing_technique
dataset.indexing_technique = IndexTechniqueType(knowledge_configuration.indexing_technique)
dataset.retrieval_model = knowledge_configuration.retrieval_model.model_dump()
dataset.runtime_mode = DatasetRuntimeMode.RAG_PIPELINE
dataset.chunk_structure = knowledge_configuration.chunk_structure
if knowledge_configuration.indexing_technique == "high_quality":
if knowledge_configuration.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
dataset_collection_binding = (
self._session.query(DatasetCollectionBinding)
.where(
@ -480,7 +481,7 @@ class RagPipelineDslService:
dataset.collection_binding_id = dataset_collection_binding_id
dataset.embedding_model = knowledge_configuration.embedding_model
dataset.embedding_model_provider = knowledge_configuration.embedding_model_provider
elif knowledge_configuration.indexing_technique == "economy":
elif knowledge_configuration.indexing_technique == IndexTechniqueType.ECONOMY:
dataset.keyword_number = knowledge_configuration.keyword_number
# Update summary_index_setting if provided
if knowledge_configuration.summary_index_setting is not None:
@ -772,7 +773,7 @@ class RagPipelineDslService:
)
case _ if typ == KNOWLEDGE_INDEX_NODE_TYPE:
knowledge_index_entity = KnowledgeConfiguration.model_validate(node["data"])
if knowledge_index_entity.indexing_technique == "high_quality":
if knowledge_index_entity.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
if knowledge_index_entity.embedding_model_provider:
dependencies.append(
DependenciesAnalysisService.analyze_model_provider_dependency(

View File

@ -9,7 +9,7 @@ from flask_login import current_user
from constants import DOCUMENT_EXTENSIONS
from core.plugin.impl.plugin import PluginInstaller
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from extensions.ext_database import db
from factories import variable_factory
@ -105,29 +105,29 @@ class RagPipelineTransformService:
if doc_form == IndexStructureType.PARAGRAPH_INDEX:
match datasource_type:
case DataSourceType.UPLOAD_FILE:
if indexing_technique == "high_quality":
if indexing_technique == IndexTechniqueType.HIGH_QUALITY:
# get graph from transform.file-general-high-quality.yml
with open(f"{Path(__file__).parent}/transform/file-general-high-quality.yml") as f:
pipeline_yaml = yaml.safe_load(f)
if indexing_technique == "economy":
if indexing_technique == IndexTechniqueType.ECONOMY:
# get graph from transform.file-general-economy.yml
with open(f"{Path(__file__).parent}/transform/file-general-economy.yml") as f:
pipeline_yaml = yaml.safe_load(f)
case DataSourceType.NOTION_IMPORT:
if indexing_technique == "high_quality":
if indexing_technique == IndexTechniqueType.HIGH_QUALITY:
# get graph from transform.notion-general-high-quality.yml
with open(f"{Path(__file__).parent}/transform/notion-general-high-quality.yml") as f:
pipeline_yaml = yaml.safe_load(f)
if indexing_technique == "economy":
if indexing_technique == IndexTechniqueType.ECONOMY:
# get graph from transform.notion-general-economy.yml
with open(f"{Path(__file__).parent}/transform/notion-general-economy.yml") as f:
pipeline_yaml = yaml.safe_load(f)
case DataSourceType.WEBSITE_CRAWL:
if indexing_technique == "high_quality":
if indexing_technique == IndexTechniqueType.HIGH_QUALITY:
# get graph from transform.website-crawl-general-high-quality.yml
with open(f"{Path(__file__).parent}/transform/website-crawl-general-high-quality.yml") as f:
pipeline_yaml = yaml.safe_load(f)
if indexing_technique == "economy":
if indexing_technique == IndexTechniqueType.ECONOMY:
# get graph from transform.website-crawl-general-economy.yml
with open(f"{Path(__file__).parent}/transform/website-crawl-general-economy.yml") as f:
pipeline_yaml = yaml.safe_load(f)
@ -170,11 +170,11 @@ class RagPipelineTransformService:
):
knowledge_configuration_dict = node.get("data", {})
if indexing_technique == "high_quality":
if indexing_technique == IndexTechniqueType.HIGH_QUALITY:
knowledge_configuration.embedding_model = dataset.embedding_model
knowledge_configuration.embedding_model_provider = dataset.embedding_model_provider
if retrieval_model:
if indexing_technique == "economy":
if indexing_technique == IndexTechniqueType.ECONOMY:
retrieval_model.search_method = RetrievalMethod.KEYWORD_SEARCH
knowledge_configuration.retrieval_model = retrieval_model
else: