Merge remote-tracking branch 'origin/main' into feat/support-agent-sandbox

2026-05-01 07:58:02 +08:00 · 2026-03-25 11:50:33 +08:00
parent 79e59d8957 d87263f7c3
commit 21bdb6da47
82 changed files with 1074 additions and 1135 deletions
--- a/api/tests/unit_tests/core/rag/indexing/processor/test_paragraph_index_processor.py
+++ b/api/tests/unit_tests/core/rag/indexing/processor/test_paragraph_index_processor.py
@@ -4,6 +4,7 @@ from unittest.mock import Mock, patch
 import pytest

 from core.entities.knowledge_entities import PreviewDetail
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor
 from core.rag.models.document import AttachmentDocument, Document
 from dify_graph.model_runtime.entities.llm_entities import LLMResult, LLMUsage
@@ -21,7 +22,7 @@ class TestParagraphIndexProcessor:
        dataset = Mock()
        dataset.id = "dataset-1"
        dataset.tenant_id = "tenant-1"
-        dataset.indexing_technique = "high_quality"
+        dataset.indexing_technique = IndexTechniqueType.HIGH_QUALITY
        dataset.is_multimodal = True
        return dataset

@@ -167,7 +168,7 @@ class TestParagraphIndexProcessor:
    def test_load_uses_keyword_add_texts_with_keywords_when_economy(
        self, processor: ParagraphIndexProcessor, dataset: Mock
    ) -> None:
-        dataset.indexing_technique = "economy"
+        dataset.indexing_technique = IndexTechniqueType.ECONOMY
        docs = [Document(page_content="chunk", metadata={})]

        with patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls:
@@ -178,7 +179,7 @@ class TestParagraphIndexProcessor:
    def test_load_uses_keyword_add_texts_without_keywords_when_economy(
        self, processor: ParagraphIndexProcessor, dataset: Mock
    ) -> None:
-        dataset.indexing_technique = "economy"
+        dataset.indexing_technique = IndexTechniqueType.ECONOMY
        docs = [Document(page_content="chunk", metadata={})]

        with patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls:
@@ -208,7 +209,7 @@ class TestParagraphIndexProcessor:
    def test_clean_economy_deletes_summaries_and_keywords(
        self, processor: ParagraphIndexProcessor, dataset: Mock
    ) -> None:
-        dataset.indexing_technique = "economy"
+        dataset.indexing_technique = IndexTechniqueType.ECONOMY

        with (
            patch(
@@ -222,7 +223,7 @@ class TestParagraphIndexProcessor:
        mock_keyword_cls.return_value.delete.assert_called_once()

    def test_clean_deletes_keywords_by_ids(self, processor: ParagraphIndexProcessor, dataset: Mock) -> None:
-        dataset.indexing_technique = "economy"
+        dataset.indexing_technique = IndexTechniqueType.ECONOMY
        with patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls:
            processor.clean(dataset, ["node-2"], with_keywords=True)

@@ -267,7 +268,7 @@ class TestParagraphIndexProcessor:
    def test_index_list_chunks_economy(
        self, processor: ParagraphIndexProcessor, dataset: Mock, dataset_document: Mock
    ) -> None:
-        dataset.indexing_technique = "economy"
+        dataset.indexing_technique = IndexTechniqueType.ECONOMY
        with (
            patch(
                "core.rag.index_processor.processor.paragraph_index_processor.helper.generate_text_hash",
--- a/api/tests/unit_tests/core/rag/indexing/processor/test_parent_child_index_processor.py
+++ b/api/tests/unit_tests/core/rag/indexing/processor/test_parent_child_index_processor.py
@@ -4,6 +4,7 @@ from unittest.mock import MagicMock, Mock, patch
 import pytest

 from core.entities.knowledge_entities import PreviewDetail
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from core.rag.index_processor.processor.parent_child_index_processor import ParentChildIndexProcessor
 from core.rag.models.document import AttachmentDocument, ChildDocument, Document
 from services.entities.knowledge_entities.knowledge_entities import ParentMode
@@ -19,7 +20,7 @@ class TestParentChildIndexProcessor:
        dataset = Mock()
        dataset.id = "dataset-1"
        dataset.tenant_id = "tenant-1"
-        dataset.indexing_technique = "high_quality"
+        dataset.indexing_technique = IndexTechniqueType.HIGH_QUALITY
        dataset.is_multimodal = True
        return dataset

--- a/api/tests/unit_tests/core/rag/indexing/processor/test_qa_index_processor.py
+++ b/api/tests/unit_tests/core/rag/indexing/processor/test_qa_index_processor.py
@@ -6,6 +6,7 @@ import pytest
 from werkzeug.datastructures import FileStorage

 from core.entities.knowledge_entities import PreviewDetail
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from core.rag.index_processor.processor.qa_index_processor import QAIndexProcessor
 from core.rag.models.document import AttachmentDocument, Document

@@ -33,7 +34,7 @@ class TestQAIndexProcessor:
        dataset = Mock()
        dataset.id = "dataset-1"
        dataset.tenant_id = "tenant-1"
-        dataset.indexing_technique = "high_quality"
+        dataset.indexing_technique = IndexTechniqueType.HIGH_QUALITY
        dataset.is_multimodal = True
        return dataset

@@ -207,7 +208,7 @@ class TestQAIndexProcessor:
        vector.create_multimodal.assert_called_once_with(multimodal_docs)

    def test_load_skips_vector_for_non_high_quality(self, processor: QAIndexProcessor, dataset: Mock) -> None:
-        dataset.indexing_technique = "economy"
+        dataset.indexing_technique = IndexTechniqueType.ECONOMY
        docs = [Document(page_content="Q1", metadata={"answer": "A1"})]

        with patch("core.rag.index_processor.processor.qa_index_processor.Vector") as mock_vector_cls:
@@ -298,7 +299,7 @@ class TestQAIndexProcessor:
    def test_index_requires_high_quality(
        self, processor: QAIndexProcessor, dataset: Mock, dataset_document: Mock
    ) -> None:
-        dataset.indexing_technique = "economy"
+        dataset.indexing_technique = IndexTechniqueType.ECONOMY
        qa_chunks = SimpleNamespace(qa_chunks=[SimpleNamespace(question="Q1", answer="A1")])

        with (
--- a/api/tests/unit_tests/core/rag/indexing/test_indexing_runner.py
+++ b/api/tests/unit_tests/core/rag/indexing/test_indexing_runner.py
@@ -61,7 +61,7 @@ from core.indexing_runner import (
    DocumentIsPausedError,
    IndexingRunner,
 )
-from core.rag.index_processor.constant.index_type import IndexStructureType
+from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
 from core.rag.models.document import ChildDocument, Document
 from dify_graph.model_runtime.entities.model_entities import ModelType
 from libs.datetime_utils import naive_utc_now
@@ -76,7 +76,7 @@ from models.dataset import Document as DatasetDocument
 def create_mock_dataset(
    dataset_id: str | None = None,
    tenant_id: str | None = None,
-    indexing_technique: str = "high_quality",
+    indexing_technique: str = IndexTechniqueType.HIGH_QUALITY,
    embedding_provider: str = "openai",
    embedding_model: str = "text-embedding-ada-002",
 ) -> Mock:
@@ -458,7 +458,7 @@ class TestIndexingRunnerTransform:
        dataset = Mock(spec=Dataset)
        dataset.id = str(uuid.uuid4())
        dataset.tenant_id = str(uuid.uuid4())
-        dataset.indexing_technique = "high_quality"
+        dataset.indexing_technique = IndexTechniqueType.HIGH_QUALITY
        dataset.embedding_model_provider = "openai"
        dataset.embedding_model = "text-embedding-ada-002"
        return dataset
@@ -521,7 +521,7 @@ class TestIndexingRunnerTransform:
        """Test transformation with economy indexing (no embeddings)."""
        # Arrange
        runner = IndexingRunner()
-        sample_dataset.indexing_technique = "economy"
+        sample_dataset.indexing_technique = IndexTechniqueType.ECONOMY

        mock_processor = MagicMock()
        transformed_docs = [
@@ -605,7 +605,7 @@ class TestIndexingRunnerLoad:
        dataset = Mock(spec=Dataset)
        dataset.id = str(uuid.uuid4())
        dataset.tenant_id = str(uuid.uuid4())
-        dataset.indexing_technique = "high_quality"
+        dataset.indexing_technique = IndexTechniqueType.HIGH_QUALITY
        dataset.embedding_model_provider = "openai"
        dataset.embedding_model = "text-embedding-ada-002"
        return dataset
@@ -674,7 +674,7 @@ class TestIndexingRunnerLoad:
        """Test loading with economy indexing (keyword only)."""
        # Arrange
        runner = IndexingRunner()
-        sample_dataset.indexing_technique = "economy"
+        sample_dataset.indexing_technique = IndexTechniqueType.ECONOMY

        mock_processor = MagicMock()

@@ -701,7 +701,7 @@ class TestIndexingRunnerLoad:
        # Arrange
        runner = IndexingRunner()
        sample_dataset_document.doc_form = IndexStructureType.PARENT_CHILD_INDEX
-        sample_dataset.indexing_technique = "high_quality"
+        sample_dataset.indexing_technique = IndexTechniqueType.HIGH_QUALITY

        # Add child documents
        for doc in sample_documents:
@@ -795,7 +795,7 @@ class TestIndexingRunnerRun:
        mock_dataset = Mock(spec=Dataset)
        mock_dataset.id = doc.dataset_id
        mock_dataset.tenant_id = doc.tenant_id
-        mock_dataset.indexing_technique = "economy"
+        mock_dataset.indexing_technique = IndexTechniqueType.ECONOMY
        mock_dependencies["db"].session.query.return_value.filter_by.return_value.first.return_value = mock_dataset

        mock_process_rule = Mock(spec=DatasetProcessRule)
@@ -949,7 +949,7 @@ class TestIndexingRunnerRun:
        mock_dependencies["db"].session.get.side_effect = get_side_effect

        mock_dataset = Mock(spec=Dataset)
-        mock_dataset.indexing_technique = "economy"
+        mock_dataset.indexing_technique = IndexTechniqueType.ECONOMY
        mock_dependencies["db"].session.query.return_value.filter_by.return_value.first.return_value = mock_dataset

        mock_process_rule = Mock(spec=DatasetProcessRule)
--- a/api/tests/unit_tests/core/workflow/nodes/knowledge_index/test_knowledge_index_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/knowledge_index/test_knowledge_index_node.py
@@ -5,6 +5,7 @@ from unittest.mock import Mock
 import pytest

 from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom
+from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from core.workflow.nodes.knowledge_index.entities import KnowledgeIndexNodeData
 from core.workflow.nodes.knowledge_index.exc import KnowledgeIndexNodeError
 from core.workflow.nodes.knowledge_index.knowledge_index_node import KnowledgeIndexNode
@@ -78,7 +79,7 @@ def sample_node_data():
        type="knowledge-index",
        chunk_structure="general_structure",
        index_chunk_variable_selector=["start", "chunks"],
-        indexing_technique="high_quality",
+        indexing_technique=IndexTechniqueType.HIGH_QUALITY,
        summary_index_setting=None,
    )