Merge remote-tracking branch 'origin/main' into feat/support-agent-sandbox

This commit is contained in:
yyh
2026-03-25 11:50:33 +08:00
82 changed files with 1074 additions and 1135 deletions

View File

@@ -4,6 +4,7 @@ from unittest.mock import Mock, patch
import pytest
from core.entities.knowledge_entities import PreviewDetail
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor
from core.rag.models.document import AttachmentDocument, Document
from dify_graph.model_runtime.entities.llm_entities import LLMResult, LLMUsage
@@ -21,7 +22,7 @@ class TestParagraphIndexProcessor:
dataset = Mock()
dataset.id = "dataset-1"
dataset.tenant_id = "tenant-1"
dataset.indexing_technique = "high_quality"
dataset.indexing_technique = IndexTechniqueType.HIGH_QUALITY
dataset.is_multimodal = True
return dataset
@@ -167,7 +168,7 @@ class TestParagraphIndexProcessor:
def test_load_uses_keyword_add_texts_with_keywords_when_economy(
self, processor: ParagraphIndexProcessor, dataset: Mock
) -> None:
dataset.indexing_technique = "economy"
dataset.indexing_technique = IndexTechniqueType.ECONOMY
docs = [Document(page_content="chunk", metadata={})]
with patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls:
@@ -178,7 +179,7 @@ class TestParagraphIndexProcessor:
def test_load_uses_keyword_add_texts_without_keywords_when_economy(
self, processor: ParagraphIndexProcessor, dataset: Mock
) -> None:
dataset.indexing_technique = "economy"
dataset.indexing_technique = IndexTechniqueType.ECONOMY
docs = [Document(page_content="chunk", metadata={})]
with patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls:
@@ -208,7 +209,7 @@ class TestParagraphIndexProcessor:
def test_clean_economy_deletes_summaries_and_keywords(
self, processor: ParagraphIndexProcessor, dataset: Mock
) -> None:
dataset.indexing_technique = "economy"
dataset.indexing_technique = IndexTechniqueType.ECONOMY
with (
patch(
@@ -222,7 +223,7 @@ class TestParagraphIndexProcessor:
mock_keyword_cls.return_value.delete.assert_called_once()
def test_clean_deletes_keywords_by_ids(self, processor: ParagraphIndexProcessor, dataset: Mock) -> None:
dataset.indexing_technique = "economy"
dataset.indexing_technique = IndexTechniqueType.ECONOMY
with patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls:
processor.clean(dataset, ["node-2"], with_keywords=True)
@@ -267,7 +268,7 @@ class TestParagraphIndexProcessor:
def test_index_list_chunks_economy(
self, processor: ParagraphIndexProcessor, dataset: Mock, dataset_document: Mock
) -> None:
dataset.indexing_technique = "economy"
dataset.indexing_technique = IndexTechniqueType.ECONOMY
with (
patch(
"core.rag.index_processor.processor.paragraph_index_processor.helper.generate_text_hash",

View File

@@ -4,6 +4,7 @@ from unittest.mock import MagicMock, Mock, patch
import pytest
from core.entities.knowledge_entities import PreviewDetail
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from core.rag.index_processor.processor.parent_child_index_processor import ParentChildIndexProcessor
from core.rag.models.document import AttachmentDocument, ChildDocument, Document
from services.entities.knowledge_entities.knowledge_entities import ParentMode
@@ -19,7 +20,7 @@ class TestParentChildIndexProcessor:
dataset = Mock()
dataset.id = "dataset-1"
dataset.tenant_id = "tenant-1"
dataset.indexing_technique = "high_quality"
dataset.indexing_technique = IndexTechniqueType.HIGH_QUALITY
dataset.is_multimodal = True
return dataset

View File

@@ -6,6 +6,7 @@ import pytest
from werkzeug.datastructures import FileStorage
from core.entities.knowledge_entities import PreviewDetail
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from core.rag.index_processor.processor.qa_index_processor import QAIndexProcessor
from core.rag.models.document import AttachmentDocument, Document
@@ -33,7 +34,7 @@ class TestQAIndexProcessor:
dataset = Mock()
dataset.id = "dataset-1"
dataset.tenant_id = "tenant-1"
dataset.indexing_technique = "high_quality"
dataset.indexing_technique = IndexTechniqueType.HIGH_QUALITY
dataset.is_multimodal = True
return dataset
@@ -207,7 +208,7 @@ class TestQAIndexProcessor:
vector.create_multimodal.assert_called_once_with(multimodal_docs)
def test_load_skips_vector_for_non_high_quality(self, processor: QAIndexProcessor, dataset: Mock) -> None:
dataset.indexing_technique = "economy"
dataset.indexing_technique = IndexTechniqueType.ECONOMY
docs = [Document(page_content="Q1", metadata={"answer": "A1"})]
with patch("core.rag.index_processor.processor.qa_index_processor.Vector") as mock_vector_cls:
@@ -298,7 +299,7 @@ class TestQAIndexProcessor:
def test_index_requires_high_quality(
self, processor: QAIndexProcessor, dataset: Mock, dataset_document: Mock
) -> None:
dataset.indexing_technique = "economy"
dataset.indexing_technique = IndexTechniqueType.ECONOMY
qa_chunks = SimpleNamespace(qa_chunks=[SimpleNamespace(question="Q1", answer="A1")])
with (

View File

@@ -61,7 +61,7 @@ from core.indexing_runner import (
DocumentIsPausedError,
IndexingRunner,
)
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
from core.rag.models.document import ChildDocument, Document
from dify_graph.model_runtime.entities.model_entities import ModelType
from libs.datetime_utils import naive_utc_now
@@ -76,7 +76,7 @@ from models.dataset import Document as DatasetDocument
def create_mock_dataset(
dataset_id: str | None = None,
tenant_id: str | None = None,
indexing_technique: str = "high_quality",
indexing_technique: str = IndexTechniqueType.HIGH_QUALITY,
embedding_provider: str = "openai",
embedding_model: str = "text-embedding-ada-002",
) -> Mock:
@@ -458,7 +458,7 @@ class TestIndexingRunnerTransform:
dataset = Mock(spec=Dataset)
dataset.id = str(uuid.uuid4())
dataset.tenant_id = str(uuid.uuid4())
dataset.indexing_technique = "high_quality"
dataset.indexing_technique = IndexTechniqueType.HIGH_QUALITY
dataset.embedding_model_provider = "openai"
dataset.embedding_model = "text-embedding-ada-002"
return dataset
@@ -521,7 +521,7 @@ class TestIndexingRunnerTransform:
"""Test transformation with economy indexing (no embeddings)."""
# Arrange
runner = IndexingRunner()
sample_dataset.indexing_technique = "economy"
sample_dataset.indexing_technique = IndexTechniqueType.ECONOMY
mock_processor = MagicMock()
transformed_docs = [
@@ -605,7 +605,7 @@ class TestIndexingRunnerLoad:
dataset = Mock(spec=Dataset)
dataset.id = str(uuid.uuid4())
dataset.tenant_id = str(uuid.uuid4())
dataset.indexing_technique = "high_quality"
dataset.indexing_technique = IndexTechniqueType.HIGH_QUALITY
dataset.embedding_model_provider = "openai"
dataset.embedding_model = "text-embedding-ada-002"
return dataset
@@ -674,7 +674,7 @@ class TestIndexingRunnerLoad:
"""Test loading with economy indexing (keyword only)."""
# Arrange
runner = IndexingRunner()
sample_dataset.indexing_technique = "economy"
sample_dataset.indexing_technique = IndexTechniqueType.ECONOMY
mock_processor = MagicMock()
@@ -701,7 +701,7 @@ class TestIndexingRunnerLoad:
# Arrange
runner = IndexingRunner()
sample_dataset_document.doc_form = IndexStructureType.PARENT_CHILD_INDEX
sample_dataset.indexing_technique = "high_quality"
sample_dataset.indexing_technique = IndexTechniqueType.HIGH_QUALITY
# Add child documents
for doc in sample_documents:
@@ -795,7 +795,7 @@ class TestIndexingRunnerRun:
mock_dataset = Mock(spec=Dataset)
mock_dataset.id = doc.dataset_id
mock_dataset.tenant_id = doc.tenant_id
mock_dataset.indexing_technique = "economy"
mock_dataset.indexing_technique = IndexTechniqueType.ECONOMY
mock_dependencies["db"].session.query.return_value.filter_by.return_value.first.return_value = mock_dataset
mock_process_rule = Mock(spec=DatasetProcessRule)
@@ -949,7 +949,7 @@ class TestIndexingRunnerRun:
mock_dependencies["db"].session.get.side_effect = get_side_effect
mock_dataset = Mock(spec=Dataset)
mock_dataset.indexing_technique = "economy"
mock_dataset.indexing_technique = IndexTechniqueType.ECONOMY
mock_dependencies["db"].session.query.return_value.filter_by.return_value.first.return_value = mock_dataset
mock_process_rule = Mock(spec=DatasetProcessRule)

View File

@@ -5,6 +5,7 @@ from unittest.mock import Mock
import pytest
from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from core.workflow.nodes.knowledge_index.entities import KnowledgeIndexNodeData
from core.workflow.nodes.knowledge_index.exc import KnowledgeIndexNodeError
from core.workflow.nodes.knowledge_index.knowledge_index_node import KnowledgeIndexNode
@@ -78,7 +79,7 @@ def sample_node_data():
type="knowledge-index",
chunk_structure="general_structure",
index_chunk_variable_selector=["start", "chunks"],
indexing_technique="high_quality",
indexing_technique=IndexTechniqueType.HIGH_QUALITY,
summary_index_setting=None,
)