mirror of
https://github.com/langgenius/dify.git
synced 2026-05-04 17:38:04 +08:00
refactor: use EnumText for ApiToolProvider.schema_type_str and Docume… (#33983)
This commit is contained in:
@ -111,6 +111,7 @@ from unittest.mock import Mock, patch
|
||||
import pytest
|
||||
|
||||
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
||||
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||
from dify_graph.model_runtime.entities.model_entities import ModelType
|
||||
from models.dataset import Dataset, DatasetProcessRule, Document
|
||||
from services.dataset_service import DatasetService, DocumentService
|
||||
@ -188,7 +189,7 @@ class DocumentValidationTestDataFactory:
|
||||
def create_knowledge_config_mock(
|
||||
data_source: DataSource | None = None,
|
||||
process_rule: ProcessRule | None = None,
|
||||
doc_form: str = "text_model",
|
||||
doc_form: str = IndexStructureType.PARAGRAPH_INDEX,
|
||||
indexing_technique: str = "high_quality",
|
||||
**kwargs,
|
||||
) -> Mock:
|
||||
@ -326,8 +327,8 @@ class TestDatasetServiceCheckDocForm:
|
||||
- Validation logic works correctly
|
||||
"""
|
||||
# Arrange
|
||||
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form="text_model")
|
||||
doc_form = "text_model"
|
||||
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form=IndexStructureType.PARAGRAPH_INDEX)
|
||||
doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||
|
||||
# Act (should not raise)
|
||||
DatasetService.check_doc_form(dataset, doc_form)
|
||||
@ -349,7 +350,7 @@ class TestDatasetServiceCheckDocForm:
|
||||
"""
|
||||
# Arrange
|
||||
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form=None)
|
||||
doc_form = "text_model"
|
||||
doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||
|
||||
# Act (should not raise)
|
||||
DatasetService.check_doc_form(dataset, doc_form)
|
||||
@ -370,8 +371,8 @@ class TestDatasetServiceCheckDocForm:
|
||||
- Error type is correct
|
||||
"""
|
||||
# Arrange
|
||||
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form="text_model")
|
||||
doc_form = "table_model" # Different form
|
||||
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form=IndexStructureType.PARAGRAPH_INDEX)
|
||||
doc_form = IndexStructureType.PARENT_CHILD_INDEX # Different form
|
||||
|
||||
# Act & Assert
|
||||
with pytest.raises(ValueError, match="doc_form is different from the dataset doc_form"):
|
||||
@ -390,7 +391,7 @@ class TestDatasetServiceCheckDocForm:
|
||||
"""
|
||||
# Arrange
|
||||
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form="knowledge_card")
|
||||
doc_form = "text_model" # Different form
|
||||
doc_form = IndexStructureType.PARAGRAPH_INDEX # Different form
|
||||
|
||||
# Act & Assert
|
||||
with pytest.raises(ValueError, match="doc_form is different from the dataset doc_form"):
|
||||
|
||||
@ -2,6 +2,7 @@ from unittest.mock import MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||
from models.account import Account
|
||||
from models.dataset import ChildChunk, Dataset, Document, DocumentSegment
|
||||
from models.enums import SegmentType
|
||||
@ -91,7 +92,7 @@ class SegmentTestDataFactory:
|
||||
document_id: str = "doc-123",
|
||||
dataset_id: str = "dataset-123",
|
||||
tenant_id: str = "tenant-123",
|
||||
doc_form: str = "text_model",
|
||||
doc_form: str = IndexStructureType.PARAGRAPH_INDEX,
|
||||
word_count: int = 100,
|
||||
**kwargs,
|
||||
) -> Mock:
|
||||
@ -210,7 +211,7 @@ class TestSegmentServiceCreateSegment:
|
||||
def test_create_segment_with_qa_model(self, mock_db_session, mock_current_user):
|
||||
"""Test creation of segment with QA model (requires answer)."""
|
||||
# Arrange
|
||||
document = SegmentTestDataFactory.create_document_mock(doc_form="qa_model", word_count=100)
|
||||
document = SegmentTestDataFactory.create_document_mock(doc_form=IndexStructureType.QA_INDEX, word_count=100)
|
||||
dataset = SegmentTestDataFactory.create_dataset_mock(indexing_technique="economy")
|
||||
args = {"content": "What is AI?", "answer": "AI is Artificial Intelligence", "keywords": ["ai"]}
|
||||
|
||||
@ -429,7 +430,7 @@ class TestSegmentServiceUpdateSegment:
|
||||
"""Test update segment with QA model (includes answer)."""
|
||||
# Arrange
|
||||
segment = SegmentTestDataFactory.create_segment_mock(enabled=True, word_count=10)
|
||||
document = SegmentTestDataFactory.create_document_mock(doc_form="qa_model", word_count=100)
|
||||
document = SegmentTestDataFactory.create_document_mock(doc_form=IndexStructureType.QA_INDEX, word_count=100)
|
||||
dataset = SegmentTestDataFactory.create_dataset_mock(indexing_technique="economy")
|
||||
args = SegmentUpdateArgs(content="Updated question", answer="Updated answer", keywords=["qa"])
|
||||
|
||||
|
||||
@ -4,6 +4,7 @@ from unittest.mock import Mock, create_autospec
|
||||
import pytest
|
||||
from redis.exceptions import LockNotOwnedError
|
||||
|
||||
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||
from models.account import Account
|
||||
from models.dataset import Dataset, Document
|
||||
from services.dataset_service import DocumentService, SegmentService
|
||||
@ -76,7 +77,7 @@ def test_save_document_with_dataset_id_ignores_lock_not_owned(
|
||||
info_list = types.SimpleNamespace(data_source_type="upload_file")
|
||||
data_source = types.SimpleNamespace(info_list=info_list)
|
||||
knowledge_config = types.SimpleNamespace(
|
||||
doc_form="qa_model",
|
||||
doc_form=IndexStructureType.QA_INDEX,
|
||||
original_document_id=None, # go into "new document" branch
|
||||
data_source=data_source,
|
||||
indexing_technique="high_quality",
|
||||
@ -131,7 +132,7 @@ def test_add_segment_ignores_lock_not_owned(
|
||||
document.id = "doc-1"
|
||||
document.dataset_id = dataset.id
|
||||
document.word_count = 0
|
||||
document.doc_form = "qa_model"
|
||||
document.doc_form = IndexStructureType.QA_INDEX
|
||||
|
||||
# Minimal args required by add_segment
|
||||
args = {
|
||||
@ -174,4 +175,4 @@ def test_multi_create_segment_ignores_lock_not_owned(
|
||||
document.id = "doc-1"
|
||||
document.dataset_id = dataset.id
|
||||
document.word_count = 0
|
||||
document.doc_form = "qa_model"
|
||||
document.doc_form = IndexStructureType.QA_INDEX
|
||||
|
||||
@ -11,6 +11,7 @@ from unittest.mock import MagicMock
|
||||
import pytest
|
||||
|
||||
import services.summary_index_service as summary_module
|
||||
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||
from models.enums import SegmentStatus, SummaryStatus
|
||||
from services.summary_index_service import SummaryIndexService
|
||||
|
||||
@ -48,7 +49,7 @@ def _segment(*, has_document: bool = True) -> MagicMock:
|
||||
if has_document:
|
||||
doc = MagicMock(name="document")
|
||||
doc.doc_language = "en"
|
||||
doc.doc_form = "text_model"
|
||||
doc.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||
segment.document = doc
|
||||
else:
|
||||
segment.document = None
|
||||
@ -623,13 +624,13 @@ def test_generate_summaries_for_document_skip_conditions(monkeypatch: pytest.Mon
|
||||
dataset = _dataset(indexing_technique="economy")
|
||||
document = MagicMock(spec=summary_module.DatasetDocument)
|
||||
document.id = "doc-1"
|
||||
document.doc_form = "text_model"
|
||||
document.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||
assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": True}) == []
|
||||
|
||||
dataset = _dataset()
|
||||
assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": False}) == []
|
||||
|
||||
document.doc_form = "qa_model"
|
||||
document.doc_form = IndexStructureType.QA_INDEX
|
||||
assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": True}) == []
|
||||
|
||||
|
||||
@ -637,7 +638,7 @@ def test_generate_summaries_for_document_runs_and_handles_errors(monkeypatch: py
|
||||
dataset = _dataset()
|
||||
document = MagicMock(spec=summary_module.DatasetDocument)
|
||||
document.id = "doc-1"
|
||||
document.doc_form = "text_model"
|
||||
document.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||
|
||||
seg1 = _segment()
|
||||
seg2 = _segment()
|
||||
@ -673,7 +674,7 @@ def test_generate_summaries_for_document_no_segments_returns_empty(monkeypatch:
|
||||
dataset = _dataset()
|
||||
document = MagicMock(spec=summary_module.DatasetDocument)
|
||||
document.id = "doc-1"
|
||||
document.doc_form = "text_model"
|
||||
document.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||
|
||||
session = MagicMock()
|
||||
query = MagicMock()
|
||||
@ -696,7 +697,7 @@ def test_generate_summaries_for_document_applies_segment_ids_and_only_parent_chu
|
||||
dataset = _dataset()
|
||||
document = MagicMock(spec=summary_module.DatasetDocument)
|
||||
document.id = "doc-1"
|
||||
document.doc_form = "text_model"
|
||||
document.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||
seg = _segment()
|
||||
|
||||
session = MagicMock()
|
||||
@ -935,7 +936,7 @@ def test_update_summary_for_segment_skip_conditions() -> None:
|
||||
SummaryIndexService.update_summary_for_segment(_segment(), _dataset(indexing_technique="economy"), "x") is None
|
||||
)
|
||||
seg = _segment(has_document=True)
|
||||
seg.document.doc_form = "qa_model"
|
||||
seg.document.doc_form = IndexStructureType.QA_INDEX
|
||||
assert SummaryIndexService.update_summary_for_segment(seg, _dataset(), "x") is None
|
||||
|
||||
|
||||
|
||||
@ -9,6 +9,7 @@ from unittest.mock import MagicMock
|
||||
import pytest
|
||||
|
||||
import services.vector_service as vector_service_module
|
||||
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||
from services.vector_service import VectorService
|
||||
|
||||
|
||||
@ -32,7 +33,7 @@ class _ParentDocStub:
|
||||
def _make_dataset(
|
||||
*,
|
||||
indexing_technique: str = "high_quality",
|
||||
doc_form: str = "text_model",
|
||||
doc_form: str = IndexStructureType.PARAGRAPH_INDEX,
|
||||
tenant_id: str = "tenant-1",
|
||||
dataset_id: str = "dataset-1",
|
||||
is_multimodal: bool = False,
|
||||
@ -106,7 +107,7 @@ def test_create_segments_vector_regular_indexing_loads_documents_and_keywords(mo
|
||||
factory_instance.init_index_processor.return_value = index_processor
|
||||
monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance))
|
||||
|
||||
VectorService.create_segments_vector([["k1"]], [segment], dataset, "text_model")
|
||||
VectorService.create_segments_vector([["k1"]], [segment], dataset, IndexStructureType.PARAGRAPH_INDEX)
|
||||
|
||||
index_processor.load.assert_called_once()
|
||||
args, kwargs = index_processor.load.call_args
|
||||
@ -131,7 +132,7 @@ def test_create_segments_vector_regular_indexing_loads_multimodal_documents(monk
|
||||
factory_instance.init_index_processor.return_value = index_processor
|
||||
monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance))
|
||||
|
||||
VectorService.create_segments_vector([["k1"]], [segment], dataset, "text_model")
|
||||
VectorService.create_segments_vector([["k1"]], [segment], dataset, IndexStructureType.PARAGRAPH_INDEX)
|
||||
|
||||
assert index_processor.load.call_count == 2
|
||||
first_args, first_kwargs = index_processor.load.call_args_list[0]
|
||||
@ -153,7 +154,7 @@ def test_create_segments_vector_with_no_segments_does_not_load(monkeypatch: pyte
|
||||
factory_instance.init_index_processor.return_value = index_processor
|
||||
monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance))
|
||||
|
||||
VectorService.create_segments_vector(None, [], dataset, "text_model")
|
||||
VectorService.create_segments_vector(None, [], dataset, IndexStructureType.PARAGRAPH_INDEX)
|
||||
index_processor.load.assert_not_called()
|
||||
|
||||
|
||||
@ -392,7 +393,7 @@ def test_update_segment_vector_economy_uses_keyword_without_keywords_list(monkey
|
||||
|
||||
|
||||
def test_generate_child_chunks_regenerate_cleans_then_saves_children(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
dataset = _make_dataset(doc_form="text_model", tenant_id="tenant-1", dataset_id="dataset-1")
|
||||
dataset = _make_dataset(doc_form=IndexStructureType.PARAGRAPH_INDEX, tenant_id="tenant-1", dataset_id="dataset-1")
|
||||
segment = _make_segment(segment_id="seg-1")
|
||||
|
||||
dataset_document = MagicMock()
|
||||
@ -439,7 +440,7 @@ def test_generate_child_chunks_regenerate_cleans_then_saves_children(monkeypatch
|
||||
|
||||
|
||||
def test_generate_child_chunks_commits_even_when_no_children(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
dataset = _make_dataset(doc_form="text_model")
|
||||
dataset = _make_dataset(doc_form=IndexStructureType.PARAGRAPH_INDEX)
|
||||
segment = _make_segment()
|
||||
dataset_document = MagicMock()
|
||||
dataset_document.doc_language = "en"
|
||||
|
||||
@ -121,6 +121,7 @@ import pytest
|
||||
from core.rag.datasource.vdb.vector_base import BaseVector
|
||||
from core.rag.datasource.vdb.vector_factory import Vector
|
||||
from core.rag.datasource.vdb.vector_type import VectorType
|
||||
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||
from core.rag.models.document import Document
|
||||
from models.dataset import ChildChunk, Dataset, DatasetDocument, DatasetProcessRule, DocumentSegment
|
||||
from services.vector_service import VectorService
|
||||
@ -151,7 +152,7 @@ class VectorServiceTestDataFactory:
|
||||
def create_dataset_mock(
|
||||
dataset_id: str = "dataset-123",
|
||||
tenant_id: str = "tenant-123",
|
||||
doc_form: str = "text_model",
|
||||
doc_form: str = IndexStructureType.PARAGRAPH_INDEX,
|
||||
indexing_technique: str = "high_quality",
|
||||
embedding_model_provider: str = "openai",
|
||||
embedding_model: str = "text-embedding-ada-002",
|
||||
@ -493,7 +494,7 @@ class TestVectorService:
|
||||
"""
|
||||
# Arrange
|
||||
dataset = VectorServiceTestDataFactory.create_dataset_mock(
|
||||
doc_form="text_model", indexing_technique="high_quality"
|
||||
doc_form=IndexStructureType.PARAGRAPH_INDEX, indexing_technique="high_quality"
|
||||
)
|
||||
|
||||
segment = VectorServiceTestDataFactory.create_document_segment_mock()
|
||||
@ -505,7 +506,7 @@ class TestVectorService:
|
||||
mock_index_processor_factory.return_value.init_index_processor.return_value = mock_index_processor
|
||||
|
||||
# Act
|
||||
VectorService.create_segments_vector(keywords_list, [segment], dataset, "text_model")
|
||||
VectorService.create_segments_vector(keywords_list, [segment], dataset, IndexStructureType.PARAGRAPH_INDEX)
|
||||
|
||||
# Assert
|
||||
mock_index_processor.load.assert_called_once()
|
||||
@ -649,7 +650,7 @@ class TestVectorService:
|
||||
mock_index_processor_factory.return_value.init_index_processor.return_value = mock_index_processor
|
||||
|
||||
# Act
|
||||
VectorService.create_segments_vector(None, [], dataset, "text_model")
|
||||
VectorService.create_segments_vector(None, [], dataset, IndexStructureType.PARAGRAPH_INDEX)
|
||||
|
||||
# Assert
|
||||
mock_index_processor.load.assert_not_called()
|
||||
|
||||
Reference in New Issue
Block a user