mirror of
https://github.com/langgenius/dify.git
synced 2026-05-06 02:18:08 +08:00
Merge remote-tracking branch 'origin/main' into feat/trigger
This commit is contained in:
@ -11,8 +11,8 @@ from controllers.console.app import completion as completion_api
|
||||
from controllers.console.app import message as message_api
|
||||
from controllers.console.app import wraps
|
||||
from libs.datetime_utils import naive_utc_now
|
||||
from models import Account, App, Tenant
|
||||
from models.account import TenantAccountRole
|
||||
from models import App, Tenant
|
||||
from models.account import Account, TenantAccountJoin, TenantAccountRole
|
||||
from models.model import AppMode
|
||||
from services.app_generate_service import AppGenerateService
|
||||
|
||||
@ -31,9 +31,8 @@ class TestChatMessageApiPermissions:
|
||||
return app
|
||||
|
||||
@pytest.fixture
|
||||
def mock_account(self):
|
||||
def mock_account(self, monkeypatch: pytest.MonkeyPatch):
|
||||
"""Create a mock Account for testing."""
|
||||
|
||||
account = Account()
|
||||
account.id = str(uuid.uuid4())
|
||||
account.name = "Test User"
|
||||
@ -42,12 +41,24 @@ class TestChatMessageApiPermissions:
|
||||
account.created_at = naive_utc_now()
|
||||
account.updated_at = naive_utc_now()
|
||||
|
||||
# Create mock tenant
|
||||
tenant = Tenant()
|
||||
tenant.id = str(uuid.uuid4())
|
||||
tenant.name = "Test Tenant"
|
||||
|
||||
account._current_tenant = tenant
|
||||
mock_session_instance = mock.Mock()
|
||||
|
||||
mock_tenant_join = TenantAccountJoin(role=TenantAccountRole.OWNER)
|
||||
monkeypatch.setattr(mock_session_instance, "scalar", mock.Mock(return_value=mock_tenant_join))
|
||||
|
||||
mock_scalars_result = mock.Mock()
|
||||
mock_scalars_result.one.return_value = tenant
|
||||
monkeypatch.setattr(mock_session_instance, "scalars", mock.Mock(return_value=mock_scalars_result))
|
||||
|
||||
mock_session_context = mock.Mock()
|
||||
mock_session_context.__enter__.return_value = mock_session_instance
|
||||
monkeypatch.setattr("models.account.Session", lambda _, expire_on_commit: mock_session_context)
|
||||
|
||||
account.current_tenant = tenant
|
||||
return account
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
||||
@ -18,124 +18,87 @@ class TestAppDescriptionValidationUnit:
|
||||
"""Unit tests for description validation function"""
|
||||
|
||||
def test_validate_description_length_function(self):
|
||||
"""Test the _validate_description_length function directly"""
|
||||
from controllers.console.app.app import _validate_description_length
|
||||
"""Test the validate_description_length function directly"""
|
||||
from libs.validators import validate_description_length
|
||||
|
||||
# Test valid descriptions
|
||||
assert _validate_description_length("") == ""
|
||||
assert _validate_description_length("x" * 400) == "x" * 400
|
||||
assert _validate_description_length(None) is None
|
||||
assert validate_description_length("") == ""
|
||||
assert validate_description_length("x" * 400) == "x" * 400
|
||||
assert validate_description_length(None) is None
|
||||
|
||||
# Test invalid descriptions
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
_validate_description_length("x" * 401)
|
||||
validate_description_length("x" * 401)
|
||||
assert "Description cannot exceed 400 characters." in str(exc_info.value)
|
||||
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
_validate_description_length("x" * 500)
|
||||
validate_description_length("x" * 500)
|
||||
assert "Description cannot exceed 400 characters." in str(exc_info.value)
|
||||
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
_validate_description_length("x" * 1000)
|
||||
validate_description_length("x" * 1000)
|
||||
assert "Description cannot exceed 400 characters." in str(exc_info.value)
|
||||
|
||||
def test_validation_consistency_with_dataset(self):
|
||||
"""Test that App and Dataset validation functions are consistent"""
|
||||
from controllers.console.app.app import _validate_description_length as app_validate
|
||||
from controllers.console.datasets.datasets import _validate_description_length as dataset_validate
|
||||
from controllers.service_api.dataset.dataset import _validate_description_length as service_dataset_validate
|
||||
|
||||
# Test same valid inputs
|
||||
valid_desc = "x" * 400
|
||||
assert app_validate(valid_desc) == dataset_validate(valid_desc) == service_dataset_validate(valid_desc)
|
||||
assert app_validate("") == dataset_validate("") == service_dataset_validate("")
|
||||
assert app_validate(None) == dataset_validate(None) == service_dataset_validate(None)
|
||||
|
||||
# Test same invalid inputs produce same error
|
||||
invalid_desc = "x" * 401
|
||||
|
||||
app_error = None
|
||||
dataset_error = None
|
||||
service_dataset_error = None
|
||||
|
||||
try:
|
||||
app_validate(invalid_desc)
|
||||
except ValueError as e:
|
||||
app_error = str(e)
|
||||
|
||||
try:
|
||||
dataset_validate(invalid_desc)
|
||||
except ValueError as e:
|
||||
dataset_error = str(e)
|
||||
|
||||
try:
|
||||
service_dataset_validate(invalid_desc)
|
||||
except ValueError as e:
|
||||
service_dataset_error = str(e)
|
||||
|
||||
assert app_error == dataset_error == service_dataset_error
|
||||
assert app_error == "Description cannot exceed 400 characters."
|
||||
|
||||
def test_boundary_values(self):
|
||||
"""Test boundary values for description validation"""
|
||||
from controllers.console.app.app import _validate_description_length
|
||||
from libs.validators import validate_description_length
|
||||
|
||||
# Test exact boundary
|
||||
exactly_400 = "x" * 400
|
||||
assert _validate_description_length(exactly_400) == exactly_400
|
||||
assert validate_description_length(exactly_400) == exactly_400
|
||||
|
||||
# Test just over boundary
|
||||
just_over_400 = "x" * 401
|
||||
with pytest.raises(ValueError):
|
||||
_validate_description_length(just_over_400)
|
||||
validate_description_length(just_over_400)
|
||||
|
||||
# Test just under boundary
|
||||
just_under_400 = "x" * 399
|
||||
assert _validate_description_length(just_under_400) == just_under_400
|
||||
assert validate_description_length(just_under_400) == just_under_400
|
||||
|
||||
def test_edge_cases(self):
|
||||
"""Test edge cases for description validation"""
|
||||
from controllers.console.app.app import _validate_description_length
|
||||
from libs.validators import validate_description_length
|
||||
|
||||
# Test None input
|
||||
assert _validate_description_length(None) is None
|
||||
assert validate_description_length(None) is None
|
||||
|
||||
# Test empty string
|
||||
assert _validate_description_length("") == ""
|
||||
assert validate_description_length("") == ""
|
||||
|
||||
# Test single character
|
||||
assert _validate_description_length("a") == "a"
|
||||
assert validate_description_length("a") == "a"
|
||||
|
||||
# Test unicode characters
|
||||
unicode_desc = "测试" * 200 # 400 characters in Chinese
|
||||
assert _validate_description_length(unicode_desc) == unicode_desc
|
||||
assert validate_description_length(unicode_desc) == unicode_desc
|
||||
|
||||
# Test unicode over limit
|
||||
unicode_over = "测试" * 201 # 402 characters
|
||||
with pytest.raises(ValueError):
|
||||
_validate_description_length(unicode_over)
|
||||
validate_description_length(unicode_over)
|
||||
|
||||
def test_whitespace_handling(self):
|
||||
"""Test how validation handles whitespace"""
|
||||
from controllers.console.app.app import _validate_description_length
|
||||
from libs.validators import validate_description_length
|
||||
|
||||
# Test description with spaces
|
||||
spaces_400 = " " * 400
|
||||
assert _validate_description_length(spaces_400) == spaces_400
|
||||
assert validate_description_length(spaces_400) == spaces_400
|
||||
|
||||
# Test description with spaces over limit
|
||||
spaces_401 = " " * 401
|
||||
with pytest.raises(ValueError):
|
||||
_validate_description_length(spaces_401)
|
||||
validate_description_length(spaces_401)
|
||||
|
||||
# Test mixed content
|
||||
mixed_400 = "a" * 200 + " " * 200
|
||||
assert _validate_description_length(mixed_400) == mixed_400
|
||||
assert validate_description_length(mixed_400) == mixed_400
|
||||
|
||||
# Test mixed over limit
|
||||
mixed_401 = "a" * 200 + " " * 201
|
||||
with pytest.raises(ValueError):
|
||||
_validate_description_length(mixed_401)
|
||||
validate_description_length(mixed_401)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@ -9,8 +9,8 @@ from flask.testing import FlaskClient
|
||||
from controllers.console.app import model_config as model_config_api
|
||||
from controllers.console.app import wraps
|
||||
from libs.datetime_utils import naive_utc_now
|
||||
from models import Account, App, Tenant
|
||||
from models.account import TenantAccountRole
|
||||
from models import App, Tenant
|
||||
from models.account import Account, TenantAccountJoin, TenantAccountRole
|
||||
from models.model import AppMode
|
||||
from services.app_model_config_service import AppModelConfigService
|
||||
|
||||
@ -30,9 +30,8 @@ class TestModelConfigResourcePermissions:
|
||||
return app
|
||||
|
||||
@pytest.fixture
|
||||
def mock_account(self):
|
||||
def mock_account(self, monkeypatch: pytest.MonkeyPatch):
|
||||
"""Create a mock Account for testing."""
|
||||
|
||||
account = Account()
|
||||
account.id = str(uuid.uuid4())
|
||||
account.name = "Test User"
|
||||
@ -41,12 +40,24 @@ class TestModelConfigResourcePermissions:
|
||||
account.created_at = naive_utc_now()
|
||||
account.updated_at = naive_utc_now()
|
||||
|
||||
# Create mock tenant
|
||||
tenant = Tenant()
|
||||
tenant.id = str(uuid.uuid4())
|
||||
tenant.name = "Test Tenant"
|
||||
|
||||
account._current_tenant = tenant
|
||||
mock_session_instance = mock.Mock()
|
||||
|
||||
mock_tenant_join = TenantAccountJoin(role=TenantAccountRole.OWNER)
|
||||
monkeypatch.setattr(mock_session_instance, "scalar", mock.Mock(return_value=mock_tenant_join))
|
||||
|
||||
mock_scalars_result = mock.Mock()
|
||||
mock_scalars_result.one.return_value = tenant
|
||||
monkeypatch.setattr(mock_session_instance, "scalars", mock.Mock(return_value=mock_scalars_result))
|
||||
|
||||
mock_session_context = mock.Mock()
|
||||
mock_session_context.__enter__.return_value = mock_session_instance
|
||||
monkeypatch.setattr("models.account.Session", lambda _, expire_on_commit: mock_session_context)
|
||||
|
||||
account.current_tenant = tenant
|
||||
return account
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
||||
@ -0,0 +1,505 @@
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from faker import Faker
|
||||
|
||||
from core.rag.index_processor.constant.index_type import IndexType
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_redis import redis_client
|
||||
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
||||
from models.dataset import Dataset, Document, DocumentSegment
|
||||
from tasks.enable_segments_to_index_task import enable_segments_to_index_task
|
||||
|
||||
|
||||
class TestEnableSegmentsToIndexTask:
    """Integration tests for ``enable_segments_to_index_task``.

    Rows are written through ``db.session`` and indexing markers through
    ``redis_client``; ``IndexProcessorFactory`` is the only collaborator that
    is replaced with a mock.
    """

    @staticmethod
    def _indexing_cache_key(segment):
        """Return the Redis key used to mark ``segment`` as being indexed."""
        return f"segment_{segment.id}_indexing"

    def _flag_segments_as_indexing(self, segments):
        """Write an "indexing in progress" marker to Redis for every segment."""
        for segment in segments:
            # Markers expire after 5 minutes.
            redis_client.set(self._indexing_cache_key(segment), "processing", ex=300)

    def _assert_indexing_flags_cleared(self, segments):
        """Assert that no segment still has its indexing marker in Redis."""
        for segment in segments:
            assert redis_client.exists(self._indexing_cache_key(segment)) == 0

    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Patch ``IndexProcessorFactory`` inside the task module.

        Yields:
            dict: ``"index_processor_factory"`` is the patched factory class and
            ``"index_processor"`` is the mock returned by
            ``factory(...).init_index_processor()``.
        """
        with patch("tasks.enable_segments_to_index_task.IndexProcessorFactory") as factory_mock:
            processor_mock = MagicMock()
            factory_mock.return_value.init_index_processor.return_value = processor_mock

            yield {
                "index_processor_factory": factory_mock,
                "index_processor": processor_mock,
            }

    def _create_test_dataset_and_document(self, db_session_with_containers, mock_external_service_dependencies):
        """
        Persist an account, tenant, membership, dataset and one document.

        Args:
            db_session_with_containers: Database session fixture (not used directly here)
            mock_external_service_dependencies: Mock dependencies (not used directly here)

        Returns:
            tuple: (dataset, document) - the committed dataset and document instances
        """
        fake = Faker()

        # Owner account
        owner = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )
        db.session.add(owner)
        db.session.commit()

        # Tenant the dataset belongs to
        workspace = Tenant(
            name=fake.company(),
            status="normal",
        )
        db.session.add(workspace)
        db.session.commit()

        # Link the account to the tenant as owner
        membership = TenantAccountJoin(
            tenant_id=workspace.id,
            account_id=owner.id,
            role=TenantAccountRole.OWNER.value,
            current=True,
        )
        db.session.add(membership)
        db.session.commit()

        dataset = Dataset(
            id=fake.uuid4(),
            tenant_id=workspace.id,
            name=fake.company(),
            description=fake.text(max_nb_chars=100),
            data_source_type="upload_file",
            indexing_technique="high_quality",
            created_by=owner.id,
        )
        db.session.add(dataset)
        db.session.commit()

        document = Document(
            id=fake.uuid4(),
            tenant_id=workspace.id,
            dataset_id=dataset.id,
            position=1,
            data_source_type="upload_file",
            batch="test_batch",
            name=fake.file_name(),
            created_from="upload_file",
            created_by=owner.id,
            indexing_status="completed",
            enabled=True,
            doc_form=IndexType.PARAGRAPH_INDEX,
        )
        db.session.add(document)
        db.session.commit()

        # Reload the dataset now that its document exists.
        db.session.refresh(dataset)

        return dataset, document

    def _create_test_segments(
        self, db_session_with_containers, document, dataset, count=3, enabled=False, status="completed"
    ):
        """
        Persist ``count`` document segments attached to ``document``.

        Args:
            db_session_with_containers: Database session fixture (not used directly here)
            document: Document instance the segments belong to
            dataset: Dataset instance the segments belong to
            count: Number of segments to create
            enabled: Value stored in each segment's ``enabled`` column
            status: Value stored in each segment's ``status`` column

        Returns:
            list: The created DocumentSegment instances, in creation order
        """
        fake = Faker()
        created = []

        for position in range(count):
            body = fake.text(max_nb_chars=200)
            word_total = len(body.split())
            segment = DocumentSegment(
                id=fake.uuid4(),
                tenant_id=document.tenant_id,
                dataset_id=dataset.id,
                document_id=document.id,
                position=position,
                content=body,
                word_count=word_total,
                tokens=word_total * 2,
                index_node_id=f"node_{position}",
                index_node_hash=f"hash_{position}",
                enabled=enabled,
                status=status,
                created_by=document.created_by,
            )
            db.session.add(segment)
            created.append(segment)

        db.session.commit()
        return created

    def test_enable_segments_to_index_success(self, db_session_with_containers, mock_external_service_dependencies):
        """
        Happy path with the paragraph index type.

        Checks that:
        - the factory is invoked once with ``IndexType.PARAGRAPH_INDEX``
        - ``load`` is invoked once and receives one document per segment
        - each document mirrors its segment's content and metadata
        - the Redis indexing markers are removed
        """
        # Arrange
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        segments = self._create_test_segments(db_session_with_containers, document, dataset)
        segment_ids = [segment.id for segment in segments]

        # Simulate indexing in progress and confirm the markers are present.
        self._flag_segments_as_indexing(segments)
        for segment in segments:
            assert redis_client.exists(self._indexing_cache_key(segment)) == 1

        # Act
        enable_segments_to_index_task(segment_ids, dataset.id, document.id)

        # Assert
        factory_mock = mock_external_service_dependencies["index_processor_factory"]
        processor_mock = mock_external_service_dependencies["index_processor"]
        factory_mock.assert_called_once_with(IndexType.PARAGRAPH_INDEX)
        processor_mock.load.assert_called_once()

        load_call = processor_mock.load.call_args
        assert load_call is not None
        documents = load_call.args[1]  # second positional argument is the documents list
        assert len(documents) == 3

        for doc, segment in zip(documents, segments):
            assert doc.page_content == segment.content
            assert doc.metadata["doc_id"] == segment.index_node_id
            assert doc.metadata["doc_hash"] == segment.index_node_hash
            assert doc.metadata["document_id"] == document.id
            assert doc.metadata["dataset_id"] == dataset.id

        self._assert_indexing_flags_cleared(segments)

    def test_enable_segments_to_index_with_different_index_type(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Same flow as the happy path, but the document uses ``IndexType.QA_INDEX``.

        Checks that:
        - the factory is invoked once with ``IndexType.QA_INDEX``
        - ``load`` is invoked once with three documents
        - the Redis indexing markers are removed
        """
        # Arrange
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )

        # Switch the document to the QA index type and reload the dataset.
        document.doc_form = IndexType.QA_INDEX
        db.session.commit()
        db.session.refresh(dataset)

        segments = self._create_test_segments(db_session_with_containers, document, dataset)
        segment_ids = [segment.id for segment in segments]
        self._flag_segments_as_indexing(segments)

        # Act
        enable_segments_to_index_task(segment_ids, dataset.id, document.id)

        # Assert
        factory_mock = mock_external_service_dependencies["index_processor_factory"]
        processor_mock = mock_external_service_dependencies["index_processor"]
        factory_mock.assert_called_once_with(IndexType.QA_INDEX)
        processor_mock.load.assert_called_once()

        load_call = processor_mock.load.call_args
        assert load_call is not None
        documents = load_call.args[1]  # second positional argument is the documents list
        assert len(documents) == 3

        self._assert_indexing_flags_cleared(segments)

    def test_enable_segments_to_index_dataset_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        The task is given a dataset ID that was never persisted.

        Checks that neither the factory nor ``load`` is called.
        """
        # Arrange: random IDs that match no rows created by this test
        fake = Faker()
        missing_dataset_id = fake.uuid4()
        missing_document_id = fake.uuid4()
        segment_ids = [fake.uuid4()]

        # Act
        enable_segments_to_index_task(segment_ids, missing_dataset_id, missing_document_id)

        # Assert
        mock_external_service_dependencies["index_processor_factory"].assert_not_called()
        mock_external_service_dependencies["index_processor"].load.assert_not_called()

    def test_enable_segments_to_index_document_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        The dataset exists but the document ID was never persisted.

        Checks that neither the factory nor ``load`` is called.
        """
        # Arrange: real dataset, random document ID
        dataset, _ = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        fake = Faker()
        missing_document_id = fake.uuid4()
        segment_ids = [fake.uuid4()]

        # Act
        enable_segments_to_index_task(segment_ids, dataset.id, missing_document_id)

        # Assert
        mock_external_service_dependencies["index_processor_factory"].assert_not_called()
        mock_external_service_dependencies["index_processor"].load.assert_not_called()

    def test_enable_segments_to_index_invalid_document_status(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        The document is disabled, archived, or not yet completed.

        For each of the three states, checks that neither the factory nor
        ``load`` is called.
        """
        # Arrange
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        factory_mock = mock_external_service_dependencies["index_processor_factory"]
        processor_mock = mock_external_service_dependencies["index_processor"]

        # Label -> attribute overrides that put the document in that state.
        invalid_states = {
            "disabled": {"enabled": False},
            "archived": {"archived": True},
            "not_completed": {"indexing_status": "processing"},
        }

        for overrides in invalid_states.values():
            # Start every round from a valid document.
            document.enabled = True
            document.archived = False
            document.indexing_status = "completed"
            db.session.commit()

            # Apply the invalid state under test.
            for attr_name, attr_value in overrides.items():
                setattr(document, attr_name, attr_value)
            db.session.commit()

            segments = self._create_test_segments(db_session_with_containers, document, dataset)
            segment_ids = [segment.id for segment in segments]

            # Act
            enable_segments_to_index_task(segment_ids, dataset.id, document.id)

            # Assert
            factory_mock.assert_not_called()
            processor_mock.load.assert_not_called()

            # Remove this round's segments before the next state is tried.
            for segment in segments:
                db.session.delete(segment)
            db.session.commit()

    def test_enable_segments_to_index_segments_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Dataset and document exist but none of the segment IDs do.

        Checks that the factory is invoked once with
        ``IndexType.PARAGRAPH_INDEX`` while ``load`` is never called.
        """
        # Arrange
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        fake = Faker()
        missing_segment_ids = [fake.uuid4() for _ in range(3)]

        # Act
        enable_segments_to_index_task(missing_segment_ids, dataset.id, document.id)

        # Assert
        mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
        mock_external_service_dependencies["index_processor"].load.assert_not_called()

    def test_enable_segments_to_index_with_parent_child_structure(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        The document uses ``IndexType.PARENT_CHILD_INDEX``.

        ``DocumentSegment.get_child_chunks`` is patched to return two mock
        children. Checks that:
        - the factory is invoked once with ``IndexType.PARENT_CHILD_INDEX``
        - ``load`` is invoked once with three documents
        - every document exposes a ``children`` attribute of length two
        - the Redis indexing markers are removed
        """
        # Arrange
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )

        # Switch the document to the parent-child index type and reload the dataset.
        document.doc_form = IndexType.PARENT_CHILD_INDEX
        db.session.commit()
        db.session.refresh(dataset)

        segments = self._create_test_segments(db_session_with_containers, document, dataset)
        segment_ids = [segment.id for segment in segments]
        self._flag_segments_as_indexing(segments)

        with patch.object(DocumentSegment, "get_child_chunks") as get_child_chunks_mock:
            # Every segment reports the same two child chunks.
            get_child_chunks_mock.return_value = [
                MagicMock(
                    content=f"child_content_{i}",
                    index_node_id=f"child_node_{i}",
                    index_node_hash=f"child_hash_{i}",
                )
                for i in range(2)
            ]

            # Act
            enable_segments_to_index_task(segment_ids, dataset.id, document.id)

            # Assert
            factory_mock = mock_external_service_dependencies["index_processor_factory"]
            processor_mock = mock_external_service_dependencies["index_processor"]
            factory_mock.assert_called_once_with(IndexType.PARENT_CHILD_INDEX)
            processor_mock.load.assert_called_once()

            load_call = processor_mock.load.call_args
            assert load_call is not None
            documents = load_call.args[1]  # second positional argument is the documents list
            assert len(documents) == 3  # one per segment

            for doc in documents:
                assert hasattr(doc, "children")
                assert len(doc.children) == 2

            self._assert_indexing_flags_cleared(segments)

    def test_enable_segments_to_index_general_exception_handling(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        ``load`` raises while the task is running.

        Checks that, after the task returns:
        - every segment is disabled with status ``"error"``
        - the exception message is stored in ``segment.error``
        - ``segment.disabled_at`` is populated
        - the Redis indexing markers are removed anyway
        """
        # Arrange
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        segments = self._create_test_segments(db_session_with_containers, document, dataset)
        segment_ids = [segment.id for segment in segments]
        self._flag_segments_as_indexing(segments)

        # Make the index processor fail.
        mock_external_service_dependencies["index_processor"].load.side_effect = Exception("Index processing failed")

        # Act
        enable_segments_to_index_task(segment_ids, dataset.id, document.id)

        # Assert
        for segment in segments:
            db.session.refresh(segment)
            assert segment.enabled is False
            assert segment.status == "error"
            assert segment.error is not None
            assert "Index processing failed" in segment.error
            assert segment.disabled_at is not None

        self._assert_indexing_flags_cleared(segments)
|
||||
@ -1,174 +1,53 @@
|
||||
import pytest
|
||||
|
||||
from controllers.console.app.app import _validate_description_length as app_validate
|
||||
from controllers.console.datasets.datasets import _validate_description_length as dataset_validate
|
||||
from controllers.service_api.dataset.dataset import _validate_description_length as service_dataset_validate
|
||||
from libs.validators import validate_description_length
|
||||
|
||||
|
||||
class TestDescriptionValidationUnit:
|
||||
"""Unit tests for description validation functions in App and Dataset APIs"""
|
||||
"""Unit tests for the centralized description validation function."""
|
||||
|
||||
def test_app_validate_description_length_valid(self):
|
||||
"""Test App validation function with valid descriptions"""
|
||||
def test_validate_description_length_valid(self):
|
||||
"""Test validation function with valid descriptions."""
|
||||
# Empty string should be valid
|
||||
assert app_validate("") == ""
|
||||
assert validate_description_length("") == ""
|
||||
|
||||
# None should be valid
|
||||
assert app_validate(None) is None
|
||||
assert validate_description_length(None) is None
|
||||
|
||||
# Short description should be valid
|
||||
short_desc = "Short description"
|
||||
assert app_validate(short_desc) == short_desc
|
||||
assert validate_description_length(short_desc) == short_desc
|
||||
|
||||
# Exactly 400 characters should be valid
|
||||
exactly_400 = "x" * 400
|
||||
assert app_validate(exactly_400) == exactly_400
|
||||
assert validate_description_length(exactly_400) == exactly_400
|
||||
|
||||
# Just under limit should be valid
|
||||
just_under = "x" * 399
|
||||
assert app_validate(just_under) == just_under
|
||||
assert validate_description_length(just_under) == just_under
|
||||
|
||||
def test_app_validate_description_length_invalid(self):
|
||||
"""Test App validation function with invalid descriptions"""
|
||||
def test_validate_description_length_invalid(self):
|
||||
"""Test validation function with invalid descriptions."""
|
||||
# 401 characters should fail
|
||||
just_over = "x" * 401
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
app_validate(just_over)
|
||||
validate_description_length(just_over)
|
||||
assert "Description cannot exceed 400 characters." in str(exc_info.value)
|
||||
|
||||
# 500 characters should fail
|
||||
way_over = "x" * 500
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
app_validate(way_over)
|
||||
validate_description_length(way_over)
|
||||
assert "Description cannot exceed 400 characters." in str(exc_info.value)
|
||||
|
||||
# 1000 characters should fail
|
||||
very_long = "x" * 1000
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
app_validate(very_long)
|
||||
validate_description_length(very_long)
|
||||
assert "Description cannot exceed 400 characters." in str(exc_info.value)
|
||||
|
||||
def test_dataset_validate_description_length_valid(self):
|
||||
"""Test Dataset validation function with valid descriptions"""
|
||||
# Empty string should be valid
|
||||
assert dataset_validate("") == ""
|
||||
|
||||
# Short description should be valid
|
||||
short_desc = "Short description"
|
||||
assert dataset_validate(short_desc) == short_desc
|
||||
|
||||
# Exactly 400 characters should be valid
|
||||
exactly_400 = "x" * 400
|
||||
assert dataset_validate(exactly_400) == exactly_400
|
||||
|
||||
# Just under limit should be valid
|
||||
just_under = "x" * 399
|
||||
assert dataset_validate(just_under) == just_under
|
||||
|
||||
def test_dataset_validate_description_length_invalid(self):
|
||||
"""Test Dataset validation function with invalid descriptions"""
|
||||
# 401 characters should fail
|
||||
just_over = "x" * 401
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
dataset_validate(just_over)
|
||||
assert "Description cannot exceed 400 characters." in str(exc_info.value)
|
||||
|
||||
# 500 characters should fail
|
||||
way_over = "x" * 500
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
dataset_validate(way_over)
|
||||
assert "Description cannot exceed 400 characters." in str(exc_info.value)
|
||||
|
||||
def test_service_dataset_validate_description_length_valid(self):
|
||||
"""Test Service Dataset validation function with valid descriptions"""
|
||||
# Empty string should be valid
|
||||
assert service_dataset_validate("") == ""
|
||||
|
||||
# None should be valid
|
||||
assert service_dataset_validate(None) is None
|
||||
|
||||
# Short description should be valid
|
||||
short_desc = "Short description"
|
||||
assert service_dataset_validate(short_desc) == short_desc
|
||||
|
||||
# Exactly 400 characters should be valid
|
||||
exactly_400 = "x" * 400
|
||||
assert service_dataset_validate(exactly_400) == exactly_400
|
||||
|
||||
# Just under limit should be valid
|
||||
just_under = "x" * 399
|
||||
assert service_dataset_validate(just_under) == just_under
|
||||
|
||||
def test_service_dataset_validate_description_length_invalid(self):
|
||||
"""Test Service Dataset validation function with invalid descriptions"""
|
||||
# 401 characters should fail
|
||||
just_over = "x" * 401
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
service_dataset_validate(just_over)
|
||||
assert "Description cannot exceed 400 characters." in str(exc_info.value)
|
||||
|
||||
# 500 characters should fail
|
||||
way_over = "x" * 500
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
service_dataset_validate(way_over)
|
||||
assert "Description cannot exceed 400 characters." in str(exc_info.value)
|
||||
|
||||
def test_app_dataset_validation_consistency(self):
|
||||
"""Test that App and Dataset validation functions behave identically"""
|
||||
test_cases = [
|
||||
"", # Empty string
|
||||
"Short description", # Normal description
|
||||
"x" * 100, # Medium description
|
||||
"x" * 400, # Exactly at limit
|
||||
]
|
||||
|
||||
# Test valid cases produce same results
|
||||
for test_desc in test_cases:
|
||||
assert app_validate(test_desc) == dataset_validate(test_desc) == service_dataset_validate(test_desc)
|
||||
|
||||
# Test invalid cases produce same errors
|
||||
invalid_cases = [
|
||||
"x" * 401, # Just over limit
|
||||
"x" * 500, # Way over limit
|
||||
"x" * 1000, # Very long
|
||||
]
|
||||
|
||||
for invalid_desc in invalid_cases:
|
||||
app_error = None
|
||||
dataset_error = None
|
||||
service_dataset_error = None
|
||||
|
||||
# Capture App validation error
|
||||
try:
|
||||
app_validate(invalid_desc)
|
||||
except ValueError as e:
|
||||
app_error = str(e)
|
||||
|
||||
# Capture Dataset validation error
|
||||
try:
|
||||
dataset_validate(invalid_desc)
|
||||
except ValueError as e:
|
||||
dataset_error = str(e)
|
||||
|
||||
# Capture Service Dataset validation error
|
||||
try:
|
||||
service_dataset_validate(invalid_desc)
|
||||
except ValueError as e:
|
||||
service_dataset_error = str(e)
|
||||
|
||||
# All should produce errors
|
||||
assert app_error is not None, f"App validation should fail for {len(invalid_desc)} characters"
|
||||
assert dataset_error is not None, f"Dataset validation should fail for {len(invalid_desc)} characters"
|
||||
error_msg = f"Service Dataset validation should fail for {len(invalid_desc)} characters"
|
||||
assert service_dataset_error is not None, error_msg
|
||||
|
||||
# Errors should be identical
|
||||
error_msg = f"Error messages should be identical for {len(invalid_desc)} characters"
|
||||
assert app_error == dataset_error == service_dataset_error, error_msg
|
||||
assert app_error == "Description cannot exceed 400 characters."
|
||||
|
||||
def test_boundary_values(self):
|
||||
"""Test boundary values around the 400 character limit"""
|
||||
"""Test boundary values around the 400 character limit."""
|
||||
boundary_tests = [
|
||||
(0, True), # Empty
|
||||
(1, True), # Minimum
|
||||
@ -184,69 +63,45 @@ class TestDescriptionValidationUnit:
|
||||
|
||||
if should_pass:
|
||||
# Should not raise exception
|
||||
assert app_validate(test_desc) == test_desc
|
||||
assert dataset_validate(test_desc) == test_desc
|
||||
assert service_dataset_validate(test_desc) == test_desc
|
||||
assert validate_description_length(test_desc) == test_desc
|
||||
else:
|
||||
# Should raise ValueError
|
||||
with pytest.raises(ValueError):
|
||||
app_validate(test_desc)
|
||||
with pytest.raises(ValueError):
|
||||
dataset_validate(test_desc)
|
||||
with pytest.raises(ValueError):
|
||||
service_dataset_validate(test_desc)
|
||||
validate_description_length(test_desc)
|
||||
|
||||
def test_special_characters(self):
|
||||
"""Test validation with special characters, Unicode, etc."""
|
||||
# Unicode characters
|
||||
unicode_desc = "测试描述" * 100 # Chinese characters
|
||||
if len(unicode_desc) <= 400:
|
||||
assert app_validate(unicode_desc) == unicode_desc
|
||||
assert dataset_validate(unicode_desc) == unicode_desc
|
||||
assert service_dataset_validate(unicode_desc) == unicode_desc
|
||||
assert validate_description_length(unicode_desc) == unicode_desc
|
||||
|
||||
# Special characters
|
||||
special_desc = "Special chars: !@#$%^&*()_+-=[]{}|;':\",./<>?" * 10
|
||||
if len(special_desc) <= 400:
|
||||
assert app_validate(special_desc) == special_desc
|
||||
assert dataset_validate(special_desc) == special_desc
|
||||
assert service_dataset_validate(special_desc) == special_desc
|
||||
assert validate_description_length(special_desc) == special_desc
|
||||
|
||||
# Mixed content
|
||||
mixed_desc = "Mixed content: 测试 123 !@# " * 15
|
||||
if len(mixed_desc) <= 400:
|
||||
assert app_validate(mixed_desc) == mixed_desc
|
||||
assert dataset_validate(mixed_desc) == mixed_desc
|
||||
assert service_dataset_validate(mixed_desc) == mixed_desc
|
||||
assert validate_description_length(mixed_desc) == mixed_desc
|
||||
elif len(mixed_desc) > 400:
|
||||
with pytest.raises(ValueError):
|
||||
app_validate(mixed_desc)
|
||||
with pytest.raises(ValueError):
|
||||
dataset_validate(mixed_desc)
|
||||
with pytest.raises(ValueError):
|
||||
service_dataset_validate(mixed_desc)
|
||||
validate_description_length(mixed_desc)
|
||||
|
||||
def test_whitespace_handling(self):
|
||||
"""Test validation with various whitespace scenarios"""
|
||||
"""Test validation with various whitespace scenarios."""
|
||||
# Leading/trailing whitespace
|
||||
whitespace_desc = " Description with whitespace "
|
||||
if len(whitespace_desc) <= 400:
|
||||
assert app_validate(whitespace_desc) == whitespace_desc
|
||||
assert dataset_validate(whitespace_desc) == whitespace_desc
|
||||
assert service_dataset_validate(whitespace_desc) == whitespace_desc
|
||||
assert validate_description_length(whitespace_desc) == whitespace_desc
|
||||
|
||||
# Newlines and tabs
|
||||
multiline_desc = "Line 1\nLine 2\tTabbed content"
|
||||
if len(multiline_desc) <= 400:
|
||||
assert app_validate(multiline_desc) == multiline_desc
|
||||
assert dataset_validate(multiline_desc) == multiline_desc
|
||||
assert service_dataset_validate(multiline_desc) == multiline_desc
|
||||
assert validate_description_length(multiline_desc) == multiline_desc
|
||||
|
||||
# Only whitespace over limit
|
||||
only_spaces = " " * 401
|
||||
with pytest.raises(ValueError):
|
||||
app_validate(only_spaces)
|
||||
with pytest.raises(ValueError):
|
||||
dataset_validate(only_spaces)
|
||||
with pytest.raises(ValueError):
|
||||
service_dataset_validate(only_spaces)
|
||||
validate_description_length(only_spaces)
|
||||
|
||||
Reference in New Issue
Block a user