Merge remote-tracking branch 'origin/main' into feat/trigger

2026-05-06 02:18:08 +08:00 · 2025-10-04 11:30:26 +08:00
parent cc5a315039 f5161d9add
commit 3370736e09
15 changed files with 1053 additions and 668 deletions
--- a/api/tests/integration_tests/controllers/console/app/test_chat_message_permissions.py
+++ b/api/tests/integration_tests/controllers/console/app/test_chat_message_permissions.py
@ -11,8 +11,8 @@ from controllers.console.app import completion as completion_api
 from controllers.console.app import message as message_api
 from controllers.console.app import wraps
 from libs.datetime_utils import naive_utc_now
-from models import Account, App, Tenant
-from models.account import TenantAccountRole
+from models import App, Tenant
+from models.account import Account, TenantAccountJoin, TenantAccountRole
 from models.model import AppMode
 from services.app_generate_service import AppGenerateService

@ -31,9 +31,8 @@ class TestChatMessageApiPermissions:
        return app

    @pytest.fixture
-    def mock_account(self):
+    def mock_account(self, monkeypatch: pytest.MonkeyPatch):
        """Create a mock Account for testing."""
-
        account = Account()
        account.id = str(uuid.uuid4())
        account.name = "Test User"
@ -42,12 +41,24 @@ class TestChatMessageApiPermissions:
        account.created_at = naive_utc_now()
        account.updated_at = naive_utc_now()

-        # Create mock tenant
        tenant = Tenant()
        tenant.id = str(uuid.uuid4())
        tenant.name = "Test Tenant"

-        account._current_tenant = tenant
+        mock_session_instance = mock.Mock()  # object yielded by the patched Session context manager
+
+        mock_tenant_join = TenantAccountJoin(role=TenantAccountRole.OWNER)
+        monkeypatch.setattr(mock_session_instance, "scalar", mock.Mock(return_value=mock_tenant_join))
+
+        mock_scalars_result = mock.Mock()
+        mock_scalars_result.one.return_value = tenant
+        monkeypatch.setattr(mock_session_instance, "scalars", mock.Mock(return_value=mock_scalars_result))
+
+        mock_session_context = mock.MagicMock()  # plain Mock raises AttributeError on __enter__/__exit__
+        mock_session_context.__enter__.return_value = mock_session_instance
+        monkeypatch.setattr("models.account.Session", lambda _, expire_on_commit: mock_session_context)
+
+        account.current_tenant = tenant  # presumably resolved through the patched Session - confirm in models.account
        return account

    @pytest.mark.parametrize(
--- a/api/tests/integration_tests/controllers/console/app/test_description_validation.py
+++ b/api/tests/integration_tests/controllers/console/app/test_description_validation.py
@ -18,124 +18,87 @@ class TestAppDescriptionValidationUnit:
    """Unit tests for description validation function"""

    def test_validate_description_length_function(self):
-        """Test the _validate_description_length function directly"""
-        from controllers.console.app.app import _validate_description_length
+        """Test the validate_description_length function directly"""
+        from libs.validators import validate_description_length

        # Test valid descriptions
-        assert _validate_description_length("") == ""
-        assert _validate_description_length("x" * 400) == "x" * 400
-        assert _validate_description_length(None) is None
+        assert validate_description_length("") == ""
+        assert validate_description_length("x" * 400) == "x" * 400
+        assert validate_description_length(None) is None

        # Test invalid descriptions
        with pytest.raises(ValueError) as exc_info:
-            _validate_description_length("x" * 401)
+            validate_description_length("x" * 401)
        assert "Description cannot exceed 400 characters." in str(exc_info.value)

        with pytest.raises(ValueError) as exc_info:
-            _validate_description_length("x" * 500)
+            validate_description_length("x" * 500)
        assert "Description cannot exceed 400 characters." in str(exc_info.value)

        with pytest.raises(ValueError) as exc_info:
-            _validate_description_length("x" * 1000)
+            validate_description_length("x" * 1000)
        assert "Description cannot exceed 400 characters." in str(exc_info.value)

-    def test_validation_consistency_with_dataset(self):
-        """Test that App and Dataset validation functions are consistent"""
-        from controllers.console.app.app import _validate_description_length as app_validate
-        from controllers.console.datasets.datasets import _validate_description_length as dataset_validate
-        from controllers.service_api.dataset.dataset import _validate_description_length as service_dataset_validate
-
-        # Test same valid inputs
-        valid_desc = "x" * 400
-        assert app_validate(valid_desc) == dataset_validate(valid_desc) == service_dataset_validate(valid_desc)
-        assert app_validate("") == dataset_validate("") == service_dataset_validate("")
-        assert app_validate(None) == dataset_validate(None) == service_dataset_validate(None)
-
-        # Test same invalid inputs produce same error
-        invalid_desc = "x" * 401
-
-        app_error = None
-        dataset_error = None
-        service_dataset_error = None
-
-        try:
-            app_validate(invalid_desc)
-        except ValueError as e:
-            app_error = str(e)
-
-        try:
-            dataset_validate(invalid_desc)
-        except ValueError as e:
-            dataset_error = str(e)
-
-        try:
-            service_dataset_validate(invalid_desc)
-        except ValueError as e:
-            service_dataset_error = str(e)
-
-        assert app_error == dataset_error == service_dataset_error
-        assert app_error == "Description cannot exceed 400 characters."
-
    def test_boundary_values(self):
        """Test boundary values for description validation"""
-        from controllers.console.app.app import _validate_description_length
+        from libs.validators import validate_description_length

        # Test exact boundary
        exactly_400 = "x" * 400
-        assert _validate_description_length(exactly_400) == exactly_400
+        assert validate_description_length(exactly_400) == exactly_400

        # Test just over boundary
        just_over_400 = "x" * 401
        with pytest.raises(ValueError):
-            _validate_description_length(just_over_400)
+            validate_description_length(just_over_400)

        # Test just under boundary
        just_under_400 = "x" * 399
-        assert _validate_description_length(just_under_400) == just_under_400
+        assert validate_description_length(just_under_400) == just_under_400

    def test_edge_cases(self):
        """Test edge cases for description validation"""
-        from controllers.console.app.app import _validate_description_length
+        from libs.validators import validate_description_length

        # Test None input
-        assert _validate_description_length(None) is None
+        assert validate_description_length(None) is None

        # Test empty string
-        assert _validate_description_length("") == ""
+        assert validate_description_length("") == ""

        # Test single character
-        assert _validate_description_length("a") == "a"
+        assert validate_description_length("a") == "a"

        # Test unicode characters
        unicode_desc = "测试" * 200  # 400 characters in Chinese
-        assert _validate_description_length(unicode_desc) == unicode_desc
+        assert validate_description_length(unicode_desc) == unicode_desc

        # Test unicode over limit
        unicode_over = "测试" * 201  # 402 characters
        with pytest.raises(ValueError):
-            _validate_description_length(unicode_over)
+            validate_description_length(unicode_over)

    def test_whitespace_handling(self):
        """Test how validation handles whitespace"""
-        from controllers.console.app.app import _validate_description_length
+        from libs.validators import validate_description_length

        # Test description with spaces
        spaces_400 = " " * 400
-        assert _validate_description_length(spaces_400) == spaces_400
+        assert validate_description_length(spaces_400) == spaces_400

        # Test description with spaces over limit
        spaces_401 = " " * 401
        with pytest.raises(ValueError):
-            _validate_description_length(spaces_401)
+            validate_description_length(spaces_401)

        # Test mixed content
        mixed_400 = "a" * 200 + " " * 200
-        assert _validate_description_length(mixed_400) == mixed_400
+        assert validate_description_length(mixed_400) == mixed_400

        # Test mixed over limit
        mixed_401 = "a" * 200 + " " * 201
        with pytest.raises(ValueError):
-            _validate_description_length(mixed_401)
+            validate_description_length(mixed_401)


 if __name__ == "__main__":
--- a/api/tests/integration_tests/controllers/console/app/test_model_config_permissions.py
+++ b/api/tests/integration_tests/controllers/console/app/test_model_config_permissions.py
@ -9,8 +9,8 @@ from flask.testing import FlaskClient
 from controllers.console.app import model_config as model_config_api
 from controllers.console.app import wraps
 from libs.datetime_utils import naive_utc_now
-from models import Account, App, Tenant
-from models.account import TenantAccountRole
+from models import App, Tenant
+from models.account import Account, TenantAccountJoin, TenantAccountRole
 from models.model import AppMode
 from services.app_model_config_service import AppModelConfigService

@ -30,9 +30,8 @@ class TestModelConfigResourcePermissions:
        return app

    @pytest.fixture
-    def mock_account(self):
+    def mock_account(self, monkeypatch: pytest.MonkeyPatch):
        """Create a mock Account for testing."""
-
        account = Account()
        account.id = str(uuid.uuid4())
        account.name = "Test User"
@ -41,12 +40,24 @@ class TestModelConfigResourcePermissions:
        account.created_at = naive_utc_now()
        account.updated_at = naive_utc_now()

-        # Create mock tenant
        tenant = Tenant()
        tenant.id = str(uuid.uuid4())
        tenant.name = "Test Tenant"

-        account._current_tenant = tenant
+        mock_session_instance = mock.Mock()  # object yielded by the patched Session context manager
+
+        mock_tenant_join = TenantAccountJoin(role=TenantAccountRole.OWNER)
+        monkeypatch.setattr(mock_session_instance, "scalar", mock.Mock(return_value=mock_tenant_join))
+
+        mock_scalars_result = mock.Mock()
+        mock_scalars_result.one.return_value = tenant
+        monkeypatch.setattr(mock_session_instance, "scalars", mock.Mock(return_value=mock_scalars_result))
+
+        mock_session_context = mock.MagicMock()  # plain Mock raises AttributeError on __enter__/__exit__
+        mock_session_context.__enter__.return_value = mock_session_instance
+        monkeypatch.setattr("models.account.Session", lambda _, expire_on_commit: mock_session_context)
+
+        account.current_tenant = tenant  # presumably resolved through the patched Session - confirm in models.account
        return account

    @pytest.mark.parametrize(
--- a/api/tests/test_containers_integration_tests/tasks/test_enable_segments_to_index_task.py
+++ b/api/tests/test_containers_integration_tests/tasks/test_enable_segments_to_index_task.py
@ -0,0 +1,505 @@
+from unittest.mock import MagicMock, patch
+
+import pytest
+from faker import Faker
+
+from core.rag.index_processor.constant.index_type import IndexType
+from extensions.ext_database import db
+from extensions.ext_redis import redis_client
+from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
+from models.dataset import Dataset, Document, DocumentSegment
+from tasks.enable_segments_to_index_task import enable_segments_to_index_task
+
+
+class TestEnableSegmentsToIndexTask:
+    """Integration tests for enable_segments_to_index_task using testcontainers."""
+
+    @pytest.fixture
+    def mock_external_service_dependencies(self):
+        """Patch the task's IndexProcessorFactory and hand the mocks to the test.
+
+        Yields:
+            dict: ``index_processor_factory`` is the patched factory class and
+            ``index_processor`` is what its ``init_index_processor()`` returns.
+        """
+        with patch("tasks.enable_segments_to_index_task.IndexProcessorFactory") as factory_cls:
+            processor = MagicMock()
+            # IndexProcessorFactory(...).init_index_processor() -> processor
+            factory_cls.return_value.init_index_processor.return_value = processor
+
+            yield {"index_processor_factory": factory_cls, "index_processor": processor}
+
+    def _create_test_dataset_and_document(self, db_session_with_containers, mock_external_service_dependencies):
+        """
+        Persist the records a task run needs: account, tenant, owner membership,
+        dataset and document.
+
+        Every record is committed right after it is added, in that order. The document
+        is created enabled, with ``indexing_status="completed"`` and the paragraph index form.
+
+        Args:
+            db_session_with_containers: Database session from testcontainers infrastructure
+                (not referenced directly; the global ``db.session`` is used)
+            mock_external_service_dependencies: Mock dependencies (not referenced here)
+
+        Returns:
+            tuple: (dataset, document) - Created dataset and document instances
+        """
+        fake = Faker()
+
+        def persist(record):
+            """Add ``record`` to the session, commit immediately and return it."""
+            db.session.add(record)
+            db.session.commit()
+            return record
+
+        # Account and tenant
+        owner = persist(
+            Account(
+                email=fake.email(),
+                name=fake.name(),
+                interface_language="en-US",
+                status="active",
+            )
+        )
+        workspace = persist(Tenant(name=fake.company(), status="normal"))
+
+        # Tenant-account join marking the account as owner of the tenant
+        persist(
+            TenantAccountJoin(
+                tenant_id=workspace.id,
+                account_id=owner.id,
+                role=TenantAccountRole.OWNER.value,
+                current=True,
+            )
+        )
+
+        # Dataset
+        dataset = persist(
+            Dataset(
+                id=fake.uuid4(),
+                tenant_id=workspace.id,
+                name=fake.company(),
+                description=fake.text(max_nb_chars=100),
+                data_source_type="upload_file",
+                indexing_technique="high_quality",
+                created_by=owner.id,
+            )
+        )
+
+        # Document
+        document = persist(
+            Document(
+                id=fake.uuid4(),
+                tenant_id=workspace.id,
+                dataset_id=dataset.id,
+                position=1,
+                data_source_type="upload_file",
+                batch="test_batch",
+                name=fake.file_name(),
+                created_from="upload_file",
+                created_by=owner.id,
+                indexing_status="completed",
+                enabled=True,
+                doc_form=IndexType.PARAGRAPH_INDEX,
+            )
+        )
+
+        # Reload the dataset now that its document exists, so doc_form resolves correctly
+        db.session.refresh(dataset)
+
+        return dataset, document
+
+    def _create_test_segments(
+        self, db_session_with_containers, document, dataset, count=3, enabled=False, status="completed"
+    ):
+        """
+        Create and commit ``count`` DocumentSegment rows attached to ``document``.
+
+        Segment ``i`` gets position ``i``, random text content, ``index_node_id`` ``node_<i>``
+        and ``index_node_hash`` ``hash_<i>``. Word count is the whitespace-split length of the
+        text and the token count is twice that.
+
+        Args:
+            db_session_with_containers: Database session from testcontainers infrastructure
+                (not referenced directly; the global ``db.session`` is used)
+            document: Document instance
+            dataset: Dataset instance
+            count: Number of segments to create
+            enabled: Whether segments should be enabled
+            status: Status of the segments
+
+        Returns:
+            list: List of created DocumentSegment instances
+        """
+        fake = Faker()
+        created = []
+
+        for position in range(count):
+            content = fake.text(max_nb_chars=200)
+            words = len(content.split())
+            created.append(
+                DocumentSegment(
+                    id=fake.uuid4(),
+                    tenant_id=document.tenant_id,
+                    dataset_id=dataset.id,
+                    document_id=document.id,
+                    position=position,
+                    content=content,
+                    word_count=words,
+                    tokens=words * 2,
+                    index_node_id=f"node_{position}",
+                    index_node_hash=f"hash_{position}",
+                    enabled=enabled,
+                    status=status,
+                    created_by=document.created_by,
+                )
+            )
+
+        # Single commit for the whole batch
+        db.session.add_all(created)
+        db.session.commit()
+        return created
+
+    def test_enable_segments_to_index_success(self, db_session_with_containers, mock_external_service_dependencies):
+        """
+        Happy path: three disabled segments of a paragraph-indexed document get indexed.
+
+        Checks that:
+        - the index processor factory is constructed once with the paragraph index type
+        - ``load`` is called once and receives one document per segment
+        - each document carries the segment's content and node id / hash metadata
+        - the per-segment Redis indexing keys are gone afterwards
+        """
+        factory = mock_external_service_dependencies["index_processor_factory"]
+        processor = mock_external_service_dependencies["index_processor"]
+
+        def indexing_key(segment):
+            return f"segment_{segment.id}_indexing"
+
+        # Arrange
+        dataset, document = self._create_test_dataset_and_document(
+            db_session_with_containers, mock_external_service_dependencies
+        )
+        segments = self._create_test_segments(db_session_with_containers, document, dataset)
+        segment_ids = [segment.id for segment in segments]
+
+        # Simulate "indexing in progress" markers (5 minute expiry), then confirm they are set
+        for segment in segments:
+            redis_client.set(indexing_key(segment), "processing", ex=300)
+        for segment in segments:
+            assert redis_client.exists(indexing_key(segment)) == 1
+
+        # Act
+        enable_segments_to_index_task(segment_ids, dataset.id, document.id)
+
+        # Assert: processor wiring
+        factory.assert_called_once_with(IndexType.PARAGRAPH_INDEX)
+        processor.load.assert_called_once()
+
+        # Assert: payload handed to load()
+        load_call = processor.load.call_args
+        assert load_call is not None
+        documents = load_call.args[1]  # second positional argument is the documents list
+        assert len(documents) == 3
+
+        for doc, segment in zip(documents, segments):
+            assert doc.page_content == segment.content
+            assert doc.metadata["doc_id"] == segment.index_node_id
+            assert doc.metadata["doc_hash"] == segment.index_node_hash
+            assert doc.metadata["document_id"] == document.id
+            assert doc.metadata["dataset_id"] == dataset.id
+
+        # Assert: indexing markers were removed
+        for segment in segments:
+            assert redis_client.exists(indexing_key(segment)) == 0
+
+    def test_enable_segments_to_index_with_different_index_type(
+        self, db_session_with_containers, mock_external_service_dependencies
+    ):
+        """
+        Indexing a document whose ``doc_form`` was switched to the QA index type.
+
+        Checks that:
+        - the index processor factory is constructed once with the QA index type
+        - ``load`` is called once with three documents
+        - the per-segment Redis indexing keys are gone afterwards
+        """
+        factory = mock_external_service_dependencies["index_processor_factory"]
+        processor = mock_external_service_dependencies["index_processor"]
+
+        def indexing_key(segment):
+            return f"segment_{segment.id}_indexing"
+
+        # Arrange
+        dataset, document = self._create_test_dataset_and_document(
+            db_session_with_containers, mock_external_service_dependencies
+        )
+
+        # Switch the document to the QA form and reload the dataset so it sees the change
+        document.doc_form = IndexType.QA_INDEX
+        db.session.commit()
+        db.session.refresh(dataset)
+
+        segments = self._create_test_segments(db_session_with_containers, document, dataset)
+        segment_ids = [segment.id for segment in segments]
+        for segment in segments:
+            redis_client.set(indexing_key(segment), "processing", ex=300)
+
+        # Act
+        enable_segments_to_index_task(segment_ids, dataset.id, document.id)
+
+        # Assert: processor wiring
+        factory.assert_called_once_with(IndexType.QA_INDEX)
+        processor.load.assert_called_once()
+
+        # Assert: payload handed to load()
+        load_call = processor.load.call_args
+        assert load_call is not None
+        documents = load_call.args[1]  # second positional argument is the documents list
+        assert len(documents) == 3
+
+        # Assert: indexing markers were removed
+        for segment in segments:
+            assert redis_client.exists(indexing_key(segment)) == 0
+
+    def test_enable_segments_to_index_dataset_not_found(
+        self, db_session_with_containers, mock_external_service_dependencies
+    ):
+        """
+        Running the task with ids that match no dataset must not touch the index processor.
+
+        Checks that:
+        - the task returns without raising
+        - the index processor factory is never constructed
+        - ``load`` is never called
+        """
+        # Arrange: random ids, nothing is inserted into the database
+        fake = Faker()
+        missing_dataset_id = fake.uuid4()
+        missing_document_id = fake.uuid4()
+        segment_ids = [fake.uuid4()]
+
+        # Act
+        enable_segments_to_index_task(segment_ids, missing_dataset_id, missing_document_id)
+
+        # Assert
+        mocks = mock_external_service_dependencies
+        mocks["index_processor_factory"].assert_not_called()
+        mocks["index_processor"].load.assert_not_called()
+
+    def test_enable_segments_to_index_document_not_found(
+        self, db_session_with_containers, mock_external_service_dependencies
+    ):
+        """
+        Running the task for an existing dataset but an unknown document id.
+
+        Checks that:
+        - the task returns without raising
+        - the index processor factory is never constructed
+        - ``load`` is never called
+        """
+        # Arrange: a real dataset, paired with a document id that was never stored
+        dataset, _ = self._create_test_dataset_and_document(
+            db_session_with_containers, mock_external_service_dependencies
+        )
+        fake = Faker()
+        missing_document_id = fake.uuid4()
+        segment_ids = [fake.uuid4()]
+
+        # Act
+        enable_segments_to_index_task(segment_ids, dataset.id, missing_document_id)
+
+        # Assert
+        mocks = mock_external_service_dependencies
+        mocks["index_processor_factory"].assert_not_called()
+        mocks["index_processor"].load.assert_not_called()
+
+    def test_enable_segments_to_index_invalid_document_status(
+        self, db_session_with_containers, mock_external_service_dependencies
+    ):
+        """
+        The task must skip documents that are disabled, archived, or not fully indexed.
+
+        For each of those three states the document is first reset to a valid state, then
+        put into the invalid one, fresh segments are created and the task is run.
+
+        Checks that, in every case:
+        - the index processor factory is never constructed
+        - ``load`` is never called
+        """
+        factory = mock_external_service_dependencies["index_processor_factory"]
+        processor = mock_external_service_dependencies["index_processor"]
+
+        # Arrange
+        dataset, document = self._create_test_dataset_and_document(
+            db_session_with_containers, mock_external_service_dependencies
+        )
+
+        def apply(attrs):
+            """Set the given attributes on the document and commit."""
+            for attr, value in attrs.items():
+                setattr(document, attr, value)
+            db.session.commit()
+
+        valid_state = {"enabled": True, "archived": False, "indexing_status": "completed"}
+        invalid_states = {
+            "disabled": {"enabled": False},
+            "archived": {"archived": True},
+            "not_completed": {"indexing_status": "processing"},
+        }
+
+        for overrides in invalid_states.values():
+            # Start from a valid document, then break exactly one condition
+            apply(valid_state)
+            apply(overrides)
+
+            segments = self._create_test_segments(db_session_with_containers, document, dataset)
+            segment_ids = [segment.id for segment in segments]
+
+            # Act
+            enable_segments_to_index_task(segment_ids, dataset.id, document.id)
+
+            # Assert: nothing was indexed
+            factory.assert_not_called()
+            processor.load.assert_not_called()
+
+            # Remove this iteration's segments before the next state is tested
+            for segment in segments:
+                db.session.delete(segment)
+            db.session.commit()
+
+    def test_enable_segments_to_index_segments_not_found(
+        self, db_session_with_containers, mock_external_service_dependencies
+    ):
+        """
+        Running the task for a valid dataset/document with segment ids that do not exist.
+
+        Checks that:
+        - the index processor factory is still constructed once (paragraph index type)
+        - ``load`` is never called
+        """
+        # Arrange: a valid dataset and document, but three segment ids that were never stored
+        dataset, document = self._create_test_dataset_and_document(
+            db_session_with_containers, mock_external_service_dependencies
+        )
+        fake = Faker()
+        missing_segment_ids = [fake.uuid4() for _ in range(3)]
+
+        # Act
+        enable_segments_to_index_task(missing_segment_ids, dataset.id, document.id)
+
+        # Assert: processor was created, but nothing was loaded into it
+        mocks = mock_external_service_dependencies
+        mocks["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
+        mocks["index_processor"].load.assert_not_called()
+
+    def test_enable_segments_to_index_with_parent_child_structure(
+        self, db_session_with_containers, mock_external_service_dependencies
+    ):
+        """
+        Indexing a document that uses the parent-child index type.
+
+        ``DocumentSegment.get_child_chunks`` is patched to return two mock child chunks
+        for every segment.
+
+        Checks that:
+        - the index processor factory is constructed once with the parent-child index type
+        - ``load`` is called once with three documents
+        - every document exposes ``children`` with two entries
+        - the per-segment Redis indexing keys are gone afterwards
+        """
+        factory = mock_external_service_dependencies["index_processor_factory"]
+        processor = mock_external_service_dependencies["index_processor"]
+
+        def indexing_key(segment):
+            return f"segment_{segment.id}_indexing"
+
+        # Arrange
+        dataset, document = self._create_test_dataset_and_document(
+            db_session_with_containers, mock_external_service_dependencies
+        )
+
+        # Switch the document to the parent-child form and reload the dataset so it sees the change
+        document.doc_form = IndexType.PARENT_CHILD_INDEX
+        db.session.commit()
+        db.session.refresh(dataset)
+
+        segments = self._create_test_segments(db_session_with_containers, document, dataset)
+        segment_ids = [segment.id for segment in segments]
+        for segment in segments:
+            redis_client.set(indexing_key(segment), "processing", ex=300)
+
+        # Two child chunks, shared by every segment through the patched method
+        child_chunks = [
+            MagicMock(
+                content=f"child_content_{i}",
+                index_node_id=f"child_node_{i}",
+                index_node_hash=f"child_hash_{i}",
+            )
+            for i in range(2)
+        ]
+
+        # Act: run the task while get_child_chunks is patched
+        with patch.object(DocumentSegment, "get_child_chunks") as mock_get_child_chunks:
+            mock_get_child_chunks.return_value = child_chunks
+            enable_segments_to_index_task(segment_ids, dataset.id, document.id)
+
+        # Assert: processor wiring
+        factory.assert_called_once_with(IndexType.PARENT_CHILD_INDEX)
+        processor.load.assert_called_once()
+
+        # Assert: payload handed to load()
+        load_call = processor.load.call_args
+        assert load_call is not None
+        documents = load_call.args[1]  # second positional argument is the documents list
+        assert len(documents) == 3  # 3 segments
+
+        for doc in documents:
+            assert hasattr(doc, "children")
+            assert len(doc.children) == 2  # Each document has 2 children
+
+        # Assert: indexing markers were removed
+        for segment in segments:
+            assert redis_client.exists(indexing_key(segment)) == 0
+
+    def test_enable_segments_to_index_general_exception_handling(
+        self, db_session_with_containers, mock_external_service_dependencies
+    ):
+        """
+        Behaviour when the index processor's ``load`` raises.
+
+        Checks that, after the task returns:
+        - every segment is disabled and has status ``error``
+        - the error text contains the exception message
+        - ``disabled_at`` is populated
+        - the per-segment Redis indexing keys are gone
+        """
+        processor = mock_external_service_dependencies["index_processor"]
+
+        def indexing_key(segment):
+            return f"segment_{segment.id}_indexing"
+
+        # Arrange
+        dataset, document = self._create_test_dataset_and_document(
+            db_session_with_containers, mock_external_service_dependencies
+        )
+        segments = self._create_test_segments(db_session_with_containers, document, dataset)
+        segment_ids = [segment.id for segment in segments]
+        for segment in segments:
+            redis_client.set(indexing_key(segment), "processing", ex=300)
+
+        # Make the index processor fail when documents are loaded
+        processor.load.side_effect = Exception("Index processing failed")
+
+        # Act
+        enable_segments_to_index_task(segment_ids, dataset.id, document.id)
+
+        # Assert: segments were marked as failed (reload each row first)
+        for segment in segments:
+            db.session.refresh(segment)
+            assert segment.enabled is False
+            assert segment.status == "error"
+            assert segment.error is not None
+            assert "Index processing failed" in segment.error
+            assert segment.disabled_at is not None
+
+        # Assert: indexing markers were removed even though indexing failed
+        for segment in segments:
+            assert redis_client.exists(indexing_key(segment)) == 0
--- a/api/tests/unit_tests/controllers/console/app/test_description_validation.py
+++ b/api/tests/unit_tests/controllers/console/app/test_description_validation.py
@ -1,174 +1,53 @@
 import pytest

-from controllers.console.app.app import _validate_description_length as app_validate
-from controllers.console.datasets.datasets import _validate_description_length as dataset_validate
-from controllers.service_api.dataset.dataset import _validate_description_length as service_dataset_validate
+from libs.validators import validate_description_length


 class TestDescriptionValidationUnit:
-    """Unit tests for description validation functions in App and Dataset APIs"""
+    """Unit tests for the centralized description validation function."""

-    def test_app_validate_description_length_valid(self):
-        """Test App validation function with valid descriptions"""
+    def test_validate_description_length_valid(self):
+        """Test validation function with valid descriptions."""
        # Empty string should be valid
-        assert app_validate("") == ""
+        assert validate_description_length("") == ""

        # None should be valid
-        assert app_validate(None) is None
+        assert validate_description_length(None) is None

        # Short description should be valid
        short_desc = "Short description"
-        assert app_validate(short_desc) == short_desc
+        assert validate_description_length(short_desc) == short_desc

        # Exactly 400 characters should be valid
        exactly_400 = "x" * 400
-        assert app_validate(exactly_400) == exactly_400
+        assert validate_description_length(exactly_400) == exactly_400

        # Just under limit should be valid
        just_under = "x" * 399
-        assert app_validate(just_under) == just_under
+        assert validate_description_length(just_under) == just_under

-    def test_app_validate_description_length_invalid(self):
-        """Test App validation function with invalid descriptions"""
+    def test_validate_description_length_invalid(self):
+        """Test validation function with invalid descriptions."""
        # 401 characters should fail
        just_over = "x" * 401
        with pytest.raises(ValueError) as exc_info:
-            app_validate(just_over)
+            validate_description_length(just_over)
        assert "Description cannot exceed 400 characters." in str(exc_info.value)

        # 500 characters should fail
        way_over = "x" * 500
        with pytest.raises(ValueError) as exc_info:
-            app_validate(way_over)
+            validate_description_length(way_over)
        assert "Description cannot exceed 400 characters." in str(exc_info.value)

        # 1000 characters should fail
        very_long = "x" * 1000
        with pytest.raises(ValueError) as exc_info:
-            app_validate(very_long)
+            validate_description_length(very_long)
        assert "Description cannot exceed 400 characters." in str(exc_info.value)

-    def test_dataset_validate_description_length_valid(self):
-        """Test Dataset validation function with valid descriptions"""
-        # Empty string should be valid
-        assert dataset_validate("") == ""
-
-        # Short description should be valid
-        short_desc = "Short description"
-        assert dataset_validate(short_desc) == short_desc
-
-        # Exactly 400 characters should be valid
-        exactly_400 = "x" * 400
-        assert dataset_validate(exactly_400) == exactly_400
-
-        # Just under limit should be valid
-        just_under = "x" * 399
-        assert dataset_validate(just_under) == just_under
-
-    def test_dataset_validate_description_length_invalid(self):
-        """Test Dataset validation function with invalid descriptions"""
-        # 401 characters should fail
-        just_over = "x" * 401
-        with pytest.raises(ValueError) as exc_info:
-            dataset_validate(just_over)
-        assert "Description cannot exceed 400 characters." in str(exc_info.value)
-
-        # 500 characters should fail
-        way_over = "x" * 500
-        with pytest.raises(ValueError) as exc_info:
-            dataset_validate(way_over)
-        assert "Description cannot exceed 400 characters." in str(exc_info.value)
-
-    def test_service_dataset_validate_description_length_valid(self):
-        """Test Service Dataset validation function with valid descriptions"""
-        # Empty string should be valid
-        assert service_dataset_validate("") == ""
-
-        # None should be valid
-        assert service_dataset_validate(None) is None
-
-        # Short description should be valid
-        short_desc = "Short description"
-        assert service_dataset_validate(short_desc) == short_desc
-
-        # Exactly 400 characters should be valid
-        exactly_400 = "x" * 400
-        assert service_dataset_validate(exactly_400) == exactly_400
-
-        # Just under limit should be valid
-        just_under = "x" * 399
-        assert service_dataset_validate(just_under) == just_under
-
-    def test_service_dataset_validate_description_length_invalid(self):
-        """Test Service Dataset validation function with invalid descriptions"""
-        # 401 characters should fail
-        just_over = "x" * 401
-        with pytest.raises(ValueError) as exc_info:
-            service_dataset_validate(just_over)
-        assert "Description cannot exceed 400 characters." in str(exc_info.value)
-
-        # 500 characters should fail
-        way_over = "x" * 500
-        with pytest.raises(ValueError) as exc_info:
-            service_dataset_validate(way_over)
-        assert "Description cannot exceed 400 characters." in str(exc_info.value)
-
-    def test_app_dataset_validation_consistency(self):
-        """Test that App and Dataset validation functions behave identically"""
-        test_cases = [
-            "",  # Empty string
-            "Short description",  # Normal description
-            "x" * 100,  # Medium description
-            "x" * 400,  # Exactly at limit
-        ]
-
-        # Test valid cases produce same results
-        for test_desc in test_cases:
-            assert app_validate(test_desc) == dataset_validate(test_desc) == service_dataset_validate(test_desc)
-
-        # Test invalid cases produce same errors
-        invalid_cases = [
-            "x" * 401,  # Just over limit
-            "x" * 500,  # Way over limit
-            "x" * 1000,  # Very long
-        ]
-
-        for invalid_desc in invalid_cases:
-            app_error = None
-            dataset_error = None
-            service_dataset_error = None
-
-            # Capture App validation error
-            try:
-                app_validate(invalid_desc)
-            except ValueError as e:
-                app_error = str(e)
-
-            # Capture Dataset validation error
-            try:
-                dataset_validate(invalid_desc)
-            except ValueError as e:
-                dataset_error = str(e)
-
-            # Capture Service Dataset validation error
-            try:
-                service_dataset_validate(invalid_desc)
-            except ValueError as e:
-                service_dataset_error = str(e)
-
-            # All should produce errors
-            assert app_error is not None, f"App validation should fail for {len(invalid_desc)} characters"
-            assert dataset_error is not None, f"Dataset validation should fail for {len(invalid_desc)} characters"
-            error_msg = f"Service Dataset validation should fail for {len(invalid_desc)} characters"
-            assert service_dataset_error is not None, error_msg
-
-            # Errors should be identical
-            error_msg = f"Error messages should be identical for {len(invalid_desc)} characters"
-            assert app_error == dataset_error == service_dataset_error, error_msg
-            assert app_error == "Description cannot exceed 400 characters."
-
    def test_boundary_values(self):
-        """Test boundary values around the 400 character limit"""
+        """Test boundary values around the 400 character limit."""
        boundary_tests = [
            (0, True),  # Empty
            (1, True),  # Minimum
@ -184,69 +63,45 @@ class TestDescriptionValidationUnit:

            if should_pass:
                # Should not raise exception
-                assert app_validate(test_desc) == test_desc
-                assert dataset_validate(test_desc) == test_desc
-                assert service_dataset_validate(test_desc) == test_desc
+                assert validate_description_length(test_desc) == test_desc
            else:
                # Should raise ValueError
                with pytest.raises(ValueError):
-                    app_validate(test_desc)
-                with pytest.raises(ValueError):
-                    dataset_validate(test_desc)
-                with pytest.raises(ValueError):
-                    service_dataset_validate(test_desc)
+                    validate_description_length(test_desc)

    def test_special_characters(self):
        """Test validation with special characters, Unicode, etc."""
        # Unicode characters
        unicode_desc = "测试描述" * 100  # Chinese characters
        if len(unicode_desc) <= 400:
-            assert app_validate(unicode_desc) == unicode_desc
-            assert dataset_validate(unicode_desc) == unicode_desc
-            assert service_dataset_validate(unicode_desc) == unicode_desc
+            assert validate_description_length(unicode_desc) == unicode_desc

        # Special characters
        special_desc = "Special chars: !@#$%^&*()_+-=[]{}|;':\",./<>?" * 10
        if len(special_desc) <= 400:
-            assert app_validate(special_desc) == special_desc
-            assert dataset_validate(special_desc) == special_desc
-            assert service_dataset_validate(special_desc) == special_desc
+            assert validate_description_length(special_desc) == special_desc

        # Mixed content
        mixed_desc = "Mixed content: 测试 123 !@# " * 15
        if len(mixed_desc) <= 400:
-            assert app_validate(mixed_desc) == mixed_desc
-            assert dataset_validate(mixed_desc) == mixed_desc
-            assert service_dataset_validate(mixed_desc) == mixed_desc
+            assert validate_description_length(mixed_desc) == mixed_desc
        elif len(mixed_desc) > 400:
            with pytest.raises(ValueError):
-                app_validate(mixed_desc)
-            with pytest.raises(ValueError):
-                dataset_validate(mixed_desc)
-            with pytest.raises(ValueError):
-                service_dataset_validate(mixed_desc)
+                validate_description_length(mixed_desc)

    def test_whitespace_handling(self):
-        """Test validation with various whitespace scenarios"""
+        """Test validation with various whitespace scenarios."""
        # Leading/trailing whitespace
        whitespace_desc = "   Description with whitespace   "
        if len(whitespace_desc) <= 400:
-            assert app_validate(whitespace_desc) == whitespace_desc
-            assert dataset_validate(whitespace_desc) == whitespace_desc
-            assert service_dataset_validate(whitespace_desc) == whitespace_desc
+            assert validate_description_length(whitespace_desc) == whitespace_desc

        # Newlines and tabs
        multiline_desc = "Line 1\nLine 2\tTabbed content"
        if len(multiline_desc) <= 400:
-            assert app_validate(multiline_desc) == multiline_desc
-            assert dataset_validate(multiline_desc) == multiline_desc
-            assert service_dataset_validate(multiline_desc) == multiline_desc
+            assert validate_description_length(multiline_desc) == multiline_desc

        # Only whitespace over limit
        only_spaces = " " * 401
        with pytest.raises(ValueError):
-            app_validate(only_spaces)
-        with pytest.raises(ValueError):
-            dataset_validate(only_spaces)
-        with pytest.raises(ValueError):
-            service_dataset_validate(only_spaces)
+            validate_description_length(only_spaces)