diff --git a/AGENTS.md b/AGENTS.md
index 51fa6e4527..d25d2eed96 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -29,7 +29,7 @@ The codebase is split into:
 
 ## Language Style
 
-- **Python**: Keep type hints on functions and attributes, and implement relevant special methods (e.g., `__repr__`, `__str__`).
+- **Python**: Keep type hints on functions and attributes, and implement relevant special methods (e.g., `__repr__`, `__str__`). Prefer `TypedDict` over `dict` or `Mapping` for type safety and better code documentation.
 - **TypeScript**: Use the strict config, rely on ESLint (`pnpm lint:fix` preferred) plus `pnpm type-check:tsgo`, and avoid `any` types.
 
 ## General Practices
diff --git a/api/tests/test_containers_integration_tests/services/test_document_service_rename_document.py b/api/tests/test_containers_integration_tests/services/test_document_service_rename_document.py
new file mode 100644
index 0000000000..f641da6576
--- /dev/null
+++ b/api/tests/test_containers_integration_tests/services/test_document_service_rename_document.py
@@ -0,0 +1,252 @@
+"""Container-backed integration tests for DocumentService.rename_document real SQL paths."""
+
+import datetime
+import json
+from unittest.mock import create_autospec, patch
+from uuid import uuid4
+
+import pytest
+
+from models import Account
+from models.dataset import Dataset, Document
+from models.enums import CreatorUserRole
+from models.model import UploadFile
+from services.dataset_service import DocumentService
+
+FIXED_UPLOAD_CREATED_AT = datetime.datetime(2024, 1, 1, 0, 0, 0)
+
+
+@pytest.fixture
+def mock_env():
+    """Patch the only non-SQL dependency used by rename_document: the current_user context."""
+    with patch("services.dataset_service.current_user", create_autospec(Account, instance=True)) as current_user:
+        current_user.current_tenant_id = str(uuid4())
+        current_user.id = str(uuid4())
+        yield {"current_user": current_user}
+
+
+def make_dataset(db_session_with_containers, dataset_id=None, tenant_id=None, built_in_field_enabled=False):
+    """Persist a dataset row for rename_document integration scenarios."""
+    dataset_id = dataset_id or str(uuid4())
+    tenant_id = tenant_id or str(uuid4())
+
+    dataset = Dataset(
+        tenant_id=tenant_id,
+        name=f"dataset-{uuid4()}",
+        data_source_type="upload_file",
+        created_by=str(uuid4()),
+    )
+    dataset.id = dataset_id
+    dataset.built_in_field_enabled = built_in_field_enabled
+
+    db_session_with_containers.add(dataset)
+    db_session_with_containers.commit()
+    return dataset
+
+
+def make_document(
+    db_session_with_containers,
+    document_id=None,
+    dataset_id=None,
+    tenant_id=None,
+    name="Old Name",
+    data_source_info=None,
+    doc_metadata=None,
+):
+    """Persist a document row used by rename_document integration scenarios."""
+    document_id = document_id or str(uuid4())
+    dataset_id = dataset_id or str(uuid4())
+    tenant_id = tenant_id or str(uuid4())
+
+    doc = Document(
+        tenant_id=tenant_id,
+        dataset_id=dataset_id,
+        position=1,
+        data_source_type="upload_file",
+        data_source_info=json.dumps(data_source_info or {}),
+        batch=f"batch-{uuid4()}",
+        name=name,
+        created_from="web",
+        created_by=str(uuid4()),
+        doc_form="text_model",
+    )
+    doc.id = document_id
+    doc.indexing_status = "completed"
+    doc.doc_metadata = dict(doc_metadata or {})
+
+    db_session_with_containers.add(doc)
+    db_session_with_containers.commit()
+    return doc
+
+
+def make_upload_file(db_session_with_containers, tenant_id: str, file_id: str, name: str):
+    """Persist an upload file row referenced by document.data_source_info."""
+    
upload_file = UploadFile(
+        tenant_id=tenant_id,
+        storage_type="local",
+        key=f"uploads/{uuid4()}",
+        name=name,
+        size=128,
+        extension="pdf",
+        mime_type="application/pdf",
+        created_by_role=CreatorUserRole.ACCOUNT,
+        created_by=str(uuid4()),
+        created_at=FIXED_UPLOAD_CREATED_AT,
+        used=False,
+    )
+    upload_file.id = file_id
+
+    db_session_with_containers.add(upload_file)
+    db_session_with_containers.commit()
+    return upload_file
+
+
+def test_rename_document_success(db_session_with_containers, mock_env):
+    """Rename succeeds and returns the renamed document, matched by id."""
+    # Arrange
+    dataset_id = str(uuid4())
+    document_id = str(uuid4())
+    new_name = "New Document Name"
+    dataset = make_dataset(db_session_with_containers, dataset_id, mock_env["current_user"].current_tenant_id)
+    document = make_document(
+        db_session_with_containers,
+        document_id=document_id,
+        dataset_id=dataset_id,
+        tenant_id=mock_env["current_user"].current_tenant_id,
+    )
+
+    # Act
+    result = DocumentService.rename_document(dataset.id, document_id, new_name)
+
+    # Assert
+    db_session_with_containers.refresh(document)
+    assert result.id == document.id
+    assert document.name == new_name
+
+
+def test_rename_document_with_built_in_fields(db_session_with_containers, mock_env):
+    """Built-in document_name metadata is updated while existing metadata keys are preserved."""
+    # Arrange
+    dataset_id = str(uuid4())
+    document_id = str(uuid4())
+    new_name = "Renamed"
+    dataset = make_dataset(
+        db_session_with_containers,
+        dataset_id,
+        mock_env["current_user"].current_tenant_id,
+        built_in_field_enabled=True,
+    )
+    document = make_document(
+        db_session_with_containers,
+        document_id=document_id,
+        dataset_id=dataset.id,
+        tenant_id=mock_env["current_user"].current_tenant_id,
+        doc_metadata={"foo": "bar"},
+    )
+
+    # Act
+    DocumentService.rename_document(dataset.id, document.id, new_name)
+
+    # Assert
+    db_session_with_containers.refresh(document)
+    assert document.name == new_name
+    assert document.doc_metadata["document_name"] == new_name
+    assert document.doc_metadata["foo"] == "bar"
+
+
+def test_rename_document_updates_upload_file_when_present(db_session_with_containers, mock_env):
+    """Rename propagates to UploadFile.name when upload_file_id is present in data_source_info."""
+    # Arrange
+    dataset_id = str(uuid4())
+    document_id = str(uuid4())
+    file_id = str(uuid4())
+    new_name = "Renamed"
+    dataset = make_dataset(db_session_with_containers, dataset_id, mock_env["current_user"].current_tenant_id)
+    document = make_document(
+        db_session_with_containers,
+        document_id=document_id,
+        dataset_id=dataset.id,
+        tenant_id=mock_env["current_user"].current_tenant_id,
+        data_source_info={"upload_file_id": file_id},
+    )
+    upload_file = make_upload_file(
+        db_session_with_containers,
+        tenant_id=mock_env["current_user"].current_tenant_id,
+        file_id=file_id,
+        name="old.pdf",
+    )
+
+    # Act
+    DocumentService.rename_document(dataset.id, document.id, new_name)
+
+    # Assert
+    db_session_with_containers.refresh(document)
+    db_session_with_containers.refresh(upload_file)
+    assert document.name == new_name
+    assert upload_file.name == new_name
+
+
+def test_rename_document_does_not_update_upload_file_when_missing_id(db_session_with_containers, mock_env):
+    """Rename does not update UploadFile when data_source_info lacks upload_file_id."""
+    # Arrange
+    dataset_id = str(uuid4())
+    document_id = str(uuid4())
+    new_name = "Another Name"
+    dataset = make_dataset(db_session_with_containers, dataset_id, 
mock_env["current_user"].current_tenant_id) + document = make_document( + db_session_with_containers, + document_id=document_id, + dataset_id=dataset.id, + tenant_id=mock_env["current_user"].current_tenant_id, + data_source_info={"url": "https://example.com"}, + ) + untouched_file = make_upload_file( + db_session_with_containers, + tenant_id=mock_env["current_user"].current_tenant_id, + file_id=str(uuid4()), + name="untouched.pdf", + ) + + # Act + DocumentService.rename_document(dataset.id, document.id, new_name) + + # Assert + db_session_with_containers.refresh(document) + db_session_with_containers.refresh(untouched_file) + assert document.name == new_name + assert untouched_file.name == "untouched.pdf" + + +def test_rename_document_dataset_not_found(db_session_with_containers, mock_env): + """Rename raises Dataset not found when dataset id does not exist.""" + # Arrange + missing_dataset_id = str(uuid4()) + + # Act / Assert + with pytest.raises(ValueError, match="Dataset not found"): + DocumentService.rename_document(missing_dataset_id, str(uuid4()), "x") + + +def test_rename_document_not_found(db_session_with_containers, mock_env): + """Rename raises Document not found when document id is absent in the dataset.""" + # Arrange + dataset = make_dataset(db_session_with_containers, str(uuid4()), mock_env["current_user"].current_tenant_id) + + # Act / Assert + with pytest.raises(ValueError, match="Document not found"): + DocumentService.rename_document(dataset.id, str(uuid4()), "x") + + +def test_rename_document_permission_denied_when_tenant_mismatch(db_session_with_containers, mock_env): + """Rename raises No permission when document tenant differs from current_user tenant.""" + # Arrange + dataset = make_dataset(db_session_with_containers, str(uuid4()), mock_env["current_user"].current_tenant_id) + document = make_document( + db_session_with_containers, + dataset_id=dataset.id, + tenant_id=str(uuid4()), + ) + + # Act / Assert + with pytest.raises(ValueError, match="No permission"): + DocumentService.rename_document(dataset.id, document.id, "x") diff --git a/api/tests/test_containers_integration_tests/tasks/test_duplicate_document_indexing_task.py b/api/tests/test_containers_integration_tests/tasks/test_duplicate_document_indexing_task.py index b2e1ce3b89..c61e37b1e9 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_duplicate_document_indexing_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_duplicate_document_indexing_task.py @@ -3,6 +3,7 @@ from unittest.mock import MagicMock, patch import pytest from faker import Faker +from core.indexing_runner import DocumentIsPausedError from enums.cloud_plan import CloudPlan from models import Account, Tenant, TenantAccountJoin, TenantAccountRole from models.dataset import Dataset, Document, DocumentSegment @@ -282,7 +283,7 @@ class TestDuplicateDocumentIndexingTasks: return dataset, documents - def test_duplicate_document_indexing_task_success( + def _test_duplicate_document_indexing_task_success( self, db_session_with_containers, mock_external_service_dependencies ): """ @@ -324,7 +325,7 @@ class TestDuplicateDocumentIndexingTasks: processed_documents = call_args[0][0] # First argument should be documents list assert len(processed_documents) == 3 - def test_duplicate_document_indexing_task_with_segment_cleanup( + def _test_duplicate_document_indexing_task_with_segment_cleanup( self, db_session_with_containers, mock_external_service_dependencies ): """ @@ -374,7 +375,7 @@ class 
TestDuplicateDocumentIndexingTasks: mock_external_service_dependencies["indexing_runner"].assert_called_once() mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() - def test_duplicate_document_indexing_task_dataset_not_found( + def _test_duplicate_document_indexing_task_dataset_not_found( self, db_session_with_containers, mock_external_service_dependencies ): """ @@ -445,7 +446,7 @@ class TestDuplicateDocumentIndexingTasks: processed_documents = call_args[0][0] # First argument should be documents list assert len(processed_documents) == 2 # Only existing documents - def test_duplicate_document_indexing_task_indexing_runner_exception( + def _test_duplicate_document_indexing_task_indexing_runner_exception( self, db_session_with_containers, mock_external_service_dependencies ): """ @@ -486,7 +487,7 @@ class TestDuplicateDocumentIndexingTasks: assert updated_document.indexing_status == "parsing" assert updated_document.processing_started_at is not None - def test_duplicate_document_indexing_task_billing_sandbox_plan_batch_limit( + def _test_duplicate_document_indexing_task_billing_sandbox_plan_batch_limit( self, db_session_with_containers, mock_external_service_dependencies ): """ @@ -549,7 +550,7 @@ class TestDuplicateDocumentIndexingTasks: # Verify indexing runner was not called due to early validation error mock_external_service_dependencies["indexing_runner_instance"].run.assert_not_called() - def test_duplicate_document_indexing_task_billing_vector_space_limit_exceeded( + def _test_duplicate_document_indexing_task_billing_vector_space_limit_exceeded( self, db_session_with_containers, mock_external_service_dependencies ): """ @@ -783,3 +784,90 @@ class TestDuplicateDocumentIndexingTasks: document_ids=document_ids, ) mock_queue.delete_task_key.assert_not_called() + + def test_successful_duplicate_document_indexing( + self, db_session_with_containers, mock_external_service_dependencies + ): + """Test successful duplicate document indexing flow.""" + self._test_duplicate_document_indexing_task_success( + db_session_with_containers, mock_external_service_dependencies + ) + + def test_duplicate_document_indexing_dataset_not_found( + self, db_session_with_containers, mock_external_service_dependencies + ): + """Test duplicate document indexing when dataset is not found.""" + self._test_duplicate_document_indexing_task_dataset_not_found( + db_session_with_containers, mock_external_service_dependencies + ) + + def test_duplicate_document_indexing_with_billing_enabled_sandbox_plan( + self, db_session_with_containers, mock_external_service_dependencies + ): + """Test duplicate document indexing with billing enabled and sandbox plan.""" + self._test_duplicate_document_indexing_task_billing_sandbox_plan_batch_limit( + db_session_with_containers, mock_external_service_dependencies + ) + + def test_duplicate_document_indexing_with_billing_limit_exceeded( + self, db_session_with_containers, mock_external_service_dependencies + ): + """Test duplicate document indexing when billing limit is exceeded.""" + self._test_duplicate_document_indexing_task_billing_vector_space_limit_exceeded( + db_session_with_containers, mock_external_service_dependencies + ) + + def test_duplicate_document_indexing_runner_error( + self, db_session_with_containers, mock_external_service_dependencies + ): + """Test duplicate document indexing when IndexingRunner raises an error.""" + self._test_duplicate_document_indexing_task_indexing_runner_exception( + db_session_with_containers, 
mock_external_service_dependencies + ) + + def _test_duplicate_document_indexing_task_document_is_paused( + self, db_session_with_containers, mock_external_service_dependencies + ): + """Test duplicate document indexing when document is paused.""" + # Arrange + dataset, documents = self._create_test_dataset_and_documents( + db_session_with_containers, mock_external_service_dependencies, document_count=2 + ) + for document in documents: + document.is_paused = True + db_session_with_containers.add(document) + db_session_with_containers.commit() + + document_ids = [doc.id for doc in documents] + mock_external_service_dependencies["indexing_runner_instance"].run.side_effect = DocumentIsPausedError( + "Document paused" + ) + + # Act + _duplicate_document_indexing_task(dataset.id, document_ids) + db_session_with_containers.expire_all() + + # Assert + for doc_id in document_ids: + updated_document = db_session_with_containers.query(Document).where(Document.id == doc_id).first() + assert updated_document.is_paused is True + assert updated_document.indexing_status == "parsing" + assert updated_document.display_status == "paused" + assert updated_document.processing_started_at is not None + mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() + + def test_duplicate_document_indexing_document_is_paused( + self, db_session_with_containers, mock_external_service_dependencies + ): + """Test duplicate document indexing when document is paused.""" + self._test_duplicate_document_indexing_task_document_is_paused( + db_session_with_containers, mock_external_service_dependencies + ) + + def test_duplicate_document_indexing_cleans_old_segments( + self, db_session_with_containers, mock_external_service_dependencies + ): + """Test that duplicate document indexing cleans old segments.""" + self._test_duplicate_document_indexing_task_with_segment_cleanup( + db_session_with_containers, mock_external_service_dependencies + ) diff --git a/api/tests/unit_tests/services/test_document_service_rename_document.py b/api/tests/unit_tests/services/test_document_service_rename_document.py deleted file mode 100644 index 94850ecb09..0000000000 --- a/api/tests/unit_tests/services/test_document_service_rename_document.py +++ /dev/null @@ -1,176 +0,0 @@ -from types import SimpleNamespace -from unittest.mock import Mock, create_autospec, patch - -import pytest - -from models import Account -from services.dataset_service import DocumentService - - -@pytest.fixture -def mock_env(): - """Patch dependencies used by DocumentService.rename_document. 
- - Mocks: - - DatasetService.get_dataset - - DocumentService.get_document - - current_user (with current_tenant_id) - - db.session - """ - with ( - patch("services.dataset_service.DatasetService.get_dataset") as get_dataset, - patch("services.dataset_service.DocumentService.get_document") as get_document, - patch("services.dataset_service.current_user", create_autospec(Account, instance=True)) as current_user, - patch("extensions.ext_database.db.session") as db_session, - ): - current_user.current_tenant_id = "tenant-123" - yield { - "get_dataset": get_dataset, - "get_document": get_document, - "current_user": current_user, - "db_session": db_session, - } - - -def make_dataset(dataset_id="dataset-123", tenant_id="tenant-123", built_in_field_enabled=False): - return SimpleNamespace(id=dataset_id, tenant_id=tenant_id, built_in_field_enabled=built_in_field_enabled) - - -def make_document( - document_id="document-123", - dataset_id="dataset-123", - tenant_id="tenant-123", - name="Old Name", - data_source_info=None, - doc_metadata=None, -): - doc = Mock() - doc.id = document_id - doc.dataset_id = dataset_id - doc.tenant_id = tenant_id - doc.name = name - doc.data_source_info = data_source_info or {} - # property-like usage in code relies on a dict - doc.data_source_info_dict = dict(doc.data_source_info) - doc.doc_metadata = dict(doc_metadata or {}) - return doc - - -def test_rename_document_success(mock_env): - dataset_id = "dataset-123" - document_id = "document-123" - new_name = "New Document Name" - - dataset = make_dataset(dataset_id) - document = make_document(document_id=document_id, dataset_id=dataset_id) - - mock_env["get_dataset"].return_value = dataset - mock_env["get_document"].return_value = document - - result = DocumentService.rename_document(dataset_id, document_id, new_name) - - assert result is document - assert document.name == new_name - mock_env["db_session"].add.assert_called_once_with(document) - mock_env["db_session"].commit.assert_called_once() - - -def test_rename_document_with_built_in_fields(mock_env): - dataset_id = "dataset-123" - document_id = "document-123" - new_name = "Renamed" - - dataset = make_dataset(dataset_id, built_in_field_enabled=True) - document = make_document(document_id=document_id, dataset_id=dataset_id, doc_metadata={"foo": "bar"}) - - mock_env["get_dataset"].return_value = dataset - mock_env["get_document"].return_value = document - - DocumentService.rename_document(dataset_id, document_id, new_name) - - assert document.name == new_name - # BuiltInField.document_name == "document_name" in service code - assert document.doc_metadata["document_name"] == new_name - assert document.doc_metadata["foo"] == "bar" - - -def test_rename_document_updates_upload_file_when_present(mock_env): - dataset_id = "dataset-123" - document_id = "document-123" - new_name = "Renamed" - file_id = "file-123" - - dataset = make_dataset(dataset_id) - document = make_document( - document_id=document_id, - dataset_id=dataset_id, - data_source_info={"upload_file_id": file_id}, - ) - - mock_env["get_dataset"].return_value = dataset - mock_env["get_document"].return_value = document - - # Intercept UploadFile rename UPDATE chain - mock_query = Mock() - mock_query.where.return_value = mock_query - mock_env["db_session"].query.return_value = mock_query - - DocumentService.rename_document(dataset_id, document_id, new_name) - - assert document.name == new_name - mock_env["db_session"].query.assert_called() # update executed - - -def 
test_rename_document_does_not_update_upload_file_when_missing_id(mock_env): - """ - When data_source_info_dict exists but does not contain "upload_file_id", - UploadFile should not be updated. - """ - dataset_id = "dataset-123" - document_id = "document-123" - new_name = "Another Name" - - dataset = make_dataset(dataset_id) - # Ensure data_source_info_dict is truthy but lacks the key - document = make_document( - document_id=document_id, - dataset_id=dataset_id, - data_source_info={"url": "https://example.com"}, - ) - - mock_env["get_dataset"].return_value = dataset - mock_env["get_document"].return_value = document - - DocumentService.rename_document(dataset_id, document_id, new_name) - - assert document.name == new_name - # Should NOT attempt to update UploadFile - mock_env["db_session"].query.assert_not_called() - - -def test_rename_document_dataset_not_found(mock_env): - mock_env["get_dataset"].return_value = None - - with pytest.raises(ValueError, match="Dataset not found"): - DocumentService.rename_document("missing", "doc", "x") - - -def test_rename_document_not_found(mock_env): - dataset = make_dataset("dataset-123") - mock_env["get_dataset"].return_value = dataset - mock_env["get_document"].return_value = None - - with pytest.raises(ValueError, match="Document not found"): - DocumentService.rename_document(dataset.id, "missing", "x") - - -def test_rename_document_permission_denied_when_tenant_mismatch(mock_env): - dataset = make_dataset("dataset-123") - # different tenant than current_user.current_tenant_id - document = make_document(dataset_id=dataset.id, tenant_id="tenant-other") - - mock_env["get_dataset"].return_value = dataset - mock_env["get_document"].return_value = document - - with pytest.raises(ValueError, match="No permission"): - DocumentService.rename_document(dataset.id, document.id, "x") diff --git a/api/tests/unit_tests/tasks/test_duplicate_document_indexing_task.py b/api/tests/unit_tests/tasks/test_duplicate_document_indexing_task.py index 68fb8b748f..f6dbc4275b 100644 --- a/api/tests/unit_tests/tasks/test_duplicate_document_indexing_task.py +++ b/api/tests/unit_tests/tasks/test_duplicate_document_indexing_task.py @@ -1,158 +1,38 @@ -""" -Unit tests for duplicate document indexing tasks. 
- -This module tests the duplicate document indexing task functionality including: -- Task enqueuing to different queues (normal, priority, tenant-isolated) -- Batch processing of multiple duplicate documents -- Progress tracking through task lifecycle -- Error handling and retry mechanisms -- Cleanup of old document data before re-indexing -""" +"""Unit tests for queue/wrapper behaviors in duplicate document indexing tasks (non-database logic).""" import uuid -from unittest.mock import MagicMock, Mock, patch +from unittest.mock import Mock, patch import pytest -from core.indexing_runner import DocumentIsPausedError, IndexingRunner from core.rag.pipeline.queue import TenantIsolatedTaskQueue -from enums.cloud_plan import CloudPlan -from models.dataset import Dataset, Document, DocumentSegment from tasks.duplicate_document_indexing_task import ( - _duplicate_document_indexing_task, _duplicate_document_indexing_task_with_tenant_queue, duplicate_document_indexing_task, normal_duplicate_document_indexing_task, priority_duplicate_document_indexing_task, ) -# ============================================================================ -# Fixtures -# ============================================================================ - @pytest.fixture def tenant_id(): - """Generate a unique tenant ID for testing.""" return str(uuid.uuid4()) @pytest.fixture def dataset_id(): - """Generate a unique dataset ID for testing.""" return str(uuid.uuid4()) @pytest.fixture def document_ids(): - """Generate a list of document IDs for testing.""" return [str(uuid.uuid4()) for _ in range(3)] -@pytest.fixture -def mock_dataset(dataset_id, tenant_id): - """Create a mock Dataset object.""" - dataset = Mock(spec=Dataset) - dataset.id = dataset_id - dataset.tenant_id = tenant_id - dataset.indexing_technique = "high_quality" - dataset.embedding_model_provider = "openai" - dataset.embedding_model = "text-embedding-ada-002" - return dataset - - -@pytest.fixture -def mock_documents(document_ids, dataset_id): - """Create mock Document objects.""" - documents = [] - for doc_id in document_ids: - doc = Mock(spec=Document) - doc.id = doc_id - doc.dataset_id = dataset_id - doc.indexing_status = "waiting" - doc.error = None - doc.stopped_at = None - doc.processing_started_at = None - doc.doc_form = "text_model" - documents.append(doc) - return documents - - -@pytest.fixture -def mock_document_segments(document_ids): - """Create mock DocumentSegment objects.""" - segments = [] - for doc_id in document_ids: - for i in range(3): - segment = Mock(spec=DocumentSegment) - segment.id = str(uuid.uuid4()) - segment.document_id = doc_id - segment.index_node_id = f"node-{doc_id}-{i}" - segments.append(segment) - return segments - - -@pytest.fixture -def mock_db_session(): - """Mock database session via session_factory.create_session().""" - with patch("tasks.duplicate_document_indexing_task.session_factory", autospec=True) as mock_sf: - session = MagicMock() - # Allow tests to observe session.close() via context manager teardown - session.close = MagicMock() - cm = MagicMock() - cm.__enter__.return_value = session - - def _exit_side_effect(*args, **kwargs): - session.close() - - cm.__exit__.side_effect = _exit_side_effect - mock_sf.create_session.return_value = cm - - query = MagicMock() - session.query.return_value = query - query.where.return_value = query - session.scalars.return_value = MagicMock() - yield session - - -@pytest.fixture -def mock_indexing_runner(): - """Mock IndexingRunner.""" - with 
patch("tasks.duplicate_document_indexing_task.IndexingRunner", autospec=True) as mock_runner_class: - mock_runner = MagicMock(spec=IndexingRunner) - mock_runner_class.return_value = mock_runner - yield mock_runner - - -@pytest.fixture -def mock_feature_service(): - """Mock FeatureService.""" - with patch("tasks.duplicate_document_indexing_task.FeatureService", autospec=True) as mock_service: - mock_features = Mock() - mock_features.billing = Mock() - mock_features.billing.enabled = False - mock_features.vector_space = Mock() - mock_features.vector_space.size = 0 - mock_features.vector_space.limit = 1000 - mock_service.get_features.return_value = mock_features - yield mock_service - - -@pytest.fixture -def mock_index_processor_factory(): - """Mock IndexProcessorFactory.""" - with patch("tasks.duplicate_document_indexing_task.IndexProcessorFactory", autospec=True) as mock_factory: - mock_processor = MagicMock() - mock_processor.clean = Mock() - mock_factory.return_value.init_index_processor.return_value = mock_processor - yield mock_factory - - @pytest.fixture def mock_tenant_isolated_queue(): - """Mock TenantIsolatedTaskQueue.""" with patch("tasks.duplicate_document_indexing_task.TenantIsolatedTaskQueue", autospec=True) as mock_queue_class: - mock_queue = MagicMock(spec=TenantIsolatedTaskQueue) + mock_queue = Mock(spec=TenantIsolatedTaskQueue) mock_queue.pull_tasks.return_value = [] mock_queue.delete_task_key = Mock() mock_queue.set_task_waiting_time = Mock() @@ -160,11 +40,6 @@ def mock_tenant_isolated_queue(): yield mock_queue -# ============================================================================ -# Tests for deprecated duplicate_document_indexing_task -# ============================================================================ - - class TestDuplicateDocumentIndexingTask: """Tests for the deprecated duplicate_document_indexing_task function.""" @@ -190,258 +65,6 @@ class TestDuplicateDocumentIndexingTask: mock_core_func.assert_called_once_with(dataset_id, document_ids) -# ============================================================================ -# Tests for _duplicate_document_indexing_task core function -# ============================================================================ - - -class TestDuplicateDocumentIndexingTaskCore: - """Tests for the _duplicate_document_indexing_task core function.""" - - def test_successful_duplicate_document_indexing( - self, - mock_db_session, - mock_indexing_runner, - mock_feature_service, - mock_index_processor_factory, - mock_dataset, - mock_documents, - mock_document_segments, - dataset_id, - document_ids, - ): - """Test successful duplicate document indexing flow.""" - # Arrange - # Dataset via query.first() - mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset - # scalars() call sequence: - # 1) documents list - # 2..N) segments per document - - def _scalars_side_effect(*args, **kwargs): - m = MagicMock() - # First call returns documents; subsequent calls return segments - if not hasattr(_scalars_side_effect, "_calls"): - _scalars_side_effect._calls = 0 - if _scalars_side_effect._calls == 0: - m.all.return_value = mock_documents - else: - m.all.return_value = mock_document_segments - _scalars_side_effect._calls += 1 - return m - - mock_db_session.scalars.side_effect = _scalars_side_effect - - # Act - _duplicate_document_indexing_task(dataset_id, document_ids) - - # Assert - # Verify IndexingRunner was called - mock_indexing_runner.run.assert_called_once() - - # Verify all documents were set to 
parsing status - for doc in mock_documents: - assert doc.indexing_status == "parsing" - assert doc.processing_started_at is not None - - # Verify session operations - assert mock_db_session.commit.called - assert mock_db_session.close.called - - def test_duplicate_document_indexing_dataset_not_found(self, mock_db_session, dataset_id, document_ids): - """Test duplicate document indexing when dataset is not found.""" - # Arrange - mock_db_session.query.return_value.where.return_value.first.return_value = None - - # Act - _duplicate_document_indexing_task(dataset_id, document_ids) - - # Assert - # Should close the session at least once - assert mock_db_session.close.called - - def test_duplicate_document_indexing_with_billing_enabled_sandbox_plan( - self, - mock_db_session, - mock_feature_service, - mock_dataset, - dataset_id, - document_ids, - ): - """Test duplicate document indexing with billing enabled and sandbox plan.""" - # Arrange - mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset - mock_features = mock_feature_service.get_features.return_value - mock_features.billing.enabled = True - mock_features.billing.subscription.plan = CloudPlan.SANDBOX - - # Act - _duplicate_document_indexing_task(dataset_id, document_ids) - - # Assert - # For sandbox plan with multiple documents, should fail - mock_db_session.commit.assert_called() - - def test_duplicate_document_indexing_with_billing_limit_exceeded( - self, - mock_db_session, - mock_feature_service, - mock_dataset, - mock_documents, - dataset_id, - document_ids, - ): - """Test duplicate document indexing when billing limit is exceeded.""" - # Arrange - mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset - # First scalars() -> documents; subsequent -> empty segments - - def _scalars_side_effect(*args, **kwargs): - m = MagicMock() - if not hasattr(_scalars_side_effect, "_calls"): - _scalars_side_effect._calls = 0 - if _scalars_side_effect._calls == 0: - m.all.return_value = mock_documents - else: - m.all.return_value = [] - _scalars_side_effect._calls += 1 - return m - - mock_db_session.scalars.side_effect = _scalars_side_effect - mock_features = mock_feature_service.get_features.return_value - mock_features.billing.enabled = True - mock_features.billing.subscription.plan = CloudPlan.TEAM - mock_features.vector_space.size = 990 - mock_features.vector_space.limit = 1000 - - # Act - _duplicate_document_indexing_task(dataset_id, document_ids) - - # Assert - # Should commit the session - assert mock_db_session.commit.called - # Should close the session - assert mock_db_session.close.called - - def test_duplicate_document_indexing_runner_error( - self, - mock_db_session, - mock_indexing_runner, - mock_feature_service, - mock_index_processor_factory, - mock_dataset, - mock_documents, - dataset_id, - document_ids, - ): - """Test duplicate document indexing when IndexingRunner raises an error.""" - # Arrange - mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset - - def _scalars_side_effect(*args, **kwargs): - m = MagicMock() - if not hasattr(_scalars_side_effect, "_calls"): - _scalars_side_effect._calls = 0 - if _scalars_side_effect._calls == 0: - m.all.return_value = mock_documents - else: - m.all.return_value = [] - _scalars_side_effect._calls += 1 - return m - - mock_db_session.scalars.side_effect = _scalars_side_effect - mock_indexing_runner.run.side_effect = Exception("Indexing error") - - # Act - 
_duplicate_document_indexing_task(dataset_id, document_ids) - - # Assert - # Should close the session even after error - mock_db_session.close.assert_called_once() - - def test_duplicate_document_indexing_document_is_paused( - self, - mock_db_session, - mock_indexing_runner, - mock_feature_service, - mock_index_processor_factory, - mock_dataset, - mock_documents, - dataset_id, - document_ids, - ): - """Test duplicate document indexing when document is paused.""" - # Arrange - mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset - - def _scalars_side_effect(*args, **kwargs): - m = MagicMock() - if not hasattr(_scalars_side_effect, "_calls"): - _scalars_side_effect._calls = 0 - if _scalars_side_effect._calls == 0: - m.all.return_value = mock_documents - else: - m.all.return_value = [] - _scalars_side_effect._calls += 1 - return m - - mock_db_session.scalars.side_effect = _scalars_side_effect - mock_indexing_runner.run.side_effect = DocumentIsPausedError("Document paused") - - # Act - _duplicate_document_indexing_task(dataset_id, document_ids) - - # Assert - # Should handle DocumentIsPausedError gracefully - mock_db_session.close.assert_called_once() - - def test_duplicate_document_indexing_cleans_old_segments( - self, - mock_db_session, - mock_indexing_runner, - mock_feature_service, - mock_index_processor_factory, - mock_dataset, - mock_documents, - mock_document_segments, - dataset_id, - document_ids, - ): - """Test that duplicate document indexing cleans old segments.""" - # Arrange - mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset - - def _scalars_side_effect(*args, **kwargs): - m = MagicMock() - if not hasattr(_scalars_side_effect, "_calls"): - _scalars_side_effect._calls = 0 - if _scalars_side_effect._calls == 0: - m.all.return_value = mock_documents - else: - m.all.return_value = mock_document_segments - _scalars_side_effect._calls += 1 - return m - - mock_db_session.scalars.side_effect = _scalars_side_effect - mock_processor = mock_index_processor_factory.return_value.init_index_processor.return_value - - # Act - _duplicate_document_indexing_task(dataset_id, document_ids) - - # Assert - # Verify clean was called for each document - assert mock_processor.clean.call_count == len(mock_documents) - - # Verify segments were deleted in batch (DELETE FROM document_segments) - execute_sqls = [" ".join(str(c[0][0]).split()) for c in mock_db_session.execute.call_args_list] - assert any("DELETE FROM document_segments" in sql for sql in execute_sqls) - - -# ============================================================================ -# Tests for tenant queue wrapper function -# ============================================================================ - - class TestDuplicateDocumentIndexingTaskWithTenantQueue: """Tests for _duplicate_document_indexing_task_with_tenant_queue function.""" @@ -536,11 +159,6 @@ class TestDuplicateDocumentIndexingTaskWithTenantQueue: mock_tenant_isolated_queue.pull_tasks.assert_called_once() -# ============================================================================ -# Tests for normal_duplicate_document_indexing_task -# ============================================================================ - - class TestNormalDuplicateDocumentIndexingTask: """Tests for normal_duplicate_document_indexing_task function.""" @@ -581,11 +199,6 @@ class TestNormalDuplicateDocumentIndexingTask: ) -# ============================================================================ -# Tests for 
priority_duplicate_document_indexing_task -# ============================================================================ - - class TestPriorityDuplicateDocumentIndexingTask: """Tests for priority_duplicate_document_indexing_task function.""" diff --git a/api/uv.lock b/api/uv.lock index 42b010286b..5a9ac096dc 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -441,14 +441,14 @@ wheels = [ [[package]] name = "authlib" -version = "1.6.6" +version = "1.6.7" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cryptography" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bb/9b/b1661026ff24bc641b76b78c5222d614776b0c085bcfdac9bd15a1cb4b35/authlib-1.6.6.tar.gz", hash = "sha256:45770e8e056d0f283451d9996fbb59b70d45722b45d854d58f32878d0a40c38e", size = 164894, upload-time = "2025-12-12T08:01:41.464Z" } +sdist = { url = "https://files.pythonhosted.org/packages/49/dc/ed1681bf1339dd6ea1ce56136bad4baabc6f7ad466e375810702b0237047/authlib-1.6.7.tar.gz", hash = "sha256:dbf10100011d1e1b34048c9d120e83f13b35d69a826ae762b93d2fb5aafc337b", size = 164950, upload-time = "2026-02-06T14:04:14.171Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/54/51/321e821856452f7386c4e9df866f196720b1ad0c5ea1623ea7399969ae3b/authlib-1.6.6-py2.py3-none-any.whl", hash = "sha256:7d9e9bc535c13974313a87f53e8430eb6ea3d1cf6ae4f6efcd793f2e949143fd", size = 244005, upload-time = "2025-12-12T08:01:40.209Z" }, + { url = "https://files.pythonhosted.org/packages/f8/00/3ed12264094ec91f534fae429945efbaa9f8c666f3aa7061cc3b2a26a0cd/authlib-1.6.7-py2.py3-none-any.whl", hash = "sha256:c637340d9a02789d2efa1d003a7437d10d3e565237bcb5fcbc6c134c7b95bab0", size = 244115, upload-time = "2026-02-06T14:04:12.141Z" }, ] [[package]] @@ -1989,11 +1989,11 @@ wheels = [ [[package]] name = "fickling" -version = "0.1.8" +version = "0.1.9" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/88/be/cd91e3921f064230ac9462479e4647fb91a7b0d01677103fce89f52e3042/fickling-0.1.8.tar.gz", hash = "sha256:25a0bc7acda76176a9087b405b05f7f5021f76079aa26c6fe3270855ec57d9bf", size = 336756, upload-time = "2026-02-21T00:57:26.106Z" } +sdist = { url = "https://files.pythonhosted.org/packages/25/bd/ca7127df0201596b0b30f9ab3d36e565bb9d6f8f4da1560758b817e81b65/fickling-0.1.9.tar.gz", hash = "sha256:bb518c2fd833555183bc46b6903bb4022f3ae0436a69c3fb149cfc75eebaac33", size = 336940, upload-time = "2026-03-03T23:32:19.449Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/02/92/af72f783ac57fa2452f8f921c9441366c42ae1f03f5af41718445114c82f/fickling-0.1.8-py3-none-any.whl", hash = "sha256:97218785cfe00a93150808dcf9e3eb512371e0484e3ce0b05bc460b97240f292", size = 52613, upload-time = "2026-02-21T00:57:24.82Z" }, + { url = "https://files.pythonhosted.org/packages/92/49/c597bad508c74917901432b41ae5a8f036839a7fb8d0d29a89765f5d3643/fickling-0.1.9-py3-none-any.whl", hash = "sha256:ccc3ce3b84733406ade2fe749717f6e428047335157c6431eefd3e7e970a06d1", size = 52786, upload-time = "2026-03-03T23:32:17.533Z" }, ] [[package]] diff --git a/web/app/(commonLayout)/layout.tsx b/web/app/(commonLayout)/layout.tsx index abd5dd96fd..bd067fde6a 100644 --- a/web/app/(commonLayout)/layout.tsx +++ b/web/app/(commonLayout)/layout.tsx @@ -8,7 +8,7 @@ import GotoAnything from '@/app/components/goto-anything' import Header from '@/app/components/header' import HeaderWrapper from '@/app/components/header/header-wrapper' import ReadmePanel from '@/app/components/plugins/readme-panel' -import { 
AppContextProvider } from '@/context/app-context' +import { AppContextProvider } from '@/context/app-context-provider' import { EventEmitterContextProvider } from '@/context/event-emitter' import { ModalContextProvider } from '@/context/modal-context' import { ProviderContextProvider } from '@/context/provider-context' diff --git a/web/app/account/(commonLayout)/layout.tsx b/web/app/account/(commonLayout)/layout.tsx index e4125015d9..47fb47b02b 100644 --- a/web/app/account/(commonLayout)/layout.tsx +++ b/web/app/account/(commonLayout)/layout.tsx @@ -4,7 +4,7 @@ import { AppInitializer } from '@/app/components/app-initializer' import AmplitudeProvider from '@/app/components/base/amplitude' import GA, { GaType } from '@/app/components/base/ga' import HeaderWrapper from '@/app/components/header/header-wrapper' -import { AppContextProvider } from '@/context/app-context' +import { AppContextProvider } from '@/context/app-context-provider' import { EventEmitterContextProvider } from '@/context/event-emitter' import { ModalContextProvider } from '@/context/modal-context' import { ProviderContextProvider } from '@/context/provider-context' diff --git a/web/app/account/oauth/authorize/layout.tsx b/web/app/account/oauth/authorize/layout.tsx index 189971b16f..7f6b270b45 100644 --- a/web/app/account/oauth/authorize/layout.tsx +++ b/web/app/account/oauth/authorize/layout.tsx @@ -2,7 +2,7 @@ import Loading from '@/app/components/base/loading' import Header from '@/app/signin/_header' -import { AppContextProvider } from '@/context/app-context' +import { AppContextProvider } from '@/context/app-context-provider' import { useGlobalPublicStore } from '@/context/global-public-context' import useDocumentTitle from '@/hooks/use-document-title' import { useIsLogin } from '@/service/use-common' @@ -38,7 +38,7 @@ export default function SignInLayout({ children }: any) { {systemFeatures.branding.enabled === false && ( -