refactor: reuse redis connection instead of creating a new one (#32678)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
wangxiaolei
2026-03-09 15:53:21 +08:00
committed by GitHub
parent cbb19cce39
commit 9970f4449a
10 changed files with 1360 additions and 112 deletions

View File

@@ -322,11 +322,14 @@ class TestDatasetIndexingTaskIntegration:
_document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy)
# Assert
task_dispatch_spy.delay.assert_called_once_with(
tenant_id=next_task["tenant_id"],
dataset_id=next_task["dataset_id"],
document_ids=next_task["document_ids"],
)
# apply_async is used by implementation; assert it was called once with expected kwargs
assert task_dispatch_spy.apply_async.call_count == 1
call_kwargs = task_dispatch_spy.apply_async.call_args.kwargs.get("kwargs", {})
assert call_kwargs == {
"tenant_id": next_task["tenant_id"],
"dataset_id": next_task["dataset_id"],
"document_ids": next_task["document_ids"],
}
set_waiting_spy.assert_called_once()
delete_key_spy.assert_not_called()
@@ -352,7 +355,7 @@ class TestDatasetIndexingTaskIntegration:
_document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy)
# Assert
task_dispatch_spy.delay.assert_not_called()
task_dispatch_spy.apply_async.assert_not_called()
delete_key_spy.assert_called_once()
def test_validation_failure_sets_error_status_when_vector_space_at_limit(
@@ -447,7 +450,7 @@ class TestDatasetIndexingTaskIntegration:
_document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy)
# Assert
task_dispatch_spy.delay.assert_called_once()
task_dispatch_spy.apply_async.assert_called_once()
def test_sessions_close_on_successful_indexing(
self,
@@ -534,7 +537,7 @@ class TestDatasetIndexingTaskIntegration:
_document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy)
# Assert
assert task_dispatch_spy.delay.call_count == concurrency_limit
assert task_dispatch_spy.apply_async.call_count == concurrency_limit
assert set_waiting_spy.call_count == concurrency_limit
def test_task_queue_fifo_ordering(self, db_session_with_containers, patched_external_dependencies):
@@ -565,9 +568,10 @@ class TestDatasetIndexingTaskIntegration:
_document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy)
# Assert
assert task_dispatch_spy.delay.call_count == 3
assert task_dispatch_spy.apply_async.call_count == 3
for index, expected_task in enumerate(ordered_tasks):
assert task_dispatch_spy.delay.call_args_list[index].kwargs["document_ids"] == expected_task["document_ids"]
call_kwargs = task_dispatch_spy.apply_async.call_args_list[index].kwargs.get("kwargs", {})
assert call_kwargs.get("document_ids") == expected_task["document_ids"]
def test_billing_disabled_skips_limit_checks(self, db_session_with_containers, patched_external_dependencies):
"""Skip limit checks when billing feature is disabled."""

View File

@@ -762,11 +762,12 @@ class TestDocumentIndexingTasks:
mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
# Verify task function was called for each waiting task
assert mock_task_func.delay.call_count == 1
assert mock_task_func.apply_async.call_count == 1
# Verify correct parameters for each call
calls = mock_task_func.delay.call_args_list
assert calls[0][1] == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]}
calls = mock_task_func.apply_async.call_args_list
sent_kwargs = calls[0][1]["kwargs"]
assert sent_kwargs == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]}
# Verify queue is empty after processing (tasks were pulled)
remaining_tasks = queue.pull_tasks(count=10) # Pull more than we added
@@ -830,11 +831,15 @@ class TestDocumentIndexingTasks:
assert updated_document.processing_started_at is not None
# Verify waiting task was still processed despite core processing error
mock_task_func.delay.assert_called_once()
mock_task_func.apply_async.assert_called_once()
# Verify correct parameters for the call
call = mock_task_func.delay.call_args
assert call[1] == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]}
call = mock_task_func.apply_async.call_args
assert call[1]["kwargs"] == {
"tenant_id": tenant_id,
"dataset_id": dataset_id,
"document_ids": ["waiting-doc-1"],
}
# Verify queue is empty after processing (task was pulled)
remaining_tasks = queue.pull_tasks(count=10)
@@ -896,9 +901,13 @@ class TestDocumentIndexingTasks:
mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
# Verify only tenant1's waiting task was processed
mock_task_func.delay.assert_called_once()
call = mock_task_func.delay.call_args
assert call[1] == {"tenant_id": tenant1_id, "dataset_id": dataset1_id, "document_ids": ["tenant1-doc-1"]}
mock_task_func.apply_async.assert_called_once()
call = mock_task_func.apply_async.call_args
assert call[1]["kwargs"] == {
"tenant_id": tenant1_id,
"dataset_id": dataset1_id,
"document_ids": ["tenant1-doc-1"],
}
# Verify tenant1's queue is empty
remaining_tasks1 = queue1.pull_tasks(count=10)

View File

@@ -1,6 +1,6 @@
import json
import uuid
from unittest.mock import patch
from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
@@ -388,8 +388,10 @@ class TestRagPipelineRunTasks:
# Set the task key to indicate there are waiting tasks (legacy behavior)
redis_client.set(legacy_task_key, 1, ex=60 * 60)
# Mock the task function calls
with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
# Mock the Celery group scheduling used by the implementation
with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group:
mock_group.return_value.apply_async = MagicMock()
# Act: Execute the priority task with new code but legacy queue data
rag_pipeline_run_task(file_id, tenant.id)
@@ -398,13 +400,14 @@ class TestRagPipelineRunTasks:
mock_file_service["delete_file"].assert_called_once_with(file_id)
assert mock_pipeline_generator.call_count == 1
# Verify waiting tasks were processed, pull 1 task a time by default
assert mock_delay.call_count == 1
# Verify waiting tasks were processed via group, pull 1 task a time by default
assert mock_group.return_value.apply_async.called
# Verify correct parameters for the call
call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == legacy_file_ids[0]
assert call_kwargs.get("tenant_id") == tenant.id
# Verify correct parameters for the first scheduled job signature
jobs = mock_group.call_args.args[0] if mock_group.call_args else []
first_kwargs = jobs[0].kwargs if jobs else {}
assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == legacy_file_ids[0]
assert first_kwargs.get("tenant_id") == tenant.id
# Verify that new code can process legacy queue entries
# The new TenantIsolatedTaskQueue should be able to read from the legacy format
@@ -446,8 +449,10 @@ class TestRagPipelineRunTasks:
waiting_file_ids = [str(uuid.uuid4()) for _ in range(3)]
queue.push_tasks(waiting_file_ids)
# Mock the task function calls
with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
# Mock the Celery group scheduling used by the implementation
with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group:
mock_group.return_value.apply_async = MagicMock()
# Act: Execute the regular task
rag_pipeline_run_task(file_id, tenant.id)
@@ -456,13 +461,14 @@ class TestRagPipelineRunTasks:
mock_file_service["delete_file"].assert_called_once_with(file_id)
assert mock_pipeline_generator.call_count == 1
# Verify waiting tasks were processed, pull 1 task a time by default
assert mock_delay.call_count == 1
# Verify waiting tasks were processed via group.apply_async
assert mock_group.return_value.apply_async.called
# Verify correct parameters for the call
call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_ids[0]
assert call_kwargs.get("tenant_id") == tenant.id
# Verify correct parameters for the first scheduled job signature
jobs = mock_group.call_args.args[0] if mock_group.call_args else []
first_kwargs = jobs[0].kwargs if jobs else {}
assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_ids[0]
assert first_kwargs.get("tenant_id") == tenant.id
# Verify queue still has remaining tasks (only 1 was pulled)
remaining_tasks = queue.pull_tasks(count=10)
@@ -557,8 +563,10 @@ class TestRagPipelineRunTasks:
waiting_file_id = str(uuid.uuid4())
queue.push_tasks([waiting_file_id])
# Mock the task function calls
with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
# Mock the Celery group scheduling used by the implementation
with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group:
mock_group.return_value.apply_async = MagicMock()
# Act: Execute the regular task (should not raise exception)
rag_pipeline_run_task(file_id, tenant.id)
@@ -569,12 +577,13 @@ class TestRagPipelineRunTasks:
assert mock_pipeline_generator.call_count == 1
# Verify waiting task was still processed despite core processing error
mock_delay.assert_called_once()
assert mock_group.return_value.apply_async.called
# Verify correct parameters for the call
call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
assert call_kwargs.get("tenant_id") == tenant.id
# Verify correct parameters for the first scheduled job signature
jobs = mock_group.call_args.args[0] if mock_group.call_args else []
first_kwargs = jobs[0].kwargs if jobs else {}
assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
assert first_kwargs.get("tenant_id") == tenant.id
# Verify queue is empty after processing (task was pulled)
remaining_tasks = queue.pull_tasks(count=10)
@@ -684,8 +693,10 @@ class TestRagPipelineRunTasks:
queue1.push_tasks([waiting_file_id1])
queue2.push_tasks([waiting_file_id2])
# Mock the task function calls
with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
# Mock the Celery group scheduling used by the implementation
with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group:
mock_group.return_value.apply_async = MagicMock()
# Act: Execute the regular task for tenant1 only
rag_pipeline_run_task(file_id1, tenant1.id)
@@ -694,11 +705,12 @@ class TestRagPipelineRunTasks:
assert mock_file_service["delete_file"].call_count == 1
assert mock_pipeline_generator.call_count == 1
# Verify only tenant1's waiting task was processed
mock_delay.assert_called_once()
call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id1
assert call_kwargs.get("tenant_id") == tenant1.id
# Verify only tenant1's waiting task was processed (via group)
assert mock_group.return_value.apply_async.called
jobs = mock_group.call_args.args[0] if mock_group.call_args else []
first_kwargs = jobs[0].kwargs if jobs else {}
assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id1
assert first_kwargs.get("tenant_id") == tenant1.id
# Verify tenant1's queue is empty
remaining_tasks1 = queue1.pull_tasks(count=10)
@@ -913,8 +925,10 @@ class TestRagPipelineRunTasks:
waiting_file_id = str(uuid.uuid4())
queue.push_tasks([waiting_file_id])
# Mock the task function calls
with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
# Mock the Celery group scheduling used by the implementation
with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group:
mock_group.return_value.apply_async = MagicMock()
# Act & Assert: Execute the regular task (should raise Exception)
with pytest.raises(Exception, match="File not found"):
rag_pipeline_run_task(file_id, tenant.id)
@@ -924,12 +938,13 @@ class TestRagPipelineRunTasks:
mock_pipeline_generator.assert_not_called()
# Verify waiting task was still processed despite file error
mock_delay.assert_called_once()
assert mock_group.return_value.apply_async.called
# Verify correct parameters for the call
call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
assert call_kwargs.get("tenant_id") == tenant.id
# Verify correct parameters for the first scheduled job signature
jobs = mock_group.call_args.args[0] if mock_group.call_args else []
first_kwargs = jobs[0].kwargs if jobs else {}
assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
assert first_kwargs.get("tenant_id") == tenant.id
# Verify queue is empty after processing (task was pulled)
remaining_tasks = queue.pull_tasks(count=10)