refactor: reuse redis connection instead of creating a new one (#32678)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
wangxiaolei
2026-03-09 15:53:21 +08:00
committed by GitHub
parent cbb19cce39
commit 9970f4449a
10 changed files with 1360 additions and 112 deletions

View File

@@ -322,11 +322,14 @@ class TestDatasetIndexingTaskIntegration:
_document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy)
# Assert
task_dispatch_spy.delay.assert_called_once_with(
tenant_id=next_task["tenant_id"],
dataset_id=next_task["dataset_id"],
document_ids=next_task["document_ids"],
)
# apply_async is used by implementation; assert it was called once with expected kwargs
assert task_dispatch_spy.apply_async.call_count == 1
call_kwargs = task_dispatch_spy.apply_async.call_args.kwargs.get("kwargs", {})
assert call_kwargs == {
"tenant_id": next_task["tenant_id"],
"dataset_id": next_task["dataset_id"],
"document_ids": next_task["document_ids"],
}
set_waiting_spy.assert_called_once()
delete_key_spy.assert_not_called()
@@ -352,7 +355,7 @@ class TestDatasetIndexingTaskIntegration:
_document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy)
# Assert
task_dispatch_spy.delay.assert_not_called()
task_dispatch_spy.apply_async.assert_not_called()
delete_key_spy.assert_called_once()
def test_validation_failure_sets_error_status_when_vector_space_at_limit(
@@ -447,7 +450,7 @@ class TestDatasetIndexingTaskIntegration:
_document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy)
# Assert
task_dispatch_spy.delay.assert_called_once()
task_dispatch_spy.apply_async.assert_called_once()
def test_sessions_close_on_successful_indexing(
self,
@@ -534,7 +537,7 @@ class TestDatasetIndexingTaskIntegration:
_document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy)
# Assert
assert task_dispatch_spy.delay.call_count == concurrency_limit
assert task_dispatch_spy.apply_async.call_count == concurrency_limit
assert set_waiting_spy.call_count == concurrency_limit
def test_task_queue_fifo_ordering(self, db_session_with_containers, patched_external_dependencies):
@@ -565,9 +568,10 @@ class TestDatasetIndexingTaskIntegration:
_document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy)
# Assert
assert task_dispatch_spy.delay.call_count == 3
assert task_dispatch_spy.apply_async.call_count == 3
for index, expected_task in enumerate(ordered_tasks):
assert task_dispatch_spy.delay.call_args_list[index].kwargs["document_ids"] == expected_task["document_ids"]
call_kwargs = task_dispatch_spy.apply_async.call_args_list[index].kwargs.get("kwargs", {})
assert call_kwargs.get("document_ids") == expected_task["document_ids"]
def test_billing_disabled_skips_limit_checks(self, db_session_with_containers, patched_external_dependencies):
"""Skip limit checks when billing feature is disabled."""

View File

@@ -762,11 +762,12 @@ class TestDocumentIndexingTasks:
mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
# Verify task function was called for each waiting task
assert mock_task_func.delay.call_count == 1
assert mock_task_func.apply_async.call_count == 1
# Verify correct parameters for each call
calls = mock_task_func.delay.call_args_list
assert calls[0][1] == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]}
calls = mock_task_func.apply_async.call_args_list
sent_kwargs = calls[0][1]["kwargs"]
assert sent_kwargs == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]}
# Verify queue is empty after processing (tasks were pulled)
remaining_tasks = queue.pull_tasks(count=10) # Pull more than we added
@@ -830,11 +831,15 @@ class TestDocumentIndexingTasks:
assert updated_document.processing_started_at is not None
# Verify waiting task was still processed despite core processing error
mock_task_func.delay.assert_called_once()
mock_task_func.apply_async.assert_called_once()
# Verify correct parameters for the call
call = mock_task_func.delay.call_args
assert call[1] == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]}
call = mock_task_func.apply_async.call_args
assert call[1]["kwargs"] == {
"tenant_id": tenant_id,
"dataset_id": dataset_id,
"document_ids": ["waiting-doc-1"],
}
# Verify queue is empty after processing (task was pulled)
remaining_tasks = queue.pull_tasks(count=10)
@@ -896,9 +901,13 @@ class TestDocumentIndexingTasks:
mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
# Verify only tenant1's waiting task was processed
mock_task_func.delay.assert_called_once()
call = mock_task_func.delay.call_args
assert call[1] == {"tenant_id": tenant1_id, "dataset_id": dataset1_id, "document_ids": ["tenant1-doc-1"]}
mock_task_func.apply_async.assert_called_once()
call = mock_task_func.apply_async.call_args
assert call[1]["kwargs"] == {
"tenant_id": tenant1_id,
"dataset_id": dataset1_id,
"document_ids": ["tenant1-doc-1"],
}
# Verify tenant1's queue is empty
remaining_tasks1 = queue1.pull_tasks(count=10)

View File

@@ -1,6 +1,6 @@
import json
import uuid
from unittest.mock import patch
from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
@@ -388,8 +388,10 @@ class TestRagPipelineRunTasks:
# Set the task key to indicate there are waiting tasks (legacy behavior)
redis_client.set(legacy_task_key, 1, ex=60 * 60)
# Mock the task function calls
with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
# Mock the Celery group scheduling used by the implementation
with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group:
mock_group.return_value.apply_async = MagicMock()
# Act: Execute the priority task with new code but legacy queue data
rag_pipeline_run_task(file_id, tenant.id)
@@ -398,13 +400,14 @@ class TestRagPipelineRunTasks:
mock_file_service["delete_file"].assert_called_once_with(file_id)
assert mock_pipeline_generator.call_count == 1
# Verify waiting tasks were processed, pull 1 task a time by default
assert mock_delay.call_count == 1
# Verify waiting tasks were processed via group, pull 1 task a time by default
assert mock_group.return_value.apply_async.called
# Verify correct parameters for the call
call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == legacy_file_ids[0]
assert call_kwargs.get("tenant_id") == tenant.id
# Verify correct parameters for the first scheduled job signature
jobs = mock_group.call_args.args[0] if mock_group.call_args else []
first_kwargs = jobs[0].kwargs if jobs else {}
assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == legacy_file_ids[0]
assert first_kwargs.get("tenant_id") == tenant.id
# Verify that new code can process legacy queue entries
# The new TenantIsolatedTaskQueue should be able to read from the legacy format
@@ -446,8 +449,10 @@ class TestRagPipelineRunTasks:
waiting_file_ids = [str(uuid.uuid4()) for _ in range(3)]
queue.push_tasks(waiting_file_ids)
# Mock the task function calls
with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
# Mock the Celery group scheduling used by the implementation
with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group:
mock_group.return_value.apply_async = MagicMock()
# Act: Execute the regular task
rag_pipeline_run_task(file_id, tenant.id)
@@ -456,13 +461,14 @@ class TestRagPipelineRunTasks:
mock_file_service["delete_file"].assert_called_once_with(file_id)
assert mock_pipeline_generator.call_count == 1
# Verify waiting tasks were processed, pull 1 task a time by default
assert mock_delay.call_count == 1
# Verify waiting tasks were processed via group.apply_async
assert mock_group.return_value.apply_async.called
# Verify correct parameters for the call
call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_ids[0]
assert call_kwargs.get("tenant_id") == tenant.id
# Verify correct parameters for the first scheduled job signature
jobs = mock_group.call_args.args[0] if mock_group.call_args else []
first_kwargs = jobs[0].kwargs if jobs else {}
assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_ids[0]
assert first_kwargs.get("tenant_id") == tenant.id
# Verify queue still has remaining tasks (only 1 was pulled)
remaining_tasks = queue.pull_tasks(count=10)
@@ -557,8 +563,10 @@ class TestRagPipelineRunTasks:
waiting_file_id = str(uuid.uuid4())
queue.push_tasks([waiting_file_id])
# Mock the task function calls
with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
# Mock the Celery group scheduling used by the implementation
with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group:
mock_group.return_value.apply_async = MagicMock()
# Act: Execute the regular task (should not raise exception)
rag_pipeline_run_task(file_id, tenant.id)
@@ -569,12 +577,13 @@ class TestRagPipelineRunTasks:
assert mock_pipeline_generator.call_count == 1
# Verify waiting task was still processed despite core processing error
mock_delay.assert_called_once()
assert mock_group.return_value.apply_async.called
# Verify correct parameters for the call
call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
assert call_kwargs.get("tenant_id") == tenant.id
# Verify correct parameters for the first scheduled job signature
jobs = mock_group.call_args.args[0] if mock_group.call_args else []
first_kwargs = jobs[0].kwargs if jobs else {}
assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
assert first_kwargs.get("tenant_id") == tenant.id
# Verify queue is empty after processing (task was pulled)
remaining_tasks = queue.pull_tasks(count=10)
@@ -684,8 +693,10 @@ class TestRagPipelineRunTasks:
queue1.push_tasks([waiting_file_id1])
queue2.push_tasks([waiting_file_id2])
# Mock the task function calls
with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
# Mock the Celery group scheduling used by the implementation
with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group:
mock_group.return_value.apply_async = MagicMock()
# Act: Execute the regular task for tenant1 only
rag_pipeline_run_task(file_id1, tenant1.id)
@@ -694,11 +705,12 @@ class TestRagPipelineRunTasks:
assert mock_file_service["delete_file"].call_count == 1
assert mock_pipeline_generator.call_count == 1
# Verify only tenant1's waiting task was processed
mock_delay.assert_called_once()
call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id1
assert call_kwargs.get("tenant_id") == tenant1.id
# Verify only tenant1's waiting task was processed (via group)
assert mock_group.return_value.apply_async.called
jobs = mock_group.call_args.args[0] if mock_group.call_args else []
first_kwargs = jobs[0].kwargs if jobs else {}
assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id1
assert first_kwargs.get("tenant_id") == tenant1.id
# Verify tenant1's queue is empty
remaining_tasks1 = queue1.pull_tasks(count=10)
@@ -913,8 +925,10 @@ class TestRagPipelineRunTasks:
waiting_file_id = str(uuid.uuid4())
queue.push_tasks([waiting_file_id])
# Mock the task function calls
with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
# Mock the Celery group scheduling used by the implementation
with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group:
mock_group.return_value.apply_async = MagicMock()
# Act & Assert: Execute the regular task (should raise Exception)
with pytest.raises(Exception, match="File not found"):
rag_pipeline_run_task(file_id, tenant.id)
@@ -924,12 +938,13 @@ class TestRagPipelineRunTasks:
mock_pipeline_generator.assert_not_called()
# Verify waiting task was still processed despite file error
mock_delay.assert_called_once()
assert mock_group.return_value.apply_async.called
# Verify correct parameters for the call
call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
assert call_kwargs.get("tenant_id") == tenant.id
# Verify correct parameters for the first scheduled job signature
jobs = mock_group.call_args.args[0] if mock_group.call_args else []
first_kwargs = jobs[0].kwargs if jobs else {}
assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
assert first_kwargs.get("tenant_id") == tenant.id
# Verify queue is empty after processing (task was pulled)
remaining_tasks = queue.pull_tasks(count=10)