test: migrate Dataset/Document property tests to testcontainers (#32487)

Co-authored-by: KinomotoMio <200703522+KinomotoMio@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
木之本澪
2026-02-24 00:23:48 +08:00
committed by GitHub
parent f76ee7cfa4
commit 737575d637
2 changed files with 272 additions and 151 deletions

View File

@@ -12,7 +12,7 @@ This test suite covers:
import json
import pickle
from datetime import UTC, datetime
from unittest.mock import MagicMock, patch
from unittest.mock import patch
from uuid import uuid4
from models.dataset import (
@@ -954,156 +954,6 @@ class TestChildChunk:
assert child_chunk.index_node_hash == index_node_hash
class TestDatasetDocumentCascadeDeletes:
    """Mock-based checks of count/aggregate properties on Dataset and Document.

    Every test replaces ``models.dataset.db.session.query`` with a canned
    ``MagicMock`` and verifies that the property under test returns the value
    stubbed at the end of the call chain it is expected to follow. No real
    database session is used.
    """

    # Patch target shared by every test in this suite.
    _QUERY_TARGET = "models.dataset.db.session.query"

    @staticmethod
    def _build_dataset():
        """Create an in-memory Dataset and give it a freshly generated id."""
        instance = Dataset(
            tenant_id=str(uuid4()),
            name="Test Dataset",
            data_source_type="upload_file",
            created_by=str(uuid4()),
        )
        instance.id = str(uuid4())
        return instance

    @staticmethod
    def _build_document():
        """Create an in-memory Document and give it a freshly generated id."""
        instance = Document(
            tenant_id=str(uuid4()),
            dataset_id=str(uuid4()),
            position=1,
            data_source_type="upload_file",
            batch="batch_001",
            name="test.pdf",
            created_from="web",
            created_by=str(uuid4()),
        )
        instance.id = str(uuid4())
        return instance

    @staticmethod
    def _canned_query(dotted_spec, result):
        """Return a MagicMock whose ``dotted_spec`` attribute path is set to ``result``.

        ``dotted_spec`` uses the dotted-name form accepted by the mock
        constructor, e.g. ``"where.return_value.scalar.return_value"``.
        """
        return MagicMock(**{dotted_spec: result})

    def test_dataset_with_documents_relationship(self):
        """total_documents yields the value of ``query(...).where(...).scalar()``."""
        dataset = self._build_dataset()
        stub = self._canned_query("where.return_value.scalar.return_value", 3)

        with patch(self._QUERY_TARGET, return_value=stub):
            observed = dataset.total_documents

        assert observed == 3

    def test_dataset_available_documents_count(self):
        """total_available_documents yields the value of ``query(...).where(...).scalar()``."""
        dataset = self._build_dataset()
        stub = self._canned_query("where.return_value.scalar.return_value", 2)

        with patch(self._QUERY_TARGET, return_value=stub):
            observed = dataset.total_available_documents

        assert observed == 2

    def test_dataset_word_count_aggregation(self):
        """word_count yields the value of ``query(...).with_entities(...).where(...).scalar()``."""
        dataset = self._build_dataset()
        stub = self._canned_query(
            "with_entities.return_value.where.return_value.scalar.return_value",
            5000,
        )

        with patch(self._QUERY_TARGET, return_value=stub):
            observed = dataset.word_count

        assert observed == 5000

    def test_dataset_available_segment_count(self):
        """available_segment_count yields the value of ``query(...).where(...).scalar()``."""
        dataset = self._build_dataset()
        stub = self._canned_query("where.return_value.scalar.return_value", 15)

        with patch(self._QUERY_TARGET, return_value=stub):
            observed = dataset.available_segment_count

        assert observed == 15

    def test_document_segment_count_property(self):
        """segment_count yields the value of ``query(...).where(...).count()``."""
        document = self._build_document()
        stub = self._canned_query("where.return_value.count.return_value", 10)

        with patch(self._QUERY_TARGET, return_value=stub):
            observed = document.segment_count

        assert observed == 10

    def test_document_hit_count_aggregation(self):
        """hit_count yields the value of ``query(...).with_entities(...).where(...).scalar()``."""
        document = self._build_document()
        stub = self._canned_query(
            "with_entities.return_value.where.return_value.scalar.return_value",
            25,
        )

        with patch(self._QUERY_TARGET, return_value=stub):
            observed = document.hit_count

        assert observed == 25
class TestDocumentSegmentNavigation:
"""Test suite for DocumentSegment navigation properties."""