mirror of
https://github.com/langgenius/dify.git
synced 2026-05-04 01:18:05 +08:00
refactor: use EnumText for dataset and replace string literals 4 (#33606)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
@ -25,6 +25,13 @@ from models.dataset import (
|
||||
DocumentSegment,
|
||||
Embedding,
|
||||
)
|
||||
from models.enums import (
|
||||
DataSourceType,
|
||||
DocumentCreatedFrom,
|
||||
IndexingStatus,
|
||||
ProcessRuleMode,
|
||||
SegmentStatus,
|
||||
)
|
||||
|
||||
|
||||
class TestDatasetModelValidation:
|
||||
@ -40,14 +47,14 @@ class TestDatasetModelValidation:
|
||||
dataset = Dataset(
|
||||
tenant_id=tenant_id,
|
||||
name="Test Dataset",
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
created_by=created_by,
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert dataset.name == "Test Dataset"
|
||||
assert dataset.tenant_id == tenant_id
|
||||
assert dataset.data_source_type == "upload_file"
|
||||
assert dataset.data_source_type == DataSourceType.UPLOAD_FILE
|
||||
assert dataset.created_by == created_by
|
||||
# Note: Default values are set by database, not by model instantiation
|
||||
|
||||
@ -57,7 +64,7 @@ class TestDatasetModelValidation:
|
||||
dataset = Dataset(
|
||||
tenant_id=str(uuid4()),
|
||||
name="Test Dataset",
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
created_by=str(uuid4()),
|
||||
description="Test description",
|
||||
indexing_technique="high_quality",
|
||||
@ -77,14 +84,14 @@ class TestDatasetModelValidation:
|
||||
dataset_high_quality = Dataset(
|
||||
tenant_id=str(uuid4()),
|
||||
name="High Quality Dataset",
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
created_by=str(uuid4()),
|
||||
indexing_technique="high_quality",
|
||||
)
|
||||
dataset_economy = Dataset(
|
||||
tenant_id=str(uuid4()),
|
||||
name="Economy Dataset",
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
created_by=str(uuid4()),
|
||||
indexing_technique="economy",
|
||||
)
|
||||
@ -101,14 +108,14 @@ class TestDatasetModelValidation:
|
||||
dataset_vendor = Dataset(
|
||||
tenant_id=str(uuid4()),
|
||||
name="Vendor Dataset",
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
created_by=str(uuid4()),
|
||||
provider="vendor",
|
||||
)
|
||||
dataset_external = Dataset(
|
||||
tenant_id=str(uuid4()),
|
||||
name="External Dataset",
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
created_by=str(uuid4()),
|
||||
provider="external",
|
||||
)
|
||||
@ -126,7 +133,7 @@ class TestDatasetModelValidation:
|
||||
dataset = Dataset(
|
||||
tenant_id=str(uuid4()),
|
||||
name="Test Dataset",
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
created_by=str(uuid4()),
|
||||
index_struct=json.dumps(index_struct_data),
|
||||
)
|
||||
@ -145,7 +152,7 @@ class TestDatasetModelValidation:
|
||||
dataset = Dataset(
|
||||
tenant_id=str(uuid4()),
|
||||
name="Test Dataset",
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
created_by=str(uuid4()),
|
||||
)
|
||||
|
||||
@ -161,7 +168,7 @@ class TestDatasetModelValidation:
|
||||
dataset = Dataset(
|
||||
tenant_id=str(uuid4()),
|
||||
name="Test Dataset",
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
created_by=str(uuid4()),
|
||||
)
|
||||
|
||||
@ -178,7 +185,7 @@ class TestDatasetModelValidation:
|
||||
dataset = Dataset(
|
||||
tenant_id=str(uuid4()),
|
||||
name="Test Dataset",
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
created_by=str(uuid4()),
|
||||
)
|
||||
|
||||
@ -218,10 +225,10 @@ class TestDocumentModelRelationships:
|
||||
tenant_id=tenant_id,
|
||||
dataset_id=dataset_id,
|
||||
position=1,
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
batch="batch_001",
|
||||
name="test_document.pdf",
|
||||
created_from="web",
|
||||
created_from=DocumentCreatedFrom.WEB,
|
||||
created_by=created_by,
|
||||
)
|
||||
|
||||
@ -229,10 +236,10 @@ class TestDocumentModelRelationships:
|
||||
assert document.tenant_id == tenant_id
|
||||
assert document.dataset_id == dataset_id
|
||||
assert document.position == 1
|
||||
assert document.data_source_type == "upload_file"
|
||||
assert document.data_source_type == DataSourceType.UPLOAD_FILE
|
||||
assert document.batch == "batch_001"
|
||||
assert document.name == "test_document.pdf"
|
||||
assert document.created_from == "web"
|
||||
assert document.created_from == DocumentCreatedFrom.WEB
|
||||
assert document.created_by == created_by
|
||||
# Note: Default values are set by database, not by model instantiation
|
||||
|
||||
@ -250,12 +257,12 @@ class TestDocumentModelRelationships:
|
||||
tenant_id=str(uuid4()),
|
||||
dataset_id=str(uuid4()),
|
||||
position=1,
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
batch="batch_001",
|
||||
name="test.pdf",
|
||||
created_from="web",
|
||||
created_from=DocumentCreatedFrom.WEB,
|
||||
created_by=str(uuid4()),
|
||||
indexing_status="waiting",
|
||||
indexing_status=IndexingStatus.WAITING,
|
||||
)
|
||||
|
||||
# Act
|
||||
@ -271,12 +278,12 @@ class TestDocumentModelRelationships:
|
||||
tenant_id=str(uuid4()),
|
||||
dataset_id=str(uuid4()),
|
||||
position=1,
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
batch="batch_001",
|
||||
name="test.pdf",
|
||||
created_from="web",
|
||||
created_from=DocumentCreatedFrom.WEB,
|
||||
created_by=str(uuid4()),
|
||||
indexing_status="parsing",
|
||||
indexing_status=IndexingStatus.PARSING,
|
||||
is_paused=True,
|
||||
)
|
||||
|
||||
@ -289,15 +296,20 @@ class TestDocumentModelRelationships:
|
||||
def test_document_display_status_indexing(self):
|
||||
"""Test document display_status property for indexing state."""
|
||||
# Arrange
|
||||
for indexing_status in ["parsing", "cleaning", "splitting", "indexing"]:
|
||||
for indexing_status in [
|
||||
IndexingStatus.PARSING,
|
||||
IndexingStatus.CLEANING,
|
||||
IndexingStatus.SPLITTING,
|
||||
IndexingStatus.INDEXING,
|
||||
]:
|
||||
document = Document(
|
||||
tenant_id=str(uuid4()),
|
||||
dataset_id=str(uuid4()),
|
||||
position=1,
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
batch="batch_001",
|
||||
name="test.pdf",
|
||||
created_from="web",
|
||||
created_from=DocumentCreatedFrom.WEB,
|
||||
created_by=str(uuid4()),
|
||||
indexing_status=indexing_status,
|
||||
)
|
||||
@ -315,12 +327,12 @@ class TestDocumentModelRelationships:
|
||||
tenant_id=str(uuid4()),
|
||||
dataset_id=str(uuid4()),
|
||||
position=1,
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
batch="batch_001",
|
||||
name="test.pdf",
|
||||
created_from="web",
|
||||
created_from=DocumentCreatedFrom.WEB,
|
||||
created_by=str(uuid4()),
|
||||
indexing_status="error",
|
||||
indexing_status=IndexingStatus.ERROR,
|
||||
)
|
||||
|
||||
# Act
|
||||
@ -336,12 +348,12 @@ class TestDocumentModelRelationships:
|
||||
tenant_id=str(uuid4()),
|
||||
dataset_id=str(uuid4()),
|
||||
position=1,
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
batch="batch_001",
|
||||
name="test.pdf",
|
||||
created_from="web",
|
||||
created_from=DocumentCreatedFrom.WEB,
|
||||
created_by=str(uuid4()),
|
||||
indexing_status="completed",
|
||||
indexing_status=IndexingStatus.COMPLETED,
|
||||
enabled=True,
|
||||
archived=False,
|
||||
)
|
||||
@ -359,12 +371,12 @@ class TestDocumentModelRelationships:
|
||||
tenant_id=str(uuid4()),
|
||||
dataset_id=str(uuid4()),
|
||||
position=1,
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
batch="batch_001",
|
||||
name="test.pdf",
|
||||
created_from="web",
|
||||
created_from=DocumentCreatedFrom.WEB,
|
||||
created_by=str(uuid4()),
|
||||
indexing_status="completed",
|
||||
indexing_status=IndexingStatus.COMPLETED,
|
||||
enabled=False,
|
||||
archived=False,
|
||||
)
|
||||
@ -382,12 +394,12 @@ class TestDocumentModelRelationships:
|
||||
tenant_id=str(uuid4()),
|
||||
dataset_id=str(uuid4()),
|
||||
position=1,
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
batch="batch_001",
|
||||
name="test.pdf",
|
||||
created_from="web",
|
||||
created_from=DocumentCreatedFrom.WEB,
|
||||
created_by=str(uuid4()),
|
||||
indexing_status="completed",
|
||||
indexing_status=IndexingStatus.COMPLETED,
|
||||
archived=True,
|
||||
)
|
||||
|
||||
@ -405,10 +417,10 @@ class TestDocumentModelRelationships:
|
||||
tenant_id=str(uuid4()),
|
||||
dataset_id=str(uuid4()),
|
||||
position=1,
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
batch="batch_001",
|
||||
name="test.pdf",
|
||||
created_from="web",
|
||||
created_from=DocumentCreatedFrom.WEB,
|
||||
created_by=str(uuid4()),
|
||||
data_source_info=json.dumps(data_source_info),
|
||||
)
|
||||
@ -428,10 +440,10 @@ class TestDocumentModelRelationships:
|
||||
tenant_id=str(uuid4()),
|
||||
dataset_id=str(uuid4()),
|
||||
position=1,
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
batch="batch_001",
|
||||
name="test.pdf",
|
||||
created_from="web",
|
||||
created_from=DocumentCreatedFrom.WEB,
|
||||
created_by=str(uuid4()),
|
||||
)
|
||||
|
||||
@ -448,10 +460,10 @@ class TestDocumentModelRelationships:
|
||||
tenant_id=str(uuid4()),
|
||||
dataset_id=str(uuid4()),
|
||||
position=1,
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
batch="batch_001",
|
||||
name="test.pdf",
|
||||
created_from="web",
|
||||
created_from=DocumentCreatedFrom.WEB,
|
||||
created_by=str(uuid4()),
|
||||
word_count=1000,
|
||||
)
|
||||
@ -471,10 +483,10 @@ class TestDocumentModelRelationships:
|
||||
tenant_id=str(uuid4()),
|
||||
dataset_id=str(uuid4()),
|
||||
position=1,
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
batch="batch_001",
|
||||
name="test.pdf",
|
||||
created_from="web",
|
||||
created_from=DocumentCreatedFrom.WEB,
|
||||
created_by=str(uuid4()),
|
||||
word_count=0,
|
||||
)
|
||||
@ -582,7 +594,7 @@ class TestDocumentSegmentIndexing:
|
||||
word_count=1,
|
||||
tokens=2,
|
||||
created_by=str(uuid4()),
|
||||
status="waiting",
|
||||
status=SegmentStatus.WAITING,
|
||||
)
|
||||
segment_completed = DocumentSegment(
|
||||
tenant_id=str(uuid4()),
|
||||
@ -593,12 +605,12 @@ class TestDocumentSegmentIndexing:
|
||||
word_count=1,
|
||||
tokens=2,
|
||||
created_by=str(uuid4()),
|
||||
status="completed",
|
||||
status=SegmentStatus.COMPLETED,
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert segment_waiting.status == "waiting"
|
||||
assert segment_completed.status == "completed"
|
||||
assert segment_waiting.status == SegmentStatus.WAITING
|
||||
assert segment_completed.status == SegmentStatus.COMPLETED
|
||||
|
||||
def test_document_segment_enabled_disabled_tracking(self):
|
||||
"""Test document segment enabled/disabled state tracking."""
|
||||
@ -769,13 +781,13 @@ class TestDatasetProcessRule:
|
||||
# Act
|
||||
process_rule = DatasetProcessRule(
|
||||
dataset_id=dataset_id,
|
||||
mode="automatic",
|
||||
mode=ProcessRuleMode.AUTOMATIC,
|
||||
created_by=created_by,
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert process_rule.dataset_id == dataset_id
|
||||
assert process_rule.mode == "automatic"
|
||||
assert process_rule.mode == ProcessRuleMode.AUTOMATIC
|
||||
assert process_rule.created_by == created_by
|
||||
|
||||
def test_dataset_process_rule_modes(self):
|
||||
@ -797,7 +809,7 @@ class TestDatasetProcessRule:
|
||||
}
|
||||
process_rule = DatasetProcessRule(
|
||||
dataset_id=str(uuid4()),
|
||||
mode="custom",
|
||||
mode=ProcessRuleMode.CUSTOM,
|
||||
created_by=str(uuid4()),
|
||||
rules=json.dumps(rules_data),
|
||||
)
|
||||
@ -817,7 +829,7 @@ class TestDatasetProcessRule:
|
||||
rules_data = {"test": "data"}
|
||||
process_rule = DatasetProcessRule(
|
||||
dataset_id=dataset_id,
|
||||
mode="automatic",
|
||||
mode=ProcessRuleMode.AUTOMATIC,
|
||||
created_by=str(uuid4()),
|
||||
rules=json.dumps(rules_data),
|
||||
)
|
||||
@ -827,7 +839,7 @@ class TestDatasetProcessRule:
|
||||
|
||||
# Assert
|
||||
assert result["dataset_id"] == dataset_id
|
||||
assert result["mode"] == "automatic"
|
||||
assert result["mode"] == ProcessRuleMode.AUTOMATIC
|
||||
assert result["rules"] == rules_data
|
||||
|
||||
def test_dataset_process_rule_automatic_rules(self):
|
||||
@ -969,7 +981,7 @@ class TestModelIntegration:
|
||||
dataset = Dataset(
|
||||
tenant_id=tenant_id,
|
||||
name="Test Dataset",
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
created_by=created_by,
|
||||
indexing_technique="high_quality",
|
||||
)
|
||||
@ -980,10 +992,10 @@ class TestModelIntegration:
|
||||
tenant_id=tenant_id,
|
||||
dataset_id=dataset_id,
|
||||
position=1,
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
batch="batch_001",
|
||||
name="test.pdf",
|
||||
created_from="web",
|
||||
created_from=DocumentCreatedFrom.WEB,
|
||||
created_by=created_by,
|
||||
word_count=100,
|
||||
)
|
||||
@ -999,7 +1011,7 @@ class TestModelIntegration:
|
||||
word_count=3,
|
||||
tokens=5,
|
||||
created_by=created_by,
|
||||
status="completed",
|
||||
status=SegmentStatus.COMPLETED,
|
||||
)
|
||||
|
||||
# Assert
|
||||
@ -1009,7 +1021,7 @@ class TestModelIntegration:
|
||||
assert segment.document_id == document_id
|
||||
assert dataset.indexing_technique == "high_quality"
|
||||
assert document.word_count == 100
|
||||
assert segment.status == "completed"
|
||||
assert segment.status == SegmentStatus.COMPLETED
|
||||
|
||||
def test_document_to_dict_serialization(self):
|
||||
"""Test document to_dict method for serialization."""
|
||||
@ -1022,13 +1034,13 @@ class TestModelIntegration:
|
||||
tenant_id=tenant_id,
|
||||
dataset_id=dataset_id,
|
||||
position=1,
|
||||
data_source_type="upload_file",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
batch="batch_001",
|
||||
name="test.pdf",
|
||||
created_from="web",
|
||||
created_from=DocumentCreatedFrom.WEB,
|
||||
created_by=created_by,
|
||||
word_count=100,
|
||||
indexing_status="completed",
|
||||
indexing_status=IndexingStatus.COMPLETED,
|
||||
)
|
||||
|
||||
# Mock segment_count and hit_count
|
||||
@ -1044,6 +1056,6 @@ class TestModelIntegration:
|
||||
assert result["dataset_id"] == dataset_id
|
||||
assert result["name"] == "test.pdf"
|
||||
assert result["word_count"] == 100
|
||||
assert result["indexing_status"] == "completed"
|
||||
assert result["indexing_status"] == IndexingStatus.COMPLETED
|
||||
assert result["segment_count"] == 5
|
||||
assert result["hit_count"] == 10
|
||||
|
||||
Reference in New Issue
Block a user