Merge branch 'main' into feat/rag-2

2026-04-30 07:28:05 +08:00 · 2025-08-11 11:15:58 +08:00
parent 58aca75ee0 69c3439c3a
commit fc779d00df
214 changed files with 8987 additions and 838 deletions
--- a/api/tests/integration_tests/controllers/console/app/test_description_validation.py
+++ b/api/tests/integration_tests/controllers/console/app/test_description_validation.py
@ -0,0 +1,168 @@
+"""
+Unit tests for App description validation functions.
+
+This test module validates the 400-character limit for App descriptions by
+calling the validation helpers directly; no endpoint requests are issued.
+"""
+
+import os
+import sys
+
+import pytest
+
+# Add the API root to Python path for imports
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
+
+
+class TestAppDescriptionValidationUnit:
+    """Direct checks of the App description length validator (400-character cap)."""
+
+    def test_validate_description_length_function(self):
+        """Call _validate_description_length directly.
+
+        Accepted inputs must come back equal to what was passed; over-limit inputs must raise.
+        """
+        from controllers.console.app.app import _validate_description_length as check
+
+        expected_message = "Description cannot exceed 400 characters."
+        longest_allowed = "x" * 400
+
+        # Accepted inputs: the return value must equal the input (None stays None).
+        assert check("") == ""
+        assert check(longest_allowed) == longest_allowed
+        assert check(None) is None
+
+        # Rejected inputs: each length past the limit must raise ValueError
+        # whose text contains the expected message.
+        for length in (401, 500, 1000):
+            with pytest.raises(ValueError) as raised:
+                check("x" * length)
+            assert expected_message in str(raised.value)
+
+    def test_validation_consistency_with_dataset(self):
+        """Check that the App validator agrees with both Dataset validators.
+
+        The console App, console Dataset and service-API Dataset modules each
+        provide a _validate_description_length; this test calls all three with
+        the same inputs and compares their results and error messages.
+        """
+        from controllers.console.app.app import _validate_description_length as app_validate
+        from controllers.console.datasets.datasets import _validate_description_length as dataset_validate
+        from controllers.service_api.dataset.dataset import _validate_description_length as service_dataset_validate
+
+        # Local helper so each validator's failure text can be compared side by side.
+        def error_text(validate, description):
+            """Return the ValueError message raised by validate(description).
+
+            Returns None when the call completes without raising ValueError.
+            """
+            try:
+                validate(description)
+            except ValueError as err:
+                return str(err)
+            return None
+
+        # Accepted inputs must yield equal results from all three validators.
+        for accepted in ("x" * 400, "", None):
+            assert app_validate(accepted) == dataset_validate(accepted) == service_dataset_validate(accepted)
+
+        # An over-limit input must be rejected by all three with the same message.
+        too_long = "x" * 401
+        app_error = error_text(app_validate, too_long)
+        dataset_error = error_text(dataset_validate, too_long)
+        service_dataset_error = error_text(service_dataset_validate, too_long)
+
+        assert app_error == dataset_error == service_dataset_error
+        # The shared message is also pinned to its exact wording.
+        assert app_error == "Description cannot exceed 400 characters."
+
+    def test_boundary_values(self):
+        """Probe lengths 400, 401 and 399 around the 400-character limit."""
+        from controllers.console.app.app import _validate_description_length as check
+
+        limit = 400
+        # Exactly at the limit: accepted.
+        at_limit = "x" * limit
+        assert check(at_limit) == at_limit
+
+        # One character over the limit: rejected with ValueError.
+        with pytest.raises(ValueError):
+            check("x" * (limit + 1))
+
+        # One character under the limit: accepted.
+        one_under = "x" * (limit - 1)
+        assert check(one_under) == one_under
+
+    def test_edge_cases(self):
+        """Cover None, empty, single-character and non-ASCII descriptions."""
+        from controllers.console.app.app import _validate_description_length as check
+
+        # None is returned as None.
+        assert check(None) is None
+
+        # Short strings come back equal to the input.
+        for short in ("", "a"):
+            assert check(short) == short
+
+        # Non-ASCII text: 200 repetitions of a 2-character string
+        # is 400 characters, which must be accepted.
+        pair = "测试"
+        within_limit = pair * 200
+        assert check(within_limit) == within_limit
+
+        # 201 repetitions is 402 characters, which must be rejected.
+        over_limit = pair * 201
+        with pytest.raises(ValueError):
+            check(over_limit)
+
+    def test_whitespace_handling(self):
+        """Check that space characters count toward the 400-character limit."""
+        from controllers.console.app.app import _validate_description_length as check
+
+        letters = "a" * 200
+
+        # 400 spaces: accepted and returned equal to the input.
+        only_spaces = " " * 400
+        assert check(only_spaces) == only_spaces
+
+        # 401 spaces: rejected.
+        with pytest.raises(ValueError):
+            check(" " * 401)
+
+        # 200 letters followed by 200 spaces: accepted.
+        letters_and_spaces = letters + " " * 200
+        assert check(letters_and_spaces) == letters_and_spaces
+
+        # 200 letters followed by 201 spaces: rejected.
+        with pytest.raises(ValueError):
+            check(letters + " " * 201)
+
+
+if __name__ == "__main__":
+    # Standalone runner: calls every test_* method, prints a tally, and exits 1 if any failed.
+    import traceback
+
+    test_instance = TestAppDescriptionValidationUnit()
+    test_methods = [method for method in dir(test_instance) if method.startswith("test_")]
+
+    passed = failed = 0
+
+    for test_method in test_methods:
+        try:
+            print(f"Running {test_method}...")
+            getattr(test_instance, test_method)()
+            print(f"✅ {test_method} PASSED")
+            passed += 1
+        except (Exception, pytest.fail.Exception) as e:
+            # A pytest.raises block that sees no error raises pytest.fail.Exception (a BaseException).
+            print(f"❌ {test_method} FAILED: {e}")
+            traceback.print_exc()
+            failed += 1
+
+    print(f"\n📊 Test Results: {passed} passed, {failed} failed")
+
+    if failed == 0:
+        print("🎉 All tests passed!")
+    else:
+        print("💥 Some tests failed!")
+        sys.exit(1)
--- a/api/tests/integration_tests/vdb/clickzetta/test_clickzetta.py
+++ b/api/tests/integration_tests/vdb/clickzetta/test_clickzetta.py
@ -39,10 +39,7 @@ class TestClickzettaVector(AbstractVectorTest):
        )

        with setup_mock_redis():
-            vector = ClickzettaVector(
-                collection_name="test_collection_" + str(os.getpid()),
-                config=config
-            )
+            vector = ClickzettaVector(collection_name="test_collection_" + str(os.getpid()), config=config)

            yield vector

@ -114,7 +111,7 @@ class TestClickzettaVector(AbstractVectorTest):
                    "category": "technical" if i % 2 == 0 else "general",
                    "document_id": f"doc_{i // 3}",  # Group documents
                    "importance": i,
-                }
+                },
            )
            documents.append(doc)
            # Create varied embeddings
@ -124,22 +121,14 @@ class TestClickzettaVector(AbstractVectorTest):

        # Test vector search with document filter
        query_vector = [0.5, 1.0, 1.5, 2.0]
-        results = vector_store.search_by_vector(
-            query_vector,
-            top_k=5,
-            document_ids_filter=["doc_0", "doc_1"]
-        )
+        results = vector_store.search_by_vector(query_vector, top_k=5, document_ids_filter=["doc_0", "doc_1"])
        assert len(results) > 0
        # All results should belong to doc_0 or doc_1 groups
        for result in results:
            assert result.metadata["document_id"] in ["doc_0", "doc_1"]

        # Test score threshold
-        results = vector_store.search_by_vector(
-            query_vector,
-            top_k=10,
-            score_threshold=0.5
-        )
+        results = vector_store.search_by_vector(query_vector, top_k=10, score_threshold=0.5)
        # Check that all results have a score above threshold
        for result in results:
            assert result.metadata.get("score", 0) >= 0.5
@ -154,7 +143,7 @@ class TestClickzettaVector(AbstractVectorTest):
        for i in range(batch_size):
            doc = Document(
                page_content=f"Batch document {i}: This is a test document for batch processing.",
-                metadata={"doc_id": f"batch_doc_{i}", "batch": "test_batch"}
+                metadata={"doc_id": f"batch_doc_{i}", "batch": "test_batch"},
            )
            documents.append(doc)
            embeddings.append([0.1 * (i % 10), 0.2 * (i % 10), 0.3 * (i % 10), 0.4 * (i % 10)])
@ -179,7 +168,7 @@ class TestClickzettaVector(AbstractVectorTest):
        # Test special characters in content
        special_doc = Document(
            page_content="Special chars: 'quotes', \"double\", \\backslash, \n newline",
-            metadata={"doc_id": "special_doc", "test": "edge_case"}
+            metadata={"doc_id": "special_doc", "test": "edge_case"},
        )
        embeddings = [[0.1, 0.2, 0.3, 0.4]]

@ -199,20 +188,18 @@ class TestClickzettaVector(AbstractVectorTest):
        # Prepare documents with various language content
        documents = [
            Document(
-                page_content="云器科技提供强大的Lakehouse解决方案",
-                metadata={"doc_id": "cn_doc_1", "lang": "chinese"}
+                page_content="云器科技提供强大的Lakehouse解决方案", metadata={"doc_id": "cn_doc_1", "lang": "chinese"}
            ),
            Document(
                page_content="Clickzetta provides powerful Lakehouse solutions",
-                metadata={"doc_id": "en_doc_1", "lang": "english"}
+                metadata={"doc_id": "en_doc_1", "lang": "english"},
            ),
            Document(
-                page_content="Lakehouse是现代数据架构的重要组成部分",
-                metadata={"doc_id": "cn_doc_2", "lang": "chinese"}
+                page_content="Lakehouse是现代数据架构的重要组成部分", metadata={"doc_id": "cn_doc_2", "lang": "chinese"}
            ),
            Document(
                page_content="Modern data architecture includes Lakehouse technology",
-                metadata={"doc_id": "en_doc_2", "lang": "english"}
+                metadata={"doc_id": "en_doc_2", "lang": "english"},
            ),
        ]

--- a/api/tests/integration_tests/vdb/clickzetta/test_docker_integration.py
+++ b/api/tests/integration_tests/vdb/clickzetta/test_docker_integration.py
@ -2,6 +2,7 @@
 """
 Test Clickzetta integration in Docker environment
 """
+
 import os
 import time

@ -20,7 +21,7 @@ def test_clickzetta_connection():
            service=os.getenv("CLICKZETTA_SERVICE", "api.clickzetta.com"),
            workspace=os.getenv("CLICKZETTA_WORKSPACE", "test_workspace"),
            vcluster=os.getenv("CLICKZETTA_VCLUSTER", "default"),
-            database=os.getenv("CLICKZETTA_SCHEMA", "dify")
+            database=os.getenv("CLICKZETTA_SCHEMA", "dify"),
        )

        with conn.cursor() as cursor:
@ -36,7 +37,7 @@ def test_clickzetta_connection():

            # Check if test collection exists
            test_collection = "collection_test_dataset"
-            if test_collection in [t[1] for t in tables if t[0] == 'dify']:
+            if test_collection in [t[1] for t in tables if t[0] == "dify"]:
                cursor.execute(f"DESCRIBE dify.{test_collection}")
                columns = cursor.fetchall()
                print(f"✓ Table structure for {test_collection}:")
@ -55,6 +56,7 @@ def test_clickzetta_connection():
        print(f"✗ Connection test failed: {e}")
        return False

+
 def test_dify_api():
    """Test Dify API with Clickzetta backend"""
    print("\n=== Testing Dify API ===")
@ -83,6 +85,7 @@ def test_dify_api():
        print(f"✗ API test failed: {e}")
        return False

+
 def verify_table_structure():
    """Verify the table structure meets Dify requirements"""
    print("\n=== Verifying Table Structure ===")
@ -91,15 +94,10 @@ def verify_table_structure():
        "id": "VARCHAR",
        "page_content": "VARCHAR",
        "metadata": "VARCHAR",  # JSON stored as VARCHAR in Clickzetta
-        "vector": "ARRAY<FLOAT>"
+        "vector": "ARRAY<FLOAT>",
    }

-    expected_metadata_fields = [
-        "doc_id",
-        "doc_hash",
-        "document_id",
-        "dataset_id"
-    ]
+    expected_metadata_fields = ["doc_id", "doc_hash", "document_id", "dataset_id"]

    print("✓ Expected table structure:")
    for col, dtype in expected_columns.items():
@ -117,6 +115,7 @@ def verify_table_structure():

    return True

+
 def main():
    """Run all tests"""
    print("Starting Clickzetta integration tests for Dify Docker\n")
@ -137,9 +136,9 @@ def main():
            results.append((test_name, False))

    # Summary
-    print("\n" + "="*50)
+    print("\n" + "=" * 50)
    print("Test Summary:")
-    print("="*50)
+    print("=" * 50)

    passed = sum(1 for _, success in results if success)
    total = len(results)
@ -161,5 +160,6 @@ def main():
        print("\n⚠️  Some tests failed. Please check the errors above.")
        return 1

+
 if __name__ == "__main__":
    exit(main())