Merge branch 'main' into feat/rag-2

# Conflicts: # web/app/components/workflow/hooks/use-workflow.ts
2026-05-03 17:08:03 +08:00 · 2025-07-31 10:30:28 +08:00
parent 1b6a925b34 1b2046da3f
commit a7a4c8228e
151 changed files with 2950 additions and 920 deletions
--- a/api/tests/integration_tests/vdb/elasticsearch/test_elasticsearch.py
+++ b/api/tests/integration_tests/vdb/elasticsearch/test_elasticsearch.py
@ -11,7 +11,9 @@ class ElasticSearchVectorTest(AbstractVectorTest):
        self.attributes = ["doc_id", "dataset_id", "document_id", "doc_hash"]
        self.vector = ElasticSearchVector(
            index_name=self.collection_name.lower(),
-            config=ElasticSearchConfig(host="http://localhost", port="9200", username="elastic", password="elastic"),
+            config=ElasticSearchConfig(
+                use_cloud=False, host="http://localhost", port="9200", username="elastic", password="elastic"
+            ),
            attributes=self.attributes,
        )

--- a/api/tests/unit_tests/configs/test_dify_config.py
+++ b/api/tests/unit_tests/configs/test_dify_config.py
@ -1,5 +1,6 @@
 import os

+import pytest
 from flask import Flask
 from packaging.version import Version
 from yarl import URL
@ -137,3 +138,61 @@ def test_db_extras_options_merging(monkeypatch):
    options = engine_options["connect_args"]["options"]
    assert "search_path=myschema" in options
    assert "timezone=UTC" in options
+
+
+@pytest.mark.parametrize(
+    ("broker_url", "expected_host", "expected_port", "expected_username", "expected_password", "expected_db"),
+    [
+        ("redis://localhost:6379/1", "localhost", 6379, None, None, "1"),
+        ("redis://:password@localhost:6379/1", "localhost", 6379, None, "password", "1"),
+        ("redis://:mypass%23123@localhost:6379/1", "localhost", 6379, None, "mypass#123", "1"),
+        ("redis://user:pass%40word@redis-host:6380/2", "redis-host", 6380, "user", "pass@word", "2"),
+        ("redis://admin:complex%23pass%40word@127.0.0.1:6379/0", "127.0.0.1", 6379, "admin", "complex#pass@word", "0"),
+        (
+            "redis://user%40domain:secret%23123@redis.example.com:6380/3",
+            "redis.example.com",
+            6380,
+            "user@domain",
+            "secret#123",
+            "3",
+        ),
+        # Password containing %23 substring (double encoding scenario)
+        ("redis://:mypass%2523@localhost:6379/1", "localhost", 6379, None, "mypass%23", "1"),
+        # Username and password both containing encoded characters
+        ("redis://user%2525%40:pass%2523@localhost:6379/1", "localhost", 6379, "user%25@", "pass%23", "1"),
+    ],
+)
+def test_celery_broker_url_with_special_chars_password(
+    monkeypatch, broker_url, expected_host, expected_port, expected_username, expected_password, expected_db
+):
+    """Test that CELERY_BROKER_URL with various formats are handled correctly."""
+    from kombu.utils.url import parse_url
+
+    # clear system environment variables
+    os.environ.clear()
+
+    # Set up basic required environment variables (following existing pattern)
+    monkeypatch.setenv("CONSOLE_API_URL", "https://example.com")
+    monkeypatch.setenv("CONSOLE_WEB_URL", "https://example.com")
+    monkeypatch.setenv("DB_USERNAME", "postgres")
+    monkeypatch.setenv("DB_PASSWORD", "postgres")
+    monkeypatch.setenv("DB_HOST", "localhost")
+    monkeypatch.setenv("DB_PORT", "5432")
+    monkeypatch.setenv("DB_DATABASE", "dify")
+
+    # Set the CELERY_BROKER_URL to test
+    monkeypatch.setenv("CELERY_BROKER_URL", broker_url)
+
+    # Create config and verify the URL is stored correctly
+    config = DifyConfig()
+    assert broker_url == config.CELERY_BROKER_URL
+
+    # Test actual parsing behavior using kombu's parse_url (same as production)
+    redis_config = parse_url(config.CELERY_BROKER_URL)
+
+    # Verify the parsing results match expectations (using kombu's field names)
+    assert redis_config["hostname"] == expected_host
+    assert redis_config["port"] == expected_port
+    assert redis_config["userid"] == expected_username  # kombu uses 'userid' not 'username'
+    assert redis_config["password"] == expected_password
+    assert redis_config["virtual_host"] == expected_db  # kombu uses 'virtual_host' not 'db'
--- a/api/tests/unit_tests/core/ops/test_config_entity.py
+++ b/api/tests/unit_tests/core/ops/test_config_entity.py
@ -102,9 +102,14 @@ class TestPhoenixConfig:
        assert config.project == "default"

    def test_endpoint_validation_with_path(self):
-        """Test endpoint validation normalizes URL by removing path"""
-        config = PhoenixConfig(endpoint="https://custom.phoenix.com/api/v1")
-        assert config.endpoint == "https://custom.phoenix.com"
+        """Test endpoint validation with path"""
+        config = PhoenixConfig(endpoint="https://app.phoenix.arize.com/s/dify-integration")
+        assert config.endpoint == "https://app.phoenix.arize.com/s/dify-integration"
+
+    def test_endpoint_validation_without_path(self):
+        """Test endpoint validation without path"""
+        config = PhoenixConfig(endpoint="https://app.phoenix.arize.com")
+        assert config.endpoint == "https://app.phoenix.arize.com"


 class TestLangfuseConfig:
@ -118,7 +123,7 @@ class TestLangfuseConfig:
        assert config.host == "https://custom.langfuse.com"

    def test_valid_config_with_path(self):
-        host = host = "https://custom.langfuse.com/api/v1"
+        host = "https://custom.langfuse.com/api/v1"
        config = LangfuseConfig(public_key="public_key", secret_key="secret_key", host=host)
        assert config.public_key == "public_key"
        assert config.secret_key == "secret_key"
@ -368,13 +373,15 @@ class TestConfigIntegration:
        """Test that URL normalization works consistently across configs"""
        # Test that paths are removed from endpoints
        arize_config = ArizeConfig(endpoint="https://arize.com/api/v1/test")
-        phoenix_config = PhoenixConfig(endpoint="https://phoenix.com/api/v2/")
+        phoenix_with_path_config = PhoenixConfig(endpoint="https://app.phoenix.arize.com/s/dify-integration")
+        phoenix_without_path_config = PhoenixConfig(endpoint="https://app.phoenix.arize.com")
        aliyun_config = AliyunConfig(
            license_key="test_license", endpoint="https://tracing-analysis-dc-hz.aliyuncs.com/api/v1/traces"
        )

        assert arize_config.endpoint == "https://arize.com"
-        assert phoenix_config.endpoint == "https://phoenix.com"
+        assert phoenix_with_path_config.endpoint == "https://app.phoenix.arize.com/s/dify-integration"
+        assert phoenix_without_path_config.endpoint == "https://app.phoenix.arize.com"
        assert aliyun_config.endpoint == "https://tracing-analysis-dc-hz.aliyuncs.com"

    def test_project_default_values(self):
--- a/api/tests/unit_tests/services/test_metadata_bug_complete.py
+++ b/api/tests/unit_tests/services/test_metadata_bug_complete.py
@ -0,0 +1,189 @@
+from unittest.mock import Mock, patch
+
+import pytest
+from flask_restful import reqparse
+from werkzeug.exceptions import BadRequest
+
+from services.entities.knowledge_entities.knowledge_entities import MetadataArgs
+from services.metadata_service import MetadataService
+
+
+class TestMetadataBugCompleteValidation:
+    """Complete test suite to verify the metadata nullable bug and its fix."""
+
+    def test_1_pydantic_layer_validation(self):
+        """Test Layer 1: Pydantic model validation correctly rejects None values."""
+        # Pydantic should reject None values for required fields
+        with pytest.raises((ValueError, TypeError)):
+            MetadataArgs(type=None, name=None)
+
+        with pytest.raises((ValueError, TypeError)):
+            MetadataArgs(type="string", name=None)
+
+        with pytest.raises((ValueError, TypeError)):
+            MetadataArgs(type=None, name="test")
+
+        # Valid values should work
+        valid_args = MetadataArgs(type="string", name="test_name")
+        assert valid_args.type == "string"
+        assert valid_args.name == "test_name"
+
+    def test_2_business_logic_layer_crashes_on_none(self):
+        """Test Layer 2: Business logic crashes when None values slip through."""
+        # Create mock that bypasses Pydantic validation
+        mock_metadata_args = Mock()
+        mock_metadata_args.name = None
+        mock_metadata_args.type = "string"
+
+        with patch("services.metadata_service.current_user") as mock_user:
+            mock_user.current_tenant_id = "tenant-123"
+            mock_user.id = "user-456"
+
+            # Should crash with TypeError
+            with pytest.raises(TypeError, match="object of type 'NoneType' has no len"):
+                MetadataService.create_metadata("dataset-123", mock_metadata_args)
+
+        # Test update method as well
+        with patch("services.metadata_service.current_user") as mock_user:
+            mock_user.current_tenant_id = "tenant-123"
+            mock_user.id = "user-456"
+
+            with pytest.raises(TypeError, match="object of type 'NoneType' has no len"):
+                MetadataService.update_metadata_name("dataset-123", "metadata-456", None)
+
+    def test_3_database_constraints_verification(self):
+        """Test Layer 3: Verify database model has nullable=False constraints."""
+        from sqlalchemy import inspect
+
+        from models.dataset import DatasetMetadata
+
+        # Get table info
+        mapper = inspect(DatasetMetadata)
+
+        # Check that type and name columns are not nullable
+        type_column = mapper.columns["type"]
+        name_column = mapper.columns["name"]
+
+        assert type_column.nullable is False, "type column should be nullable=False"
+        assert name_column.nullable is False, "name column should be nullable=False"
+
+    def test_4_fixed_api_layer_rejects_null(self, app):
+        """Test Layer 4: Fixed API configuration properly rejects null values."""
+        # Test Console API create endpoint (fixed)
+        parser = reqparse.RequestParser()
+        parser.add_argument("type", type=str, required=True, nullable=False, location="json")
+        parser.add_argument("name", type=str, required=True, nullable=False, location="json")
+
+        with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"):
+            with pytest.raises(BadRequest):
+                parser.parse_args()
+
+        # Test with just name being null
+        with app.test_request_context(json={"type": "string", "name": None}, content_type="application/json"):
+            with pytest.raises(BadRequest):
+                parser.parse_args()
+
+        # Test with just type being null
+        with app.test_request_context(json={"type": None, "name": "test"}, content_type="application/json"):
+            with pytest.raises(BadRequest):
+                parser.parse_args()
+
+    def test_5_fixed_api_accepts_valid_values(self, app):
+        """Test that fixed API still accepts valid non-null values."""
+        parser = reqparse.RequestParser()
+        parser.add_argument("type", type=str, required=True, nullable=False, location="json")
+        parser.add_argument("name", type=str, required=True, nullable=False, location="json")
+
+        with app.test_request_context(json={"type": "string", "name": "valid_name"}, content_type="application/json"):
+            args = parser.parse_args()
+            assert args["type"] == "string"
+            assert args["name"] == "valid_name"
+
+    def test_6_simulated_buggy_behavior(self, app):
+        """Test simulating the original buggy behavior with nullable=True."""
+        # Simulate the old buggy configuration
+        buggy_parser = reqparse.RequestParser()
+        buggy_parser.add_argument("type", type=str, required=True, nullable=True, location="json")
+        buggy_parser.add_argument("name", type=str, required=True, nullable=True, location="json")
+
+        with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"):
+            # This would pass in the buggy version
+            args = buggy_parser.parse_args()
+            assert args["type"] is None
+            assert args["name"] is None
+
+            # But would crash when trying to create MetadataArgs
+            with pytest.raises((ValueError, TypeError)):
+                MetadataArgs(**args)
+
+    def test_7_end_to_end_validation_layers(self):
+        """Test all validation layers work together correctly."""
+        # Layer 1: API should reject null at parameter level (with fix)
+        # Layer 2: Pydantic should reject null at model level
+        # Layer 3: Business logic expects non-null
+        # Layer 4: Database enforces non-null
+
+        # Test that valid data flows through all layers
+        valid_data = {"type": "string", "name": "test_metadata"}
+
+        # Should create valid Pydantic object
+        metadata_args = MetadataArgs(**valid_data)
+        assert metadata_args.type == "string"
+        assert metadata_args.name == "test_metadata"
+
+        # Should not crash in business logic length check
+        assert len(metadata_args.name) <= 255  # This should not crash
+        assert len(metadata_args.type) > 0  # This should not crash
+
+    def test_8_verify_specific_fix_locations(self):
+        """Verify that the specific locations mentioned in bug report are fixed."""
+        # Read the actual files to verify fixes
+        import os
+
+        # Console API create
+        console_create_file = "api/controllers/console/datasets/metadata.py"
+        if os.path.exists(console_create_file):
+            with open(console_create_file) as f:
+                content = f.read()
+                # Should contain nullable=False, not nullable=True
+                assert "nullable=True" not in content.split("class DatasetMetadataCreateApi")[1].split("class")[0]
+
+        # Service API create
+        service_create_file = "api/controllers/service_api/dataset/metadata.py"
+        if os.path.exists(service_create_file):
+            with open(service_create_file) as f:
+                content = f.read()
+                # Should contain nullable=False, not nullable=True
+                create_api_section = content.split("class DatasetMetadataCreateServiceApi")[1].split("class")[0]
+                assert "nullable=True" not in create_api_section
+
+
+class TestMetadataValidationSummary:
+    """Summary tests that demonstrate the complete validation architecture."""
+
+    def test_validation_layer_architecture(self):
+        """Document and test the 4-layer validation architecture."""
+        # Layer 1: API Parameter Validation (Flask-RESTful reqparse)
+        # - Role: First line of defense, validates HTTP request parameters
+        # - Fixed: nullable=False ensures null values are rejected at API boundary
+
+        # Layer 2: Pydantic Model Validation
+        # - Role: Validates data structure and types before business logic
+        # - Working: Required fields without Optional[] reject None values
+
+        # Layer 3: Business Logic Validation
+        # - Role: Domain-specific validation (length checks, uniqueness, etc.)
+        # - Vulnerable: Direct len() calls crash on None values
+
+        # Layer 4: Database Constraints
+        # - Role: Final data integrity enforcement
+        # - Working: nullable=False prevents None values in database
+
+        # The bug was: Layer 1 allowed None, but Layers 2-4 expected non-None
+        # The fix: Make Layer 1 consistent with Layers 2-4
+
+        assert True  # This test documents the architecture
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
--- a/api/tests/unit_tests/services/test_metadata_nullable_bug.py
+++ b/api/tests/unit_tests/services/test_metadata_nullable_bug.py
@ -0,0 +1,108 @@
+from unittest.mock import Mock, patch
+
+import pytest
+from flask_restful import reqparse
+
+from services.entities.knowledge_entities.knowledge_entities import MetadataArgs
+from services.metadata_service import MetadataService
+
+
+class TestMetadataNullableBug:
+    """Test case to reproduce the metadata nullable validation bug."""
+
+    def test_metadata_args_with_none_values_should_fail(self):
+        """Test that MetadataArgs validation should reject None values."""
+        # This test demonstrates the expected behavior - should fail validation
+        with pytest.raises((ValueError, TypeError)):
+            # This should fail because Pydantic expects non-None values
+            MetadataArgs(type=None, name=None)
+
+    def test_metadata_service_create_with_none_name_crashes(self):
+        """Test that MetadataService.create_metadata crashes when name is None."""
+        # Mock the MetadataArgs to bypass Pydantic validation
+        mock_metadata_args = Mock()
+        mock_metadata_args.name = None  # This will cause len() to crash
+        mock_metadata_args.type = "string"
+
+        with patch("services.metadata_service.current_user") as mock_user:
+            mock_user.current_tenant_id = "tenant-123"
+            mock_user.id = "user-456"
+
+            # This should crash with TypeError when calling len(None)
+            with pytest.raises(TypeError, match="object of type 'NoneType' has no len"):
+                MetadataService.create_metadata("dataset-123", mock_metadata_args)
+
+    def test_metadata_service_update_with_none_name_crashes(self):
+        """Test that MetadataService.update_metadata_name crashes when name is None."""
+        with patch("services.metadata_service.current_user") as mock_user:
+            mock_user.current_tenant_id = "tenant-123"
+            mock_user.id = "user-456"
+
+            # This should crash with TypeError when calling len(None)
+            with pytest.raises(TypeError, match="object of type 'NoneType' has no len"):
+                MetadataService.update_metadata_name("dataset-123", "metadata-456", None)
+
+    def test_api_parser_accepts_null_values(self, app):
+        """Test that API parser configuration incorrectly accepts null values."""
+        # Simulate the current API parser configuration
+        parser = reqparse.RequestParser()
+        parser.add_argument("type", type=str, required=True, nullable=True, location="json")
+        parser.add_argument("name", type=str, required=True, nullable=True, location="json")
+
+        # Simulate request data with null values
+        with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"):
+            # This should parse successfully due to nullable=True
+            args = parser.parse_args()
+
+            # Verify that null values are accepted
+            assert args["type"] is None
+            assert args["name"] is None
+
+            # This demonstrates the bug: API accepts None but business logic will crash
+
+    def test_integration_bug_scenario(self, app):
+        """Test the complete bug scenario from API to service layer."""
+        # Step 1: API parser accepts null values (current buggy behavior)
+        parser = reqparse.RequestParser()
+        parser.add_argument("type", type=str, required=True, nullable=True, location="json")
+        parser.add_argument("name", type=str, required=True, nullable=True, location="json")
+
+        with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"):
+            args = parser.parse_args()
+
+            # Step 2: Try to create MetadataArgs with None values
+            # This should fail at Pydantic validation level
+            with pytest.raises((ValueError, TypeError)):
+                metadata_args = MetadataArgs(**args)
+
+        # Step 3: If we bypass Pydantic (simulating the bug scenario)
+        # Move this outside the request context to avoid Flask-Login issues
+        mock_metadata_args = Mock()
+        mock_metadata_args.name = None  # From args["name"]
+        mock_metadata_args.type = None  # From args["type"]
+
+        with patch("services.metadata_service.current_user") as mock_user:
+            mock_user.current_tenant_id = "tenant-123"
+            mock_user.id = "user-456"
+
+            # Step 4: Service layer crashes on len(None)
+            with pytest.raises(TypeError, match="object of type 'NoneType' has no len"):
+                MetadataService.create_metadata("dataset-123", mock_metadata_args)
+
+    def test_correct_nullable_false_configuration_works(self, app):
+        """Test that the correct nullable=False configuration works as expected."""
+        # This tests the FIXED configuration
+        parser = reqparse.RequestParser()
+        parser.add_argument("type", type=str, required=True, nullable=False, location="json")
+        parser.add_argument("name", type=str, required=True, nullable=False, location="json")
+
+        with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"):
+            # This should fail with BadRequest due to nullable=False
+            from werkzeug.exceptions import BadRequest
+
+            with pytest.raises(BadRequest):
+                parser.parse_args()
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])