Merge branch 'p284' into deploy/dev

2026-05-03 08:58:09 +08:00 · 2025-10-09 09:39:06 +08:00
parent 85dfc013ea 33d4c95470
commit 9b0f172f91
377 changed files with 7018 additions and 5036 deletions
--- a/api/services/app_service.py
+++ b/api/services/app_service.py
@ -2,6 +2,7 @@ import json
 import logging
 from typing import TypedDict, cast

+import sqlalchemy as sa
 from flask_sqlalchemy.pagination import Pagination

 from configs import dify_config
@ -65,7 +66,7 @@ class AppService:
                return None

        app_models = db.paginate(
-            db.select(App).where(*filters).order_by(App.created_at.desc()),
+            sa.select(App).where(*filters).order_by(App.created_at.desc()),
            page=args["page"],
            per_page=args["limit"],
            error_out=False,
--- a/api/services/auth/firecrawl/firecrawl.py
+++ b/api/services/auth/firecrawl/firecrawl.py
@ -1,6 +1,6 @@
 import json

-import requests
+import httpx

 from services.auth.api_key_auth_base import ApiKeyAuthBase

@ -36,7 +36,7 @@ class FirecrawlAuth(ApiKeyAuthBase):
        return {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}

    def _post_request(self, url, data, headers):
-        return requests.post(url, headers=headers, json=data)
+        return httpx.post(url, headers=headers, json=data)

    def _handle_error(self, response):
        if response.status_code in {402, 409, 500}:
--- a/api/services/auth/jina.py
+++ b/api/services/auth/jina.py
@ -1,6 +1,6 @@
 import json

-import requests
+import httpx

 from services.auth.api_key_auth_base import ApiKeyAuthBase

@ -31,7 +31,7 @@ class JinaAuth(ApiKeyAuthBase):
        return {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}

    def _post_request(self, url, data, headers):
-        return requests.post(url, headers=headers, json=data)
+        return httpx.post(url, headers=headers, json=data)

    def _handle_error(self, response):
        if response.status_code in {402, 409, 500}:
--- a/api/services/auth/jina/jina.py
+++ b/api/services/auth/jina/jina.py
@ -1,6 +1,6 @@
 import json

-import requests
+import httpx

 from services.auth.api_key_auth_base import ApiKeyAuthBase

@ -31,7 +31,7 @@ class JinaAuth(ApiKeyAuthBase):
        return {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}

    def _post_request(self, url, data, headers):
-        return requests.post(url, headers=headers, json=data)
+        return httpx.post(url, headers=headers, json=data)

    def _handle_error(self, response):
        if response.status_code in {402, 409, 500}:
--- a/api/services/auth/watercrawl/watercrawl.py
+++ b/api/services/auth/watercrawl/watercrawl.py
@ -1,7 +1,7 @@
 import json
 from urllib.parse import urljoin

-import requests
+import httpx

 from services.auth.api_key_auth_base import ApiKeyAuthBase

@ -31,7 +31,7 @@ class WatercrawlAuth(ApiKeyAuthBase):
        return {"Content-Type": "application/json", "X-API-KEY": self.api_key}

    def _get_request(self, url, headers):
-        return requests.get(url, headers=headers)
+        return httpx.get(url, headers=headers)

    def _handle_error(self, response):
        if response.status_code in {402, 409, 500}:
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@ -93,7 +93,7 @@ logger = logging.getLogger(__name__)
 class DatasetService:
    @staticmethod
    def get_datasets(page, per_page, tenant_id=None, user=None, search=None, tag_ids=None, include_all=False):
-        query = select(Dataset).where(Dataset.tenant_id == tenant_id).order_by(Dataset.created_at.desc())
+        query = select(Dataset).where(Dataset.tenant_id == tenant_id).order_by(Dataset.created_at.desc(), Dataset.id)

        if user:
            # get permitted dataset ids
@ -115,12 +115,12 @@ class DatasetService:
                    # Check if permitted_dataset_ids is not empty to avoid WHERE false condition
                    if permitted_dataset_ids and len(permitted_dataset_ids) > 0:
                        query = query.where(
-                            db.or_(
+                            sa.or_(
                                Dataset.permission == DatasetPermissionEnum.ALL_TEAM,
-                                db.and_(
+                                sa.and_(
                                    Dataset.permission == DatasetPermissionEnum.ONLY_ME, Dataset.created_by == user.id
                                ),
-                                db.and_(
+                                sa.and_(
                                    Dataset.permission == DatasetPermissionEnum.PARTIAL_TEAM,
                                    Dataset.id.in_(permitted_dataset_ids),
                                ),
@ -128,9 +128,9 @@ class DatasetService:
                        )
                    else:
                        query = query.where(
-                            db.or_(
+                            sa.or_(
                                Dataset.permission == DatasetPermissionEnum.ALL_TEAM,
-                                db.and_(
+                                sa.and_(
                                    Dataset.permission == DatasetPermissionEnum.ONLY_ME, Dataset.created_by == user.id
                                ),
                            )
@ -1879,7 +1879,7 @@ class DocumentService:
    #                 for notion_info in notion_info_list:
    #                     workspace_id = notion_info.workspace_id
    #                     data_source_binding = DataSourceOauthBinding.query.filter(
-    #                         db.and_(
+    #                         sa.and_(
    #                             DataSourceOauthBinding.tenant_id == current_user.current_tenant_id,
    #                             DataSourceOauthBinding.provider == "notion",
    #                             DataSourceOauthBinding.disabled == False,
--- a/api/services/entities/knowledge_entities/rag_pipeline_entities.py
+++ b/api/services/entities/knowledge_entities/rag_pipeline_entities.py
@ -83,7 +83,7 @@ class RetrievalSetting(BaseModel):
    Retrieval Setting.
    """

-    search_method: Literal["semantic_search", "fulltext_search", "keyword_search", "hybrid_search"]
+    search_method: Literal["semantic_search", "full_text_search", "keyword_search", "hybrid_search"]
    top_k: int
    score_threshold: float | None = 0.5
    score_threshold_enabled: bool = False
--- a/api/services/operation_service.py
+++ b/api/services/operation_service.py
@ -1,6 +1,6 @@
 import os

-import requests
+import httpx


 class OperationService:
@ -12,7 +12,7 @@ class OperationService:
        headers = {"Content-Type": "application/json", "Billing-Api-Secret-Key": cls.secret_key}

        url = f"{cls.base_url}{endpoint}"
-        response = requests.request(method, url, json=json, params=params, headers=headers)
+        response = httpx.request(method, url, json=json, params=params, headers=headers)

        return response.json()

--- a/api/services/plugin/plugin_migration.py
+++ b/api/services/plugin/plugin_migration.py
@ -471,7 +471,7 @@ class PluginMigration:
        total_failed_tenant = 0
        while True:
            # paginate
-            tenants = db.paginate(db.select(Tenant).order_by(Tenant.created_at.desc()), page=page, per_page=100)
+            tenants = db.paginate(sa.select(Tenant).order_by(Tenant.created_at.desc()), page=page, per_page=100)
            if tenants.items is None or len(tenants.items) == 0:
                break

--- a/api/services/tag_service.py
+++ b/api/services/tag_service.py
@ -1,5 +1,6 @@
 import uuid

+import sqlalchemy as sa
 from flask_login import current_user
 from sqlalchemy import func, select
 from werkzeug.exceptions import NotFound
@ -18,7 +19,7 @@ class TagService:
            .where(Tag.type == tag_type, Tag.tenant_id == current_tenant_id)
        )
        if keyword:
-            query = query.where(db.and_(Tag.name.ilike(f"%{keyword}%")))
+            query = query.where(sa.and_(Tag.name.ilike(f"%{keyword}%")))
        query = query.group_by(Tag.id, Tag.type, Tag.name, Tag.created_at)
        results: list = query.order_by(Tag.created_at.desc()).all()
        return results
--- a/api/services/variable_truncator.py
+++ b/api/services/variable_truncator.py
@ -262,6 +262,14 @@ class VariableTruncator:
        target_length = self._array_element_limit

        for i, item in enumerate(value):
+            # Dirty fix:
+            # The output of `Start` node may contain list of `File` elements,
+            # causing `AssertionError` while invoking `_truncate_json_primitives`.
+            #
+            # This check ensures that `list[File]` are handled separately
+            if isinstance(item, File):
+                truncated_value.append(item)
+                continue
            if i >= target_length:
                return _PartResult(truncated_value, used_size, True)
            if i > 0:
--- a/api/services/website_service.py
+++ b/api/services/website_service.py
@ -3,7 +3,7 @@ import json
 from dataclasses import dataclass
 from typing import Any

-import requests
+import httpx
 from flask_login import current_user

 from core.helper import encrypter
@ -216,7 +216,7 @@ class WebsiteService:
    @classmethod
    def _crawl_with_jinareader(cls, request: CrawlRequest, api_key: str) -> dict[str, Any]:
        if not request.options.crawl_sub_pages:
-            response = requests.get(
+            response = httpx.get(
                f"https://r.jina.ai/{request.url}",
                headers={"Accept": "application/json", "Authorization": f"Bearer {api_key}"},
            )
@ -224,7 +224,7 @@ class WebsiteService:
                raise ValueError("Failed to crawl:")
            return {"status": "active", "data": response.json().get("data")}
        else:
-            response = requests.post(
+            response = httpx.post(
                "https://adaptivecrawl-kir3wx7b3a-uc.a.run.app",
                json={
                    "url": request.url,
@ -287,7 +287,7 @@ class WebsiteService:

    @classmethod
    def _get_jinareader_status(cls, job_id: str, api_key: str) -> dict[str, Any]:
-        response = requests.post(
+        response = httpx.post(
            "https://adaptivecrawlstatus-kir3wx7b3a-uc.a.run.app",
            headers={"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"},
            json={"taskId": job_id},
@ -303,7 +303,7 @@ class WebsiteService:
        }

        if crawl_status_data["status"] == "completed":
-            response = requests.post(
+            response = httpx.post(
                "https://adaptivecrawlstatus-kir3wx7b3a-uc.a.run.app",
                headers={"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"},
                json={"taskId": job_id, "urls": list(data.get("processed", {}).keys())},
@ -362,7 +362,7 @@ class WebsiteService:
    @classmethod
    def _get_jinareader_url_data(cls, job_id: str, url: str, api_key: str) -> dict[str, Any] | None:
        if not job_id:
-            response = requests.get(
+            response = httpx.get(
                f"https://r.jina.ai/{url}",
                headers={"Accept": "application/json", "Authorization": f"Bearer {api_key}"},
            )
@ -371,7 +371,7 @@ class WebsiteService:
            return dict(response.json().get("data", {}))
        else:
            # Get crawl status first
-            status_response = requests.post(
+            status_response = httpx.post(
                "https://adaptivecrawlstatus-kir3wx7b3a-uc.a.run.app",
                headers={"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"},
                json={"taskId": job_id},
@ -381,7 +381,7 @@ class WebsiteService:
                raise ValueError("Crawl job is not completed")

            # Get processed data
-            data_response = requests.post(
+            data_response = httpx.post(
                "https://adaptivecrawlstatus-kir3wx7b3a-uc.a.run.app",
                headers={"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"},
                json={"taskId": job_id, "urls": list(status_data.get("processed", {}).keys())},