Merge remote-tracking branch 'origin/main' into feat/collaboration

This commit is contained in:
lyzno1
2025-10-11 14:43:20 +08:00
463 changed files with 9705 additions and 5680 deletions

View File

@ -10,14 +10,14 @@ from dify_app import DifyApp
def init_app(app: DifyApp):
@app.after_request
def after_request(response):
def after_request(response): # pyright: ignore[reportUnusedFunction]
"""Add Version headers to the response."""
response.headers.add("X-Version", dify_config.project.version)
response.headers.add("X-Env", dify_config.DEPLOY_ENV)
return response
@app.route("/health")
def health():
def health(): # pyright: ignore[reportUnusedFunction]
return Response(
json.dumps({"pid": os.getpid(), "status": "ok", "version": dify_config.project.version}),
status=200,
@ -25,7 +25,7 @@ def init_app(app: DifyApp):
)
@app.route("/threads")
def threads():
def threads(): # pyright: ignore[reportUnusedFunction]
num_threads = threading.active_count()
threads = threading.enumerate()
@ -50,7 +50,7 @@ def init_app(app: DifyApp):
}
@app.route("/db-pool-stat")
def pool_stat():
def pool_stat(): # pyright: ignore[reportUnusedFunction]
from extensions.ext_database import db
engine = db.engine

View File

@ -10,7 +10,7 @@ from models.engine import db
logger = logging.getLogger(__name__)
# Global flag to avoid duplicate registration of event listener
_GEVENT_COMPATIBILITY_SETUP: bool = False
_gevent_compatibility_setup: bool = False
def _safe_rollback(connection):
@ -26,14 +26,14 @@ def _safe_rollback(connection):
def _setup_gevent_compatibility():
global _GEVENT_COMPATIBILITY_SETUP # pylint: disable=global-statement
global _gevent_compatibility_setup # pylint: disable=global-statement
# Avoid duplicate registration
if _GEVENT_COMPATIBILITY_SETUP:
if _gevent_compatibility_setup:
return
@event.listens_for(Pool, "reset")
def _safe_reset(dbapi_connection, connection_record, reset_state): # pylint: disable=unused-argument
def _safe_reset(dbapi_connection, connection_record, reset_state): # pyright: ignore[reportUnusedFunction]
if reset_state.terminate_only:
return
@ -47,7 +47,7 @@ def _setup_gevent_compatibility():
except (AttributeError, ImportError):
_safe_rollback(dbapi_connection)
_GEVENT_COMPATIBILITY_SETUP = True
_gevent_compatibility_setup = True
def init_app(app: DifyApp):

View File

@ -2,4 +2,4 @@ from dify_app import DifyApp
def init_app(app: DifyApp):
from events import event_handlers # noqa: F401
from events import event_handlers # noqa: F401 # pyright: ignore[reportUnusedImport]

View File

@ -136,8 +136,8 @@ def init_app(app: DifyApp):
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPSpanExporter
from opentelemetry.instrumentation.celery import CeleryInstrumentor
from opentelemetry.instrumentation.flask import FlaskInstrumentor
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
from opentelemetry.instrumentation.redis import RedisInstrumentor
from opentelemetry.instrumentation.requests import RequestsInstrumentor
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from opentelemetry.metrics import get_meter, get_meter_provider, set_meter_provider
from opentelemetry.propagate import set_global_textmap
@ -237,7 +237,7 @@ def init_app(app: DifyApp):
instrument_exception_logging()
init_sqlalchemy_instrumentor(app)
RedisInstrumentor().instrument()
RequestsInstrumentor().instrument()
HTTPXClientInstrumentor().instrument()
atexit.register(shutdown_tracer)

View File

@ -4,7 +4,6 @@ from dify_app import DifyApp
def init_app(app: DifyApp):
if dify_config.SENTRY_DSN:
import openai
import sentry_sdk
from langfuse import parse_error # type: ignore
from sentry_sdk.integrations.celery import CeleryIntegration
@ -28,7 +27,6 @@ def init_app(app: DifyApp):
HTTPException,
ValueError,
FileNotFoundError,
openai.APIStatusError,
InvokeRateLimitError,
parse_error.defaultErrorResponse,
],

View File

@ -33,7 +33,9 @@ class AliyunOssStorage(BaseStorage):
def load_once(self, filename: str) -> bytes:
obj = self.client.get_object(self.__wrapper_folder_filename(filename))
data: bytes = obj.read()
data = obj.read()
if not isinstance(data, bytes):
return b""
return data
def load_stream(self, filename: str) -> Generator:

View File

@ -39,10 +39,10 @@ class AwsS3Storage(BaseStorage):
self.client.head_bucket(Bucket=self.bucket_name)
except ClientError as e:
# if bucket not exists, create it
if e.response["Error"]["Code"] == "404":
if e.response.get("Error", {}).get("Code") == "404":
self.client.create_bucket(Bucket=self.bucket_name)
# if bucket is not accessible, pass, maybe the bucket is existing but not accessible
elif e.response["Error"]["Code"] == "403":
elif e.response.get("Error", {}).get("Code") == "403":
pass
else:
# other error, raise exception
@ -55,7 +55,7 @@ class AwsS3Storage(BaseStorage):
try:
data: bytes = self.client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].read()
except ClientError as ex:
if ex.response["Error"]["Code"] == "NoSuchKey":
if ex.response.get("Error", {}).get("Code") == "NoSuchKey":
raise FileNotFoundError("File not found")
else:
raise
@ -66,7 +66,7 @@ class AwsS3Storage(BaseStorage):
response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
yield from response["Body"].iter_chunks()
except ClientError as ex:
if ex.response["Error"]["Code"] == "NoSuchKey":
if ex.response.get("Error", {}).get("Code") == "NoSuchKey":
raise FileNotFoundError("file not found")
elif "reached max retries" in str(ex):
raise ValueError("please do not request the same file too frequently")

View File

@ -27,24 +27,38 @@ class AzureBlobStorage(BaseStorage):
self.credential = None
def save(self, filename, data):
if not self.bucket_name:
return
client = self._sync_client()
blob_container = client.get_container_client(container=self.bucket_name)
blob_container.upload_blob(filename, data)
def load_once(self, filename: str) -> bytes:
if not self.bucket_name:
raise FileNotFoundError("Azure bucket name is not configured.")
client = self._sync_client()
blob = client.get_container_client(container=self.bucket_name)
blob = blob.get_blob_client(blob=filename)
data: bytes = blob.download_blob().readall()
data = blob.download_blob().readall()
if not isinstance(data, bytes):
raise TypeError(f"Expected bytes from blob.readall(), got {type(data).__name__}")
return data
def load_stream(self, filename: str) -> Generator:
if not self.bucket_name:
raise FileNotFoundError("Azure bucket name is not configured.")
client = self._sync_client()
blob = client.get_blob_client(container=self.bucket_name, blob=filename)
blob_data = blob.download_blob()
yield from blob_data.chunks()
def download(self, filename, target_filepath):
if not self.bucket_name:
return
client = self._sync_client()
blob = client.get_blob_client(container=self.bucket_name, blob=filename)
@ -53,12 +67,18 @@ class AzureBlobStorage(BaseStorage):
blob_data.readinto(my_blob)
def exists(self, filename):
if not self.bucket_name:
return False
client = self._sync_client()
blob = client.get_blob_client(container=self.bucket_name, blob=filename)
return blob.exists()
def delete(self, filename):
if not self.bucket_name:
return
client = self._sync_client()
blob_container = client.get_container_client(container=self.bucket_name)

View File

@ -430,7 +430,7 @@ class ClickZettaVolumeStorage(BaseStorage):
rows = self._execute_sql(sql, fetch=True)
exists = len(rows) > 0
exists = len(rows) > 0 if rows else False
logger.debug("File %s exists check: %s", filename, exists)
return exists
except Exception as e:
@ -509,16 +509,17 @@ class ClickZettaVolumeStorage(BaseStorage):
rows = self._execute_sql(sql, fetch=True)
result = []
for row in rows:
file_path = row[0] # relative_path column
if rows:
for row in rows:
file_path = row[0] # relative_path column
# For User Volume, remove dify prefix from results
dify_prefix_with_slash = f"{self._config.dify_prefix}/"
if volume_prefix == "USER VOLUME" and file_path.startswith(dify_prefix_with_slash):
file_path = file_path[len(dify_prefix_with_slash) :] # Remove prefix
# For User Volume, remove dify prefix from results
dify_prefix_with_slash = f"{self._config.dify_prefix}/"
if volume_prefix == "USER VOLUME" and file_path.startswith(dify_prefix_with_slash):
file_path = file_path[len(dify_prefix_with_slash) :] # Remove prefix
if files and not file_path.endswith("/") or directories and file_path.endswith("/"):
result.append(file_path)
if files and not file_path.endswith("/") or directories and file_path.endswith("/"):
result.append(file_path)
logger.debug("Scanned %d items in path %s", len(result), path)
return result

View File

@ -264,7 +264,7 @@ class FileLifecycleManager:
logger.warning("File %s not found in metadata", filename)
return False
metadata_dict[filename]["status"] = FileStatus.ARCHIVED.value
metadata_dict[filename]["status"] = FileStatus.ARCHIVED
metadata_dict[filename]["modified_at"] = datetime.now().isoformat()
self._save_metadata(metadata_dict)
@ -309,7 +309,7 @@ class FileLifecycleManager:
# Update metadata
metadata_dict = self._load_metadata()
if filename in metadata_dict:
metadata_dict[filename]["status"] = FileStatus.DELETED.value
metadata_dict[filename]["status"] = FileStatus.DELETED
metadata_dict[filename]["modified_at"] = datetime.now().isoformat()
self._save_metadata(metadata_dict)

View File

@ -439,6 +439,11 @@ class VolumePermissionManager:
self._permission_cache.clear()
logger.debug("Permission cache cleared")
@property
def volume_type(self) -> str | None:
"""Get the volume type."""
return self._volume_type
def get_permission_summary(self, dataset_id: str | None = None) -> dict[str, bool]:
"""Get permission summary
@ -632,13 +637,13 @@ def check_volume_permission(permission_manager: VolumePermissionManager, operati
VolumePermissionError: If no permission
"""
if not permission_manager.validate_operation(operation, dataset_id):
error_message = f"Permission denied for operation '{operation}' on {permission_manager._volume_type} volume"
error_message = f"Permission denied for operation '{operation}' on {permission_manager.volume_type} volume"
if dataset_id:
error_message += f" (dataset: {dataset_id})"
raise VolumePermissionError(
error_message,
operation=operation,
volume_type=permission_manager._volume_type or "unknown",
volume_type=permission_manager.volume_type or "unknown",
dataset_id=dataset_id,
)

View File

@ -35,12 +35,16 @@ class GoogleCloudStorage(BaseStorage):
def load_once(self, filename: str) -> bytes:
bucket = self.client.get_bucket(self.bucket_name)
blob = bucket.get_blob(filename)
if blob is None:
raise FileNotFoundError("File not found")
data: bytes = blob.download_as_bytes()
return data
def load_stream(self, filename: str) -> Generator:
bucket = self.client.get_bucket(self.bucket_name)
blob = bucket.get_blob(filename)
if blob is None:
raise FileNotFoundError("File not found")
with blob.open(mode="rb") as blob_stream:
while chunk := blob_stream.read(4096):
yield chunk
@ -48,6 +52,8 @@ class GoogleCloudStorage(BaseStorage):
def download(self, filename, target_filepath):
bucket = self.client.get_bucket(self.bucket_name)
blob = bucket.get_blob(filename)
if blob is None:
raise FileNotFoundError("File not found")
blob.download_to_filename(target_filepath)
def exists(self, filename):

View File

@ -45,7 +45,7 @@ class HuaweiObsStorage(BaseStorage):
def _get_meta(self, filename):
res = self.client.getObjectMetadata(bucketName=self.bucket_name, objectKey=filename)
if res.status < 300:
if res and res.status and res.status < 300:
return res
else:
return None

View File

@ -3,9 +3,9 @@ import os
from collections.abc import Generator
from pathlib import Path
import opendal
from dotenv import dotenv_values
from opendal import Operator
from opendal.layers import RetryLayer
from extensions.storage.base_storage import BaseStorage
@ -35,7 +35,7 @@ class OpenDALStorage(BaseStorage):
root = kwargs.get("root", "storage")
Path(root).mkdir(parents=True, exist_ok=True)
retry_layer = RetryLayer(max_times=3, factor=2.0, jitter=True)
retry_layer = opendal.layers.RetryLayer(max_times=3, factor=2.0, jitter=True)
self.op = Operator(scheme=scheme, **kwargs).layer(retry_layer)
logger.debug("opendal operator created with scheme %s", scheme)
logger.debug("added retry layer to opendal operator")

View File

@ -29,7 +29,7 @@ class OracleOCIStorage(BaseStorage):
try:
data: bytes = self.client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].read()
except ClientError as ex:
if ex.response["Error"]["Code"] == "NoSuchKey":
if ex.response.get("Error", {}).get("Code") == "NoSuchKey":
raise FileNotFoundError("File not found")
else:
raise
@ -40,7 +40,7 @@ class OracleOCIStorage(BaseStorage):
response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
yield from response["Body"].iter_chunks()
except ClientError as ex:
if ex.response["Error"]["Code"] == "NoSuchKey":
if ex.response.get("Error", {}).get("Code") == "NoSuchKey":
raise FileNotFoundError("File not found")
else:
raise

View File

@ -46,13 +46,13 @@ class SupabaseStorage(BaseStorage):
Path(target_filepath).write_bytes(result)
def exists(self, filename):
result = self.client.storage.from_(self.bucket_name).list(filename)
if result.count() > 0:
result = self.client.storage.from_(self.bucket_name).list(path=filename)
if len(result) > 0:
return True
return False
def delete(self, filename):
self.client.storage.from_(self.bucket_name).remove(filename)
self.client.storage.from_(self.bucket_name).remove([filename])
def bucket_exists(self):
buckets = self.client.storage.list_buckets()

View File

@ -11,6 +11,14 @@ class VolcengineTosStorage(BaseStorage):
def __init__(self):
super().__init__()
if not dify_config.VOLCENGINE_TOS_ACCESS_KEY:
raise ValueError("VOLCENGINE_TOS_ACCESS_KEY is not set")
if not dify_config.VOLCENGINE_TOS_SECRET_KEY:
raise ValueError("VOLCENGINE_TOS_SECRET_KEY is not set")
if not dify_config.VOLCENGINE_TOS_ENDPOINT:
raise ValueError("VOLCENGINE_TOS_ENDPOINT is not set")
if not dify_config.VOLCENGINE_TOS_REGION:
raise ValueError("VOLCENGINE_TOS_REGION is not set")
self.bucket_name = dify_config.VOLCENGINE_TOS_BUCKET_NAME
self.client = tos.TosClientV2(
ak=dify_config.VOLCENGINE_TOS_ACCESS_KEY,
@ -20,27 +28,39 @@ class VolcengineTosStorage(BaseStorage):
)
def save(self, filename, data):
if not self.bucket_name:
raise ValueError("VOLCENGINE_TOS_BUCKET_NAME is not set")
self.client.put_object(bucket=self.bucket_name, key=filename, content=data)
def load_once(self, filename: str) -> bytes:
if not self.bucket_name:
raise FileNotFoundError("VOLCENGINE_TOS_BUCKET_NAME is not set")
data = self.client.get_object(bucket=self.bucket_name, key=filename).read()
if not isinstance(data, bytes):
raise TypeError(f"Expected bytes, got {type(data).__name__}")
return data
def load_stream(self, filename: str) -> Generator:
if not self.bucket_name:
raise FileNotFoundError("VOLCENGINE_TOS_BUCKET_NAME is not set")
response = self.client.get_object(bucket=self.bucket_name, key=filename)
while chunk := response.read(4096):
yield chunk
def download(self, filename, target_filepath):
if not self.bucket_name:
raise ValueError("VOLCENGINE_TOS_BUCKET_NAME is not set")
self.client.get_object_to_file(bucket=self.bucket_name, key=filename, file_path=target_filepath)
def exists(self, filename):
if not self.bucket_name:
return False
res = self.client.head_object(bucket=self.bucket_name, key=filename)
if res.status_code != 200:
return False
return True
def delete(self, filename):
if not self.bucket_name:
return
self.client.delete_object(bucket=self.bucket_name, key=filename)