Merge branch 'main' into feat/r2

# Conflicts:
#	docker/docker-compose.middleware.yaml
#	web/app/components/workflow-app/components/workflow-main.tsx
#	web/app/components/workflow-app/hooks/index.ts
#	web/app/components/workflow/hooks-store/store.ts
#	web/app/components/workflow/hooks/index.ts
#	web/app/components/workflow/nodes/_base/components/variable/var-reference-picker.tsx
This commit is contained in:
jyong
2025-07-02 18:20:05 +08:00
201 changed files with 7572 additions and 5289 deletions

View File

@ -889,7 +889,7 @@ class RegisterService:
TenantService.create_owner_tenant_if_not_exist(account=account, is_setup=True)
dify_setup = DifySetup(version=dify_config.CURRENT_VERSION)
dify_setup = DifySetup(version=dify_config.project.version)
db.session.add(dify_setup)
db.session.commit()
except Exception as e:

View File

@ -323,6 +323,23 @@ class DatasetService:
except ProviderTokenNotInitError as ex:
raise ValueError(ex.description)
@staticmethod
def check_reranking_model_setting(tenant_id: str, reranking_model_provider: str, reranking_model: str):
try:
model_manager = ModelManager()
model_manager.get_model_instance(
tenant_id=tenant_id,
provider=reranking_model_provider,
model_type=ModelType.RERANK,
model=reranking_model,
)
except LLMBadRequestError:
raise ValueError(
"No Rerank Model available. Please configure a valid provider in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ValueError(ex.description)
@staticmethod
def update_dataset(dataset_id, data, user):
"""
@ -645,6 +662,10 @@ class DatasetService:
)
except ProviderTokenNotInitError:
# If we can't get the embedding model, preserve existing settings
logging.warning(
f"Failed to initialize embedding model {data['embedding_model_provider']}/{data['embedding_model']}, "
f"preserving existing settings"
)
if dataset.embedding_model_provider and dataset.embedding_model:
filtered_data["embedding_model_provider"] = dataset.embedding_model_provider
filtered_data["embedding_model"] = dataset.embedding_model
@ -2661,6 +2682,7 @@ class SegmentService:
# calc embedding use tokens
if document.doc_form == "qa_model":
segment.answer = args.answer
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content + segment.answer])[0]
else:
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])[0]

View File

@ -1,23 +0,0 @@
from typing import Optional
from core.moderation.factory import ModerationFactory, ModerationOutputsResult
from extensions.ext_database import db
from models.model import App, AppModelConfig
class ModerationService:
def moderation_for_outputs(self, app_id: str, app_model: App, text: str) -> ModerationOutputsResult:
app_model_config: Optional[AppModelConfig] = None
app_model_config = (
db.session.query(AppModelConfig).filter(AppModelConfig.id == app_model.app_model_config_id).first()
)
if not app_model_config:
raise ValueError("app model config not found")
name = app_model_config.sensitive_word_avoidance_dict["type"]
config = app_model_config.sensitive_word_avoidance_dict["config"]
moderation = ModerationFactory(name, app_id, app_model.tenant_id, config)
return moderation.moderation_for_outputs(text)

View File

@ -8,9 +8,10 @@ from extensions.ext_redis import redis_client
class OAuthProxyService(BasePluginClient):
# Default max age for proxy context parameter in seconds
__MAX_AGE__ = 5 * 60 # 5 minutes
__KEY_PREFIX__ = "oauth_proxy_context:"
@staticmethod
def create_proxy_context(user_id, tenant_id, plugin_id, provider):
def create_proxy_context(user_id: str, tenant_id: str, plugin_id: str, provider: str):
"""
Create a proxy context for an OAuth 2.0 authorization request.
@ -23,26 +24,22 @@ class OAuthProxyService(BasePluginClient):
is used to verify the state, ensuring the request's integrity and authenticity,
and mitigating replay attacks.
"""
seconds, _ = redis_client.time()
context_id = str(uuid.uuid4())
data = {
"user_id": user_id,
"plugin_id": plugin_id,
"tenant_id": tenant_id,
"provider": provider,
# encode redis time to avoid distribution time skew
"timestamp": seconds,
}
# ignore nonce collision
redis_client.setex(
f"oauth_proxy_context:{context_id}",
f"{OAuthProxyService.__KEY_PREFIX__}{context_id}",
OAuthProxyService.__MAX_AGE__,
json.dumps(data),
)
return context_id
@staticmethod
def use_proxy_context(context_id, max_age=__MAX_AGE__):
def use_proxy_context(context_id: str):
"""
Validate the proxy context parameter.
This checks if the context_id is valid and not expired.
@ -50,12 +47,7 @@ class OAuthProxyService(BasePluginClient):
if not context_id:
raise ValueError("context_id is required")
# get data from redis
data = redis_client.getdel(f"oauth_proxy_context:{context_id}")
data = redis_client.getdel(f"{OAuthProxyService.__KEY_PREFIX__}{context_id}")
if not data:
raise ValueError("context_id is invalid")
# check if data is expired
seconds, _ = redis_client.time()
state = json.loads(data)
if state.get("timestamp") < seconds - max_age:
raise ValueError("context_id is expired")
return state
return json.loads(data)

View File

@ -0,0 +1,74 @@
from collections.abc import Mapping, Sequence
from typing import Any, Literal
from sqlalchemy.orm import Session
from core.plugin.entities.parameters import PluginParameterOption
from core.plugin.impl.dynamic_select import DynamicSelectClient
from core.tools.tool_manager import ToolManager
from core.tools.utils.configuration import ProviderConfigEncrypter
from extensions.ext_database import db
from models.tools import BuiltinToolProvider
class PluginParameterService:
@staticmethod
def get_dynamic_select_options(
tenant_id: str,
user_id: str,
plugin_id: str,
provider: str,
action: str,
parameter: str,
provider_type: Literal["tool"],
) -> Sequence[PluginParameterOption]:
"""
Get dynamic select options for a plugin parameter.
Args:
tenant_id: The tenant ID.
plugin_id: The plugin ID.
provider: The provider name.
action: The action name.
parameter: The parameter name.
"""
credentials: Mapping[str, Any] = {}
match provider_type:
case "tool":
provider_controller = ToolManager.get_builtin_provider(provider, tenant_id)
# init tool configuration
tool_configuration = ProviderConfigEncrypter(
tenant_id=tenant_id,
config=[x.to_basic_provider_config() for x in provider_controller.get_credentials_schema()],
provider_type=provider_controller.provider_type.value,
provider_identity=provider_controller.entity.identity.name,
)
# check if credentials are required
if not provider_controller.need_credentials:
credentials = {}
else:
# fetch credentials from db
with Session(db.engine) as session:
db_record = (
session.query(BuiltinToolProvider)
.filter(
BuiltinToolProvider.tenant_id == tenant_id,
BuiltinToolProvider.provider == provider,
)
.first()
)
if db_record is None:
raise ValueError(f"Builtin provider {provider} not found when fetching credentials")
credentials = tool_configuration.decrypt(db_record.credentials)
case _:
raise ValueError(f"Invalid provider type: {provider_type}")
return (
DynamicSelectClient()
.fetch_dynamic_select_options(tenant_id, user_id, plugin_id, provider, action, credentials, parameter)
.options
)

View File

@ -97,16 +97,16 @@ class VectorService:
vector = Vector(dataset=dataset)
vector.delete_by_ids([segment.index_node_id])
vector.add_texts([document], duplicate_check=True)
# update keyword index
keyword = Keyword(dataset)
keyword.delete_by_ids([segment.index_node_id])
# save keyword index
if keywords and len(keywords) > 0:
keyword.add_texts([document], keywords_list=[keywords])
else:
keyword.add_texts([document])
# update keyword index
keyword = Keyword(dataset)
keyword.delete_by_ids([segment.index_node_id])
# save keyword index
if keywords and len(keywords) > 0:
keyword.add_texts([document], keywords_list=[keywords])
else:
keyword.add_texts([document])
@classmethod
def generate_child_chunks(

View File

@ -154,7 +154,7 @@ class WorkflowDraftVariableService:
variables = (
# Do not load the `value` field.
query.options(orm.defer(WorkflowDraftVariable.value))
.order_by(WorkflowDraftVariable.id.desc())
.order_by(WorkflowDraftVariable.created_at.desc())
.limit(limit)
.offset((page - 1) * limit)
.all()
@ -168,7 +168,7 @@ class WorkflowDraftVariableService:
WorkflowDraftVariable.node_id == node_id,
)
query = self._session.query(WorkflowDraftVariable).filter(*criteria)
variables = query.order_by(WorkflowDraftVariable.id.desc()).all()
variables = query.order_by(WorkflowDraftVariable.created_at.desc()).all()
return WorkflowDraftVariableList(variables=variables)
def list_node_variables(self, app_id: str, node_id: str) -> WorkflowDraftVariableList:
@ -235,7 +235,9 @@ class WorkflowDraftVariableService:
self._session.flush()
return variable
def _reset_node_var(self, workflow: Workflow, variable: WorkflowDraftVariable) -> WorkflowDraftVariable | None:
def _reset_node_var_or_sys_var(
self, workflow: Workflow, variable: WorkflowDraftVariable
) -> WorkflowDraftVariable | None:
# If a variable does not allow updating, it makes no sense to reset it.
if not variable.editable:
return variable
@ -259,28 +261,35 @@ class WorkflowDraftVariableService:
self._session.flush()
return None
# Get node type for proper value extraction
node_config = workflow.get_node_config_by_id(variable.node_id)
node_type = workflow.get_node_type_from_node_config(node_config)
outputs_dict = node_exec.outputs_dict or {}
# a sentinel value used to check the absence of the output variable key.
absent = object()
# Note: Based on the implementation in `_build_from_variable_assigner_mapping`,
# VariableAssignerNode (both v1 and v2) can only create conversation draft variables.
# For consistency, we should simply return when processing VARIABLE_ASSIGNER nodes.
#
# This implementation must remain synchronized with the `_build_from_variable_assigner_mapping`
# and `save` methods.
if node_type == NodeType.VARIABLE_ASSIGNER:
return variable
if variable.get_variable_type() == DraftVariableType.NODE:
# Get node type for proper value extraction
node_config = workflow.get_node_config_by_id(variable.node_id)
node_type = workflow.get_node_type_from_node_config(node_config)
if variable.name not in outputs_dict:
# Note: Based on the implementation in `_build_from_variable_assigner_mapping`,
# VariableAssignerNode (both v1 and v2) can only create conversation draft variables.
# For consistency, we should simply return when processing VARIABLE_ASSIGNER nodes.
#
# This implementation must remain synchronized with the `_build_from_variable_assigner_mapping`
# and `save` methods.
if node_type == NodeType.VARIABLE_ASSIGNER:
return variable
output_value = outputs_dict.get(variable.name, absent)
else:
output_value = outputs_dict.get(f"sys.{variable.name}", absent)
# We cannot use `is None` to check the existence of an output variable here as
# the value of the output may be `None`.
if output_value is absent:
# If variable not found in execution data, delete the variable
self._session.delete(instance=variable)
self._session.flush()
return None
value = outputs_dict[variable.name]
value_seg = WorkflowDraftVariable.build_segment_with_type(variable.value_type, value)
value_seg = WorkflowDraftVariable.build_segment_with_type(variable.value_type, output_value)
# Extract variable value using unified logic
variable.set_value(value_seg)
variable.last_edited_at = None # Reset to indicate this is a reset operation
@ -291,10 +300,8 @@ class WorkflowDraftVariableService:
variable_type = variable.get_variable_type()
if variable_type == DraftVariableType.CONVERSATION:
return self._reset_conv_var(workflow, variable)
elif variable_type == DraftVariableType.NODE:
return self._reset_node_var(workflow, variable)
else:
raise VariableResetError(f"cannot reset system variable, variable_id={variable.id}")
return self._reset_node_var_or_sys_var(workflow, variable)
def delete_variable(self, variable: WorkflowDraftVariable):
self._session.delete(variable)
@ -439,6 +446,9 @@ def _batch_upsert_draft_varaible(
stmt = stmt.on_conflict_do_update(
index_elements=WorkflowDraftVariable.unique_app_id_node_id_name(),
set_={
# Refresh creation timestamp to ensure updated variables
# appear first in chronologically sorted result sets.
"created_at": stmt.excluded.created_at,
"updated_at": stmt.excluded.updated_at,
"last_edited_at": stmt.excluded.last_edited_at,
"description": stmt.excluded.description,
@ -525,9 +535,6 @@ class DraftVariableSaver:
# The type of the current node (see NodeType).
_node_type: NodeType
# Indicates how the workflow execution was triggered (see InvokeFrom).
_invoke_from: InvokeFrom
#
_node_execution_id: str
@ -546,15 +553,16 @@ class DraftVariableSaver:
app_id: str,
node_id: str,
node_type: NodeType,
invoke_from: InvokeFrom,
node_execution_id: str,
enclosing_node_id: str | None = None,
):
# Important: `node_execution_id` parameter refers to the primary key (`id`) of the
# WorkflowNodeExecutionModel/WorkflowNodeExecution, not their `node_execution_id`
# field. These are distinct database fields with different purposes.
self._session = session
self._app_id = app_id
self._node_id = node_id
self._node_type = node_type
self._invoke_from = invoke_from
self._node_execution_id = node_execution_id
self._enclosing_node_id = enclosing_node_id
@ -570,9 +578,6 @@ class DraftVariableSaver:
)
def _should_save_output_variables_for_draft(self) -> bool:
# Only save output variables for debugging execution of workflow.
if self._invoke_from != InvokeFrom.DEBUGGER:
return False
if self._enclosing_node_id is not None and self._node_type != NodeType.VARIABLE_ASSIGNER:
# Currently we do not save output variables for nodes inside loop or iteration.
return False

View File

@ -12,7 +12,6 @@ from sqlalchemy.orm import Session
from core.app.app_config.entities import VariableEntityType
from core.app.apps.advanced_chat.app_config_manager import AdvancedChatAppConfigManager
from core.app.apps.workflow.app_config_manager import WorkflowAppConfigManager
from core.app.entities.app_invoke_entities import InvokeFrom
from core.file import File
from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository
from core.variables import Variable
@ -413,7 +412,6 @@ class WorkflowService:
app_id=app_model.id,
node_id=workflow_node_execution.node_id,
node_type=NodeType(workflow_node_execution.node_type),
invoke_from=InvokeFrom.DEBUGGER,
enclosing_node_id=enclosing_node_id,
node_execution_id=node_execution.id,
)