Merge branch 'fix/chore-fix' into dev/plugin-deploy

2026-04-30 23:48:04 +08:00 · 2024-11-15 15:56:45 +08:00
parent 1e81476e3a 6300e506fb
commit 6da877c6a6
112 changed files with 4206 additions and 219 deletions
--- a/api/app.py
+++ b/api/app.py
@ -1,4 +1,5 @@
 import os
+import sys

 from configs import dify_config

@ -29,6 +30,9 @@ from models import account, dataset, model, source, task, tool, tools, web  # no

 # DO NOT REMOVE ABOVE

+if sys.version_info[:2] == (3, 10):
+    print("Warning: Python 3.10 will not be supported in the next version.")
+

 warnings.simplefilter("ignore", ResourceWarning)

@ -49,7 +53,6 @@ if dify_config.TESTING:
@app.after_request
 def after_request(response):
    """Add Version headers to the response."""
-    response.set_cookie("remember_token", "", expires=0)
    response.headers.add("X-Version", dify_config.CURRENT_VERSION)
    response.headers.add("X-Env", dify_config.DEPLOY_ENV)
    return response
--- a/api/configs/packaging/init.py
+++ b/api/configs/packaging/init.py
@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):

    CURRENT_VERSION: str = Field(
        description="Dify version",
-        default="0.11.0",
+        default="0.11.1",
    )

    COMMIT_SHA: str = Field(
--- a/api/constants/languages.py
+++ b/api/constants/languages.py
@ -17,6 +17,7 @@ language_timezone_mapping = {
    "hi-IN": "Asia/Kolkata",
    "tr-TR": "Europe/Istanbul",
    "fa-IR": "Asia/Tehran",
+    "sl-SI": "Europe/Ljubljana",
 }

 languages = list(language_timezone_mapping.keys())
--- a/api/core/app/apps/workflow_app_runner.py
+++ b/api/core/app/apps/workflow_app_runner.py
@ -361,6 +361,7 @@ class WorkflowBasedAppRunner(AppRunner):
                    node_run_index=workflow_entry.graph_engine.graph_runtime_state.node_run_steps,
                    output=event.pre_iteration_output,
                    parallel_mode_run_id=event.parallel_mode_run_id,
+                    duration=event.duration,
                )
            )
        elif isinstance(event, (IterationRunSucceededEvent | IterationRunFailedEvent)):
--- a/api/core/app/entities/queue_entities.py
+++ b/api/core/app/entities/queue_entities.py
@ -111,6 +111,7 @@ class QueueIterationNextEvent(AppQueueEvent):
    """iteratoin run in parallel mode run id"""
    node_run_index: int
    output: Optional[Any] = None  # output for the current iteration
+    duration: Optional[float] = None

    @field_validator("output", mode="before")
    @classmethod
@ -307,6 +308,8 @@ class QueueNodeSucceededEvent(AppQueueEvent):
    execution_metadata: Optional[dict[NodeRunMetadataKey, Any]] = None

    error: Optional[str] = None
+    """single iteration duration map"""
+    iteration_duration_map: Optional[dict[str, float]] = None


 class QueueNodeInIterationFailedEvent(AppQueueEvent):
--- a/api/core/app/entities/task_entities.py
+++ b/api/core/app/entities/task_entities.py
@ -434,6 +434,7 @@ class IterationNodeNextStreamResponse(StreamResponse):
        parallel_id: Optional[str] = None
        parallel_start_node_id: Optional[str] = None
        parallel_mode_run_id: Optional[str] = None
+        duration: Optional[float] = None

    event: StreamEvent = StreamEvent.ITERATION_NEXT
    workflow_run_id: str
--- a/api/core/app/task_pipeline/workflow_cycle_manage.py
+++ b/api/core/app/task_pipeline/workflow_cycle_manage.py
@ -624,6 +624,7 @@ class WorkflowCycleManage:
                parallel_id=event.parallel_id,
                parallel_start_node_id=event.parallel_start_node_id,
                parallel_mode_run_id=event.parallel_mode_run_id,
+                duration=event.duration,
            ),
        )

--- a/api/core/model_runtime/model_providers/gitee_ai/llm/Qwen2.5-72B-Instruct.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/llm/Qwen2.5-72B-Instruct.yaml
@ -0,0 +1,95 @@
+model: Qwen2.5-72B-Instruct
+label:
+  zh_Hans: Qwen2.5-72B-Instruct
+  en_US: Qwen2.5-72B-Instruct
+model_type: llm
+features:
+  - agent-thought
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    label:
+      en_US: "Max Tokens"
+      zh_Hans: "最大Token数"
+    type: int
+    default: 512
+    min: 1
+    required: true
+    help:
+      en_US: "The maximum number of tokens that can be generated by the model varies depending on the model."
+      zh_Hans: "模型可生成的最大 token 个数，不同模型上限不同。"
+
+  - name: temperature
+    use_template: temperature
+    label:
+      en_US: "Temperature"
+      zh_Hans: "采样温度"
+    type: float
+    default: 0.7
+    min: 0.0
+    max: 1.0
+    precision: 1
+    required: true
+    help:
+      en_US: "The randomness of the sampling temperature control output. The temperature value is within the range of [0.0, 1.0]. The higher the value, the more random and creative the output; the lower the value, the more stable it is. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
+      zh_Hans: "采样温度控制输出的随机性。温度值在 [0.0, 1.0] 范围内，值越高，输出越随机和创造性；值越低，输出越稳定。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
+
+  - name: top_p
+    use_template: top_p
+    label:
+      en_US: "Top P"
+      zh_Hans: "Top P"
+    type: float
+    default: 0.7
+    min: 0.0
+    max: 1.0
+    precision: 1
+    required: true
+    help:
+      en_US: "The value range of the sampling method is [0.0, 1.0]. The top_p value determines that the model selects tokens from the top p% of candidate words with the highest probability; when top_p is 0, this parameter is invalid. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
+      zh_Hans: "采样方法的取值范围为 [0.0,1.0]。top_p 值确定模型从概率最高的前p%的候选词中选取 tokens；当 top_p 为 0 时，此参数无效。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
+
+  - name: top_k
+    use_template: top_k
+    label:
+      en_US: "Top K"
+      zh_Hans: "Top K"
+    type: int
+    default: 50
+    min: 0
+    max: 100
+    required: true
+    help:
+      en_US: "The value range is [0,100], which limits the model to only select from the top k words with the highest probability when choosing the next word at each step. The larger the value, the more diverse text generation will be."
+      zh_Hans: "取值范围为 [0,100]，限制模型在每一步选择下一个词时，只从概率最高的前 k 个词中选取。数值越大，文本生成越多样。"
+
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    label:
+      en_US: "Frequency Penalty"
+      zh_Hans: "频率惩罚"
+    type: float
+    default: 0
+    min: -1.0
+    max: 1.0
+    precision: 1
+    required: false
+    help:
+      en_US: "Used to adjust the frequency of repeated content in automatically generated text. Positive numbers reduce repetition, while negative numbers increase repetition. After setting this parameter, if a word has already appeared in the text, the model will decrease the probability of choosing that word for subsequent generation."
+      zh_Hans: "用于调整自动生成文本中重复内容的频率。正数减少重复，负数增加重复。设置此参数后，如果一个词在文本中已经出现过，模型在后续生成中选择该词的概率会降低。"
+
+  - name: user
+    use_template: text
+    label:
+      en_US: "User"
+      zh_Hans: "用户"
+    type: string
+    required: false
+    help:
+      en_US: "Used to track and differentiate conversation requests from different users."
+      zh_Hans: "用于追踪和区分不同用户的对话请求。"
--- a/api/core/model_runtime/model_providers/gitee_ai/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/llm/_position.yaml
@ -1,3 +1,4 @@
+- Qwen2.5-72B-Instruct
 - Qwen2-7B-Instruct
 - Qwen2-72B-Instruct
 - Yi-1.5-34B-Chat
--- a/api/core/model_runtime/model_providers/gitee_ai/llm/llm.py
+++ b/api/core/model_runtime/model_providers/gitee_ai/llm/llm.py
@ -6,6 +6,7 @@ from core.model_runtime.entities.message_entities import (
    PromptMessage,
    PromptMessageTool,
 )
+from core.model_runtime.entities.model_entities import ModelFeature
 from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel


@ -28,14 +29,13 @@ class GiteeAILargeLanguageModel(OAIAPICompatLargeLanguageModel):
        user: Optional[str] = None,
    ) -> Union[LLMResult, Generator]:
        self._add_custom_parameters(credentials, model, model_parameters)
-        return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream)
+        return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)

    def validate_credentials(self, model: str, credentials: dict) -> None:
        self._add_custom_parameters(credentials, model, None)
        super().validate_credentials(model, credentials)

-    @staticmethod
-    def _add_custom_parameters(credentials: dict, model: str, model_parameters: dict) -> None:
+    def _add_custom_parameters(self, credentials: dict, model: str, model_parameters: dict) -> None:
        if model is None:
            model = "bge-large-zh-v1.5"

@ -45,3 +45,7 @@ class GiteeAILargeLanguageModel(OAIAPICompatLargeLanguageModel):
            credentials["mode"] = LLMMode.COMPLETION.value
        else:
            credentials["mode"] = LLMMode.CHAT.value
+
+        schema = self.get_model_schema(model, credentials)
+        if ModelFeature.TOOL_CALL in schema.features or ModelFeature.MULTI_TOOL_CALL in schema.features:
+            credentials["function_calling_type"] = "tool_call"
--- a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py
+++ b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py
@ -178,6 +178,7 @@ class ElasticSearchVector(BaseVector):
                        Field.VECTOR.value: {  # Make sure the dimension is correct here
                            "type": "dense_vector",
                            "dims": dim,
+                            "index": True,
                            "similarity": "cosine",
                        },
                        Field.METADATA_KEY.value: {
--- a/api/core/rag/extractor/word_extractor.py
+++ b/api/core/rag/extractor/word_extractor.py
@ -50,9 +50,9 @@ class WordExtractor(BaseExtractor):

            self.web_path = self.file_path
            # TODO: use a better way to handle the file
-            with tempfile.NamedTemporaryFile(delete=False) as self.temp_file:
-                self.temp_file.write(r.content)
-                self.file_path = self.temp_file.name
+            self.temp_file = tempfile.NamedTemporaryFile()  # noqa: SIM115
+            self.temp_file.write(r.content)
+            self.file_path = self.temp_file.name
        elif not os.path.isfile(self.file_path):
            raise ValueError(f"File path {self.file_path} is not a valid file or url")

--- a/api/core/tools/entities/api_entities.py
+++ b/api/core/tools/entities/api_entities.py
@ -1,6 +1,6 @@
 from typing import Literal, Optional

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator

 from core.model_runtime.utils.encoders import jsonable_encoder
 from core.tools.__base.tool import ToolParameter
@ -37,6 +37,11 @@ class ToolProviderApiEntity(BaseModel):
    tools: list[ToolApiEntity] = Field(default_factory=list)
    labels: list[str] = Field(default_factory=list)

+    @field_validator("tools", mode="before")
+    @classmethod
+    def convert_none_to_empty_list(cls, v):
+        """Substitute an empty list when ``tools`` is supplied as ``None``.
+
+        Registered in "before" mode, so it sees the raw input value for the
+        ``tools`` field; any other value is passed through unchanged.
+        """
+        if v is None:
+            return []
+        return v
+
    def to_dict(self) -> dict:
        # -------------
        # overwrite tool parameter types for temp fix
--- a/api/core/tools/tool_manager.py
+++ b/api/core/tools/tool_manager.py
@ -698,7 +698,11 @@ class ToolManager:
        """
        get api provider
        """
-        provider_obj: ApiToolProvider | None = (
+        """
+            get tool provider
+        """
+        provider_name = provider
+        provider_obj: ApiToolProvider = (
            db.session.query(ApiToolProvider)
            .filter(
                ApiToolProvider.tenant_id == tenant_id,
@ -708,7 +712,7 @@ class ToolManager:
        )

        if provider_obj is None:
-            raise ValueError(f"you have not added provider {provider}")
+            raise ValueError(f"you have not added provider {provider_name}")

        try:
            credentials = json.loads(provider_obj.credentials_str) or {}
--- a/api/core/tools/utils/rag_web_reader.py
+++ b/api/core/tools/utils/rag_web_reader.py
@ -0,0 +1,17 @@
+import re
+
+
+def get_image_upload_file_ids(content):
+    """Collect upload-file IDs referenced by markdown image links in ``content``.
+
+    Only links of the form ``![image](<url>)`` are considered, where the URL
+    starts with ``http://`` or ``https://`` and ends in ``file-preview`` or
+    ``image-preview``. The ID is the path segment between ``files/`` and that
+    preview suffix.
+
+    :param content: markdown text to scan
+    :return: list of file IDs in order of appearance (duplicates are kept)
+    """
+    # The scheme used to be written ``http?://``, which makes the "p" optional
+    # instead of the "s" and therefore never matched https links.
+    pattern = r"!\[image\]\((https?://.*?(file-preview|image-preview))\)"
+    image_upload_file_ids = []
+    for url, preview_kind in re.findall(pattern, content):
+        # Search for the ID in front of the same suffix the link ended with.
+        content_match = re.search(rf"files/([^/]+)/{preview_kind}", url)
+        if content_match:
+            image_upload_file_ids.append(content_match.group(1))
+    return image_upload_file_ids
--- a/api/core/variables/init.py
+++ b/api/core/variables/init.py
@ -17,6 +17,7 @@ from .segments import (
 from .types import SegmentType
 from .variables import (
    ArrayAnyVariable,
+    ArrayFileVariable,
    ArrayNumberVariable,
    ArrayObjectVariable,
    ArrayStringVariable,
@ -58,4 +59,5 @@ __all__ = [
    "ArrayStringSegment",
    "FileSegment",
    "FileVariable",
+    "ArrayFileVariable",
 ]
--- a/api/core/variables/variables.py
+++ b/api/core/variables/variables.py
@ -1,9 +1,13 @@
+from collections.abc import Sequence
+from uuid import uuid4
+
 from pydantic import Field

 from core.helper import encrypter

 from .segments import (
    ArrayAnySegment,
+    ArrayFileSegment,
    ArrayNumberSegment,
    ArrayObjectSegment,
    ArrayStringSegment,
@ -24,11 +28,12 @@ class Variable(Segment):
    """

    id: str = Field(
-        default="",
-        description="Unique identity for variable. It's only used by environment variables now.",
+        # A factory (not ``default=``) so every instance gets its own UUID string;
+        # passing the lambda as ``default`` would store the function object itself.
+        default_factory=lambda: str(uuid4()),
+        description="Unique identity for variable.",
    )
    name: str
    description: str = Field(default="", description="Description of the variable.")
+    selector: Sequence[str] = Field(default_factory=list)


 class StringVariable(StringSegment, Variable):
@ -78,3 +83,7 @@ class NoneVariable(NoneSegment, Variable):

 class FileVariable(FileSegment, Variable):
    pass
+
+
+class ArrayFileVariable(ArrayFileSegment, Variable):
+    pass
--- a/api/core/workflow/entities/node_entities.py
+++ b/api/core/workflow/entities/node_entities.py
@ -24,6 +24,7 @@ class NodeRunMetadataKey(str, Enum):
    PARENT_PARALLEL_ID = "parent_parallel_id"
    PARENT_PARALLEL_START_NODE_ID = "parent_parallel_start_node_id"
    PARALLEL_MODE_RUN_ID = "parallel_mode_run_id"
+    ITERATION_DURATION_MAP = "iteration_duration_map"  # single iteration duration if iteration node runs


 class NodeRunResult(BaseModel):
--- a/api/core/workflow/entities/variable_pool.py
+++ b/api/core/workflow/entities/variable_pool.py
@ -95,13 +95,16 @@ class VariablePool(BaseModel):
        if len(selector) < 2:
            raise ValueError("Invalid selector")

+        if isinstance(value, Variable):
+            variable = value
        if isinstance(value, Segment):
-            v = value
+            variable = variable_factory.segment_to_variable(segment=value, selector=selector)
        else:
-            v = variable_factory.build_segment(value)
+            segment = variable_factory.build_segment(value)
+            variable = variable_factory.segment_to_variable(segment=segment, selector=selector)

        hash_key = hash(tuple(selector[1:]))
-        self.variable_dictionary[selector[0]][hash_key] = v
+        self.variable_dictionary[selector[0]][hash_key] = variable

    def get(self, selector: Sequence[str], /) -> Segment | None:
        """
--- a/api/core/workflow/graph_engine/entities/event.py
+++ b/api/core/workflow/graph_engine/entities/event.py
@ -148,6 +148,7 @@ class IterationRunStartedEvent(BaseIterationEvent):
 class IterationRunNextEvent(BaseIterationEvent):
    index: int = Field(..., description="index")
    pre_iteration_output: Optional[Any] = Field(None, description="pre iteration output")
+    duration: Optional[float] = Field(None, description="duration")


 class IterationRunSucceededEvent(BaseIterationEvent):
@ -156,6 +157,7 @@ class IterationRunSucceededEvent(BaseIterationEvent):
    outputs: Optional[dict[str, Any]] = None
    metadata: Optional[dict[str, Any]] = None
    steps: int = 0
+    iteration_duration_map: Optional[dict[str, float]] = None


 class IterationRunFailedEvent(BaseIterationEvent):
--- a/api/core/workflow/nodes/document_extractor/node.py
+++ b/api/core/workflow/nodes/document_extractor/node.py
@ -143,14 +143,14 @@ def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str)

 def _extract_text_from_plain_text(file_content: bytes) -> str:
    try:
-        return file_content.decode("utf-8")
+        return file_content.decode("utf-8", "ignore")
    except UnicodeDecodeError as e:
        raise TextExtractionError("Failed to decode plain text file") from e


 def _extract_text_from_json(file_content: bytes) -> str:
    try:
-        json_data = json.loads(file_content.decode("utf-8"))
+        json_data = json.loads(file_content.decode("utf-8", "ignore"))
        return json.dumps(json_data, indent=2, ensure_ascii=False)
    except (UnicodeDecodeError, json.JSONDecodeError) as e:
        raise TextExtractionError(f"Failed to decode or parse JSON file: {e}") from e
@ -159,7 +159,7 @@ def _extract_text_from_json(file_content: bytes) -> str:
 def _extract_text_from_yaml(file_content: bytes) -> str:
    """Extract the content from yaml file"""
    try:
-        yaml_data = yaml.safe_load_all(file_content.decode("utf-8"))
+        yaml_data = yaml.safe_load_all(file_content.decode("utf-8", "ignore"))
        return yaml.dump_all(yaml_data, allow_unicode=True, sort_keys=False)
    except (UnicodeDecodeError, yaml.YAMLError) as e:
        raise TextExtractionError(f"Failed to decode or parse YAML file: {e}") from e
@ -217,7 +217,7 @@ def _extract_text_from_file(file: File):

 def _extract_text_from_csv(file_content: bytes) -> str:
    try:
-        csv_file = io.StringIO(file_content.decode("utf-8"))
+        csv_file = io.StringIO(file_content.decode("utf-8", "ignore"))
        csv_reader = csv.reader(csv_file)
        rows = list(csv_reader)

--- a/api/core/workflow/nodes/iteration/iteration_node.py
+++ b/api/core/workflow/nodes/iteration/iteration_node.py
@ -156,6 +156,7 @@ class IterationNode(BaseNode[IterationNodeData]):
            index=0,
            pre_iteration_output=None,
        )
+        iter_run_map: dict[str, float] = {}
        outputs: list[Any] = [None] * len(iterator_list_value)
        try:
            if self.node_data.is_parallel:
@ -175,6 +176,7 @@ class IterationNode(BaseNode[IterationNodeData]):
                        iteration_graph,
                        index,
                        item,
+                        iter_run_map,
                    )
                    future.add_done_callback(thread_pool.task_done_callback)
                    futures.append(future)
@ -213,6 +215,7 @@ class IterationNode(BaseNode[IterationNodeData]):
                        start_at,
                        graph_engine,
                        iteration_graph,
+                        iter_run_map,
                    )
            if self.node_data.error_handle_mode == ErrorHandleMode.REMOVE_ABNORMAL_OUTPUT:
                outputs = [output for output in outputs if output is not None]
@ -230,7 +233,9 @@ class IterationNode(BaseNode[IterationNodeData]):

            yield RunCompletedEvent(
                run_result=NodeRunResult(
-                    status=WorkflowNodeExecutionStatus.SUCCEEDED, outputs={"output": jsonable_encoder(outputs)}
+                    status=WorkflowNodeExecutionStatus.SUCCEEDED,
+                    outputs={"output": jsonable_encoder(outputs)},
+                    metadata={NodeRunMetadataKey.ITERATION_DURATION_MAP: iter_run_map},
                )
            )
        except IterationNodeError as e:
@ -356,15 +361,19 @@ class IterationNode(BaseNode[IterationNodeData]):
        start_at: datetime,
        graph_engine: "GraphEngine",
        iteration_graph: Graph,
+        iter_run_map: dict[str, float],
        parallel_mode_run_id: Optional[str] = None,
    ) -> Generator[NodeEvent | InNodeEvent, None, None]:
        """
        run single iteration
        """
+        iter_start_at = datetime.now(timezone.utc).replace(tzinfo=None)
+
        try:
            rst = graph_engine.run()
            # get current iteration index
            current_index = variable_pool.get([self.node_id, "index"]).value
+            iteration_run_id = parallel_mode_run_id if parallel_mode_run_id is not None else f"{current_index}"
            next_index = int(current_index) + 1

            if current_index is None:
@ -431,6 +440,8 @@ class IterationNode(BaseNode[IterationNodeData]):
                            variable_pool.add([self.node_id, "index"], next_index)
                            if next_index < len(iterator_list_value):
                                variable_pool.add([self.node_id, "item"], iterator_list_value[next_index])
+                            duration = (datetime.now(timezone.utc).replace(tzinfo=None) - iter_start_at).total_seconds()
+                            iter_run_map[iteration_run_id] = duration
                            yield IterationRunNextEvent(
                                iteration_id=self.id,
                                iteration_node_id=self.node_id,
@ -439,6 +450,7 @@ class IterationNode(BaseNode[IterationNodeData]):
                                index=next_index,
                                parallel_mode_run_id=parallel_mode_run_id,
                                pre_iteration_output=None,
+                                duration=duration,
                            )
                            return
                        elif self.node_data.error_handle_mode == ErrorHandleMode.REMOVE_ABNORMAL_OUTPUT:
@ -449,6 +461,8 @@ class IterationNode(BaseNode[IterationNodeData]):

                            if next_index < len(iterator_list_value):
                                variable_pool.add([self.node_id, "item"], iterator_list_value[next_index])
+                            duration = (datetime.now(timezone.utc).replace(tzinfo=None) - iter_start_at).total_seconds()
+                            iter_run_map[iteration_run_id] = duration
                            yield IterationRunNextEvent(
                                iteration_id=self.id,
                                iteration_node_id=self.node_id,
@ -457,6 +471,7 @@ class IterationNode(BaseNode[IterationNodeData]):
                                index=next_index,
                                parallel_mode_run_id=parallel_mode_run_id,
                                pre_iteration_output=None,
+                                duration=duration,
                            )
                            return
                        elif self.node_data.error_handle_mode == ErrorHandleMode.TERMINATED:
@ -474,7 +489,10 @@ class IterationNode(BaseNode[IterationNodeData]):
                            )
                    yield metadata_event

-            current_iteration_output = variable_pool.get(self.node_data.output_selector).value
+            current_output_segment = variable_pool.get(self.node_data.output_selector)
+            if current_output_segment is None:
+                raise IterationNodeError("iteration output selector not found")
+            current_iteration_output = current_output_segment.value
            outputs[current_index] = current_iteration_output
            # remove all nodes outputs from variable pool
            for node_id in iteration_graph.node_ids:
@ -485,6 +503,8 @@ class IterationNode(BaseNode[IterationNodeData]):

            if next_index < len(iterator_list_value):
                variable_pool.add([self.node_id, "item"], iterator_list_value[next_index])
+            duration = (datetime.now(timezone.utc).replace(tzinfo=None) - iter_start_at).total_seconds()
+            iter_run_map[iteration_run_id] = duration
            yield IterationRunNextEvent(
                iteration_id=self.id,
                iteration_node_id=self.node_id,
@ -493,6 +513,7 @@ class IterationNode(BaseNode[IterationNodeData]):
                index=next_index,
                parallel_mode_run_id=parallel_mode_run_id,
                pre_iteration_output=jsonable_encoder(current_iteration_output) if current_iteration_output else None,
+                duration=duration,
            )

        except IterationNodeError as e:
@ -528,6 +549,7 @@ class IterationNode(BaseNode[IterationNodeData]):
        iteration_graph: Graph,
        index: int,
        item: Any,
+        iter_run_map: dict[str, float],
    ) -> Generator[NodeEvent | InNodeEvent, None, None]:
        """
        run single iteration in parallel mode
@ -546,6 +568,7 @@ class IterationNode(BaseNode[IterationNodeData]):
                start_at=start_at,
                graph_engine=graph_engine_copy,
                iteration_graph=iteration_graph,
+                iter_run_map=iter_run_map,
                parallel_mode_run_id=parallel_mode_run_id,
            ):
                q.put(event)
--- a/api/core/workflow/nodes/list_operator/entities.py
+++ b/api/core/workflow/nodes/list_operator/entities.py
@ -59,4 +59,4 @@ class ListOperatorNodeData(BaseNodeData):
    filter_by: FilterBy
    order_by: OrderBy
    limit: Limit
-    extract_by: ExtractConfig
+    extract_by: ExtractConfig = Field(default_factory=ExtractConfig)
--- a/api/docker/entrypoint.sh
+++ b/api/docker/entrypoint.sh
@ -21,7 +21,7 @@ if [[ "${MODE}" == "worker" ]]; then
  fi

  exec celery -A app.celery worker -P ${CELERY_WORKER_CLASS:-gevent} $CONCURRENCY_OPTION --loglevel ${LOG_LEVEL} \
-    -Q ${CELERY_QUEUES:-dataset,generation,mail,ops_trace,app_deletion}
+    -Q ${CELERY_QUEUES:-dataset,mail,ops_trace,app_deletion}

 elif [[ "${MODE}" == "beat" ]]; then
  exec celery -A app.celery beat --loglevel ${LOG_LEVEL}
--- a/api/extensions/ext_celery.py
+++ b/api/extensions/ext_celery.py
@ -46,7 +46,6 @@ def init_app(app: Flask) -> Celery:
        broker_connection_retry_on_startup=True,
        worker_log_format=dify_config.LOG_FORMAT,
        worker_task_log_format=dify_config.LOG_FORMAT,
-        worker_logfile=dify_config.LOG_FILE,
        worker_hijack_root_logger=False,
        timezone=pytz.timezone(dify_config.LOG_TZ),
    )
@ -56,6 +55,11 @@ def init_app(app: Flask) -> Celery:
            broker_use_ssl=ssl_options,  # Add the SSL options to the broker configuration
        )

+    if dify_config.LOG_FILE:
+        celery_app.conf.update(
+            worker_logfile=dify_config.LOG_FILE,
+        )
+
    celery_app.set_default()
    app.extensions["celery"] = celery_app

--- a/api/extensions/ext_logging.py
+++ b/api/extensions/ext_logging.py
@ -9,19 +9,21 @@ from configs import dify_config


 def init_app(app: Flask):
-    log_handlers = None
+    log_handlers = []
    log_file = dify_config.LOG_FILE
    if log_file:
        log_dir = os.path.dirname(log_file)
        os.makedirs(log_dir, exist_ok=True)
-        log_handlers = [
+        log_handlers.append(
            RotatingFileHandler(
                filename=log_file,
                maxBytes=dify_config.LOG_FILE_MAX_SIZE * 1024 * 1024,
                backupCount=dify_config.LOG_FILE_BACKUP_COUNT,
-            ),
-            logging.StreamHandler(sys.stdout),
-        ]
+            )
+        )
+
+    # Always add StreamHandler to log to console
+    log_handlers.append(logging.StreamHandler(sys.stdout))

    logging.basicConfig(
        level=dify_config.LOG_LEVEL,
--- a/api/factories/file_factory.py
+++ b/api/factories/file_factory.py
@ -180,6 +180,20 @@ def _get_remote_file_info(url: str):
    return mime_type, filename, file_size


+def _get_file_type_by_mimetype(mime_type: str) -> FileType:
+    """Map a MIME type string to a ``FileType`` by substring matching.
+
+    The checks run in order and the first hit wins: "image", "video", "audio",
+    then "text" or "pdf" (both treated as documents). Anything else falls back
+    to ``FileType.CUSTOM``.
+    """
+    if "image" in mime_type:
+        return FileType.IMAGE
+    if "video" in mime_type:
+        return FileType.VIDEO
+    if "audio" in mime_type:
+        return FileType.AUDIO
+    if "text" in mime_type or "pdf" in mime_type:
+        return FileType.DOCUMENT
+    return FileType.CUSTOM
+
+
 def _build_from_tool_file(
    *,
    mapping: Mapping[str, Any],
@ -199,12 +213,13 @@ def _build_from_tool_file(
        raise ValueError(f"ToolFile {mapping.get('tool_file_id')} not found")

    extension = "." + tool_file.file_key.split(".")[-1] if "." in tool_file.file_key else ".bin"
+    file_type = mapping.get("type", _get_file_type_by_mimetype(tool_file.mimetype))

    return File(
        id=mapping.get("id"),
        tenant_id=tenant_id,
        filename=tool_file.name,
-        type=FileType.value_of(mapping.get("type")),
+        type=file_type,
        transfer_method=transfer_method,
        remote_url=tool_file.original_url,
        related_id=tool_file.id,
--- a/api/factories/variable_factory.py
+++ b/api/factories/variable_factory.py
@ -1,34 +1,65 @@
-from collections.abc import Mapping
+from collections.abc import Mapping, Sequence
 from typing import Any
+from uuid import uuid4

 from configs import dify_config
 from core.file import File
-from core.variables import (
+from core.variables.exc import VariableError
+from core.variables.segments import (
    ArrayAnySegment,
    ArrayFileSegment,
    ArrayNumberSegment,
-    ArrayNumberVariable,
    ArrayObjectSegment,
-    ArrayObjectVariable,
    ArraySegment,
    ArrayStringSegment,
-    ArrayStringVariable,
    FileSegment,
    FloatSegment,
-    FloatVariable,
    IntegerSegment,
-    IntegerVariable,
    NoneSegment,
    ObjectSegment,
+    Segment,
+    StringSegment,
+)
+from core.variables.types import SegmentType
+from core.variables.variables import (
+    ArrayAnyVariable,
+    ArrayFileVariable,
+    ArrayNumberVariable,
+    ArrayObjectVariable,
+    ArrayStringVariable,
+    FileVariable,
+    FloatVariable,
+    IntegerVariable,
+    NoneVariable,
    ObjectVariable,
    SecretVariable,
-    Segment,
-    SegmentType,
-    StringSegment,
    StringVariable,
    Variable,
 )
-from core.variables.exc import VariableError
+
+
+class InvalidSelectorError(ValueError):
+    """``ValueError`` subclass for an invalid variable selector."""
+
+
+class UnsupportedSegmentTypeError(Exception):
+    """Raised when a segment type has no corresponding variable class."""
+
+
+# Define the constant
+SEGMENT_TO_VARIABLE_MAP = {
+    StringSegment: StringVariable,
+    IntegerSegment: IntegerVariable,
+    FloatSegment: FloatVariable,
+    ObjectSegment: ObjectVariable,
+    FileSegment: FileVariable,
+    ArrayStringSegment: ArrayStringVariable,
+    ArrayNumberSegment: ArrayNumberVariable,
+    ArrayObjectSegment: ArrayObjectVariable,
+    ArrayFileSegment: ArrayFileVariable,
+    ArrayAnySegment: ArrayAnyVariable,
+    NoneSegment: NoneVariable,
+}


 def build_variable_from_mapping(mapping: Mapping[str, Any], /) -> Variable:
@ -96,3 +127,30 @@ def build_segment(value: Any, /) -> Segment:
            case _:
                raise ValueError(f"not supported value {value}")
    raise ValueError(f"not supported value {value}")
+
+
+def segment_to_variable(
+    *,
+    segment: Segment,
+    selector: Sequence[str],
+    id: str | None = None,
+    name: str | None = None,
+    description: str = "",
+) -> Variable:
+    """Wrap ``segment`` in the ``Variable`` class registered for its exact type.
+
+    A segment that already is a ``Variable`` is returned unchanged. Otherwise
+    the variable class is looked up in ``SEGMENT_TO_VARIABLE_MAP`` by
+    ``type(segment)`` (subclasses are not matched) and instantiated with the
+    segment's value.
+
+    :param segment: the segment to convert
+    :param selector: stored on the variable; its last element is the fallback name
+    :param id: variable id; a fresh UUID4 string is generated when falsy
+    :param name: variable name; falls back to ``selector[-1]`` when falsy
+    :param description: description stored on the variable
+    :raises UnsupportedSegmentTypeError: if the segment type has no mapping
+    """
+    if isinstance(segment, Variable):
+        return segment
+
+    # Resolve the fallbacks before the type lookup, as the original ordering did.
+    resolved_name = name or selector[-1]
+    resolved_id = id or str(uuid4())
+
+    segment_type = type(segment)
+    variable_class = SEGMENT_TO_VARIABLE_MAP.get(segment_type)
+    if variable_class is None:
+        raise UnsupportedSegmentTypeError(f"not supported segment type {segment_type}")
+
+    return variable_class(
+        id=resolved_id,
+        name=resolved_name,
+        description=description,
+        value=segment.value,
+        selector=selector,
+    )
--- a/api/poetry.lock
+++ b/api/poetry.lock
@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.

 [[package]]
 name = "aiohappyeyeballs"
@ -950,6 +950,10 @@ files = [
    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a37b8f0391212d29b3a91a799c8e4a2855e0576911cdfb2515487e30e322253d"},
    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e84799f09591700a4154154cab9787452925578841a94321d5ee8fb9a9a328f0"},
    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f66b5337fa213f1da0d9000bc8dc0cb5b896b726eefd9c6046f699b169c41b9e"},
+    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5dab0844f2cf82be357a0eb11a9087f70c5430b2c241493fc122bb6f2bb0917c"},
+    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e4fe605b917c70283db7dfe5ada75e04561479075761a0b3866c081d035b01c1"},
+    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:1e9a65b5736232e7a7f91ff3d02277f11d339bf34099a56cdab6a8b3410a02b2"},
+    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:58d4b711689366d4a03ac7957ab8c28890415e267f9b6589969e74b6e42225ec"},
    {file = "Brotli-1.1.0-cp310-cp310-win32.whl", hash = "sha256:be36e3d172dc816333f33520154d708a2657ea63762ec16b62ece02ab5e4daf2"},
    {file = "Brotli-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:0c6244521dda65ea562d5a69b9a26120769b7a9fb3db2fe9545935ed6735b128"},
    {file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a3daabb76a78f829cafc365531c972016e4aa8d5b4bf60660ad8ecee19df7ccc"},
@ -962,8 +966,14 @@ files = [
    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:19c116e796420b0cee3da1ccec3b764ed2952ccfcc298b55a10e5610ad7885f9"},
    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:510b5b1bfbe20e1a7b3baf5fed9e9451873559a976c1a78eebaa3b86c57b4265"},
    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a1fd8a29719ccce974d523580987b7f8229aeace506952fa9ce1d53a033873c8"},
+    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c247dd99d39e0338a604f8c2b3bc7061d5c2e9e2ac7ba9cc1be5a69cb6cd832f"},
+    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1b2c248cd517c222d89e74669a4adfa5577e06ab68771a529060cf5a156e9757"},
+    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2a24c50840d89ded6c9a8fdc7b6ed3692ed4e86f1c4a4a938e1e92def92933e0"},
+    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f31859074d57b4639318523d6ffdca586ace54271a73ad23ad021acd807eb14b"},
    {file = "Brotli-1.1.0-cp311-cp311-win32.whl", hash = "sha256:39da8adedf6942d76dc3e46653e52df937a3c4d6d18fdc94a7c29d263b1f5b50"},
    {file = "Brotli-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:aac0411d20e345dc0920bdec5548e438e999ff68d77564d5e9463a7ca9d3e7b1"},
+    {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:32d95b80260d79926f5fab3c41701dbb818fde1c9da590e77e571eefd14abe28"},
+    {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b760c65308ff1e462f65d69c12e4ae085cff3b332d894637f6273a12a482d09f"},
    {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:316cc9b17edf613ac76b1f1f305d2a748f1b976b033b049a6ecdfd5612c70409"},
    {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:caf9ee9a5775f3111642d33b86237b05808dafcd6268faa492250e9b78046eb2"},
    {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70051525001750221daa10907c77830bc889cb6d865cc0b813d9db7fefc21451"},
@ -974,8 +984,24 @@ files = [
    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4093c631e96fdd49e0377a9c167bfd75b6d0bad2ace734c6eb20b348bc3ea180"},
    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e4c4629ddad63006efa0ef968c8e4751c5868ff0b1c5c40f76524e894c50248"},
    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:861bf317735688269936f755fa136a99d1ed526883859f86e41a5d43c61d8966"},
+    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:87a3044c3a35055527ac75e419dfa9f4f3667a1e887ee80360589eb8c90aabb9"},
+    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c5529b34c1c9d937168297f2c1fde7ebe9ebdd5e121297ff9c043bdb2ae3d6fb"},
+    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ca63e1890ede90b2e4454f9a65135a4d387a4585ff8282bb72964fab893f2111"},
+    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e79e6520141d792237c70bcd7a3b122d00f2613769ae0cb61c52e89fd3443839"},
    {file = "Brotli-1.1.0-cp312-cp312-win32.whl", hash = "sha256:5f4d5ea15c9382135076d2fb28dde923352fe02951e66935a9efaac8f10e81b0"},
    {file = "Brotli-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:906bc3a79de8c4ae5b86d3d75a8b77e44404b0f4261714306e3ad248d8ab0951"},
+    {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8bf32b98b75c13ec7cf774164172683d6e7891088f6316e54425fde1efc276d5"},
+    {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7bc37c4d6b87fb1017ea28c9508b36bbcb0c3d18b4260fcdf08b200c74a6aee8"},
+    {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c0ef38c7a7014ffac184db9e04debe495d317cc9c6fb10071f7fefd93100a4f"},
+    {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91d7cc2a76b5567591d12c01f019dd7afce6ba8cba6571187e21e2fc418ae648"},
+    {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a93dde851926f4f2678e704fadeb39e16c35d8baebd5252c9fd94ce8ce68c4a0"},
+    {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0db75f47be8b8abc8d9e31bc7aad0547ca26f24a54e6fd10231d623f183d089"},
+    {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6967ced6730aed543b8673008b5a391c3b1076d834ca438bbd70635c73775368"},
+    {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7eedaa5d036d9336c95915035fb57422054014ebdeb6f3b42eac809928e40d0c"},
+    {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d487f5432bf35b60ed625d7e1b448e2dc855422e87469e3f450aa5552b0eb284"},
+    {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832436e59afb93e1836081a20f324cb185836c617659b07b129141a8426973c7"},
+    {file = "Brotli-1.1.0-cp313-cp313-win32.whl", hash = "sha256:43395e90523f9c23a3d5bdf004733246fba087f2948f87ab28015f12359ca6a0"},
+    {file = "Brotli-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:9011560a466d2eb3f5a6e4929cf4a09be405c64154e12df0dd72713f6500e32b"},
    {file = "Brotli-1.1.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a090ca607cbb6a34b0391776f0cb48062081f5f60ddcce5d11838e67a01928d1"},
    {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de9d02f5bda03d27ede52e8cfe7b865b066fa49258cbab568720aa5be80a47d"},
    {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2333e30a5e00fe0fe55903c8832e08ee9c3b1382aacf4db26664a16528d51b4b"},
@ -985,6 +1011,10 @@ files = [
    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:fd5f17ff8f14003595ab414e45fce13d073e0762394f957182e69035c9f3d7c2"},
    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:069a121ac97412d1fe506da790b3e69f52254b9df4eb665cd42460c837193354"},
    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:e93dfc1a1165e385cc8239fab7c036fb2cd8093728cbd85097b284d7b99249a2"},
+    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_aarch64.whl", hash = "sha256:aea440a510e14e818e67bfc4027880e2fb500c2ccb20ab21c7a7c8b5b4703d75"},
+    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_i686.whl", hash = "sha256:6974f52a02321b36847cd19d1b8e381bf39939c21efd6ee2fc13a28b0d99348c"},
+    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_ppc64le.whl", hash = "sha256:a7e53012d2853a07a4a79c00643832161a910674a893d296c9f1259859a289d2"},
+    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:d7702622a8b40c49bffb46e1e3ba2e81268d5c04a34f460978c6b5517a34dd52"},
    {file = "Brotli-1.1.0-cp36-cp36m-win32.whl", hash = "sha256:a599669fd7c47233438a56936988a2478685e74854088ef5293802123b5b2460"},
    {file = "Brotli-1.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d143fd47fad1db3d7c27a1b1d66162e855b5d50a89666af46e1679c496e8e579"},
    {file = "Brotli-1.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:11d00ed0a83fa22d29bc6b64ef636c4552ebafcef57154b4ddd132f5638fbd1c"},
@ -996,6 +1026,10 @@ files = [
    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:919e32f147ae93a09fe064d77d5ebf4e35502a8df75c29fb05788528e330fe74"},
    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:23032ae55523cc7bccb4f6a0bf368cd25ad9bcdcc1990b64a647e7bbcce9cb5b"},
    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:224e57f6eac61cc449f498cc5f0e1725ba2071a3d4f48d5d9dffba42db196438"},
+    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:cb1dac1770878ade83f2ccdf7d25e494f05c9165f5246b46a621cc849341dc01"},
+    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:3ee8a80d67a4334482d9712b8e83ca6b1d9bc7e351931252ebef5d8f7335a547"},
+    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:5e55da2c8724191e5b557f8e18943b1b4839b8efc3ef60d65985bcf6f587dd38"},
+    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:d342778ef319e1026af243ed0a07c97acf3bad33b9f29e7ae6a1f68fd083e90c"},
    {file = "Brotli-1.1.0-cp37-cp37m-win32.whl", hash = "sha256:587ca6d3cef6e4e868102672d3bd9dc9698c309ba56d41c2b9c85bbb903cdb95"},
    {file = "Brotli-1.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2954c1c23f81c2eaf0b0717d9380bd348578a94161a65b3a2afc62c86467dd68"},
    {file = "Brotli-1.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:efa8b278894b14d6da122a72fefcebc28445f2d3f880ac59d46c90f4c13be9a3"},
@ -1008,6 +1042,10 @@ files = [
    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ab4fbee0b2d9098c74f3057b2bc055a8bd92ccf02f65944a241b4349229185a"},
    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:141bd4d93984070e097521ed07e2575b46f817d08f9fa42b16b9b5f27b5ac088"},
    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fce1473f3ccc4187f75b4690cfc922628aed4d3dd013d047f95a9b3919a86596"},
+    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d2b35ca2c7f81d173d2fadc2f4f31e88cc5f7a39ae5b6db5513cf3383b0e0ec7"},
+    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:af6fa6817889314555aede9a919612b23739395ce767fe7fcbea9a80bf140fe5"},
+    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:2feb1d960f760a575dbc5ab3b1c00504b24caaf6986e2dc2b01c09c87866a943"},
+    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4410f84b33374409552ac9b6903507cdb31cd30d2501fc5ca13d18f73548444a"},
    {file = "Brotli-1.1.0-cp38-cp38-win32.whl", hash = "sha256:db85ecf4e609a48f4b29055f1e144231b90edc90af7481aa731ba2d059226b1b"},
    {file = "Brotli-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3d7954194c36e304e1523f55d7042c59dc53ec20dd4e9ea9d151f1b62b4415c0"},
    {file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5fb2ce4b8045c78ebbc7b8f3c15062e435d47e7393cc57c25115cfd49883747a"},
@ -1020,6 +1058,10 @@ files = [
    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:949f3b7c29912693cee0afcf09acd6ebc04c57af949d9bf77d6101ebb61e388c"},
    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:89f4988c7203739d48c6f806f1e87a1d96e0806d44f0fba61dba81392c9e474d"},
    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:de6551e370ef19f8de1807d0a9aa2cdfdce2e85ce88b122fe9f6b2b076837e59"},
+    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0737ddb3068957cf1b054899b0883830bb1fec522ec76b1098f9b6e0f02d9419"},
+    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:4f3607b129417e111e30637af1b56f24f7a49e64763253bbc275c75fa887d4b2"},
+    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:6c6e0c425f22c1c719c42670d561ad682f7bfeeef918edea971a79ac5252437f"},
+    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:494994f807ba0b92092a163a0a283961369a65f6cbe01e8891132b7a320e61eb"},
    {file = "Brotli-1.1.0-cp39-cp39-win32.whl", hash = "sha256:f0d8a7a6b5983c2496e364b969f0e526647a06b075d034f3297dc66f3b360c64"},
    {file = "Brotli-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdad5b9014d83ca68c25d2e9444e28e967ef16e80f6b436918c700c117a85467"},
    {file = "Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724"},
@ -2432,6 +2474,26 @@ files = [
 [package.extras]
 test = ["pytest (>=6)"]

+[[package]]
+name = "fal-client"
+version = "0.5.6"
+description = "Python client for fal.ai"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "fal_client-0.5.6-py3-none-any.whl", hash = "sha256:631fd857a3c44753ee46a2eea1e7276471453aca58faac9c3702f744c7c84050"},
+    {file = "fal_client-0.5.6.tar.gz", hash = "sha256:d3afc4b6250023d0ee8437ec504558231d3b106d7aabc12cda8c39883faddecb"},
+]
+
+[package.dependencies]
+httpx = ">=0.21.0,<1"
+httpx-sse = ">=0.4.0,<0.5"
+
+[package.extras]
+dev = ["fal-client[docs,test]"]
+docs = ["sphinx", "sphinx-autodoc-typehints", "sphinx-rtd-theme"]
+test = ["pillow", "pytest", "pytest-asyncio"]
+
 [[package]]
 name = "fastapi"
 version = "0.115.4"
@ -4070,6 +4132,17 @@ http2 = ["h2 (>=3,<5)"]
 socks = ["socksio (==1.*)"]
 zstd = ["zstandard (>=0.18.0)"]

+[[package]]
+name = "httpx-sse"
+version = "0.4.0"
+description = "Consume Server-Sent Event (SSE) messages with HTTPX."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "httpx-sse-0.4.0.tar.gz", hash = "sha256:1e81a3a3070ce322add1d3529ed42eb5f70817f45ed6ec915ab753f961139721"},
+    {file = "httpx_sse-0.4.0-py3-none-any.whl", hash = "sha256:f329af6eae57eaa2bdfd962b42524764af68075ea87370a2de920af5341e318f"},
+]
+
 [[package]]
 name = "huggingface-hub"
 version = "0.16.4"
@ -8463,29 +8536,29 @@ pyasn1 = ">=0.1.3"

 [[package]]
 name = "ruff"
-version = "0.6.9"
+version = "0.7.3"
 description = "An extremely fast Python linter and code formatter, written in Rust."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "ruff-0.6.9-py3-none-linux_armv6l.whl", hash = "sha256:064df58d84ccc0ac0fcd63bc3090b251d90e2a372558c0f057c3f75ed73e1ccd"},
-    {file = "ruff-0.6.9-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:140d4b5c9f5fc7a7b074908a78ab8d384dd7f6510402267bc76c37195c02a7ec"},
-    {file = "ruff-0.6.9-py3-none-macosx_11_0_arm64.whl", hash = "sha256:53fd8ca5e82bdee8da7f506d7b03a261f24cd43d090ea9db9a1dc59d9313914c"},
-    {file = "ruff-0.6.9-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:645d7d8761f915e48a00d4ecc3686969761df69fb561dd914a773c1a8266e14e"},
-    {file = "ruff-0.6.9-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eae02b700763e3847595b9d2891488989cac00214da7f845f4bcf2989007d577"},
-    {file = "ruff-0.6.9-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d5ccc9e58112441de8ad4b29dcb7a86dc25c5f770e3c06a9d57e0e5eba48829"},
-    {file = "ruff-0.6.9-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:417b81aa1c9b60b2f8edc463c58363075412866ae4e2b9ab0f690dc1e87ac1b5"},
-    {file = "ruff-0.6.9-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c866b631f5fbce896a74a6e4383407ba7507b815ccc52bcedabb6810fdb3ef7"},
-    {file = "ruff-0.6.9-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7b118afbb3202f5911486ad52da86d1d52305b59e7ef2031cea3425142b97d6f"},
-    {file = "ruff-0.6.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a67267654edc23c97335586774790cde402fb6bbdb3c2314f1fc087dee320bfa"},
-    {file = "ruff-0.6.9-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3ef0cc774b00fec123f635ce5c547dac263f6ee9fb9cc83437c5904183b55ceb"},
-    {file = "ruff-0.6.9-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:12edd2af0c60fa61ff31cefb90aef4288ac4d372b4962c2864aeea3a1a2460c0"},
-    {file = "ruff-0.6.9-py3-none-musllinux_1_2_i686.whl", hash = "sha256:55bb01caeaf3a60b2b2bba07308a02fca6ab56233302406ed5245180a05c5625"},
-    {file = "ruff-0.6.9-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:925d26471fa24b0ce5a6cdfab1bb526fb4159952385f386bdcc643813d472039"},
-    {file = "ruff-0.6.9-py3-none-win32.whl", hash = "sha256:eb61ec9bdb2506cffd492e05ac40e5bc6284873aceb605503d8494180d6fc84d"},
-    {file = "ruff-0.6.9-py3-none-win_amd64.whl", hash = "sha256:785d31851c1ae91f45b3d8fe23b8ae4b5170089021fbb42402d811135f0b7117"},
-    {file = "ruff-0.6.9-py3-none-win_arm64.whl", hash = "sha256:a9641e31476d601f83cd602608739a0840e348bda93fec9f1ee816f8b6798b93"},
-    {file = "ruff-0.6.9.tar.gz", hash = "sha256:b076ef717a8e5bc819514ee1d602bbdca5b4420ae13a9cf61a0c0a4f53a2baa2"},
+    {file = "ruff-0.7.3-py3-none-linux_armv6l.whl", hash = "sha256:34f2339dc22687ec7e7002792d1f50712bf84a13d5152e75712ac08be565d344"},
+    {file = "ruff-0.7.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:fb397332a1879b9764a3455a0bb1087bda876c2db8aca3a3cbb67b3dbce8cda0"},
+    {file = "ruff-0.7.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:37d0b619546103274e7f62643d14e1adcbccb242efda4e4bdb9544d7764782e9"},
+    {file = "ruff-0.7.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d59f0c3ee4d1a6787614e7135b72e21024875266101142a09a61439cb6e38a5"},
+    {file = "ruff-0.7.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:44eb93c2499a169d49fafd07bc62ac89b1bc800b197e50ff4633aed212569299"},
+    {file = "ruff-0.7.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6d0242ce53f3a576c35ee32d907475a8d569944c0407f91d207c8af5be5dae4e"},
+    {file = "ruff-0.7.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:6b6224af8b5e09772c2ecb8dc9f3f344c1aa48201c7f07e7315367f6dd90ac29"},
+    {file = "ruff-0.7.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c50f95a82b94421c964fae4c27c0242890a20fe67d203d127e84fbb8013855f5"},
+    {file = "ruff-0.7.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7f3eff9961b5d2644bcf1616c606e93baa2d6b349e8aa8b035f654df252c8c67"},
+    {file = "ruff-0.7.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8963cab06d130c4df2fd52c84e9f10d297826d2e8169ae0c798b6221be1d1d2"},
+    {file = "ruff-0.7.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:61b46049d6edc0e4317fb14b33bd693245281a3007288b68a3f5b74a22a0746d"},
+    {file = "ruff-0.7.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:10ebce7696afe4644e8c1a23b3cf8c0f2193a310c18387c06e583ae9ef284de2"},
+    {file = "ruff-0.7.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:3f36d56326b3aef8eeee150b700e519880d1aab92f471eefdef656fd57492aa2"},
+    {file = "ruff-0.7.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5d024301109a0007b78d57ab0ba190087b43dce852e552734ebf0b0b85e4fb16"},
+    {file = "ruff-0.7.3-py3-none-win32.whl", hash = "sha256:4ba81a5f0c5478aa61674c5a2194de8b02652f17addf8dfc40c8937e6e7d79fc"},
+    {file = "ruff-0.7.3-py3-none-win_amd64.whl", hash = "sha256:588a9ff2fecf01025ed065fe28809cd5a53b43505f48b69a1ac7707b1b7e4088"},
+    {file = "ruff-0.7.3-py3-none-win_arm64.whl", hash = "sha256:1713e2c5545863cdbfe2cbce21f69ffaf37b813bfd1fb3b90dc9a6f1963f5a8c"},
+    {file = "ruff-0.7.3.tar.gz", hash = "sha256:e1d1ba2e40b6e71a61b063354d04be669ab0d39c352461f3d789cac68b54a313"},
 ]

 [[package]]
@ -10998,4 +11071,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.13"
-content-hash = "f20bd678044926913dbbc24bd0cf22503a75817aa55f59457ff7822032139b77"
+content-hash = "2ba4b464eebc26598f290fa94713acc44c588f902176e6efa80622911d40f0ac"
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@ -122,6 +122,7 @@ celery = "~5.4.0"
 chardet = "~5.1.0"
 cohere = "~5.2.4"
 dashscope = { version = "~1.17.0", extras = ["tokenizer"] }
+fal-client = "0.5.6"
 flask = "~3.0.1"
 flask-compress = "~1.14"
 flask-cors = "~4.0.0"
@ -278,4 +279,4 @@ pytest-mock = "~3.14.0"
 optional = true
 [tool.poetry.group.lint.dependencies]
 dotenv-linter = "~0.5.0"
-ruff = "~0.6.9"
+ruff = "~0.7.3"
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@ -1458,6 +1458,7 @@ class SegmentService:
            pre_segment_data_list = []
            segment_data_list = []
            keywords_list = []
+            position = max_position + 1 if max_position else 1
            for segment_item in segments:
                content = segment_item["content"]
                doc_id = str(uuid.uuid4())
@ -1475,7 +1476,7 @@ class SegmentService:
                    document_id=document.id,
                    index_node_id=doc_id,
                    index_node_hash=segment_hash,
-                    position=max_position + 1 if max_position else 1,
+                    position=position,
                    content=content,
                    word_count=len(content),
                    tokens=tokens,
@ -1490,6 +1491,7 @@ class SegmentService:
                increment_word_count += segment_document.word_count
                db.session.add(segment_document)
                segment_data_list.append(segment_document)
+                position += 1

                pre_segment_data_list.append(segment_document)
                if "keywords" in segment_item:
--- a/api/services/tools/api_tools_manage_service.py
+++ b/api/services/tools/api_tools_manage_service.py
@ -109,8 +109,10 @@ class ApiToolManageService:
        if schema_type not in [member.value for member in ApiProviderSchemaType]:
            raise ValueError(f"invalid schema type {schema}")

+        provider_name = provider_name.strip()
+
        # check if the provider exists
-        provider: ApiToolProvider | None = (
+        provider = (
            db.session.query(ApiToolProvider)
            .filter(
                ApiToolProvider.tenant_id == tenant_id,
@ -249,8 +251,10 @@ class ApiToolManageService:
        if schema_type not in [member.value for member in ApiProviderSchemaType]:
            raise ValueError(f"invalid schema type {schema}")

+        provider_name = provider_name.strip()
+
        # check if the provider exists
-        provider: ApiToolProvider | None = (
+        provider = (
            db.session.query(ApiToolProvider)
            .filter(
                ApiToolProvider.tenant_id == tenant_id,
@ -322,7 +326,7 @@ class ApiToolManageService:
        """
        delete tool provider
        """
-        provider: ApiToolProvider | None = (
+        provider = (
            db.session.query(ApiToolProvider)
            .filter(
                ApiToolProvider.tenant_id == tenant_id,
@ -372,7 +376,7 @@ class ApiToolManageService:
        if tool_bundle is None:
            raise ValueError(f"invalid tool name {tool_name}")

-        db_provider: ApiToolProvider | None = (
+        db_provider = (
            db.session.query(ApiToolProvider)
            .filter(
                ApiToolProvider.tenant_id == tenant_id,
--- a/api/tasks/clean_dataset_task.py
+++ b/api/tasks/clean_dataset_task.py
@ -5,6 +5,7 @@ import click
 from celery import shared_task

 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
+from core.tools.utils.rag_web_reader import get_image_upload_file_ids
 from extensions.ext_database import db
 from extensions.ext_storage import storage
 from models.dataset import (
@ -67,6 +68,16 @@ def clean_dataset_task(
                db.session.delete(document)

            for segment in segments:
+                image_upload_file_ids = get_image_upload_file_ids(segment.content)
+                for upload_file_id in image_upload_file_ids:
+                    image_file = db.session.query(UploadFile).filter(UploadFile.id == upload_file_id).first()
+                    try:
+                        storage.delete(image_file.key)
+                    except Exception:
+                        logging.exception(
+                            "Delete image_files failed when storage deleted, \
+                                          image_upload_file_is: {}".format(upload_file_id)
+                        )
                db.session.delete(segment)

        db.session.query(DatasetProcessRule).filter(DatasetProcessRule.dataset_id == dataset_id).delete()
--- a/api/tasks/clean_document_task.py
+++ b/api/tasks/clean_document_task.py
@ -6,6 +6,7 @@ import click
 from celery import shared_task

 from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
+from core.tools.utils.rag_web_reader import get_image_upload_file_ids
 from extensions.ext_database import db
 from extensions.ext_storage import storage
 from models.dataset import Dataset, DocumentSegment
@ -40,6 +41,16 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i
            index_processor.clean(dataset, index_node_ids)

            for segment in segments:
+                image_upload_file_ids = get_image_upload_file_ids(segment.content)
+                for upload_file_id in image_upload_file_ids:
+                    image_file = db.session.query(UploadFile).filter(UploadFile.id == upload_file_id).first()
+                    try:
+                        storage.delete(image_file.key)
+                    except Exception:
+                        logging.exception(
+                            "Delete image_files failed when storage deleted, \
+                                          image_upload_file_is: {}".format(upload_file_id)
+                        )
                db.session.delete(segment)

            db.session.commit()
--- a/api/tasks/document_indexing_task.py
+++ b/api/tasks/document_indexing_task.py
@ -25,7 +25,9 @@ def document_indexing_task(dataset_id: str, document_ids: list):
    start_at = time.perf_counter()

    dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
-
+    if not dataset:
+        logging.info(click.style("Dataset is not found: {}".format(dataset_id), fg="yellow"))
+        return
    # check document limit
    features = FeatureService.get_features(dataset.tenant_id)
    try:
--- a/api/tests/integration_tests/vdb/__mock/baiduvectordb.py
+++ b/api/tests/integration_tests/vdb/__mock/baiduvectordb.py
@ -1,4 +1,5 @@
 import os
+from collections import UserDict
 from unittest.mock import MagicMock

 import pytest
@ -11,7 +12,7 @@ from pymochow.model.table import Table
 from requests.adapters import HTTPAdapter


-class AttrDict(dict):
+class AttrDict(UserDict):
    def __getattr__(self, item):
        return self.get(item)

--- a/api/tests/integration_tests/vdb/__mock/upstashvectordb.py
+++ b/api/tests/integration_tests/vdb/__mock/upstashvectordb.py
@ -1,4 +1,5 @@
 import os
+from collections import UserDict
 from typing import Optional

 import pytest
@ -50,7 +51,7 @@ class MockIndex:
        return AttrDict({"dimension": 1024})


-class AttrDict(dict):
+class AttrDict(UserDict):
    def __getattr__(self, item):
        return self.get(item)

--- a/api/tests/unit_tests/core/app/segments/test_segment.py
+++ b/api/tests/unit_tests/core/app/segments/test_segment.py
@ -1,5 +1,5 @@
 from core.helper import encrypter
-from core.variables import SecretVariable, StringSegment
+from core.variables import SecretVariable, StringVariable
 from core.workflow.entities.variable_pool import VariablePool
 from core.workflow.enums import SystemVariableKey

@ -54,4 +54,5 @@ def test_convert_variable_to_segment_group():
    segments_group = variable_pool.convert_template(template)
    assert segments_group.text == "fake-user-id"
    assert segments_group.log == "fake-user-id"
-    assert segments_group.value == [StringSegment(value="fake-user-id")]
+    assert isinstance(segments_group.value[0], StringVariable)
+    assert segments_group.value[0].value == "fake-user-id"
--- a/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py
@ -140,6 +140,17 @@ def test_extract_text_from_plain_text():
    assert text == "Hello, world!"


+def test_extract_text_from_plain_text_non_utf8():
+    """A byte that is not valid UTF-8 is absent from the extracted text."""
+    import tempfile
+
+    # 0xA9 is the copyright sign in Latin-1 but not a valid UTF-8 sequence by itself.
+    raw_bytes = b"Hello, world\xa9."
+    with tempfile.NamedTemporaryFile() as tmp:
+        tmp.write(raw_bytes)
+        tmp.seek(0)
+        payload = tmp.read()
+    assert _extract_text_from_plain_text(payload) == "Hello, world."
+
+
@patch("pypdfium2.PdfDocument")
 def test_extract_text_from_pdf(mock_pdf_document):
    mock_page = Mock()
--- a/api/tests/unit_tests/oss/__mock/volcengine_tos.py
+++ b/api/tests/unit_tests/oss/__mock/volcengine_tos.py
@ -1,4 +1,5 @@
 import os
+from collections import UserDict
 from unittest.mock import MagicMock

 import pytest
@ -14,7 +15,7 @@ from tests.unit_tests.oss.__mock.base import (
 )


-class AttrDict(dict):
+class AttrDict(UserDict):
    def __getattr__(self, item):
        return self.get(item)