Mirror of https://github.com/langgenius/dify.git

Merge branch 'main' into feat/r2
@@ -1,3 +1,4 @@
+import logging
 import time
 from collections.abc import Generator, Mapping, Sequence
 from typing import TYPE_CHECKING, Any, Optional, Union
@@ -33,6 +34,8 @@ from models.model import App, AppMode, Message, MessageAnnotation
 if TYPE_CHECKING:
     from core.file.models import File

+_logger = logging.getLogger(__name__)
+

 class AppRunner:
     def get_pre_calculate_rest_tokens(
@@ -298,7 +301,7 @@ class AppRunner:
         )

     def _handle_invoke_result_stream(
-        self, invoke_result: Generator, queue_manager: AppQueueManager, agent: bool
+        self, invoke_result: Generator[LLMResultChunk, None, None], queue_manager: AppQueueManager, agent: bool
     ) -> None:
         """
         Handle invoke result
@@ -317,18 +320,28 @@ class AppRunner:
             else:
                 queue_manager.publish(QueueAgentMessageEvent(chunk=result), PublishFrom.APPLICATION_MANAGER)

-            text += result.delta.message.content
+            message = result.delta.message
+            if isinstance(message.content, str):
+                text += message.content
+            elif isinstance(message.content, list):
+                for content in message.content:
+                    if not isinstance(content, str):
+                        # TODO(QuantumGhost): Add multimodal output support for easy ui.
+                        _logger.warning("received multimodal output, type=%s", type(content))
+                        text += content.data
+                    else:
+                        text += content  # fallback to str

             if not model:
                 model = result.model

             if not prompt_messages:
-                prompt_messages = result.prompt_messages
+                prompt_messages = list(result.prompt_messages)

             if result.delta.usage:
                 usage = result.delta.usage

-        if not usage:
+        if usage is None:
             usage = LLMUsage.empty_usage()

         llm_result = LLMResult(
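
The replacement block above accumulates streamed text whether `result.delta.message.content` arrives as a plain string or as a list of content parts. Below is a minimal, self-contained sketch of that accumulation pattern; the `TextPart` and `ImagePart` classes and the sample chunks are hypothetical stand-ins, not dify's actual content models.

from dataclasses import dataclass
from typing import Union

# Hypothetical stand-ins for the runtime's message content-part objects.
@dataclass
class TextPart:
    data: str

@dataclass
class ImagePart:
    data: str  # e.g. a URL or base64 payload

Part = Union[str, TextPart, ImagePart]

def accumulate(deltas: list[Union[str, list[Part]]]) -> str:
    """Flatten streamed delta contents into one text string."""
    text = ""
    for content in deltas:
        if isinstance(content, str):
            text += content
        elif isinstance(content, list):
            for part in content:
                if isinstance(part, str):
                    text += part  # plain string part
                else:
                    # Non-string parts degrade to their textual payload,
                    # mirroring the warn-and-append path in the hunk above.
                    text += part.data
    return text

print(accumulate(["Hello ", [TextPart("wor"), "ld"], [ImagePart("<image>")]]))  # Hello world<image>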

@@ -48,6 +48,7 @@ from core.model_manager import ModelInstance
 from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
 from core.model_runtime.entities.message_entities import (
     AssistantPromptMessage,
+    TextPromptMessageContent,
 )
 from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
 from core.ops.entities.trace_entity import TraceTaskName
@@ -309,6 +310,23 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline):
             delta_text = chunk.delta.message.content
             if delta_text is None:
                 continue
+            if isinstance(chunk.delta.message.content, list):
+                delta_text = ""
+                for content in chunk.delta.message.content:
+                    logger.debug(
+                        "The content type %s in LLM chunk delta message content: %r", type(content), content
+                    )
+                    if isinstance(content, TextPromptMessageContent):
+                        delta_text += content.data
+                    elif isinstance(content, str):
+                        delta_text += content  # fallback to str
+                    else:
+                        logger.warning(
+                            "Unsupported content type %s in LLM chunk delta message content: %r",
+                            type(content),
+                            content,
+                        )
+                        continue

             if not self._task_state.llm_result.prompt_messages:
                 self._task_state.llm_result.prompt_messages = chunk.prompt_messages

@@ -80,6 +80,23 @@ class OceanBaseVector(BaseVector):

             self.delete()

+            vals = []
+            params = self._client.perform_raw_text_sql("SHOW PARAMETERS LIKE '%ob_vector_memory_limit_percentage%'")
+            for row in params:
+                val = int(row[6])
+                vals.append(val)
+            if len(vals) == 0:
+                raise ValueError("ob_vector_memory_limit_percentage not found in parameters.")
+            if any(val == 0 for val in vals):
+                try:
+                    self._client.perform_raw_text_sql("ALTER SYSTEM SET ob_vector_memory_limit_percentage = 30")
+                except Exception as e:
+                    raise Exception(
+                        "Failed to set ob_vector_memory_limit_percentage. "
+                        + "Maybe the database user has insufficient privilege.",
+                        e,
+                    )
+
             cols = [
                 Column("id", String(36), primary_key=True, autoincrement=False),
                 Column("vector", VECTOR(self._vec_dim)),
@@ -110,22 +127,6 @@ class OceanBaseVector(BaseVector):
                         + "to support fulltext index and vector index in the same table",
                         e,
                     )
-            vals = []
-            params = self._client.perform_raw_text_sql("SHOW PARAMETERS LIKE '%ob_vector_memory_limit_percentage%'")
-            for row in params:
-                val = int(row[6])
-                vals.append(val)
-            if len(vals) == 0:
-                raise ValueError("ob_vector_memory_limit_percentage not found in parameters.")
-            if any(val == 0 for val in vals):
-                try:
-                    self._client.perform_raw_text_sql("ALTER SYSTEM SET ob_vector_memory_limit_percentage = 30")
-                except Exception as e:
-                    raise Exception(
-                        "Failed to set ob_vector_memory_limit_percentage. "
-                        + "Maybe the database user has insufficient privilege.",
-                        e,
-                    )
             redis_client.set(collection_exist_cache_key, 1, ex=3600)

     def _check_hybrid_search_support(self) -> bool:
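
For reference, the parameter check hoisted above can be reproduced against the database directly. A rough standalone sketch using pymysql follows; the connection settings are placeholders (assuming a MySQL-compatible OceanBase endpoint), while the SQL statements and the value-column index (row[6]) come from the hunks above.

import pymysql

# Placeholder connection settings; adjust for your OceanBase deployment.
conn = pymysql.connect(host="127.0.0.1", port=2881, user="root", password="", database="test")

try:
    with conn.cursor() as cur:
        cur.execute("SHOW PARAMETERS LIKE '%ob_vector_memory_limit_percentage%'")
        rows = cur.fetchall()
        if not rows:
            raise ValueError("ob_vector_memory_limit_percentage not found in parameters.")
        # Column 6 holds the current value, as in the code above.
        values = [int(row[6]) for row in rows]
        if any(v == 0 for v in values):
            # Requires a user with ALTER SYSTEM privilege.
            cur.execute("ALTER SYSTEM SET ob_vector_memory_limit_percentage = 30")
finally:
    conn.close()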

@@ -6,7 +6,7 @@ import json
 import logging
 from typing import Optional, Union

-from sqlalchemy import select
+from sqlalchemy import func, select
 from sqlalchemy.engine import Engine
 from sqlalchemy.orm import sessionmaker

@@ -151,11 +151,11 @@ class SQLAlchemyWorkflowExecutionRepository(WorkflowExecutionRepository):
             existing = session.scalar(select(WorkflowRun).where(WorkflowRun.id == domain_model.id_))
             if not existing:
                 # For new records, get the next sequence number
-                stmt = select(WorkflowRun.sequence_number).where(
+                stmt = select(func.max(WorkflowRun.sequence_number)).where(
                     WorkflowRun.app_id == self._app_id,
                     WorkflowRun.tenant_id == self._tenant_id,
                 )
-                max_sequence = session.scalar(stmt.order_by(WorkflowRun.sequence_number.desc()))
+                max_sequence = session.scalar(stmt)
                 db_model.sequence_number = (max_sequence or 0) + 1
             else:
                 # For updates, keep the existing sequence number
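
The repository change swaps an ORDER BY ... DESC scalar fetch for a MAX() aggregate, so the database returns the highest sequence number directly. A small illustration of the same pattern against a throwaway SQLite model follows; the `Run` class and in-memory engine exist only for this example.

from sqlalchemy import Integer, String, create_engine, func, select
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column

class Base(DeclarativeBase):
    pass

class Run(Base):
    __tablename__ = "runs"
    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    app_id: Mapped[str] = mapped_column(String(36))
    sequence_number: Mapped[int] = mapped_column(Integer)

engine = create_engine("sqlite://")  # in-memory database for the example
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all([Run(app_id="app-1", sequence_number=1), Run(app_id="app-1", sequence_number=2)])
    session.commit()

    # MAX(sequence_number) scoped to one app; scalar() yields None when no rows match.
    stmt = select(func.max(Run.sequence_number)).where(Run.app_id == "app-1")
    max_sequence = session.scalar(stmt)
    next_sequence = (max_sequence or 0) + 1
    print(next_sequence)  # -> 3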

@@ -6,7 +6,6 @@ from pydantic import BaseModel, Field
 from core.model_runtime.entities.llm_entities import LLMUsage
 from core.rag.entities.citation_metadata import RetrievalSourceMetadata
 from core.workflow.entities.node_entities import NodeRunResult
-from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus


 class RunCompletedEvent(BaseModel):
@@ -39,11 +38,3 @@ class RunRetryEvent(BaseModel):
     error: str = Field(..., description="error")
     retry_index: int = Field(..., description="Retry attempt number")
     start_at: datetime = Field(..., description="Retry start time")
-
-
-class SingleStepRetryEvent(NodeRunResult):
-    """Single step retry event"""
-
-    status: WorkflowNodeExecutionStatus = WorkflowNodeExecutionStatus.RETRY
-
-    elapsed_time: float = Field(..., description="elapsed time")

@@ -525,6 +525,8 @@ class LLMNode(BaseNode[LLMNodeData]):
         # Set appropriate response format based on model capabilities
         self._set_response_format(completion_params, model_schema.parameter_rules)
         model_config_with_cred.parameters = completion_params
+        # NOTE(-LAN-): This line modifies `self.node_data.model`, which is used in `_invoke_llm()`.
+        node_data_model.completion_params = completion_params
         return model, model_config_with_cred

     def _fetch_prompt_messages(
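
The NOTE added above points out that `node_data_model` is the same object later read by `_invoke_llm()`, so assigning `completion_params` here is an in-place update seen by every holder of that reference. A tiny standalone illustration of that aliasing, using a hypothetical `ModelConfig` pydantic model:

from pydantic import BaseModel

class ModelConfig(BaseModel):
    name: str = "example-model"
    completion_params: dict = {}

node_data_model = ModelConfig()
alias = node_data_model  # a second reference to the same instance

alias.completion_params = {"temperature": 0.7}

# The update is visible through every reference because no copy was made.
print(node_data_model.completion_params)  # -> {'temperature': 0.7}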