Merge branch 'main' into feat/add-flashrank

This commit is contained in:
jyong
2024-06-06 14:03:55 +08:00
1105 changed files with 47905 additions and 8602 deletions

View File

@ -39,6 +39,7 @@ from core.tools.entities.tool_entities import (
from core.tools.tool.dataset_retriever_tool import DatasetRetrieverTool
from core.tools.tool.tool import Tool
from core.tools.tool_manager import ToolManager
from core.tools.utils.tool_parameter_converter import ToolParameterConverter
from extensions.ext_database import db
from models.model import Conversation, Message, MessageAgentThought
from models.tools import ToolConversationVariables
@ -128,6 +129,8 @@ class BaseAgentRunner(AppRunner):
self.files = application_generate_entity.files
else:
self.files = []
self.query = None
self._current_thoughts: list[PromptMessage] = []
def _repack_app_generate_entity(self, app_generate_entity: AgentChatAppGenerateEntity) \
-> AgentChatAppGenerateEntity:
@ -165,6 +168,7 @@ class BaseAgentRunner(AppRunner):
tenant_id=self.tenant_id,
app_id=self.app_config.app_id,
agent_tool=tool,
invoke_from=self.application_generate_entity.invoke_from
)
tool_entity.load_variables(self.variables_pool)
@ -183,21 +187,11 @@ class BaseAgentRunner(AppRunner):
if parameter.form != ToolParameter.ToolParameterForm.LLM:
continue
parameter_type = 'string'
parameter_type = ToolParameterConverter.get_parameter_type(parameter.type)
enum = []
if parameter.type == ToolParameter.ToolParameterType.STRING:
parameter_type = 'string'
elif parameter.type == ToolParameter.ToolParameterType.BOOLEAN:
parameter_type = 'boolean'
elif parameter.type == ToolParameter.ToolParameterType.NUMBER:
parameter_type = 'number'
elif parameter.type == ToolParameter.ToolParameterType.SELECT:
for option in parameter.options:
enum.append(option.value)
parameter_type = 'string'
else:
raise ValueError(f"parameter type {parameter.type} is not supported")
if parameter.type == ToolParameter.ToolParameterType.SELECT:
enum = [option.value for option in parameter.options]
message_tool.parameters['properties'][parameter.name] = {
"type": parameter_type,
"description": parameter.llm_description or '',
@ -278,20 +272,10 @@ class BaseAgentRunner(AppRunner):
if parameter.form != ToolParameter.ToolParameterForm.LLM:
continue
parameter_type = 'string'
parameter_type = ToolParameterConverter.get_parameter_type(parameter.type)
enum = []
if parameter.type == ToolParameter.ToolParameterType.STRING:
parameter_type = 'string'
elif parameter.type == ToolParameter.ToolParameterType.BOOLEAN:
parameter_type = 'boolean'
elif parameter.type == ToolParameter.ToolParameterType.NUMBER:
parameter_type = 'number'
elif parameter.type == ToolParameter.ToolParameterType.SELECT:
for option in parameter.options:
enum.append(option.value)
parameter_type = 'string'
else:
raise ValueError(f"parameter type {parameter.type} is not supported")
if parameter.type == ToolParameter.ToolParameterType.SELECT:
enum = [option.value for option in parameter.options]
prompt_tool.parameters['properties'][parameter.name] = {
"type": parameter_type,
@ -463,7 +447,7 @@ class BaseAgentRunner(AppRunner):
for message in messages:
if message.id == self.message.id:
continue
result.append(self.organize_agent_user_prompt(message))
agent_thoughts: list[MessageAgentThought] = message.agent_thoughts
if agent_thoughts:
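The removed if/elif chains above (in both hunks of this file) are collapsed into ToolParameterConverter.get_parameter_type. A minimal sketch of what that helper presumably does, reconstructed from the deleted branches; the real implementation in core/tools/utils/tool_parameter_converter.py may differ:

from core.tools.entities.tool_entities import ToolParameter

class ToolParameterConverter:
    @staticmethod
    def get_parameter_type(parameter_type) -> str:
        # SELECT options are plain strings from the model's point of view,
        # so both STRING and SELECT map to 'string'.
        if parameter_type in (ToolParameter.ToolParameterType.STRING,
                              ToolParameter.ToolParameterType.SELECT):
            return 'string'
        elif parameter_type == ToolParameter.ToolParameterType.BOOLEAN:
            return 'boolean'
        elif parameter_type == ToolParameter.ToolParameterType.NUMBER:
            return 'number'
        raise ValueError(f"parameter type {parameter_type} is not supported")

Note that SELECT still maps to 'string' and the enum of option values is attached separately, exactly as the surviving lines above do.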

View File

@ -15,6 +15,7 @@ from core.model_runtime.entities.message_entities import (
ToolPromptMessage,
UserPromptMessage,
)
from core.prompt.agent_history_prompt_transform import AgentHistoryPromptTransform
from core.tools.entities.tool_entities import ToolInvokeMeta
from core.tools.tool.tool import Tool
from core.tools.tool_engine import ToolEngine
@ -121,7 +122,7 @@ class CotAgentRunner(BaseAgentRunner, ABC):
raise ValueError("failed to invoke llm")
usage_dict = {}
react_chunks = CotAgentOutputParser.handle_react_stream_output(chunks)
react_chunks = CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)
scratchpad = AgentScratchpadUnit(
agent_response='',
thought='',
@ -189,7 +190,7 @@ class CotAgentRunner(BaseAgentRunner, ABC):
if not scratchpad.action:
# failed to extract action, return final answer directly
final_answer = scratchpad.agent_response or ''
final_answer = ''
else:
if scratchpad.action.action_name.lower() == "final answer":
# action is final answer, return final answer directly
@ -373,7 +374,7 @@ class CotAgentRunner(BaseAgentRunner, ABC):
return message
def _organize_historic_prompt_messages(self) -> list[PromptMessage]:
def _organize_historic_prompt_messages(self, current_session_messages: list[PromptMessage] = None) -> list[PromptMessage]:
"""
organize historic prompt messages
"""
@ -381,6 +382,13 @@ class CotAgentRunner(BaseAgentRunner, ABC):
scratchpad: list[AgentScratchpadUnit] = []
current_scratchpad: AgentScratchpadUnit = None
self.history_prompt_messages = AgentHistoryPromptTransform(
model_config=self.model_config,
prompt_messages=current_session_messages or [],
history_messages=self.history_prompt_messages,
memory=self.memory
).get_prompt()
for message in self.history_prompt_messages:
if isinstance(message, AssistantPromptMessage):
current_scratchpad = AgentScratchpadUnit(
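AgentHistoryPromptTransform is introduced here and in the function-call runner below to trim conversation history so that history plus the current session messages fit the model's context window. The class itself is not shown in this diff; the following standalone function is only a sketch of the underlying idea, with fit_history and count_tokens as assumed names:

from collections.abc import Callable

def fit_history(history: list, current: list, max_tokens: int,
                count_tokens: Callable[[list], int]) -> list:
    # Reserve room for the current session messages, then keep as many
    # of the most recent history messages as still fit.
    budget = max_tokens - count_tokens(current)
    kept = []
    for message in reversed(history):
        cost = count_tokens([message])
        if cost > budget:
            break
        budget -= cost
        kept.append(message)
    kept.reverse()
    return kept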

View File

@ -32,9 +32,6 @@ class CotChatAgentRunner(CotAgentRunner):
# organize system prompt
system_message = self._organize_system_prompt()
# organize historic prompt messages
historic_messages = self._historic_prompt_messages
# organize current assistant messages
agent_scratchpad = self._agent_scratchpad
if not agent_scratchpad:
@ -57,6 +54,13 @@ class CotChatAgentRunner(CotAgentRunner):
query_messages = UserPromptMessage(content=self._query)
if assistant_messages:
# organize historic prompt messages
historic_messages = self._organize_historic_prompt_messages([
system_message,
query_messages,
*assistant_messages,
UserPromptMessage(content='continue')
])
messages = [
system_message,
*historic_messages,
@ -65,6 +69,8 @@ class CotChatAgentRunner(CotAgentRunner):
UserPromptMessage(content='continue')
]
else:
# organize historic prompt messages
historic_messages = self._organize_historic_prompt_messages([system_message, query_messages])
messages = [system_message, *historic_messages, query_messages]
# join all messages

View File

@ -19,11 +19,11 @@ class CotCompletionAgentRunner(CotAgentRunner):
return system_prompt
def _organize_historic_prompt(self) -> str:
def _organize_historic_prompt(self, current_session_messages: list[PromptMessage] = None) -> str:
"""
Organize historic prompt
"""
historic_prompt_messages = self._historic_prompt_messages
historic_prompt_messages = self._organize_historic_prompt_messages(current_session_messages)
historic_prompt = ""
for message in historic_prompt_messages:

View File

@ -8,7 +8,7 @@ class AgentToolEntity(BaseModel):
"""
Agent Tool Entity.
"""
provider_type: Literal["builtin", "api"]
provider_type: Literal["builtin", "api", "workflow"]
provider_id: str
tool_name: str
tool_parameters: dict[str, Any] = {}
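With "workflow" added to the Literal, an agent tool can now point at a workflow app published as a tool. An illustrative construction (all field values are placeholders):

from core.agent.entities import AgentToolEntity

tool = AgentToolEntity(
    provider_type="workflow",          # newly allowed by this change
    provider_id="<workflow-app-id>",   # placeholder
    tool_name="summarize_documents",   # placeholder
    tool_parameters={},
)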

View File

@ -17,6 +17,7 @@ from core.model_runtime.entities.message_entities import (
ToolPromptMessage,
UserPromptMessage,
)
from core.prompt.agent_history_prompt_transform import AgentHistoryPromptTransform
from core.tools.entities.tool_entities import ToolInvokeMeta
from core.tools.tool_engine import ToolEngine
from models.model import Message
@ -24,21 +25,18 @@ from models.model import Message
logger = logging.getLogger(__name__)
class FunctionCallAgentRunner(BaseAgentRunner):
def run(self,
message: Message, query: str, **kwargs: Any
) -> Generator[LLMResultChunk, None, None]:
"""
Run FunctionCall agent application
"""
self.query = query
app_generate_entity = self.application_generate_entity
app_config = self.app_config
prompt_template = app_config.prompt_template.simple_prompt_template or ''
prompt_messages = self.history_prompt_messages
prompt_messages = self._init_system_message(prompt_template, prompt_messages)
prompt_messages = self._organize_user_query(query, prompt_messages)
# convert tools into ModelRuntime Tool format
tool_instances, prompt_messages_tools = self._init_prompt_tools()
@ -81,6 +79,7 @@ class FunctionCallAgentRunner(BaseAgentRunner):
)
# recalc llm max tokens
prompt_messages = self._organize_prompt_messages()
self.recalc_llm_max_tokens(self.model_config, prompt_messages)
# invoke model
chunks: Union[Generator[LLMResultChunk, None, None], LLMResult] = model_instance.invoke_llm(
@ -203,7 +202,7 @@ class FunctionCallAgentRunner(BaseAgentRunner):
else:
assistant_message.content = response
prompt_messages.append(assistant_message)
self._current_thoughts.append(assistant_message)
# save thought
self.save_agent_thought(
@ -265,12 +264,14 @@ class FunctionCallAgentRunner(BaseAgentRunner):
}
tool_responses.append(tool_response)
prompt_messages = self._organize_assistant_message(
tool_call_id=tool_call_id,
tool_call_name=tool_call_name,
tool_response=tool_response['tool_response'],
prompt_messages=prompt_messages,
)
if tool_response['tool_response'] is not None:
self._current_thoughts.append(
ToolPromptMessage(
content=tool_response['tool_response'],
tool_call_id=tool_call_id,
name=tool_call_name,
)
)
if len(tool_responses) > 0:
# save agent thought
@ -300,8 +301,6 @@ class FunctionCallAgentRunner(BaseAgentRunner):
iteration_step += 1
prompt_messages = self._clear_user_prompt_image_messages(prompt_messages)
self.update_db_variables(self.variables_pool, self.db_variables_pool)
# publish end event
self.queue_manager.publish(QueueMessageEndEvent(llm_result=LLMResult(
@ -393,24 +392,6 @@ class FunctionCallAgentRunner(BaseAgentRunner):
return prompt_messages
def _organize_assistant_message(self, tool_call_id: str = None, tool_call_name: str = None, tool_response: str = None,
prompt_messages: list[PromptMessage] = None) -> list[PromptMessage]:
"""
Organize assistant message
"""
prompt_messages = deepcopy(prompt_messages)
if tool_response is not None:
prompt_messages.append(
ToolPromptMessage(
content=tool_response,
tool_call_id=tool_call_id,
name=tool_call_name,
)
)
return prompt_messages
def _clear_user_prompt_image_messages(self, prompt_messages: list[PromptMessage]) -> list[PromptMessage]:
"""
As of now, GPT supports both function calling and vision in the first iteration.
@ -428,4 +409,26 @@ class FunctionCallAgentRunner(BaseAgentRunner):
for content in prompt_message.content
])
return prompt_messages
return prompt_messages
def _organize_prompt_messages(self):
prompt_template = self.app_config.prompt_template.simple_prompt_template or ''
self.history_prompt_messages = self._init_system_message(prompt_template, self.history_prompt_messages)
query_prompt_messages = self._organize_user_query(self.query, [])
self.history_prompt_messages = AgentHistoryPromptTransform(
model_config=self.model_config,
prompt_messages=[*query_prompt_messages, *self._current_thoughts],
history_messages=self.history_prompt_messages,
memory=self.memory
).get_prompt()
prompt_messages = [
*self.history_prompt_messages,
*query_prompt_messages,
*self._current_thoughts
]
if len(self._current_thoughts) != 0:
# clear messages after the first iteration
prompt_messages = self._clear_user_prompt_image_messages(prompt_messages)
return prompt_messages
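_organize_prompt_messages rebuilds the prompt on every iteration instead of mutating one shared list: trimmed history first, then the current user query, then the tool-call thoughts accumulated during this run. An illustrative example of the resulting order (message contents are made up):

from core.model_runtime.entities.message_entities import (
    AssistantPromptMessage,
    UserPromptMessage,
)

history = [UserPromptMessage(content="earlier question"),
           AssistantPromptMessage(content="earlier answer")]
query_prompt_messages = [UserPromptMessage(content="current question")]
current_thoughts = [AssistantPromptMessage(content="(tool call)")]

# same ordering as _organize_prompt_messages above
prompt_messages = [*history, *query_prompt_messages, *current_thoughts]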

View File

@ -9,7 +9,7 @@ from core.model_runtime.entities.llm_entities import LLMResultChunk
class CotAgentOutputParser:
@classmethod
def handle_react_stream_output(cls, llm_response: Generator[LLMResultChunk, None, None]) -> \
def handle_react_stream_output(cls, llm_response: Generator[LLMResultChunk, None, None], usage_dict: dict) -> \
Generator[Union[str, AgentScratchpadUnit.Action], None, None]:
def parse_action(json_str):
try:
@ -58,6 +58,8 @@ class CotAgentOutputParser:
thought_idx = 0
for response in llm_response:
if response.delta.usage:
usage_dict['usage'] = response.delta.usage
response = response.delta.message.content
if not isinstance(response, str):
continue
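handle_react_stream_output is a generator, so it cannot return usage statistics to its caller through a normal return value while chunks are still streaming; instead the caller passes in a mutable usage_dict that the generator fills as a side effect. The pattern in isolation, with made-up chunk dicts:

from collections.abc import Generator

def stream_with_usage(chunks, usage_dict: dict) -> Generator[str, None, None]:
    # Writes to usage_dict are visible to the caller immediately,
    # because both sides hold a reference to the same dict.
    for chunk in chunks:
        if chunk.get('usage'):
            usage_dict['usage'] = chunk['usage']
        yield chunk['text']

usage: dict = {}
for text in stream_with_usage([{'text': 'hi', 'usage': {'tokens': 3}}], usage):
    print(text)
print(usage)  # {'usage': {'tokens': 3}}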

View File

@ -11,7 +11,7 @@ class SensitiveWordAvoidanceConfigManager:
if not sensitive_word_avoidance_dict:
return None
if 'enabled' in sensitive_word_avoidance_dict and sensitive_word_avoidance_dict['enabled']:
if sensitive_word_avoidance_dict.get('enabled'):
return SensitiveWordAvoidanceEntity(
type=sensitive_word_avoidance_dict.get('type'),
config=sensitive_word_avoidance_dict.get('config'),
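This and the config managers below all replace the two-step `'enabled' in d and d['enabled']` with `d.get('enabled')`. The forms are equivalent under truth testing, since get returns None when the key is missing:

for d in ({}, {'enabled': False}, {'enabled': True}):
    old = 'enabled' in d and d['enabled']
    new = d.get('enabled')
    assert bool(old) == bool(new)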

View File

@ -1,7 +1,7 @@
from typing import Optional
from core.agent.entities import AgentEntity, AgentPromptEntity, AgentToolEntity
from core.tools.prompt.template import REACT_PROMPT_TEMPLATES
from core.agent.prompt.template import REACT_PROMPT_TEMPLATES
class AgentConfigManager:

View File

@ -239,4 +239,4 @@ class WorkflowUIBasedAppConfig(AppConfig):
"""
Workflow UI Based App Config Entity.
"""
workflow_id: str
workflow_id: str

View File

@ -14,7 +14,7 @@ class FileUploadConfigManager:
"""
file_upload_dict = config.get('file_upload')
if file_upload_dict:
if 'image' in file_upload_dict and file_upload_dict['image']:
if file_upload_dict.get('image'):
if 'enabled' in file_upload_dict['image'] and file_upload_dict['image']['enabled']:
image_config = {
'number_limits': file_upload_dict['image']['number_limits'],

View File

@ -9,7 +9,7 @@ class MoreLikeThisConfigManager:
more_like_this = False
more_like_this_dict = config.get('more_like_this')
if more_like_this_dict:
if 'enabled' in more_like_this_dict and more_like_this_dict['enabled']:
if more_like_this_dict.get('enabled'):
more_like_this = True
return more_like_this

View File

@ -4,7 +4,7 @@ class RetrievalResourceConfigManager:
show_retrieve_source = False
retriever_resource_dict = config.get('retriever_resource')
if retriever_resource_dict:
if 'enabled' in retriever_resource_dict and retriever_resource_dict['enabled']:
if retriever_resource_dict.get('enabled'):
show_retrieve_source = True
return show_retrieve_source

View File

@ -9,7 +9,7 @@ class SpeechToTextConfigManager:
speech_to_text = False
speech_to_text_dict = config.get('speech_to_text')
if speech_to_text_dict:
if 'enabled' in speech_to_text_dict and speech_to_text_dict['enabled']:
if speech_to_text_dict.get('enabled'):
speech_to_text = True
return speech_to_text

View File

@ -9,7 +9,7 @@ class SuggestedQuestionsAfterAnswerConfigManager:
suggested_questions_after_answer = False
suggested_questions_after_answer_dict = config.get('suggested_questions_after_answer')
if suggested_questions_after_answer_dict:
if 'enabled' in suggested_questions_after_answer_dict and suggested_questions_after_answer_dict['enabled']:
if suggested_questions_after_answer_dict.get('enabled'):
suggested_questions_after_answer = True
return suggested_questions_after_answer

View File

@ -12,7 +12,7 @@ class TextToSpeechConfigManager:
text_to_speech = False
text_to_speech_dict = config.get('text_to_speech')
if text_to_speech_dict:
if 'enabled' in text_to_speech_dict and text_to_speech_dict['enabled']:
if text_to_speech_dict.get('enabled'):
text_to_speech = TextToSpeechEntity(
enabled=text_to_speech_dict.get('enabled'),
voice=text_to_speech_dict.get('voice'),

View File

@ -66,7 +66,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
conversation = self._get_conversation_by_user(app_model, args.get('conversation_id'), user)
# parse files
files = args['files'] if 'files' in args and args['files'] else []
files = args['files'] if args.get('files') else []
message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id)
file_extra_config = FileUploadConfigManager.convert(workflow.features_dict, is_vision=False)
if file_extra_config:
@ -98,6 +98,90 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
extras=extras
)
return self._generate(
app_model=app_model,
workflow=workflow,
user=user,
invoke_from=invoke_from,
application_generate_entity=application_generate_entity,
conversation=conversation,
stream=stream
)
def single_iteration_generate(self, app_model: App,
workflow: Workflow,
node_id: str,
user: Account,
args: dict,
stream: bool = True) \
-> Union[dict, Generator[dict, None, None]]:
"""
Generate App response.
:param app_model: App
:param workflow: Workflow
:param user: account or end user
:param args: request args
:param node_id: iteration node id
:param stream: is stream
"""
if not node_id:
raise ValueError('node_id is required')
if args.get('inputs') is None:
raise ValueError('inputs is required')
extras = {
"auto_generate_conversation_name": False
}
# get conversation
conversation = None
if args.get('conversation_id'):
conversation = self._get_conversation_by_user(app_model, args.get('conversation_id'), user)
# convert to app config
app_config = AdvancedChatAppConfigManager.get_app_config(
app_model=app_model,
workflow=workflow
)
# init application generate entity
application_generate_entity = AdvancedChatAppGenerateEntity(
task_id=str(uuid.uuid4()),
app_config=app_config,
conversation_id=conversation.id if conversation else None,
inputs={},
query='',
files=[],
user_id=user.id,
stream=stream,
invoke_from=InvokeFrom.DEBUGGER,
extras=extras,
single_iteration_run=AdvancedChatAppGenerateEntity.SingleIterationRunEntity(
node_id=node_id,
inputs=args['inputs']
)
)
return self._generate(
app_model=app_model,
workflow=workflow,
user=user,
invoke_from=InvokeFrom.DEBUGGER,
application_generate_entity=application_generate_entity,
conversation=conversation,
stream=stream
)
def _generate(self, app_model: App,
workflow: Workflow,
user: Union[Account, EndUser],
invoke_from: InvokeFrom,
application_generate_entity: AdvancedChatAppGenerateEntity,
conversation: Conversation = None,
stream: bool = True) \
-> Union[dict, Generator[dict, None, None]]:
is_first_conversation = False
if not conversation:
is_first_conversation = True
@ -167,18 +251,30 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
"""
with flask_app.app_context():
try:
# get conversation and message
conversation = self._get_conversation(conversation_id)
message = self._get_message(message_id)
# chatbot app
runner = AdvancedChatAppRunner()
runner.run(
application_generate_entity=application_generate_entity,
queue_manager=queue_manager,
conversation=conversation,
message=message
)
if application_generate_entity.single_iteration_run:
single_iteration_run = application_generate_entity.single_iteration_run
runner.single_iteration_run(
app_id=application_generate_entity.app_config.app_id,
workflow_id=application_generate_entity.app_config.workflow_id,
queue_manager=queue_manager,
inputs=single_iteration_run.inputs,
node_id=single_iteration_run.node_id,
user_id=application_generate_entity.user_id
)
else:
# get conversation and message
conversation = self._get_conversation(conversation_id)
message = self._get_message(message_id)
# chatbot app
runner = AdvancedChatAppRunner()
runner.run(
application_generate_entity=application_generate_entity,
queue_manager=queue_manager,
conversation=conversation,
message=message
)
except GenerateTaskStoppedException:
pass
except InvokeAuthorizationError:

View File

@ -102,6 +102,7 @@ class AdvancedChatAppRunner(AppRunner):
user_from=UserFrom.ACCOUNT
if application_generate_entity.invoke_from in [InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER]
else UserFrom.END_USER,
invoke_from=application_generate_entity.invoke_from,
user_inputs=inputs,
system_inputs={
SystemVariable.QUERY: query,
@ -109,6 +110,35 @@ class AdvancedChatAppRunner(AppRunner):
SystemVariable.CONVERSATION_ID: conversation.id,
SystemVariable.USER_ID: user_id
},
callbacks=workflow_callbacks,
call_depth=application_generate_entity.call_depth
)
def single_iteration_run(self, app_id: str, workflow_id: str,
queue_manager: AppQueueManager,
inputs: dict, node_id: str, user_id: str) -> None:
"""
Single iteration run
"""
app_record: App = db.session.query(App).filter(App.id == app_id).first()
if not app_record:
raise ValueError("App not found")
workflow = self.get_workflow(app_model=app_record, workflow_id=workflow_id)
if not workflow:
raise ValueError("Workflow not initialized")
workflow_callbacks = [WorkflowEventTriggerCallback(
queue_manager=queue_manager,
workflow=workflow
)]
workflow_engine_manager = WorkflowEngineManager()
workflow_engine_manager.single_step_run_iteration_workflow_node(
workflow=workflow,
node_id=node_id,
user_id=user_id,
user_inputs=inputs,
callbacks=workflow_callbacks
)
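single_iteration_run lets the debugger re-execute a single iteration node against supplied inputs, bypassing the normal conversation flow. A hypothetical invocation (all IDs and the input shape are placeholders):

runner = AdvancedChatAppRunner()
runner.single_iteration_run(
    app_id="<app-id>",                 # placeholder
    workflow_id="<workflow-id>",       # placeholder
    queue_manager=queue_manager,       # the AppQueueManager created by the generator
    inputs={"iterator_selector": ["a", "b", "c"]},  # assumed input shape
    node_id="<iteration-node-id>",     # placeholder
    user_id="<user-id>",               # placeholder
)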

View File

@ -8,6 +8,8 @@ from core.app.entities.task_entities import (
ChatbotAppStreamResponse,
ErrorStreamResponse,
MessageEndStreamResponse,
NodeFinishStreamResponse,
NodeStartStreamResponse,
PingStreamResponse,
)
@ -111,6 +113,8 @@ class AdvancedChatAppGenerateResponseConverter(AppGenerateResponseConverter):
if isinstance(sub_stream_response, ErrorStreamResponse):
data = cls._error_to_stream_response(sub_stream_response.err)
response_chunk.update(data)
elif isinstance(sub_stream_response, NodeStartStreamResponse | NodeFinishStreamResponse):
response_chunk.update(sub_stream_response.to_ignore_detail_dict())
else:
response_chunk.update(sub_stream_response.to_dict())

View File

@ -12,6 +12,9 @@ from core.app.entities.queue_entities import (
QueueAdvancedChatMessageEndEvent,
QueueAnnotationReplyEvent,
QueueErrorEvent,
QueueIterationCompletedEvent,
QueueIterationNextEvent,
QueueIterationStartEvent,
QueueMessageReplaceEvent,
QueueNodeFailedEvent,
QueueNodeStartedEvent,
@ -64,6 +67,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
_workflow: Workflow
_user: Union[Account, EndUser]
_workflow_system_variables: dict[SystemVariable, Any]
_iteration_nested_relations: dict[str, list[str]]
def __init__(self, application_generate_entity: AdvancedChatAppGenerateEntity,
workflow: Workflow,
@ -103,6 +107,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
usage=LLMUsage.empty_usage()
)
self._iteration_nested_relations = self._get_iteration_nested_relations(self._workflow.graph_dict)
self._stream_generate_routes = self._get_stream_generate_routes()
self._conversation_name_generate_thread = None
@ -204,6 +209,8 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
# search stream_generate_routes if node id is answer start at node
if not self._task_state.current_stream_generate_state and event.node_id in self._stream_generate_routes:
self._task_state.current_stream_generate_state = self._stream_generate_routes[event.node_id]
# reset current route position to 0
self._task_state.current_stream_generate_state.current_route_position = 0
# generate stream outputs when node started
yield from self._generate_stream_outputs_when_node_started()
@ -225,6 +232,22 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
task_id=self._application_generate_entity.task_id,
workflow_node_execution=workflow_node_execution
)
if isinstance(event, QueueNodeFailedEvent):
yield from self._handle_iteration_exception(
task_id=self._application_generate_entity.task_id,
error=f'Child node failed: {event.error}'
)
elif isinstance(event, QueueIterationStartEvent | QueueIterationNextEvent | QueueIterationCompletedEvent):
if isinstance(event, QueueIterationNextEvent):
# clear ran node execution infos of current iteration
iteration_relations = self._iteration_nested_relations.get(event.node_id)
if iteration_relations:
for node_id in iteration_relations:
self._task_state.ran_node_execution_infos.pop(node_id, None)
yield self._handle_iteration_to_stream_response(self._application_generate_entity.task_id, event)
self._handle_iteration_operation(event)
elif isinstance(event, QueueStopEvent | QueueWorkflowSucceededEvent | QueueWorkflowFailedEvent):
workflow_run = self._handle_workflow_finished(event)
if workflow_run:
@ -263,10 +286,6 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
self._handle_retriever_resources(event)
elif isinstance(event, QueueAnnotationReplyEvent):
self._handle_annotation_reply(event)
# elif isinstance(event, QueueMessageFileEvent):
# response = self._message_file_to_stream_response(event)
# if response:
# yield response
elif isinstance(event, QueueTextChunkEvent):
delta_text = event.text
if delta_text is None:
@ -342,7 +361,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
id=self._message.id,
**extras
)
def _get_stream_generate_routes(self) -> dict[str, ChatflowStreamGenerateRoute]:
"""
Get stream generate routes.
@ -372,7 +391,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
)
return stream_generate_routes
def _get_answer_start_at_node_ids(self, graph: dict, target_node_id: str) \
-> list[str]:
"""
@ -391,6 +410,18 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
ingoing_edges.append(edge)
if not ingoing_edges:
# check if it's the first node in the iteration
target_node = next((node for node in nodes if node.get('id') == target_node_id), None)
if not target_node:
return []
node_iteration_id = target_node.get('data', {}).get('iteration_id')
# get iteration start node id
for node in nodes:
if node.get('id') == node_iteration_id:
if node.get('data', {}).get('start_node_id') == target_node_id:
return [target_node_id]
return []
start_node_ids = []
@ -401,14 +432,23 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
continue
node_type = source_node.get('data', {}).get('type')
node_iteration_id = source_node.get('data', {}).get('iteration_id')
iteration_start_node_id = None
if node_iteration_id:
iteration_node = next((node for node in nodes if node.get('id') == node_iteration_id), None)
iteration_start_node_id = iteration_node.get('data', {}).get('start_node_id')
if node_type in [
NodeType.ANSWER.value,
NodeType.IF_ELSE.value,
NodeType.QUESTION_CLASSIFIER.value
NodeType.QUESTION_CLASSIFIER.value,
NodeType.ITERATION.value,
NodeType.LOOP.value
]:
start_node_id = target_node_id
start_node_ids.append(start_node_id)
elif node_type == NodeType.START.value:
elif node_type == NodeType.START.value or \
node_iteration_id is not None and iteration_start_node_id == source_node.get('id'):
start_node_id = source_node_id
start_node_ids.append(start_node_id)
else:
@ -417,7 +457,27 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
start_node_ids.extend(sub_start_node_ids)
return start_node_ids
def _get_iteration_nested_relations(self, graph: dict) -> dict[str, list[str]]:
"""
Get iteration nested relations.
:param graph: graph
:return:
"""
nodes = graph.get('nodes')
iteration_ids = [node.get('id') for node in nodes
if node.get('data', {}).get('type') in [
NodeType.ITERATION.value,
NodeType.LOOP.value,
]]
return {
iteration_id: [
node.get('id') for node in nodes if node.get('data', {}).get('iteration_id') == iteration_id
] for iteration_id in iteration_ids
}
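On a concrete graph, _get_iteration_nested_relations maps each iteration (or loop) node to the nodes whose data.iteration_id points back at it. A hand-checked example with a made-up graph dict, using string literals in place of NodeType.ITERATION.value and NodeType.LOOP.value:

graph = {
    'nodes': [
        {'id': 'iter-1', 'data': {'type': 'iteration'}},
        {'id': 'llm-1',  'data': {'type': 'llm', 'iteration_id': 'iter-1'}},
        {'id': 'code-1', 'data': {'type': 'code', 'iteration_id': 'iter-1'}},
        {'id': 'end-1',  'data': {'type': 'end'}},
    ]
}

nodes = graph.get('nodes')
iteration_ids = [n.get('id') for n in nodes
                 if n.get('data', {}).get('type') in ('iteration', 'loop')]
relations = {
    iteration_id: [n.get('id') for n in nodes
                   if n.get('data', {}).get('iteration_id') == iteration_id]
    for iteration_id in iteration_ids
}
assert relations == {'iter-1': ['llm-1', 'code-1']}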
def _generate_stream_outputs_when_node_started(self) -> Generator:
"""
Generate stream outputs.
@ -425,7 +485,8 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
"""
if self._task_state.current_stream_generate_state:
route_chunks = self._task_state.current_stream_generate_state.generate_route[
self._task_state.current_stream_generate_state.current_route_position:]
self._task_state.current_stream_generate_state.current_route_position:
]
for route_chunk in route_chunks:
if route_chunk.type == 'text':
@ -458,13 +519,14 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
route_chunks = self._task_state.current_stream_generate_state.generate_route[
self._task_state.current_stream_generate_state.current_route_position:]
for route_chunk in route_chunks:
if route_chunk.type == 'text':
route_chunk = cast(TextGenerateRouteChunk, route_chunk)
self._task_state.answer += route_chunk.text
yield self._message_to_stream_response(route_chunk.text, self._message.id)
else:
value = None
route_chunk = cast(VarGenerateRouteChunk, route_chunk)
value_selector = route_chunk.value_selector
if not value_selector:
@ -476,6 +538,20 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
if route_chunk_node_id == 'sys':
# system variable
value = self._workflow_system_variables.get(SystemVariable.value_of(value_selector[1]))
elif route_chunk_node_id in self._iteration_nested_relations:
# it's an iteration variable
if not self._iteration_state or route_chunk_node_id not in self._iteration_state.current_iterations:
continue
iteration_state = self._iteration_state.current_iterations[route_chunk_node_id]
iterator = iteration_state.inputs
if not iterator:
continue
iterator_selector = iterator.get('iterator_selector', [])
if value_selector[1] == 'index':
value = iteration_state.current_index
elif value_selector[1] == 'item':
value = iterator_selector[iteration_state.current_index] if iteration_state.current_index < len(
iterator_selector) else None
else:
# check chunk node id is before current node id or equal to current node id
if route_chunk_node_id not in self._task_state.ran_node_execution_infos:
@ -505,7 +581,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
else:
value = value.get(key)
if value:
if value is not None:
text = ''
if isinstance(value, str | int | float):
text = str(value)

View File

@ -1,8 +1,11 @@
from typing import Optional
from typing import Any, Optional
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
from core.app.entities.queue_entities import (
AppQueueEvent,
QueueIterationCompletedEvent,
QueueIterationNextEvent,
QueueIterationStartEvent,
QueueNodeFailedEvent,
QueueNodeStartedEvent,
QueueNodeSucceededEvent,
@ -130,6 +133,66 @@ class WorkflowEventTriggerCallback(BaseWorkflowCallback):
), PublishFrom.APPLICATION_MANAGER
)
def on_workflow_iteration_started(self,
node_id: str,
node_type: NodeType,
node_run_index: int = 1,
node_data: Optional[BaseNodeData] = None,
inputs: dict = None,
predecessor_node_id: Optional[str] = None,
metadata: Optional[dict] = None) -> None:
"""
Publish iteration started
"""
self._queue_manager.publish(
QueueIterationStartEvent(
node_id=node_id,
node_type=node_type,
node_run_index=node_run_index,
node_data=node_data,
inputs=inputs,
predecessor_node_id=predecessor_node_id,
metadata=metadata
),
PublishFrom.APPLICATION_MANAGER
)
def on_workflow_iteration_next(self, node_id: str,
node_type: NodeType,
index: int,
node_run_index: int,
output: Optional[Any]) -> None:
"""
Publish iteration next
"""
self._queue_manager.publish(
QueueIterationNextEvent(
node_id=node_id,
node_type=node_type,
index=index,
node_run_index=node_run_index,
output=output
),
PublishFrom.APPLICATION_MANAGER
)
def on_workflow_iteration_completed(self, node_id: str,
node_type: NodeType,
node_run_index: int,
outputs: dict) -> None:
"""
Publish iteration completed
"""
self._queue_manager.publish(
QueueIterationCompletedEvent(
node_id=node_id,
node_type=node_type,
node_run_index=node_run_index,
outputs=outputs
),
PublishFrom.APPLICATION_MANAGER
)
def on_event(self, event: AppQueueEvent) -> None:
"""
Publish event

View File

@ -83,7 +83,7 @@ class AgentChatAppGenerator(MessageBasedAppGenerator):
)
# parse files
files = args['files'] if 'files' in args and args['files'] else []
files = args['files'] if args.get('files') else []
message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id)
file_extra_config = FileUploadConfigManager.convert(override_model_config_dict or app_model_config.to_dict())
if file_extra_config:
@ -115,7 +115,8 @@ class AgentChatAppGenerator(MessageBasedAppGenerator):
user_id=user.id,
stream=stream,
invoke_from=invoke_from,
extras=extras
extras=extras,
call_depth=0
)
# init generate records

View File

@ -13,7 +13,9 @@ class BaseAppGenerator:
for variable_config in variables:
variable = variable_config.variable
if variable not in user_inputs or not user_inputs[variable]:
if (variable not in user_inputs
or user_inputs[variable] is None
or (isinstance(user_inputs[variable], str) and user_inputs[variable] == '')):
if variable_config.required:
raise ValueError(f"{variable} is required in input form")
else:
@ -22,7 +24,7 @@ class BaseAppGenerator:
value = user_inputs[variable]
if value:
if value is not None:
if variable_config.type != VariableEntity.Type.NUMBER and not isinstance(value, str):
raise ValueError(f"{variable} in input form must be a string")
elif variable_config.type == VariableEntity.Type.NUMBER and isinstance(value, str):
@ -44,7 +46,7 @@ class BaseAppGenerator:
if value and isinstance(value, str):
filtered_inputs[variable] = value.replace('\x00', '')
else:
filtered_inputs[variable] = value if value else None
filtered_inputs[variable] = value if value is not None else None
return filtered_inputs
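The move from `if value:` to `if value is not None:` matters because 0, 0.0, False, and '' are all falsy: under the old check a legitimate numeric input of 0 was coerced to None and never reached type validation. In isolation:

value = 0  # a legitimate NUMBER-type input
old = value if value else None               # -> None: input silently dropped
new = value if value is not None else None   # -> 0: input preserved
assert old is None and new == 0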

View File

@ -1,6 +1,6 @@
import time
from collections.abc import Generator
from typing import Optional, Union, cast
from typing import Optional, Union
from core.app.app_config.entities import ExternalDataVariableEntity, PromptTemplateEntity
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
@ -16,11 +16,11 @@ from core.app.features.hosting_moderation.hosting_moderation import HostingModer
from core.external_data_tool.external_data_fetch import ExternalDataFetch
from core.file.file_obj import FileVar
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.moderation.input_moderation import InputModeration
from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate, MemoryConfig
@ -45,8 +45,11 @@ class AppRunner:
:param query: query
:return:
"""
model_type_instance = model_config.provider_model_bundle.model_type_instance
model_type_instance = cast(LargeLanguageModel, model_type_instance)
# Invoke model
model_instance = ModelInstance(
provider_model_bundle=model_config.provider_model_bundle,
model=model_config.model
)
model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)
@ -73,9 +76,7 @@ class AppRunner:
query=query
)
prompt_tokens = model_type_instance.get_num_tokens(
model_config.model,
model_config.credentials,
prompt_tokens = model_instance.get_llm_num_tokens(
prompt_messages
)
@ -89,8 +90,10 @@ class AppRunner:
def recalc_llm_max_tokens(self, model_config: ModelConfigWithCredentialsEntity,
prompt_messages: list[PromptMessage]):
# recalc max_tokens if sum(prompt_token + max_tokens) over model token limit
model_type_instance = model_config.provider_model_bundle.model_type_instance
model_type_instance = cast(LargeLanguageModel, model_type_instance)
model_instance = ModelInstance(
provider_model_bundle=model_config.provider_model_bundle,
model=model_config.model
)
model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)
@ -107,9 +110,7 @@ class AppRunner:
if max_tokens is None:
max_tokens = 0
prompt_tokens = model_type_instance.get_num_tokens(
model_config.model,
model_config.credentials,
prompt_tokens = model_instance.get_llm_num_tokens(
prompt_messages
)
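Both token-counting call sites now go through ModelInstance, which binds the model name and credentials at construction, instead of casting the bundle's model_type_instance to LargeLanguageModel and threading model and credentials through every call. The two styles side by side, assuming a model_config configured as in the surrounding code:

from core.model_manager import ModelInstance

# before: unpack, cast, and pass model + credentials on every call
# model_type_instance = cast(LargeLanguageModel, model_config.provider_model_bundle.model_type_instance)
# prompt_tokens = model_type_instance.get_num_tokens(model_config.model, model_config.credentials, prompt_messages)

# after: ModelInstance carries model and credentials itself
model_instance = ModelInstance(
    provider_model_bundle=model_config.provider_model_bundle,
    model=model_config.model,
)
prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)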

View File

@ -80,7 +80,7 @@ class ChatAppGenerator(MessageBasedAppGenerator):
)
# parse files
files = args['files'] if 'files' in args and args['files'] else []
files = args['files'] if args.get('files') else []
message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id)
file_extra_config = FileUploadConfigManager.convert(override_model_config_dict or app_model_config.to_dict())
if file_extra_config:

View File

@ -75,7 +75,7 @@ class CompletionAppGenerator(MessageBasedAppGenerator):
)
# parse files
files = args['files'] if 'files' in args and args['files'] else []
files = args['files'] if args.get('files') else []
message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id)
file_extra_config = FileUploadConfigManager.convert(override_model_config_dict or app_model_config.to_dict())
if file_extra_config:

View File

@ -34,7 +34,8 @@ class WorkflowAppGenerator(BaseAppGenerator):
user: Union[Account, EndUser],
args: dict,
invoke_from: InvokeFrom,
stream: bool = True) \
stream: bool = True,
call_depth: int = 0) \
-> Union[dict, Generator[dict, None, None]]:
"""
Generate App response.
@ -49,7 +50,7 @@ class WorkflowAppGenerator(BaseAppGenerator):
inputs = args['inputs']
# parse files
files = args['files'] if 'files' in args and args['files'] else []
files = args['files'] if args.get('files') else []
message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id)
file_extra_config = FileUploadConfigManager.convert(workflow.features_dict, is_vision=False)
if file_extra_config:
@ -75,9 +76,38 @@ class WorkflowAppGenerator(BaseAppGenerator):
files=file_objs,
user_id=user.id,
stream=stream,
invoke_from=invoke_from
invoke_from=invoke_from,
call_depth=call_depth
)
return self._generate(
app_model=app_model,
workflow=workflow,
user=user,
application_generate_entity=application_generate_entity,
invoke_from=invoke_from,
stream=stream,
call_depth=call_depth
)
def _generate(self, app_model: App,
workflow: Workflow,
user: Union[Account, EndUser],
application_generate_entity: WorkflowAppGenerateEntity,
invoke_from: InvokeFrom,
stream: bool = True,
call_depth: int = 0) \
-> Union[dict, Generator[dict, None, None]]:
"""
Generate App response.
:param app_model: App
:param workflow: Workflow
:param user: account or end user
:param application_generate_entity: application generate entity
:param invoke_from: invoke from source
:param stream: is stream
"""
# init queue manager
queue_manager = WorkflowAppQueueManager(
task_id=application_generate_entity.task_id,
@ -109,6 +139,64 @@ class WorkflowAppGenerator(BaseAppGenerator):
invoke_from=invoke_from
)
def single_iteration_generate(self, app_model: App,
workflow: Workflow,
node_id: str,
user: Account,
args: dict,
stream: bool = True) \
-> Union[dict, Generator[dict, None, None]]:
"""
Generate App response.
:param app_model: App
:param workflow: Workflow
:param user: account or end user
:param args: request args
:param node_id: iteration node id
:param stream: is stream
"""
if not node_id:
raise ValueError('node_id is required')
if args.get('inputs') is None:
raise ValueError('inputs is required')
extras = {
"auto_generate_conversation_name": False
}
# convert to app config
app_config = WorkflowAppConfigManager.get_app_config(
app_model=app_model,
workflow=workflow
)
# init application generate entity
application_generate_entity = WorkflowAppGenerateEntity(
task_id=str(uuid.uuid4()),
app_config=app_config,
inputs={},
files=[],
user_id=user.id,
stream=stream,
invoke_from=InvokeFrom.DEBUGGER,
extras=extras,
single_iteration_run=WorkflowAppGenerateEntity.SingleIterationRunEntity(
node_id=node_id,
inputs=args['inputs']
)
)
return self._generate(
app_model=app_model,
workflow=workflow,
user=user,
invoke_from=InvokeFrom.DEBUGGER,
application_generate_entity=application_generate_entity,
stream=stream
)
def _generate_worker(self, flask_app: Flask,
application_generate_entity: WorkflowAppGenerateEntity,
queue_manager: AppQueueManager) -> None:
@ -123,10 +211,21 @@ class WorkflowAppGenerator(BaseAppGenerator):
try:
# workflow app
runner = WorkflowAppRunner()
runner.run(
application_generate_entity=application_generate_entity,
queue_manager=queue_manager
)
if application_generate_entity.single_iteration_run:
single_iteration_run = application_generate_entity.single_iteration_run
runner.single_iteration_run(
app_id=application_generate_entity.app_config.app_id,
workflow_id=application_generate_entity.app_config.workflow_id,
queue_manager=queue_manager,
inputs=single_iteration_run.inputs,
node_id=single_iteration_run.node_id,
user_id=application_generate_entity.user_id
)
else:
runner.run(
application_generate_entity=application_generate_entity,
queue_manager=queue_manager
)
except GenerateTaskStoppedException:
pass
except InvokeAuthorizationError:

View File

@ -73,11 +73,44 @@ class WorkflowAppRunner:
user_from=UserFrom.ACCOUNT
if application_generate_entity.invoke_from in [InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER]
else UserFrom.END_USER,
invoke_from=application_generate_entity.invoke_from,
user_inputs=inputs,
system_inputs={
SystemVariable.FILES: files,
SystemVariable.USER_ID: user_id
},
callbacks=workflow_callbacks,
call_depth=application_generate_entity.call_depth
)
def single_iteration_run(self, app_id: str, workflow_id: str,
queue_manager: AppQueueManager,
inputs: dict, node_id: str, user_id: str) -> None:
"""
Single iteration run
"""
app_record: App = db.session.query(App).filter(App.id == app_id).first()
if not app_record:
raise ValueError("App not found")
if not app_record.workflow_id:
raise ValueError("Workflow not initialized")
workflow = self.get_workflow(app_model=app_record, workflow_id=workflow_id)
if not workflow:
raise ValueError("Workflow not initialized")
workflow_callbacks = [WorkflowEventTriggerCallback(
queue_manager=queue_manager,
workflow=workflow
)]
workflow_engine_manager = WorkflowEngineManager()
workflow_engine_manager.single_step_run_iteration_workflow_node(
workflow=workflow,
node_id=node_id,
user_id=user_id,
user_inputs=inputs,
callbacks=workflow_callbacks
)

View File

@ -5,6 +5,8 @@ from typing import cast
from core.app.apps.base_app_generate_response_converter import AppGenerateResponseConverter
from core.app.entities.task_entities import (
ErrorStreamResponse,
NodeFinishStreamResponse,
NodeStartStreamResponse,
PingStreamResponse,
WorkflowAppBlockingResponse,
WorkflowAppStreamResponse,
@ -68,4 +70,24 @@ class WorkflowAppGenerateResponseConverter(AppGenerateResponseConverter):
:param stream_response: stream response
:return:
"""
return cls.convert_stream_full_response(stream_response)
for chunk in stream_response:
chunk = cast(WorkflowAppStreamResponse, chunk)
sub_stream_response = chunk.stream_response
if isinstance(sub_stream_response, PingStreamResponse):
yield 'ping'
continue
response_chunk = {
'event': sub_stream_response.event.value,
'workflow_run_id': chunk.workflow_run_id,
}
if isinstance(sub_stream_response, ErrorStreamResponse):
data = cls._error_to_stream_response(sub_stream_response.err)
response_chunk.update(data)
elif isinstance(sub_stream_response, NodeStartStreamResponse | NodeFinishStreamResponse):
response_chunk.update(sub_stream_response.to_ignore_detail_dict())
else:
response_chunk.update(sub_stream_response.to_dict())
yield json.dumps(response_chunk)

View File

@ -9,6 +9,9 @@ from core.app.entities.app_invoke_entities import (
)
from core.app.entities.queue_entities import (
QueueErrorEvent,
QueueIterationCompletedEvent,
QueueIterationNextEvent,
QueueIterationStartEvent,
QueueMessageReplaceEvent,
QueueNodeFailedEvent,
QueueNodeStartedEvent,
@ -58,6 +61,7 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
_task_state: WorkflowTaskState
_application_generate_entity: WorkflowAppGenerateEntity
_workflow_system_variables: dict[SystemVariable, Any]
_iteration_nested_relations: dict[str, list[str]]
def __init__(self, application_generate_entity: WorkflowAppGenerateEntity,
workflow: Workflow,
@ -85,8 +89,11 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
SystemVariable.USER_ID: user_id
}
self._task_state = WorkflowTaskState()
self._task_state = WorkflowTaskState(
iteration_nested_node_ids=[]
)
self._stream_generate_nodes = self._get_stream_generate_nodes()
self._iteration_nested_relations = self._get_iteration_nested_relations(self._workflow.graph_dict)
def process(self) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]:
"""
@ -191,6 +198,22 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
task_id=self._application_generate_entity.task_id,
workflow_node_execution=workflow_node_execution
)
if isinstance(event, QueueNodeFailedEvent):
yield from self._handle_iteration_exception(
task_id=self._application_generate_entity.task_id,
error=f'Child node failed: {event.error}'
)
elif isinstance(event, QueueIterationStartEvent | QueueIterationNextEvent | QueueIterationCompletedEvent):
if isinstance(event, QueueIterationNextEvent):
# clear ran node execution infos of current iteration
iteration_relations = self._iteration_nested_relations.get(event.node_id)
if iteration_relations:
for node_id in iteration_relations:
self._task_state.ran_node_execution_infos.pop(node_id, None)
yield self._handle_iteration_to_stream_response(self._application_generate_entity.task_id, event)
self._handle_iteration_operation(event)
elif isinstance(event, QueueStopEvent | QueueWorkflowSucceededEvent | QueueWorkflowFailedEvent):
workflow_run = self._handle_workflow_finished(event)
@ -331,13 +354,20 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
continue
node_type = source_node.get('data', {}).get('type')
node_iteration_id = source_node.get('data', {}).get('iteration_id')
iteration_start_node_id = None
if node_iteration_id:
iteration_node = next((node for node in nodes if node.get('id') == node_iteration_id), None)
iteration_start_node_id = iteration_node.get('data', {}).get('start_node_id')
if node_type in [
NodeType.IF_ELSE.value,
NodeType.QUESTION_CLASSIFIER.value
]:
start_node_id = target_node_id
start_node_ids.append(start_node_id)
elif node_type == NodeType.START.value:
elif node_type == NodeType.START.value or \
node_iteration_id is not None and iteration_start_node_id == source_node.get('id'):
start_node_id = source_node_id
start_node_ids.append(start_node_id)
else:
@ -411,3 +441,24 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
return False
return True
def _get_iteration_nested_relations(self, graph: dict) -> dict[str, list[str]]:
"""
Get iteration nested relations.
:param graph: graph
:return:
"""
nodes = graph.get('nodes')
iteration_ids = [node.get('id') for node in nodes
if node.get('data', {}).get('type') in [
NodeType.ITERATION.value,
NodeType.LOOP.value,
]]
return {
iteration_id: [
node.get('id') for node in nodes if node.get('data', {}).get('iteration_id') == iteration_id
] for iteration_id in iteration_ids
}

View File

@ -1,8 +1,11 @@
from typing import Optional
from typing import Any, Optional
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
from core.app.entities.queue_entities import (
AppQueueEvent,
QueueIterationCompletedEvent,
QueueIterationNextEvent,
QueueIterationStartEvent,
QueueNodeFailedEvent,
QueueNodeStartedEvent,
QueueNodeSucceededEvent,
@ -130,6 +133,66 @@ class WorkflowEventTriggerCallback(BaseWorkflowCallback):
), PublishFrom.APPLICATION_MANAGER
)
def on_workflow_iteration_started(self,
node_id: str,
node_type: NodeType,
node_run_index: int = 1,
node_data: Optional[BaseNodeData] = None,
inputs: dict = None,
predecessor_node_id: Optional[str] = None,
metadata: Optional[dict] = None) -> None:
"""
Publish iteration started
"""
self._queue_manager.publish(
QueueIterationStartEvent(
node_id=node_id,
node_type=node_type,
node_run_index=node_run_index,
node_data=node_data,
inputs=inputs,
predecessor_node_id=predecessor_node_id,
metadata=metadata
),
PublishFrom.APPLICATION_MANAGER
)
def on_workflow_iteration_next(self, node_id: str,
node_type: NodeType,
index: int,
node_run_index: int,
output: Optional[Any]) -> None:
"""
Publish iteration next
"""
self._queue_manager.publish(
QueueIterationNextEvent(
node_id=node_id,
node_type=node_type,
index=index,
node_run_index=node_run_index,
output=output
),
PublishFrom.APPLICATION_MANAGER
)
def on_workflow_iteration_completed(self, node_id: str,
node_type: NodeType,
node_run_index: int,
outputs: dict) -> None:
"""
Publish iteration completed
"""
self._queue_manager.publish(
QueueIterationCompletedEvent(
node_id=node_id,
node_type=node_type,
node_run_index=node_run_index,
outputs=outputs
),
PublishFrom.APPLICATION_MANAGER
)
def on_event(self, event: AppQueueEvent) -> None:
"""
Publish event

View File

@ -102,6 +102,39 @@ class WorkflowLoggingCallback(BaseWorkflowCallback):
self.print_text(text, color="pink", end="")
def on_workflow_iteration_started(self,
node_id: str,
node_type: NodeType,
node_run_index: int = 1,
node_data: Optional[BaseNodeData] = None,
inputs: dict = None,
predecessor_node_id: Optional[str] = None,
metadata: Optional[dict] = None) -> None:
"""
Log iteration started
"""
self.print_text("\n[on_workflow_iteration_started]", color='blue')
self.print_text(f"Node ID: {node_id}", color='blue')
def on_workflow_iteration_next(self, node_id: str,
node_type: NodeType,
index: int,
node_run_index: int,
output: Optional[dict]) -> None:
"""
Log iteration next
"""
self.print_text("\n[on_workflow_iteration_next]", color='blue')
def on_workflow_iteration_completed(self, node_id: str,
node_type: NodeType,
node_run_index: int,
outputs: dict) -> None:
"""
Log iteration completed
"""
self.print_text("\n[on_workflow_iteration_completed]", color='blue')
def on_event(self, event: AppQueueEvent) -> None:
"""
Publish event

View File

@ -80,6 +80,9 @@ class AppGenerateEntity(BaseModel):
stream: bool
invoke_from: InvokeFrom
# invoke call depth
call_depth: int = 0
# extra parameters, like: auto_generate_conversation_name
extras: dict[str, Any] = {}
@ -126,6 +129,14 @@ class AdvancedChatAppGenerateEntity(AppGenerateEntity):
conversation_id: Optional[str] = None
query: Optional[str] = None
class SingleIterationRunEntity(BaseModel):
"""
Single Iteration Run Entity.
"""
node_id: str
inputs: dict
single_iteration_run: Optional[SingleIterationRunEntity] = None
class WorkflowAppGenerateEntity(AppGenerateEntity):
"""
@ -133,3 +144,12 @@ class WorkflowAppGenerateEntity(AppGenerateEntity):
"""
# app config
app_config: WorkflowUIBasedAppConfig
class SingleIterationRunEntity(BaseModel):
"""
Single Iteration Run Entity.
"""
node_id: str
inputs: dict
single_iteration_run: Optional[SingleIterationRunEntity] = None

View File

@ -1,7 +1,7 @@
from enum import Enum
from typing import Any, Optional
from pydantic import BaseModel
from pydantic import BaseModel, validator
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
from core.workflow.entities.base_node_data_entities import BaseNodeData
@ -21,6 +21,9 @@ class QueueEvent(Enum):
WORKFLOW_STARTED = "workflow_started"
WORKFLOW_SUCCEEDED = "workflow_succeeded"
WORKFLOW_FAILED = "workflow_failed"
ITERATION_START = "iteration_start"
ITERATION_NEXT = "iteration_next"
ITERATION_COMPLETED = "iteration_completed"
NODE_STARTED = "node_started"
NODE_SUCCEEDED = "node_succeeded"
NODE_FAILED = "node_failed"
@ -47,6 +50,55 @@ class QueueLLMChunkEvent(AppQueueEvent):
event = QueueEvent.LLM_CHUNK
chunk: LLMResultChunk
class QueueIterationStartEvent(AppQueueEvent):
"""
QueueIterationStartEvent entity
"""
event = QueueEvent.ITERATION_START
node_id: str
node_type: NodeType
node_data: BaseNodeData
node_run_index: int
inputs: dict = None
predecessor_node_id: Optional[str] = None
metadata: Optional[dict] = None
class QueueIterationNextEvent(AppQueueEvent):
"""
QueueIterationNextEvent entity
"""
event = QueueEvent.ITERATION_NEXT
index: int
node_id: str
node_type: NodeType
node_run_index: int
output: Optional[Any] # output for the current iteration
@validator('output', pre=True, always=True)
def set_output(cls, v):
"""
Set output
"""
if v is None:
return None
if isinstance(v, int | float | str | bool | dict | list):
return v
raise ValueError('output must be a valid type')
class QueueIterationCompletedEvent(AppQueueEvent):
"""
QueueIterationCompletedEvent entity
"""
event = QueueEvent.ITERATION_COMPLETED
node_id: str
node_type: NodeType
node_run_index: int
outputs: dict
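The pre-validator on output admits None plus the JSON-friendly primitive and container types, and rejects everything else. A standalone sketch of the same pattern (pydantic v1 style, matching the validator import above; the class name here is made up):

from typing import Any, Optional
from pydantic import BaseModel, validator

class IterationNext(BaseModel):
    output: Optional[Any]

    @validator('output', pre=True, always=True)
    def set_output(cls, v):
        # mirror the checks in QueueIterationNextEvent above
        if v is None:
            return None
        if isinstance(v, int | float | str | bool | dict | list):
            return v
        raise ValueError('output must be a valid type')

IterationNext(output=[1, 2, 3])    # accepted
# IterationNext(output=object())   # raises a ValidationError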
class QueueTextChunkEvent(AppQueueEvent):
"""

View File

@ -1,12 +1,14 @@
from enum import Enum
from typing import Optional
from typing import Any, Optional
from pydantic import BaseModel
from core.model_runtime.entities.llm_entities import LLMResult, LLMUsage
from core.model_runtime.utils.encoders import jsonable_encoder
from core.workflow.entities.base_node_data_entities import BaseNodeData
from core.workflow.entities.node_entities import NodeType
from core.workflow.nodes.answer.entities import GenerateRouteChunk
from models.workflow import WorkflowNodeExecutionStatus
class WorkflowStreamGenerateNodes(BaseModel):
@ -65,6 +67,7 @@ class WorkflowTaskState(TaskState):
current_stream_generate_state: Optional[WorkflowStreamGenerateNodes] = None
iteration_nested_node_ids: list[str] = None
class AdvancedChatTaskState(WorkflowTaskState):
"""
@ -91,6 +94,9 @@ class StreamEvent(Enum):
WORKFLOW_FINISHED = "workflow_finished"
NODE_STARTED = "node_started"
NODE_FINISHED = "node_finished"
ITERATION_STARTED = "iteration_started"
ITERATION_NEXT = "iteration_next"
ITERATION_COMPLETED = "iteration_completed"
TEXT_CHUNK = "text_chunk"
TEXT_REPLACE = "text_replace"
@ -246,6 +252,24 @@ class NodeStartStreamResponse(StreamResponse):
workflow_run_id: str
data: Data
def to_ignore_detail_dict(self):
return {
"event": self.event.value,
"task_id": self.task_id,
"workflow_run_id": self.workflow_run_id,
"data": {
"id": self.data.id,
"node_id": self.data.node_id,
"node_type": self.data.node_type,
"title": self.data.title,
"index": self.data.index,
"predecessor_node_id": self.data.predecessor_node_id,
"inputs": None,
"created_at": self.data.created_at,
"extras": {}
}
}
class NodeFinishStreamResponse(StreamResponse):
"""
@ -276,6 +300,99 @@ class NodeFinishStreamResponse(StreamResponse):
workflow_run_id: str
data: Data
def to_ignore_detail_dict(self):
return {
"event": self.event.value,
"task_id": self.task_id,
"workflow_run_id": self.workflow_run_id,
"data": {
"id": self.data.id,
"node_id": self.data.node_id,
"node_type": self.data.node_type,
"title": self.data.title,
"index": self.data.index,
"predecessor_node_id": self.data.predecessor_node_id,
"inputs": None,
"process_data": None,
"outputs": None,
"status": self.data.status,
"error": None,
"elapsed_time": self.data.elapsed_time,
"execution_metadata": None,
"created_at": self.data.created_at,
"finished_at": self.data.finished_at,
"files": []
}
}
class IterationNodeStartStreamResponse(StreamResponse):
"""
IterationNodeStartStreamResponse entity
"""
class Data(BaseModel):
"""
Data entity
"""
id: str
node_id: str
node_type: str
title: str
created_at: int
extras: dict = {}
metadata: dict = {}
inputs: dict = {}
event: StreamEvent = StreamEvent.ITERATION_STARTED
workflow_run_id: str
data: Data
class IterationNodeNextStreamResponse(StreamResponse):
"""
IterationNodeNextStreamResponse entity
"""
class Data(BaseModel):
"""
Data entity
"""
id: str
node_id: str
node_type: str
title: str
index: int
created_at: int
pre_iteration_output: Optional[Any]
extras: dict = {}
event: StreamEvent = StreamEvent.ITERATION_NEXT
workflow_run_id: str
data: Data
class IterationNodeCompletedStreamResponse(StreamResponse):
"""
IterationNodeCompletedStreamResponse entity
"""
class Data(BaseModel):
"""
Data entity
"""
id: str
node_id: str
node_type: str
title: str
outputs: Optional[dict]
created_at: int
extras: dict = None
inputs: dict = None
status: WorkflowNodeExecutionStatus
error: Optional[str]
elapsed_time: float
total_tokens: int
finished_at: int
steps: int
event: StreamEvent = StreamEvent.ITERATION_COMPLETED
workflow_run_id: str
data: Data
class TextChunkStreamResponse(StreamResponse):
"""
@ -411,3 +528,23 @@ class WorkflowAppBlockingResponse(AppBlockingResponse):
workflow_run_id: str
data: Data
class WorkflowIterationState(BaseModel):
"""
WorkflowIterationState entity
"""
class Data(BaseModel):
"""
Data entity
"""
parent_iteration_id: Optional[str] = None
iteration_id: str
current_index: int
iteration_steps_boundary: list[int] = None
node_execution_id: str
started_at: float
inputs: dict = None
total_tokens: int = 0
node_data: BaseNodeData
current_iterations: dict[str, Data] = None

View File

@ -37,6 +37,7 @@ from core.app.entities.task_entities import (
)
from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTaskPipeline
from core.app.task_pipeline.message_cycle_manage import MessageCycleManage
from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
@ -317,29 +318,30 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan
"""
model_config = self._model_config
model = model_config.model
model_type_instance = model_config.provider_model_bundle.model_type_instance
model_type_instance = cast(LargeLanguageModel, model_type_instance)
model_instance = ModelInstance(
provider_model_bundle=model_config.provider_model_bundle,
model=model_config.model
)
# calculate num tokens
prompt_tokens = 0
if event.stopped_by != QueueStopEvent.StopBy.ANNOTATION_REPLY:
prompt_tokens = model_type_instance.get_num_tokens(
model,
model_config.credentials,
prompt_tokens = model_instance.get_llm_num_tokens(
self._task_state.llm_result.prompt_messages
)
completion_tokens = 0
if event.stopped_by == QueueStopEvent.StopBy.USER_MANUAL:
completion_tokens = model_type_instance.get_num_tokens(
model,
model_config.credentials,
completion_tokens = model_instance.get_llm_num_tokens(
[self._task_state.llm_result.message]
)
credentials = model_config.credentials
# transform usage
model_type_instance = model_config.provider_model_bundle.model_type_instance
model_type_instance = cast(LargeLanguageModel, model_type_instance)
self._task_state.llm_result.usage = model_type_instance._calc_response_usage(
model,
credentials,

View File

@ -1,9 +1,9 @@
import json
import time
from datetime import datetime, timezone
from typing import Any, Optional, Union, cast
from typing import Optional, Union, cast
from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, InvokeFrom, WorkflowAppGenerateEntity
from core.app.entities.app_invoke_entities import InvokeFrom
from core.app.entities.queue_entities import (
QueueNodeFailedEvent,
QueueNodeStartedEvent,
@ -13,18 +13,17 @@ from core.app.entities.queue_entities import (
QueueWorkflowSucceededEvent,
)
from core.app.entities.task_entities import (
AdvancedChatTaskState,
NodeExecutionInfo,
NodeFinishStreamResponse,
NodeStartStreamResponse,
WorkflowFinishStreamResponse,
WorkflowStartStreamResponse,
WorkflowTaskState,
)
from core.app.task_pipeline.workflow_iteration_cycle_manage import WorkflowIterationCycleManage
from core.file.file_obj import FileVar
from core.model_runtime.utils.encoders import jsonable_encoder
from core.tools.tool_manager import ToolManager
from core.workflow.entities.node_entities import NodeRunMetadataKey, NodeType, SystemVariable
from core.workflow.entities.node_entities import NodeRunMetadataKey, NodeType
from core.workflow.nodes.tool.entities import ToolNodeData
from core.workflow.workflow_engine_manager import WorkflowEngineManager
from extensions.ext_database import db
@ -42,13 +41,7 @@ from models.workflow import (
)
class WorkflowCycleManage:
_application_generate_entity: Union[AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity]
_workflow: Workflow
_user: Union[Account, EndUser]
_task_state: Union[AdvancedChatTaskState, WorkflowTaskState]
_workflow_system_variables: dict[SystemVariable, Any]
class WorkflowCycleManage(WorkflowIterationCycleManage):
def _init_workflow_run(self, workflow: Workflow,
triggered_from: WorkflowRunTriggeredFrom,
user: Union[Account, EndUser],
@ -237,6 +230,7 @@ class WorkflowCycleManage:
inputs: Optional[dict] = None,
process_data: Optional[dict] = None,
outputs: Optional[dict] = None,
execution_metadata: Optional[dict] = None
) -> WorkflowNodeExecution:
"""
Workflow node execution failed
@ -255,6 +249,8 @@ class WorkflowCycleManage:
workflow_node_execution.inputs = json.dumps(inputs) if inputs else None
workflow_node_execution.process_data = json.dumps(process_data) if process_data else None
workflow_node_execution.outputs = json.dumps(outputs) if outputs else None
workflow_node_execution.execution_metadata = json.dumps(jsonable_encoder(execution_metadata)) \
if execution_metadata else None
db.session.commit()
db.session.refresh(workflow_node_execution)
@ -444,6 +440,23 @@ class WorkflowCycleManage:
current_node_execution = self._task_state.ran_node_execution_infos[event.node_id]
workflow_node_execution = db.session.query(WorkflowNodeExecution).filter(
WorkflowNodeExecution.id == current_node_execution.workflow_node_execution_id).first()
execution_metadata = event.execution_metadata if isinstance(event, QueueNodeSucceededEvent) else None
if self._iteration_state and self._iteration_state.current_iterations:
if not execution_metadata:
execution_metadata = {}
current_iteration_data = None
for iteration_node_id in self._iteration_state.current_iterations:
data = self._iteration_state.current_iterations[iteration_node_id]
if data.parent_iteration_id is None:
current_iteration_data = data
break
if current_iteration_data:
execution_metadata[NodeRunMetadataKey.ITERATION_ID] = current_iteration_data.iteration_id
execution_metadata[NodeRunMetadataKey.ITERATION_INDEX] = current_iteration_data.current_index
if isinstance(event, QueueNodeSucceededEvent):
workflow_node_execution = self._workflow_node_execution_success(
workflow_node_execution=workflow_node_execution,
@ -451,12 +464,18 @@ class WorkflowCycleManage:
inputs=event.inputs,
process_data=event.process_data,
outputs=event.outputs,
execution_metadata=event.execution_metadata
execution_metadata=execution_metadata
)
if event.execution_metadata and event.execution_metadata.get(NodeRunMetadataKey.TOTAL_TOKENS):
if execution_metadata and execution_metadata.get(NodeRunMetadataKey.TOTAL_TOKENS):
self._task_state.total_tokens += (
int(event.execution_metadata.get(NodeRunMetadataKey.TOTAL_TOKENS)))
int(execution_metadata.get(NodeRunMetadataKey.TOTAL_TOKENS)))
if self._iteration_state:
for iteration_node_id in self._iteration_state.current_iterations:
data = self._iteration_state.current_iterations[iteration_node_id]
if execution_metadata and execution_metadata.get(NodeRunMetadataKey.TOTAL_TOKENS):
data.total_tokens += int(execution_metadata.get(NodeRunMetadataKey.TOTAL_TOKENS))
if workflow_node_execution.node_type == NodeType.LLM.value:
outputs = workflow_node_execution.outputs_dict
@ -469,7 +488,8 @@ class WorkflowCycleManage:
error=event.error,
inputs=event.inputs,
process_data=event.process_data,
outputs=event.outputs
outputs=event.outputs,
execution_metadata=execution_metadata
)
db.session.close()

View File

@ -0,0 +1,16 @@
from typing import Any, Union
from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity
from core.app.entities.task_entities import AdvancedChatTaskState, WorkflowTaskState
from core.workflow.entities.node_entities import SystemVariable
from models.account import Account
from models.model import EndUser
from models.workflow import Workflow
class WorkflowCycleStateManager:
_application_generate_entity: Union[AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity]
_workflow: Workflow
_user: Union[Account, EndUser]
_task_state: Union[AdvancedChatTaskState, WorkflowTaskState]
_workflow_system_variables: dict[SystemVariable, Any]

View File

@ -0,0 +1,281 @@
import json
import time
from collections.abc import Generator
from typing import Optional, Union
from core.app.entities.queue_entities import (
QueueIterationCompletedEvent,
QueueIterationNextEvent,
QueueIterationStartEvent,
)
from core.app.entities.task_entities import (
IterationNodeCompletedStreamResponse,
IterationNodeNextStreamResponse,
IterationNodeStartStreamResponse,
NodeExecutionInfo,
WorkflowIterationState,
)
from core.app.task_pipeline.workflow_cycle_state_manager import WorkflowCycleStateManager
from core.workflow.entities.node_entities import NodeType
from extensions.ext_database import db
from models.workflow import (
WorkflowNodeExecution,
WorkflowNodeExecutionStatus,
WorkflowNodeExecutionTriggeredFrom,
WorkflowRun,
)
class WorkflowIterationCycleManage(WorkflowCycleStateManager):
_iteration_state: Optional[WorkflowIterationState] = None
def _init_iteration_state(self) -> WorkflowIterationState:
    if not self._iteration_state:
        self._iteration_state = WorkflowIterationState(
            current_iterations={}
        )
    return self._iteration_state
def _handle_iteration_to_stream_response(self, task_id: str, event: QueueIterationStartEvent | QueueIterationNextEvent | QueueIterationCompletedEvent) \
-> Union[IterationNodeStartStreamResponse, IterationNodeNextStreamResponse, IterationNodeCompletedStreamResponse]:
"""
Handle iteration to stream response
:param task_id: task id
:param event: iteration event
:return:
"""
if isinstance(event, QueueIterationStartEvent):
return IterationNodeStartStreamResponse(
task_id=task_id,
workflow_run_id=self._task_state.workflow_run_id,
data=IterationNodeStartStreamResponse.Data(
id=event.node_id,
node_id=event.node_id,
node_type=event.node_type.value,
title=event.node_data.title,
created_at=int(time.time()),
extras={},
inputs=event.inputs,
metadata=event.metadata
)
)
elif isinstance(event, QueueIterationNextEvent):
current_iteration = self._iteration_state.current_iterations[event.node_id]
return IterationNodeNextStreamResponse(
task_id=task_id,
workflow_run_id=self._task_state.workflow_run_id,
data=IterationNodeNextStreamResponse.Data(
id=event.node_id,
node_id=event.node_id,
node_type=event.node_type.value,
title=current_iteration.node_data.title,
index=event.index,
pre_iteration_output=event.output,
created_at=int(time.time()),
extras={}
)
)
elif isinstance(event, QueueIterationCompletedEvent):
current_iteration = self._iteration_state.current_iterations[event.node_id]
return IterationNodeCompletedStreamResponse(
task_id=task_id,
workflow_run_id=self._task_state.workflow_run_id,
data=IterationNodeCompletedStreamResponse.Data(
id=event.node_id,
node_id=event.node_id,
node_type=event.node_type.value,
title=current_iteration.node_data.title,
outputs=event.outputs,
created_at=int(time.time()),
extras={},
inputs=current_iteration.inputs,
status=WorkflowNodeExecutionStatus.SUCCEEDED,
error=None,
elapsed_time=time.perf_counter() - current_iteration.started_at,
total_tokens=current_iteration.total_tokens,
finished_at=int(time.time()),
steps=current_iteration.current_index
)
)
def _init_iteration_execution_from_workflow_run(self,
workflow_run: WorkflowRun,
node_id: str,
node_type: NodeType,
node_title: str,
node_run_index: int = 1,
inputs: Optional[dict] = None,
predecessor_node_id: Optional[str] = None
) -> WorkflowNodeExecution:
workflow_node_execution = WorkflowNodeExecution(
tenant_id=workflow_run.tenant_id,
app_id=workflow_run.app_id,
workflow_id=workflow_run.workflow_id,
triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value,
workflow_run_id=workflow_run.id,
predecessor_node_id=predecessor_node_id,
index=node_run_index,
node_id=node_id,
node_type=node_type.value,
inputs=json.dumps(inputs) if inputs else None,
title=node_title,
status=WorkflowNodeExecutionStatus.RUNNING.value,
created_by_role=workflow_run.created_by_role,
created_by=workflow_run.created_by,
execution_metadata=json.dumps({
'started_run_index': node_run_index + 1,
'current_index': 0,
'steps_boundary': [],
})
)
db.session.add(workflow_node_execution)
db.session.commit()
db.session.refresh(workflow_node_execution)
db.session.close()
return workflow_node_execution
def _handle_iteration_operation(self, event: QueueIterationStartEvent | QueueIterationNextEvent | QueueIterationCompletedEvent) -> WorkflowNodeExecution:
if isinstance(event, QueueIterationStartEvent):
return self._handle_iteration_started(event)
elif isinstance(event, QueueIterationNextEvent):
return self._handle_iteration_next(event)
elif isinstance(event, QueueIterationCompletedEvent):
return self._handle_iteration_completed(event)
def _handle_iteration_started(self, event: QueueIterationStartEvent) -> WorkflowNodeExecution:
self._init_iteration_state()
workflow_run = db.session.query(WorkflowRun).filter(WorkflowRun.id == self._task_state.workflow_run_id).first()
workflow_node_execution = self._init_iteration_execution_from_workflow_run(
workflow_run=workflow_run,
node_id=event.node_id,
node_type=NodeType.ITERATION,
node_title=event.node_data.title,
node_run_index=event.node_run_index,
inputs=event.inputs,
predecessor_node_id=event.predecessor_node_id
)
latest_node_execution_info = NodeExecutionInfo(
workflow_node_execution_id=workflow_node_execution.id,
node_type=NodeType.ITERATION,
start_at=time.perf_counter()
)
self._task_state.ran_node_execution_infos[event.node_id] = latest_node_execution_info
self._task_state.latest_node_execution_info = latest_node_execution_info
self._iteration_state.current_iterations[event.node_id] = WorkflowIterationState.Data(
parent_iteration_id=None,
iteration_id=event.node_id,
current_index=0,
iteration_steps_boundary=[],
node_execution_id=workflow_node_execution.id,
started_at=time.perf_counter(),
inputs=event.inputs,
total_tokens=0,
node_data=event.node_data
)
db.session.close()
return workflow_node_execution
def _handle_iteration_next(self, event: QueueIterationNextEvent) -> Optional[WorkflowNodeExecution]:
if event.node_id not in self._iteration_state.current_iterations:
return
current_iteration = self._iteration_state.current_iterations[event.node_id]
current_iteration.current_index = event.index
current_iteration.iteration_steps_boundary.append(event.node_run_index)
workflow_node_execution: WorkflowNodeExecution = db.session.query(WorkflowNodeExecution).filter(
WorkflowNodeExecution.id == current_iteration.node_execution_id
).first()
original_node_execution_metadata = workflow_node_execution.execution_metadata_dict
if original_node_execution_metadata:
original_node_execution_metadata['current_index'] = event.index
original_node_execution_metadata['steps_boundary'] = current_iteration.iteration_steps_boundary
original_node_execution_metadata['total_tokens'] = current_iteration.total_tokens
workflow_node_execution.execution_metadata = json.dumps(original_node_execution_metadata)
db.session.commit()
db.session.close()
def _handle_iteration_completed(self, event: QueueIterationCompletedEvent) -> Optional[WorkflowNodeExecution]:
if event.node_id not in self._iteration_state.current_iterations:
return
current_iteration = self._iteration_state.current_iterations[event.node_id]
workflow_node_execution: WorkflowNodeExecution = db.session.query(WorkflowNodeExecution).filter(
WorkflowNodeExecution.id == current_iteration.node_execution_id
).first()
workflow_node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED.value
workflow_node_execution.outputs = json.dumps(event.outputs) if event.outputs else None
workflow_node_execution.elapsed_time = time.perf_counter() - current_iteration.started_at
original_node_execution_metadata = workflow_node_execution.execution_metadata_dict
if original_node_execution_metadata:
original_node_execution_metadata['steps_boundary'] = current_iteration.iteration_steps_boundary
original_node_execution_metadata['total_tokens'] = current_iteration.total_tokens
workflow_node_execution.execution_metadata = json.dumps(original_node_execution_metadata)
db.session.commit()
# remove current iteration
self._iteration_state.current_iterations.pop(event.node_id, None)
# set latest node execution info
latest_node_execution_info = NodeExecutionInfo(
workflow_node_execution_id=workflow_node_execution.id,
node_type=NodeType.ITERATION,
start_at=time.perf_counter()
)
self._task_state.latest_node_execution_info = latest_node_execution_info
db.session.close()
def _handle_iteration_exception(self, task_id: str, error: str) -> Generator[IterationNodeCompletedStreamResponse, None, None]:
"""
Handle iteration exception
"""
if not self._iteration_state or not self._iteration_state.current_iterations:
return
for node_id, current_iteration in self._iteration_state.current_iterations.items():
workflow_node_execution: WorkflowNodeExecution = db.session.query(WorkflowNodeExecution).filter(
WorkflowNodeExecution.id == current_iteration.node_execution_id
).first()
workflow_node_execution.status = WorkflowNodeExecutionStatus.FAILED.value
workflow_node_execution.error = error
workflow_node_execution.elapsed_time = time.perf_counter() - current_iteration.started_at
db.session.commit()
db.session.close()
yield IterationNodeCompletedStreamResponse(
task_id=task_id,
workflow_run_id=self._task_state.workflow_run_id,
data=IterationNodeCompletedStreamResponse.Data(
id=node_id,
node_id=node_id,
node_type=NodeType.ITERATION.value,
title=current_iteration.node_data.title,
outputs={},
created_at=int(time.time()),
extras={},
inputs=current_iteration.inputs,
status=WorkflowNodeExecutionStatus.FAILED,
error=error,
elapsed_time=time.perf_counter() - current_iteration.started_at,
total_tokens=current_iteration.total_tokens,
finished_at=int(time.time()),
steps=current_iteration.current_index
)
)
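Taken together, the handlers above form a small event loop: `_handle_iteration_operation` persists execution state while `_handle_iteration_to_stream_response` produces the client-facing payload. A hedged sketch of a driver (the `events` iterable and `manager` instance are assumed, not part of this change):

def stream_iteration_events(manager: WorkflowIterationCycleManage, task_id: str, events):
    # Sketch: dispatch queue events through the manager and stream responses
    for event in events:
        if isinstance(event, (QueueIterationStartEvent, QueueIterationNextEvent, QueueIterationCompletedEvent)):
            manager._handle_iteration_operation(event)  # persist node execution state
            yield manager._handle_iteration_to_stream_response(task_id, event)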

View File

@ -16,6 +16,7 @@ class ModelStatus(Enum):
NO_CONFIGURE = "no-configure"
QUOTA_EXCEEDED = "quota-exceeded"
NO_PERMISSION = "no-permission"
DISABLED = "disabled"
class SimpleModelProviderEntity(BaseModel):
@ -43,12 +44,19 @@ class SimpleModelProviderEntity(BaseModel):
)
class ModelWithProviderEntity(ProviderModel):
class ProviderModelWithStatusEntity(ProviderModel):
"""
Model class for model response.
"""
status: ModelStatus
load_balancing_enabled: bool = False
class ModelWithProviderEntity(ProviderModelWithStatusEntity):
"""
Model with provider entity.
"""
provider: SimpleModelProviderEntity
status: ModelStatus
class DefaultModelProviderEntity(BaseModel):

View File

@ -1,6 +1,7 @@
import datetime
import json
import logging
from collections import defaultdict
from collections.abc import Iterator
from json import JSONDecodeError
from typing import Optional
@ -8,7 +9,12 @@ from typing import Optional
from pydantic import BaseModel
from core.entities.model_entities import ModelStatus, ModelWithProviderEntity, SimpleModelProviderEntity
from core.entities.provider_entities import CustomConfiguration, SystemConfiguration, SystemConfigurationStatus
from core.entities.provider_entities import (
CustomConfiguration,
ModelSettings,
SystemConfiguration,
SystemConfigurationStatus,
)
from core.helper import encrypter
from core.helper.model_provider_cache import ProviderCredentialsCache, ProviderCredentialsCacheType
from core.model_runtime.entities.model_entities import FetchFrom, ModelType
@ -22,7 +28,14 @@ from core.model_runtime.model_providers import model_provider_factory
from core.model_runtime.model_providers.__base.ai_model import AIModel
from core.model_runtime.model_providers.__base.model_provider import ModelProvider
from extensions.ext_database import db
from models.provider import Provider, ProviderModel, ProviderType, TenantPreferredModelProvider
from models.provider import (
LoadBalancingModelConfig,
Provider,
ProviderModel,
ProviderModelSetting,
ProviderType,
TenantPreferredModelProvider,
)
logger = logging.getLogger(__name__)
@ -39,6 +52,7 @@ class ProviderConfiguration(BaseModel):
using_provider_type: ProviderType
system_configuration: SystemConfiguration
custom_configuration: CustomConfiguration
model_settings: list[ModelSettings]
def __init__(self, **data):
super().__init__(**data)
@ -62,6 +76,14 @@ class ProviderConfiguration(BaseModel):
:param model: model name
:return:
"""
if self.model_settings:
# check if model is disabled by admin
for model_setting in self.model_settings:
if (model_setting.model_type == model_type
and model_setting.model == model):
if not model_setting.enabled:
raise ValueError(f'Model {model} is disabled.')
if self.using_provider_type == ProviderType.SYSTEM:
restrict_models = []
for quota_configuration in self.system_configuration.quota_configurations:
@ -80,15 +102,17 @@ class ProviderConfiguration(BaseModel):
return copy_credentials
else:
credentials = None
if self.custom_configuration.models:
for model_configuration in self.custom_configuration.models:
if model_configuration.model_type == model_type and model_configuration.model == model:
return model_configuration.credentials
credentials = model_configuration.credentials
break
if self.custom_configuration.provider:
return self.custom_configuration.provider.credentials
else:
return None
credentials = self.custom_configuration.provider.credentials
return credentials
def get_system_configuration_status(self) -> SystemConfigurationStatus:
"""
@ -130,7 +154,7 @@ class ProviderConfiguration(BaseModel):
return credentials
# Obfuscate credentials
return self._obfuscated_credentials(
return self.obfuscated_credentials(
credentials=credentials,
credential_form_schemas=self.provider.provider_credential_schema.credential_form_schemas
if self.provider.provider_credential_schema else []
@ -151,7 +175,7 @@ class ProviderConfiguration(BaseModel):
).first()
# Get provider credential secret variables
provider_credential_secret_variables = self._extract_secret_variables(
provider_credential_secret_variables = self.extract_secret_variables(
self.provider.provider_credential_schema.credential_form_schemas
if self.provider.provider_credential_schema else []
)
@ -274,7 +298,7 @@ class ProviderConfiguration(BaseModel):
return credentials
# Obfuscate credentials
return self._obfuscated_credentials(
return self.obfuscated_credentials(
credentials=credentials,
credential_form_schemas=self.provider.model_credential_schema.credential_form_schemas
if self.provider.model_credential_schema else []
@ -302,7 +326,7 @@ class ProviderConfiguration(BaseModel):
).first()
# Get provider credential secret variables
provider_credential_secret_variables = self._extract_secret_variables(
provider_credential_secret_variables = self.extract_secret_variables(
self.provider.model_credential_schema.credential_form_schemas
if self.provider.model_credential_schema else []
)
@ -402,6 +426,160 @@ class ProviderConfiguration(BaseModel):
provider_model_credentials_cache.delete()
def enable_model(self, model_type: ModelType, model: str) -> ProviderModelSetting:
"""
Enable model.
:param model_type: model type
:param model: model name
:return:
"""
model_setting = db.session.query(ProviderModelSetting) \
.filter(
ProviderModelSetting.tenant_id == self.tenant_id,
ProviderModelSetting.provider_name == self.provider.provider,
ProviderModelSetting.model_type == model_type.to_origin_model_type(),
ProviderModelSetting.model_name == model
).first()
if model_setting:
model_setting.enabled = True
model_setting.updated_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
db.session.commit()
else:
model_setting = ProviderModelSetting(
tenant_id=self.tenant_id,
provider_name=self.provider.provider,
model_type=model_type.to_origin_model_type(),
model_name=model,
enabled=True
)
db.session.add(model_setting)
db.session.commit()
return model_setting
def disable_model(self, model_type: ModelType, model: str) -> ProviderModelSetting:
"""
Disable model.
:param model_type: model type
:param model: model name
:return:
"""
model_setting = db.session.query(ProviderModelSetting) \
.filter(
ProviderModelSetting.tenant_id == self.tenant_id,
ProviderModelSetting.provider_name == self.provider.provider,
ProviderModelSetting.model_type == model_type.to_origin_model_type(),
ProviderModelSetting.model_name == model
).first()
if model_setting:
model_setting.enabled = False
model_setting.updated_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
db.session.commit()
else:
model_setting = ProviderModelSetting(
tenant_id=self.tenant_id,
provider_name=self.provider.provider,
model_type=model_type.to_origin_model_type(),
model_name=model,
enabled=False
)
db.session.add(model_setting)
db.session.commit()
return model_setting
def get_provider_model_setting(self, model_type: ModelType, model: str) -> Optional[ProviderModelSetting]:
"""
Get provider model setting.
:param model_type: model type
:param model: model name
:return:
"""
return db.session.query(ProviderModelSetting) \
.filter(
ProviderModelSetting.tenant_id == self.tenant_id,
ProviderModelSetting.provider_name == self.provider.provider,
ProviderModelSetting.model_type == model_type.to_origin_model_type(),
ProviderModelSetting.model_name == model
).first()
def enable_model_load_balancing(self, model_type: ModelType, model: str) -> ProviderModelSetting:
"""
Enable model load balancing.
:param model_type: model type
:param model: model name
:return:
"""
load_balancing_config_count = db.session.query(LoadBalancingModelConfig) \
.filter(
LoadBalancingModelConfig.tenant_id == self.tenant_id,
LoadBalancingModelConfig.provider_name == self.provider.provider,
LoadBalancingModelConfig.model_type == model_type.to_origin_model_type(),
LoadBalancingModelConfig.model_name == model
).count()
if load_balancing_config_count <= 1:
raise ValueError('Model load balancing requires more than one configuration.')
model_setting = db.session.query(ProviderModelSetting) \
.filter(
ProviderModelSetting.tenant_id == self.tenant_id,
ProviderModelSetting.provider_name == self.provider.provider,
ProviderModelSetting.model_type == model_type.to_origin_model_type(),
ProviderModelSetting.model_name == model
).first()
if model_setting:
model_setting.load_balancing_enabled = True
model_setting.updated_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
db.session.commit()
else:
model_setting = ProviderModelSetting(
tenant_id=self.tenant_id,
provider_name=self.provider.provider,
model_type=model_type.to_origin_model_type(),
model_name=model,
load_balancing_enabled=True
)
db.session.add(model_setting)
db.session.commit()
return model_setting
def disable_model_load_balancing(self, model_type: ModelType, model: str) -> ProviderModelSetting:
"""
Disable model load balancing.
:param model_type: model type
:param model: model name
:return:
"""
model_setting = db.session.query(ProviderModelSetting) \
.filter(
ProviderModelSetting.tenant_id == self.tenant_id,
ProviderModelSetting.provider_name == self.provider.provider,
ProviderModelSetting.model_type == model_type.to_origin_model_type(),
ProviderModelSetting.model_name == model
).first()
if model_setting:
model_setting.load_balancing_enabled = False
model_setting.updated_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
db.session.commit()
else:
model_setting = ProviderModelSetting(
tenant_id=self.tenant_id,
provider_name=self.provider.provider,
model_type=model_type.to_origin_model_type(),
model_name=model,
load_balancing_enabled=False
)
db.session.add(model_setting)
db.session.commit()
return model_setting
def get_provider_instance(self) -> ModelProvider:
"""
Get provider instance.
@ -453,7 +631,7 @@ class ProviderConfiguration(BaseModel):
db.session.commit()
def _extract_secret_variables(self, credential_form_schemas: list[CredentialFormSchema]) -> list[str]:
def extract_secret_variables(self, credential_form_schemas: list[CredentialFormSchema]) -> list[str]:
"""
Extract secret input form variables.
@ -467,7 +645,7 @@ class ProviderConfiguration(BaseModel):
return secret_input_form_variables
def _obfuscated_credentials(self, credentials: dict, credential_form_schemas: list[CredentialFormSchema]) -> dict:
def obfuscated_credentials(self, credentials: dict, credential_form_schemas: list[CredentialFormSchema]) -> dict:
"""
Obfuscated credentials.
@ -476,7 +654,7 @@ class ProviderConfiguration(BaseModel):
:return:
"""
# Get provider credential secret variables
credential_secret_variables = self._extract_secret_variables(
credential_secret_variables = self.extract_secret_variables(
credential_form_schemas
)
@ -522,15 +700,22 @@ class ProviderConfiguration(BaseModel):
else:
model_types = provider_instance.get_provider_schema().supported_model_types
# Group model settings by model type and model
model_setting_map = defaultdict(dict)
for model_setting in self.model_settings:
model_setting_map[model_setting.model_type][model_setting.model] = model_setting
if self.using_provider_type == ProviderType.SYSTEM:
provider_models = self._get_system_provider_models(
model_types=model_types,
provider_instance=provider_instance
provider_instance=provider_instance,
model_setting_map=model_setting_map
)
else:
provider_models = self._get_custom_provider_models(
model_types=model_types,
provider_instance=provider_instance
provider_instance=provider_instance,
model_setting_map=model_setting_map
)
if only_active:
@ -541,18 +726,27 @@ class ProviderConfiguration(BaseModel):
def _get_system_provider_models(self,
model_types: list[ModelType],
provider_instance: ModelProvider) -> list[ModelWithProviderEntity]:
provider_instance: ModelProvider,
model_setting_map: dict[ModelType, dict[str, ModelSettings]]) \
-> list[ModelWithProviderEntity]:
"""
Get system provider models.
:param model_types: model types
:param provider_instance: provider instance
:param model_setting_map: model setting map
:return:
"""
provider_models = []
for model_type in model_types:
provider_models.extend(
[
for m in provider_instance.models(model_type):
status = ModelStatus.ACTIVE
if m.model_type in model_setting_map and m.model in model_setting_map[m.model_type]:
model_setting = model_setting_map[m.model_type][m.model]
if model_setting.enabled is False:
status = ModelStatus.DISABLED
provider_models.append(
ModelWithProviderEntity(
model=m.model,
label=m.label,
@ -562,11 +756,9 @@ class ProviderConfiguration(BaseModel):
model_properties=m.model_properties,
deprecated=m.deprecated,
provider=SimpleModelProviderEntity(self.provider),
status=ModelStatus.ACTIVE
status=status
)
for m in provider_instance.models(model_type)
]
)
)
if self.provider.provider not in original_provider_configurate_methods:
original_provider_configurate_methods[self.provider.provider] = []
@ -586,7 +778,8 @@ class ProviderConfiguration(BaseModel):
break
if should_use_custom_model:
if original_provider_configurate_methods[self.provider.provider] == [ConfigurateMethod.CUSTOMIZABLE_MODEL]:
if original_provider_configurate_methods[self.provider.provider] == [
ConfigurateMethod.CUSTOMIZABLE_MODEL]:
# only customizable model
for restrict_model in restrict_models:
copy_credentials = self.system_configuration.credentials.copy()
@ -611,6 +804,13 @@ class ProviderConfiguration(BaseModel):
if custom_model_schema.model_type not in model_types:
continue
status = ModelStatus.ACTIVE
if (custom_model_schema.model_type in model_setting_map
and custom_model_schema.model in model_setting_map[custom_model_schema.model_type]):
model_setting = model_setting_map[custom_model_schema.model_type][custom_model_schema.model]
if model_setting.enabled is False:
status = ModelStatus.DISABLED
provider_models.append(
ModelWithProviderEntity(
model=custom_model_schema.model,
@ -621,7 +821,7 @@ class ProviderConfiguration(BaseModel):
model_properties=custom_model_schema.model_properties,
deprecated=custom_model_schema.deprecated,
provider=SimpleModelProviderEntity(self.provider),
status=ModelStatus.ACTIVE
status=status
)
)
@ -632,16 +832,20 @@ class ProviderConfiguration(BaseModel):
m.status = ModelStatus.NO_PERMISSION
elif not quota_configuration.is_valid:
m.status = ModelStatus.QUOTA_EXCEEDED
return provider_models
def _get_custom_provider_models(self,
model_types: list[ModelType],
provider_instance: ModelProvider) -> list[ModelWithProviderEntity]:
provider_instance: ModelProvider,
model_setting_map: dict[ModelType, dict[str, ModelSettings]]) \
-> list[ModelWithProviderEntity]:
"""
Get custom provider models.
:param model_types: model types
:param provider_instance: provider instance
:param model_setting_map: model setting map
:return:
"""
provider_models = []
@ -656,6 +860,16 @@ class ProviderConfiguration(BaseModel):
models = provider_instance.models(model_type)
for m in models:
status = ModelStatus.ACTIVE if credentials else ModelStatus.NO_CONFIGURE
load_balancing_enabled = False
if m.model_type in model_setting_map and m.model in model_setting_map[m.model_type]:
model_setting = model_setting_map[m.model_type][m.model]
if model_setting.enabled is False:
status = ModelStatus.DISABLED
if len(model_setting.load_balancing_configs) > 1:
load_balancing_enabled = True
provider_models.append(
ModelWithProviderEntity(
model=m.model,
@ -666,7 +880,8 @@ class ProviderConfiguration(BaseModel):
model_properties=m.model_properties,
deprecated=m.deprecated,
provider=SimpleModelProviderEntity(self.provider),
status=ModelStatus.ACTIVE if credentials else ModelStatus.NO_CONFIGURE
status=status,
load_balancing_enabled=load_balancing_enabled
)
)
@ -690,6 +905,17 @@ class ProviderConfiguration(BaseModel):
if not custom_model_schema:
continue
status = ModelStatus.ACTIVE
load_balancing_enabled = False
if (custom_model_schema.model_type in model_setting_map
and custom_model_schema.model in model_setting_map[custom_model_schema.model_type]):
model_setting = model_setting_map[custom_model_schema.model_type][custom_model_schema.model]
if model_setting.enabled is False:
status = ModelStatus.DISABLED
if len(model_setting.load_balancing_configs) > 1:
load_balancing_enabled = True
provider_models.append(
ModelWithProviderEntity(
model=custom_model_schema.model,
@ -700,7 +926,8 @@ class ProviderConfiguration(BaseModel):
model_properties=custom_model_schema.model_properties,
deprecated=custom_model_schema.deprecated,
provider=SimpleModelProviderEntity(self.provider),
status=ModelStatus.ACTIVE
status=status,
load_balancing_enabled=load_balancing_enabled
)
)

View File

@ -72,3 +72,22 @@ class CustomConfiguration(BaseModel):
"""
provider: Optional[CustomProviderConfiguration] = None
models: list[CustomModelConfiguration] = []
class ModelLoadBalancingConfiguration(BaseModel):
"""
Class for model load balancing configuration.
"""
id: str
name: str
credentials: dict
class ModelSettings(BaseModel):
"""
Model class for model settings.
"""
model: str
model_type: ModelType
enabled: bool = True
load_balancing_configs: list[ModelLoadBalancingConfiguration] = []
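For illustration, a settings object for a model that is disabled but carries two load-balancing credential sets (all values here are made up); note that elsewhere in this change, having more than one config is what flags `load_balancing_enabled` on the model listing:

# Sketch: a disabled model with two load-balancing credential sets
setting = ModelSettings(
    model='gpt-4',
    model_type=ModelType.LLM,
    enabled=False,
    load_balancing_configs=[
        ModelLoadBalancingConfiguration(id='cfg-1', name='key-a', credentials={'api_key': '...'}),
        ModelLoadBalancingConfiguration(id='cfg-2', name='key-b', credentials={'api_key': '...'}),
    ],
)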

View File

@ -7,7 +7,7 @@ from typing import Any, Optional
from pydantic import BaseModel
from core.utils.position_helper import sort_to_dict_by_position_map
from core.helper.position_helper import sort_to_dict_by_position_map
class ExtensionModule(enum.Enum):

View File

@ -42,6 +42,8 @@ class MessageFileParser:
raise ValueError('Invalid file url')
if file.get('transfer_method') == FileTransferMethod.LOCAL_FILE.value and not file.get('upload_file_id'):
raise ValueError('Missing file upload_file_id')
if file.get('transfer_method') == FileTransferMethod.TOOL_FILE.value and not file.get('tool_file_id'):
raise ValueError('Missing file tool_file_id')
# transform files to file objs
type_file_objs = self._to_file_objs(files, file_extra_config)
@ -149,12 +151,21 @@ class MessageFileParser:
"""
if isinstance(file, dict):
transfer_method = FileTransferMethod.value_of(file.get('transfer_method'))
if transfer_method != FileTransferMethod.TOOL_FILE:
return FileVar(
tenant_id=self.tenant_id,
type=FileType.value_of(file.get('type')),
transfer_method=transfer_method,
url=file.get('url') if transfer_method == FileTransferMethod.REMOTE_URL else None,
related_id=file.get('upload_file_id') if transfer_method == FileTransferMethod.LOCAL_FILE else None,
extra_config=file_extra_config
)
return FileVar(
tenant_id=self.tenant_id,
type=FileType.value_of(file.get('type')),
transfer_method=transfer_method,
url=file.get('url') if transfer_method == FileTransferMethod.REMOTE_URL else None,
related_id=file.get('upload_file_id') if transfer_method == FileTransferMethod.LOCAL_FILE else None,
url=None,
related_id=file.get('tool_file_id'),
extra_config=file_extra_config
)
else:

View File

@ -77,4 +77,4 @@ class UploadFileParser:
return False
current_time = int(time.time())
return current_time - int(timestamp) <= 300 # expired after 5 minutes
return current_time - int(timestamp) <= current_app.config.get('FILES_ACCESS_TIMEOUT')

View File

@ -1,13 +1,21 @@
import logging
import time
from enum import Enum
from threading import Lock
from typing import Literal, Optional
from httpx import post
from httpx import get, post
from pydantic import BaseModel
from yarl import URL
from config import get_env
from core.helper.code_executor.javascript_transformer import NodeJsTemplateTransformer
from core.helper.code_executor.jinja2_transformer import Jinja2TemplateTransformer
from core.helper.code_executor.python_transformer import PythonTemplateTransformer
from core.helper.code_executor.entities import CodeDependency
from core.helper.code_executor.javascript.javascript_transformer import NodeJsTemplateTransformer
from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer
from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
from core.helper.code_executor.template_transformer import TemplateTransformer
logger = logging.getLogger(__name__)
# Code Executor
CODE_EXECUTION_ENDPOINT = get_env('CODE_EXECUTION_ENDPOINT')
@ -28,9 +36,38 @@ class CodeExecutionResponse(BaseModel):
data: Data
class CodeLanguage(str, Enum):
PYTHON3 = 'python3'
JINJA2 = 'jinja2'
JAVASCRIPT = 'javascript'
class CodeExecutor:
dependencies_cache = {}
dependencies_cache_lock = Lock()
code_template_transformers: dict[CodeLanguage, type[TemplateTransformer]] = {
CodeLanguage.PYTHON3: Python3TemplateTransformer,
CodeLanguage.JINJA2: Jinja2TemplateTransformer,
CodeLanguage.JAVASCRIPT: NodeJsTemplateTransformer,
}
code_language_to_running_language = {
CodeLanguage.JAVASCRIPT: 'nodejs',
CodeLanguage.JINJA2: CodeLanguage.PYTHON3,
CodeLanguage.PYTHON3: CodeLanguage.PYTHON3,
}
supported_dependencies_languages: set[CodeLanguage] = {
CodeLanguage.PYTHON3
}
@classmethod
def execute_code(cls, language: Literal['python3', 'javascript', 'jinja2'], preload: str, code: str) -> str:
def execute_code(cls,
language: Literal['python3', 'javascript', 'jinja2'],
preload: str,
code: str,
dependencies: Optional[list[CodeDependency]] = None) -> str:
"""
Execute code
:param language: code language
@ -44,13 +81,15 @@ class CodeExecutor:
}
data = {
'language': 'python3' if language == 'jinja2' else
'nodejs' if language == 'javascript' else
'python3' if language == 'python3' else None,
'language': cls.code_language_to_running_language.get(language),
'code': code,
'preload': preload
'preload': preload,
'enable_network': True
}
if dependencies:
data['dependencies'] = [dependency.dict() for dependency in dependencies]
try:
response = post(str(url), json=data, headers=headers, timeout=CODE_EXECUTION_TIMEOUT)
if response.status_code == 503:
@ -60,7 +99,9 @@ class CodeExecutor:
except CodeExecutionException as e:
raise e
except Exception as e:
raise CodeExecutionException('Failed to execute code, this is likely a network issue, please check if the sandbox service is running')
raise CodeExecutionException('Failed to execute code; this is likely a network issue.'
                             ' Please check if the sandbox service is running.'
                             f' (Error: {str(e)})')
try:
response = response.json()
@ -78,7 +119,7 @@ class CodeExecutor:
return response.data.stdout
@classmethod
def execute_workflow_code_template(cls, language: Literal['python3', 'javascript', 'jinja2'], code: str, inputs: dict) -> dict:
def execute_workflow_code_template(cls, language: Literal['python3', 'javascript', 'jinja2'], code: str, inputs: dict, dependencies: Optional[list[CodeDependency]] = None) -> dict:
"""
Execute code
:param language: code language
@ -86,21 +127,71 @@ class CodeExecutor:
:param inputs: inputs
:return:
"""
template_transformer = None
if language == 'python3':
template_transformer = PythonTemplateTransformer
elif language == 'jinja2':
template_transformer = Jinja2TemplateTransformer
elif language == 'javascript':
template_transformer = NodeJsTemplateTransformer
else:
raise CodeExecutionException('Unsupported language')
template_transformer = cls.code_template_transformers.get(language)
if not template_transformer:
raise CodeExecutionException(f'Unsupported language {language}')
runner, preload = template_transformer.transform_caller(code, inputs)
runner, preload, dependencies = template_transformer.transform_caller(code, inputs, dependencies)
try:
response = cls.execute_code(language, preload, runner)
response = cls.execute_code(language, preload, runner, dependencies)
except CodeExecutionException as e:
raise e
return template_transformer.transform_response(response)
@classmethod
def list_dependencies(cls, language: str) -> list[CodeDependency]:
if language not in cls.supported_dependencies_languages:
return []
with cls.dependencies_cache_lock:
if language in cls.dependencies_cache:
# check expiration
dependencies = cls.dependencies_cache[language]
if dependencies['expiration'] > time.time():
return dependencies['data']
# remove expired cache
del cls.dependencies_cache[language]
dependencies = cls._get_dependencies(language)
with cls.dependencies_cache_lock:
cls.dependencies_cache[language] = {
'data': dependencies,
'expiration': time.time() + 60
}
return dependencies
@classmethod
def _get_dependencies(cls, language: Literal['python3']) -> list[CodeDependency]:
"""
List dependencies
"""
url = URL(CODE_EXECUTION_ENDPOINT) / 'v1' / 'sandbox' / 'dependencies'
headers = {
'X-Api-Key': CODE_EXECUTION_API_KEY
}
running_language = cls.code_language_to_running_language.get(language)
if isinstance(running_language, Enum):
running_language = running_language.value
data = {
'language': running_language,
}
try:
response = get(str(url), params=data, headers=headers, timeout=CODE_EXECUTION_TIMEOUT)
if response.status_code != 200:
raise Exception(f'Failed to list dependencies, got status code {response.status_code}, please check if the sandbox service is running')
response = response.json()
dependencies = response.get('data', {}).get('dependencies', [])
return [
CodeDependency(**dependency) for dependency in dependencies
if dependency.get('name') not in Python3TemplateTransformer.get_standard_packages()
]
except Exception as e:
logger.exception(f'Failed to list dependencies: {e}')
return []
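As a usage sketch, a trivial snippet run end to end through the executor above (assumes CODE_EXECUTION_ENDPOINT and CODE_EXECUTION_API_KEY point at a running sandbox):

# Sketch: execute a python3 template via the sandbox
result = CodeExecutor.execute_workflow_code_template(
    language=CodeLanguage.PYTHON3,
    code='def main(x: int) -> dict:\n    return {"doubled": x * 2}',
    inputs={'x': 21},
)
# expected: result == {'doubled': 42}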

View File

@ -0,0 +1,55 @@
from abc import abstractmethod
from pydantic import BaseModel
from core.helper.code_executor.code_executor import CodeExecutor
class CodeNodeProvider(BaseModel):
@staticmethod
@abstractmethod
def get_language() -> str:
pass
@classmethod
def is_accept_language(cls, language: str) -> bool:
return language == cls.get_language()
@classmethod
@abstractmethod
def get_default_code(cls) -> str:
"""
get default code in specific programming language for the code node
"""
pass
@classmethod
def get_default_available_packages(cls) -> list[dict]:
return [p.dict() for p in CodeExecutor.list_dependencies(cls.get_language())]
@classmethod
def get_default_config(cls) -> dict:
return {
"type": "code",
"config": {
"variables": [
{
"variable": "arg1",
"value_selector": []
},
{
"variable": "arg2",
"value_selector": []
}
],
"code_language": cls.get_language(),
"code": cls.get_default_code(),
"outputs": {
"result": {
"type": "string",
"children": None
}
}
},
"available_dependencies": cls.get_default_available_packages(),
}

View File

@ -0,0 +1,6 @@
from pydantic import BaseModel
class CodeDependency(BaseModel):
name: str
version: str

View File

@ -0,0 +1,21 @@
from textwrap import dedent
from core.helper.code_executor.code_executor import CodeLanguage
from core.helper.code_executor.code_node_provider import CodeNodeProvider
class JavascriptCodeProvider(CodeNodeProvider):
@staticmethod
def get_language() -> str:
return CodeLanguage.JAVASCRIPT
@classmethod
def get_default_code(cls) -> str:
return dedent(
"""
function main({arg1, arg2}) {
return {
result: arg1 + arg2
}
}
""")

View File

@ -0,0 +1,25 @@
from textwrap import dedent
from core.helper.code_executor.template_transformer import TemplateTransformer
class NodeJsTemplateTransformer(TemplateTransformer):
@classmethod
def get_runner_script(cls) -> str:
runner_script = dedent(
f"""
// declare main function
{cls._code_placeholder}
// decode and prepare input object
var inputs_obj = JSON.parse(Buffer.from('{cls._inputs_placeholder}', 'base64').toString('utf-8'))
// execute main function
var output_obj = main(inputs_obj)
// convert output to json and print
var output_json = JSON.stringify(output_obj)
var result = `<<RESULT>>${{output_json}}<<RESULT>>`
console.log(result)
""")
return runner_script

View File

@ -1,54 +0,0 @@
import json
import re
from core.helper.code_executor.template_transformer import TemplateTransformer
NODEJS_RUNNER = """// declare main function here
{{code}}
// execute main function, and return the result
// inputs is a dict, unstructured inputs
output = main({{inputs}})
// convert output to json and print
output = JSON.stringify(output)
result = `<<RESULT>>${output}<<RESULT>>`
console.log(result)
"""
NODEJS_PRELOAD = """"""
class NodeJsTemplateTransformer(TemplateTransformer):
@classmethod
def transform_caller(cls, code: str, inputs: dict) -> tuple[str, str]:
"""
Transform code to python runner
:param code: code
:param inputs: inputs
:return:
"""
# transform inputs to json string
inputs_str = json.dumps(inputs, indent=4, ensure_ascii=False)
# replace code and inputs
runner = NODEJS_RUNNER.replace('{{code}}', code)
runner = runner.replace('{{inputs}}', inputs_str)
return runner, NODEJS_PRELOAD
@classmethod
def transform_response(cls, response: str) -> dict:
"""
Transform response to dict
:param response: response
:return:
"""
# extract result
result = re.search(r'<<RESULT>>(.*)<<RESULT>>', response, re.DOTALL)
if not result:
raise ValueError('Failed to parse result')
result = result.group(1)
return json.loads(result)

View File

@ -0,0 +1,17 @@
from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage
class Jinja2Formatter:
@classmethod
def format(cls, template: str, inputs: dict) -> str:
"""
Format template
:param template: template
:param inputs: inputs
:return:
"""
result = CodeExecutor.execute_workflow_code_template(
language=CodeLanguage.JINJA2, code=template, inputs=inputs
)
return result['result']
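A usage sketch (assumes a reachable sandbox, since rendering is delegated to the code executor):

# Sketch: render a Jinja2 template through the sandboxed executor
greeting = Jinja2Formatter.format('Hello, {{ name }}!', {'name': 'Dify'})
# expected: greeting == 'Hello, Dify!'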

View File

@ -0,0 +1,64 @@
from textwrap import dedent
from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
from core.helper.code_executor.template_transformer import TemplateTransformer
class Jinja2TemplateTransformer(TemplateTransformer):
@classmethod
def get_standard_packages(cls) -> set[str]:
return {'jinja2'} | Python3TemplateTransformer.get_standard_packages()
@classmethod
def transform_response(cls, response: str) -> dict:
"""
Transform response to dict
:param response: response
:return:
"""
return {
'result': cls.extract_result_str_from_response(response)
}
@classmethod
def get_runner_script(cls) -> str:
runner_script = dedent(f"""
# declare main function
def main(**inputs):
import jinja2
template = jinja2.Template('''{cls._code_placeholder}''')
return template.render(**inputs)
import json
from base64 import b64decode
# decode and prepare input dict
inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))
# execute main function
output = main(**inputs_obj)
# convert output and print
result = f'''<<RESULT>>{{output}}<<RESULT>>'''
print(result)
""")
return runner_script
@classmethod
def get_preload_script(cls) -> str:
preload_script = dedent("""
import jinja2
from base64 import b64decode
def _jinja2_preload_():
# prepare jinja2 environment, load template and render before to avoid sandbox issue
template = jinja2.Template('{{s}}')
template.render(s='a')
if __name__ == '__main__':
_jinja2_preload_()
""")
return preload_script

View File

@ -1,92 +0,0 @@
import json
import re
from base64 import b64encode
from core.helper.code_executor.template_transformer import TemplateTransformer
PYTHON_RUNNER = """
import jinja2
from json import loads
from base64 import b64decode
template = jinja2.Template('''{{code}}''')
def main(**inputs):
return template.render(**inputs)
# execute main function, and return the result
inputs = b64decode('{{inputs}}').decode('utf-8')
output = main(**loads(inputs))
result = f'''<<RESULT>>{output}<<RESULT>>'''
print(result)
"""
JINJA2_PRELOAD_TEMPLATE = """{% set fruits = ['Apple'] %}
{{ 'a' }}
{% for fruit in fruits %}
<li>{{ fruit }}</li>
{% endfor %}
{% if fruits|length > 1 %}
1
{% endif %}
{% for i in range(5) %}
{% if i == 3 %}{{ i }}{% else %}{% endif %}
{% endfor %}
{% for i in range(3) %}
{{ i + 1 }}
{% endfor %}
{% macro say_hello() %}a{{ 'b' }}{% endmacro %}
{{ s }}{{ say_hello() }}"""
JINJA2_PRELOAD = f"""
import jinja2
from base64 import b64decode
def _jinja2_preload_():
# prepare jinja2 environment, load template and render before to avoid sandbox issue
template = jinja2.Template('''{JINJA2_PRELOAD_TEMPLATE}''')
template.render(s='a')
if __name__ == '__main__':
_jinja2_preload_()
"""
class Jinja2TemplateTransformer(TemplateTransformer):
@classmethod
def transform_caller(cls, code: str, inputs: dict) -> tuple[str, str]:
"""
Transform code to python runner
:param code: code
:param inputs: inputs
:return:
"""
inputs_str = b64encode(json.dumps(inputs, ensure_ascii=False).encode()).decode('utf-8')
# transform jinja2 template to python code
runner = PYTHON_RUNNER.replace('{{code}}', code)
runner = runner.replace('{{inputs}}', inputs_str)
return runner, JINJA2_PRELOAD
@classmethod
def transform_response(cls, response: str) -> dict:
"""
Transform response to dict
:param response: response
:return:
"""
# extract result
result = re.search(r'<<RESULT>>(.*)<<RESULT>>', response, re.DOTALL)
if not result:
raise ValueError('Failed to parse result')
result = result.group(1)
return {
'result': result
}

View File

@ -0,0 +1,20 @@
from textwrap import dedent
from core.helper.code_executor.code_executor import CodeLanguage
from core.helper.code_executor.code_node_provider import CodeNodeProvider
class Python3CodeProvider(CodeNodeProvider):
@staticmethod
def get_language() -> str:
return CodeLanguage.PYTHON3
@classmethod
def get_default_code(cls) -> str:
return dedent(
"""
def main(arg1: int, arg2: int) -> dict:
return {
"result": arg1 + arg2,
}
""")

View File

@ -0,0 +1,51 @@
from textwrap import dedent
from core.helper.code_executor.template_transformer import TemplateTransformer
class Python3TemplateTransformer(TemplateTransformer):
@classmethod
def get_standard_packages(cls) -> set[str]:
return {
'base64',
'binascii',
'collections',
'datetime',
'functools',
'hashlib',
'hmac',
'itertools',
'json',
'math',
'operator',
'os',
'random',
're',
'string',
'sys',
'time',
'traceback',
'uuid',
}
@classmethod
def get_runner_script(cls) -> str:
runner_script = dedent(f"""
# declare main function
{cls._code_placeholder}
import json
from base64 import b64decode
# decode and prepare input dict
inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))
# execute main function
output_obj = main(**inputs_obj)
# convert output to json and print
output_json = json.dumps(output_obj, indent=4)
result = f'''<<RESULT>>{{output_json}}<<RESULT>>'''
print(result)
""")
return runner_script

View File

@ -1,82 +0,0 @@
import json
import re
from base64 import b64encode
from core.helper.code_executor.template_transformer import TemplateTransformer
PYTHON_RUNNER = """# declare main function here
{{code}}
from json import loads, dumps
from base64 import b64decode
# execute main function, and return the result
# inputs is a dict, and it
inputs = b64decode('{{inputs}}').decode('utf-8')
output = main(**json.loads(inputs))
# convert output to json and print
output = dumps(output, indent=4)
result = f'''<<RESULT>>
{output}
<<RESULT>>'''
print(result)
"""
PYTHON_PRELOAD = """
# prepare general imports
import json
import datetime
import math
import random
import re
import string
import sys
import time
import traceback
import uuid
import os
import base64
import hashlib
import hmac
import binascii
import collections
import functools
import operator
import itertools
"""
class PythonTemplateTransformer(TemplateTransformer):
@classmethod
def transform_caller(cls, code: str, inputs: dict) -> tuple[str, str]:
"""
Transform code to python runner
:param code: code
:param inputs: inputs
:return:
"""
# transform inputs to json string
inputs_str = b64encode(json.dumps(inputs, ensure_ascii=False).encode()).decode('utf-8')
# replace code and inputs
runner = PYTHON_RUNNER.replace('{{code}}', code)
runner = runner.replace('{{inputs}}', inputs_str)
return runner, PYTHON_PRELOAD
@classmethod
def transform_response(cls, response: str) -> dict:
"""
Transform response to dict
:param response: response
:return:
"""
# extract result
result = re.search(r'<<RESULT>>(.*?)<<RESULT>>', response, re.DOTALL)
if not result:
raise ValueError('Failed to parse result')
result = result.group(1)
return json.loads(result)

View File

@ -1,24 +1,87 @@
import json
import re
from abc import ABC, abstractmethod
from base64 import b64encode
from typing import Optional
from pydantic import BaseModel
from core.helper.code_executor.entities import CodeDependency
class TemplateTransformer(ABC):
class TemplateTransformer(ABC, BaseModel):
_code_placeholder: str = '{{code}}'
_inputs_placeholder: str = '{{inputs}}'
_result_tag: str = '<<RESULT>>'
@classmethod
@abstractmethod
def transform_caller(cls, code: str, inputs: dict) -> tuple[str, str]:
def get_standard_packages(cls) -> set[str]:
return set()
@classmethod
def transform_caller(cls, code: str, inputs: dict,
dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]:
"""
Transform code to python runner
:param code: code
:param inputs: inputs
:return: runner, preload, dependencies
"""
pass
runner_script = cls.assemble_runner_script(code, inputs)
preload_script = cls.get_preload_script()
packages = list(dependencies or [])
standard_packages = cls.get_standard_packages()
existing_names = {dep.name for dep in packages}
for package in standard_packages:
    if package not in existing_names:
        packages.append(CodeDependency(name=package, version=''))
packages = list({dep.name: dep for dep in packages if dep.name}.values())
return runner_script, preload_script, packages
@classmethod
def extract_result_str_from_response(cls, response: str) -> str:
result = re.search(rf'{cls._result_tag}(.*){cls._result_tag}', response, re.DOTALL)
if not result:
raise ValueError('Failed to parse result')
result = result.group(1)
return result
@classmethod
@abstractmethod
def transform_response(cls, response: str) -> dict:
"""
Transform response to dict
:param response: response
:return:
"""
pass
return json.loads(cls.extract_result_str_from_response(response))
@classmethod
@abstractmethod
def get_runner_script(cls) -> str:
"""
Get runner script
"""
pass
@classmethod
def serialize_inputs(cls, inputs: dict) -> str:
inputs_json_str = json.dumps(inputs, ensure_ascii=False).encode()
input_base64_encoded = b64encode(inputs_json_str).decode('utf-8')
return input_base64_encoded
@classmethod
def assemble_runner_script(cls, code: str, inputs: dict) -> str:
# assemble runner script
script = cls.get_runner_script()
script = script.replace(cls._code_placeholder, code)
inputs_str = cls.serialize_inputs(inputs)
script = script.replace(cls._inputs_placeholder, inputs_str)
return script
@classmethod
def get_preload_script(cls) -> str:
"""
Get preload script
"""
return ''
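To see the pieces fit together, a sketch wiring the base class through the Python3 transformer defined earlier (results described in comments):

# Sketch: assemble a runner for a trivial main() and inspect the result triple
runner, preload, deps = Python3TemplateTransformer.transform_caller(
    code='def main(a: int) -> dict:\n    return {"a": a}',
    inputs={'a': 1},
)
# `runner` is the runner script with the code and base64-encoded inputs spliced in,
# `preload` is '' for python3, and `deps` lists the standard packages with empty versions.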

View File

@ -9,6 +9,7 @@ from extensions.ext_redis import redis_client
class ProviderCredentialsCacheType(Enum):
PROVIDER = "provider"
MODEL = "provider_model"
LOAD_BALANCING_MODEL = "load_balancing_provider_model"
class ProviderCredentialsCache:

View File

@ -1,10 +1,9 @@
import logging
import os
from collections import OrderedDict
from collections.abc import Callable
from typing import Any, AnyStr
import yaml
from core.tools.utils.yaml_utils import load_yaml_file
def get_position_map(
@ -17,21 +16,15 @@ def get_position_map(
:param file_name: the YAML file name, default to '_position.yaml'
:return: a dict with name as key and index as value
"""
try:
position_file_name = os.path.join(folder_path, file_name)
if not os.path.exists(position_file_name):
return {}
with open(position_file_name, encoding='utf-8') as f:
positions = yaml.safe_load(f)
position_map = {}
for index, name in enumerate(positions):
if name and isinstance(name, str):
position_map[name.strip()] = index
return position_map
except:
logging.warning(f'Failed to load the YAML position file {folder_path}/{file_name}.')
return {}
position_file_name = os.path.join(folder_path, file_name)
positions = load_yaml_file(position_file_name, ignore_error=True)
position_map = {}
index = 0
for name in positions:
if name and isinstance(name, str):
position_map[name.strip()] = index
index += 1
return position_map
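For example (a sketch; the folder path and YAML contents are hypothetical):

# Given a _position.yaml containing: ['openai', None, 'anthropic', '  cohere  ']
# the helper skips non-string entries and strips whitespace:
position_map = get_position_map('/path/to/providers')  # hypothetical folder
assert position_map == {'openai': 0, 'anthropic': 1, 'cohere': 2}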
def sort_by_position_map(

View File

@ -42,6 +42,20 @@ def delete(url, *args, **kwargs):
if kwargs['follow_redirects']:
kwargs['allow_redirects'] = kwargs['follow_redirects']
kwargs.pop('follow_redirects')
if 'timeout' in kwargs:
timeout = kwargs['timeout']
if timeout is None:
kwargs.pop('timeout')
elif isinstance(timeout, tuple):
# check length of tuple
if len(timeout) == 2:
kwargs['timeout'] = timeout
elif len(timeout) == 1:
kwargs['timeout'] = timeout[0]
elif len(timeout) > 2:
kwargs['timeout'] = (timeout[0], timeout[1])
else:
kwargs['timeout'] = (timeout, timeout)
return _delete(url=url, *args, proxies=requests_proxies, **kwargs)
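The timeout handling above accepts several shapes; extracted here for illustration only (a sketch, not part of the wrapper):

def normalize_timeout(timeout):
    # None is dropped so requests falls back to its default
    if timeout is None:
        return None
    if isinstance(timeout, tuple):
        if len(timeout) == 1:
            return timeout[0]
        if len(timeout) >= 2:
            # keep (connect, read); extras are discarded
            return (timeout[0], timeout[1])
        return timeout  # empty tuple passes through unchanged
    # a single number applies to both connect and read
    return (timeout, timeout)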
def head(url, *args, **kwargs):

View File

@ -12,7 +12,6 @@ from flask import Flask, current_app
from flask_login import current_user
from sqlalchemy.orm.exc import ObjectDeletedError
from core.docstore.dataset_docstore import DatasetDocumentStore
from core.errors.error import ProviderTokenNotInitError
from core.llm_generator.llm_generator import LLMGenerator
from core.model_manager import ModelInstance, ModelManager
@ -20,12 +19,16 @@ from core.model_runtime.entities.model_entities import ModelType, PriceType
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
from core.rag.datasource.keyword.keyword_factory import Keyword
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.index_processor.index_processor_base import BaseIndexProcessor
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from core.rag.models.document import Document
from core.splitter.fixed_text_splitter import EnhanceRecursiveCharacterTextSplitter, FixedRecursiveCharacterTextSplitter
from core.splitter.text_splitter import TextSplitter
from core.rag.splitter.fixed_text_splitter import (
EnhanceRecursiveCharacterTextSplitter,
FixedRecursiveCharacterTextSplitter,
)
from core.rag.splitter.text_splitter import TextSplitter
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from extensions.ext_storage import storage
@ -283,11 +286,7 @@ class IndexingRunner:
if len(preview_texts) < 5:
preview_texts.append(document.page_content)
if indexing_technique == 'high_quality' or embedding_model_instance:
embedding_model_type_instance = embedding_model_instance.model_type_instance
embedding_model_type_instance = cast(TextEmbeddingModel, embedding_model_type_instance)
tokens += embedding_model_type_instance.get_num_tokens(
model=embedding_model_instance.model,
credentials=embedding_model_instance.credentials,
tokens += embedding_model_instance.get_text_embedding_num_tokens(
texts=[self.filter_string(document.page_content)]
)
@ -411,14 +410,15 @@ class IndexingRunner:
# The user-defined segmentation rule
rules = json.loads(processing_rule.rules)
segmentation = rules["segmentation"]
if segmentation["max_tokens"] < 50 or segmentation["max_tokens"] > 1000:
raise ValueError("Custom segment length should be between 50 and 1000.")
max_segmentation_tokens_length = int(current_app.config['INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH'])
if segmentation["max_tokens"] < 50 or segmentation["max_tokens"] > max_segmentation_tokens_length:
raise ValueError(f"Custom segment length should be between 50 and {max_segmentation_tokens_length}.")
separator = segmentation["separator"]
if separator:
separator = separator.replace('\\n', '\n')
if 'chunk_overlap' in segmentation and segmentation['chunk_overlap']:
if segmentation.get('chunk_overlap'):
chunk_overlap = segmentation['chunk_overlap']
else:
chunk_overlap = 0
@ -427,7 +427,7 @@ class IndexingRunner:
chunk_size=segmentation["max_tokens"],
chunk_overlap=chunk_overlap,
fixed_separator=separator,
separators=["\n\n", "", ".", " ", ""],
separators=["\n\n", "", ". ", " ", ""],
embedding_model_instance=embedding_model_instance
)
else:
@ -435,7 +435,7 @@ class IndexingRunner:
character_splitter = EnhanceRecursiveCharacterTextSplitter.from_encoder(
chunk_size=DatasetProcessRule.AUTOMATIC_RULES['segmentation']['max_tokens'],
chunk_overlap=DatasetProcessRule.AUTOMATIC_RULES['segmentation']['chunk_overlap'],
separators=["\n\n", "", ".", " ", ""],
separators=["\n\n", "", ". ", " ", ""],
embedding_model_instance=embedding_model_instance
)
@ -654,10 +654,6 @@ class IndexingRunner:
tokens = 0
chunk_size = 10
embedding_model_type_instance = None
if embedding_model_instance:
embedding_model_type_instance = embedding_model_instance.model_type_instance
embedding_model_type_instance = cast(TextEmbeddingModel, embedding_model_type_instance)
# create keyword index
create_keyword_thread = threading.Thread(target=self._process_keyword_index,
args=(current_app._get_current_object(),
@ -670,8 +666,7 @@ class IndexingRunner:
chunk_documents = documents[i:i + chunk_size]
futures.append(executor.submit(self._process_chunk, current_app._get_current_object(), index_processor,
chunk_documents, dataset,
dataset_document, embedding_model_instance,
embedding_model_type_instance))
dataset_document, embedding_model_instance))
for future in futures:
tokens += future.result()
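The loading step fans out in fixed chunks: documents are sliced ten at a time, each slice is embedded on a worker thread, and the returned token counts are summed once all futures resolve. A minimal sketch of that fan-out pattern (the `embed_chunk` worker is a stand-in for `_process_chunk`):

from concurrent.futures import ThreadPoolExecutor

def embed_chunk(chunk: list[str]) -> int:
    # Stand-in for _process_chunk: embed the documents and report token usage.
    return sum(len(text.split()) for text in chunk)

documents = [f'doc {i}' for i in range(25)]
chunk_size = 10
tokens = 0
with ThreadPoolExecutor(max_workers=10) as executor:
    futures = [executor.submit(embed_chunk, documents[i:i + chunk_size])
               for i in range(0, len(documents), chunk_size)]
    for future in futures:
        tokens += future.result()  # worker exceptions propagate here, too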
@ -712,7 +707,7 @@ class IndexingRunner:
db.session.commit()
def _process_chunk(self, flask_app, index_processor, chunk_documents, dataset, dataset_document,
embedding_model_instance, embedding_model_type_instance):
embedding_model_instance):
with flask_app.app_context():
# check document is paused
self._check_document_paused_status(dataset_document.id)
@ -720,9 +715,7 @@ class IndexingRunner:
tokens = 0
if dataset.indexing_technique == 'high_quality' or embedding_model_instance:
tokens += sum(
embedding_model_type_instance.get_num_tokens(
embedding_model_instance.model,
embedding_model_instance.credentials,
embedding_model_instance.get_text_embedding_num_tokens(
[document.page_content]
)
for document in chunk_documents

View File

@ -9,8 +9,6 @@ from core.model_runtime.entities.message_entities import (
TextPromptMessageContent,
UserPromptMessage,
)
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.model_providers import model_provider_factory
from extensions.ext_database import db
from models.model import AppMode, Conversation, Message
@ -78,12 +76,7 @@ class TokenBufferMemory:
return []
# prune the chat message if it exceeds the max token limit
provider_instance = model_provider_factory.get_provider_instance(self.model_instance.provider)
model_type_instance = provider_instance.get_model_instance(ModelType.LLM)
curr_message_tokens = model_type_instance.get_num_tokens(
self.model_instance.model,
self.model_instance.credentials,
curr_message_tokens = self.model_instance.get_llm_num_tokens(
prompt_messages
)
@ -91,9 +84,7 @@ class TokenBufferMemory:
pruned_memory = []
while curr_message_tokens > max_token_limit and prompt_messages:
pruned_memory.append(prompt_messages.pop(0))
curr_message_tokens = model_type_instance.get_num_tokens(
self.model_instance.model,
self.model_instance.credentials,
curr_message_tokens = self.model_instance.get_llm_num_tokens(
prompt_messages
)
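The pruning loop drops the oldest message, recounts, and repeats until the history fits; routing the recount through `get_llm_num_tokens` keeps it load-balancing aware. A toy version with a fake counter:

def count_tokens(messages: list[str]) -> int:
    # Stand-in for model_instance.get_llm_num_tokens.
    return sum(len(m.split()) for m in messages)

def prune(messages: list[str], max_token_limit: int) -> list[str]:
    pruned = []
    while count_tokens(messages) > max_token_limit and messages:
        pruned.append(messages.pop(0))  # evict the oldest message first
    return messages

assert prune(['a b c', 'd e', 'f'], 3) == ['d e', 'f']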

View File

@ -1,7 +1,10 @@
import logging
import os
from collections.abc import Generator
from typing import IO, Optional, Union, cast
from core.entities.provider_configuration import ProviderModelBundle
from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
from core.entities.provider_entities import ModelLoadBalancingConfiguration
from core.errors.error import ProviderTokenNotInitError
from core.model_runtime.callbacks.base_callback import Callback
from core.model_runtime.entities.llm_entities import LLMResult
@ -9,6 +12,7 @@ from core.model_runtime.entities.message_entities import PromptMessage, PromptMe
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.entities.rerank_entities import RerankResult
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeConnectionError, InvokeRateLimitError
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.model_runtime.model_providers.__base.moderation_model import ModerationModel
from core.model_runtime.model_providers.__base.rerank_model import RerankModel
@ -16,6 +20,10 @@ from core.model_runtime.model_providers.__base.speech2text_model import Speech2T
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
from core.model_runtime.model_providers.__base.tts_model import TTSModel
from core.provider_manager import ProviderManager
from extensions.ext_redis import redis_client
from models.provider import ProviderType
logger = logging.getLogger(__name__)
class ModelInstance:
@ -29,6 +37,12 @@ class ModelInstance:
self.provider = provider_model_bundle.configuration.provider.provider
self.credentials = self._fetch_credentials_from_bundle(provider_model_bundle, model)
self.model_type_instance = self.provider_model_bundle.model_type_instance
self.load_balancing_manager = self._get_load_balancing_manager(
configuration=provider_model_bundle.configuration,
model_type=provider_model_bundle.model_type_instance.model_type,
model=model,
credentials=self.credentials
)
def _fetch_credentials_from_bundle(self, provider_model_bundle: ProviderModelBundle, model: str) -> dict:
"""
@ -37,8 +51,10 @@ class ModelInstance:
:param model: model name
:return:
"""
credentials = provider_model_bundle.configuration.get_current_credentials(
model_type=provider_model_bundle.model_type_instance.model_type,
configuration = provider_model_bundle.configuration
model_type = provider_model_bundle.model_type_instance.model_type
credentials = configuration.get_current_credentials(
model_type=model_type,
model=model
)
@ -47,6 +63,43 @@ class ModelInstance:
return credentials
def _get_load_balancing_manager(self, configuration: ProviderConfiguration,
model_type: ModelType,
model: str,
credentials: dict) -> Optional["LBModelManager"]:
"""
Get load balancing model credentials
:param configuration: provider configuration
:param model_type: model type
:param model: model name
:param credentials: model credentials
:return:
"""
if configuration.model_settings and configuration.using_provider_type == ProviderType.CUSTOM:
current_model_setting = None
# check if model is disabled by admin
for model_setting in configuration.model_settings:
if (model_setting.model_type == model_type
and model_setting.model == model):
current_model_setting = model_setting
break
# check if load balancing is enabled
if current_model_setting and current_model_setting.load_balancing_configs:
# use load balancing proxy to choose credentials
lb_model_manager = LBModelManager(
tenant_id=configuration.tenant_id,
provider=configuration.provider.provider,
model_type=model_type,
model=model,
load_balancing_configs=current_model_setting.load_balancing_configs,
managed_credentials=credentials if configuration.custom_configuration.provider else None
)
return lb_model_manager
return None
def invoke_llm(self, prompt_messages: list[PromptMessage], model_parameters: Optional[dict] = None,
tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
stream: bool = True, user: Optional[str] = None, callbacks: list[Callback] = None) \
@ -67,7 +120,8 @@ class ModelInstance:
raise Exception("Model type instance is not LargeLanguageModel")
self.model_type_instance = cast(LargeLanguageModel, self.model_type_instance)
return self.model_type_instance.invoke(
return self._round_robin_invoke(
function=self.model_type_instance.invoke,
model=self.model,
credentials=self.credentials,
prompt_messages=prompt_messages,
@ -79,6 +133,27 @@ class ModelInstance:
callbacks=callbacks
)
def get_llm_num_tokens(self, prompt_messages: list[PromptMessage],
tools: Optional[list[PromptMessageTool]] = None) -> int:
"""
Get number of tokens for llm
:param prompt_messages: prompt messages
:param tools: tools for tool calling
:return:
"""
if not isinstance(self.model_type_instance, LargeLanguageModel):
raise Exception("Model type instance is not LargeLanguageModel")
self.model_type_instance = cast(LargeLanguageModel, self.model_type_instance)
return self._round_robin_invoke(
function=self.model_type_instance.get_num_tokens,
model=self.model,
credentials=self.credentials,
prompt_messages=prompt_messages,
tools=tools
)
def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) \
-> TextEmbeddingResult:
"""
@ -92,13 +167,32 @@ class ModelInstance:
raise Exception("Model type instance is not TextEmbeddingModel")
self.model_type_instance = cast(TextEmbeddingModel, self.model_type_instance)
return self.model_type_instance.invoke(
return self._round_robin_invoke(
function=self.model_type_instance.invoke,
model=self.model,
credentials=self.credentials,
texts=texts,
user=user
)
def get_text_embedding_num_tokens(self, texts: list[str]) -> int:
"""
Get number of tokens for text embedding
:param texts: texts to embed
:return:
"""
if not isinstance(self.model_type_instance, TextEmbeddingModel):
raise Exception("Model type instance is not TextEmbeddingModel")
self.model_type_instance = cast(TextEmbeddingModel, self.model_type_instance)
return self._round_robin_invoke(
function=self.model_type_instance.get_num_tokens,
model=self.model,
credentials=self.credentials,
texts=texts
)
def invoke_rerank(self, query: str, docs: list[str], score_threshold: Optional[float] = None,
top_n: Optional[int] = None,
user: Optional[str] = None) \
@ -117,7 +211,8 @@ class ModelInstance:
raise Exception("Model type instance is not RerankModel")
self.model_type_instance = cast(RerankModel, self.model_type_instance)
return self.model_type_instance.invoke(
return self._round_robin_invoke(
function=self.model_type_instance.invoke,
model=self.model,
credentials=self.credentials,
query=query,
@ -140,7 +235,8 @@ class ModelInstance:
raise Exception("Model type instance is not ModerationModel")
self.model_type_instance = cast(ModerationModel, self.model_type_instance)
return self.model_type_instance.invoke(
return self._round_robin_invoke(
function=self.model_type_instance.invoke,
model=self.model,
credentials=self.credentials,
text=text,
@ -160,7 +256,8 @@ class ModelInstance:
raise Exception("Model type instance is not Speech2TextModel")
self.model_type_instance = cast(Speech2TextModel, self.model_type_instance)
return self.model_type_instance.invoke(
return self._round_robin_invoke(
function=self.model_type_instance.invoke,
model=self.model,
credentials=self.credentials,
file=file,
@ -183,7 +280,8 @@ class ModelInstance:
raise Exception("Model type instance is not TTSModel")
self.model_type_instance = cast(TTSModel, self.model_type_instance)
return self.model_type_instance.invoke(
return self._round_robin_invoke(
function=self.model_type_instance.invoke,
model=self.model,
credentials=self.credentials,
content_text=content_text,
@ -193,6 +291,43 @@ class ModelInstance:
streaming=streaming
)
def _round_robin_invoke(self, function: callable, *args, **kwargs):
"""
Round-robin invoke
:param function: function to invoke
:param args: function args
:param kwargs: function kwargs
:return:
"""
if not self.load_balancing_manager:
return function(*args, **kwargs)
last_exception = None
while True:
lb_config = self.load_balancing_manager.fetch_next()
if not lb_config:
if not last_exception:
raise ProviderTokenNotInitError("Model credentials is not initialized.")
else:
raise last_exception
try:
if 'credentials' in kwargs:
del kwargs['credentials']
return function(*args, **kwargs, credentials=lb_config.credentials)
except InvokeRateLimitError as e:
# expire in 60 seconds
self.load_balancing_manager.cooldown(lb_config, expire=60)
last_exception = e
continue
except (InvokeAuthorizationError, InvokeConnectionError) as e:
# expire in 10 seconds
self.load_balancing_manager.cooldown(lb_config, expire=10)
last_exception = e
continue
except Exception as e:
raise e
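In short, `_round_robin_invoke` keeps drawing credentials from the load-balancing manager, cools a config down when it rate-limits or fails to connect or authorize, and retries with the next config until every one is cooling down. A condensed sketch of that loop (the manager and error types are stand-ins for the real `LBModelManager` and invoke errors):

class RateLimitError(Exception): pass   # stands in for InvokeRateLimitError
class AuthError(Exception): pass        # stands in for InvokeAuthorizationError

def round_robin_invoke(manager, function, **kwargs):
    last_exception = None
    while True:
        config = manager.fetch_next()
        if config is None:
            # every config is cooling down (or none exist): surface the last error
            raise last_exception or RuntimeError('no usable credentials')
        try:
            return function(credentials=config.credentials, **kwargs)
        except RateLimitError as e:
            manager.cooldown(config, expire=60)   # longer back-off on rate limits
            last_exception = e
        except (AuthError, ConnectionError) as e:
            manager.cooldown(config, expire=10)   # short back-off on transient failures
            last_exception = e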
def get_tts_voices(self, language: str) -> list:
"""
Invoke large language tts model voices
@ -226,6 +361,7 @@ class ModelManager:
"""
if not provider:
return self.get_default_model_instance(tenant_id, model_type)
provider_model_bundle = self._provider_manager.get_provider_model_bundle(
tenant_id=tenant_id,
provider=provider,
@ -255,3 +391,141 @@ class ModelManager:
model_type=model_type,
model=default_model_entity.model
)
class LBModelManager:
def __init__(self, tenant_id: str,
provider: str,
model_type: ModelType,
model: str,
load_balancing_configs: list[ModelLoadBalancingConfiguration],
managed_credentials: Optional[dict] = None) -> None:
"""
Load balancing model manager
:param load_balancing_configs: all load balancing configurations
:param managed_credentials: credentials if load balancing configuration name is __inherit__
"""
self._tenant_id = tenant_id
self._provider = provider
self._model_type = model_type
self._model = model
self._load_balancing_configs = load_balancing_configs
for load_balancing_config in self._load_balancing_configs:
if load_balancing_config.name == "__inherit__":
if not managed_credentials:
# remove __inherit__ if managed credentials is not provided
self._load_balancing_configs.remove(load_balancing_config)
else:
load_balancing_config.credentials = managed_credentials
def fetch_next(self) -> Optional[ModelLoadBalancingConfiguration]:
"""
Get next model load balancing config
Strategy: Round Robin
:return:
"""
cache_key = "model_lb_index:{}:{}:{}:{}".format(
self._tenant_id,
self._provider,
self._model_type.value,
self._model
)
cooldown_load_balancing_configs = []
max_index = len(self._load_balancing_configs)
while True:
current_index = redis_client.incr(cache_key)
if current_index >= 10000000:
current_index = 1
redis_client.set(cache_key, current_index)
redis_client.expire(cache_key, 3600)
if current_index > max_index:
current_index = current_index % max_index
real_index = current_index - 1
if real_index > max_index:
real_index = 0
config = self._load_balancing_configs[real_index]
if self.in_cooldown(config):
cooldown_load_balancing_configs.append(config)
if len(cooldown_load_balancing_configs) >= len(self._load_balancing_configs):
# all configs are in cooldown
return None
continue
if bool(os.environ.get("DEBUG", 'False').lower() == 'true'):
logger.info(f"Model LB\nid: {config.id}\nname:{config.name}\n"
f"tenant_id: {self._tenant_id}\nprovider: {self._provider}\n"
f"model_type: {self._model_type.value}\nmodel: {self._model}")
return config
return None
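The rotation relies on a shared Redis counter: each call INCRs `model_lb_index:...`, and the counter maps onto a list slot as shown above, so concurrent workers walk the config list in order without coordination. Worked through for three configs:

configs = ['config-a', 'config-b', 'config-c']
max_index = len(configs)

def slot(counter: int) -> int:
    # Mirrors the index math above for counters within range.
    current = counter % max_index if counter > max_index else counter
    # A counter that is an exact multiple of the list length yields -1,
    # which Python resolves to the last slot, so the rotation stays complete.
    return current - 1

assert [configs[slot(i)] for i in range(1, 7)] == [
    'config-a', 'config-b', 'config-c', 'config-a', 'config-b', 'config-c']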
def cooldown(self, config: ModelLoadBalancingConfiguration, expire: int = 60) -> None:
"""
Cooldown model load balancing config
:param config: model load balancing config
:param expire: cooldown time
:return:
"""
cooldown_cache_key = "model_lb_index:cooldown:{}:{}:{}:{}:{}".format(
self._tenant_id,
self._provider,
self._model_type.value,
self._model,
config.id
)
redis_client.setex(cooldown_cache_key, expire, 'true')
def in_cooldown(self, config: ModelLoadBalancingConfiguration) -> bool:
"""
Check if model load balancing config is in cooldown
:param config: model load balancing config
:return:
"""
cooldown_cache_key = "model_lb_index:cooldown:{}:{}:{}:{}:{}".format(
self._tenant_id,
self._provider,
self._model_type.value,
self._model,
config.id
)
return redis_client.exists(cooldown_cache_key)
@classmethod
def get_config_in_cooldown_and_ttl(cls, tenant_id: str,
provider: str,
model_type: ModelType,
model: str,
config_id: str) -> tuple[bool, int]:
"""
Get model load balancing config is in cooldown and ttl
:param tenant_id: workspace id
:param provider: provider name
:param model_type: model type
:param model: model name
:param config_id: model load balancing config id
:return:
"""
cooldown_cache_key = "model_lb_index:cooldown:{}:{}:{}:{}:{}".format(
tenant_id,
provider,
model_type.value,
model,
config_id
)
ttl = redis_client.ttl(cooldown_cache_key)
if ttl == -2:
return False, 0
return True, ttl
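Note the `ttl == -2` check: Redis returns -2 for a missing key and -1 for a key with no expiry, so -2 means the config is not cooling down at all. A tiny sketch of that interpretation:

def cooldown_status(ttl: int) -> tuple[bool, int]:
    # Redis TTL semantics: -2 = key absent, -1 = key present without expiry.
    if ttl == -2:
        return False, 0    # not in cooldown
    return True, ttl       # cooling down for `ttl` more seconds

assert cooldown_status(-2) == (False, 0)
assert cooldown_status(42) == (True, 42)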

View File

@ -20,7 +20,7 @@ This module provides the interface for invoking and authenticating various model
![image-20231210143654461](./docs/en_US/images/index/image-20231210143654461.png)
Displays a list of all supported providers, including provider names, icons, supported model types list, predefined model list, configuration method, and credentials form rules, etc. For detailed rule design, see: [Schema](./schema.md).
Displays a list of all supported providers, including provider names, icons, supported model types list, predefined model list, configuration method, and credentials form rules, etc. For detailed rule design, see: [Schema](./docs/en_US/schema.md).
- Selectable model list display

View File

@ -1,4 +1,3 @@
from abc import ABC
from typing import Optional
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
@ -14,7 +13,7 @@ _TEXT_COLOR_MAPPING = {
}
class Callback(ABC):
class Callback:
"""
Base class for callbacks.
Only for LLM.

View File

@ -51,7 +51,7 @@
- `voices` (list) List of available voices. (available for model type `tts`)
- `mode` (string) Voice model. (available for model type `tts`)
- `name` (string) Voice model display name. (available for model type `tts`)
- `lanuage` (string) Languages supported by the voice model. (available for model type `tts`)
- `language` (string) Languages supported by the voice model. (available for model type `tts`)
- `word_limit` (int) Single conversion word limit, split by paragraph by default. (available for model type `tts`)
- `audio_type` (string) Supported audio file extension formats, e.g. mp3, wav. (available for model type `tts`)
- `max_workers` (int) Number of concurrent workers for text and audio conversion. (available for model type `tts`)

View File

@ -52,7 +52,7 @@
- `voices` (list) List of selectable voices.
- `mode` (string) Voice model. (available for model type `tts`)
- `name` (string) Voice model display name. (available for model type `tts`)
- `lanuage` (string) Languages supported by the voice model. (available for model type `tts`)
- `language` (string) Languages supported by the voice model. (available for model type `tts`)
- `word_limit` (int) Single conversion word limit, split by paragraph by default. (available for model type `tts`)
- `audio_type` (string) Supported audio file extension formats, e.g. mp3, wav. (available for model type `tts`)
- `max_workers` (int) Number of concurrent workers for text and audio conversion. (available for model type `tts`)

View File

@ -3,8 +3,7 @@ import os
from abc import ABC, abstractmethod
from typing import Optional
import yaml
from core.helper.position_helper import get_position_map, sort_by_position_map
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.defaults import PARAMETER_RULE_TEMPLATE
from core.model_runtime.entities.model_entities import (
@ -18,7 +17,7 @@ from core.model_runtime.entities.model_entities import (
)
from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer
from core.utils.position_helper import get_position_map, sort_by_position_map
from core.tools.utils.yaml_utils import load_yaml_file
class AIModel(ABC):
@ -154,8 +153,7 @@ class AIModel(ABC):
# traverse all model_schema_yaml_paths
for model_schema_yaml_path in model_schema_yaml_paths:
# read yaml data from yaml file
with open(model_schema_yaml_path, encoding='utf-8') as f:
yaml_data = yaml.safe_load(f)
yaml_data = load_yaml_file(model_schema_yaml_path, ignore_error=True)
new_parameter_rules = []
for parameter_rule in yaml_data.get('parameter_rules', []):

View File

@ -1,12 +1,11 @@
import os
from abc import ABC, abstractmethod
import yaml
from core.helper.module_import_helper import get_subclasses_from_module, import_module_from_source
from core.model_runtime.entities.model_entities import AIModelEntity, ModelType
from core.model_runtime.entities.provider_entities import ProviderEntity
from core.model_runtime.model_providers.__base.ai_model import AIModel
from core.utils.module_import_helper import get_subclasses_from_module, import_module_from_source
from core.tools.utils.yaml_utils import load_yaml_file
class ModelProvider(ABC):
@ -44,10 +43,7 @@ class ModelProvider(ABC):
# read provider schema from yaml file
yaml_path = os.path.join(current_path, f'{provider_name}.yaml')
yaml_data = {}
if os.path.exists(yaml_path):
with open(yaml_path, encoding='utf-8') as f:
yaml_data = yaml.safe_load(f)
yaml_data = load_yaml_file(yaml_path, ignore_error=True)
try:
# yaml_data to entity

View File

@ -2,7 +2,9 @@
- anthropic
- azure_openai
- google
- vertex_ai
- nvidia
- nvidia_nim
- cohere
- bedrock
- togetherai
@ -26,4 +28,6 @@
- yi
- openllm
- localai
- volcengine_maas
- openai_api_compatible
- deepseek

View File

@ -6,6 +6,7 @@ features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000

View File

@ -6,6 +6,7 @@ features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000

View File

@ -6,6 +6,7 @@ features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000

View File

@ -146,7 +146,7 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
"""
Code block mode wrapper for invoking large language model
"""
if 'response_format' in model_parameters and model_parameters['response_format']:
if model_parameters.get('response_format'):
stop = stop or []
# chat model
self._transform_chat_json_prompts(
@ -324,10 +324,32 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
output_tokens = 0
finish_reason = None
index = 0
tool_calls: list[AssistantPromptMessage.ToolCall] = []
for chunk in response:
if isinstance(chunk, MessageStartEvent):
return_model = chunk.message.model
input_tokens = chunk.message.usage.input_tokens
if hasattr(chunk, 'content_block'):
content_block = chunk.content_block
if isinstance(content_block, dict):
if content_block.get('type') == 'tool_use':
tool_call = AssistantPromptMessage.ToolCall(
id=content_block.get('id'),
type='function',
function=AssistantPromptMessage.ToolCall.ToolCallFunction(
name=content_block.get('name'),
arguments=''
)
)
tool_calls.append(tool_call)
elif hasattr(chunk, 'delta'):
delta = chunk.delta
if isinstance(delta, dict) and len(tool_calls) > 0:
if delta.get('type') == 'input_json_delta':
tool_calls[-1].function.arguments += delta.get('partial_json', '')
elif chunk.message:
return_model = chunk.message.model
input_tokens = chunk.message.usage.input_tokens
elif isinstance(chunk, MessageDeltaEvent):
output_tokens = chunk.usage.output_tokens
finish_reason = chunk.delta.stop_reason
@ -335,13 +357,19 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
# transform usage
usage = self._calc_response_usage(model, credentials, input_tokens, output_tokens)
# transform empty tool call arguments to {}
for tool_call in tool_calls:
if not tool_call.function.arguments:
tool_call.function.arguments = '{}'
yield LLMResultChunk(
model=return_model,
prompt_messages=prompt_messages,
delta=LLMResultChunkDelta(
index=index + 1,
message=AssistantPromptMessage(
content=''
content='',
tool_calls=tool_calls
),
finish_reason=finish_reason,
usage=usage
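The streaming branch accumulates a tool call in two phases: a `tool_use` content block opens the call with an id and a name, and subsequent `input_json_delta` events append `partial_json` fragments to its arguments until the JSON is complete. A minimal reconstruction of that accumulation (plain dicts standing in for the SDK event and ToolCall types):

import json

events = [
    {'content_block': {'type': 'tool_use', 'id': 'toolu_1', 'name': 'get_weather'}},
    {'delta': {'type': 'input_json_delta', 'partial_json': '{"city": "Be'}},
    {'delta': {'type': 'input_json_delta', 'partial_json': 'rlin"}'}},
]

tool_calls = []
for event in events:
    block = event.get('content_block')
    if block and block.get('type') == 'tool_use':
        # a tool_use block opens a new call with empty arguments
        tool_calls.append({'id': block['id'], 'name': block['name'], 'arguments': ''})
    delta = event.get('delta')
    if delta and delta.get('type') == 'input_json_delta' and tool_calls:
        # partial_json fragments are concatenated onto the latest call
        tool_calls[-1]['arguments'] += delta.get('partial_json', '')

assert json.loads(tool_calls[-1]['arguments']) == {'city': 'Berlin'}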
@ -380,7 +408,7 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
"max_retries": 1,
}
if 'anthropic_api_url' in credentials and credentials['anthropic_api_url']:
if credentials.get('anthropic_api_url'):
credentials['anthropic_api_url'] = credentials['anthropic_api_url'].rstrip('/')
credentials_kwargs['base_url'] = credentials['anthropic_api_url']

View File

@ -49,7 +49,7 @@ LLM_BASE_MODELS = [
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_properties={
ModelPropertyKey.MODE: LLMMode.CHAT.value,
ModelPropertyKey.CONTEXT_SIZE: 4096,
ModelPropertyKey.CONTEXT_SIZE: 16385,
},
parameter_rules=[
ParameterRule(
@ -68,11 +68,25 @@ LLM_BASE_MODELS = [
name='frequency_penalty',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
),
_get_max_tokens(default=512, min_val=1, max_val=4096)
_get_max_tokens(default=512, min_val=1, max_val=4096),
ParameterRule(
name='response_format',
label=I18nObject(
zh_Hans='回复格式',
en_US='response_format'
),
type='string',
help=I18nObject(
zh_Hans='指定模型必须输出的格式',
en_US='specifying the format that the model must output'
),
required=False,
options=['text', 'json_object']
),
],
pricing=PriceConfig(
input=0.001,
output=0.002,
input=0.0005,
output=0.0015,
unit=0.001,
currency='USD',
)
@ -482,6 +496,310 @@ LLM_BASE_MODELS = [
)
)
),
AzureBaseModel(
base_model_name='gpt-4o',
entity=AIModelEntity(
model='fake-deployment-name',
label=I18nObject(
en_US='fake-deployment-name-label',
),
model_type=ModelType.LLM,
features=[
ModelFeature.AGENT_THOUGHT,
ModelFeature.VISION,
ModelFeature.MULTI_TOOL_CALL,
ModelFeature.STREAM_TOOL_CALL,
],
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_properties={
ModelPropertyKey.MODE: LLMMode.CHAT.value,
ModelPropertyKey.CONTEXT_SIZE: 128000,
},
parameter_rules=[
ParameterRule(
name='temperature',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
),
ParameterRule(
name='top_p',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
),
ParameterRule(
name='presence_penalty',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
),
ParameterRule(
name='frequency_penalty',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
),
_get_max_tokens(default=512, min_val=1, max_val=4096),
ParameterRule(
name='seed',
label=I18nObject(
zh_Hans='种子',
en_US='Seed'
),
type='int',
help=I18nObject(
zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint 响应参数来监视变化。',
en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.'
),
required=False,
precision=2,
min=0,
max=1,
),
ParameterRule(
name='response_format',
label=I18nObject(
zh_Hans='回复格式',
en_US='response_format'
),
type='string',
help=I18nObject(
zh_Hans='指定模型必须输出的格式',
en_US='specifying the format that the model must output'
),
required=False,
options=['text', 'json_object']
),
],
pricing=PriceConfig(
input=5.00,
output=15.00,
unit=0.000001,
currency='USD',
)
)
),
AzureBaseModel(
base_model_name='gpt-4o-2024-05-13',
entity=AIModelEntity(
model='fake-deployment-name',
label=I18nObject(
en_US='fake-deployment-name-label',
),
model_type=ModelType.LLM,
features=[
ModelFeature.AGENT_THOUGHT,
ModelFeature.VISION,
ModelFeature.MULTI_TOOL_CALL,
ModelFeature.STREAM_TOOL_CALL,
],
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_properties={
ModelPropertyKey.MODE: LLMMode.CHAT.value,
ModelPropertyKey.CONTEXT_SIZE: 128000,
},
parameter_rules=[
ParameterRule(
name='temperature',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
),
ParameterRule(
name='top_p',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
),
ParameterRule(
name='presence_penalty',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
),
ParameterRule(
name='frequency_penalty',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
),
_get_max_tokens(default=512, min_val=1, max_val=4096),
ParameterRule(
name='seed',
label=I18nObject(
zh_Hans='种子',
en_US='Seed'
),
type='int',
help=I18nObject(
zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint 响应参数来监视变化。',
en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.'
),
required=False,
precision=2,
min=0,
max=1,
),
ParameterRule(
name='response_format',
label=I18nObject(
zh_Hans='回复格式',
en_US='response_format'
),
type='string',
help=I18nObject(
zh_Hans='指定模型必须输出的格式',
en_US='specifying the format that the model must output'
),
required=False,
options=['text', 'json_object']
),
],
pricing=PriceConfig(
input=5.00,
output=15.00,
unit=0.000001,
currency='USD',
)
)
),
AzureBaseModel(
base_model_name='gpt-4-turbo',
entity=AIModelEntity(
model='fake-deployment-name',
label=I18nObject(
en_US='fake-deployment-name-label',
),
model_type=ModelType.LLM,
features=[
ModelFeature.AGENT_THOUGHT,
ModelFeature.VISION,
ModelFeature.MULTI_TOOL_CALL,
ModelFeature.STREAM_TOOL_CALL,
],
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_properties={
ModelPropertyKey.MODE: LLMMode.CHAT.value,
ModelPropertyKey.CONTEXT_SIZE: 128000,
},
parameter_rules=[
ParameterRule(
name='temperature',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
),
ParameterRule(
name='top_p',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
),
ParameterRule(
name='presence_penalty',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
),
ParameterRule(
name='frequency_penalty',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
),
_get_max_tokens(default=512, min_val=1, max_val=4096),
ParameterRule(
name='seed',
label=I18nObject(
zh_Hans='种子',
en_US='Seed'
),
type='int',
help=I18nObject(
zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint 响应参数来监视变化。',
en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.'
),
required=False,
precision=2,
min=0,
max=1,
),
ParameterRule(
name='response_format',
label=I18nObject(
zh_Hans='回复格式',
en_US='response_format'
),
type='string',
help=I18nObject(
zh_Hans='指定模型必须输出的格式',
en_US='specifying the format that the model must output'
),
required=False,
options=['text', 'json_object']
),
],
pricing=PriceConfig(
input=0.01,
output=0.03,
unit=0.001,
currency='USD',
)
)
),
AzureBaseModel(
base_model_name='gpt-4-turbo-2024-04-09',
entity=AIModelEntity(
model='fake-deployment-name',
label=I18nObject(
en_US='fake-deployment-name-label',
),
model_type=ModelType.LLM,
features=[
ModelFeature.AGENT_THOUGHT,
ModelFeature.VISION,
ModelFeature.MULTI_TOOL_CALL,
ModelFeature.STREAM_TOOL_CALL,
],
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_properties={
ModelPropertyKey.MODE: LLMMode.CHAT.value,
ModelPropertyKey.CONTEXT_SIZE: 128000,
},
parameter_rules=[
ParameterRule(
name='temperature',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
),
ParameterRule(
name='top_p',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
),
ParameterRule(
name='presence_penalty',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
),
ParameterRule(
name='frequency_penalty',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
),
_get_max_tokens(default=512, min_val=1, max_val=4096),
ParameterRule(
name='seed',
label=I18nObject(
zh_Hans='种子',
en_US='Seed'
),
type='int',
help=I18nObject(
zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint 响应参数来监视变化。',
en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.'
),
required=False,
precision=2,
min=0,
max=1,
),
ParameterRule(
name='response_format',
label=I18nObject(
zh_Hans='回复格式',
en_US='response_format'
),
type='string',
help=I18nObject(
zh_Hans='指定模型必须输出的格式',
en_US='specifying the format that the model must output'
),
required=False,
options=['text', 'json_object']
),
],
pricing=PriceConfig(
input=0.01,
output=0.03,
unit=0.001,
currency='USD',
)
)
),
AzureBaseModel(
base_model_name='gpt-4-vision-preview',
entity=AIModelEntity(

View File

@ -59,6 +59,9 @@ model_credential_schema:
- label:
en_US: 2023-12-01-preview
value: 2023-12-01-preview
- label:
en_US: '2024-02-01'
value: '2024-02-01'
placeholder:
zh_Hans: 在此选择您的 API 版本
en_US: Select your API Version here
@ -99,6 +102,24 @@ model_credential_schema:
show_on:
- variable: __model_type
value: llm
- label:
en_US: gpt-4o
value: gpt-4o
show_on:
- variable: __model_type
value: llm
- label:
en_US: gpt-4o-2024-05-13
value: gpt-4o-2024-05-13
show_on:
- variable: __model_type
value: llm
- label:
en_US: gpt-4-turbo
value: gpt-4-turbo
show_on:
- variable: __model_type
value: llm
- label:
en_US: gpt-4-turbo-2024-04-09
value: gpt-4-turbo-2024-04-09

View File

@ -6,7 +6,7 @@ features:
- agent-thought
model_properties:
mode: chat
context_size: 4000
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature

View File

@ -6,7 +6,7 @@ features:
- agent-thought
model_properties:
mode: chat
context_size: 192000
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature

View File

@ -0,0 +1,45 @@
model: baichuan3-turbo-128k
label:
en_US: Baichuan3-Turbo-128k
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 8000
min: 1
max: 128000
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
default: 1
min: 1
max: 2
- name: with_search_enhance
label:
zh_Hans: 搜索增强
en_US: Search Enhance
type: boolean
help:
zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。
en_US: Allow the model to perform external search to enhance the generation results.
required: false

View File

@ -0,0 +1,45 @@
model: baichuan3-turbo
label:
en_US: Baichuan3-Turbo
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 8000
min: 1
max: 32000
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
default: 1
min: 1
max: 2
- name: with_search_enhance
label:
zh_Hans: 搜索增强
en_US: Search Enhance
type: boolean
help:
zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。
en_US: Allow the model to perform external search to enhance the generation results.
required: false

View File

@ -0,0 +1,45 @@
model: baichuan4
label:
en_US: Baichuan4
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 8000
min: 1
max: 32000
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
default: 1
min: 1
max: 2
- name: with_search_enhance
label:
zh_Hans: 搜索增强
en_US: Search Enhance
type: boolean
help:
zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。
en_US: Allow the model to perform external search to enhance the generation results.
required: false

View File

@ -51,26 +51,29 @@ class BaichuanModel:
'baichuan2-turbo': 'Baichuan2-Turbo',
'baichuan2-turbo-192k': 'Baichuan2-Turbo-192k',
'baichuan2-53b': 'Baichuan2-53B',
'baichuan3-turbo': 'Baichuan3-Turbo',
'baichuan3-turbo-128k': 'Baichuan3-Turbo-128k',
'baichuan4': 'Baichuan4',
}[model]
def _handle_chat_generate_response(self, response) -> BaichuanMessage:
resp = response.json()
choices = resp.get('choices', [])
message = BaichuanMessage(content='', role='assistant')
for choice in choices:
message.content += choice['message']['content']
message.role = choice['message']['role']
if choice['finish_reason']:
message.stop_reason = choice['finish_reason']
resp = response.json()
choices = resp.get('choices', [])
message = BaichuanMessage(content='', role='assistant')
for choice in choices:
message.content += choice['message']['content']
message.role = choice['message']['role']
if choice['finish_reason']:
message.stop_reason = choice['finish_reason']
if 'usage' in resp:
message.usage = {
'prompt_tokens': resp['usage']['prompt_tokens'],
'completion_tokens': resp['usage']['completion_tokens'],
'total_tokens': resp['usage']['total_tokens'],
}
return message
if 'usage' in resp:
message.usage = {
'prompt_tokens': resp['usage']['prompt_tokens'],
'completion_tokens': resp['usage']['completion_tokens'],
'total_tokens': resp['usage']['total_tokens'],
}
return message
def _handle_chat_stream_generate_response(self, response) -> Generator:
for line in response.iter_lines():
@ -89,7 +92,7 @@ class BaichuanModel:
# save stop reason temporarily
stop_reason = ''
for choice in choices:
if 'finish_reason' in choice and choice['finish_reason']:
if choice.get('finish_reason'):
stop_reason = choice['finish_reason']
if len(choice['delta']['content']) == 0:
@ -110,7 +113,8 @@ class BaichuanModel:
def _build_parameters(self, model: str, stream: bool, messages: list[BaichuanMessage],
parameters: dict[str, Any]) \
-> dict[str, Any]:
if model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b':
if (model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b'
or model == 'baichuan3-turbo' or model == 'baichuan3-turbo-128k' or model == 'baichuan4'):
prompt_messages = []
for message in messages:
if message.role == BaichuanMessage.Role.USER.value or message.role == BaichuanMessage.Role._SYSTEM.value:
@ -143,7 +147,8 @@ class BaichuanModel:
raise BadRequestError(f"Unknown model: {model}")
def _build_headers(self, model: str, data: dict[str, Any]) -> dict[str, Any]:
if model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b':
if (model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b'
or model == 'baichuan3-turbo' or model == 'baichuan3-turbo-128k' or model == 'baichuan4'):
# there is no secret key for turbo api
return {
'Content-Type': 'application/json',
@ -160,7 +165,8 @@ class BaichuanModel:
parameters: dict[str, Any], timeout: int) \
-> Union[Generator, BaichuanMessage]:
if model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b':
if (model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b'
or model == 'baichuan3-turbo' or model == 'baichuan3-turbo-128k' or model == 'baichuan4'):
api_base = 'https://api.baichuan-ai.com/v1/chat/completions'
else:
raise BadRequestError(f"Unknown model: {model}")

View File

@ -12,6 +12,7 @@
- meta.llama3-70b-instruct-v1:0
- meta.llama2-13b-chat-v1
- meta.llama2-70b-chat-v1
- mistral.mistral-small-2402-v1:0
- mistral.mistral-large-2402-v1:0
- mistral.mixtral-8x7b-instruct-v0:1
- mistral.mistral-7b-instruct-v0:2

View File

@ -51,7 +51,7 @@ parameter_rules:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.003'
output: '0.015'
input: '0.00025'
output: '0.00125'
unit: '0.001'
currency: USD

View File

@ -50,7 +50,7 @@ parameter_rules:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.00025'
output: '0.00125'
input: '0.003'
output: '0.015'
unit: '0.001'
currency: USD

View File

@ -358,26 +358,25 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
return message_dict
def get_num_tokens(self, model: str, credentials: dict, messages: list[PromptMessage] | str,
def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage] | str,
tools: Optional[list[PromptMessageTool]] = None) -> int:
"""
Get number of tokens for given prompt messages
:param model: model name
:param credentials: model credentials
:param messages: prompt messages or message string
:param prompt_messages: prompt messages or message string
:param tools: tools for tool calling
:return:
"""
prefix = model.split('.')[0]
model_name = model.split('.')[1]
if isinstance(messages, str):
prompt = messages
if isinstance(prompt_messages, str):
prompt = prompt_messages
else:
prompt = self._convert_messages_to_prompt(messages, prefix, model_name)
prompt = self._convert_messages_to_prompt(prompt_messages, prefix, model_name)
return self._get_num_tokens_by_gpt2(prompt)
def validate_credentials(self, model: str, credentials: dict) -> None:
"""

View File

@ -0,0 +1,27 @@
model: mistral.mistral-small-2402-v1:0
label:
en_US: Mistral Small
model_type: llm
model_properties:
mode: completion
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
required: false
default: 0.7
- name: top_p
use_template: top_p
required: false
default: 1
- name: max_tokens
use_template: max_tokens
required: true
default: 512
min: 1
max: 4096
pricing:
input: '0.001'
output: '0.03'
unit: '0.001'
currency: USD

View File

@ -1,3 +1,4 @@
- amazon.titan-embed-text-v1
- amazon.titan-embed-text-v2:0
- cohere.embed-english-v3
- cohere.embed-multilingual-v3

View File

@ -4,5 +4,5 @@ model_properties:
context_size: 8192
pricing:
input: '0.0001'
unit: '0.001'
unit: '0.0001'
currency: USD

View File

@ -0,0 +1,8 @@
model: amazon.titan-embed-text-v2:0
model_type: text-embedding
model_properties:
context_size: 8192
pricing:
input: '0.00002'
unit: '0.00001'
currency: USD

View File

@ -59,15 +59,15 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
model_prefix = model.split('.')[0]
if model_prefix == "amazon" :
for text in texts:
body = {
for text in texts:
body = {
"inputText": text,
}
response_body = self._invoke_bedrock_embedding(model, bedrock_runtime, body)
embeddings.extend([response_body.get('embedding')])
token_usage += response_body.get('inputTextTokenCount')
logger.warning(f'Total Tokens: {token_usage}')
result = TextEmbeddingResult(
}
response_body = self._invoke_bedrock_embedding(model, bedrock_runtime, body)
embeddings.extend([response_body.get('embedding')])
token_usage += response_body.get('inputTextTokenCount')
logger.warning(f'Total Tokens: {token_usage}')
result = TextEmbeddingResult(
model=model,
embeddings=embeddings,
usage=self._calc_response_usage(
@ -75,20 +75,20 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
credentials=credentials,
tokens=token_usage
)
)
return result
)
return result
if model_prefix == "cohere" :
input_type = 'search_document' if len(texts) > 1 else 'search_query'
for text in texts:
body = {
input_type = 'search_document' if len(texts) > 1 else 'search_query'
for text in texts:
body = {
"texts": [text],
"input_type": input_type,
}
response_body = self._invoke_bedrock_embedding(model, bedrock_runtime, body)
embeddings.extend(response_body.get('embeddings'))
token_usage += len(text)
result = TextEmbeddingResult(
}
response_body = self._invoke_bedrock_embedding(model, bedrock_runtime, body)
embeddings.extend(response_body.get('embeddings'))
token_usage += len(text)
result = TextEmbeddingResult(
model=model,
embeddings=embeddings,
usage=self._calc_response_usage(
@ -96,9 +96,9 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
credentials=credentials,
tokens=token_usage
)
)
return result
)
return result
#others
raise ValueError(f"Got unknown model prefix {model_prefix} when handling block response")
@ -183,7 +183,7 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
)
return usage
def _map_client_to_invoke_error(self, error_code: str, error_msg: str) -> type[InvokeError]:
"""
Map client error to invoke error
@ -212,9 +212,9 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
content_type = 'application/json'
try:
response = bedrock_runtime.invoke_model(
body=json.dumps(body),
modelId=model,
accept=accept,
body=json.dumps(body),
modelId=model,
accept=accept,
contentType=content_type
)
response_body = json.loads(response.get('body').read().decode('utf-8'))

View File

@ -1,6 +1,5 @@
import logging
from collections.abc import Generator
from os.path import join
from typing import Optional, cast
from httpx import Timeout
@ -19,6 +18,7 @@ from openai import (
)
from openai.types.chat import ChatCompletion, ChatCompletionChunk
from openai.types.chat.chat_completion_message import FunctionCall
from yarl import URL
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (
@ -265,7 +265,7 @@ class ChatGLMLargeLanguageModel(LargeLanguageModel):
client_kwargs = {
"timeout": Timeout(315.0, read=300.0, write=10.0, connect=5.0),
"api_key": "1",
"base_url": join(credentials['api_base'], 'v1')
"base_url": str(URL(credentials['api_base']) / 'v1')
}
return client_kwargs
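The switch from `os.path.join` to `yarl.URL` matters because `os.path.join` applies filesystem path logic: it joins with backslashes on Windows and silently discards the base when the second segment starts with `/`. `URL / 'v1'` appends a path segment with URL semantics on every platform. A quick comparison (assuming `yarl` is installed):

from os.path import join
from yarl import URL

base = 'http://localhost:8000/api'
# os.path.join uses the platform separator, so this breaks on Windows,
# and join(base, '/v1') would drop the base entirely.
print(join(base, 'v1'))        # 'http://localhost:8000/api/v1' only on POSIX
print(str(URL(base) / 'v1'))   # 'http://localhost:8000/api/v1' everywhere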

View File

@ -32,6 +32,15 @@ provider_credential_schema:
zh_Hans: 在此输入您的 API Key
en_US: Enter your API Key
show_on: [ ]
- variable: base_url
label:
zh_Hans: API Base
en_US: API Base
type: text-input
required: false
placeholder:
zh_Hans: 在此输入您的 API Base如 https://api.cohere.ai/v1
en_US: Enter your API Base, e.g. https://api.cohere.ai/v1
model_credential_schema:
model:
label:
@ -70,3 +79,12 @@ model_credential_schema:
placeholder:
zh_Hans: 在此输入您的 API Key
en_US: Enter your API Key
- variable: base_url
label:
zh_Hans: API Base
en_US: API Base
type: text-input
required: false
placeholder:
zh_Hans: 在此输入您的 API Base如 https://api.cohere.ai/v1
en_US: Enter your API Base, e.g. https://api.cohere.ai/v1

Some files were not shown because too many files have changed in this diff.