mirror of https://github.com/langgenius/dify.git
synced 2026-05-01 16:08:04 +08:00

Merge branch 'main' into feat/add-flashrank
@@ -39,6 +39,7 @@ from core.tools.entities.tool_entities import (
 from core.tools.tool.dataset_retriever_tool import DatasetRetrieverTool
 from core.tools.tool.tool import Tool
 from core.tools.tool_manager import ToolManager
+from core.tools.utils.tool_parameter_converter import ToolParameterConverter
 from extensions.ext_database import db
 from models.model import Conversation, Message, MessageAgentThought
 from models.tools import ToolConversationVariables
@@ -128,6 +129,8 @@ class BaseAgentRunner(AppRunner):
             self.files = application_generate_entity.files
         else:
             self.files = []
+        self.query = None
+        self._current_thoughts: list[PromptMessage] = []

     def _repack_app_generate_entity(self, app_generate_entity: AgentChatAppGenerateEntity) \
             -> AgentChatAppGenerateEntity:
@@ -165,6 +168,7 @@ class BaseAgentRunner(AppRunner):
                 tenant_id=self.tenant_id,
                 app_id=self.app_config.app_id,
                 agent_tool=tool,
+                invoke_from=self.application_generate_entity.invoke_from
             )
             tool_entity.load_variables(self.variables_pool)

@@ -183,21 +187,11 @@ class BaseAgentRunner(AppRunner):
             if parameter.form != ToolParameter.ToolParameterForm.LLM:
                 continue

-            parameter_type = 'string'
+            parameter_type = ToolParameterConverter.get_parameter_type(parameter.type)
             enum = []
-            if parameter.type == ToolParameter.ToolParameterType.STRING:
-                parameter_type = 'string'
-            elif parameter.type == ToolParameter.ToolParameterType.BOOLEAN:
-                parameter_type = 'boolean'
-            elif parameter.type == ToolParameter.ToolParameterType.NUMBER:
-                parameter_type = 'number'
-            elif parameter.type == ToolParameter.ToolParameterType.SELECT:
-                for option in parameter.options:
-                    enum.append(option.value)
-                parameter_type = 'string'
-            else:
-                raise ValueError(f"parameter type {parameter.type} is not supported")
+            if parameter.type == ToolParameter.ToolParameterType.SELECT:
+                enum = [option.value for option in parameter.options]

             message_tool.parameters['properties'][parameter.name] = {
                 "type": parameter_type,
                 "description": parameter.llm_description or '',
@@ -278,20 +272,10 @@ class BaseAgentRunner(AppRunner):
             if parameter.form != ToolParameter.ToolParameterForm.LLM:
                 continue

-            parameter_type = 'string'
+            parameter_type = ToolParameterConverter.get_parameter_type(parameter.type)
             enum = []
-            if parameter.type == ToolParameter.ToolParameterType.STRING:
-                parameter_type = 'string'
-            elif parameter.type == ToolParameter.ToolParameterType.BOOLEAN:
-                parameter_type = 'boolean'
-            elif parameter.type == ToolParameter.ToolParameterType.NUMBER:
-                parameter_type = 'number'
-            elif parameter.type == ToolParameter.ToolParameterType.SELECT:
-                for option in parameter.options:
-                    enum.append(option.value)
-                parameter_type = 'string'
-            else:
-                raise ValueError(f"parameter type {parameter.type} is not supported")
+            if parameter.type == ToolParameter.ToolParameterType.SELECT:
+                enum = [option.value for option in parameter.options]

             prompt_tool.parameters['properties'][parameter.name] = {
                 "type": parameter_type,
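Both hunks above replace a hand-written if/elif ladder with a single call to ToolParameterConverter.get_parameter_type. A minimal sketch of what such a converter could look like, assuming the same four parameter types the removed ladder handled (the real implementation lives in core/tools/utils/tool_parameter_converter.py per the new import and may differ in detail):

    # sketch.py - illustrative only, not the actual dify implementation
    class ToolParameterType:
        STRING = 'string'
        BOOLEAN = 'boolean'
        NUMBER = 'number'
        SELECT = 'select'

    class ToolParameterConverter:
        @staticmethod
        def get_parameter_type(parameter_type: str) -> str:
            # SELECT parameters are rendered as string enums in the
            # JSON-schema-style tool definition, so they map to 'string'.
            mapping = {
                ToolParameterType.STRING: 'string',
                ToolParameterType.SELECT: 'string',
                ToolParameterType.BOOLEAN: 'boolean',
                ToolParameterType.NUMBER: 'number',
            }
            try:
                return mapping[parameter_type]
            except KeyError:
                raise ValueError(f"parameter type {parameter_type} is not supported")

Centralizing the mapping means the two call sites (message tools and prompt tools) can no longer drift apart, which is exactly the duplication this diff removes.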
@@ -463,7 +447,7 @@ class BaseAgentRunner(AppRunner):
         for message in messages:
             if message.id == self.message.id:
                 continue

             result.append(self.organize_agent_user_prompt(message))
             agent_thoughts: list[MessageAgentThought] = message.agent_thoughts
             if agent_thoughts:
@@ -15,6 +15,7 @@ from core.model_runtime.entities.message_entities import (
     ToolPromptMessage,
     UserPromptMessage,
 )
+from core.prompt.agent_history_prompt_transform import AgentHistoryPromptTransform
 from core.tools.entities.tool_entities import ToolInvokeMeta
 from core.tools.tool.tool import Tool
 from core.tools.tool_engine import ToolEngine
@@ -121,7 +122,7 @@ class CotAgentRunner(BaseAgentRunner, ABC):
                 raise ValueError("failed to invoke llm")

-            react_chunks = CotAgentOutputParser.handle_react_stream_output(chunks)
+            usage_dict = {}
+            react_chunks = CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)
             scratchpad = AgentScratchpadUnit(
                 agent_response='',
                 thought='',
@@ -189,7 +190,7 @@ class CotAgentRunner(BaseAgentRunner, ABC):

             if not scratchpad.action:
                 # failed to extract action, return final answer directly
-                final_answer = scratchpad.agent_response or ''
+                final_answer = ''
             else:
                 if scratchpad.action.action_name.lower() == "final answer":
                     # action is final answer, return final answer directly
@@ -373,7 +374,7 @@ class CotAgentRunner(BaseAgentRunner, ABC):

         return message

-    def _organize_historic_prompt_messages(self) -> list[PromptMessage]:
+    def _organize_historic_prompt_messages(self, current_session_messages: list[PromptMessage] = None) -> list[PromptMessage]:
        """
        organize historic prompt messages
        """
@@ -381,6 +382,13 @@ class CotAgentRunner(BaseAgentRunner, ABC):
         scratchpad: list[AgentScratchpadUnit] = []
         current_scratchpad: AgentScratchpadUnit = None

+        self.history_prompt_messages = AgentHistoryPromptTransform(
+            model_config=self.model_config,
+            prompt_messages=current_session_messages or [],
+            history_messages=self.history_prompt_messages,
+            memory=self.memory
+        ).get_prompt()
+
         for message in self.history_prompt_messages:
             if isinstance(message, AssistantPromptMessage):
                 current_scratchpad = AgentScratchpadUnit(
@@ -32,9 +32,6 @@ class CotChatAgentRunner(CotAgentRunner):
         # organize system prompt
         system_message = self._organize_system_prompt()

-        # organize historic prompt messages
-        historic_messages = self._historic_prompt_messages
-
         # organize current assistant messages
         agent_scratchpad = self._agent_scratchpad
         if not agent_scratchpad:
@@ -57,6 +54,13 @@ class CotChatAgentRunner(CotAgentRunner):
         query_messages = UserPromptMessage(content=self._query)

         if assistant_messages:
+            # organize historic prompt messages
+            historic_messages = self._organize_historic_prompt_messages([
+                system_message,
+                query_messages,
+                *assistant_messages,
+                UserPromptMessage(content='continue')
+            ])
             messages = [
                 system_message,
                 *historic_messages,
@@ -65,6 +69,8 @@ class CotChatAgentRunner(CotAgentRunner):
                 UserPromptMessage(content='continue')
             ]
         else:
+            # organize historic prompt messages
+            historic_messages = self._organize_historic_prompt_messages([system_message, query_messages])
             messages = [system_message, *historic_messages, query_messages]

         # join all messages
@@ -19,11 +19,11 @@ class CotCompletionAgentRunner(CotAgentRunner):

         return system_prompt

-    def _organize_historic_prompt(self) -> str:
+    def _organize_historic_prompt(self, current_session_messages: list[PromptMessage] = None) -> str:
         """
         Organize historic prompt
         """
-        historic_prompt_messages = self._historic_prompt_messages
+        historic_prompt_messages = self._organize_historic_prompt_messages(current_session_messages)
         historic_prompt = ""

         for message in historic_prompt_messages:
@@ -8,7 +8,7 @@ class AgentToolEntity(BaseModel):
     """
     Agent Tool Entity.
     """
-    provider_type: Literal["builtin", "api"]
+    provider_type: Literal["builtin", "api", "workflow"]
     provider_id: str
     tool_name: str
     tool_parameters: dict[str, Any] = {}
@@ -17,6 +17,7 @@ from core.model_runtime.entities.message_entities import (
     ToolPromptMessage,
     UserPromptMessage,
 )
+from core.prompt.agent_history_prompt_transform import AgentHistoryPromptTransform
 from core.tools.entities.tool_entities import ToolInvokeMeta
 from core.tools.tool_engine import ToolEngine
 from models.model import Message
@@ -24,21 +25,18 @@ from models.model import Message
 logger = logging.getLogger(__name__)

 class FunctionCallAgentRunner(BaseAgentRunner):
     def run(self,
             message: Message, query: str, **kwargs: Any
             ) -> Generator[LLMResultChunk, None, None]:
         """
         Run FunctionCall agent application
         """
+        self.query = query
         app_generate_entity = self.application_generate_entity

         app_config = self.app_config

-        prompt_template = app_config.prompt_template.simple_prompt_template or ''
-        prompt_messages = self.history_prompt_messages
-        prompt_messages = self._init_system_message(prompt_template, prompt_messages)
-        prompt_messages = self._organize_user_query(query, prompt_messages)
-
         # convert tools into ModelRuntime Tool format
         tool_instances, prompt_messages_tools = self._init_prompt_tools()

@@ -81,6 +79,7 @@ class FunctionCallAgentRunner(BaseAgentRunner):
             )

             # recalc llm max tokens
+            prompt_messages = self._organize_prompt_messages()
             self.recalc_llm_max_tokens(self.model_config, prompt_messages)
             # invoke model
             chunks: Union[Generator[LLMResultChunk, None, None], LLMResult] = model_instance.invoke_llm(
@@ -203,7 +202,7 @@ class FunctionCallAgentRunner(BaseAgentRunner):
             else:
                 assistant_message.content = response

-            prompt_messages.append(assistant_message)
+            self._current_thoughts.append(assistant_message)

             # save thought
             self.save_agent_thought(
@@ -265,12 +264,14 @@ class FunctionCallAgentRunner(BaseAgentRunner):
                     }

                 tool_responses.append(tool_response)
-                prompt_messages = self._organize_assistant_message(
-                    tool_call_id=tool_call_id,
-                    tool_call_name=tool_call_name,
-                    tool_response=tool_response['tool_response'],
-                    prompt_messages=prompt_messages,
-                )
+                if tool_response['tool_response'] is not None:
+                    self._current_thoughts.append(
+                        ToolPromptMessage(
+                            content=tool_response['tool_response'],
+                            tool_call_id=tool_call_id,
+                            name=tool_call_name,
+                        )
+                    )

             if len(tool_responses) > 0:
                 # save agent thought
@@ -300,8 +301,6 @@ class FunctionCallAgentRunner(BaseAgentRunner):

             iteration_step += 1

-            prompt_messages = self._clear_user_prompt_image_messages(prompt_messages)
-
         self.update_db_variables(self.variables_pool, self.db_variables_pool)
         # publish end event
         self.queue_manager.publish(QueueMessageEndEvent(llm_result=LLMResult(
@@ -393,24 +392,6 @@ class FunctionCallAgentRunner(BaseAgentRunner):

         return prompt_messages

-    def _organize_assistant_message(self, tool_call_id: str = None, tool_call_name: str = None, tool_response: str = None,
-                                    prompt_messages: list[PromptMessage] = None) -> list[PromptMessage]:
-        """
-        Organize assistant message
-        """
-        prompt_messages = deepcopy(prompt_messages)
-
-        if tool_response is not None:
-            prompt_messages.append(
-                ToolPromptMessage(
-                    content=tool_response,
-                    tool_call_id=tool_call_id,
-                    name=tool_call_name,
-                )
-            )
-
-        return prompt_messages
-
     def _clear_user_prompt_image_messages(self, prompt_messages: list[PromptMessage]) -> list[PromptMessage]:
         """
         As for now, gpt supports both fc and vision at the first iteration.
@@ -428,4 +409,26 @@ class FunctionCallAgentRunner(BaseAgentRunner):
                     for content in prompt_message.content
                 ])

-        return prompt_messages
\ No newline at end of file
+        return prompt_messages
+
+    def _organize_prompt_messages(self):
+        prompt_template = self.app_config.prompt_template.simple_prompt_template or ''
+        self.history_prompt_messages = self._init_system_message(prompt_template, self.history_prompt_messages)
+        query_prompt_messages = self._organize_user_query(self.query, [])
+
+        self.history_prompt_messages = AgentHistoryPromptTransform(
+            model_config=self.model_config,
+            prompt_messages=[*query_prompt_messages, *self._current_thoughts],
+            history_messages=self.history_prompt_messages,
+            memory=self.memory
+        ).get_prompt()
+
+        prompt_messages = [
+            *self.history_prompt_messages,
+            *query_prompt_messages,
+            *self._current_thoughts
+        ]
+        if len(self._current_thoughts) != 0:
+            # clear messages after the first iteration
+            prompt_messages = self._clear_user_prompt_image_messages(prompt_messages)
+        return prompt_messages
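The new _organize_prompt_messages above rebuilds the prompt on every iteration and funnels conversation history through AgentHistoryPromptTransform before the current query and the accumulated tool-call thoughts are appended. A hedged sketch of how a caller drives that interface, using only the constructor arguments and the get_prompt() method visible in this diff (the trimming policy inside the transform is an assumption; plausibly it drops the oldest history messages so the final prompt fits the model's context window):

    # illustrative usage only; comments describe the apparent contract
    transform = AgentHistoryPromptTransform(
        model_config=self.model_config,           # bundles model + credentials
        prompt_messages=[*query_prompt_messages,  # messages that must survive
                         *self._current_thoughts],
        history_messages=self.history_prompt_messages,  # candidates to trim
        memory=self.memory,                       # token-buffer memory source
    )
    self.history_prompt_messages = transform.get_prompt()

Note that the must-keep messages are passed separately from the history: the transform can then budget tokens for the history only after reserving room for the query and the current scratchpad.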
@@ -9,7 +9,7 @@ from core.model_runtime.entities.llm_entities import LLMResultChunk

 class CotAgentOutputParser:
     @classmethod
-    def handle_react_stream_output(cls, llm_response: Generator[LLMResultChunk, None, None]) -> \
+    def handle_react_stream_output(cls, llm_response: Generator[LLMResultChunk, None, None], usage_dict: dict) -> \
             Generator[Union[str, AgentScratchpadUnit.Action], None, None]:
         def parse_action(json_str):
             try:
@@ -58,6 +58,8 @@ class CotAgentOutputParser:
         thought_idx = 0

         for response in llm_response:
+            if response.delta.usage:
+                usage_dict['usage'] = response.delta.usage
             response = response.delta.message.content
             if not isinstance(response, str):
                 continue
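handle_react_stream_output now takes a mutable usage_dict alongside the chunk generator: because the function yields parsed output lazily, it has no return value the caller could inspect, so token usage is smuggled out through a dict the caller owns. A minimal self-contained sketch of the same pattern (names here are illustrative, not dify's):

    from collections.abc import Generator

    def parse_stream(chunks, usage_dict: dict) -> Generator[str, None, None]:
        for chunk in chunks:
            if chunk.get('usage'):
                # record usage as a side effect; the caller reads it
                # after the generator is exhausted
                usage_dict['usage'] = chunk['usage']
            yield chunk.get('text', '')

    usage = {}
    for text in parse_stream(iter([{'text': 'hi'}, {'usage': {'total_tokens': 7}}]), usage):
        print(text)
    print(usage.get('usage'))  # populated only once the stream has ended

The dict must be read only after iteration finishes; before that the 'usage' key may not exist, which is why the runner initializes it as an empty dict and the parser writes it conditionally.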
@@ -11,7 +11,7 @@ class SensitiveWordAvoidanceConfigManager:
         if not sensitive_word_avoidance_dict:
             return None

-        if 'enabled' in sensitive_word_avoidance_dict and sensitive_word_avoidance_dict['enabled']:
+        if sensitive_word_avoidance_dict.get('enabled'):
             return SensitiveWordAvoidanceEntity(
                 type=sensitive_word_avoidance_dict.get('type'),
                 config=sensitive_word_avoidance_dict.get('config'),
@@ -1,7 +1,7 @@
 from typing import Optional

 from core.agent.entities import AgentEntity, AgentPromptEntity, AgentToolEntity
-from core.tools.prompt.template import REACT_PROMPT_TEMPLATES
+from core.agent.prompt.template import REACT_PROMPT_TEMPLATES


 class AgentConfigManager:
@@ -239,4 +239,4 @@ class WorkflowUIBasedAppConfig(AppConfig):
     """
     Workflow UI Based App Config Entity.
     """
-    workflow_id: str
\ No newline at end of file
+    workflow_id: str
@@ -14,7 +14,7 @@ class FileUploadConfigManager:
         """
         file_upload_dict = config.get('file_upload')
         if file_upload_dict:
-            if 'image' in file_upload_dict and file_upload_dict['image']:
+            if file_upload_dict.get('image'):
                 if 'enabled' in file_upload_dict['image'] and file_upload_dict['image']['enabled']:
                     image_config = {
                         'number_limits': file_upload_dict['image']['number_limits'],
@@ -9,7 +9,7 @@ class MoreLikeThisConfigManager:
         more_like_this = False
         more_like_this_dict = config.get('more_like_this')
         if more_like_this_dict:
-            if 'enabled' in more_like_this_dict and more_like_this_dict['enabled']:
+            if more_like_this_dict.get('enabled'):
                 more_like_this = True

         return more_like_this
@@ -4,7 +4,7 @@ class RetrievalResourceConfigManager:
         show_retrieve_source = False
         retriever_resource_dict = config.get('retriever_resource')
         if retriever_resource_dict:
-            if 'enabled' in retriever_resource_dict and retriever_resource_dict['enabled']:
+            if retriever_resource_dict.get('enabled'):
                 show_retrieve_source = True

         return show_retrieve_source
@@ -9,7 +9,7 @@ class SpeechToTextConfigManager:
         speech_to_text = False
         speech_to_text_dict = config.get('speech_to_text')
         if speech_to_text_dict:
-            if 'enabled' in speech_to_text_dict and speech_to_text_dict['enabled']:
+            if speech_to_text_dict.get('enabled'):
                 speech_to_text = True

         return speech_to_text
@@ -9,7 +9,7 @@ class SuggestedQuestionsAfterAnswerConfigManager:
         suggested_questions_after_answer = False
         suggested_questions_after_answer_dict = config.get('suggested_questions_after_answer')
         if suggested_questions_after_answer_dict:
-            if 'enabled' in suggested_questions_after_answer_dict and suggested_questions_after_answer_dict['enabled']:
+            if suggested_questions_after_answer_dict.get('enabled'):
                 suggested_questions_after_answer = True

         return suggested_questions_after_answer
@@ -12,7 +12,7 @@ class TextToSpeechConfigManager:
         text_to_speech = False
         text_to_speech_dict = config.get('text_to_speech')
         if text_to_speech_dict:
-            if 'enabled' in text_to_speech_dict and text_to_speech_dict['enabled']:
+            if text_to_speech_dict.get('enabled'):
                 text_to_speech = TextToSpeechEntity(
                     enabled=text_to_speech_dict.get('enabled'),
                     voice=text_to_speech_dict.get('voice'),
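All of the config-manager hunks above apply the same idiom: for a plain dict, d.get(key) already returns None when the key is absent, so `if 'enabled' in d and d['enabled']:` and `if d.get('enabled'):` accept and reject exactly the same inputs. A quick check of the equivalence:

    cases = [
        {},                    # key absent   -> both False
        {'enabled': False},    # falsy value  -> both False
        {'enabled': None},     # falsy value  -> both False
        {'enabled': True},     # truthy value -> both True
    ]
    for d in cases:
        old = 'enabled' in d and d['enabled']
        new = d.get('enabled')
        assert bool(old) == bool(new)

The one behavioral caveat is inherited, not introduced: both forms treat an explicit falsy value (False, 0, '') the same as a missing key, which is the intended semantics for these feature toggles.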
@@ -66,7 +66,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
             conversation = self._get_conversation_by_user(app_model, args.get('conversation_id'), user)

         # parse files
-        files = args['files'] if 'files' in args and args['files'] else []
+        files = args['files'] if args.get('files') else []
         message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id)
         file_extra_config = FileUploadConfigManager.convert(workflow.features_dict, is_vision=False)
         if file_extra_config:
@@ -98,6 +98,90 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
             extras=extras
         )

+        return self._generate(
+            app_model=app_model,
+            workflow=workflow,
+            user=user,
+            invoke_from=invoke_from,
+            application_generate_entity=application_generate_entity,
+            conversation=conversation,
+            stream=stream
+        )
+
+    def single_iteration_generate(self, app_model: App,
+                                  workflow: Workflow,
+                                  node_id: str,
+                                  user: Account,
+                                  args: dict,
+                                  stream: bool = True) \
+            -> Union[dict, Generator[dict, None, None]]:
+        """
+        Generate App response.
+
+        :param app_model: App
+        :param workflow: Workflow
+        :param user: account or end user
+        :param args: request args
+        :param invoke_from: invoke from source
+        :param stream: is stream
+        """
+        if not node_id:
+            raise ValueError('node_id is required')
+
+        if args.get('inputs') is None:
+            raise ValueError('inputs is required')
+
+        extras = {
+            "auto_generate_conversation_name": False
+        }
+
+        # get conversation
+        conversation = None
+        if args.get('conversation_id'):
+            conversation = self._get_conversation_by_user(app_model, args.get('conversation_id'), user)
+
+        # convert to app config
+        app_config = AdvancedChatAppConfigManager.get_app_config(
+            app_model=app_model,
+            workflow=workflow
+        )
+
+        # init application generate entity
+        application_generate_entity = AdvancedChatAppGenerateEntity(
+            task_id=str(uuid.uuid4()),
+            app_config=app_config,
+            conversation_id=conversation.id if conversation else None,
+            inputs={},
+            query='',
+            files=[],
+            user_id=user.id,
+            stream=stream,
+            invoke_from=InvokeFrom.DEBUGGER,
+            extras=extras,
+            single_iteration_run=AdvancedChatAppGenerateEntity.SingleIterationRunEntity(
+                node_id=node_id,
+                inputs=args['inputs']
+            )
+        )
+
+        return self._generate(
+            app_model=app_model,
+            workflow=workflow,
+            user=user,
+            invoke_from=InvokeFrom.DEBUGGER,
+            application_generate_entity=application_generate_entity,
+            conversation=conversation,
+            stream=stream
+        )
+
+    def _generate(self, app_model: App,
+                  workflow: Workflow,
+                  user: Union[Account, EndUser],
+                  invoke_from: InvokeFrom,
+                  application_generate_entity: AdvancedChatAppGenerateEntity,
+                  conversation: Conversation = None,
+                  stream: bool = True) \
+            -> Union[dict, Generator[dict, None, None]]:
         is_first_conversation = False
         if not conversation:
             is_first_conversation = True
@@ -167,18 +251,30 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
         """
         with flask_app.app_context():
             try:
-                # get conversation and message
-                conversation = self._get_conversation(conversation_id)
-                message = self._get_message(message_id)
-
-                # chatbot app
-                runner = AdvancedChatAppRunner()
-                runner.run(
-                    application_generate_entity=application_generate_entity,
-                    queue_manager=queue_manager,
-                    conversation=conversation,
-                    message=message
-                )
+                if application_generate_entity.single_iteration_run:
+                    single_iteration_run = application_generate_entity.single_iteration_run
+                    runner.single_iteration_run(
+                        app_id=application_generate_entity.app_config.app_id,
+                        workflow_id=application_generate_entity.app_config.workflow_id,
+                        queue_manager=queue_manager,
+                        inputs=single_iteration_run.inputs,
+                        node_id=single_iteration_run.node_id,
+                        user_id=application_generate_entity.user_id
+                    )
+                else:
+                    # get conversation and message
+                    conversation = self._get_conversation(conversation_id)
+                    message = self._get_message(message_id)
+
+                    # chatbot app
+                    runner = AdvancedChatAppRunner()
+                    runner.run(
+                        application_generate_entity=application_generate_entity,
+                        queue_manager=queue_manager,
+                        conversation=conversation,
+                        message=message
+                    )
             except GenerateTaskStoppedException:
                 pass
             except InvokeAuthorizationError:
@@ -102,6 +102,7 @@ class AdvancedChatAppRunner(AppRunner):
             user_from=UserFrom.ACCOUNT
             if application_generate_entity.invoke_from in [InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER]
             else UserFrom.END_USER,
+            invoke_from=application_generate_entity.invoke_from,
             user_inputs=inputs,
             system_inputs={
                 SystemVariable.QUERY: query,
@@ -109,6 +110,35 @@ class AdvancedChatAppRunner(AppRunner):
                 SystemVariable.CONVERSATION_ID: conversation.id,
                 SystemVariable.USER_ID: user_id
             },
             callbacks=workflow_callbacks,
+            call_depth=application_generate_entity.call_depth
         )

+    def single_iteration_run(self, app_id: str, workflow_id: str,
+                             queue_manager: AppQueueManager,
+                             inputs: dict, node_id: str, user_id: str) -> None:
+        """
+        Single iteration run
+        """
+        app_record: App = db.session.query(App).filter(App.id == app_id).first()
+        if not app_record:
+            raise ValueError("App not found")
+
+        workflow = self.get_workflow(app_model=app_record, workflow_id=workflow_id)
+        if not workflow:
+            raise ValueError("Workflow not initialized")
+
+        workflow_callbacks = [WorkflowEventTriggerCallback(
+            queue_manager=queue_manager,
+            workflow=workflow
+        )]
+
+        workflow_engine_manager = WorkflowEngineManager()
+        workflow_engine_manager.single_step_run_iteration_workflow_node(
+            workflow=workflow,
+            node_id=node_id,
+            user_id=user_id,
+            user_inputs=inputs,
+            callbacks=workflow_callbacks
+        )
@@ -8,6 +8,8 @@ from core.app.entities.task_entities import (
     ChatbotAppStreamResponse,
     ErrorStreamResponse,
     MessageEndStreamResponse,
+    NodeFinishStreamResponse,
+    NodeStartStreamResponse,
     PingStreamResponse,
 )

@@ -111,6 +113,8 @@ class AdvancedChatAppGenerateResponseConverter(AppGenerateResponseConverter):
             if isinstance(sub_stream_response, ErrorStreamResponse):
                 data = cls._error_to_stream_response(sub_stream_response.err)
                 response_chunk.update(data)
+            elif isinstance(sub_stream_response, NodeStartStreamResponse | NodeFinishStreamResponse):
+                response_chunk.update(sub_stream_response.to_ignore_detail_dict())
             else:
                 response_chunk.update(sub_stream_response.to_dict())

@@ -12,6 +12,9 @@ from core.app.entities.queue_entities import (
     QueueAdvancedChatMessageEndEvent,
     QueueAnnotationReplyEvent,
     QueueErrorEvent,
+    QueueIterationCompletedEvent,
+    QueueIterationNextEvent,
+    QueueIterationStartEvent,
     QueueMessageReplaceEvent,
     QueueNodeFailedEvent,
     QueueNodeStartedEvent,
@@ -64,6 +67,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
     _workflow: Workflow
     _user: Union[Account, EndUser]
     _workflow_system_variables: dict[SystemVariable, Any]
+    _iteration_nested_relations: dict[str, list[str]]

     def __init__(self, application_generate_entity: AdvancedChatAppGenerateEntity,
                  workflow: Workflow,
@@ -103,6 +107,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
             usage=LLMUsage.empty_usage()
         )

+        self._iteration_nested_relations = self._get_iteration_nested_relations(self._workflow.graph_dict)
         self._stream_generate_routes = self._get_stream_generate_routes()
         self._conversation_name_generate_thread = None

@@ -204,6 +209,8 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
                 # search stream_generate_routes if node id is answer start at node
                 if not self._task_state.current_stream_generate_state and event.node_id in self._stream_generate_routes:
                     self._task_state.current_stream_generate_state = self._stream_generate_routes[event.node_id]
+                    # reset current route position to 0
+                    self._task_state.current_stream_generate_state.current_route_position = 0

                 # generate stream outputs when node started
                 yield from self._generate_stream_outputs_when_node_started()
@@ -225,6 +232,22 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
                     task_id=self._application_generate_entity.task_id,
                     workflow_node_execution=workflow_node_execution
                 )
+
+                if isinstance(event, QueueNodeFailedEvent):
+                    yield from self._handle_iteration_exception(
+                        task_id=self._application_generate_entity.task_id,
+                        error=f'Child node failed: {event.error}'
+                    )
+            elif isinstance(event, QueueIterationStartEvent | QueueIterationNextEvent | QueueIterationCompletedEvent):
+                if isinstance(event, QueueIterationNextEvent):
+                    # clear ran node execution infos of current iteration
+                    iteration_relations = self._iteration_nested_relations.get(event.node_id)
+                    if iteration_relations:
+                        for node_id in iteration_relations:
+                            self._task_state.ran_node_execution_infos.pop(node_id, None)
+
+                yield self._handle_iteration_to_stream_response(self._application_generate_entity.task_id, event)
+                self._handle_iteration_operation(event)
             elif isinstance(event, QueueStopEvent | QueueWorkflowSucceededEvent | QueueWorkflowFailedEvent):
                 workflow_run = self._handle_workflow_finished(event)
                 if workflow_run:
@@ -263,10 +286,6 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
                 self._handle_retriever_resources(event)
             elif isinstance(event, QueueAnnotationReplyEvent):
                 self._handle_annotation_reply(event)
-            # elif isinstance(event, QueueMessageFileEvent):
-            #     response = self._message_file_to_stream_response(event)
-            #     if response:
-            #         yield response
             elif isinstance(event, QueueTextChunkEvent):
                 delta_text = event.text
                 if delta_text is None:
@@ -342,7 +361,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
             id=self._message.id,
             **extras
         )

     def _get_stream_generate_routes(self) -> dict[str, ChatflowStreamGenerateRoute]:
         """
         Get stream generate routes.
@@ -372,7 +391,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
             )

         return stream_generate_routes

     def _get_answer_start_at_node_ids(self, graph: dict, target_node_id: str) \
             -> list[str]:
         """
@@ -391,6 +410,18 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
                 ingoing_edges.append(edge)

         if not ingoing_edges:
+            # check if it's the first node in the iteration
+            target_node = next((node for node in nodes if node.get('id') == target_node_id), None)
+            if not target_node:
+                return []
+
+            node_iteration_id = target_node.get('data', {}).get('iteration_id')
+            # get iteration start node id
+            for node in nodes:
+                if node.get('id') == node_iteration_id:
+                    if node.get('data', {}).get('start_node_id') == target_node_id:
+                        return [target_node_id]
+
             return []

         start_node_ids = []
@@ -401,14 +432,23 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
                 continue

             node_type = source_node.get('data', {}).get('type')
+            node_iteration_id = source_node.get('data', {}).get('iteration_id')
+            iteration_start_node_id = None
+            if node_iteration_id:
+                iteration_node = next((node for node in nodes if node.get('id') == node_iteration_id), None)
+                iteration_start_node_id = iteration_node.get('data', {}).get('start_node_id')
+
             if node_type in [
                 NodeType.ANSWER.value,
                 NodeType.IF_ELSE.value,
-                NodeType.QUESTION_CLASSIFIER.value
+                NodeType.QUESTION_CLASSIFIER.value,
+                NodeType.ITERATION.value,
+                NodeType.LOOP.value
             ]:
                 start_node_id = target_node_id
                 start_node_ids.append(start_node_id)
-            elif node_type == NodeType.START.value:
+            elif node_type == NodeType.START.value or \
+                    node_iteration_id is not None and iteration_start_node_id == source_node.get('id'):
                 start_node_id = source_node_id
                 start_node_ids.append(start_node_id)
             else:
@@ -417,7 +457,27 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
                     start_node_ids.extend(sub_start_node_ids)

         return start_node_ids

+    def _get_iteration_nested_relations(self, graph: dict) -> dict[str, list[str]]:
+        """
+        Get iteration nested relations.
+        :param graph: graph
+        :return:
+        """
+        nodes = graph.get('nodes')
+
+        iteration_ids = [node.get('id') for node in nodes
+                         if node.get('data', {}).get('type') in [
+                             NodeType.ITERATION.value,
+                             NodeType.LOOP.value,
+                         ]]
+
+        return {
+            iteration_id: [
+                node.get('id') for node in nodes if node.get('data', {}).get('iteration_id') == iteration_id
+            ] for iteration_id in iteration_ids
+        }
+
     def _generate_stream_outputs_when_node_started(self) -> Generator:
         """
         Generate stream outputs.
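_get_iteration_nested_relations builds a map from each iteration (or loop) node to the ids of the nodes nested inside it, keyed off the per-node iteration_id field in the graph dict. A small worked example against a hand-written graph, using only the dict shape the method itself reads (the ids and the literal type strings here are made up for illustration):

    graph = {
        'nodes': [
            {'id': 'iter-1', 'data': {'type': 'iteration'}},
            {'id': 'llm-1', 'data': {'type': 'llm', 'iteration_id': 'iter-1'}},
            {'id': 'code-1', 'data': {'type': 'code', 'iteration_id': 'iter-1'}},
            {'id': 'answer-1', 'data': {'type': 'answer'}},
        ]
    }
    nodes = graph.get('nodes')
    iteration_ids = [n.get('id') for n in nodes
                     if n.get('data', {}).get('type') in ('iteration', 'loop')]
    relations = {
        iteration_id: [n.get('id') for n in nodes
                       if n.get('data', {}).get('iteration_id') == iteration_id]
        for iteration_id in iteration_ids
    }
    assert relations == {'iter-1': ['llm-1', 'code-1']}

The pipeline uses this map when a QueueIterationNextEvent arrives: every child node's entry in ran_node_execution_infos is popped, so the next pass of the iteration starts from a clean slate.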
@@ -425,7 +485,8 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
         """
         if self._task_state.current_stream_generate_state:
             route_chunks = self._task_state.current_stream_generate_state.generate_route[
-                self._task_state.current_stream_generate_state.current_route_position:]
+                self._task_state.current_stream_generate_state.current_route_position:
+            ]

             for route_chunk in route_chunks:
                 if route_chunk.type == 'text':
@@ -458,13 +519,14 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc

         route_chunks = self._task_state.current_stream_generate_state.generate_route[
             self._task_state.current_stream_generate_state.current_route_position:]

         for route_chunk in route_chunks:
             if route_chunk.type == 'text':
                 route_chunk = cast(TextGenerateRouteChunk, route_chunk)
                 self._task_state.answer += route_chunk.text
                 yield self._message_to_stream_response(route_chunk.text, self._message.id)
             else:
+                value = None
                 route_chunk = cast(VarGenerateRouteChunk, route_chunk)
                 value_selector = route_chunk.value_selector
                 if not value_selector:
@@ -476,6 +538,20 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
                 if route_chunk_node_id == 'sys':
                     # system variable
                     value = self._workflow_system_variables.get(SystemVariable.value_of(value_selector[1]))
+                elif route_chunk_node_id in self._iteration_nested_relations:
+                    # it's a iteration variable
+                    if not self._iteration_state or route_chunk_node_id not in self._iteration_state.current_iterations:
+                        continue
+                    iteration_state = self._iteration_state.current_iterations[route_chunk_node_id]
+                    iterator = iteration_state.inputs
+                    if not iterator:
+                        continue
+                    iterator_selector = iterator.get('iterator_selector', [])
+                    if value_selector[1] == 'index':
+                        value = iteration_state.current_index
+                    elif value_selector[1] == 'item':
+                        value = iterator_selector[iteration_state.current_index] if iteration_state.current_index < len(
+                            iterator_selector) else None
                 else:
                     # check chunk node id is before current node id or equal to current node id
                     if route_chunk_node_id not in self._task_state.ran_node_execution_infos:
@@ -505,7 +581,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
                 else:
                     value = value.get(key)

-                if value:
+                if value is not None:
                     text = ''
                     if isinstance(value, str | int | float):
                         text = str(value)
@@ -1,8 +1,11 @@
-from typing import Optional
+from typing import Any, Optional

 from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
 from core.app.entities.queue_entities import (
     AppQueueEvent,
+    QueueIterationCompletedEvent,
+    QueueIterationNextEvent,
+    QueueIterationStartEvent,
     QueueNodeFailedEvent,
     QueueNodeStartedEvent,
     QueueNodeSucceededEvent,
@@ -130,6 +133,66 @@ class WorkflowEventTriggerCallback(BaseWorkflowCallback):
             ), PublishFrom.APPLICATION_MANAGER
         )

+    def on_workflow_iteration_started(self,
+                                      node_id: str,
+                                      node_type: NodeType,
+                                      node_run_index: int = 1,
+                                      node_data: Optional[BaseNodeData] = None,
+                                      inputs: dict = None,
+                                      predecessor_node_id: Optional[str] = None,
+                                      metadata: Optional[dict] = None) -> None:
+        """
+        Publish iteration started
+        """
+        self._queue_manager.publish(
+            QueueIterationStartEvent(
+                node_id=node_id,
+                node_type=node_type,
+                node_run_index=node_run_index,
+                node_data=node_data,
+                inputs=inputs,
+                predecessor_node_id=predecessor_node_id,
+                metadata=metadata
+            ),
+            PublishFrom.APPLICATION_MANAGER
+        )
+
+    def on_workflow_iteration_next(self, node_id: str,
+                                   node_type: NodeType,
+                                   index: int,
+                                   node_run_index: int,
+                                   output: Optional[Any]) -> None:
+        """
+        Publish iteration next
+        """
+        self._queue_manager._publish(
+            QueueIterationNextEvent(
+                node_id=node_id,
+                node_type=node_type,
+                index=index,
+                node_run_index=node_run_index,
+                output=output
+            ),
+            PublishFrom.APPLICATION_MANAGER
+        )
+
+    def on_workflow_iteration_completed(self, node_id: str,
+                                        node_type: NodeType,
+                                        node_run_index: int,
+                                        outputs: dict) -> None:
+        """
+        Publish iteration completed
+        """
+        self._queue_manager._publish(
+            QueueIterationCompletedEvent(
+                node_id=node_id,
+                node_type=node_type,
+                node_run_index=node_run_index,
+                outputs=outputs
+            ),
+            PublishFrom.APPLICATION_MANAGER
+        )
+
     def on_event(self, event: AppQueueEvent) -> None:
         """
         Publish event
@@ -83,7 +83,7 @@ class AgentChatAppGenerator(MessageBasedAppGenerator):
         )

         # parse files
-        files = args['files'] if 'files' in args and args['files'] else []
+        files = args['files'] if args.get('files') else []
         message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id)
         file_extra_config = FileUploadConfigManager.convert(override_model_config_dict or app_model_config.to_dict())
         if file_extra_config:
@@ -115,7 +115,8 @@ class AgentChatAppGenerator(MessageBasedAppGenerator):
             user_id=user.id,
             stream=stream,
             invoke_from=invoke_from,
-            extras=extras
+            extras=extras,
+            call_depth=0
         )

         # init generate records
@@ -13,7 +13,9 @@ class BaseAppGenerator:
         for variable_config in variables:
             variable = variable_config.variable

-            if variable not in user_inputs or not user_inputs[variable]:
+            if (variable not in user_inputs
+                    or user_inputs[variable] is None
+                    or (isinstance(user_inputs[variable], str) and user_inputs[variable] == '')):
                 if variable_config.required:
                     raise ValueError(f"{variable} is required in input form")
                 else:
@@ -22,7 +24,7 @@ class BaseAppGenerator:

             value = user_inputs[variable]

-            if value:
+            if value is not None:
                 if variable_config.type != VariableEntity.Type.NUMBER and not isinstance(value, str):
                     raise ValueError(f"{variable} in input form must be a string")
                 elif variable_config.type == VariableEntity.Type.NUMBER and isinstance(value, str):
@@ -44,7 +46,7 @@ class BaseAppGenerator:
             if value and isinstance(value, str):
                 filtered_inputs[variable] = value.replace('\x00', '')
             else:
-                filtered_inputs[variable] = value if value else None
+                filtered_inputs[variable] = value if value is not None else None

         return filtered_inputs
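The three hunks above all tighten truthiness checks into explicit None/empty-string tests, which changes behavior for legitimate falsy inputs. Under the old `if variable not in user_inputs or not user_inputs[variable]:`, a user-supplied 0 or False was treated as a missing value and could trip the required-field error; the new test only rejects absent, None, or empty-string values. A tiny demonstration of the difference:

    value = 0  # a legitimate numeric input
    old_missing = not value  # True: wrongly flagged as missing
    new_missing = value is None or (isinstance(value, str) and value == '')
    assert old_missing is True and new_missing is False

The same reasoning applies to `filtered_inputs[variable] = value if value is not None else None`, which now preserves 0, False, and 0.0 instead of collapsing them to None.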
@@ -1,6 +1,6 @@
 import time
 from collections.abc import Generator
-from typing import Optional, Union, cast
+from typing import Optional, Union

 from core.app.app_config.entities import ExternalDataVariableEntity, PromptTemplateEntity
 from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
@@ -16,11 +16,11 @@ from core.app.features.hosting_moderation.hosting_moderation import HostingModer
 from core.external_data_tool.external_data_fetch import ExternalDataFetch
 from core.file.file_obj import FileVar
 from core.memory.token_buffer_memory import TokenBufferMemory
+from core.model_manager import ModelInstance
 from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
 from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage
 from core.model_runtime.entities.model_entities import ModelPropertyKey
 from core.model_runtime.errors.invoke import InvokeBadRequestError
-from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
 from core.moderation.input_moderation import InputModeration
 from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
 from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate, MemoryConfig
@@ -45,8 +45,11 @@ class AppRunner:
         :param query: query
         :return:
         """
-        model_type_instance = model_config.provider_model_bundle.model_type_instance
-        model_type_instance = cast(LargeLanguageModel, model_type_instance)
+        # Invoke model
+        model_instance = ModelInstance(
+            provider_model_bundle=model_config.provider_model_bundle,
+            model=model_config.model
+        )

         model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

@@ -73,9 +76,7 @@ class AppRunner:
                 query=query
             )

-        prompt_tokens = model_type_instance.get_num_tokens(
-            model_config.model,
-            model_config.credentials,
+        prompt_tokens = model_instance.get_llm_num_tokens(
             prompt_messages
         )

@@ -89,8 +90,10 @@ class AppRunner:
     def recalc_llm_max_tokens(self, model_config: ModelConfigWithCredentialsEntity,
                               prompt_messages: list[PromptMessage]):
         # recalc max_tokens if sum(prompt_token + max_tokens) over model token limit
-        model_type_instance = model_config.provider_model_bundle.model_type_instance
-        model_type_instance = cast(LargeLanguageModel, model_type_instance)
+        model_instance = ModelInstance(
+            provider_model_bundle=model_config.provider_model_bundle,
+            model=model_config.model
+        )

         model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

@@ -107,9 +110,7 @@ class AppRunner:
         if max_tokens is None:
             max_tokens = 0

-        prompt_tokens = model_type_instance.get_num_tokens(
-            model_config.model,
-            model_config.credentials,
+        prompt_tokens = model_instance.get_llm_num_tokens(
             prompt_messages
         )

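Both token-counting call sites now go through a ModelInstance built from the provider bundle instead of casting the bundle's model_type_instance to LargeLanguageModel, so credentials no longer have to be threaded into every token count. The surrounding recalculation logic, sketched from the names visible in this diff (the clamping formula is an assumption; the real method may differ in detail):

    # pseudo-sketch of "recalc max_tokens if prompt + completion overflows";
    # the clamp at the end is an assumed policy, not confirmed by this diff
    model_instance = ModelInstance(
        provider_model_bundle=model_config.provider_model_bundle,
        model=model_config.model,
    )
    prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)
    if max_tokens is None:
        max_tokens = 0
    if prompt_tokens + max_tokens > model_context_tokens:
        # leave the prompt intact and shrink the completion budget instead
        max_tokens = max(model_context_tokens - prompt_tokens, 16)

The refactor also lets the `cast` import and the LargeLanguageModel import disappear from the module header, which is what the first hunk in this file records.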
@@ -80,7 +80,7 @@ class ChatAppGenerator(MessageBasedAppGenerator):
         )

         # parse files
-        files = args['files'] if 'files' in args and args['files'] else []
+        files = args['files'] if args.get('files') else []
         message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id)
         file_extra_config = FileUploadConfigManager.convert(override_model_config_dict or app_model_config.to_dict())
         if file_extra_config:
@@ -75,7 +75,7 @@ class CompletionAppGenerator(MessageBasedAppGenerator):
         )

         # parse files
-        files = args['files'] if 'files' in args and args['files'] else []
+        files = args['files'] if args.get('files') else []
         message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id)
         file_extra_config = FileUploadConfigManager.convert(override_model_config_dict or app_model_config.to_dict())
         if file_extra_config:
@@ -34,7 +34,8 @@ class WorkflowAppGenerator(BaseAppGenerator):
                  user: Union[Account, EndUser],
                  args: dict,
                  invoke_from: InvokeFrom,
-                 stream: bool = True) \
+                 stream: bool = True,
+                 call_depth: int = 0) \
             -> Union[dict, Generator[dict, None, None]]:
         """
         Generate App response.
@@ -49,7 +50,7 @@ class WorkflowAppGenerator(BaseAppGenerator):
         inputs = args['inputs']

         # parse files
-        files = args['files'] if 'files' in args and args['files'] else []
+        files = args['files'] if args.get('files') else []
         message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id)
         file_extra_config = FileUploadConfigManager.convert(workflow.features_dict, is_vision=False)
         if file_extra_config:
@@ -75,9 +76,38 @@ class WorkflowAppGenerator(BaseAppGenerator):
             files=file_objs,
             user_id=user.id,
             stream=stream,
-            invoke_from=invoke_from
+            invoke_from=invoke_from,
+            call_depth=call_depth
         )

+        return self._generate(
+            app_model=app_model,
+            workflow=workflow,
+            user=user,
+            application_generate_entity=application_generate_entity,
+            invoke_from=invoke_from,
+            stream=stream,
+            call_depth=call_depth
+        )
+
+    def _generate(self, app_model: App,
+                  workflow: Workflow,
+                  user: Union[Account, EndUser],
+                  application_generate_entity: WorkflowAppGenerateEntity,
+                  invoke_from: InvokeFrom,
+                  stream: bool = True,
+                  call_depth: int = 0) \
+            -> Union[dict, Generator[dict, None, None]]:
+        """
+        Generate App response.
+
+        :param app_model: App
+        :param workflow: Workflow
+        :param user: account or end user
+        :param application_generate_entity: application generate entity
+        :param invoke_from: invoke from source
+        :param stream: is stream
+        """
         # init queue manager
         queue_manager = WorkflowAppQueueManager(
             task_id=application_generate_entity.task_id,
@@ -109,6 +139,64 @@ class WorkflowAppGenerator(BaseAppGenerator):
             invoke_from=invoke_from
         )

+    def single_iteration_generate(self, app_model: App,
+                                  workflow: Workflow,
+                                  node_id: str,
+                                  user: Account,
+                                  args: dict,
+                                  stream: bool = True) \
+            -> Union[dict, Generator[dict, None, None]]:
+        """
+        Generate App response.
+
+        :param app_model: App
+        :param workflow: Workflow
+        :param user: account or end user
+        :param args: request args
+        :param invoke_from: invoke from source
+        :param stream: is stream
+        """
+        if not node_id:
+            raise ValueError('node_id is required')
+
+        if args.get('inputs') is None:
+            raise ValueError('inputs is required')
+
+        extras = {
+            "auto_generate_conversation_name": False
+        }
+
+        # convert to app config
+        app_config = WorkflowAppConfigManager.get_app_config(
+            app_model=app_model,
+            workflow=workflow
+        )
+
+        # init application generate entity
+        application_generate_entity = WorkflowAppGenerateEntity(
+            task_id=str(uuid.uuid4()),
+            app_config=app_config,
+            inputs={},
+            files=[],
+            user_id=user.id,
+            stream=stream,
+            invoke_from=InvokeFrom.DEBUGGER,
+            extras=extras,
+            single_iteration_run=WorkflowAppGenerateEntity.SingleIterationRunEntity(
+                node_id=node_id,
+                inputs=args['inputs']
+            )
+        )
+
+        return self._generate(
+            app_model=app_model,
+            workflow=workflow,
+            user=user,
+            invoke_from=InvokeFrom.DEBUGGER,
+            application_generate_entity=application_generate_entity,
+            stream=stream
+        )
+
     def _generate_worker(self, flask_app: Flask,
                          application_generate_entity: WorkflowAppGenerateEntity,
                          queue_manager: AppQueueManager) -> None:
@@ -123,10 +211,21 @@ class WorkflowAppGenerator(BaseAppGenerator):
             try:
                 # workflow app
                 runner = WorkflowAppRunner()
-                runner.run(
-                    application_generate_entity=application_generate_entity,
-                    queue_manager=queue_manager
-                )
+                if application_generate_entity.single_iteration_run:
+                    single_iteration_run = application_generate_entity.single_iteration_run
+                    runner.single_iteration_run(
+                        app_id=application_generate_entity.app_config.app_id,
+                        workflow_id=application_generate_entity.app_config.workflow_id,
+                        queue_manager=queue_manager,
+                        inputs=single_iteration_run.inputs,
+                        node_id=single_iteration_run.node_id,
+                        user_id=application_generate_entity.user_id
+                    )
+                else:
+                    runner.run(
+                        application_generate_entity=application_generate_entity,
+                        queue_manager=queue_manager
+                    )
             except GenerateTaskStoppedException:
                 pass
             except InvokeAuthorizationError:
@@ -73,11 +73,44 @@ class WorkflowAppRunner:
             user_from=UserFrom.ACCOUNT
             if application_generate_entity.invoke_from in [InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER]
             else UserFrom.END_USER,
+            invoke_from=application_generate_entity.invoke_from,
             user_inputs=inputs,
             system_inputs={
                 SystemVariable.FILES: files,
                 SystemVariable.USER_ID: user_id
             },
             callbacks=workflow_callbacks,
+            call_depth=application_generate_entity.call_depth
         )

+    def single_iteration_run(self, app_id: str, workflow_id: str,
+                             queue_manager: AppQueueManager,
+                             inputs: dict, node_id: str, user_id: str) -> None:
+        """
+        Single iteration run
+        """
+        app_record: App = db.session.query(App).filter(App.id == app_id).first()
+        if not app_record:
+            raise ValueError("App not found")
+
+        if not app_record.workflow_id:
+            raise ValueError("Workflow not initialized")
+
+        workflow = self.get_workflow(app_model=app_record, workflow_id=workflow_id)
+        if not workflow:
+            raise ValueError("Workflow not initialized")
+
+        workflow_callbacks = [WorkflowEventTriggerCallback(
+            queue_manager=queue_manager,
+            workflow=workflow
+        )]
+
+        workflow_engine_manager = WorkflowEngineManager()
+        workflow_engine_manager.single_step_run_iteration_workflow_node(
+            workflow=workflow,
+            node_id=node_id,
+            user_id=user_id,
+            user_inputs=inputs,
+            callbacks=workflow_callbacks
+        )
@@ -5,6 +5,8 @@ from typing import cast
 from core.app.apps.base_app_generate_response_converter import AppGenerateResponseConverter
 from core.app.entities.task_entities import (
     ErrorStreamResponse,
+    NodeFinishStreamResponse,
+    NodeStartStreamResponse,
     PingStreamResponse,
     WorkflowAppBlockingResponse,
     WorkflowAppStreamResponse,
@@ -68,4 +70,24 @@ class WorkflowAppGenerateResponseConverter(AppGenerateResponseConverter):
         :param stream_response: stream response
         :return:
         """
-        return cls.convert_stream_full_response(stream_response)
+        for chunk in stream_response:
+            chunk = cast(WorkflowAppStreamResponse, chunk)
+            sub_stream_response = chunk.stream_response
+
+            if isinstance(sub_stream_response, PingStreamResponse):
+                yield 'ping'
+                continue
+
+            response_chunk = {
+                'event': sub_stream_response.event.value,
+                'workflow_run_id': chunk.workflow_run_id,
+            }
+
+            if isinstance(sub_stream_response, ErrorStreamResponse):
+                data = cls._error_to_stream_response(sub_stream_response.err)
+                response_chunk.update(data)
+            elif isinstance(sub_stream_response, NodeStartStreamResponse | NodeFinishStreamResponse):
+                response_chunk.update(sub_stream_response.to_ignore_detail_dict())
+            else:
+                response_chunk.update(sub_stream_response.to_dict())
+            yield json.dumps(response_chunk)
@@ -9,6 +9,9 @@ from core.app.entities.app_invoke_entities import (
 )
 from core.app.entities.queue_entities import (
     QueueErrorEvent,
+    QueueIterationCompletedEvent,
+    QueueIterationNextEvent,
+    QueueIterationStartEvent,
     QueueMessageReplaceEvent,
     QueueNodeFailedEvent,
     QueueNodeStartedEvent,
@@ -58,6 +61,7 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
     _task_state: WorkflowTaskState
     _application_generate_entity: WorkflowAppGenerateEntity
     _workflow_system_variables: dict[SystemVariable, Any]
+    _iteration_nested_relations: dict[str, list[str]]

     def __init__(self, application_generate_entity: WorkflowAppGenerateEntity,
                  workflow: Workflow,
@@ -85,8 +89,11 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
             SystemVariable.USER_ID: user_id
         }

-        self._task_state = WorkflowTaskState()
+        self._task_state = WorkflowTaskState(
+            iteration_nested_node_ids=[]
+        )
         self._stream_generate_nodes = self._get_stream_generate_nodes()
+        self._iteration_nested_relations = self._get_iteration_nested_relations(self._workflow.graph_dict)

     def process(self) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]:
         """
@@ -191,6 +198,22 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
                     task_id=self._application_generate_entity.task_id,
                     workflow_node_execution=workflow_node_execution
                 )
+
+                if isinstance(event, QueueNodeFailedEvent):
+                    yield from self._handle_iteration_exception(
+                        task_id=self._application_generate_entity.task_id,
+                        error=f'Child node failed: {event.error}'
+                    )
+            elif isinstance(event, QueueIterationStartEvent | QueueIterationNextEvent | QueueIterationCompletedEvent):
+                if isinstance(event, QueueIterationNextEvent):
+                    # clear ran node execution infos of current iteration
+                    iteration_relations = self._iteration_nested_relations.get(event.node_id)
+                    if iteration_relations:
+                        for node_id in iteration_relations:
+                            self._task_state.ran_node_execution_infos.pop(node_id, None)
+
+                yield self._handle_iteration_to_stream_response(self._application_generate_entity.task_id, event)
+                self._handle_iteration_operation(event)
             elif isinstance(event, QueueStopEvent | QueueWorkflowSucceededEvent | QueueWorkflowFailedEvent):
                 workflow_run = self._handle_workflow_finished(event)

@@ -331,13 +354,20 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
                 continue

             node_type = source_node.get('data', {}).get('type')
+            node_iteration_id = source_node.get('data', {}).get('iteration_id')
+            iteration_start_node_id = None
+            if node_iteration_id:
+                iteration_node = next((node for node in nodes if node.get('id') == node_iteration_id), None)
+                iteration_start_node_id = iteration_node.get('data', {}).get('start_node_id')
+
             if node_type in [
                 NodeType.IF_ELSE.value,
                 NodeType.QUESTION_CLASSIFIER.value
             ]:
                 start_node_id = target_node_id
                 start_node_ids.append(start_node_id)
-            elif node_type == NodeType.START.value:
+            elif node_type == NodeType.START.value or \
+                    node_iteration_id is not None and iteration_start_node_id == source_node.get('id'):
                 start_node_id = source_node_id
                 start_node_ids.append(start_node_id)
             else:
@@ -411,3 +441,24 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
                 return False

         return True

+    def _get_iteration_nested_relations(self, graph: dict) -> dict[str, list[str]]:
+        """
+        Get iteration nested relations.
+        :param graph: graph
+        :return:
+        """
+        nodes = graph.get('nodes')
+
+        iteration_ids = [node.get('id') for node in nodes
+                         if node.get('data', {}).get('type') in [
+                             NodeType.ITERATION.value,
+                             NodeType.LOOP.value,
+                         ]]
+
+        return {
+            iteration_id: [
+                node.get('id') for node in nodes if node.get('data', {}).get('iteration_id') == iteration_id
+            ] for iteration_id in iteration_ids
+        }
@ -1,8 +1,11 @@
|
||||
from typing import Optional
|
||||
from typing import Any, Optional
|
||||
|
||||
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
|
||||
from core.app.entities.queue_entities import (
|
||||
AppQueueEvent,
|
||||
QueueIterationCompletedEvent,
|
||||
QueueIterationNextEvent,
|
||||
QueueIterationStartEvent,
|
||||
QueueNodeFailedEvent,
|
||||
QueueNodeStartedEvent,
|
||||
QueueNodeSucceededEvent,
|
||||
@ -130,6 +133,66 @@ class WorkflowEventTriggerCallback(BaseWorkflowCallback):
|
||||
), PublishFrom.APPLICATION_MANAGER
|
||||
)
|
||||
|
||||
    def on_workflow_iteration_started(self,
                                      node_id: str,
                                      node_type: NodeType,
                                      node_run_index: int = 1,
                                      node_data: Optional[BaseNodeData] = None,
                                      inputs: dict = None,
                                      predecessor_node_id: Optional[str] = None,
                                      metadata: Optional[dict] = None) -> None:
        """
        Publish iteration started
        """
        self._queue_manager.publish(
            QueueIterationStartEvent(
                node_id=node_id,
                node_type=node_type,
                node_run_index=node_run_index,
                node_data=node_data,
                inputs=inputs,
                predecessor_node_id=predecessor_node_id,
                metadata=metadata
            ),
            PublishFrom.APPLICATION_MANAGER
        )

    def on_workflow_iteration_next(self, node_id: str,
                                   node_type: NodeType,
                                   index: int,
                                   node_run_index: int,
                                   output: Optional[Any]) -> None:
        """
        Publish iteration next
        """
        self._queue_manager.publish(
            QueueIterationNextEvent(
                node_id=node_id,
                node_type=node_type,
                index=index,
                node_run_index=node_run_index,
                output=output
            ),
            PublishFrom.APPLICATION_MANAGER
        )

    def on_workflow_iteration_completed(self, node_id: str,
                                        node_type: NodeType,
                                        node_run_index: int,
                                        outputs: dict) -> None:
        """
        Publish iteration completed
        """
        self._queue_manager.publish(
            QueueIterationCompletedEvent(
                node_id=node_id,
                node_type=node_type,
                node_run_index=node_run_index,
                outputs=outputs
            ),
            PublishFrom.APPLICATION_MANAGER
        )

    def on_event(self, event: AppQueueEvent) -> None:
        """
        Publish event

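Each on_workflow_iteration_* hook above does one thing: wrap its arguments in the matching queue event and publish it to the app queue. A toy sketch of that publish-through pattern follows; DummyQueueManager and the dict payload are stand-ins, not the commit's real QueueIterationNextEvent:

# Illustrative sketch of the callback's publish-through pattern.
# Both classes here are hypothetical stand-ins.
class DummyQueueManager:
    def __init__(self):
        self.events = []

    def publish(self, event, publish_from):
        # The real manager fans events out to the task pipeline.
        self.events.append((event, publish_from))

class IterationForwardingCallback:
    def __init__(self, queue_manager):
        self._queue_manager = queue_manager

    def on_workflow_iteration_next(self, node_id, index, output=None):
        # In the real callback this payload is a QueueIterationNextEvent.
        self._queue_manager.publish(
            {'event': 'iteration_next', 'node_id': node_id,
             'index': index, 'output': output},
            'APPLICATION_MANAGER',
        )

qm = DummyQueueManager()
IterationForwardingCallback(qm).on_workflow_iteration_next('iter-1', 2, output='step-2')
assert qm.events[0][0]['index'] == 2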
@ -102,6 +102,39 @@ class WorkflowLoggingCallback(BaseWorkflowCallback):

        self.print_text(text, color="pink", end="")

    def on_workflow_iteration_started(self,
                                      node_id: str,
                                      node_type: NodeType,
                                      node_run_index: int = 1,
                                      node_data: Optional[BaseNodeData] = None,
                                      inputs: dict = None,
                                      predecessor_node_id: Optional[str] = None,
                                      metadata: Optional[dict] = None) -> None:
        """
        Publish iteration started
        """
        self.print_text("\n[on_workflow_iteration_started]", color='blue')
        self.print_text(f"Node ID: {node_id}", color='blue')

    def on_workflow_iteration_next(self, node_id: str,
                                   node_type: NodeType,
                                   index: int,
                                   node_run_index: int,
                                   output: Optional[dict]) -> None:
        """
        Publish iteration next
        """
        self.print_text("\n[on_workflow_iteration_next]", color='blue')

    def on_workflow_iteration_completed(self, node_id: str,
                                        node_type: NodeType,
                                        node_run_index: int,
                                        outputs: dict) -> None:
        """
        Publish iteration completed
        """
        self.print_text("\n[on_workflow_iteration_completed]", color='blue')

    def on_event(self, event: AppQueueEvent) -> None:
        """
        Publish event

@ -80,6 +80,9 @@ class AppGenerateEntity(BaseModel):
    stream: bool
    invoke_from: InvokeFrom

    # invoke call depth
    call_depth: int = 0

    # extra parameters, like: auto_generate_conversation_name
    extras: dict[str, Any] = {}

@ -126,6 +129,14 @@ class AdvancedChatAppGenerateEntity(AppGenerateEntity):
    conversation_id: Optional[str] = None
    query: Optional[str] = None

    class SingleIterationRunEntity(BaseModel):
        """
        Single Iteration Run Entity.
        """
        node_id: str
        inputs: dict

    single_iteration_run: Optional[SingleIterationRunEntity] = None

class WorkflowAppGenerateEntity(AppGenerateEntity):
    """
@ -133,3 +144,12 @@ class WorkflowAppGenerateEntity(AppGenerateEntity):
    """
    # app config
    app_config: WorkflowUIBasedAppConfig

    class SingleIterationRunEntity(BaseModel):
        """
        Single Iteration Run Entity.
        """
        node_id: str
        inputs: dict

    single_iteration_run: Optional[SingleIterationRunEntity] = None
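The nested SingleIterationRunEntity added to both generate entities carries just the target node id and its inputs, presumably so a caller can re-run one iteration node in isolation. A sketch of constructing it, with hypothetical values:

from typing import Optional
from pydantic import BaseModel

# Sketch of the nested entity added above; field names match the diff,
# the values are made up.
class SingleIterationRunEntity(BaseModel):
    node_id: str
    inputs: dict

single_iteration_run: Optional[SingleIterationRunEntity] = SingleIterationRunEntity(
    node_id='iteration-node-1',          # id of the iteration node to re-run
    inputs={'items': ['a', 'b', 'c']},   # hypothetical inputs for that node
)
assert single_iteration_run.node_id == 'iteration-node-1'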
@ -1,7 +1,7 @@
from enum import Enum
from typing import Any, Optional

from pydantic import BaseModel
from pydantic import BaseModel, validator

from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
from core.workflow.entities.base_node_data_entities import BaseNodeData
@ -21,6 +21,9 @@ class QueueEvent(Enum):
    WORKFLOW_STARTED = "workflow_started"
    WORKFLOW_SUCCEEDED = "workflow_succeeded"
    WORKFLOW_FAILED = "workflow_failed"
    ITERATION_START = "iteration_start"
    ITERATION_NEXT = "iteration_next"
    ITERATION_COMPLETED = "iteration_completed"
    NODE_STARTED = "node_started"
    NODE_SUCCEEDED = "node_succeeded"
    NODE_FAILED = "node_failed"
@ -47,6 +50,55 @@ class QueueLLMChunkEvent(AppQueueEvent):
    event = QueueEvent.LLM_CHUNK
    chunk: LLMResultChunk

class QueueIterationStartEvent(AppQueueEvent):
    """
    QueueIterationStartEvent entity
    """
    event = QueueEvent.ITERATION_START
    node_id: str
    node_type: NodeType
    node_data: BaseNodeData

    node_run_index: int
    inputs: dict = None
    predecessor_node_id: Optional[str] = None
    metadata: Optional[dict] = None

class QueueIterationNextEvent(AppQueueEvent):
    """
    QueueIterationNextEvent entity
    """
    event = QueueEvent.ITERATION_NEXT

    index: int
    node_id: str
    node_type: NodeType

    node_run_index: int
    output: Optional[Any]  # output for the current iteration

    @validator('output', pre=True, always=True)
    def set_output(cls, v):
        """
        Set output
        """
        if v is None:
            return None
        if isinstance(v, int | float | str | bool | dict | list):
            return v
        raise ValueError('output must be a valid type')

class QueueIterationCompletedEvent(AppQueueEvent):
    """
    QueueIterationCompletedEvent entity
    """
    event = QueueEvent.ITERATION_COMPLETED

    node_id: str
    node_type: NodeType

    node_run_index: int
    outputs: dict

class QueueTextChunkEvent(AppQueueEvent):
    """
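Stepping back from the diff for a moment: the validator on QueueIterationNextEvent runs with pre=True, always=True, so it acts as a type whitelist on the per-iteration output before pydantic's own coercion. A standalone sketch of the same check (pydantic v1 style, matching the import above):

from typing import Any, Optional
from pydantic import BaseModel, validator

# Standalone sketch of QueueIterationNextEvent's output whitelist.
class IterationNextSketch(BaseModel):
    index: int
    output: Optional[Any]

    @validator('output', pre=True, always=True)
    def set_output(cls, v):
        if v is None:
            return None
        if isinstance(v, int | float | str | bool | dict | list):
            return v
        raise ValueError('output must be a valid type')

assert IterationNextSketch(index=0, output={'text': 'ok'}).output == {'text': 'ok'}
try:
    IterationNextSketch(index=1, output=object())  # rejected by the validator
except ValueError:
    pass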
@ -1,12 +1,14 @@
from enum import Enum
from typing import Optional
from typing import Any, Optional

from pydantic import BaseModel

from core.model_runtime.entities.llm_entities import LLMResult, LLMUsage
from core.model_runtime.utils.encoders import jsonable_encoder
from core.workflow.entities.base_node_data_entities import BaseNodeData
from core.workflow.entities.node_entities import NodeType
from core.workflow.nodes.answer.entities import GenerateRouteChunk
from models.workflow import WorkflowNodeExecutionStatus


class WorkflowStreamGenerateNodes(BaseModel):
@ -65,6 +67,7 @@ class WorkflowTaskState(TaskState):

    current_stream_generate_state: Optional[WorkflowStreamGenerateNodes] = None

    iteration_nested_node_ids: list[str] = None

class AdvancedChatTaskState(WorkflowTaskState):
    """
@ -91,6 +94,9 @@ class StreamEvent(Enum):
    WORKFLOW_FINISHED = "workflow_finished"
    NODE_STARTED = "node_started"
    NODE_FINISHED = "node_finished"
    ITERATION_STARTED = "iteration_started"
    ITERATION_NEXT = "iteration_next"
    ITERATION_COMPLETED = "iteration_completed"
    TEXT_CHUNK = "text_chunk"
    TEXT_REPLACE = "text_replace"

@ -246,6 +252,24 @@ class NodeStartStreamResponse(StreamResponse):
    workflow_run_id: str
    data: Data

    def to_ignore_detail_dict(self):
        return {
            "event": self.event.value,
            "task_id": self.task_id,
            "workflow_run_id": self.workflow_run_id,
            "data": {
                "id": self.data.id,
                "node_id": self.data.node_id,
                "node_type": self.data.node_type,
                "title": self.data.title,
                "index": self.data.index,
                "predecessor_node_id": self.data.predecessor_node_id,
                "inputs": None,
                "created_at": self.data.created_at,
                "extras": {}
            }
        }


class NodeFinishStreamResponse(StreamResponse):
    """
@ -276,6 +300,99 @@ class NodeFinishStreamResponse(StreamResponse):
    workflow_run_id: str
    data: Data

    def to_ignore_detail_dict(self):
        return {
            "event": self.event.value,
            "task_id": self.task_id,
            "workflow_run_id": self.workflow_run_id,
            "data": {
                "id": self.data.id,
                "node_id": self.data.node_id,
                "node_type": self.data.node_type,
                "title": self.data.title,
                "index": self.data.index,
                "predecessor_node_id": self.data.predecessor_node_id,
                "inputs": None,
                "process_data": None,
                "outputs": None,
                "status": self.data.status,
                "error": None,
                "elapsed_time": self.data.elapsed_time,
                "execution_metadata": None,
                "created_at": self.data.created_at,
                "finished_at": self.data.finished_at,
                "files": []
            }
        }

class IterationNodeStartStreamResponse(StreamResponse):
    """
    IterationNodeStartStreamResponse entity
    """
    class Data(BaseModel):
        """
        Data entity
        """
        id: str
        node_id: str
        node_type: str
        title: str
        created_at: int
        extras: dict = {}
        metadata: dict = {}
        inputs: dict = {}

    event: StreamEvent = StreamEvent.ITERATION_STARTED
    workflow_run_id: str
    data: Data

class IterationNodeNextStreamResponse(StreamResponse):
    """
    IterationNodeNextStreamResponse entity
    """
    class Data(BaseModel):
        """
        Data entity
        """
        id: str
        node_id: str
        node_type: str
        title: str
        index: int
        created_at: int
        pre_iteration_output: Optional[Any]
        extras: dict = {}

    event: StreamEvent = StreamEvent.ITERATION_NEXT
    workflow_run_id: str
    data: Data

class IterationNodeCompletedStreamResponse(StreamResponse):
    """
    IterationNodeCompletedStreamResponse entity
    """
    class Data(BaseModel):
        """
        Data entity
        """
        id: str
        node_id: str
        node_type: str
        title: str
        outputs: Optional[dict]
        created_at: int
        extras: dict = None
        inputs: dict = None
        status: WorkflowNodeExecutionStatus
        error: Optional[str]
        elapsed_time: float
        total_tokens: int
        finished_at: int
        steps: int

    event: StreamEvent = StreamEvent.ITERATION_COMPLETED
    workflow_run_id: str
    data: Data

class TextChunkStreamResponse(StreamResponse):
    """
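Both to_ignore_detail_dict helpers above keep the event envelope but null out inputs, outputs, process_data, and execution_metadata, which suggests they back a mode where node details are hidden from consumers. A reduced sketch of that masking idea (field set abbreviated, names hypothetical):

# Sketch: masking detail fields while keeping the event envelope intact,
# analogous to to_ignore_detail_dict above (field set abbreviated).
def to_ignore_detail_dict(full: dict) -> dict:
    masked = dict(full)
    masked['data'] = {**full['data'], 'inputs': None, 'outputs': None,
                      'process_data': None, 'execution_metadata': None}
    return masked

full = {'event': 'node_finished', 'task_id': 't-1',
        'data': {'id': 'n-1', 'inputs': {'q': 'hi'}, 'outputs': {'a': 'yo'},
                 'process_data': {}, 'execution_metadata': {}, 'status': 'succeeded'}}
masked = to_ignore_detail_dict(full)
assert masked['data']['inputs'] is None and masked['data']['status'] == 'succeeded'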
@ -411,3 +528,23 @@ class WorkflowAppBlockingResponse(AppBlockingResponse):

    workflow_run_id: str
    data: Data

class WorkflowIterationState(BaseModel):
    """
    WorkflowIterationState entity
    """
    class Data(BaseModel):
        """
        Data entity
        """
        parent_iteration_id: Optional[str] = None
        iteration_id: str
        current_index: int
        iteration_steps_boundary: list[int] = None
        node_execution_id: str
        started_at: float
        inputs: dict = None
        total_tokens: int = 0
        node_data: BaseNodeData

    current_iterations: dict[str, Data] = None
@ -37,6 +37,7 @@ from core.app.entities.task_entities import (
)
from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTaskPipeline
from core.app.task_pipeline.message_cycle_manage import MessageCycleManage
from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import (
    AssistantPromptMessage,
@ -317,29 +318,30 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan
        """
        model_config = self._model_config
        model = model_config.model
        model_type_instance = model_config.provider_model_bundle.model_type_instance
        model_type_instance = cast(LargeLanguageModel, model_type_instance)

        model_instance = ModelInstance(
            provider_model_bundle=model_config.provider_model_bundle,
            model=model_config.model
        )

        # calculate num tokens
        prompt_tokens = 0
        if event.stopped_by != QueueStopEvent.StopBy.ANNOTATION_REPLY:
            prompt_tokens = model_type_instance.get_num_tokens(
                model,
                model_config.credentials,
            prompt_tokens = model_instance.get_llm_num_tokens(
                self._task_state.llm_result.prompt_messages
            )

        completion_tokens = 0
        if event.stopped_by == QueueStopEvent.StopBy.USER_MANUAL:
            completion_tokens = model_type_instance.get_num_tokens(
                model,
                model_config.credentials,
            completion_tokens = model_instance.get_llm_num_tokens(
                [self._task_state.llm_result.message]
            )

        credentials = model_config.credentials

        # transform usage
        model_type_instance = model_config.provider_model_bundle.model_type_instance
        model_type_instance = cast(LargeLanguageModel, model_type_instance)
        self._task_state.llm_result.usage = model_type_instance._calc_response_usage(
            model,
            credentials,
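The refactor above replaces direct get_num_tokens calls, which needed model and credentials threaded through every call site, with ModelInstance.get_llm_num_tokens. A sketch of the wrapper's shape; the classes here are stand-ins, not dify's real API:

# Sketch of the refactor's shape: a wrapper that closes over model and
# credentials so callers pass only messages. Both classes are hypothetical.
class FakeLLM:
    def get_num_tokens(self, model, credentials, messages):
        # Toy tokenizer: count whitespace-separated words.
        return sum(len(m.split()) for m in messages)

class ModelInstanceSketch:
    def __init__(self, llm, model, credentials):
        self._llm, self._model, self._credentials = llm, model, credentials

    def get_llm_num_tokens(self, messages):
        # Before the refactor, every caller threaded model/credentials manually.
        return self._llm.get_num_tokens(self._model, self._credentials, messages)

instance = ModelInstanceSketch(FakeLLM(), 'gpt-x', {'api_key': 'dummy'})
assert instance.get_llm_num_tokens(['hello there', 'stop now']) == 4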
@ -1,9 +1,9 @@
import json
import time
from datetime import datetime, timezone
from typing import Any, Optional, Union, cast
from typing import Optional, Union, cast

from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, InvokeFrom, WorkflowAppGenerateEntity
from core.app.entities.app_invoke_entities import InvokeFrom
from core.app.entities.queue_entities import (
    QueueNodeFailedEvent,
    QueueNodeStartedEvent,
@ -13,18 +13,17 @@ from core.app.entities.queue_entities import (
    QueueWorkflowSucceededEvent,
)
from core.app.entities.task_entities import (
    AdvancedChatTaskState,
    NodeExecutionInfo,
    NodeFinishStreamResponse,
    NodeStartStreamResponse,
    WorkflowFinishStreamResponse,
    WorkflowStartStreamResponse,
    WorkflowTaskState,
)
from core.app.task_pipeline.workflow_iteration_cycle_manage import WorkflowIterationCycleManage
from core.file.file_obj import FileVar
from core.model_runtime.utils.encoders import jsonable_encoder
from core.tools.tool_manager import ToolManager
from core.workflow.entities.node_entities import NodeRunMetadataKey, NodeType, SystemVariable
from core.workflow.entities.node_entities import NodeRunMetadataKey, NodeType
from core.workflow.nodes.tool.entities import ToolNodeData
from core.workflow.workflow_engine_manager import WorkflowEngineManager
from extensions.ext_database import db
@ -42,13 +41,7 @@ from models.workflow import (
)


class WorkflowCycleManage:
    _application_generate_entity: Union[AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity]
    _workflow: Workflow
    _user: Union[Account, EndUser]
    _task_state: Union[AdvancedChatTaskState, WorkflowTaskState]
    _workflow_system_variables: dict[SystemVariable, Any]

class WorkflowCycleManage(WorkflowIterationCycleManage):
    def _init_workflow_run(self, workflow: Workflow,
                           triggered_from: WorkflowRunTriggeredFrom,
                           user: Union[Account, EndUser],
@ -237,6 +230,7 @@ class WorkflowCycleManage:
                                        inputs: Optional[dict] = None,
                                        process_data: Optional[dict] = None,
                                        outputs: Optional[dict] = None,
                                        execution_metadata: Optional[dict] = None
                                        ) -> WorkflowNodeExecution:
        """
        Workflow node execution failed
@ -255,6 +249,8 @@ class WorkflowCycleManage:
        workflow_node_execution.inputs = json.dumps(inputs) if inputs else None
        workflow_node_execution.process_data = json.dumps(process_data) if process_data else None
        workflow_node_execution.outputs = json.dumps(outputs) if outputs else None
        workflow_node_execution.execution_metadata = json.dumps(jsonable_encoder(execution_metadata)) \
            if execution_metadata else None

        db.session.commit()
        db.session.refresh(workflow_node_execution)
@ -444,6 +440,23 @@ class WorkflowCycleManage:
        current_node_execution = self._task_state.ran_node_execution_infos[event.node_id]
        workflow_node_execution = db.session.query(WorkflowNodeExecution).filter(
            WorkflowNodeExecution.id == current_node_execution.workflow_node_execution_id).first()

        execution_metadata = event.execution_metadata if isinstance(event, QueueNodeSucceededEvent) else None

        if self._iteration_state and self._iteration_state.current_iterations:
            if not execution_metadata:
                execution_metadata = {}
            current_iteration_data = None
            for iteration_node_id in self._iteration_state.current_iterations:
                data = self._iteration_state.current_iterations[iteration_node_id]
                if data.parent_iteration_id is None:
                    current_iteration_data = data
                    break

            if current_iteration_data:
                execution_metadata[NodeRunMetadataKey.ITERATION_ID] = current_iteration_data.iteration_id
                execution_metadata[NodeRunMetadataKey.ITERATION_INDEX] = current_iteration_data.current_index

        if isinstance(event, QueueNodeSucceededEvent):
            workflow_node_execution = self._workflow_node_execution_success(
                workflow_node_execution=workflow_node_execution,
@ -451,12 +464,18 @@ class WorkflowCycleManage:
                inputs=event.inputs,
                process_data=event.process_data,
                outputs=event.outputs,
                execution_metadata=event.execution_metadata
                execution_metadata=execution_metadata
            )

            if event.execution_metadata and event.execution_metadata.get(NodeRunMetadataKey.TOTAL_TOKENS):
            if execution_metadata and execution_metadata.get(NodeRunMetadataKey.TOTAL_TOKENS):
                self._task_state.total_tokens += (
                    int(event.execution_metadata.get(NodeRunMetadataKey.TOTAL_TOKENS)))
                    int(execution_metadata.get(NodeRunMetadataKey.TOTAL_TOKENS)))

                if self._iteration_state:
                    for iteration_node_id in self._iteration_state.current_iterations:
                        data = self._iteration_state.current_iterations[iteration_node_id]
                        if execution_metadata.get(NodeRunMetadataKey.TOTAL_TOKENS):
                            data.total_tokens += int(execution_metadata.get(NodeRunMetadataKey.TOTAL_TOKENS))

            if workflow_node_execution.node_type == NodeType.LLM.value:
                outputs = workflow_node_execution.outputs_dict
@ -469,7 +488,8 @@ class WorkflowCycleManage:
                error=event.error,
                inputs=event.inputs,
                process_data=event.process_data,
                outputs=event.outputs
                outputs=event.outputs,
                execution_metadata=execution_metadata
            )

        db.session.close()

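The new block above stamps every node execution finishing inside a running top-level iteration with ITERATION_ID and ITERATION_INDEX, so steps can later be grouped per loop pass. A compact sketch of the selection rule (plain dicts instead of WorkflowIterationState.Data):

# Sketch of the tagging rule above: pick the top-level iteration
# (parent_iteration_id is None) and stamp its id/index onto node metadata.
current_iterations = {
    'iter-1': {'parent_iteration_id': None, 'iteration_id': 'iter-1', 'current_index': 3},
    'iter-2': {'parent_iteration_id': 'iter-1', 'iteration_id': 'iter-2', 'current_index': 0},
}

execution_metadata = {}
current = next((d for d in current_iterations.values()
                if d['parent_iteration_id'] is None), None)
if current:
    execution_metadata['iteration_id'] = current['iteration_id']
    execution_metadata['iteration_index'] = current['current_index']

assert execution_metadata == {'iteration_id': 'iter-1', 'iteration_index': 3}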
16 api/core/app/task_pipeline/workflow_cycle_state_manager.py Normal file
@ -0,0 +1,16 @@
from typing import Any, Union

from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity
from core.app.entities.task_entities import AdvancedChatTaskState, WorkflowTaskState
from core.workflow.entities.node_entities import SystemVariable
from models.account import Account
from models.model import EndUser
from models.workflow import Workflow


class WorkflowCycleStateManager:
    _application_generate_entity: Union[AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity]
    _workflow: Workflow
    _user: Union[Account, EndUser]
    _task_state: Union[AdvancedChatTaskState, WorkflowTaskState]
    _workflow_system_variables: dict[SystemVariable, Any]
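This new base class only declares the state attributes shared by the task-pipeline mixins; WorkflowIterationCycleManage builds on it in the next file, and WorkflowCycleManage inherits that in turn. A toy sketch of the layering, with made-up names:

# Toy sketch of the class layering introduced here: shared state lives in a
# base "state manager", feature mixins build on it. All names are made up.
class StateManagerSketch:
    _task_state: dict

class IterationMixinSketch(StateManagerSketch):
    def iteration_summary(self):
        # Mixin logic can rely on state declared by the base class.
        return self._task_state.get('iterations', 0)

class CycleManagerSketch(IterationMixinSketch):
    def __init__(self):
        self._task_state = {'iterations': 2}

assert CycleManagerSketch().iteration_summary() == 2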
281 api/core/app/task_pipeline/workflow_iteration_cycle_manage.py Normal file
@ -0,0 +1,281 @@
import json
import time
from collections.abc import Generator
from typing import Optional, Union

from core.app.entities.queue_entities import (
    QueueIterationCompletedEvent,
    QueueIterationNextEvent,
    QueueIterationStartEvent,
)
from core.app.entities.task_entities import (
    IterationNodeCompletedStreamResponse,
    IterationNodeNextStreamResponse,
    IterationNodeStartStreamResponse,
    NodeExecutionInfo,
    WorkflowIterationState,
)
from core.app.task_pipeline.workflow_cycle_state_manager import WorkflowCycleStateManager
from core.workflow.entities.node_entities import NodeType
from extensions.ext_database import db
from models.workflow import (
    WorkflowNodeExecution,
    WorkflowNodeExecutionStatus,
    WorkflowNodeExecutionTriggeredFrom,
    WorkflowRun,
)


class WorkflowIterationCycleManage(WorkflowCycleStateManager):
    _iteration_state: WorkflowIterationState = None

    def _init_iteration_state(self) -> WorkflowIterationState:
        if not self._iteration_state:
            self._iteration_state = WorkflowIterationState(
                current_iterations={}
            )

    def _handle_iteration_to_stream_response(self, task_id: str, event: QueueIterationStartEvent | QueueIterationNextEvent | QueueIterationCompletedEvent) \
            -> Union[IterationNodeStartStreamResponse, IterationNodeNextStreamResponse, IterationNodeCompletedStreamResponse]:
        """
        Handle iteration to stream response
        :param task_id: task id
        :param event: iteration event
        :return:
        """
        if isinstance(event, QueueIterationStartEvent):
            return IterationNodeStartStreamResponse(
                task_id=task_id,
                workflow_run_id=self._task_state.workflow_run_id,
                data=IterationNodeStartStreamResponse.Data(
                    id=event.node_id,
                    node_id=event.node_id,
                    node_type=event.node_type.value,
                    title=event.node_data.title,
                    created_at=int(time.time()),
                    extras={},
                    inputs=event.inputs,
                    metadata=event.metadata
                )
            )
        elif isinstance(event, QueueIterationNextEvent):
            current_iteration = self._iteration_state.current_iterations[event.node_id]

            return IterationNodeNextStreamResponse(
                task_id=task_id,
                workflow_run_id=self._task_state.workflow_run_id,
                data=IterationNodeNextStreamResponse.Data(
                    id=event.node_id,
                    node_id=event.node_id,
                    node_type=event.node_type.value,
                    title=current_iteration.node_data.title,
                    index=event.index,
                    pre_iteration_output=event.output,
                    created_at=int(time.time()),
                    extras={}
                )
            )
        elif isinstance(event, QueueIterationCompletedEvent):
            current_iteration = self._iteration_state.current_iterations[event.node_id]

            return IterationNodeCompletedStreamResponse(
                task_id=task_id,
                workflow_run_id=self._task_state.workflow_run_id,
                data=IterationNodeCompletedStreamResponse.Data(
                    id=event.node_id,
                    node_id=event.node_id,
                    node_type=event.node_type.value,
                    title=current_iteration.node_data.title,
                    outputs=event.outputs,
                    created_at=int(time.time()),
                    extras={},
                    inputs=current_iteration.inputs,
                    status=WorkflowNodeExecutionStatus.SUCCEEDED,
                    error=None,
                    elapsed_time=time.perf_counter() - current_iteration.started_at,
                    total_tokens=current_iteration.total_tokens,
                    finished_at=int(time.time()),
                    steps=current_iteration.current_index
                )
            )

    def _init_iteration_execution_from_workflow_run(self,
                                                    workflow_run: WorkflowRun,
                                                    node_id: str,
                                                    node_type: NodeType,
                                                    node_title: str,
                                                    node_run_index: int = 1,
                                                    inputs: Optional[dict] = None,
                                                    predecessor_node_id: Optional[str] = None
                                                    ) -> WorkflowNodeExecution:
        workflow_node_execution = WorkflowNodeExecution(
            tenant_id=workflow_run.tenant_id,
            app_id=workflow_run.app_id,
            workflow_id=workflow_run.workflow_id,
            triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value,
            workflow_run_id=workflow_run.id,
            predecessor_node_id=predecessor_node_id,
            index=node_run_index,
            node_id=node_id,
            node_type=node_type.value,
            inputs=json.dumps(inputs) if inputs else None,
            title=node_title,
            status=WorkflowNodeExecutionStatus.RUNNING.value,
            created_by_role=workflow_run.created_by_role,
            created_by=workflow_run.created_by,
            execution_metadata=json.dumps({
                'started_run_index': node_run_index + 1,
                'current_index': 0,
                'steps_boundary': [],
            })
        )

        db.session.add(workflow_node_execution)
        db.session.commit()
        db.session.refresh(workflow_node_execution)
        db.session.close()

        return workflow_node_execution

    def _handle_iteration_operation(self, event: QueueIterationStartEvent | QueueIterationNextEvent | QueueIterationCompletedEvent) -> WorkflowNodeExecution:
        if isinstance(event, QueueIterationStartEvent):
            return self._handle_iteration_started(event)
        elif isinstance(event, QueueIterationNextEvent):
            return self._handle_iteration_next(event)
        elif isinstance(event, QueueIterationCompletedEvent):
            return self._handle_iteration_completed(event)

    def _handle_iteration_started(self, event: QueueIterationStartEvent) -> WorkflowNodeExecution:
        self._init_iteration_state()

        workflow_run = db.session.query(WorkflowRun).filter(WorkflowRun.id == self._task_state.workflow_run_id).first()
        workflow_node_execution = self._init_iteration_execution_from_workflow_run(
            workflow_run=workflow_run,
            node_id=event.node_id,
            node_type=NodeType.ITERATION,
            node_title=event.node_data.title,
            node_run_index=event.node_run_index,
            inputs=event.inputs,
            predecessor_node_id=event.predecessor_node_id
        )

        latest_node_execution_info = NodeExecutionInfo(
            workflow_node_execution_id=workflow_node_execution.id,
            node_type=NodeType.ITERATION,
            start_at=time.perf_counter()
        )

        self._task_state.ran_node_execution_infos[event.node_id] = latest_node_execution_info
        self._task_state.latest_node_execution_info = latest_node_execution_info

        self._iteration_state.current_iterations[event.node_id] = WorkflowIterationState.Data(
            parent_iteration_id=None,
            iteration_id=event.node_id,
            current_index=0,
            iteration_steps_boundary=[],
            node_execution_id=workflow_node_execution.id,
            started_at=time.perf_counter(),
            inputs=event.inputs,
            total_tokens=0,
            node_data=event.node_data
        )

        db.session.close()

        return workflow_node_execution

    def _handle_iteration_next(self, event: QueueIterationNextEvent) -> WorkflowNodeExecution:
        if event.node_id not in self._iteration_state.current_iterations:
            return
        current_iteration = self._iteration_state.current_iterations[event.node_id]
        current_iteration.current_index = event.index
        current_iteration.iteration_steps_boundary.append(event.node_run_index)
        workflow_node_execution: WorkflowNodeExecution = db.session.query(WorkflowNodeExecution).filter(
            WorkflowNodeExecution.id == current_iteration.node_execution_id
        ).first()

        original_node_execution_metadata = workflow_node_execution.execution_metadata_dict
        if original_node_execution_metadata:
            original_node_execution_metadata['current_index'] = event.index
            original_node_execution_metadata['steps_boundary'] = current_iteration.iteration_steps_boundary
            original_node_execution_metadata['total_tokens'] = current_iteration.total_tokens
            workflow_node_execution.execution_metadata = json.dumps(original_node_execution_metadata)

            db.session.commit()

        db.session.close()

    def _handle_iteration_completed(self, event: QueueIterationCompletedEvent) -> WorkflowNodeExecution:
        if event.node_id not in self._iteration_state.current_iterations:
            return

        current_iteration = self._iteration_state.current_iterations[event.node_id]
        workflow_node_execution: WorkflowNodeExecution = db.session.query(WorkflowNodeExecution).filter(
            WorkflowNodeExecution.id == current_iteration.node_execution_id
        ).first()

        workflow_node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED.value
        workflow_node_execution.outputs = json.dumps(event.outputs) if event.outputs else None
        workflow_node_execution.elapsed_time = time.perf_counter() - current_iteration.started_at

        original_node_execution_metadata = workflow_node_execution.execution_metadata_dict
        if original_node_execution_metadata:
            original_node_execution_metadata['steps_boundary'] = current_iteration.iteration_steps_boundary
            original_node_execution_metadata['total_tokens'] = current_iteration.total_tokens
            workflow_node_execution.execution_metadata = json.dumps(original_node_execution_metadata)

        db.session.commit()

        # remove current iteration
        self._iteration_state.current_iterations.pop(event.node_id, None)

        # set latest node execution info
        latest_node_execution_info = NodeExecutionInfo(
            workflow_node_execution_id=workflow_node_execution.id,
            node_type=NodeType.ITERATION,
            start_at=time.perf_counter()
        )

        self._task_state.latest_node_execution_info = latest_node_execution_info

        db.session.close()

    def _handle_iteration_exception(self, task_id: str, error: str) -> Generator[IterationNodeCompletedStreamResponse, None, None]:
        """
        Handle iteration exception
        """
        if not self._iteration_state or not self._iteration_state.current_iterations:
            return

        for node_id, current_iteration in self._iteration_state.current_iterations.items():
            workflow_node_execution: WorkflowNodeExecution = db.session.query(WorkflowNodeExecution).filter(
                WorkflowNodeExecution.id == current_iteration.node_execution_id
            ).first()

            workflow_node_execution.status = WorkflowNodeExecutionStatus.FAILED.value
            workflow_node_execution.error = error
            workflow_node_execution.elapsed_time = time.perf_counter() - current_iteration.started_at

            db.session.commit()
            db.session.close()

            yield IterationNodeCompletedStreamResponse(
                task_id=task_id,
                workflow_run_id=self._task_state.workflow_run_id,
                data=IterationNodeCompletedStreamResponse.Data(
                    id=node_id,
                    node_id=node_id,
                    node_type=NodeType.ITERATION.value,
                    title=current_iteration.node_data.title,
                    outputs={},
                    created_at=int(time.time()),
                    extras={},
                    inputs=current_iteration.inputs,
                    status=WorkflowNodeExecutionStatus.FAILED,
                    error=error,
                    elapsed_time=time.perf_counter() - current_iteration.started_at,
                    total_tokens=current_iteration.total_tokens,
                    finished_at=int(time.time()),
                    steps=current_iteration.current_index
                )
            )
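Taken together, the handlers above give each iteration one WorkflowNodeExecution row whose execution_metadata accumulates current_index, steps_boundary, and total_tokens as next-events arrive, then is finalized (or marked failed) once. A database-free sketch of that metadata lifecycle:

import json

# Sketch of the per-iteration metadata lifecycle handled above (no DB).
metadata = {'started_run_index': 2, 'current_index': 0, 'steps_boundary': []}

def on_next(index, node_run_index, total_tokens):
    # Mirrors _handle_iteration_next's metadata updates.
    metadata['current_index'] = index
    metadata['steps_boundary'].append(node_run_index)
    metadata['total_tokens'] = total_tokens

on_next(1, 4, 120)   # first QueueIterationNextEvent
on_next(2, 7, 260)   # second pass
serialized = json.dumps(metadata)  # what gets written to execution_metadata
assert json.loads(serialized)['steps_boundary'] == [4, 7]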
@ -16,6 +16,7 @@ class ModelStatus(Enum):
    NO_CONFIGURE = "no-configure"
    QUOTA_EXCEEDED = "quota-exceeded"
    NO_PERMISSION = "no-permission"
    DISABLED = "disabled"


class SimpleModelProviderEntity(BaseModel):
@ -43,12 +44,19 @@ class SimpleModelProviderEntity(BaseModel):
        )


class ModelWithProviderEntity(ProviderModel):
class ProviderModelWithStatusEntity(ProviderModel):
    """
    Model class for model response.
    """
    status: ModelStatus
    load_balancing_enabled: bool = False


class ModelWithProviderEntity(ProviderModelWithStatusEntity):
    """
    Model with provider entity.
    """
    provider: SimpleModelProviderEntity
    status: ModelStatus


class DefaultModelProviderEntity(BaseModel):

@ -1,6 +1,7 @@
import datetime
import json
import logging
from collections import defaultdict
from collections.abc import Iterator
from json import JSONDecodeError
from typing import Optional
@ -8,7 +9,12 @@ from typing import Optional
from pydantic import BaseModel

from core.entities.model_entities import ModelStatus, ModelWithProviderEntity, SimpleModelProviderEntity
from core.entities.provider_entities import CustomConfiguration, SystemConfiguration, SystemConfigurationStatus
from core.entities.provider_entities import (
    CustomConfiguration,
    ModelSettings,
    SystemConfiguration,
    SystemConfigurationStatus,
)
from core.helper import encrypter
from core.helper.model_provider_cache import ProviderCredentialsCache, ProviderCredentialsCacheType
from core.model_runtime.entities.model_entities import FetchFrom, ModelType
@ -22,7 +28,14 @@ from core.model_runtime.model_providers import model_provider_factory
from core.model_runtime.model_providers.__base.ai_model import AIModel
from core.model_runtime.model_providers.__base.model_provider import ModelProvider
from extensions.ext_database import db
from models.provider import Provider, ProviderModel, ProviderType, TenantPreferredModelProvider
from models.provider import (
    LoadBalancingModelConfig,
    Provider,
    ProviderModel,
    ProviderModelSetting,
    ProviderType,
    TenantPreferredModelProvider,
)

logger = logging.getLogger(__name__)

@ -39,6 +52,7 @@ class ProviderConfiguration(BaseModel):
    using_provider_type: ProviderType
    system_configuration: SystemConfiguration
    custom_configuration: CustomConfiguration
    model_settings: list[ModelSettings]

    def __init__(self, **data):
        super().__init__(**data)
@ -62,6 +76,14 @@ class ProviderConfiguration(BaseModel):
        :param model: model name
        :return:
        """
        if self.model_settings:
            # check if model is disabled by admin
            for model_setting in self.model_settings:
                if (model_setting.model_type == model_type
                        and model_setting.model == model):
                    if not model_setting.enabled:
                        raise ValueError(f'Model {model} is disabled.')

        if self.using_provider_type == ProviderType.SYSTEM:
            restrict_models = []
            for quota_configuration in self.system_configuration.quota_configurations:
@ -80,15 +102,17 @@ class ProviderConfiguration(BaseModel):

            return copy_credentials
        else:
            credentials = None
            if self.custom_configuration.models:
                for model_configuration in self.custom_configuration.models:
                    if model_configuration.model_type == model_type and model_configuration.model == model:
                        return model_configuration.credentials
                        credentials = model_configuration.credentials
                        break

            if self.custom_configuration.provider:
                return self.custom_configuration.provider.credentials
            else:
                return None
                credentials = self.custom_configuration.provider.credentials

            return credentials

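The model_settings check added above gates credential resolution: a model an admin disabled raises before any credentials are looked up. A standalone sketch of that gate, with plain dicts in place of the ORM-backed settings:

# Sketch of the admin-disable gate added above (plain data, no ORM).
model_settings = [
    {'model_type': 'llm', 'model': 'gpt-x', 'enabled': False},
    {'model_type': 'llm', 'model': 'gpt-y', 'enabled': True},
]

def check_enabled(model_type: str, model: str) -> None:
    for setting in model_settings:
        if setting['model_type'] == model_type and setting['model'] == model:
            if not setting['enabled']:
                raise ValueError(f'Model {model} is disabled.')

check_enabled('llm', 'gpt-y')          # passes silently
try:
    check_enabled('llm', 'gpt-x')      # raises: disabled by admin
except ValueError as e:
    assert 'disabled' in str(e)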
    def get_system_configuration_status(self) -> SystemConfigurationStatus:
        """
@ -130,7 +154,7 @@ class ProviderConfiguration(BaseModel):
            return credentials

        # Obfuscate credentials
        return self._obfuscated_credentials(
        return self.obfuscated_credentials(
            credentials=credentials,
            credential_form_schemas=self.provider.provider_credential_schema.credential_form_schemas
            if self.provider.provider_credential_schema else []
@ -151,7 +175,7 @@ class ProviderConfiguration(BaseModel):
        ).first()

        # Get provider credential secret variables
        provider_credential_secret_variables = self._extract_secret_variables(
        provider_credential_secret_variables = self.extract_secret_variables(
            self.provider.provider_credential_schema.credential_form_schemas
            if self.provider.provider_credential_schema else []
        )
@ -274,7 +298,7 @@ class ProviderConfiguration(BaseModel):
            return credentials

        # Obfuscate credentials
        return self._obfuscated_credentials(
        return self.obfuscated_credentials(
            credentials=credentials,
            credential_form_schemas=self.provider.model_credential_schema.credential_form_schemas
            if self.provider.model_credential_schema else []
@ -302,7 +326,7 @@ class ProviderConfiguration(BaseModel):
        ).first()

        # Get provider credential secret variables
        provider_credential_secret_variables = self._extract_secret_variables(
        provider_credential_secret_variables = self.extract_secret_variables(
            self.provider.model_credential_schema.credential_form_schemas
            if self.provider.model_credential_schema else []
        )
@ -402,6 +426,160 @@ class ProviderConfiguration(BaseModel):

        provider_model_credentials_cache.delete()

    def enable_model(self, model_type: ModelType, model: str) -> ProviderModelSetting:
        """
        Enable model.
        :param model_type: model type
        :param model: model name
        :return:
        """
        model_setting = db.session.query(ProviderModelSetting) \
            .filter(
            ProviderModelSetting.tenant_id == self.tenant_id,
            ProviderModelSetting.provider_name == self.provider.provider,
            ProviderModelSetting.model_type == model_type.to_origin_model_type(),
            ProviderModelSetting.model_name == model
        ).first()

        if model_setting:
            model_setting.enabled = True
            model_setting.updated_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
            db.session.commit()
        else:
            model_setting = ProviderModelSetting(
                tenant_id=self.tenant_id,
                provider_name=self.provider.provider,
                model_type=model_type.to_origin_model_type(),
                model_name=model,
                enabled=True
            )
            db.session.add(model_setting)
            db.session.commit()

        return model_setting

    def disable_model(self, model_type: ModelType, model: str) -> ProviderModelSetting:
        """
        Disable model.
        :param model_type: model type
        :param model: model name
        :return:
        """
        model_setting = db.session.query(ProviderModelSetting) \
            .filter(
            ProviderModelSetting.tenant_id == self.tenant_id,
            ProviderModelSetting.provider_name == self.provider.provider,
            ProviderModelSetting.model_type == model_type.to_origin_model_type(),
            ProviderModelSetting.model_name == model
        ).first()

        if model_setting:
            model_setting.enabled = False
            model_setting.updated_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
            db.session.commit()
        else:
            model_setting = ProviderModelSetting(
                tenant_id=self.tenant_id,
                provider_name=self.provider.provider,
                model_type=model_type.to_origin_model_type(),
                model_name=model,
                enabled=False
            )
            db.session.add(model_setting)
            db.session.commit()

        return model_setting

    def get_provider_model_setting(self, model_type: ModelType, model: str) -> Optional[ProviderModelSetting]:
        """
        Get provider model setting.
        :param model_type: model type
        :param model: model name
        :return:
        """
        return db.session.query(ProviderModelSetting) \
            .filter(
            ProviderModelSetting.tenant_id == self.tenant_id,
            ProviderModelSetting.provider_name == self.provider.provider,
            ProviderModelSetting.model_type == model_type.to_origin_model_type(),
            ProviderModelSetting.model_name == model
        ).first()

    def enable_model_load_balancing(self, model_type: ModelType, model: str) -> ProviderModelSetting:
        """
        Enable model load balancing.
        :param model_type: model type
        :param model: model name
        :return:
        """
        load_balancing_config_count = db.session.query(LoadBalancingModelConfig) \
            .filter(
            LoadBalancingModelConfig.tenant_id == self.tenant_id,
            LoadBalancingModelConfig.provider_name == self.provider.provider,
            LoadBalancingModelConfig.model_type == model_type.to_origin_model_type(),
            LoadBalancingModelConfig.model_name == model
        ).count()

        if load_balancing_config_count <= 1:
            raise ValueError('Model load balancing configuration must be more than 1.')

        model_setting = db.session.query(ProviderModelSetting) \
            .filter(
            ProviderModelSetting.tenant_id == self.tenant_id,
            ProviderModelSetting.provider_name == self.provider.provider,
            ProviderModelSetting.model_type == model_type.to_origin_model_type(),
            ProviderModelSetting.model_name == model
        ).first()

        if model_setting:
            model_setting.load_balancing_enabled = True
            model_setting.updated_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
            db.session.commit()
        else:
            model_setting = ProviderModelSetting(
                tenant_id=self.tenant_id,
                provider_name=self.provider.provider,
                model_type=model_type.to_origin_model_type(),
                model_name=model,
                load_balancing_enabled=True
            )
            db.session.add(model_setting)
            db.session.commit()

        return model_setting

    def disable_model_load_balancing(self, model_type: ModelType, model: str) -> ProviderModelSetting:
        """
        Disable model load balancing.
        :param model_type: model type
        :param model: model name
        :return:
        """
        model_setting = db.session.query(ProviderModelSetting) \
            .filter(
            ProviderModelSetting.tenant_id == self.tenant_id,
            ProviderModelSetting.provider_name == self.provider.provider,
            ProviderModelSetting.model_type == model_type.to_origin_model_type(),
            ProviderModelSetting.model_name == model
        ).first()

        if model_setting:
            model_setting.load_balancing_enabled = False
            model_setting.updated_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
            db.session.commit()
        else:
            model_setting = ProviderModelSetting(
                tenant_id=self.tenant_id,
                provider_name=self.provider.provider,
                model_type=model_type.to_origin_model_type(),
                model_name=model,
                load_balancing_enabled=False
            )
            db.session.add(model_setting)
            db.session.commit()

        return model_setting

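Note the guard in enable_model_load_balancing above: balancing is only allowed once at least two LoadBalancingModelConfig rows exist, after which the setting row is upserted. A sketch of the same precondition, with the database reduced to plain values:

# Sketch of the precondition enforced by enable_model_load_balancing above.
def enable_load_balancing(config_count: int, setting: dict | None) -> dict:
    if config_count <= 1:
        raise ValueError('Model load balancing configuration must be more than 1.')
    if setting is None:
        setting = {'load_balancing_enabled': False}  # create (the "else" branch)
    setting['load_balancing_enabled'] = True         # update (the "if" branch)
    return setting

assert enable_load_balancing(2, None)['load_balancing_enabled'] is True
try:
    enable_load_balancing(1, None)   # only one credential set: rejected
except ValueError:
    pass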
    def get_provider_instance(self) -> ModelProvider:
        """
        Get provider instance.
@ -453,7 +631,7 @@ class ProviderConfiguration(BaseModel):

        db.session.commit()

    def _extract_secret_variables(self, credential_form_schemas: list[CredentialFormSchema]) -> list[str]:
    def extract_secret_variables(self, credential_form_schemas: list[CredentialFormSchema]) -> list[str]:
        """
        Extract secret input form variables.

@ -467,7 +645,7 @@ class ProviderConfiguration(BaseModel):

        return secret_input_form_variables

    def _obfuscated_credentials(self, credentials: dict, credential_form_schemas: list[CredentialFormSchema]) -> dict:
    def obfuscated_credentials(self, credentials: dict, credential_form_schemas: list[CredentialFormSchema]) -> dict:
        """
        Obfuscated credentials.

@ -476,7 +654,7 @@ class ProviderConfiguration(BaseModel):
        :return:
        """
        # Get provider credential secret variables
        credential_secret_variables = self._extract_secret_variables(
        credential_secret_variables = self.extract_secret_variables(
            credential_form_schemas
        )

@ -522,15 +700,22 @@ class ProviderConfiguration(BaseModel):
        else:
            model_types = provider_instance.get_provider_schema().supported_model_types

        # Group model settings by model type and model
        model_setting_map = defaultdict(dict)
        for model_setting in self.model_settings:
            model_setting_map[model_setting.model_type][model_setting.model] = model_setting

        if self.using_provider_type == ProviderType.SYSTEM:
            provider_models = self._get_system_provider_models(
                model_types=model_types,
                provider_instance=provider_instance
                provider_instance=provider_instance,
                model_setting_map=model_setting_map
            )
        else:
            provider_models = self._get_custom_provider_models(
                model_types=model_types,
                provider_instance=provider_instance
                provider_instance=provider_instance,
                model_setting_map=model_setting_map
            )

        if only_active:
@ -541,18 +726,27 @@ class ProviderConfiguration(BaseModel):

    def _get_system_provider_models(self,
                                    model_types: list[ModelType],
                                    provider_instance: ModelProvider) -> list[ModelWithProviderEntity]:
                                    provider_instance: ModelProvider,
                                    model_setting_map: dict[ModelType, dict[str, ModelSettings]]) \
            -> list[ModelWithProviderEntity]:
        """
        Get system provider models.

        :param model_types: model types
        :param provider_instance: provider instance
        :param model_setting_map: model setting map
        :return:
        """
        provider_models = []
        for model_type in model_types:
            provider_models.extend(
                [
            for m in provider_instance.models(model_type):
                status = ModelStatus.ACTIVE
                if m.model_type in model_setting_map and m.model in model_setting_map[m.model_type]:
                    model_setting = model_setting_map[m.model_type][m.model]
                    if model_setting.enabled is False:
                        status = ModelStatus.DISABLED

                provider_models.append(
                    ModelWithProviderEntity(
                        model=m.model,
                        label=m.label,
@ -562,11 +756,9 @@ class ProviderConfiguration(BaseModel):
                        model_properties=m.model_properties,
                        deprecated=m.deprecated,
                        provider=SimpleModelProviderEntity(self.provider),
                        status=ModelStatus.ACTIVE
                        status=status
                    )
                    for m in provider_instance.models(model_type)
                ]
            )
                )

        if self.provider.provider not in original_provider_configurate_methods:
            original_provider_configurate_methods[self.provider.provider] = []
@ -586,7 +778,8 @@ class ProviderConfiguration(BaseModel):
                break

        if should_use_custom_model:
            if original_provider_configurate_methods[self.provider.provider] == [ConfigurateMethod.CUSTOMIZABLE_MODEL]:
            if original_provider_configurate_methods[self.provider.provider] == [
                    ConfigurateMethod.CUSTOMIZABLE_MODEL]:
                # only customizable model
                for restrict_model in restrict_models:
                    copy_credentials = self.system_configuration.credentials.copy()
@ -611,6 +804,13 @@ class ProviderConfiguration(BaseModel):
                    if custom_model_schema.model_type not in model_types:
                        continue

                    status = ModelStatus.ACTIVE
                    if (custom_model_schema.model_type in model_setting_map
                            and custom_model_schema.model in model_setting_map[custom_model_schema.model_type]):
                        model_setting = model_setting_map[custom_model_schema.model_type][custom_model_schema.model]
                        if model_setting.enabled is False:
                            status = ModelStatus.DISABLED

                    provider_models.append(
                        ModelWithProviderEntity(
                            model=custom_model_schema.model,
@ -621,7 +821,7 @@ class ProviderConfiguration(BaseModel):
                            model_properties=custom_model_schema.model_properties,
                            deprecated=custom_model_schema.deprecated,
                            provider=SimpleModelProviderEntity(self.provider),
                            status=ModelStatus.ACTIVE
                            status=status
                        )
                    )

@ -632,16 +832,20 @@ class ProviderConfiguration(BaseModel):
                    m.status = ModelStatus.NO_PERMISSION
                elif not quota_configuration.is_valid:
                    m.status = ModelStatus.QUOTA_EXCEEDED

        return provider_models

    def _get_custom_provider_models(self,
                                    model_types: list[ModelType],
                                    provider_instance: ModelProvider) -> list[ModelWithProviderEntity]:
                                    provider_instance: ModelProvider,
                                    model_setting_map: dict[ModelType, dict[str, ModelSettings]]) \
            -> list[ModelWithProviderEntity]:
        """
        Get custom provider models.

        :param model_types: model types
        :param provider_instance: provider instance
        :param model_setting_map: model setting map
        :return:
        """
        provider_models = []
@ -656,6 +860,16 @@ class ProviderConfiguration(BaseModel):

            models = provider_instance.models(model_type)
            for m in models:
                status = ModelStatus.ACTIVE if credentials else ModelStatus.NO_CONFIGURE
                load_balancing_enabled = False
                if m.model_type in model_setting_map and m.model in model_setting_map[m.model_type]:
                    model_setting = model_setting_map[m.model_type][m.model]
                    if model_setting.enabled is False:
                        status = ModelStatus.DISABLED

                    if len(model_setting.load_balancing_configs) > 1:
                        load_balancing_enabled = True

                provider_models.append(
                    ModelWithProviderEntity(
                        model=m.model,
@ -666,7 +880,8 @@ class ProviderConfiguration(BaseModel):
                        model_properties=m.model_properties,
                        deprecated=m.deprecated,
                        provider=SimpleModelProviderEntity(self.provider),
                        status=ModelStatus.ACTIVE if credentials else ModelStatus.NO_CONFIGURE
                        status=status,
                        load_balancing_enabled=load_balancing_enabled
                    )
                )
@ -690,6 +905,17 @@ class ProviderConfiguration(BaseModel):
                if not custom_model_schema:
                    continue

                status = ModelStatus.ACTIVE
                load_balancing_enabled = False
                if (custom_model_schema.model_type in model_setting_map
                        and custom_model_schema.model in model_setting_map[custom_model_schema.model_type]):
                    model_setting = model_setting_map[custom_model_schema.model_type][custom_model_schema.model]
                    if model_setting.enabled is False:
                        status = ModelStatus.DISABLED

                    if len(model_setting.load_balancing_configs) > 1:
                        load_balancing_enabled = True

                provider_models.append(
                    ModelWithProviderEntity(
                        model=custom_model_schema.model,
@ -700,7 +926,8 @@ class ProviderConfiguration(BaseModel):
                        model_properties=custom_model_schema.model_properties,
                        deprecated=custom_model_schema.deprecated,
                        provider=SimpleModelProviderEntity(self.provider),
                        status=ModelStatus.ACTIVE
                        status=status,
                        load_balancing_enabled=load_balancing_enabled
                    )
                )

@ -72,3 +72,22 @@ class CustomConfiguration(BaseModel):
    """
    provider: Optional[CustomProviderConfiguration] = None
    models: list[CustomModelConfiguration] = []


class ModelLoadBalancingConfiguration(BaseModel):
    """
    Class for model load balancing configuration.
    """
    id: str
    name: str
    credentials: dict


class ModelSettings(BaseModel):
    """
    Model class for model settings.
    """
    model: str
    model_type: ModelType
    enabled: bool = True
    load_balancing_configs: list[ModelLoadBalancingConfiguration] = []

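With these entities, a model's admin state travels as one object: an enabled flag plus zero or more load-balancing credential sets. A sketch instantiating them as declared above; model_type is a plain str here instead of the ModelType enum:

from pydantic import BaseModel

# Sketch of the new settings entities; model_type is simplified to str.
class ModelLoadBalancingConfiguration(BaseModel):
    id: str
    name: str
    credentials: dict

class ModelSettings(BaseModel):
    model: str
    model_type: str
    enabled: bool = True
    load_balancing_configs: list[ModelLoadBalancingConfiguration] = []

settings = ModelSettings(
    model='gpt-x',
    model_type='llm',
    load_balancing_configs=[
        ModelLoadBalancingConfiguration(id='1', name='key-a', credentials={'api_key': 'a'}),
        ModelLoadBalancingConfiguration(id='2', name='key-b', credentials={'api_key': 'b'}),
    ],
)
assert settings.enabled and len(settings.load_balancing_configs) == 2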
@ -7,7 +7,7 @@ from typing import Any, Optional

from pydantic import BaseModel

from core.utils.position_helper import sort_to_dict_by_position_map
from core.helper.position_helper import sort_to_dict_by_position_map


class ExtensionModule(enum.Enum):

@ -42,6 +42,8 @@ class MessageFileParser:
                    raise ValueError('Invalid file url')
                if file.get('transfer_method') == FileTransferMethod.LOCAL_FILE.value and not file.get('upload_file_id'):
                    raise ValueError('Missing file upload_file_id')
                if file.get('transfer_method') == FileTransferMethod.TOOL_FILE.value and not file.get('tool_file_id'):
                    raise ValueError('Missing file tool_file_id')

        # transform files to file objs
        type_file_objs = self._to_file_objs(files, file_extra_config)
@ -149,12 +151,21 @@ class MessageFileParser:
        """
        if isinstance(file, dict):
            transfer_method = FileTransferMethod.value_of(file.get('transfer_method'))
            if transfer_method != FileTransferMethod.TOOL_FILE:
                return FileVar(
                    tenant_id=self.tenant_id,
                    type=FileType.value_of(file.get('type')),
                    transfer_method=transfer_method,
                    url=file.get('url') if transfer_method == FileTransferMethod.REMOTE_URL else None,
                    related_id=file.get('upload_file_id') if transfer_method == FileTransferMethod.LOCAL_FILE else None,
                    extra_config=file_extra_config
                )
            return FileVar(
                tenant_id=self.tenant_id,
                type=FileType.value_of(file.get('type')),
                transfer_method=transfer_method,
                url=file.get('url') if transfer_method == FileTransferMethod.REMOTE_URL else None,
                related_id=file.get('upload_file_id') if transfer_method == FileTransferMethod.LOCAL_FILE else None,
                url=None,
                related_id=file.get('tool_file_id'),
                extra_config=file_extra_config
            )
        else:

@ -77,4 +77,4 @@ class UploadFileParser:
            return False

        current_time = int(time.time())
        return current_time - int(timestamp) <= 300  # expired after 5 minutes
        return current_time - int(timestamp) <= current_app.config.get('FILES_ACCESS_TIMEOUT')

@ -1,13 +1,21 @@
import logging
import time
from enum import Enum
from threading import Lock
from typing import Literal, Optional

from httpx import post
from httpx import get, post
from pydantic import BaseModel
from yarl import URL

from config import get_env
from core.helper.code_executor.javascript_transformer import NodeJsTemplateTransformer
from core.helper.code_executor.jinja2_transformer import Jinja2TemplateTransformer
from core.helper.code_executor.python_transformer import PythonTemplateTransformer
from core.helper.code_executor.entities import CodeDependency
from core.helper.code_executor.javascript.javascript_transformer import NodeJsTemplateTransformer
from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer
from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
from core.helper.code_executor.template_transformer import TemplateTransformer

logger = logging.getLogger(__name__)

# Code Executor
CODE_EXECUTION_ENDPOINT = get_env('CODE_EXECUTION_ENDPOINT')
@ -28,9 +36,38 @@ class CodeExecutionResponse(BaseModel):
    data: Data


class CodeLanguage(str, Enum):
    PYTHON3 = 'python3'
    JINJA2 = 'jinja2'
    JAVASCRIPT = 'javascript'


class CodeExecutor:
    dependencies_cache = {}
    dependencies_cache_lock = Lock()

    code_template_transformers: dict[CodeLanguage, type[TemplateTransformer]] = {
        CodeLanguage.PYTHON3: Python3TemplateTransformer,
        CodeLanguage.JINJA2: Jinja2TemplateTransformer,
        CodeLanguage.JAVASCRIPT: NodeJsTemplateTransformer,
    }

    code_language_to_running_language = {
        CodeLanguage.JAVASCRIPT: 'nodejs',
        CodeLanguage.JINJA2: CodeLanguage.PYTHON3,
        CodeLanguage.PYTHON3: CodeLanguage.PYTHON3,
    }

    supported_dependencies_languages: set[CodeLanguage] = {
        CodeLanguage.PYTHON3
    }

    @classmethod
    def execute_code(cls, language: Literal['python3', 'javascript', 'jinja2'], preload: str, code: str) -> str:
    def execute_code(cls,
                     language: Literal['python3', 'javascript', 'jinja2'],
                     preload: str,
                     code: str,
                     dependencies: Optional[list[CodeDependency]] = None) -> str:
        """
        Execute code
        :param language: code language
@ -44,13 +81,15 @@
        }

        data = {
            'language': 'python3' if language == 'jinja2' else
                'nodejs' if language == 'javascript' else
                'python3' if language == 'python3' else None,
            'language': cls.code_language_to_running_language.get(language),
            'code': code,
            'preload': preload
            'preload': preload,
            'enable_network': True
        }

        if dependencies:
            data['dependencies'] = [dependency.dict() for dependency in dependencies]

        try:
            response = post(str(url), json=data, headers=headers, timeout=CODE_EXECUTION_TIMEOUT)
            if response.status_code == 503:
@ -60,7 +99,9 @@ class CodeExecutor:
        except CodeExecutionException as e:
            raise e
        except Exception as e:
            raise CodeExecutionException('Failed to execute code, this is likely a network issue, please check if the sandbox service is running')
            raise CodeExecutionException('Failed to execute code, which is likely a network issue,'
                                         ' please check if the sandbox service is running.'
                                         f' ( Error: {str(e)} )')

        try:
            response = response.json()
@ -78,7 +119,7 @@ class CodeExecutor:
        return response.data.stdout

    @classmethod
    def execute_workflow_code_template(cls, language: Literal['python3', 'javascript', 'jinja2'], code: str, inputs: dict) -> dict:
    def execute_workflow_code_template(cls, language: Literal['python3', 'javascript', 'jinja2'], code: str, inputs: dict, dependencies: Optional[list[CodeDependency]] = None) -> dict:
        """
        Execute code
        :param language: code language
@ -86,21 +127,71 @@ class CodeExecutor:
        :param inputs: inputs
        :return:
        """
        template_transformer = None
        if language == 'python3':
            template_transformer = PythonTemplateTransformer
        elif language == 'jinja2':
            template_transformer = Jinja2TemplateTransformer
        elif language == 'javascript':
            template_transformer = NodeJsTemplateTransformer
        else:
            raise CodeExecutionException('Unsupported language')
        template_transformer = cls.code_template_transformers.get(language)
        if not template_transformer:
            raise CodeExecutionException(f'Unsupported language {language}')

        runner, preload = template_transformer.transform_caller(code, inputs)
        runner, preload, dependencies = template_transformer.transform_caller(code, inputs, dependencies)

        try:
            response = cls.execute_code(language, preload, runner)
            response = cls.execute_code(language, preload, runner, dependencies)
        except CodeExecutionException as e:
            raise e

        return template_transformer.transform_response(response)
|
||||
|
||||
@classmethod
|
||||
def list_dependencies(cls, language: str) -> list[CodeDependency]:
|
||||
if language not in cls.supported_dependencies_languages:
|
||||
return []
|
||||
|
||||
with cls.dependencies_cache_lock:
|
||||
if language in cls.dependencies_cache:
|
||||
# check expiration
|
||||
dependencies = cls.dependencies_cache[language]
|
||||
if dependencies['expiration'] > time.time():
|
||||
return dependencies['data']
|
||||
# remove expired cache
|
||||
del cls.dependencies_cache[language]
|
||||
|
||||
dependencies = cls._get_dependencies(language)
|
||||
with cls.dependencies_cache_lock:
|
||||
cls.dependencies_cache[language] = {
|
||||
'data': dependencies,
|
||||
'expiration': time.time() + 60
|
||||
}
|
||||
|
||||
return dependencies
|
||||
|
||||
@classmethod
|
||||
def _get_dependencies(cls, language: Literal['python3']) -> list[CodeDependency]:
|
||||
"""
|
||||
List dependencies
|
||||
"""
|
||||
url = URL(CODE_EXECUTION_ENDPOINT) / 'v1' / 'sandbox' / 'dependencies'
|
||||
|
||||
headers = {
|
||||
'X-Api-Key': CODE_EXECUTION_API_KEY
|
||||
}
|
||||
|
||||
running_language = cls.code_language_to_running_language.get(language)
|
||||
if isinstance(running_language, Enum):
|
||||
running_language = running_language.value
|
||||
|
||||
data = {
|
||||
'language': running_language,
|
||||
}
|
||||
|
||||
try:
|
||||
response = get(str(url), params=data, headers=headers, timeout=CODE_EXECUTION_TIMEOUT)
|
||||
if response.status_code != 200:
|
||||
raise Exception(f'Failed to list dependencies, got status code {response.status_code}, please check if the sandbox service is running')
|
||||
response = response.json()
|
||||
dependencies = response.get('data', {}).get('dependencies', [])
|
||||
return [
|
||||
CodeDependency(**dependency) for dependency in dependencies
|
||||
if dependency.get('name') not in Python3TemplateTransformer.get_standard_packages()
|
||||
]
|
||||
except Exception as e:
|
||||
logger.exception(f'Failed to list dependencies: {e}')
|
||||
return []
|
||||
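For orientation, a minimal usage sketch of the reworked executor entry point follows. The inputs and the extra dependency are illustrative, and it assumes a sandbox service is reachable at CODE_EXECUTION_ENDPOINT; it is not part of the change itself.

# Hedged sketch: invoking the new executor API introduced above.
from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage
from core.helper.code_executor.entities import CodeDependency

code = "def main(arg1: int, arg2: int) -> dict:\n    return {'result': arg1 + arg2}"
result = CodeExecutor.execute_workflow_code_template(
    language=CodeLanguage.PYTHON3,
    code=code,
    inputs={'arg1': 1, 'arg2': 2},
    dependencies=[CodeDependency(name='requests', version='')],  # optional, illustrative
)
print(result)  # -> {'result': 3}, parsed out of the sandbox's <<RESULT>> markers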
55
api/core/helper/code_executor/code_node_provider.py
Normal file
@ -0,0 +1,55 @@
from abc import abstractmethod

from pydantic import BaseModel

from core.helper.code_executor.code_executor import CodeExecutor


class CodeNodeProvider(BaseModel):
@staticmethod
@abstractmethod
def get_language() -> str:
pass

@classmethod
def is_accept_language(cls, language: str) -> bool:
return language == cls.get_language()

@classmethod
@abstractmethod
def get_default_code(cls) -> str:
"""
get default code in specific programming language for the code node
"""
pass

@classmethod
def get_default_available_packages(cls) -> list[dict]:
return [p.dict() for p in CodeExecutor.list_dependencies(cls.get_language())]

@classmethod
def get_default_config(cls) -> dict:
return {
"type": "code",
"config": {
"variables": [
{
"variable": "arg1",
"value_selector": []
},
{
"variable": "arg2",
"value_selector": []
}
],
"code_language": cls.get_language(),
"code": cls.get_default_code(),
"outputs": {
"result": {
"type": "string",
"children": None
}
}
},
"available_dependencies": cls.get_default_available_packages(),
}
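A sketch of how a concrete provider plugs into this abstraction, using the Python3CodeProvider defined later in this change; the import path is an assumption, since the new file's path is not shown in the diff.

# Hedged sketch; the module path below is assumed, not confirmed by the diff.
from core.helper.code_executor.python3.python3_code_provider import Python3CodeProvider

assert Python3CodeProvider.is_accept_language('python3')
config = Python3CodeProvider.get_default_config()
# config bundles the language, the starter code, and the sandbox's installable
# packages (via CodeExecutor.list_dependencies) for the workflow code node UI.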
6
api/core/helper/code_executor/entities.py
Normal file
@ -0,0 +1,6 @@
from pydantic import BaseModel


class CodeDependency(BaseModel):
name: str
version: str
@ -0,0 +1,21 @@
from textwrap import dedent

from core.helper.code_executor.code_executor import CodeLanguage
from core.helper.code_executor.code_node_provider import CodeNodeProvider


class JavascriptCodeProvider(CodeNodeProvider):
@staticmethod
def get_language() -> str:
return CodeLanguage.JAVASCRIPT

@classmethod
def get_default_code(cls) -> str:
return dedent(
"""
function main({arg1, arg2}) {
return {
result: arg1 + arg2
}
}
""")
@ -0,0 +1,25 @@
from textwrap import dedent

from core.helper.code_executor.template_transformer import TemplateTransformer


class NodeJsTemplateTransformer(TemplateTransformer):
@classmethod
def get_runner_script(cls) -> str:
runner_script = dedent(
f"""
// declare main function
{cls._code_placeholder}

// decode and prepare input object
var inputs_obj = JSON.parse(Buffer.from('{cls._inputs_placeholder}', 'base64').toString('utf-8'))

// execute main function
var output_obj = main(inputs_obj)

// convert output to json and print
var output_json = JSON.stringify(output_obj)
var result = `<<RESULT>>${{output_json}}<<RESULT>>`
console.log(result)
""")
return runner_script
@ -1,54 +0,0 @@
import json
import re

from core.helper.code_executor.template_transformer import TemplateTransformer

NODEJS_RUNNER = """// declare main function here
{{code}}

// execute main function, and return the result
// inputs is a dict, unstructured inputs
output = main({{inputs}})

// convert output to json and print
output = JSON.stringify(output)

result = `<<RESULT>>${output}<<RESULT>>`

console.log(result)
"""

NODEJS_PRELOAD = """"""

class NodeJsTemplateTransformer(TemplateTransformer):
@classmethod
def transform_caller(cls, code: str, inputs: dict) -> tuple[str, str]:
"""
Transform code to python runner
:param code: code
:param inputs: inputs
:return:
"""

# transform inputs to json string
inputs_str = json.dumps(inputs, indent=4, ensure_ascii=False)

# replace code and inputs
runner = NODEJS_RUNNER.replace('{{code}}', code)
runner = runner.replace('{{inputs}}', inputs_str)

return runner, NODEJS_PRELOAD

@classmethod
def transform_response(cls, response: str) -> dict:
"""
Transform response to dict
:param response: response
:return:
"""
# extract result
result = re.search(r'<<RESULT>>(.*)<<RESULT>>', response, re.DOTALL)
if not result:
raise ValueError('Failed to parse result')
result = result.group(1)
return json.loads(result)
17
api/core/helper/code_executor/jinja2/jinja2_formatter.py
Normal file
@ -0,0 +1,17 @@
from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage


class Jinja2Formatter:
@classmethod
def format(cls, template: str, inputs: str) -> str:
"""
Format template
:param template: template
:param inputs: inputs
:return:
"""
result = CodeExecutor.execute_workflow_code_template(
language=CodeLanguage.JINJA2, code=template, inputs=inputs
)

return result['result']
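A hedged usage sketch of the new sandbox-backed formatter. Note that although the annotation above says inputs: str, the executor serializes a dict, so the sketch passes a dict; it assumes a running sandbox service.

# Hedged sketch: rendering a Jinja2 template through the sandbox.
from core.helper.code_executor.jinja2.jinja2_formatter import Jinja2Formatter

rendered = Jinja2Formatter.format(
    template='Hello, {{ name }}!',
    inputs={'name': 'Dify'},  # dict, despite the str annotation above
)
# rendered == 'Hello, Dify!'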
64
api/core/helper/code_executor/jinja2/jinja2_transformer.py
Normal file
@ -0,0 +1,64 @@
from textwrap import dedent

from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
from core.helper.code_executor.template_transformer import TemplateTransformer


class Jinja2TemplateTransformer(TemplateTransformer):
@classmethod
def get_standard_packages(cls) -> set[str]:
return {'jinja2'} | Python3TemplateTransformer.get_standard_packages()

@classmethod
def transform_response(cls, response: str) -> dict:
"""
Transform response to dict
:param response: response
:return:
"""
return {
'result': cls.extract_result_str_from_response(response)
}

@classmethod
def get_runner_script(cls) -> str:
runner_script = dedent(f"""
# declare main function
def main(**inputs):
import jinja2
template = jinja2.Template('''{cls._code_placeholder}''')
return template.render(**inputs)

import json
from base64 import b64decode

# decode and prepare input dict
inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))

# execute main function
output = main(**inputs_obj)

# convert output and print
result = f'''<<RESULT>>{{output}}<<RESULT>>'''
print(result)

""")
return runner_script

@classmethod
def get_preload_script(cls) -> str:
preload_script = dedent("""
import jinja2
from base64 import b64decode

def _jinja2_preload_():
# prepare jinja2 environment, load template and render before to avoid sandbox issue
template = jinja2.Template('{{s}}')
template.render(s='a')

if __name__ == '__main__':
_jinja2_preload_()

""")

return preload_script
@ -1,92 +0,0 @@
import json
import re
from base64 import b64encode

from core.helper.code_executor.template_transformer import TemplateTransformer

PYTHON_RUNNER = """
import jinja2
from json import loads
from base64 import b64decode

template = jinja2.Template('''{{code}}''')

def main(**inputs):
return template.render(**inputs)

# execute main function, and return the result
inputs = b64decode('{{inputs}}').decode('utf-8')
output = main(**loads(inputs))

result = f'''<<RESULT>>{output}<<RESULT>>'''

print(result)

"""

JINJA2_PRELOAD_TEMPLATE = """{% set fruits = ['Apple'] %}
{{ 'a' }}
{% for fruit in fruits %}
<li>{{ fruit }}</li>
{% endfor %}
{% if fruits|length > 1 %}
1
{% endif %}
{% for i in range(5) %}
{% if i == 3 %}{{ i }}{% else %}{% endif %}
{% endfor %}
{% for i in range(3) %}
{{ i + 1 }}
{% endfor %}
{% macro say_hello() %}a{{ 'b' }}{% endmacro %}
{{ s }}{{ say_hello() }}"""

JINJA2_PRELOAD = f"""
import jinja2
from base64 import b64decode

def _jinja2_preload_():
# prepare jinja2 environment, load template and render before to avoid sandbox issue
template = jinja2.Template('''{JINJA2_PRELOAD_TEMPLATE}''')
template.render(s='a')

if __name__ == '__main__':
_jinja2_preload_()

"""


class Jinja2TemplateTransformer(TemplateTransformer):
@classmethod
def transform_caller(cls, code: str, inputs: dict) -> tuple[str, str]:
"""
Transform code to python runner
:param code: code
:param inputs: inputs
:return:
"""

inputs_str = b64encode(json.dumps(inputs, ensure_ascii=False).encode()).decode('utf-8')

# transform jinja2 template to python code
runner = PYTHON_RUNNER.replace('{{code}}', code)
runner = runner.replace('{{inputs}}', inputs_str)

return runner, JINJA2_PRELOAD

@classmethod
def transform_response(cls, response: str) -> dict:
"""
Transform response to dict
:param response: response
:return:
"""
# extract result
result = re.search(r'<<RESULT>>(.*)<<RESULT>>', response, re.DOTALL)
if not result:
raise ValueError('Failed to parse result')
result = result.group(1)

return {
'result': result
}
0
api/core/helper/code_executor/python3/__init__.py
Normal file
@ -0,0 +1,20 @@
from textwrap import dedent

from core.helper.code_executor.code_executor import CodeLanguage
from core.helper.code_executor.code_node_provider import CodeNodeProvider


class Python3CodeProvider(CodeNodeProvider):
@staticmethod
def get_language() -> str:
return CodeLanguage.PYTHON3

@classmethod
def get_default_code(cls) -> str:
return dedent(
"""
def main(arg1: int, arg2: int) -> dict:
return {
"result": arg1 + arg2,
}
""")
51
api/core/helper/code_executor/python3/python3_transformer.py
Normal file
@ -0,0 +1,51 @@
from textwrap import dedent

from core.helper.code_executor.template_transformer import TemplateTransformer


class Python3TemplateTransformer(TemplateTransformer):
@classmethod
def get_standard_packages(cls) -> set[str]:
return {
'base64',
'binascii',
'collections',
'datetime',
'functools',
'hashlib',
'hmac',
'itertools',
'json',
'math',
'operator',
'os',
'random',
're',
'string',
'sys',
'time',
'traceback',
'uuid',
}

@classmethod
def get_runner_script(cls) -> str:
runner_script = dedent(f"""
# declare main function
{cls._code_placeholder}

import json
from base64 import b64decode

# decode and prepare input dict
inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))

# execute main function
output_obj = main(**inputs_obj)

# convert output to json and print
output_json = json.dumps(output_obj, indent=4)
result = f'''<<RESULT>>{{output_json}}<<RESULT>>'''
print(result)
""")
return runner_script
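To make the placeholder mechanics concrete, here is a sketch of what the transformer produces; it mirrors assemble_runner_script from the base class further below, with illustrative user code and inputs.

# Sketch: the runner script's placeholders are filled with user code and
# base64-encoded inputs, yielding a self-contained program for the sandbox.
import json
from base64 import b64encode
from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer

user_code = "def main(arg1, arg2):\n    return {'result': arg1 + arg2}"
inputs_b64 = b64encode(json.dumps({'arg1': 1, 'arg2': 2}).encode()).decode()
script = Python3TemplateTransformer.get_runner_script()
script = script.replace('{{code}}', user_code).replace('{{inputs}}', inputs_b64)
# The sandbox runs `script`; its stdout wraps the JSON result in <<RESULT>>
# tags, which transform_response later extracts and parses.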
@ -1,82 +0,0 @@
import json
import re
from base64 import b64encode

from core.helper.code_executor.template_transformer import TemplateTransformer

PYTHON_RUNNER = """# declare main function here
{{code}}

from json import loads, dumps
from base64 import b64decode

# execute main function, and return the result
# inputs is a dict, and it
inputs = b64decode('{{inputs}}').decode('utf-8')
output = main(**json.loads(inputs))

# convert output to json and print
output = dumps(output, indent=4)

result = f'''<<RESULT>>
{output}
<<RESULT>>'''

print(result)
"""

PYTHON_PRELOAD = """
# prepare general imports
import json
import datetime
import math
import random
import re
import string
import sys
import time
import traceback
import uuid
import os
import base64
import hashlib
import hmac
import binascii
import collections
import functools
import operator
import itertools
"""

class PythonTemplateTransformer(TemplateTransformer):
@classmethod
def transform_caller(cls, code: str, inputs: dict) -> tuple[str, str]:
"""
Transform code to python runner
:param code: code
:param inputs: inputs
:return:
"""

# transform inputs to json string
inputs_str = b64encode(json.dumps(inputs, ensure_ascii=False).encode()).decode('utf-8')

# replace code and inputs
runner = PYTHON_RUNNER.replace('{{code}}', code)
runner = runner.replace('{{inputs}}', inputs_str)

return runner, PYTHON_PRELOAD

@classmethod
def transform_response(cls, response: str) -> dict:
"""
Transform response to dict
:param response: response
:return:
"""
# extract result
result = re.search(r'<<RESULT>>(.*?)<<RESULT>>', response, re.DOTALL)
if not result:
raise ValueError('Failed to parse result')
result = result.group(1)
return json.loads(result)
@ -1,24 +1,87 @@
import json
import re
from abc import ABC, abstractmethod
from base64 import b64encode
from typing import Optional

from pydantic import BaseModel

from core.helper.code_executor.entities import CodeDependency


class TemplateTransformer(ABC):
class TemplateTransformer(ABC, BaseModel):
_code_placeholder: str = '{{code}}'
_inputs_placeholder: str = '{{inputs}}'
_result_tag: str = '<<RESULT>>'

@classmethod
@abstractmethod
def transform_caller(cls, code: str, inputs: dict) -> tuple[str, str]:
def get_standard_packages(cls) -> set[str]:
return set()

@classmethod
def transform_caller(cls, code: str, inputs: dict,
dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]:
"""
Transform code to python runner
:param code: code
:param inputs: inputs
:return: runner, preload
"""
pass

runner_script = cls.assemble_runner_script(code, inputs)
preload_script = cls.get_preload_script()

packages = dependencies or []
standard_packages = cls.get_standard_packages()
for package in standard_packages:
if package not in packages:
packages.append(CodeDependency(name=package, version=''))
packages = list({dep.name: dep for dep in packages if dep.name}.values())

return runner_script, preload_script, packages

@classmethod
def extract_result_str_from_response(cls, response: str) -> str:
result = re.search(rf'{cls._result_tag}(.*){cls._result_tag}', response, re.DOTALL)
if not result:
raise ValueError('Failed to parse result')
result = result.group(1)
return result

@classmethod
@abstractmethod
def transform_response(cls, response: str) -> dict:
"""
Transform response to dict
:param response: response
:return:
"""
pass
return json.loads(cls.extract_result_str_from_response(response))

@classmethod
@abstractmethod
def get_runner_script(cls) -> str:
"""
Get runner script
"""
pass

@classmethod
def serialize_inputs(cls, inputs: dict) -> str:
inputs_json_str = json.dumps(inputs, ensure_ascii=False).encode()
input_base64_encoded = b64encode(inputs_json_str).decode('utf-8')
return input_base64_encoded

@classmethod
def assemble_runner_script(cls, code: str, inputs: dict) -> str:
# assemble runner script
script = cls.get_runner_script()
script = script.replace(cls._code_placeholder, code)
inputs_str = cls.serialize_inputs(inputs)
script = script.replace(cls._inputs_placeholder, inputs_str)
return script

@classmethod
def get_preload_script(cls) -> str:
"""
Get preload script
"""
return ''

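A sketch of the new three-value contract: transform_caller now returns the runner, the preload script, and a de-duplicated package list that merges the caller's dependencies with the transformer's standard packages. The code and inputs below are illustrative.

# Hedged sketch of the new transform_caller return shape.
from core.helper.code_executor.entities import CodeDependency
from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer

runner, preload, packages = Python3TemplateTransformer.transform_caller(
    code="def main(x):\n    return {'result': x}",
    inputs={'x': 42},
    dependencies=[CodeDependency(name='requests', version='2.31.0')],
)
# `packages` contains 'requests' plus every name from get_standard_packages(),
# each name appearing exactly once (later duplicates win in the dict merge).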
@ -9,6 +9,7 @@ from extensions.ext_redis import redis_client
class ProviderCredentialsCacheType(Enum):
PROVIDER = "provider"
MODEL = "provider_model"
LOAD_BALANCING_MODEL = "load_balancing_provider_model"


class ProviderCredentialsCache:

@ -1,10 +1,9 @@
import logging
import os
from collections import OrderedDict
from collections.abc import Callable
from typing import Any, AnyStr

import yaml
from core.tools.utils.yaml_utils import load_yaml_file


def get_position_map(
@ -17,21 +16,15 @@ def get_position_map(
:param file_name: the YAML file name, default to '_position.yaml'
:return: a dict with name as key and index as value
"""
try:
position_file_name = os.path.join(folder_path, file_name)
if not os.path.exists(position_file_name):
return {}

with open(position_file_name, encoding='utf-8') as f:
positions = yaml.safe_load(f)
position_map = {}
for index, name in enumerate(positions):
if name and isinstance(name, str):
position_map[name.strip()] = index
return position_map
except:
logging.warning(f'Failed to load the YAML position file {folder_path}/{file_name}.')
return {}
position_file_name = os.path.join(folder_path, file_name)
positions = load_yaml_file(position_file_name, ignore_error=True)
position_map = {}
index = 0
for _, name in enumerate(positions):
if name and isinstance(name, str):
position_map[name.strip()] = index
index += 1
return position_map


def sort_by_position_map(
@ -42,6 +42,20 @@ def delete(url, *args, **kwargs):
if kwargs['follow_redirects']:
kwargs['allow_redirects'] = kwargs['follow_redirects']
kwargs.pop('follow_redirects')
if 'timeout' in kwargs:
timeout = kwargs['timeout']
if timeout is None:
kwargs.pop('timeout')
elif isinstance(timeout, tuple):
# check length of tuple
if len(timeout) == 2:
kwargs['timeout'] = timeout
elif len(timeout) == 1:
kwargs['timeout'] = timeout[0]
elif len(timeout) > 2:
kwargs['timeout'] = (timeout[0], timeout[1])
else:
kwargs['timeout'] = (timeout, timeout)
return _delete(url=url, *args, proxies=requests_proxies, **kwargs)

def head(url, *args, **kwargs):
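For clarity, a standalone sketch of the normalization rule this new branch applies: httpx-style timeout values are coerced into the (connect, read) form that requests expects. The helper below just restates the branch above for illustration.

# Standalone restatement of the timeout normalization above (illustrative).
def normalize_timeout(timeout):
    if timeout is None:
        return None                      # key dropped from kwargs entirely
    if isinstance(timeout, tuple):
        if len(timeout) == 2:
            return timeout               # (connect, read) passed through
        if len(timeout) == 1:
            return timeout[0]            # single-element tuple unwrapped
        return (timeout[0], timeout[1])  # extra elements discarded
    return (timeout, timeout)            # scalar duplicated for connect/read

assert normalize_timeout(5) == (5, 5)
assert normalize_timeout((3,)) == 3
assert normalize_timeout((3, 10, 99)) == (3, 10)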
@ -12,7 +12,6 @@ from flask import Flask, current_app
from flask_login import current_user
from sqlalchemy.orm.exc import ObjectDeletedError

from core.docstore.dataset_docstore import DatasetDocumentStore
from core.errors.error import ProviderTokenNotInitError
from core.llm_generator.llm_generator import LLMGenerator
from core.model_manager import ModelInstance, ModelManager
@ -20,12 +19,16 @@ from core.model_runtime.entities.model_entities import ModelType, PriceType
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
from core.rag.datasource.keyword.keyword_factory import Keyword
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.index_processor.index_processor_base import BaseIndexProcessor
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from core.rag.models.document import Document
from core.splitter.fixed_text_splitter import EnhanceRecursiveCharacterTextSplitter, FixedRecursiveCharacterTextSplitter
from core.splitter.text_splitter import TextSplitter
from core.rag.splitter.fixed_text_splitter import (
EnhanceRecursiveCharacterTextSplitter,
FixedRecursiveCharacterTextSplitter,
)
from core.rag.splitter.text_splitter import TextSplitter
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from extensions.ext_storage import storage
@ -283,11 +286,7 @@ class IndexingRunner:
if len(preview_texts) < 5:
preview_texts.append(document.page_content)
if indexing_technique == 'high_quality' or embedding_model_instance:
embedding_model_type_instance = embedding_model_instance.model_type_instance
embedding_model_type_instance = cast(TextEmbeddingModel, embedding_model_type_instance)
tokens += embedding_model_type_instance.get_num_tokens(
model=embedding_model_instance.model,
credentials=embedding_model_instance.credentials,
tokens += embedding_model_instance.get_text_embedding_num_tokens(
texts=[self.filter_string(document.page_content)]
)

@ -411,14 +410,15 @@ class IndexingRunner:
# The user-defined segmentation rule
rules = json.loads(processing_rule.rules)
segmentation = rules["segmentation"]
if segmentation["max_tokens"] < 50 or segmentation["max_tokens"] > 1000:
raise ValueError("Custom segment length should be between 50 and 1000.")
max_segmentation_tokens_length = int(current_app.config['INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH'])
if segmentation["max_tokens"] < 50 or segmentation["max_tokens"] > max_segmentation_tokens_length:
raise ValueError(f"Custom segment length should be between 50 and {max_segmentation_tokens_length}.")

separator = segmentation["separator"]
if separator:
separator = separator.replace('\\n', '\n')

if 'chunk_overlap' in segmentation and segmentation['chunk_overlap']:
if segmentation.get('chunk_overlap'):
chunk_overlap = segmentation['chunk_overlap']
else:
chunk_overlap = 0
@ -427,7 +427,7 @@ class IndexingRunner:
chunk_size=segmentation["max_tokens"],
chunk_overlap=chunk_overlap,
fixed_separator=separator,
separators=["\n\n", "。", ".", " ", ""],
separators=["\n\n", "。", ". ", " ", ""],
embedding_model_instance=embedding_model_instance
)
else:
@ -435,7 +435,7 @@ class IndexingRunner:
character_splitter = EnhanceRecursiveCharacterTextSplitter.from_encoder(
chunk_size=DatasetProcessRule.AUTOMATIC_RULES['segmentation']['max_tokens'],
chunk_overlap=DatasetProcessRule.AUTOMATIC_RULES['segmentation']['chunk_overlap'],
separators=["\n\n", "。", ".", " ", ""],
separators=["\n\n", "。", ". ", " ", ""],
embedding_model_instance=embedding_model_instance
)

@ -654,10 +654,6 @@ class IndexingRunner:
tokens = 0
chunk_size = 10

embedding_model_type_instance = None
if embedding_model_instance:
embedding_model_type_instance = embedding_model_instance.model_type_instance
embedding_model_type_instance = cast(TextEmbeddingModel, embedding_model_type_instance)
# create keyword index
create_keyword_thread = threading.Thread(target=self._process_keyword_index,
args=(current_app._get_current_object(),
@ -670,8 +666,7 @@ class IndexingRunner:
chunk_documents = documents[i:i + chunk_size]
futures.append(executor.submit(self._process_chunk, current_app._get_current_object(), index_processor,
chunk_documents, dataset,
dataset_document, embedding_model_instance,
embedding_model_type_instance))
dataset_document, embedding_model_instance))

for future in futures:
tokens += future.result()
@ -712,7 +707,7 @@ class IndexingRunner:
db.session.commit()

def _process_chunk(self, flask_app, index_processor, chunk_documents, dataset, dataset_document,
embedding_model_instance, embedding_model_type_instance):
embedding_model_instance):
with flask_app.app_context():
# check document is paused
self._check_document_paused_status(dataset_document.id)
@ -720,9 +715,7 @@ class IndexingRunner:
tokens = 0
if dataset.indexing_technique == 'high_quality' or embedding_model_type_instance:
tokens += sum(
embedding_model_type_instance.get_num_tokens(
embedding_model_instance.model,
embedding_model_instance.credentials,
embedding_model_instance.get_text_embedding_num_tokens(
[document.page_content]
)
for document in chunk_documents

@ -9,8 +9,6 @@ from core.model_runtime.entities.message_entities import (
TextPromptMessageContent,
UserPromptMessage,
)
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.model_providers import model_provider_factory
from extensions.ext_database import db
from models.model import AppMode, Conversation, Message

@ -78,12 +76,7 @@ class TokenBufferMemory:
return []

# prune the chat message if it exceeds the max token limit
provider_instance = model_provider_factory.get_provider_instance(self.model_instance.provider)
model_type_instance = provider_instance.get_model_instance(ModelType.LLM)

curr_message_tokens = model_type_instance.get_num_tokens(
self.model_instance.model,
self.model_instance.credentials,
curr_message_tokens = self.model_instance.get_llm_num_tokens(
prompt_messages
)

@ -91,9 +84,7 @@ class TokenBufferMemory:
pruned_memory = []
while curr_message_tokens > max_token_limit and prompt_messages:
pruned_memory.append(prompt_messages.pop(0))
curr_message_tokens = model_type_instance.get_num_tokens(
self.model_instance.model,
self.model_instance.credentials,
curr_message_tokens = self.model_instance.get_llm_num_tokens(
prompt_messages
)

|
||||
import logging
|
||||
import os
|
||||
from collections.abc import Generator
|
||||
from typing import IO, Optional, Union, cast
|
||||
|
||||
from core.entities.provider_configuration import ProviderModelBundle
|
||||
from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
|
||||
from core.entities.provider_entities import ModelLoadBalancingConfiguration
|
||||
from core.errors.error import ProviderTokenNotInitError
|
||||
from core.model_runtime.callbacks.base_callback import Callback
|
||||
from core.model_runtime.entities.llm_entities import LLMResult
|
||||
@ -9,6 +12,7 @@ from core.model_runtime.entities.message_entities import PromptMessage, PromptMe
|
||||
from core.model_runtime.entities.model_entities import ModelType
|
||||
from core.model_runtime.entities.rerank_entities import RerankResult
|
||||
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeConnectionError, InvokeRateLimitError
|
||||
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
|
||||
from core.model_runtime.model_providers.__base.moderation_model import ModerationModel
|
||||
from core.model_runtime.model_providers.__base.rerank_model import RerankModel
|
||||
@ -16,6 +20,10 @@ from core.model_runtime.model_providers.__base.speech2text_model import Speech2T
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.__base.tts_model import TTSModel
|
||||
from core.provider_manager import ProviderManager
|
||||
from extensions.ext_redis import redis_client
|
||||
from models.provider import ProviderType
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ModelInstance:
|
||||
@ -29,6 +37,12 @@ class ModelInstance:
|
||||
self.provider = provider_model_bundle.configuration.provider.provider
|
||||
self.credentials = self._fetch_credentials_from_bundle(provider_model_bundle, model)
|
||||
self.model_type_instance = self.provider_model_bundle.model_type_instance
|
||||
self.load_balancing_manager = self._get_load_balancing_manager(
|
||||
configuration=provider_model_bundle.configuration,
|
||||
model_type=provider_model_bundle.model_type_instance.model_type,
|
||||
model=model,
|
||||
credentials=self.credentials
|
||||
)
|
||||
|
||||
def _fetch_credentials_from_bundle(self, provider_model_bundle: ProviderModelBundle, model: str) -> dict:
|
||||
"""
|
||||
@ -37,8 +51,10 @@ class ModelInstance:
|
||||
:param model: model name
|
||||
:return:
|
||||
"""
|
||||
credentials = provider_model_bundle.configuration.get_current_credentials(
|
||||
model_type=provider_model_bundle.model_type_instance.model_type,
|
||||
configuration = provider_model_bundle.configuration
|
||||
model_type = provider_model_bundle.model_type_instance.model_type
|
||||
credentials = configuration.get_current_credentials(
|
||||
model_type=model_type,
|
||||
model=model
|
||||
)
|
||||
|
||||
@ -47,6 +63,43 @@ class ModelInstance:
|
||||
|
||||
return credentials
|
||||
|
||||
def _get_load_balancing_manager(self, configuration: ProviderConfiguration,
|
||||
model_type: ModelType,
|
||||
model: str,
|
||||
credentials: dict) -> Optional["LBModelManager"]:
|
||||
"""
|
||||
Get load balancing model credentials
|
||||
:param configuration: provider configuration
|
||||
:param model_type: model type
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
if configuration.model_settings and configuration.using_provider_type == ProviderType.CUSTOM:
|
||||
current_model_setting = None
|
||||
# check if model is disabled by admin
|
||||
for model_setting in configuration.model_settings:
|
||||
if (model_setting.model_type == model_type
|
||||
and model_setting.model == model):
|
||||
current_model_setting = model_setting
|
||||
break
|
||||
|
||||
# check if load balancing is enabled
|
||||
if current_model_setting and current_model_setting.load_balancing_configs:
|
||||
# use load balancing proxy to choose credentials
|
||||
lb_model_manager = LBModelManager(
|
||||
tenant_id=configuration.tenant_id,
|
||||
provider=configuration.provider.provider,
|
||||
model_type=model_type,
|
||||
model=model,
|
||||
load_balancing_configs=current_model_setting.load_balancing_configs,
|
||||
managed_credentials=credentials if configuration.custom_configuration.provider else None
|
||||
)
|
||||
|
||||
return lb_model_manager
|
||||
|
||||
return None
|
||||
|
||||
def invoke_llm(self, prompt_messages: list[PromptMessage], model_parameters: Optional[dict] = None,
|
||||
tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
|
||||
stream: bool = True, user: Optional[str] = None, callbacks: list[Callback] = None) \
|
||||
@ -67,7 +120,8 @@ class ModelInstance:
|
||||
raise Exception("Model type instance is not LargeLanguageModel")
|
||||
|
||||
self.model_type_instance = cast(LargeLanguageModel, self.model_type_instance)
|
||||
return self.model_type_instance.invoke(
|
||||
return self._round_robin_invoke(
|
||||
function=self.model_type_instance.invoke,
|
||||
model=self.model,
|
||||
credentials=self.credentials,
|
||||
prompt_messages=prompt_messages,
|
||||
@ -79,6 +133,27 @@ class ModelInstance:
|
||||
callbacks=callbacks
|
||||
)
|
||||
|
||||
def get_llm_num_tokens(self, prompt_messages: list[PromptMessage],
|
||||
tools: Optional[list[PromptMessageTool]] = None) -> int:
|
||||
"""
|
||||
Get number of tokens for llm
|
||||
|
||||
:param prompt_messages: prompt messages
|
||||
:param tools: tools for tool calling
|
||||
:return:
|
||||
"""
|
||||
if not isinstance(self.model_type_instance, LargeLanguageModel):
|
||||
raise Exception("Model type instance is not LargeLanguageModel")
|
||||
|
||||
self.model_type_instance = cast(LargeLanguageModel, self.model_type_instance)
|
||||
return self._round_robin_invoke(
|
||||
function=self.model_type_instance.get_num_tokens,
|
||||
model=self.model,
|
||||
credentials=self.credentials,
|
||||
prompt_messages=prompt_messages,
|
||||
tools=tools
|
||||
)
|
||||
|
||||
def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) \
|
||||
-> TextEmbeddingResult:
|
||||
"""
|
||||
@ -92,13 +167,32 @@ class ModelInstance:
|
||||
raise Exception("Model type instance is not TextEmbeddingModel")
|
||||
|
||||
self.model_type_instance = cast(TextEmbeddingModel, self.model_type_instance)
|
||||
return self.model_type_instance.invoke(
|
||||
return self._round_robin_invoke(
|
||||
function=self.model_type_instance.invoke,
|
||||
model=self.model,
|
||||
credentials=self.credentials,
|
||||
texts=texts,
|
||||
user=user
|
||||
)
|
||||
|
||||
def get_text_embedding_num_tokens(self, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for text embedding
|
||||
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
if not isinstance(self.model_type_instance, TextEmbeddingModel):
|
||||
raise Exception("Model type instance is not TextEmbeddingModel")
|
||||
|
||||
self.model_type_instance = cast(TextEmbeddingModel, self.model_type_instance)
|
||||
return self._round_robin_invoke(
|
||||
function=self.model_type_instance.get_num_tokens,
|
||||
model=self.model,
|
||||
credentials=self.credentials,
|
||||
texts=texts
|
||||
)
|
||||
|
||||
def invoke_rerank(self, query: str, docs: list[str], score_threshold: Optional[float] = None,
|
||||
top_n: Optional[int] = None,
|
||||
user: Optional[str] = None) \
|
||||
@ -117,7 +211,8 @@ class ModelInstance:
|
||||
raise Exception("Model type instance is not RerankModel")
|
||||
|
||||
self.model_type_instance = cast(RerankModel, self.model_type_instance)
|
||||
return self.model_type_instance.invoke(
|
||||
return self._round_robin_invoke(
|
||||
function=self.model_type_instance.invoke,
|
||||
model=self.model,
|
||||
credentials=self.credentials,
|
||||
query=query,
|
||||
@ -140,7 +235,8 @@ class ModelInstance:
|
||||
raise Exception("Model type instance is not ModerationModel")
|
||||
|
||||
self.model_type_instance = cast(ModerationModel, self.model_type_instance)
|
||||
return self.model_type_instance.invoke(
|
||||
return self._round_robin_invoke(
|
||||
function=self.model_type_instance.invoke,
|
||||
model=self.model,
|
||||
credentials=self.credentials,
|
||||
text=text,
|
||||
@ -160,7 +256,8 @@ class ModelInstance:
|
||||
raise Exception("Model type instance is not Speech2TextModel")
|
||||
|
||||
self.model_type_instance = cast(Speech2TextModel, self.model_type_instance)
|
||||
return self.model_type_instance.invoke(
|
||||
return self._round_robin_invoke(
|
||||
function=self.model_type_instance.invoke,
|
||||
model=self.model,
|
||||
credentials=self.credentials,
|
||||
file=file,
|
||||
@ -183,7 +280,8 @@ class ModelInstance:
|
||||
raise Exception("Model type instance is not TTSModel")
|
||||
|
||||
self.model_type_instance = cast(TTSModel, self.model_type_instance)
|
||||
return self.model_type_instance.invoke(
|
||||
return self._round_robin_invoke(
|
||||
function=self.model_type_instance.invoke,
|
||||
model=self.model,
|
||||
credentials=self.credentials,
|
||||
content_text=content_text,
|
||||
@ -193,6 +291,43 @@ class ModelInstance:
|
||||
streaming=streaming
|
||||
)
|
||||
|
||||
def _round_robin_invoke(self, function: callable, *args, **kwargs):
|
||||
"""
|
||||
Round-robin invoke
|
||||
:param function: function to invoke
|
||||
:param args: function args
|
||||
:param kwargs: function kwargs
|
||||
:return:
|
||||
"""
|
||||
if not self.load_balancing_manager:
|
||||
return function(*args, **kwargs)
|
||||
|
||||
last_exception = None
|
||||
while True:
|
||||
lb_config = self.load_balancing_manager.fetch_next()
|
||||
if not lb_config:
|
||||
if not last_exception:
|
||||
raise ProviderTokenNotInitError("Model credentials is not initialized.")
|
||||
else:
|
||||
raise last_exception
|
||||
|
||||
try:
|
||||
if 'credentials' in kwargs:
|
||||
del kwargs['credentials']
|
||||
return function(*args, **kwargs, credentials=lb_config.credentials)
|
||||
except InvokeRateLimitError as e:
|
||||
# expire in 60 seconds
|
||||
self.load_balancing_manager.cooldown(lb_config, expire=60)
|
||||
last_exception = e
|
||||
continue
|
||||
except (InvokeAuthorizationError, InvokeConnectionError) as e:
|
||||
# expire in 10 seconds
|
||||
self.load_balancing_manager.cooldown(lb_config, expire=10)
|
||||
last_exception = e
|
||||
continue
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
def get_tts_voices(self, language: str) -> list:
|
||||
"""
|
||||
Invoke large language tts model voices
|
||||
@ -226,6 +361,7 @@ class ModelManager:
|
||||
"""
|
||||
if not provider:
|
||||
return self.get_default_model_instance(tenant_id, model_type)
|
||||
|
||||
provider_model_bundle = self._provider_manager.get_provider_model_bundle(
|
||||
tenant_id=tenant_id,
|
||||
provider=provider,
|
||||
@ -255,3 +391,141 @@ class ModelManager:
|
||||
model_type=model_type,
|
||||
model=default_model_entity.model
|
||||
)
|
||||
|
||||
|
||||
class LBModelManager:
|
||||
def __init__(self, tenant_id: str,
|
||||
provider: str,
|
||||
model_type: ModelType,
|
||||
model: str,
|
||||
load_balancing_configs: list[ModelLoadBalancingConfiguration],
|
||||
managed_credentials: Optional[dict] = None) -> None:
|
||||
"""
|
||||
Load balancing model manager
|
||||
:param load_balancing_configs: all load balancing configurations
|
||||
:param managed_credentials: credentials if load balancing configuration name is __inherit__
|
||||
"""
|
||||
self._tenant_id = tenant_id
|
||||
self._provider = provider
|
||||
self._model_type = model_type
|
||||
self._model = model
|
||||
self._load_balancing_configs = load_balancing_configs
|
||||
|
||||
for load_balancing_config in self._load_balancing_configs:
|
||||
if load_balancing_config.name == "__inherit__":
|
||||
if not managed_credentials:
|
||||
# remove __inherit__ if managed credentials is not provided
|
||||
self._load_balancing_configs.remove(load_balancing_config)
|
||||
else:
|
||||
load_balancing_config.credentials = managed_credentials
|
||||
|
||||
def fetch_next(self) -> Optional[ModelLoadBalancingConfiguration]:
|
||||
"""
|
||||
Get next model load balancing config
|
||||
Strategy: Round Robin
|
||||
:return:
|
||||
"""
|
||||
cache_key = "model_lb_index:{}:{}:{}:{}".format(
|
||||
self._tenant_id,
|
||||
self._provider,
|
||||
self._model_type.value,
|
||||
self._model
|
||||
)
|
||||
|
||||
cooldown_load_balancing_configs = []
|
||||
max_index = len(self._load_balancing_configs)
|
||||
|
||||
while True:
|
||||
current_index = redis_client.incr(cache_key)
|
||||
if current_index >= 10000000:
|
||||
current_index = 1
|
||||
redis_client.set(cache_key, current_index)
|
||||
|
||||
redis_client.expire(cache_key, 3600)
|
||||
if current_index > max_index:
|
||||
current_index = current_index % max_index
|
||||
|
||||
real_index = current_index - 1
|
||||
if real_index > max_index:
|
||||
real_index = 0
|
||||
|
||||
config = self._load_balancing_configs[real_index]
|
||||
|
||||
if self.in_cooldown(config):
|
||||
cooldown_load_balancing_configs.append(config)
|
||||
if len(cooldown_load_balancing_configs) >= len(self._load_balancing_configs):
|
||||
# all configs are in cooldown
|
||||
return None
|
||||
|
||||
continue
|
||||
|
||||
if bool(os.environ.get("DEBUG", 'False').lower() == 'true'):
|
||||
logger.info(f"Model LB\nid: {config.id}\nname:{config.name}\n"
|
||||
f"tenant_id: {self._tenant_id}\nprovider: {self._provider}\n"
|
||||
f"model_type: {self._model_type.value}\nmodel: {self._model}")
|
||||
|
||||
return config
|
||||
|
||||
return None
|
||||
|
||||
def cooldown(self, config: ModelLoadBalancingConfiguration, expire: int = 60) -> None:
|
||||
"""
|
||||
Cooldown model load balancing config
|
||||
:param config: model load balancing config
|
||||
:param expire: cooldown time
|
||||
:return:
|
||||
"""
|
||||
cooldown_cache_key = "model_lb_index:cooldown:{}:{}:{}:{}:{}".format(
|
||||
self._tenant_id,
|
||||
self._provider,
|
||||
self._model_type.value,
|
||||
self._model,
|
||||
config.id
|
||||
)
|
||||
|
||||
redis_client.setex(cooldown_cache_key, expire, 'true')
|
||||
|
||||
def in_cooldown(self, config: ModelLoadBalancingConfiguration) -> bool:
|
||||
"""
|
||||
Check if model load balancing config is in cooldown
|
||||
:param config: model load balancing config
|
||||
:return:
|
||||
"""
|
||||
cooldown_cache_key = "model_lb_index:cooldown:{}:{}:{}:{}:{}".format(
|
||||
self._tenant_id,
|
||||
self._provider,
|
||||
self._model_type.value,
|
||||
self._model,
|
||||
config.id
|
||||
)
|
||||
|
||||
return redis_client.exists(cooldown_cache_key)
|
||||
|
||||
@classmethod
|
||||
def get_config_in_cooldown_and_ttl(cls, tenant_id: str,
|
||||
provider: str,
|
||||
model_type: ModelType,
|
||||
model: str,
|
||||
config_id: str) -> tuple[bool, int]:
|
||||
"""
|
||||
Get model load balancing config is in cooldown and ttl
|
||||
:param tenant_id: workspace id
|
||||
:param provider: provider name
|
||||
:param model_type: model type
|
||||
:param model: model name
|
||||
:param config_id: model load balancing config id
|
||||
:return:
|
||||
"""
|
||||
cooldown_cache_key = "model_lb_index:cooldown:{}:{}:{}:{}:{}".format(
|
||||
tenant_id,
|
||||
provider,
|
||||
model_type.value,
|
||||
model,
|
||||
config_id
|
||||
)
|
||||
|
||||
ttl = redis_client.ttl(cooldown_cache_key)
|
||||
if ttl == -2:
|
||||
return False, 0
|
||||
|
||||
return True, ttl
|
||||
|
||||
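To make fetch_next's index arithmetic concrete, here is a standalone sketch of how the shared Redis counter maps onto the config list; the config names are illustrative stand-ins, and the real method additionally skips entries whose cooldown key is set and returns None once every config is cooling down.

# Standalone sketch of the round-robin mapping used by LBModelManager.fetch_next.
configs = ['config-a', 'config-b', 'config-c']  # illustrative stand-ins
max_index = len(configs)
for counter in range(1, 8):                     # successive values from Redis INCR
    current_index = counter
    if current_index > max_index:
        current_index = current_index % max_index
    real_index = current_index - 1              # 0 maps to -1, i.e. the last entry
    print(configs[real_index])
# prints a, b, c, a, b, c, a -- each counter value selects one config in rotation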
@ -20,7 +20,7 @@ This module provides the interface for invoking and authenticating various model

(image)

Displays a list of all supported providers, including provider names, icons, supported model types list, predefined model list, configuration method, and credentials form rules, etc. For detailed rule design, see: [Schema](./schema.md).
Displays a list of all supported providers, including provider names, icons, supported model types list, predefined model list, configuration method, and credentials form rules, etc. For detailed rule design, see: [Schema](./docs/en_US/schema.md).

- Selectable model list display

@ -1,4 +1,3 @@
from abc import ABC
from typing import Optional

from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
@ -14,7 +13,7 @@ _TEXT_COLOR_MAPPING = {
}


class Callback(ABC):
class Callback:
"""
Base class for callbacks.
Only for LLM.

@ -51,7 +51,7 @@
- `voices` (list) List of available voices.(available for model type `tts`)
- `mode` (string) voice model.(available for model type `tts`)
- `name` (string) voice model display name.(available for model type `tts`)
- `lanuage` (string) the languages the voice model supports.(available for model type `tts`)
- `language` (string) the languages the voice model supports.(available for model type `tts`)
- `word_limit` (int) Single conversion word limit, paragraphwise by default(available for model type `tts`)
- `audio_type` (string) Supported audio file extension formats, e.g.: mp3, wav(available for model type `tts`)
- `max_workers` (int) Number of concurrent workers supporting text-to-audio conversion(available for model type `tts`)

@ -52,7 +52,7 @@
- `voices` (list) 可选音色列表。
- `mode` (string) 音色模型。(模型类型 `tts` 可用)
- `name` (string) 音色模型显示名称。(模型类型 `tts` 可用)
- `lanuage` (string) 音色模型支持语言。(模型类型 `tts` 可用)
- `language` (string) 音色模型支持语言。(模型类型 `tts` 可用)
- `word_limit` (int) 单次转换字数限制,默认按段落分段(模型类型 `tts` 可用)
- `audio_type` (string) 支持音频文件扩展格式,如:mp3,wav(模型类型 `tts` 可用)
- `max_workers` (int) 支持文字音频转换并发任务数(模型类型 `tts` 可用)

@ -3,8 +3,7 @@ import os
from abc import ABC, abstractmethod
from typing import Optional

import yaml

from core.helper.position_helper import get_position_map, sort_by_position_map
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.defaults import PARAMETER_RULE_TEMPLATE
from core.model_runtime.entities.model_entities import (
@ -18,7 +17,7 @@ from core.model_runtime.entities.model_entities import (
)
from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer
from core.utils.position_helper import get_position_map, sort_by_position_map
from core.tools.utils.yaml_utils import load_yaml_file


class AIModel(ABC):
@ -154,8 +153,7 @@ class AIModel(ABC):
# traverse all model_schema_yaml_paths
for model_schema_yaml_path in model_schema_yaml_paths:
# read yaml data from yaml file
with open(model_schema_yaml_path, encoding='utf-8') as f:
yaml_data = yaml.safe_load(f)
yaml_data = load_yaml_file(model_schema_yaml_path, ignore_error=True)

new_parameter_rules = []
for parameter_rule in yaml_data.get('parameter_rules', []):

@ -1,12 +1,11 @@
import os
from abc import ABC, abstractmethod

import yaml

from core.helper.module_import_helper import get_subclasses_from_module, import_module_from_source
from core.model_runtime.entities.model_entities import AIModelEntity, ModelType
from core.model_runtime.entities.provider_entities import ProviderEntity
from core.model_runtime.model_providers.__base.ai_model import AIModel
from core.utils.module_import_helper import get_subclasses_from_module, import_module_from_source
from core.tools.utils.yaml_utils import load_yaml_file


class ModelProvider(ABC):
@ -44,10 +43,7 @@ class ModelProvider(ABC):

# read provider schema from yaml file
yaml_path = os.path.join(current_path, f'{provider_name}.yaml')
yaml_data = {}
if os.path.exists(yaml_path):
with open(yaml_path, encoding='utf-8') as f:
yaml_data = yaml.safe_load(f)
yaml_data = load_yaml_file(yaml_path, ignore_error=True)

try:
# yaml_data to entity

@ -2,7 +2,9 @@
- anthropic
- azure_openai
- google
- vertex_ai
- nvidia
- nvidia_nim
- cohere
- bedrock
- togetherai
@ -26,4 +28,6 @@
- yi
- openllm
- localai
- volcengine_maas
- openai_api_compatible
- deepseek

|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
|
||||
@ -6,6 +6,7 @@ features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
|
||||
@ -6,6 +6,7 @@ features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
|
||||
@ -146,7 +146,7 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
|
||||
"""
|
||||
Code block mode wrapper for invoking large language model
|
||||
"""
|
||||
if 'response_format' in model_parameters and model_parameters['response_format']:
|
||||
if model_parameters.get('response_format'):
|
||||
stop = stop or []
|
||||
# chat model
|
||||
self._transform_chat_json_prompts(
|
||||
@ -324,10 +324,32 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
|
||||
output_tokens = 0
|
||||
finish_reason = None
|
||||
index = 0
|
||||
|
||||
tool_calls: list[AssistantPromptMessage.ToolCall] = []
|
||||
|
||||
for chunk in response:
|
||||
if isinstance(chunk, MessageStartEvent):
|
||||
return_model = chunk.message.model
|
||||
input_tokens = chunk.message.usage.input_tokens
|
||||
if hasattr(chunk, 'content_block'):
|
||||
content_block = chunk.content_block
|
||||
if isinstance(content_block, dict):
|
||||
if content_block.get('type') == 'tool_use':
|
||||
tool_call = AssistantPromptMessage.ToolCall(
|
||||
id=content_block.get('id'),
|
||||
type='function',
|
||||
function=AssistantPromptMessage.ToolCall.ToolCallFunction(
|
||||
name=content_block.get('name'),
|
||||
arguments=''
|
||||
)
|
||||
)
|
||||
tool_calls.append(tool_call)
|
||||
elif hasattr(chunk, 'delta'):
|
||||
delta = chunk.delta
|
||||
if isinstance(delta, dict) and len(tool_calls) > 0:
|
||||
if delta.get('type') == 'input_json_delta':
|
||||
tool_calls[-1].function.arguments += delta.get('partial_json', '')
|
||||
elif chunk.message:
|
||||
return_model = chunk.message.model
|
||||
input_tokens = chunk.message.usage.input_tokens
|
||||
elif isinstance(chunk, MessageDeltaEvent):
|
||||
output_tokens = chunk.usage.output_tokens
|
||||
finish_reason = chunk.delta.stop_reason
@ -335,13 +357,19 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
            # transform usage
            usage = self._calc_response_usage(model, credentials, input_tokens, output_tokens)

            # transform empty tool call arguments to {}
            for tool_call in tool_calls:
                if not tool_call.function.arguments:
                    tool_call.function.arguments = '{}'

            yield LLMResultChunk(
                model=return_model,
                prompt_messages=prompt_messages,
                delta=LLMResultChunkDelta(
                    index=index + 1,
                    message=AssistantPromptMessage(
                        content=''
                        content='',
                        tool_calls=tool_calls
                    ),
                    finish_reason=finish_reason,
                    usage=usage
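
Normalizing empty arguments to '{}' matters because downstream consumers parse the argument string as JSON: json.loads('') raises, while json.loads('{}') yields an empty dict. A one-line illustration:

    import json

    arguments = ''            # a tool call the model invoked with no inputs
    arguments = arguments or '{}'
    print(json.loads(arguments))  # {} -- json.loads('') would raise JSONDecodeError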

@ -380,7 +408,7 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
            "max_retries": 1,
        }

        if 'anthropic_api_url' in credentials and credentials['anthropic_api_url']:
        if credentials.get('anthropic_api_url'):
            credentials['anthropic_api_url'] = credentials['anthropic_api_url'].rstrip('/')
            credentials_kwargs['base_url'] = credentials['anthropic_api_url']

@ -49,7 +49,7 @@ LLM_BASE_MODELS = [
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_properties={
                ModelPropertyKey.MODE: LLMMode.CHAT.value,
                ModelPropertyKey.CONTEXT_SIZE: 4096,
                ModelPropertyKey.CONTEXT_SIZE: 16385,
            },
            parameter_rules=[
                ParameterRule(
@ -68,11 +68,25 @@ LLM_BASE_MODELS = [
                    name='frequency_penalty',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
                ),
                _get_max_tokens(default=512, min_val=1, max_val=4096)
                _get_max_tokens(default=512, min_val=1, max_val=4096),
                ParameterRule(
                    name='response_format',
                    label=I18nObject(
                        zh_Hans='回复格式',
                        en_US='response_format'
                    ),
                    type='string',
                    help=I18nObject(
                        zh_Hans='指定模型必须输出的格式',
                        en_US='specifying the format that the model must output'
                    ),
                    required=False,
                    options=['text', 'json_object']
                ),
            ],
            pricing=PriceConfig(
                input=0.001,
                output=0.002,
                input=0.0005,
                output=0.0015,
                unit=0.001,
                currency='USD',
            )
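
The new response_format rule surfaces the OpenAI-style JSON mode; a hedged sketch of what a selected 'json_object' option ultimately becomes on the request (the client setup values are illustrative; AzureOpenAI and response_format are standard openai>=1.x SDK surface):

    from openai import AzureOpenAI  # assumes the openai>=1.x SDK

    client = AzureOpenAI(api_key='...', api_version='2024-02-01',
                         azure_endpoint='https://example.openai.azure.com')
    resp = client.chat.completions.create(
        model='fake-deployment-name',  # deployment name, as in the entities above
        messages=[{'role': 'user', 'content': 'Reply as JSON with a "greeting" key.'}],
        response_format={'type': 'json_object'},  # from options=['text', 'json_object']
    )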

@ -482,6 +496,310 @@ LLM_BASE_MODELS = [
            )
        )
    ),
    AzureBaseModel(
        base_model_name='gpt-4o',
        entity=AIModelEntity(
            model='fake-deployment-name',
            label=I18nObject(
                en_US='fake-deployment-name-label',
            ),
            model_type=ModelType.LLM,
            features=[
                ModelFeature.AGENT_THOUGHT,
                ModelFeature.VISION,
                ModelFeature.MULTI_TOOL_CALL,
                ModelFeature.STREAM_TOOL_CALL,
            ],
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_properties={
                ModelPropertyKey.MODE: LLMMode.CHAT.value,
                ModelPropertyKey.CONTEXT_SIZE: 128000,
            },
            parameter_rules=[
                ParameterRule(
                    name='temperature',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
                ),
                ParameterRule(
                    name='top_p',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
                ),
                ParameterRule(
                    name='presence_penalty',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
                ),
                ParameterRule(
                    name='frequency_penalty',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
                ),
                _get_max_tokens(default=512, min_val=1, max_val=4096),
                ParameterRule(
                    name='seed',
                    label=I18nObject(
                        zh_Hans='种子',
                        en_US='Seed'
                    ),
                    type='int',
                    help=I18nObject(
                        zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint 响应参数来监视变化。',
                        en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.'
                    ),
                    required=False,
                    precision=2,
                    min=0,
                    max=1,
                ),
                ParameterRule(
                    name='response_format',
                    label=I18nObject(
                        zh_Hans='回复格式',
                        en_US='response_format'
                    ),
                    type='string',
                    help=I18nObject(
                        zh_Hans='指定模型必须输出的格式',
                        en_US='specifying the format that the model must output'
                    ),
                    required=False,
                    options=['text', 'json_object']
                ),
            ],
            pricing=PriceConfig(
                input=5.00,
                output=15.00,
                unit=0.000001,
                currency='USD',
            )
        )
    ),
    AzureBaseModel(
        base_model_name='gpt-4o-2024-05-13',
        entity=AIModelEntity(
            model='fake-deployment-name',
            label=I18nObject(
                en_US='fake-deployment-name-label',
            ),
            model_type=ModelType.LLM,
            features=[
                ModelFeature.AGENT_THOUGHT,
                ModelFeature.VISION,
                ModelFeature.MULTI_TOOL_CALL,
                ModelFeature.STREAM_TOOL_CALL,
            ],
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_properties={
                ModelPropertyKey.MODE: LLMMode.CHAT.value,
                ModelPropertyKey.CONTEXT_SIZE: 128000,
            },
            parameter_rules=[
                ParameterRule(
                    name='temperature',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
                ),
                ParameterRule(
                    name='top_p',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
                ),
                ParameterRule(
                    name='presence_penalty',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
                ),
                ParameterRule(
                    name='frequency_penalty',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
                ),
                _get_max_tokens(default=512, min_val=1, max_val=4096),
                ParameterRule(
                    name='seed',
                    label=I18nObject(
                        zh_Hans='种子',
                        en_US='Seed'
                    ),
                    type='int',
                    help=I18nObject(
                        zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint 响应参数来监视变化。',
                        en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.'
                    ),
                    required=False,
                    precision=2,
                    min=0,
                    max=1,
                ),
                ParameterRule(
                    name='response_format',
                    label=I18nObject(
                        zh_Hans='回复格式',
                        en_US='response_format'
                    ),
                    type='string',
                    help=I18nObject(
                        zh_Hans='指定模型必须输出的格式',
                        en_US='specifying the format that the model must output'
                    ),
                    required=False,
                    options=['text', 'json_object']
                ),
            ],
            pricing=PriceConfig(
                input=5.00,
                output=15.00,
                unit=0.000001,
                currency='USD',
            )
        )
    ),
    AzureBaseModel(
        base_model_name='gpt-4-turbo',
        entity=AIModelEntity(
            model='fake-deployment-name',
            label=I18nObject(
                en_US='fake-deployment-name-label',
            ),
            model_type=ModelType.LLM,
            features=[
                ModelFeature.AGENT_THOUGHT,
                ModelFeature.VISION,
                ModelFeature.MULTI_TOOL_CALL,
                ModelFeature.STREAM_TOOL_CALL,
            ],
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_properties={
                ModelPropertyKey.MODE: LLMMode.CHAT.value,
                ModelPropertyKey.CONTEXT_SIZE: 128000,
            },
            parameter_rules=[
                ParameterRule(
                    name='temperature',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
                ),
                ParameterRule(
                    name='top_p',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
                ),
                ParameterRule(
                    name='presence_penalty',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
                ),
                ParameterRule(
                    name='frequency_penalty',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
                ),
                _get_max_tokens(default=512, min_val=1, max_val=4096),
                ParameterRule(
                    name='seed',
                    label=I18nObject(
                        zh_Hans='种子',
                        en_US='Seed'
                    ),
                    type='int',
                    help=I18nObject(
                        zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint 响应参数来监视变化。',
                        en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.'
                    ),
                    required=False,
                    precision=2,
                    min=0,
                    max=1,
                ),
                ParameterRule(
                    name='response_format',
                    label=I18nObject(
                        zh_Hans='回复格式',
                        en_US='response_format'
                    ),
                    type='string',
                    help=I18nObject(
                        zh_Hans='指定模型必须输出的格式',
                        en_US='specifying the format that the model must output'
                    ),
                    required=False,
                    options=['text', 'json_object']
                ),
            ],
            pricing=PriceConfig(
                input=0.01,
                output=0.03,
                unit=0.001,
                currency='USD',
            )
        )
    ),
    AzureBaseModel(
        base_model_name='gpt-4-turbo-2024-04-09',
        entity=AIModelEntity(
            model='fake-deployment-name',
            label=I18nObject(
                en_US='fake-deployment-name-label',
            ),
            model_type=ModelType.LLM,
            features=[
                ModelFeature.AGENT_THOUGHT,
                ModelFeature.VISION,
                ModelFeature.MULTI_TOOL_CALL,
                ModelFeature.STREAM_TOOL_CALL,
            ],
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_properties={
                ModelPropertyKey.MODE: LLMMode.CHAT.value,
                ModelPropertyKey.CONTEXT_SIZE: 128000,
            },
            parameter_rules=[
                ParameterRule(
                    name='temperature',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
                ),
                ParameterRule(
                    name='top_p',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
                ),
                ParameterRule(
                    name='presence_penalty',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
                ),
                ParameterRule(
                    name='frequency_penalty',
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
                ),
                _get_max_tokens(default=512, min_val=1, max_val=4096),
                ParameterRule(
                    name='seed',
                    label=I18nObject(
                        zh_Hans='种子',
                        en_US='Seed'
                    ),
                    type='int',
                    help=I18nObject(
                        zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint 响应参数来监视变化。',
                        en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.'
                    ),
                    required=False,
                    precision=2,
                    min=0,
                    max=1,
                ),
                ParameterRule(
                    name='response_format',
                    label=I18nObject(
                        zh_Hans='回复格式',
                        en_US='response_format'
                    ),
                    type='string',
                    help=I18nObject(
                        zh_Hans='指定模型必须输出的格式',
                        en_US='specifying the format that the model must output'
                    ),
                    required=False,
                    options=['text', 'json_object']
                ),
            ],
            pricing=PriceConfig(
                input=0.01,
                output=0.03,
                unit=0.001,
                currency='USD',
            )
        )
    ),
    AzureBaseModel(
        base_model_name='gpt-4-vision-preview',
        entity=AIModelEntity(

@ -59,6 +59,9 @@ model_credential_schema:
        - label:
            en_US: 2023-12-01-preview
          value: 2023-12-01-preview
        - label:
            en_US: '2024-02-01'
          value: '2024-02-01'
      placeholder:
        zh_Hans: 在此选择您的 API 版本
        en_US: Select your API Version here
@ -99,6 +102,24 @@ model_credential_schema:
          show_on:
            - variable: __model_type
              value: llm
        - label:
            en_US: gpt-4o
          value: gpt-4o
          show_on:
            - variable: __model_type
              value: llm
        - label:
            en_US: gpt-4o-2024-05-13
          value: gpt-4o-2024-05-13
          show_on:
            - variable: __model_type
              value: llm
        - label:
            en_US: gpt-4-turbo
          value: gpt-4-turbo
          show_on:
            - variable: __model_type
              value: llm
        - label:
            en_US: gpt-4-turbo-2024-04-09
          value: gpt-4-turbo-2024-04-09

@ -6,7 +6,7 @@ features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 4000
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature

@ -6,7 +6,7 @@ features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 192000
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature

@ -0,0 +1,45 @@
model: baichuan3-turbo-128k
label:
  en_US: Baichuan3-Turbo-128k
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 8000
    min: 1
    max: 128000
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
    default: 1
    min: 1
    max: 2
  - name: with_search_enhance
    label:
      zh_Hans: 搜索增强
      en_US: Search Enhance
    type: boolean
    help:
      zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。
      en_US: Allow the model to perform external search to enhance the generation results.
    required: false

@ -0,0 +1,45 @@
model: baichuan3-turbo
label:
  en_US: Baichuan3-Turbo
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 8000
    min: 1
    max: 32000
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
    default: 1
    min: 1
    max: 2
  - name: with_search_enhance
    label:
      zh_Hans: 搜索增强
      en_US: Search Enhance
    type: boolean
    help:
      zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。
      en_US: Allow the model to perform external search to enhance the generation results.
    required: false

@ -0,0 +1,45 @@
model: baichuan4
label:
  en_US: Baichuan4
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 8000
    min: 1
    max: 32000
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
    default: 1
    min: 1
    max: 2
  - name: with_search_enhance
    label:
      zh_Hans: 搜索增强
      en_US: Search Enhance
    type: boolean
    help:
      zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。
      en_US: Allow the model to perform external search to enhance the generation results.
    required: false
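
These rules lean on shared templates (use_template) and then override bounds per model. A sketch of how such a rule could be resolved into a concrete schema; the merge order is an assumption inferred from the fields present, and the resolver itself is hypothetical, not Dify's implementation:

    # Hypothetical resolver: merge a shared template with per-model overrides.
    TEMPLATES = {'max_tokens': {'type': 'int', 'default': 512, 'min': 1, 'max': 4096}}

    def resolve_rule(rule: dict) -> dict:
        resolved = dict(TEMPLATES.get(rule.get('use_template', ''), {}))
        # Per-model fields win over the template's defaults.
        resolved.update({k: v for k, v in rule.items() if k != 'use_template'})
        return resolved

    print(resolve_rule({'name': 'max_tokens', 'use_template': 'max_tokens',
                        'required': True, 'default': 8000, 'min': 1, 'max': 128000}))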

@ -51,26 +51,29 @@ class BaichuanModel:
            'baichuan2-turbo': 'Baichuan2-Turbo',
            'baichuan2-turbo-192k': 'Baichuan2-Turbo-192k',
            'baichuan2-53b': 'Baichuan2-53B',
            'baichuan3-turbo': 'Baichuan3-Turbo',
            'baichuan3-turbo-128k': 'Baichuan3-Turbo-128k',
            'baichuan4': 'Baichuan4',
        }[model]

    def _handle_chat_generate_response(self, response) -> BaichuanMessage:
        resp = response.json()
        choices = resp.get('choices', [])
        message = BaichuanMessage(content='', role='assistant')
        for choice in choices:
            message.content += choice['message']['content']
            message.role = choice['message']['role']
            if choice['finish_reason']:
                message.stop_reason = choice['finish_reason']

        if 'usage' in resp:
            message.usage = {
                'prompt_tokens': resp['usage']['prompt_tokens'],
                'completion_tokens': resp['usage']['completion_tokens'],
                'total_tokens': resp['usage']['total_tokens'],
            }

        return message

    def _handle_chat_stream_generate_response(self, response) -> Generator:
        for line in response.iter_lines():
@ -89,7 +92,7 @@ class BaichuanModel:
        # save stop reason temporarily
        stop_reason = ''
        for choice in choices:
            if 'finish_reason' in choice and choice['finish_reason']:
            if choice.get('finish_reason'):
                stop_reason = choice['finish_reason']

            if len(choice['delta']['content']) == 0:
@ -110,7 +113,8 @@ class BaichuanModel:
    def _build_parameters(self, model: str, stream: bool, messages: list[BaichuanMessage],
                          parameters: dict[str, Any]) \
            -> dict[str, Any]:
        if model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b':
        if (model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b'
                or model == 'baichuan3-turbo' or model == 'baichuan3-turbo-128k' or model == 'baichuan4'):
            prompt_messages = []
            for message in messages:
                if message.role == BaichuanMessage.Role.USER.value or message.role == BaichuanMessage.Role._SYSTEM.value:
@ -143,7 +147,8 @@ class BaichuanModel:
        raise BadRequestError(f"Unknown model: {model}")

    def _build_headers(self, model: str, data: dict[str, Any]) -> dict[str, Any]:
        if model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b':
        if (model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b'
                or model == 'baichuan3-turbo' or model == 'baichuan3-turbo-128k' or model == 'baichuan4'):
            # there is no secret key for turbo api
            return {
                'Content-Type': 'application/json',
@ -160,7 +165,8 @@ class BaichuanModel:
                           parameters: dict[str, Any], timeout: int) \
            -> Union[Generator, BaichuanMessage]:

        if model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b':
        if (model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b'
                or model == 'baichuan3-turbo' or model == 'baichuan3-turbo-128k' or model == 'baichuan4'):
            api_base = 'https://api.baichuan-ai.com/v1/chat/completions'
        else:
            raise BadRequestError(f"Unknown model: {model}")

@ -12,6 +12,7 @@
- meta.llama3-70b-instruct-v1:0
- meta.llama2-13b-chat-v1
- meta.llama2-70b-chat-v1
- mistral.mistral-small-2402-v1:0
- mistral.mistral-large-2402-v1:0
- mistral.mixtral-8x7b-instruct-v0:1
- mistral.mistral-7b-instruct-v0:2

@ -51,7 +51,7 @@ parameter_rules:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
  input: '0.003'
  output: '0.015'
  input: '0.00025'
  output: '0.00125'
  unit: '0.001'
  currency: USD

@ -50,7 +50,7 @@ parameter_rules:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
  input: '0.00025'
  output: '0.00125'
  input: '0.003'
  output: '0.015'
  unit: '0.001'
  currency: USD
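
These two hunks swap prices that had been assigned to the wrong models. In these YAML files, unit is the token multiplier the price applies to, so with unit '0.001' the input/output figures are per 1,000 tokens. A quick check, assuming the usual cost formula price × tokens × unit (an assumption about Dify's billing arithmetic, not stated in the diff):

    # Illustrative arithmetic under the assumption cost = price * tokens * unit.
    input_price, output_price, unit = 0.003, 0.015, 0.001
    prompt_tokens, completion_tokens = 2000, 500
    cost = input_price * prompt_tokens * unit + output_price * completion_tokens * unit
    print(f'${cost:.4f}')  # $0.0135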

@ -358,26 +358,25 @@ class BedrockLargeLanguageModel(LargeLanguageModel):

        return message_dict

    def get_num_tokens(self, model: str, credentials: dict, messages: list[PromptMessage] | str,
    def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage] | str,
                       tools: Optional[list[PromptMessageTool]] = None) -> int:
        """
        Get number of tokens for given prompt messages

        :param model: model name
        :param credentials: model credentials
        :param messages: prompt messages or message string
        :param prompt_messages: prompt messages or message string
        :param tools: tools for tool calling
        :return: number of tokens
        """
        prefix = model.split('.')[0]
        model_name = model.split('.')[1]
        if isinstance(messages, str):
            prompt = messages
        if isinstance(prompt_messages, str):
            prompt = prompt_messages
        else:
            prompt = self._convert_messages_to_prompt(messages, prefix, model_name)
            prompt = self._convert_messages_to_prompt(prompt_messages, prefix, model_name)

        return self._get_num_tokens_by_gpt2(prompt)


    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
@ -0,0 +1,27 @@
model: mistral.mistral-small-2402-v1:0
label:
  en_US: Mistral Small
model_type: llm
model_properties:
  mode: completion
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
    required: false
    default: 0.7
  - name: top_p
    use_template: top_p
    required: false
    default: 1
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 4096
pricing:
  input: '0.001'
  output: '0.03'
  unit: '0.001'
  currency: USD

@ -1,3 +1,4 @@
- amazon.titan-embed-text-v1
- amazon.titan-embed-text-v2:0
- cohere.embed-english-v3
- cohere.embed-multilingual-v3

@ -4,5 +4,5 @@ model_properties:
  context_size: 8192
pricing:
  input: '0.0001'
  unit: '0.001'
  unit: '0.0001'
  currency: USD

@ -0,0 +1,8 @@
model: amazon.titan-embed-text-v2:0
model_type: text-embedding
model_properties:
  context_size: 8192
pricing:
  input: '0.00002'
  unit: '0.00001'
  currency: USD

@ -59,15 +59,15 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
        model_prefix = model.split('.')[0]

        if model_prefix == "amazon" :
            for text in texts:
                body = {
                    "inputText": text,
                }
                response_body = self._invoke_bedrock_embedding(model, bedrock_runtime, body)
                embeddings.extend([response_body.get('embedding')])
                token_usage += response_body.get('inputTextTokenCount')
            logger.warning(f'Total Tokens: {token_usage}')
            result = TextEmbeddingResult(
                model=model,
                embeddings=embeddings,
                usage=self._calc_response_usage(
@ -75,20 +75,20 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
                    credentials=credentials,
                    tokens=token_usage
                )
            )
            return result

        if model_prefix == "cohere" :
            input_type = 'search_document' if len(texts) > 1 else 'search_query'
            for text in texts:
                body = {
                    "texts": [text],
                    "input_type": input_type,
                }
                response_body = self._invoke_bedrock_embedding(model, bedrock_runtime, body)
                embeddings.extend(response_body.get('embeddings'))
                token_usage += len(text)
            result = TextEmbeddingResult(
                model=model,
                embeddings=embeddings,
                usage=self._calc_response_usage(
@ -96,9 +96,9 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
                    credentials=credentials,
                    tokens=token_usage
                )
            )
            return result

        # others
        raise ValueError(f"Got unknown model prefix {model_prefix} when handling block response")

@ -183,7 +183,7 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
        )

        return usage

    def _map_client_to_invoke_error(self, error_code: str, error_msg: str) -> type[InvokeError]:
        """
        Map client error to invoke error
@ -212,9 +212,9 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
        content_type = 'application/json'
        try:
            response = bedrock_runtime.invoke_model(
                body=json.dumps(body),
                modelId=model,
                accept=accept,
                contentType=content_type
            )
            response_body = json.loads(response.get('body').read().decode('utf-8'))
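
For orientation, a minimal end-to-end sketch of the boto3 call this wraps. The region and input are placeholders; invoke_model and its body/modelId/accept/contentType parameters are the standard bedrock-runtime API:

    import json

    import boto3  # assumes AWS credentials are configured in the environment

    bedrock_runtime = boto3.client('bedrock-runtime', region_name='us-east-1')
    response = bedrock_runtime.invoke_model(
        body=json.dumps({'inputText': 'hello world'}),
        modelId='amazon.titan-embed-text-v2:0',
        accept='application/json',
        contentType='application/json',
    )
    # The response body is a streaming object; read and decode it once.
    response_body = json.loads(response.get('body').read().decode('utf-8'))
    print(len(response_body.get('embedding', [])))  # embedding vector length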

@ -1,6 +1,5 @@
import logging
from collections.abc import Generator
from os.path import join
from typing import Optional, cast

from httpx import Timeout

@ -19,6 +18,7 @@ from openai import (
)
from openai.types.chat import ChatCompletion, ChatCompletionChunk
from openai.types.chat.chat_completion_message import FunctionCall
from yarl import URL

from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (

@ -265,7 +265,7 @@ class ChatGLMLargeLanguageModel(LargeLanguageModel):
        client_kwargs = {
            "timeout": Timeout(315.0, read=300.0, write=10.0, connect=5.0),
            "api_key": "1",
            "base_url": join(credentials['api_base'], 'v1')
            "base_url": str(URL(credentials['api_base']) / 'v1')
        }

        return client_kwargs
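
A quick sketch of why the yarl form is safer than os.path.join for URLs (the example base URL is illustrative):

    from os.path import join
    from yarl import URL

    base = 'http://localhost:8000'
    # os.path.join is a filesystem helper: without a trailing slash it inserts
    # the OS path separator, which is '\\' on Windows.
    print(join(base, 'v1'))       # 'http://localhost:8000/v1' on POSIX only
    print(str(URL(base) / 'v1'))  # 'http://localhost:8000/v1' on every platform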

@ -32,6 +32,15 @@ provider_credential_schema:
        zh_Hans: 在此输入您的 API Key
        en_US: Enter your API Key
      show_on: [ ]
    - variable: base_url
      label:
        zh_Hans: API Base
        en_US: API Base
      type: text-input
      required: false
      placeholder:
        zh_Hans: 在此输入您的 API Base,如 https://api.cohere.ai/v1
        en_US: Enter your API Base, e.g. https://api.cohere.ai/v1
model_credential_schema:
  model:
    label:
@ -70,3 +79,12 @@ model_credential_schema:
      placeholder:
        zh_Hans: 在此输入您的 API Key
        en_US: Enter your API Key
    - variable: base_url
      label:
        zh_Hans: API Base
        en_US: API Base
      type: text-input
      required: false
      placeholder:
        zh_Hans: 在此输入您的 API Base,如 https://api.cohere.ai/v1
        en_US: Enter your API Base, e.g. https://api.cohere.ai/v1