Merge branch 'main' into e-300

NFish
2025-05-06 10:13:49 +08:00
373 changed files with 5422 additions and 3930 deletions

View File

@@ -24,7 +24,6 @@ from core.model_runtime.errors.invoke (
InvokeRateLimitError,
InvokeServerUnavailableError,
)
from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer
from core.plugin.entities.plugin_daemon import PluginDaemonInnerError, PluginModelProviderEntity
from core.plugin.impl.model import PluginModelClient
@@ -253,15 +252,3 @@ class AIModel(BaseModel):
raise Exception(f"Invalid model parameter rule name {name}")
return default_parameter_rule
def _get_num_tokens_by_gpt2(self, text: str) -> int:
"""
Get number of tokens for given prompt messages by gpt2
Some provider models do not provide an interface for obtaining the number of tokens.
Here, the gpt2 tokenizer is used to calculate the number of tokens.
This method can be executed offline, and the gpt2 tokenizer has been cached in the project.
:param text: plain text of prompt. You need to convert the original message to plain text
:return: number of tokens
"""
return GPT2Tokenizer.get_num_tokens(text)
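The method deleted above wrapped a project-local GPT-2 tokenizer so providers without a token-count endpoint could still estimate usage offline. As a rough illustration of the same idea (assuming tiktoken is installed; the names below are illustrative, not Dify's actual API):

```python
# Minimal sketch of what the removed helper did: estimate token counts locally
# with the GPT-2 BPE vocabulary instead of calling the provider.
# Assumption: tiktoken is available; names here are illustrative, not Dify's API.
import tiktoken

_gpt2_encoding = tiktoken.get_encoding("gpt2")

def estimate_num_tokens(text: str) -> int:
    """Count tokens for plain prompt text without any network call."""
    return len(_gpt2_encoding.encode(text))
```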

View File

@@ -2,7 +2,7 @@ import logging
import time
import uuid
from collections.abc import Generator, Sequence
from typing import Optional, Union, cast
from typing import Optional, Union
from pydantic import ConfigDict
@@ -13,14 +13,15 @@ from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk,
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
PromptMessage,
PromptMessageContentUnionTypes,
PromptMessageTool,
TextPromptMessageContent,
)
from core.model_runtime.entities.model_entities import (
ModelType,
PriceType,
)
from core.model_runtime.model_providers.__base.ai_model import AIModel
from core.model_runtime.utils.helper import convert_llm_result_chunk_to_str
from core.plugin.impl.model import PluginModelClient
logger = logging.getLogger(__name__)
@@ -238,7 +239,7 @@ class LargeLanguageModel(AIModel):
def _invoke_result_generator(
self,
model: str,
result: Generator,
result: Generator[LLMResultChunk, None, None],
credentials: dict,
prompt_messages: Sequence[PromptMessage],
model_parameters: dict,
@@ -255,11 +256,21 @@
:return: result generator
"""
callbacks = callbacks or []
assistant_message = AssistantPromptMessage(content="")
message_content: list[PromptMessageContentUnionTypes] = []
usage = None
system_fingerprint = None
real_model = model
def _update_message_content(content: str | list[PromptMessageContentUnionTypes] | None):
if not content:
return
if isinstance(content, list):
message_content.extend(content)
return
if isinstance(content, str):
message_content.append(TextPromptMessageContent(data=content))
return
try:
for chunk in result:
# Following https://github.com/langgenius/dify/issues/17799,
@@ -281,9 +292,8 @@
callbacks=callbacks,
)
text = convert_llm_result_chunk_to_str(chunk.delta.message.content)
current_content = cast(str, assistant_message.content)
assistant_message.content = current_content + text
_update_message_content(chunk.delta.message.content)
real_model = chunk.model
if chunk.delta.usage:
usage = chunk.delta.usage
@@ -293,6 +303,7 @@
except Exception as e:
raise self._transform_invoke_error(e)
assistant_message = AssistantPromptMessage(content=message_content)
self._trigger_after_invoke_callbacks(
model=model,
result=LLMResult(
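The change above stops concatenating each delta into a single string (the removed `cast`/`content + text` lines) and instead gathers typed content parts, constructing the `AssistantPromptMessage` only once the stream ends, which also lets non-text parts pass through unmodified. A rough, self-contained sketch of that accumulation pattern with stand-in types (not the project's real entities):

```python
# Simplified sketch of the new accumulation pattern: collect typed content parts
# per streamed chunk, then build the assistant message once at the end.
# TextPart/accumulate are stand-ins, not the project's real entities.
from dataclasses import dataclass
from typing import Union

@dataclass
class TextPart:
    data: str

ContentPart = TextPart  # the real union also covers image/audio/etc. parts

def accumulate(deltas: list[Union[str, list[ContentPart], None]]) -> list[ContentPart]:
    parts: list[ContentPart] = []
    for content in deltas:
        if not content:
            continue                      # empty or None delta: nothing to add
        if isinstance(content, list):
            parts.extend(content)         # already a list of typed parts
        elif isinstance(content, str):
            parts.append(TextPart(data=content))  # wrap plain text in a typed part
    return parts

# accumulate(["Hel", "lo", None]) -> [TextPart(data='Hel'), TextPart(data='lo')]
```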

View File

@@ -30,6 +30,8 @@ class GPT2Tokenizer:
@staticmethod
def get_encoder() -> Any:
global _tokenizer, _lock
if _tokenizer is not None:
return _tokenizer
with _lock:
if _tokenizer is None:
# Try to use tiktoken to get the tokenizer because it is faster
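The two added lines give `get_encoder` a lock-free fast path, turning the lazy initialization into the classic double-checked locking pattern: return immediately once the tokenizer exists, otherwise take the lock and re-check before constructing it. A generic sketch of the pattern (placeholder names, not the module's own):

```python
# Generic double-checked locking sketch for a lazily built shared resource.
# Names are placeholders; the real code guards the GPT-2 tokenizer this way.
from threading import Lock

_resource = None
_lock = Lock()

def _expensive_init():
    # Placeholder for the real construction (e.g. loading a tokenizer).
    return object()

def get_resource():
    global _resource
    if _resource is not None:      # fast path: skip the lock once initialized
        return _resource
    with _lock:                    # slow path: serialize first-time creation
        if _resource is None:      # re-check: another thread may have created it
            _resource = _expensive_init()
    return _resource
```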