Merge branch 'refs/heads/main' into feat/workflow-parallel-support

# Conflicts:
#	api/core/app/apps/advanced_chat/app_generator.py
#	api/core/app/apps/advanced_chat/app_runner.py
#	api/core/app/apps/advanced_chat/generate_task_pipeline.py
#	api/core/app/apps/workflow/app_runner.py
#	api/core/app/task_pipeline/workflow_cycle_manage.py
#	api/core/workflow/entities/variable_pool.py
#	api/core/workflow/nodes/base_node.py
#	api/core/workflow/workflow_engine_manager.py
Author: takatost
Date: 2024-08-13 17:05:39 +08:00
606 changed files with 22896 additions and 4246 deletions

View File

@ -12,6 +12,7 @@ ENV POETRY_CACHE_DIR=/tmp/poetry_cache
ENV POETRY_NO_INTERACTION=1
ENV POETRY_VIRTUALENVS_IN_PROJECT=true
ENV POETRY_VIRTUALENVS_CREATE=true
ENV POETRY_REQUESTS_TIMEOUT=15
FROM base AS packages
@ -41,8 +42,12 @@ ENV TZ=UTC
WORKDIR /app/api
RUN apt-get update \
&& apt-get install -y --no-install-recommends curl wget vim nodejs ffmpeg libgmp-dev libmpfr-dev libmpc-dev \
&& apt-get autoremove \
&& apt-get install -y --no-install-recommends curl nodejs libgmp-dev libmpfr-dev libmpc-dev \
&& echo "deb http://deb.debian.org/debian testing main" > /etc/apt/sources.list \
&& apt-get update \
# For Security
&& apt-get install -y --no-install-recommends zlib1g=1:1.3.dfsg+really1.3.1-1 expat=2.6.2-1 libldap-2.5-0=2.5.18+dfsg-2 perl=5.38.2-5 libsqlite3-0=3.46.0-1 \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
# Copy Python environment and packages
@ -50,6 +55,9 @@ ENV VIRTUAL_ENV=/app/api/.venv
COPY --from=packages ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Download nltk data
RUN python -c "import nltk; nltk.download('punkt')"
# Copy source code
COPY . /app/api/
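The image now pre-fetches the NLTK punkt data at build time, so containers no longer download it on first use. A minimal sketch of how application code can rely on that baked-in data (illustrative only, not taken from this diff):

import nltk

# Raises LookupError if the Dockerfile download step was skipped.
nltk.data.find('tokenizers/punkt')
sentences = nltk.sent_tokenize('The punkt data ships in the image. No runtime download is needed.')
print(sentences)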

View File

@ -12,19 +12,14 @@ from configs.packaging import PackagingInfo
class DifyConfig(
# Packaging info
PackagingInfo,
# Deployment configs
DeploymentConfig,
# Feature configs
FeatureConfig,
# Middleware configs
MiddlewareConfig,
# Extra service configs
ExtraServiceConfig,
# Enterprise feature configs
# **Before using, please contact business@dify.ai by email to inquire about licensing matters.**
EnterpriseFeatureConfig,
@ -36,7 +31,6 @@ class DifyConfig(
env_file='.env',
env_file_encoding='utf-8',
frozen=True,
# ignore extra attributes
extra='ignore',
)
@ -67,3 +61,5 @@ class DifyConfig(
SSRF_PROXY_HTTPS_URL: str | None = None
MODERATION_BUFFER_SIZE: int = Field(default=300, description='The buffer size for moderation.')
MAX_VARIABLE_SIZE: int = Field(default=5 * 1024, description='The maximum size of a variable. default is 5KB.')
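MAX_VARIABLE_SIZE caps how large a single workflow variable may be; the variable factory later in this diff rejects anything above it. A standalone sketch of the pydantic-settings pattern DifyConfig uses, showing the environment override (the class name here is illustrative):

from pydantic import Field
from pydantic_settings import BaseSettings

class SketchConfig(BaseSettings):
    MAX_VARIABLE_SIZE: int = Field(default=5 * 1024, description='The maximum size of a variable.')

# Prints 5120 unless MAX_VARIABLE_SIZE is set in the environment or a .env file.
print(SketchConfig().MAX_VARIABLE_SIZE)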

View File

@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):
CURRENT_VERSION: str = Field(
description='Dify version',
default='0.6.15',
default='0.6.16',
)
COMMIT_SHA: str = Field(

View File

@ -1,2 +1 @@
# TODO: Update all string in code to use this constant
HIDDEN_VALUE = '[__HIDDEN__]'

View File

@ -15,6 +15,8 @@ language_timezone_mapping = {
'ro-RO': 'Europe/Bucharest',
'pl-PL': 'Europe/Warsaw',
'hi-IN': 'Asia/Kolkata',
'tr-TR': 'Europe/Istanbul',
'fa-IR': 'Asia/Tehran',
}
languages = list(language_timezone_mapping.keys())

View File

@ -17,6 +17,7 @@ from .app import (
audio,
completion,
conversation,
conversation_variables,
generator,
message,
model_config,

View File

@ -23,8 +23,7 @@ class AnnotationReplyActionApi(Resource):
@account_initialization_required
@cloud_edition_billing_resource_check('annotation')
def post(self, app_id, action):
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
app_id = str(app_id)
@ -47,8 +46,7 @@ class AppAnnotationSettingDetailApi(Resource):
@login_required
@account_initialization_required
def get(self, app_id):
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
app_id = str(app_id)
@ -61,8 +59,7 @@ class AppAnnotationSettingUpdateApi(Resource):
@login_required
@account_initialization_required
def post(self, app_id, annotation_setting_id):
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
app_id = str(app_id)
@ -82,8 +79,7 @@ class AnnotationReplyActionStatusApi(Resource):
@account_initialization_required
@cloud_edition_billing_resource_check('annotation')
def get(self, app_id, job_id, action):
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
job_id = str(job_id)
@ -110,8 +106,7 @@ class AnnotationListApi(Resource):
@login_required
@account_initialization_required
def get(self, app_id):
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
page = request.args.get('page', default=1, type=int)
@ -135,8 +130,7 @@ class AnnotationExportApi(Resource):
@login_required
@account_initialization_required
def get(self, app_id):
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
app_id = str(app_id)
@ -154,8 +148,7 @@ class AnnotationCreateApi(Resource):
@cloud_edition_billing_resource_check('annotation')
@marshal_with(annotation_fields)
def post(self, app_id):
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
app_id = str(app_id)
@ -174,8 +167,7 @@ class AnnotationUpdateDeleteApi(Resource):
@cloud_edition_billing_resource_check('annotation')
@marshal_with(annotation_fields)
def post(self, app_id, annotation_id):
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
app_id = str(app_id)
@ -191,8 +183,7 @@ class AnnotationUpdateDeleteApi(Resource):
@login_required
@account_initialization_required
def delete(self, app_id, annotation_id):
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
app_id = str(app_id)
@ -207,8 +198,7 @@ class AnnotationBatchImportApi(Resource):
@account_initialization_required
@cloud_edition_billing_resource_check('annotation')
def post(self, app_id):
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
app_id = str(app_id)
@ -232,8 +222,7 @@ class AnnotationBatchImportStatusApi(Resource):
@account_initialization_required
@cloud_edition_billing_resource_check('annotation')
def get(self, app_id, job_id):
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
job_id = str(job_id)
@ -259,8 +248,7 @@ class AnnotationHitHistoryListApi(Resource):
@login_required
@account_initialization_required
def get(self, app_id, annotation_id):
# The role of the current user in the table must be admin or owner
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
page = request.args.get('page', default=1, type=int)

View File

@ -143,7 +143,7 @@ class ChatConversationApi(Resource):
@get_app_model(mode=[AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT])
@marshal_with(conversation_with_summary_pagination_fields)
def get(self, app_model):
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
parser = reqparse.RequestParser()
parser.add_argument('keyword', type=str, location='args')
@ -245,7 +245,7 @@ class ChatConversationDetailApi(Resource):
@get_app_model(mode=[AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT])
@marshal_with(conversation_detail_fields)
def get(self, app_model, conversation_id):
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
conversation_id = str(conversation_id)

View File

@ -0,0 +1,61 @@
from flask_restful import Resource, marshal_with, reqparse
from sqlalchemy import select
from sqlalchemy.orm import Session
from controllers.console import api
from controllers.console.app.wraps import get_app_model
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from extensions.ext_database import db
from fields.conversation_variable_fields import paginated_conversation_variable_fields
from libs.login import login_required
from models import ConversationVariable
from models.model import AppMode
class ConversationVariablesApi(Resource):
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=AppMode.ADVANCED_CHAT)
@marshal_with(paginated_conversation_variable_fields)
def get(self, app_model):
parser = reqparse.RequestParser()
parser.add_argument('conversation_id', type=str, location='args')
args = parser.parse_args()
stmt = (
select(ConversationVariable)
.where(ConversationVariable.app_id == app_model.id)
.order_by(ConversationVariable.created_at)
)
if args['conversation_id']:
stmt = stmt.where(ConversationVariable.conversation_id == args['conversation_id'])
else:
raise ValueError('conversation_id is required')
# NOTE: This is a temporary solution to avoid performance issues.
page = 1
page_size = 100
stmt = stmt.limit(page_size).offset((page - 1) * page_size)
with Session(db.engine) as session:
rows = session.scalars(stmt).all()
return {
'page': page,
'limit': page_size,
'total': len(rows),
'has_more': False,
'data': [
{
'created_at': row.created_at,
'updated_at': row.updated_at,
**row.to_variable().model_dump(),
}
for row in rows
],
}
api.add_resource(ConversationVariablesApi, '/apps/<uuid:app_id>/conversation-variables')
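A hypothetical client call against the new endpoint; the host, path prefix, ids, and auth header are placeholders rather than values from this diff:

import requests

resp = requests.get(
    'http://localhost:5001/console/api/apps/<app-uuid>/conversation-variables',
    params={'conversation_id': '<conversation-uuid>'},
    headers={'Authorization': 'Bearer <console-access-token>'},
)
# Each item carries created_at/updated_at plus the dumped variable fields.
print(resp.json()['data'])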

View File

@ -149,8 +149,7 @@ class MessageAnnotationApi(Resource):
@get_app_model
@marshal_with(annotation_fields)
def post(self, app_model):
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
parser = reqparse.RequestParser()

View File

@ -74,6 +74,7 @@ class DraftWorkflowApi(Resource):
parser.add_argument('hash', type=str, required=False, location='json')
# TODO: set this to required=True after frontend is updated
parser.add_argument('environment_variables', type=list, required=False, location='json')
parser.add_argument('conversation_variables', type=list, required=False, location='json')
args = parser.parse_args()
elif 'text/plain' in content_type:
try:
@ -88,7 +89,8 @@ class DraftWorkflowApi(Resource):
'graph': data.get('graph'),
'features': data.get('features'),
'hash': data.get('hash'),
'environment_variables': data.get('environment_variables')
'environment_variables': data.get('environment_variables'),
'conversation_variables': data.get('conversation_variables'),
}
except json.JSONDecodeError:
return {'message': 'Invalid JSON data'}, 400
@ -100,6 +102,8 @@ class DraftWorkflowApi(Resource):
try:
environment_variables_list = args.get('environment_variables') or []
environment_variables = [factory.build_variable_from_mapping(obj) for obj in environment_variables_list]
conversation_variables_list = args.get('conversation_variables') or []
conversation_variables = [factory.build_variable_from_mapping(obj) for obj in conversation_variables_list]
workflow = workflow_service.sync_draft_workflow(
app_model=app_model,
graph=args['graph'],
@ -107,6 +111,7 @@ class DraftWorkflowApi(Resource):
unique_hash=args.get('hash'),
account=current_user,
environment_variables=environment_variables,
conversation_variables=conversation_variables,
)
except WorkflowHashNotEqualError:
raise DraftWorkflowNotSync()
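A hypothetical draft-sync payload showing the new conversation_variables field next to environment_variables. The name, value_type, and value keys are what the variable factory in this diff requires; the 'id' key and all concrete values are placeholders:

payload = {
    'graph': {},      # workflow graph omitted here
    'features': {},   # feature config omitted here
    'hash': '<current-draft-hash>',
    'environment_variables': [],
    'conversation_variables': [
        {'id': '<uuid>', 'name': 'memory', 'value_type': 'string', 'value': ''},
    ],
}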

View File

@ -17,8 +17,6 @@ from ..wraps import account_initialization_required
def get_oauth_providers():
with current_app.app_context():
if not dify_config.NOTION_CLIENT_ID or not dify_config.NOTION_CLIENT_SECRET:
return {}
notion_oauth = NotionOAuth(client_id=dify_config.NOTION_CLIENT_ID,
client_secret=dify_config.NOTION_CLIENT_SECRET,
redirect_uri=dify_config.CONSOLE_API_URL + '/console/api/oauth/data-source/callback/notion')

View File

@ -189,8 +189,6 @@ class DatasetApi(Resource):
dataset = DatasetService.get_dataset(dataset_id_str)
if dataset is None:
raise NotFound("Dataset not found.")
# check user's model setting
DatasetService.check_dataset_model_setting(dataset)
parser = reqparse.RequestParser()
parser.add_argument('name', nullable=False,
@ -215,6 +213,13 @@ class DatasetApi(Resource):
args = parser.parse_args()
data = request.get_json()
# check embedding model setting
if data.get('indexing_technique') == 'high_quality':
DatasetService.check_embedding_model_setting(dataset.tenant_id,
data.get('embedding_model_provider'),
data.get('embedding_model')
)
# The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
DatasetPermissionService.check_permission(
current_user, dataset, data.get('permission'), data.get('partial_member_list')
@ -233,7 +238,8 @@ class DatasetApi(Resource):
DatasetPermissionService.update_partial_member_list(
tenant_id, dataset_id_str, data.get('partial_member_list')
)
else:
# clear partial member list when permission is only_me or all_team_members
elif data.get('permission') == 'only_me' or data.get('permission') == 'all_team_members':
DatasetPermissionService.clear_partial_member_list(dataset_id_str)
partial_member_list = DatasetPermissionService.get_dataset_partial_member_list(dataset_id_str)

View File

@ -223,8 +223,7 @@ class DatasetDocumentSegmentAddApi(Resource):
document = DocumentService.get_document(dataset_id, document_id)
if not document:
raise NotFound('Document not found.')
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
# check embedding model setting
if dataset.indexing_technique == 'high_quality':
@ -347,7 +346,7 @@ class DatasetDocumentSegmentUpdateApi(Resource):
if not segment:
raise NotFound('Segment not found.')
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
if not current_user.is_editor:
raise Forbidden()
try:
DatasetService.check_dataset_permission(dataset, current_user)

View File

@ -1,6 +1,7 @@
from flask_login import current_user
from flask_restful import Resource, marshal_with, reqparse
from constants import HIDDEN_VALUE
from controllers.console import api
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
@ -89,7 +90,7 @@ class APIBasedExtensionDetailAPI(Resource):
extension_data_from_db.name = args['name']
extension_data_from_db.api_endpoint = args['api_endpoint']
if args['api_key'] != '[__HIDDEN__]':
if args['api_key'] != HIDDEN_VALUE:
extension_data_from_db.api_key = args['api_key']
return APIBasedExtensionService.save(extension_data_from_db)

View File

@ -19,7 +19,7 @@ def inner_api_only(view):
# get header 'X-Inner-Api-Key'
inner_api_key = request.headers.get('X-Inner-Api-Key')
if not inner_api_key or inner_api_key != dify_config.INNER_API_KEY:
abort(404)
abort(401)
return view(*args, **kwargs)

View File

@ -53,7 +53,7 @@ class ConversationDetailApi(Resource):
ConversationService.delete(app_model, conversation_id, end_user)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
return {"result": "success"}, 204
return {'result': 'success'}, 200
class ConversationRenameApi(Resource):

View File

@ -131,7 +131,7 @@ class MessageSuggestedApi(Resource):
except services.errors.message.MessageNotExistsError:
raise NotFound("Message Not Exists.")
except SuggestedQuestionsAfterAnswerDisabledError:
raise BadRequest("Message Not Exists.")
raise BadRequest("Suggested Questions Is Disabled.")
except Exception:
logging.exception("internal server error.")
raise InternalServerError()

View File

@ -79,6 +79,7 @@ class CotAgentRunner(BaseAgentRunner, ABC):
llm_usage.completion_tokens += usage.completion_tokens
llm_usage.prompt_price += usage.prompt_price
llm_usage.completion_price += usage.completion_price
llm_usage.total_price += usage.total_price
model_instance = self.model_instance

View File

@ -62,6 +62,7 @@ class FunctionCallAgentRunner(BaseAgentRunner):
llm_usage.completion_tokens += usage.completion_tokens
llm_usage.prompt_price += usage.prompt_price
llm_usage.completion_price += usage.completion_price
llm_usage.total_price += usage.total_price
model_instance = self.model_instance

View File

@ -91,7 +91,8 @@ class DatasetConfigManager:
top_k=dataset_configs.get('top_k', 4),
score_threshold=dataset_configs.get('score_threshold'),
reranking_model=dataset_configs.get('reranking_model'),
weights=dataset_configs.get('weights')
weights=dataset_configs.get('weights'),
reranking_enabled=dataset_configs.get('reranking_enabled', True),
)
)

View File

@ -3,8 +3,9 @@ from typing import Any, Optional
from pydantic import BaseModel
from core.file.file_obj import FileExtraConfig
from core.model_runtime.entities.message_entities import PromptMessageRole
from models.model import AppMode
from models import AppMode
class ModelConfigEntity(BaseModel):
@ -158,10 +159,11 @@ class DatasetRetrieveConfigEntity(BaseModel):
retrieve_strategy: RetrieveStrategy
top_k: Optional[int] = None
score_threshold: Optional[float] = None
score_threshold: Optional[float] = .0
rerank_mode: Optional[str] = 'reranking_model'
reranking_model: Optional[dict] = None
weights: Optional[dict] = None
reranking_enabled: Optional[bool] = True
@ -199,11 +201,6 @@ class TracingConfigEntity(BaseModel):
tracing_provider: str
class FileExtraConfig(BaseModel):
"""
File Upload Entity.
"""
image_config: Optional[dict[str, Any]] = None
class AppAdditionalFeatures(BaseModel):

View File

@ -1,7 +1,7 @@
from collections.abc import Mapping
from typing import Any, Optional
from core.app.app_config.entities import FileExtraConfig
from core.file.file_obj import FileExtraConfig
class FileUploadConfigManager:

View File

@ -89,7 +89,8 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
)
# get tracing instance
trace_manager = TraceQueueManager(app_id=app_model.id)
user_id = user.id if isinstance(user, Account) else user.session_id
trace_manager = TraceQueueManager(app_model.id, user_id)
if invoke_from == InvokeFrom.DEBUGGER:
# always enable retriever resource in debugger mode
@ -112,7 +113,6 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
contexts.tenant_id.set(application_generate_entity.app_config.tenant_id)
return self._generate(
app_model=app_model,
workflow=workflow,
user=user,
invoke_from=invoke_from,
@ -121,7 +121,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
stream=stream
)
def _generate(self, app_model: App,
def _generate(self, *,
workflow: Workflow,
user: Union[Account, EndUser],
invoke_from: InvokeFrom,

View File

@ -5,7 +5,12 @@ import queue
import re
import threading
from core.app.entities.queue_entities import QueueAgentMessageEvent, QueueLLMChunkEvent, QueueTextChunkEvent
from core.app.entities.queue_entities import (
QueueAgentMessageEvent,
QueueLLMChunkEvent,
QueueNodeSucceededEvent,
QueueTextChunkEvent,
)
from core.model_manager import ModelManager
from core.model_runtime.entities.model_entities import ModelType
@ -88,6 +93,8 @@ class AppGeneratorTTSPublisher:
self.msg_text += message.event.chunk.delta.message.content
elif isinstance(message.event, QueueTextChunkEvent):
self.msg_text += message.event.text
elif isinstance(message.event, QueueNodeSucceededEvent):
self.msg_text += message.event.outputs.get('output', '')
self.last_message = message
sentence_arr, text_tmp = self._extract_sentence(self.msg_text)
if len(sentence_arr) >= min(self.MAX_SENTENCE, 7):

View File

@ -3,6 +3,9 @@ import os
from collections.abc import Mapping
from typing import Any, Optional, cast
from sqlalchemy import select
from sqlalchemy.orm import Session
from core.app.apps.advanced_chat.app_config_manager import AdvancedChatAppConfig
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
from core.app.apps.base_app_runner import AppRunner
@ -32,6 +35,7 @@ from core.app.entities.queue_entities import (
from core.moderation.base import ModerationException
from core.workflow.callbacks.base_workflow_callback import WorkflowCallback
from core.workflow.entities.node_entities import SystemVariable, UserFrom
from core.workflow.entities.variable_pool import VariablePool
from core.workflow.graph_engine.entities.event import (
GraphEngineEvent,
GraphRunFailedEvent,
@ -53,7 +57,7 @@ from core.workflow.graph_engine.entities.event import (
from core.workflow.workflow_entry import WorkflowEntry
from extensions.ext_database import db
from models.model import App, Conversation, EndUser, Message
from models.workflow import Workflow
from models.workflow import ConversationVariable, Workflow
logger = logging.getLogger(__name__)
@ -91,11 +95,11 @@ class AdvancedChatAppRunner(AppRunner):
app_record = db.session.query(App).filter(App.id == app_config.app_id).first()
if not app_record:
raise ValueError("App not found")
raise ValueError('App not found')
workflow = self.get_workflow(app_model=app_record, workflow_id=app_config.workflow_id)
if not workflow:
raise ValueError("Workflow not initialized")
raise ValueError('Workflow not initialized')
inputs = self.application_generate_entity.inputs
query = self.application_generate_entity.query
@ -134,6 +138,38 @@ class AdvancedChatAppRunner(AppRunner):
if bool(os.environ.get("DEBUG", 'False').lower() == 'true'):
workflow_callbacks.append(WorkflowLoggingCallback())
# Init conversation variables
stmt = select(ConversationVariable).where(
ConversationVariable.app_id == conversation.app_id, ConversationVariable.conversation_id == conversation.id
)
with Session(db.engine) as session:
conversation_variables = session.scalars(stmt).all()
if not conversation_variables:
conversation_variables = [
ConversationVariable.from_variable(
app_id=conversation.app_id, conversation_id=conversation.id, variable=variable
)
for variable in workflow.conversation_variables
]
session.add_all(conversation_variables)
session.commit()
# Convert database entities to variables
conversation_variables = [item.to_variable() for item in conversation_variables]
# Create a variable pool.
system_inputs = {
SystemVariable.QUERY: query,
SystemVariable.FILES: files,
SystemVariable.CONVERSATION_ID: conversation.id,
SystemVariable.USER_ID: user_id,
}
variable_pool = VariablePool(
system_variables=system_inputs,
user_inputs=inputs,
environment_variables=workflow.environment_variables,
conversation_variables=conversation_variables,
)
# RUN WORKFLOW
workflow_entry = WorkflowEntry(
workflow=workflow,
@ -142,14 +178,8 @@ class AdvancedChatAppRunner(AppRunner):
if self.application_generate_entity.invoke_from in [InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER]
else UserFrom.END_USER,
invoke_from=self.application_generate_entity.invoke_from,
user_inputs=inputs,
system_inputs={
SystemVariable.QUERY: query,
SystemVariable.FILES: files,
SystemVariable.CONVERSATION_ID: self.conversation.id,
SystemVariable.USER_ID: user_id
},
call_depth=self.application_generate_entity.call_depth
call_depth=self.application_generate_entity.call_depth,
variable_pool=variable_pool,
)
generator = workflow_entry.run(
@ -323,11 +353,13 @@ class AdvancedChatAppRunner(AppRunner):
Get workflow
"""
# fetch workflow by workflow_id
workflow = db.session.query(Workflow).filter(
Workflow.tenant_id == app_model.tenant_id,
Workflow.app_id == app_model.id,
Workflow.id == workflow_id
).first()
workflow = (
db.session.query(Workflow)
.filter(
Workflow.tenant_id == app_model.tenant_id, Workflow.app_id == app_model.id, Workflow.id == workflow_id
)
.first()
)
# return workflow
return workflow
@ -385,7 +417,7 @@ class AdvancedChatAppRunner(AppRunner):
message=message,
query=query,
user_id=app_generate_entity.user_id,
invoke_from=app_generate_entity.invoke_from
invoke_from=app_generate_entity.invoke_from,
)
if annotation_reply:
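The runner now seeds per-conversation variables on first use: look them up by (app_id, conversation_id), and if none exist, create rows from the workflow's declared conversation_variables before converting them back into pool variables. The idiom, reduced to a standalone SQLAlchemy sketch (the model and its fields are stand-ins for the real ConversationVariable):

from sqlalchemy import select
from sqlalchemy.orm import Session

def get_or_create_conversation_variables(engine, model, app_id, conversation_id, defaults):
    with Session(engine) as session:
        stmt = select(model).where(model.app_id == app_id, model.conversation_id == conversation_id)
        rows = session.scalars(stmt).all()
        if not rows:
            # First turn of this conversation: persist the workflow's declared defaults.
            rows = list(defaults)
            session.add_all(rows)
            session.commit()
        return rows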

View File

@ -110,7 +110,8 @@ class AgentChatAppGenerator(MessageBasedAppGenerator):
)
# get tracing instance
trace_manager = TraceQueueManager(app_model.id)
user_id = user.id if isinstance(user, Account) else user.session_id
trace_manager = TraceQueueManager(app_model.id, user_id)
# init application generate entity
application_generate_entity = AgentChatAppGenerateEntity(

View File

@ -74,7 +74,8 @@ class WorkflowAppGenerator(BaseAppGenerator):
)
# get tracing instance
trace_manager = TraceQueueManager(app_model.id)
user_id = user.id if isinstance(user, Account) else user.session_id
trace_manager = TraceQueueManager(app_model.id, user_id)
# init application generate entity
application_generate_entity = WorkflowAppGenerateEntity(

View File

@ -11,6 +11,7 @@ from core.app.entities.app_invoke_entities import (
)
from core.workflow.callbacks.base_workflow_callback import WorkflowCallback
from core.workflow.entities.node_entities import SystemVariable, UserFrom
from core.workflow.entities.variable_pool import VariablePool
from core.workflow.workflow_entry import WorkflowEntry
from extensions.ext_database import db
from models.model import App, EndUser
@ -24,8 +25,7 @@ class WorkflowAppRunner:
Workflow Application Runner
"""
def run(self, application_generate_entity: WorkflowAppGenerateEntity,
queue_manager: AppQueueManager) -> None:
def run(self, application_generate_entity: WorkflowAppGenerateEntity, queue_manager: AppQueueManager) -> None:
"""
Run application
:param application_generate_entity: application generate entity
@ -45,11 +45,11 @@ class WorkflowAppRunner:
app_record = db.session.query(App).filter(App.id == app_config.app_id).first()
if not app_record:
raise ValueError("App not found")
raise ValueError('App not found')
workflow = self.get_workflow(app_model=app_record, workflow_id=app_config.workflow_id)
if not workflow:
raise ValueError("Workflow not initialized")
raise ValueError('Workflow not initialized')
inputs = application_generate_entity.inputs
files = application_generate_entity.files
@ -58,9 +58,21 @@ class WorkflowAppRunner:
workflow_callbacks: list[WorkflowCallback] = []
if bool(os.environ.get("DEBUG", 'False').lower() == 'true'):
if bool(os.environ.get('DEBUG', 'False').lower() == 'true'):
workflow_callbacks.append(WorkflowLoggingCallback())
# Create a variable pool.
system_inputs = {
SystemVariable.FILES: files,
SystemVariable.USER_ID: user_id,
}
variable_pool = VariablePool(
system_variables=system_inputs,
user_inputs=inputs,
environment_variables=workflow.environment_variables,
conversation_variables=[],
)
# RUN WORKFLOW
workflow_entry = WorkflowEntry()
workflow_entry.run(
@ -71,26 +83,22 @@ class WorkflowAppRunner:
else UserFrom.END_USER,
invoke_from=application_generate_entity.invoke_from,
callbacks=workflow_callbacks,
user_inputs=inputs,
system_inputs={
SystemVariable.FILES: files,
SystemVariable.USER_ID: user_id
},
call_depth=application_generate_entity.call_depth
call_depth=application_generate_entity.call_depth,
variable_pool=variable_pool,
)
def single_iteration_run(self, app_id: str, workflow_id: str,
queue_manager: AppQueueManager,
inputs: dict, node_id: str, user_id: str) -> None:
def single_iteration_run(
self, app_id: str, workflow_id: str, queue_manager: AppQueueManager, inputs: dict, node_id: str, user_id: str
) -> None:
"""
Single iteration run
"""
app_record: App = db.session.query(App).filter(App.id == app_id).first()
app_record = db.session.query(App).filter(App.id == app_id).first()
if not app_record:
raise ValueError("App not found")
raise ValueError('App not found')
if not app_record.workflow_id:
raise ValueError("Workflow not initialized")
raise ValueError('Workflow not initialized')
workflow = self.get_workflow(app_model=app_record, workflow_id=workflow_id)
if not workflow:
@ -112,11 +120,13 @@ class WorkflowAppRunner:
Get workflow
"""
# fetch workflow by workflow_id
workflow = db.session.query(Workflow).filter(
Workflow.tenant_id == app_model.tenant_id,
Workflow.app_id == app_model.id,
Workflow.id == workflow_id
).first()
workflow = (
db.session.query(Workflow)
.filter(
Workflow.tenant_id == app_model.tenant_id, Workflow.app_id == app_model.id, Workflow.id == workflow_id
)
.first()
)
# return workflow
return workflow

View File

@ -1,6 +1,7 @@
from .segment_group import SegmentGroup
from .segments import (
ArrayAnySegment,
ArraySegment,
FileSegment,
FloatSegment,
IntegerSegment,
@ -50,4 +51,5 @@ __all__ = [
'ArrayNumberVariable',
'ArrayObjectVariable',
'ArrayFileVariable',
'ArraySegment',
]

View File

@ -0,0 +1,2 @@
class VariableError(Exception):
pass

View File

@ -1,8 +1,10 @@
from collections.abc import Mapping
from typing import Any
from configs import dify_config
from core.file.file_obj import FileVar
from .exc import VariableError
from .segments import (
ArrayAnySegment,
FileSegment,
@ -29,39 +31,43 @@ from .variables import (
)
def build_variable_from_mapping(m: Mapping[str, Any], /) -> Variable:
if (value_type := m.get('value_type')) is None:
raise ValueError('missing value type')
if not m.get('name'):
raise ValueError('missing name')
if (value := m.get('value')) is None:
raise ValueError('missing value')
def build_variable_from_mapping(mapping: Mapping[str, Any], /) -> Variable:
if (value_type := mapping.get('value_type')) is None:
raise VariableError('missing value type')
if not mapping.get('name'):
raise VariableError('missing name')
if (value := mapping.get('value')) is None:
raise VariableError('missing value')
match value_type:
case SegmentType.STRING:
return StringVariable.model_validate(m)
result = StringVariable.model_validate(mapping)
case SegmentType.SECRET:
return SecretVariable.model_validate(m)
result = SecretVariable.model_validate(mapping)
case SegmentType.NUMBER if isinstance(value, int):
return IntegerVariable.model_validate(m)
result = IntegerVariable.model_validate(mapping)
case SegmentType.NUMBER if isinstance(value, float):
return FloatVariable.model_validate(m)
result = FloatVariable.model_validate(mapping)
case SegmentType.NUMBER if not isinstance(value, float | int):
raise ValueError(f'invalid number value {value}')
raise VariableError(f'invalid number value {value}')
case SegmentType.FILE:
return FileVariable.model_validate(m)
result = FileVariable.model_validate(mapping)
case SegmentType.OBJECT if isinstance(value, dict):
return ObjectVariable.model_validate(
{**m, 'value': {k: build_variable_from_mapping(v) for k, v in value.items()}}
)
result = ObjectVariable.model_validate(mapping)
case SegmentType.ARRAY_STRING if isinstance(value, list):
return ArrayStringVariable.model_validate({**m, 'value': [build_variable_from_mapping(v) for v in value]})
result = ArrayStringVariable.model_validate(mapping)
case SegmentType.ARRAY_NUMBER if isinstance(value, list):
return ArrayNumberVariable.model_validate({**m, 'value': [build_variable_from_mapping(v) for v in value]})
result = ArrayNumberVariable.model_validate(mapping)
case SegmentType.ARRAY_OBJECT if isinstance(value, list):
return ArrayObjectVariable.model_validate({**m, 'value': [build_variable_from_mapping(v) for v in value]})
result = ArrayObjectVariable.model_validate(mapping)
case SegmentType.ARRAY_FILE if isinstance(value, list):
return ArrayFileVariable.model_validate({**m, 'value': [build_variable_from_mapping(v) for v in value]})
raise ValueError(f'not supported value type {value_type}')
mapping = dict(mapping)
mapping['value'] = [{'value': v} for v in value]
result = ArrayFileVariable.model_validate(mapping)
case _:
raise VariableError(f'not supported value type {value_type}')
if result.size > dify_config.MAX_VARIABLE_SIZE:
raise VariableError(f'variable size {result.size} exceeds limit {dify_config.MAX_VARIABLE_SIZE}')
return result
def build_segment(value: Any, /) -> Segment:
@ -74,13 +80,9 @@ def build_segment(value: Any, /) -> Segment:
if isinstance(value, float):
return FloatSegment(value=value)
if isinstance(value, dict):
# TODO: Limit the depth of the object
obj = {k: build_segment(v) for k, v in value.items()}
return ObjectSegment(value=obj)
return ObjectSegment(value=value)
if isinstance(value, list):
# TODO: Limit the depth of the array
elements = [build_segment(v) for v in value]
return ArrayAnySegment(value=elements)
return ArrayAnySegment(value=value)
if isinstance(value, FileVar):
return FileSegment(value=value)
raise ValueError(f'not supported value {value}')
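Two things change in this factory: failures raise a dedicated VariableError instead of ValueError, and every branch assigns to result rather than returning early, so a single size check runs before the one exit point. A generic, runnable rendering of that shape (the types and the size proxy are simplified stand-ins, not the real Segment classes):

class VariableError(Exception):
    pass

MAX_VARIABLE_SIZE = 5 * 1024  # mirrors the config default above

def build(mapping: dict):
    match mapping.get('value_type'):
        case 'string':
            result = str(mapping['value'])
        case _:
            raise VariableError(f"not supported value type {mapping.get('value_type')}")
    if len(result.encode()) > MAX_VARIABLE_SIZE:  # stand-in for the Segment.size property
        raise VariableError(f'variable size exceeds limit {MAX_VARIABLE_SIZE}')
    return result

print(build({'value_type': 'string', 'value': 'hello'}))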

View File

@ -1,4 +1,5 @@
import json
import sys
from collections.abc import Mapping, Sequence
from typing import Any
@ -37,6 +38,10 @@ class Segment(BaseModel):
def markdown(self) -> str:
return str(self.value)
@property
def size(self) -> int:
return sys.getsizeof(self.value)
def to_object(self) -> Any:
return self.value
@ -85,54 +90,45 @@ class FileSegment(Segment):
class ObjectSegment(Segment):
value_type: SegmentType = SegmentType.OBJECT
value: Mapping[str, Segment]
value: Mapping[str, Any]
@property
def text(self) -> str:
# TODO: Process variables.
return json.dumps(self.model_dump()['value'], ensure_ascii=False)
@property
def log(self) -> str:
# TODO: Process variables.
return json.dumps(self.model_dump()['value'], ensure_ascii=False, indent=2)
@property
def markdown(self) -> str:
# TODO: Use markdown code block
return json.dumps(self.model_dump()['value'], ensure_ascii=False, indent=2)
def to_object(self):
return {k: v.to_object() for k, v in self.value.items()}
class ArraySegment(Segment):
@property
def markdown(self) -> str:
return '\n'.join(['- ' + item.markdown for item in self.value])
def to_object(self):
return [v.to_object() for v in self.value]
class ArrayAnySegment(ArraySegment):
value_type: SegmentType = SegmentType.ARRAY_ANY
value: Sequence[Segment]
value: Sequence[Any]
class ArrayStringSegment(ArraySegment):
value_type: SegmentType = SegmentType.ARRAY_STRING
value: Sequence[StringSegment]
value: Sequence[str]
class ArrayNumberSegment(ArraySegment):
value_type: SegmentType = SegmentType.ARRAY_NUMBER
value: Sequence[FloatSegment | IntegerSegment]
value: Sequence[float | int]
class ArrayObjectSegment(ArraySegment):
value_type: SegmentType = SegmentType.ARRAY_OBJECT
value: Sequence[ObjectSegment]
value: Sequence[Mapping[str, Any]]
class ArrayFileSegment(ArraySegment):
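The new size property feeds the MAX_VARIABLE_SIZE check. It uses sys.getsizeof, which reports only the shallow size of the wrapped value; a quick standalone illustration:

import sys

print(sys.getsizeof('x' * 5 * 1024))  # a 5 KB string already exceeds the default limit
print(sys.getsizeof([1, 2, 3]))       # shallow: the list's elements are not counted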

View File

@ -48,7 +48,8 @@ from core.model_runtime.entities.message_entities import (
)
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.model_runtime.utils.encoders import jsonable_encoder
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask, TraceTaskName
from core.ops.entities.trace_entity import TraceTaskName
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.prompt.utils.prompt_message_util import PromptMessageUtil
from core.prompt.utils.prompt_template_parser import PromptTemplateParser
from events.message_event import message_was_created

View File

@ -24,7 +24,8 @@ from core.app.entities.task_entities import (
)
from core.file.file_obj import FileVar
from core.model_runtime.utils.encoders import jsonable_encoder
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask, TraceTaskName
from core.ops.entities.trace_entity import TraceTaskName
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.tools.tool_manager import ToolManager
from core.workflow.entities.node_entities import NodeType, SystemVariable
from core.workflow.nodes.tool.entities import ToolNodeData
@ -42,6 +43,7 @@ from models.workflow import (
WorkflowRunStatus,
WorkflowRunTriggeredFrom,
)
from services.workflow_service import WorkflowService
class WorkflowCycleManage:
@ -50,7 +52,7 @@ class WorkflowCycleManage:
_user: Union[Account, EndUser]
_task_state: WorkflowTaskState
_workflow_system_variables: dict[SystemVariable, Any]
def _handle_workflow_run_start(self) -> WorkflowRun:
max_sequence = (
db.session.query(db.func.max(WorkflowRun.sequence_number))
@ -71,7 +73,7 @@ class WorkflowCycleManage:
inputs = WorkflowEntry.handle_special_values(inputs)
triggered_from= (
WorkflowRunTriggeredFrom.DEBUGGING
if self._application_generate_entity.invoke_from == InvokeFrom.DEBUGGER
else WorkflowRunTriggeredFrom.APP_RUN
)
@ -99,7 +101,7 @@ class WorkflowCycleManage:
db.session.close()
return workflow_run
def _handle_workflow_run_success(
self,
workflow_run: WorkflowRun,
@ -121,7 +123,7 @@ class WorkflowCycleManage:
:return:
"""
workflow_run = self._refetch_workflow_run(workflow_run.id)
workflow_run.status = WorkflowRunStatus.SUCCEEDED.value
workflow_run.outputs = outputs
workflow_run.elapsed_time = time.perf_counter() - start_at
@ -138,6 +140,7 @@ class WorkflowCycleManage:
TraceTaskName.WORKFLOW_TRACE,
workflow_run=workflow_run,
conversation_id=conversation_id,
user_id=trace_manager.user_id,
)
)
@ -185,11 +188,12 @@ class WorkflowCycleManage:
TraceTaskName.WORKFLOW_TRACE,
workflow_run=workflow_run,
conversation_id=conversation_id,
user_id=trace_manager.user_id,
)
)
return workflow_run
def _handle_node_execution_start(self, workflow_run: WorkflowRun, event: QueueNodeStartedEvent) -> WorkflowNodeExecution:
# init workflow node execution
workflow_node_execution = WorkflowNodeExecution()
@ -250,7 +254,7 @@ class WorkflowCycleManage:
:return:
"""
workflow_node_execution = self._refetch_workflow_node_execution(event.node_execution_id)
inputs = WorkflowEntry.handle_special_values(event.inputs)
outputs = WorkflowEntry.handle_special_values(event.outputs)
@ -267,7 +271,7 @@ class WorkflowCycleManage:
db.session.close()
return workflow_node_execution
#################################################
# to stream responses #
#################################################
@ -406,10 +410,10 @@ class WorkflowCycleManage:
files=self._fetch_files_from_node_outputs(workflow_node_execution.outputs_dict or {}),
),
)
def _workflow_iteration_start_to_stream_response(
self,
task_id: str,
workflow_run: WorkflowRun,
event: QueueIterationStartEvent
) -> IterationNodeStartStreamResponse:
@ -434,7 +438,7 @@ class WorkflowCycleManage:
metadata=event.metadata or {}
)
)
def _workflow_iteration_next_to_stream_response(self, task_id: str, workflow_run: WorkflowRun, event: QueueIterationNextEvent) -> IterationNodeNextStreamResponse:
"""
Workflow iteration next to stream response
@ -457,7 +461,7 @@ class WorkflowCycleManage:
extras={}
)
)
def _workflow_iteration_completed_to_stream_response(self, task_id: str, workflow_run: WorkflowRun, event: QueueIterationCompletedEvent) -> IterationNodeCompletedStreamResponse:
"""
Workflow iteration completed to stream response
@ -552,10 +556,10 @@ class WorkflowCycleManage:
"""
workflow_run = db.session.query(WorkflowRun).filter(
WorkflowRun.id == workflow_run_id).first()
if not workflow_run:
raise Exception(f'Workflow run not found: {workflow_run_id}')
return workflow_run
def _refetch_workflow_node_execution(self, node_execution_id: str) -> WorkflowNodeExecution:
@ -578,5 +582,5 @@ class WorkflowCycleManage:
if not workflow_node_execution:
raise Exception(f'Workflow node execution not found: {node_execution_id}')
return workflow_node_execution

View File

@ -4,7 +4,8 @@ from typing import Any, Optional, TextIO, Union
from pydantic import BaseModel
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask, TraceTaskName
from core.ops.entities.trace_entity import TraceTaskName
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.tools.entities.tool_entities import ToolInvokeMessage
_TEXT_COLOR_MAPPING = {

View File

@ -8,6 +8,7 @@ from typing import Optional
from pydantic import BaseModel, ConfigDict
from constants import HIDDEN_VALUE
from core.entities.model_entities import ModelStatus, ModelWithProviderEntity, SimpleModelProviderEntity
from core.entities.provider_entities import (
CustomConfiguration,
@ -202,7 +203,7 @@ class ProviderConfiguration(BaseModel):
for key, value in credentials.items():
if key in provider_credential_secret_variables:
# if send [__HIDDEN__] in secret input, it will be same as original value
if value == '[__HIDDEN__]' and key in original_credentials:
if value == HIDDEN_VALUE and key in original_credentials:
credentials[key] = encrypter.decrypt_token(self.tenant_id, original_credentials[key])
credentials = model_provider_factory.provider_credentials_validate(
@ -345,7 +346,7 @@ class ProviderConfiguration(BaseModel):
for key, value in credentials.items():
if key in provider_credential_secret_variables:
# if send [__HIDDEN__] in secret input, it will be same as original value
if value == '[__HIDDEN__]' and key in original_credentials:
if value == HIDDEN_VALUE and key in original_credentials:
credentials[key] = encrypter.decrypt_token(self.tenant_id, original_credentials[key])
credentials = model_provider_factory.model_credentials_validate(

View File

@ -1,14 +1,19 @@
import enum
from typing import Optional
from typing import Any, Optional
from pydantic import BaseModel
from core.app.app_config.entities import FileExtraConfig
from core.file.tool_file_parser import ToolFileParser
from core.file.upload_file_parser import UploadFileParser
from core.model_runtime.entities.message_entities import ImagePromptMessageContent
from extensions.ext_database import db
from models.model import UploadFile
class FileExtraConfig(BaseModel):
"""
File Upload Entity.
"""
image_config: Optional[dict[str, Any]] = None
class FileType(enum.Enum):
@ -114,6 +119,7 @@ class FileVar(BaseModel):
)
def _get_data(self, force_url: bool = False) -> Optional[str]:
from models.model import UploadFile
if self.type == FileType.IMAGE:
if self.transfer_method == FileTransferMethod.REMOTE_URL:
return self.url

View File

@ -1,10 +1,11 @@
import re
from collections.abc import Mapping, Sequence
from typing import Any, Union
from urllib.parse import parse_qs, urlparse
import requests
from core.app.app_config.entities import FileExtraConfig
from core.file.file_obj import FileBelongsTo, FileTransferMethod, FileType, FileVar
from core.file.file_obj import FileBelongsTo, FileExtraConfig, FileTransferMethod, FileType, FileVar
from extensions.ext_database import db
from models.account import Account
from models.model import EndUser, MessageFile, UploadFile
@ -186,6 +187,30 @@ class MessageFileParser:
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
def is_s3_presigned_url(url):
try:
parsed_url = urlparse(url)
if 'amazonaws.com' not in parsed_url.netloc:
return False
query_params = parse_qs(parsed_url.query)
required_params = ['Signature', 'Expires']
for param in required_params:
if param not in query_params:
return False
if not query_params['Expires'][0].isdigit():
return False
signature = query_params['Signature'][0]
if not re.match(r'^[A-Za-z0-9+/]+={0,2}$', signature):
return False
return True
except Exception:
return False
if is_s3_presigned_url(url):
response = requests.get(url, headers=headers, allow_redirects=True)
if response.status_code in {200, 304}:
return True, ""
response = requests.head(url, headers=headers, allow_redirects=True)
if response.status_code in {200, 304}:
return True, ""
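Assuming the is_s3_presigned_url helper above is in scope, two illustrative inputs (both URLs are made up): a presigned S3 URL is fetched with GET, while anything else falls through to the existing HEAD probe.

print(is_s3_presigned_url('https://bucket.s3.amazonaws.com/key.png?Signature=abc123==&Expires=1700000000'))  # True
print(is_s3_presigned_url('https://example.com/image.png'))  # False: not an amazonaws.com host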

View File

@ -107,11 +107,11 @@ class CodeExecutor:
response = response.json()
except:
raise CodeExecutionException('Failed to parse response')
if (code := response.get('code')) != 0:
raise CodeExecutionException(f"Got error code: {code}. Got error msg: {response.get('message')}")
response = CodeExecutionResponse(**response)
if response.code != 0:
raise CodeExecutionException(response.message)
if response.data.error:
raise CodeExecutionException(response.data.error)
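The executor now parses the sandbox reply into a typed CodeExecutionResponse before branching, instead of poking at a raw dict. A minimal sketch of that pattern; only the code, message, and data.error fields appear in this diff, and the rest of the schema is assumed:

from typing import Optional
from pydantic import BaseModel

class CodeExecutionData(BaseModel):
    error: Optional[str] = None
    stdout: Optional[str] = None  # assumed field, for illustration

class CodeExecutionResponseSketch(BaseModel):
    code: int
    message: Optional[str] = None
    data: CodeExecutionData

resp = CodeExecutionResponseSketch(**{'code': 0, 'data': {'stdout': 'ok'}})
assert resp.code == 0 and not resp.data.error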

View File

@ -2,7 +2,6 @@ import base64
from extensions.ext_database import db
from libs import rsa
from models.account import Tenant
def obfuscated_token(token: str):
@ -14,6 +13,7 @@ def obfuscated_token(token: str):
def encrypt_token(tenant_id: str, token: str):
from models.account import Tenant
if not (tenant := db.session.query(Tenant).filter(Tenant.id == tenant_id).first()):
raise ValueError(f'Tenant with id {tenant_id} not found')
encrypted_token = rsa.encrypt(token, tenant.encrypt_public_key)

View File

@ -73,6 +73,8 @@ class HostingConfiguration:
quota_limit=hosted_quota_limit,
restrict_models=[
RestrictModel(model="gpt-4", base_model_name="gpt-4", model_type=ModelType.LLM),
RestrictModel(model="gpt-4o", base_model_name="gpt-4o", model_type=ModelType.LLM),
RestrictModel(model="gpt-4o-mini", base_model_name="gpt-4o-mini", model_type=ModelType.LLM),
RestrictModel(model="gpt-4-32k", base_model_name="gpt-4-32k", model_type=ModelType.LLM),
RestrictModel(model="gpt-4-1106-preview", base_model_name="gpt-4-1106-preview", model_type=ModelType.LLM),
RestrictModel(model="gpt-4-vision-preview", base_model_name="gpt-4-vision-preview", model_type=ModelType.LLM),

View File

@ -14,7 +14,8 @@ from core.model_manager import ModelManager
from core.model_runtime.entities.message_entities import SystemPromptMessage, UserPromptMessage
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask, TraceTaskName
from core.ops.entities.trace_entity import TraceTaskName
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.ops.utils import measure_time
from core.prompt.utils.prompt_template_parser import PromptTemplateParser

View File

@ -1,18 +1,16 @@
import hashlib
import logging
import re
import subprocess
import uuid
from abc import abstractmethod
from typing import Optional
from pydantic import ConfigDict
from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.model_runtime.model_providers.__base.ai_model import AIModel
logger = logging.getLogger(__name__)
class TTSModel(AIModel):
"""
Model class for text-to-speech (TTS) model.
@ -37,8 +35,6 @@ class TTSModel(AIModel):
:return: translated audio file
"""
try:
logger.info(f"Invoke TTS model: {model} , invoke content : {content_text}")
self._is_ffmpeg_installed()
return self._invoke(model=model, credentials=credentials, user=user,
content_text=content_text, voice=voice, tenant_id=tenant_id)
except Exception as e:
@ -75,7 +71,8 @@ class TTSModel(AIModel):
if model_schema and ModelPropertyKey.VOICES in model_schema.model_properties:
voices = model_schema.model_properties[ModelPropertyKey.VOICES]
if language:
return [{'name': d['name'], 'value': d['mode']} for d in voices if language and language in d.get('language')]
return [{'name': d['name'], 'value': d['mode']} for d in voices if
language and language in d.get('language')]
else:
return [{'name': d['name'], 'value': d['mode']} for d in voices]
@ -146,28 +143,3 @@ class TTSModel(AIModel):
if one_sentence != '':
result.append(one_sentence)
return result
@staticmethod
def _is_ffmpeg_installed():
try:
output = subprocess.check_output("ffmpeg -version", shell=True)
if "ffmpeg version" in output.decode("utf-8"):
return True
else:
raise InvokeBadRequestError("ffmpeg is not installed, "
"details: https://docs.dify.ai/getting-started/install-self-hosted"
"/install-faq#id-14.-what-to-do-if-this-error-occurs-in-text-to-speech")
except Exception:
raise InvokeBadRequestError("ffmpeg is not installed, "
"details: https://docs.dify.ai/getting-started/install-self-hosted"
"/install-faq#id-14.-what-to-do-if-this-error-occurs-in-text-to-speech")
# Todo: To improve the streaming function
@staticmethod
def _get_file_name(file_content: str) -> str:
hash_object = hashlib.sha256(file_content.encode())
hex_digest = hash_object.hexdigest()
namespace_uuid = uuid.UUID('a5da6ef9-b303-596f-8e88-bf8fa40f4b31')
unique_uuid = uuid.uuid5(namespace_uuid, hex_digest)
return str(unique_uuid)

View File

@ -6,6 +6,7 @@
- nvidia
- nvidia_nim
- cohere
- upstage
- bedrock
- togetherai
- openrouter
@ -35,3 +36,4 @@
- hunyuan
- siliconflow
- perfxcloud
- zhinao

View File

@ -116,7 +116,8 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
# Add the new header for claude-3-5-sonnet-20240620 model
extra_headers = {}
if model == "claude-3-5-sonnet-20240620":
extra_headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15"
if model_parameters.get('max_tokens') > 4096:
extra_headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15"
if tools:
extra_model_kwargs['tools'] = [
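The beta header that raises the claude-3-5-sonnet output ceiling is now sent only when the caller requests more than the default 4096 tokens. Sketched standalone, with a defensive default of 0 since max_tokens may be absent (that default is an addition of this sketch, not of the diff):

extra_headers = {}
model = 'claude-3-5-sonnet-20240620'
model_parameters = {'max_tokens': 8192}
if model == 'claude-3-5-sonnet-20240620' and model_parameters.get('max_tokens', 0) > 4096:
    extra_headers['anthropic-beta'] = 'max-tokens-3-5-sonnet-2024-07-15'
print(extra_headers)  # {'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15'}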

View File

@ -496,6 +496,158 @@ LLM_BASE_MODELS = [
)
)
),
AzureBaseModel(
base_model_name='gpt-4o-mini',
entity=AIModelEntity(
model='fake-deployment-name',
label=I18nObject(
en_US='fake-deployment-name-label',
),
model_type=ModelType.LLM,
features=[
ModelFeature.AGENT_THOUGHT,
ModelFeature.VISION,
ModelFeature.MULTI_TOOL_CALL,
ModelFeature.STREAM_TOOL_CALL,
],
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_properties={
ModelPropertyKey.MODE: LLMMode.CHAT.value,
ModelPropertyKey.CONTEXT_SIZE: 128000,
},
parameter_rules=[
ParameterRule(
name='temperature',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
),
ParameterRule(
name='top_p',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
),
ParameterRule(
name='presence_penalty',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
),
ParameterRule(
name='frequency_penalty',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
),
_get_max_tokens(default=512, min_val=1, max_val=16384),
ParameterRule(
name='seed',
label=I18nObject(
zh_Hans='种子',
en_US='Seed'
),
type='int',
help=I18nObject(
zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint 响应参数来监视变化。',
en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.'
),
required=False,
precision=2,
min=0,
max=1,
),
ParameterRule(
name='response_format',
label=I18nObject(
zh_Hans='回复格式',
en_US='response_format'
),
type='string',
help=I18nObject(
zh_Hans='指定模型必须输出的格式',
en_US='specifying the format that the model must output'
),
required=False,
options=['text', 'json_object']
),
],
pricing=PriceConfig(
input=0.150,
output=0.600,
unit=0.000001,
currency='USD',
)
)
),
AzureBaseModel(
base_model_name='gpt-4o-mini-2024-07-18',
entity=AIModelEntity(
model='fake-deployment-name',
label=I18nObject(
en_US='fake-deployment-name-label',
),
model_type=ModelType.LLM,
features=[
ModelFeature.AGENT_THOUGHT,
ModelFeature.VISION,
ModelFeature.MULTI_TOOL_CALL,
ModelFeature.STREAM_TOOL_CALL,
],
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_properties={
ModelPropertyKey.MODE: LLMMode.CHAT.value,
ModelPropertyKey.CONTEXT_SIZE: 128000,
},
parameter_rules=[
ParameterRule(
name='temperature',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
),
ParameterRule(
name='top_p',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
),
ParameterRule(
name='presence_penalty',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
),
ParameterRule(
name='frequency_penalty',
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
),
_get_max_tokens(default=512, min_val=1, max_val=16384),
ParameterRule(
name='seed',
label=I18nObject(
zh_Hans='种子',
en_US='Seed'
),
type='int',
help=I18nObject(
zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint 响应参数来监视变化。',
en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.'
),
required=False,
precision=2,
min=0,
max=1,
),
ParameterRule(
name='response_format',
label=I18nObject(
zh_Hans='回复格式',
en_US='response_format'
),
type='string',
help=I18nObject(
zh_Hans='指定模型必须输出的格式',
en_US='specifying the format that the model must output'
),
required=False,
options=['text', 'json_object']
),
],
pricing=PriceConfig(
input=0.150,
output=0.600,
unit=0.000001,
currency='USD',
)
)
),
AzureBaseModel(
base_model_name='gpt-4o',
entity=AIModelEntity(

View File

@ -114,6 +114,18 @@ model_credential_schema:
show_on:
- variable: __model_type
value: llm
- label:
en_US: gpt-4o-mini
value: gpt-4o-mini
show_on:
- variable: __model_type
value: llm
- label:
en_US: gpt-4o-mini-2024-07-18
value: gpt-4o-mini-2024-07-18
show_on:
- variable: __model_type
value: llm
- label:
en_US: gpt-4o
value: gpt-4o

View File

@ -1,12 +1,8 @@
import concurrent.futures
import copy
from functools import reduce
from io import BytesIO
from typing import Optional
from flask import Response
from openai import AzureOpenAI
from pydub import AudioSegment
from core.model_runtime.entities.model_entities import AIModelEntity
from core.model_runtime.errors.invoke import InvokeBadRequestError
@ -51,7 +47,7 @@ class AzureOpenAIText2SpeechModel(_CommonAzureOpenAI, TTSModel):
:return: text translated to audio file
"""
try:
self._tts_invoke(
self._tts_invoke_streaming(
model=model,
credentials=credentials,
content_text='Hello Dify!',
@ -60,45 +56,6 @@ class AzureOpenAIText2SpeechModel(_CommonAzureOpenAI, TTSModel):
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
def _tts_invoke(self, model: str, credentials: dict, content_text: str, voice: str) -> Response:
"""
_tts_invoke text2speech model
:param model: model name
:param credentials: model credentials
:param content_text: text content to be translated
:param voice: model timbre
:return: text translated to audio file
"""
audio_type = self._get_model_audio_type(model, credentials)
word_limit = self._get_model_word_limit(model, credentials)
max_workers = self._get_model_workers_limit(model, credentials)
try:
sentences = list(self._split_text_into_sentences(org_text=content_text, max_length=word_limit))
audio_bytes_list = []
# Create a thread pool and map the function to the list of sentences
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = [executor.submit(self._process_sentence, sentence=sentence, model=model, voice=voice,
credentials=credentials) for sentence in sentences]
for future in futures:
try:
if future.result():
audio_bytes_list.append(future.result())
except Exception as ex:
raise InvokeBadRequestError(str(ex))
if len(audio_bytes_list) > 0:
audio_segments = [AudioSegment.from_file(BytesIO(audio_bytes), format=audio_type) for audio_bytes in
audio_bytes_list if audio_bytes]
combined_segment = reduce(lambda x, y: x + y, audio_segments)
buffer: BytesIO = BytesIO()
combined_segment.export(buffer, format=audio_type)
buffer.seek(0)
return Response(buffer.read(), status=200, mimetype=f"audio/{audio_type}")
except Exception as ex:
raise InvokeBadRequestError(str(ex))
def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str,
voice: str) -> any:
"""
@ -144,7 +101,6 @@ class AzureOpenAIText2SpeechModel(_CommonAzureOpenAI, TTSModel):
:param sentence: text content to be translated
:return: text translated to audio file
"""
# transform credentials to kwargs for model instance
credentials_kwargs = self._to_credential_kwargs(credentials)
client = AzureOpenAI(**credentials_kwargs)
response = client.audio.speech.create(model=model, voice=voice, input=sentence.strip())

View File

@ -379,8 +379,12 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
if not message_content.data.startswith("data:"):
# fetch image data from url
try:
image_content = requests.get(message_content.data).content
mime_type, _ = mimetypes.guess_type(message_content.data)
url = message_content.data
image_content = requests.get(url).content
if '?' in url:
url = url.split('?')[0]
mime_type, _ = mimetypes.guess_type(url)
base64_data = base64.b64encode(image_content).decode('utf-8')
except Exception as ex:
raise ValueError(f"Failed to fetch image data from url {message_content.data}, {ex}")
else:
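The query string is stripped before guessing the MIME type because mimetypes keys off the path extension, and a presigned-URL query defeats it. Illustrative URL only:

import mimetypes

url = 'https://bucket.s3.amazonaws.com/cat.png?Signature=abc&Expires=1700000000'
print(mimetypes.guess_type(url))                # likely (None, None): the query hides the extension
print(mimetypes.guess_type(url.split('?')[0]))  # ('image/png', None)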

View File

@ -5,6 +5,8 @@ label:
model_type: llm
features:
- agent-thought
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 128000

View File

@ -5,6 +5,8 @@ label:
model_type: llm
features:
- agent-thought
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 128000

View File

@ -19,7 +19,7 @@ parameter_rules:
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
input: '0.59'
output: '0.79'
unit: '0.000001'
currency: USD

View File

@ -19,7 +19,7 @@ parameter_rules:
min: 1
max: 8192
pricing:
input: '0.59'
output: '0.79'
input: '0.05'
output: '0.08'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,11 @@
import logging
from core.model_runtime.model_providers.__base.model_provider import ModelProvider
logger = logging.getLogger(__name__)
class HuggingfaceTeiProvider(ModelProvider):
    def validate_provider_credentials(self, credentials: dict) -> None:
        # TEI models are added as customizable models, so credentials are
        # validated per model rather than at the provider level.
        pass

View File

@ -0,0 +1,36 @@
provider: huggingface_tei
label:
en_US: Text Embedding Inference
description:
en_US: A blazing fast inference solution for text embeddings models.
zh_Hans: 用于文本嵌入模型的超快速推理解决方案。
background: "#FFF8DC"
help:
title:
en_US: How to deploy Text Embedding Inference
zh_Hans: 如何部署 Text Embedding Inference
url:
en_US: https://github.com/huggingface/text-embeddings-inference
supported_model_types:
- text-embedding
- rerank
configurate_methods:
- customizable-model
model_credential_schema:
model:
label:
en_US: Model Name
zh_Hans: 模型名称
placeholder:
en_US: Enter your model name
zh_Hans: 输入模型名称
credential_form_schemas:
- variable: server_url
label:
zh_Hans: 服务器URL
en_US: Server url
type: secret-input
required: true
placeholder:
zh_Hans: 在此输入Text Embedding Inference的服务器地址如 http://192.168.1.100:8080
en_US: Enter the url of your Text Embedding Inference, e.g. http://192.168.1.100:8080
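
A quick way to sanity-check a server before adding it here is to hit the /info endpoint the provider relies on. A minimal sketch, assuming a TEI instance at the example address above:

import httpx

resp = httpx.get('http://192.168.1.100:8080/info', timeout=10)
resp.raise_for_status()
info = resp.json()
# model_type is a single-key dict such as {'embedding': {...}} or {'reranker': {...}}
print(list(info['model_type'].keys())[0], info.get('max_input_length'), info.get('max_client_batch_size'))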

View File

@ -0,0 +1,137 @@
from typing import Optional
import httpx
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType
from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
from core.model_runtime.errors.invoke import (
InvokeAuthorizationError,
InvokeBadRequestError,
InvokeConnectionError,
InvokeError,
InvokeRateLimitError,
InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.rerank_model import RerankModel
from core.model_runtime.model_providers.huggingface_tei.tei_helper import TeiHelper
class HuggingfaceTeiRerankModel(RerankModel):
"""
Model class for Text Embedding Inference rerank model.
"""
def _invoke(
self,
model: str,
credentials: dict,
query: str,
docs: list[str],
score_threshold: Optional[float] = None,
top_n: Optional[int] = None,
user: Optional[str] = None,
) -> RerankResult:
"""
Invoke rerank model
:param model: model name
:param credentials: model credentials
:param query: search query
:param docs: docs for reranking
:param score_threshold: score threshold
:param top_n: top n
:param user: unique user id
:return: rerank result
"""
if len(docs) == 0:
return RerankResult(model=model, docs=[])
server_url = credentials['server_url']
if server_url.endswith('/'):
server_url = server_url[:-1]
try:
results = TeiHelper.invoke_rerank(server_url, query, docs)
rerank_documents = []
for result in results:
rerank_document = RerankDocument(
index=result['index'],
text=result['text'],
score=result['score'],
)
if score_threshold is None or result['score'] >= score_threshold:
rerank_documents.append(rerank_document)
if top_n is not None and len(rerank_documents) >= top_n:
break
return RerankResult(model=model, docs=rerank_documents)
except httpx.HTTPStatusError as e:
raise InvokeServerUnavailableError(str(e))
def validate_credentials(self, model: str, credentials: dict) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return:
"""
try:
server_url = credentials['server_url']
extra_args = TeiHelper.get_tei_extra_parameter(server_url, model)
if extra_args.model_type != 'reranker':
raise CredentialsValidateFailedError('Current model is not a rerank model')
credentials['context_size'] = extra_args.max_input_length
self.invoke(
model=model,
credentials=credentials,
query='Whose kasumi',
docs=[
'Kasumi is a girl\'s name of Japanese origin meaning "mist".',
'Her music is a kawaii bass, a mix of future bass, pop, and kawaii music ',
'and she leads a team named PopiParty.',
],
score_threshold=0.8,
)
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
"""
Map model invoke error to unified error
The key is the error type thrown to the caller
The value is the error type thrown by the model,
which needs to be converted into a unified error type for the caller.
:return: Invoke error mapping
"""
return {
InvokeConnectionError: [InvokeConnectionError],
InvokeServerUnavailableError: [InvokeServerUnavailableError],
InvokeRateLimitError: [InvokeRateLimitError],
InvokeAuthorizationError: [InvokeAuthorizationError],
InvokeBadRequestError: [InvokeBadRequestError, KeyError, ValueError],
}
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
"""
used to define customizable model schema
"""
entity = AIModelEntity(
model=model,
label=I18nObject(en_US=model),
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_type=ModelType.RERANK,
model_properties={
ModelPropertyKey.CONTEXT_SIZE: int(credentials.get('context_size', 512)),
},
parameter_rules=[],
)
return entity
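
A toy walkthrough (made-up scores) of how score_threshold and top_n interact in _invoke above: sub-threshold documents are dropped, and iteration stops once top_n survivors have been kept.

results = [
    {'index': 0, 'text': 'doc a', 'score': 0.99},
    {'index': 1, 'text': 'doc b', 'score': 0.42},
    {'index': 2, 'text': 'doc c', 'score': 0.87},
]
score_threshold, top_n = 0.5, 2
kept = []
for r in results:
    if score_threshold is None or r['score'] >= score_threshold:
        kept.append(r)
        if top_n is not None and len(kept) >= top_n:
            break
print([r['index'] for r in kept])  # [0, 2]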

View File

@ -0,0 +1,183 @@
from threading import Lock
from time import time
from typing import Optional
import httpx
from requests.adapters import HTTPAdapter
from requests.exceptions import ConnectionError, MissingSchema, Timeout
from requests.sessions import Session
from yarl import URL
class TeiModelExtraParameter:
model_type: str
max_input_length: int
max_client_batch_size: int
def __init__(self, model_type: str, max_input_length: int, max_client_batch_size: Optional[int] = None) -> None:
self.model_type = model_type
self.max_input_length = max_input_length
self.max_client_batch_size = max_client_batch_size
cache = {}
cache_lock = Lock()
class TeiHelper:
@staticmethod
def get_tei_extra_parameter(server_url: str, model_name: str) -> TeiModelExtraParameter:
TeiHelper._clean_cache()
with cache_lock:
if model_name not in cache:
cache[model_name] = {
'expires': time() + 300,
'value': TeiHelper._get_tei_extra_parameter(server_url),
}
return cache[model_name]['value']
@staticmethod
def _clean_cache() -> None:
try:
with cache_lock:
expired_keys = [model_uid for model_uid, model in cache.items() if model['expires'] < time()]
for model_uid in expired_keys:
del cache[model_uid]
except RuntimeError:
    pass
@staticmethod
def _get_tei_extra_parameter(server_url: str) -> TeiModelExtraParameter:
"""
get tei model extra parameter like model_type, max_input_length, max_batch_requests
"""
url = str(URL(server_url) / 'info')
# This method runs under a lock, and a default requests session may hang forever,
# so mount an HTTPAdapter with max_retries=3.
session = Session()
session.mount('http://', HTTPAdapter(max_retries=3))
session.mount('https://', HTTPAdapter(max_retries=3))
try:
response = session.get(url, timeout=10)
except (MissingSchema, ConnectionError, Timeout) as e:
raise RuntimeError(f'get tei model extra parameter failed, url: {url}, error: {e}')
if response.status_code != 200:
raise RuntimeError(
f'get tei model extra parameter failed, status code: {response.status_code}, response: {response.text}'
)
response_json = response.json()
model_type = response_json.get('model_type', {})
if len(model_type.keys()) < 1:
raise RuntimeError('model_type is empty')
model_type = list(model_type.keys())[0]
if model_type not in ['embedding', 'reranker']:
raise RuntimeError(f'invalid model_type: {model_type}')
max_input_length = response_json.get('max_input_length', 512)
max_client_batch_size = response_json.get('max_client_batch_size', 1)
return TeiModelExtraParameter(
model_type=model_type,
max_input_length=max_input_length,
max_client_batch_size=max_client_batch_size
)
@staticmethod
def invoke_tokenize(server_url: str, texts: list[str]) -> list[list[dict]]:
"""
Invoke tokenize endpoint
Example response:
[
[
{
"id": 0,
"text": "<s>",
"special": true,
"start": null,
"stop": null
},
{
"id": 7704,
"text": "str",
"special": false,
"start": 0,
"stop": 3
},
< MORE TOKENS >
]
]
:param server_url: server url
:param texts: texts to tokenize
"""
resp = httpx.post(
f'{server_url}/tokenize',
json={'inputs': texts},
)
resp.raise_for_status()
return resp.json()
@staticmethod
def invoke_embeddings(server_url: str, texts: list[str]) -> dict:
"""
Invoke embeddings endpoint
Example response:
{
"object": "list",
"data": [
{
"object": "embedding",
"embedding": [...],
"index": 0
}
],
"model": "MODEL_NAME",
"usage": {
"prompt_tokens": 3,
"total_tokens": 3
}
}
:param server_url: server url
:param texts: texts to embed
"""
# Use OpenAI compatible API here, which has usage tracking
resp = httpx.post(
f'{server_url}/v1/embeddings',
json={'input': texts},
)
resp.raise_for_status()
return resp.json()
@staticmethod
def invoke_rerank(server_url: str, query: str, docs: list[str]) -> list[dict]:
"""
Invoke rerank endpoint
Example response:
[
{
"index": 0,
"text": "Deep Learning is ...",
"score": 0.9950755
}
]
:param server_url: server url
:param query: search query
:param docs: documents to rerank
"""
params = {'query': query, 'texts': docs, 'return_text': True}
response = httpx.post(
server_url + '/rerank',
json=params,
)
response.raise_for_status()
return response.json()
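
A minimal usage sketch of the helper above, assuming a TEI embedding server at http://localhost:8080 (hypothetical address):

from core.model_runtime.model_providers.huggingface_tei.tei_helper import TeiHelper

params = TeiHelper.get_tei_extra_parameter('http://localhost:8080', 'my-model')
print(params.model_type, params.max_input_length, params.max_client_batch_size)

tokens = TeiHelper.invoke_tokenize('http://localhost:8080', ['Hello Dify!'])
print(len(tokens[0]))  # token count of the first input

Note that get_tei_extra_parameter caches results per model name for 300 seconds, so repeated schema lookups do not re-hit /info.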

View File

@ -0,0 +1,204 @@
import time
from typing import Optional
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
InvokeAuthorizationError,
InvokeBadRequestError,
InvokeConnectionError,
InvokeError,
InvokeRateLimitError,
InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
from core.model_runtime.model_providers.huggingface_tei.tei_helper import TeiHelper
class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
"""
Model class for Text Embedding Inference text embedding model.
"""
def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
) -> TextEmbeddingResult:
"""
Invoke text embedding model
credentials should be like:
{
'server_url': 'server url',
}
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:return: embeddings result
"""
server_url = credentials['server_url']
if server_url.endswith('/'):
server_url = server_url[:-1]
# get model properties
context_size = self._get_context_size(model, credentials)
max_chunks = self._get_max_chunks(model, credentials)
inputs = []
indices = []
used_tokens = 0
# get tokenized results from TEI
batched_tokenize_result = TeiHelper.invoke_tokenize(server_url, texts)
for i, (text, tokenize_result) in enumerate(zip(texts, batched_tokenize_result)):
# Check if the number of tokens is larger than the context size
num_tokens = len(tokenize_result)
if num_tokens >= context_size:
# Find the best cutoff point
pre_special_token_count = 0
for token in tokenize_result:
if token['special']:
pre_special_token_count += 1
else:
break
rest_special_token_count = len([token for token in tokenize_result if token['special']]) - pre_special_token_count
# Calculate the cutoff point, leaving 20 tokens of headroom to avoid exceeding the limit
token_cutoff = context_size - rest_special_token_count - 20
# Find the cutoff index
cutpoint_token = tokenize_result[token_cutoff]
cutoff = cutpoint_token['start']
inputs.append(text[0: cutoff])
else:
inputs.append(text)
indices += [i]
batched_embeddings = []
_iter = range(0, len(inputs), max_chunks)
try:
used_tokens = 0
for i in _iter:
iter_texts = inputs[i : i + max_chunks]
results = TeiHelper.invoke_embeddings(server_url, iter_texts)
embeddings = results['data']
embeddings = [embedding['embedding'] for embedding in embeddings]
batched_embeddings.extend(embeddings)
usage = results['usage']
used_tokens += usage['total_tokens']
except RuntimeError as e:
raise InvokeServerUnavailableError(str(e))
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
result = TextEmbeddingResult(model=model, embeddings=batched_embeddings, usage=usage)
return result
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
"""
Get number of tokens for given prompt messages
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:return:
"""
num_tokens = 0
server_url = credentials['server_url']
if server_url.endswith('/'):
server_url = server_url[:-1]
batch_tokens = TeiHelper.invoke_tokenize(server_url, texts)
num_tokens = sum(len(tokens) for tokens in batch_tokens)
return num_tokens
def validate_credentials(self, model: str, credentials: dict) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return:
"""
try:
server_url = credentials['server_url']
extra_args = TeiHelper.get_tei_extra_parameter(server_url, model)
if extra_args.model_type != 'embedding':
raise CredentialsValidateFailedError('Current model is not an embedding model')
credentials['context_size'] = extra_args.max_input_length
credentials['max_chunks'] = extra_args.max_client_batch_size
self._invoke(model=model, credentials=credentials, texts=['ping'])
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
return {
InvokeConnectionError: [InvokeConnectionError],
InvokeServerUnavailableError: [InvokeServerUnavailableError],
InvokeRateLimitError: [InvokeRateLimitError],
InvokeAuthorizationError: [InvokeAuthorizationError],
InvokeBadRequestError: [KeyError],
}
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
"""
Calculate response usage
:param model: model name
:param credentials: model credentials
:param tokens: input tokens
:return: usage
"""
# get input price info
input_price_info = self.get_price(
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
)
# transform usage
usage = EmbeddingUsage(
tokens=tokens,
total_tokens=tokens,
unit_price=input_price_info.unit_price,
price_unit=input_price_info.unit,
total_price=input_price_info.total_amount,
currency=input_price_info.currency,
latency=time.perf_counter() - self.started_at,
)
return usage
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
"""
used to define customizable model schema
"""
entity = AIModelEntity(
model=model,
label=I18nObject(en_US=model),
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_type=ModelType.TEXT_EMBEDDING,
model_properties={
ModelPropertyKey.MAX_CHUNKS: int(credentials.get('max_chunks', 1)),
ModelPropertyKey.CONTEXT_SIZE: int(credentials.get('context_size', 512)),
},
parameter_rules=[],
)
return entity
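
A toy walkthrough (made-up numbers) of the truncation in _invoke above: with context_size=512, a 600-token input, and one leading plus one trailing special token, the cut lands at the character offset where token 512 - 1 - 20 = 491 starts.

context_size = 512
total_special, pre_special = 2, 1            # e.g. <s> ... </s>
rest_special_token_count = total_special - pre_special
token_cutoff = context_size - rest_special_token_count - 20  # 20 tokens of headroom
print(token_cutoff)  # 491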

View File

@ -21,6 +21,16 @@ parameter_rules:
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Toggle for enhancement features such as search. When disabled, the main model generates the reply directly, which lowers latency (especially time to first token when streaming) but may reduce answer quality in a few scenarios.
required: false
default: true
pricing:
input: '0.03'
output: '0.10'

View File

@ -21,6 +21,16 @@ parameter_rules:
default: 1024
min: 1
max: 256000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Toggle for enhancement features such as search. When disabled, the main model generates the reply directly, which lowers latency (especially time to first token when streaming) but may reduce answer quality in a few scenarios.
required: false
default: true
pricing:
input: '0.015'
output: '0.06'

View File

@ -21,6 +21,16 @@ parameter_rules:
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Toggle for enhancement features such as search. When disabled, the main model generates the reply directly, which lowers latency (especially time to first token when streaming) but may reduce answer quality in a few scenarios.
required: false
default: true
pricing:
input: '0.0045'
output: '0.0005'

View File

@ -36,7 +36,8 @@ class HunyuanLargeLanguageModel(LargeLanguageModel):
custom_parameters = {
'Temperature': model_parameters.get('temperature', 0.0),
'TopP': model_parameters.get('top_p', 1.0)
'TopP': model_parameters.get('top_p', 1.0),
'EnableEnhancement': model_parameters.get('enable_enhance', True)
}
params = {
@ -213,7 +214,7 @@ class HunyuanLargeLanguageModel(LargeLanguageModel):
def _handle_chat_response(self, credentials, model, prompt_messages, response):
usage = self._calc_response_usage(model, credentials, response.Usage.PromptTokens,
response.Usage.CompletionTokens)
assistant_prompt_message = PromptMessage(role="assistant")
assistant_prompt_message = AssistantPromptMessage()
assistant_prompt_message.content = response.Choices[0].Message.Content
result = LLMResult(
model=model,

View File

@ -1,4 +1,4 @@
model: jina-reranker-v2-base-multilingual
model_type: rerank
model_properties:
context_size: 8192
context_size: 1024

View File

@ -2,10 +2,16 @@
- google/codegemma-7b
- google/recurrentgemma-2b
- meta/llama2-70b
- meta/llama-3.1-8b-instruct
- meta/llama-3.1-70b-instruct
- meta/llama-3.1-405b-instruct
- meta/llama3-8b-instruct
- meta/llama3-70b-instruct
- mistralai/mistral-large
- mistralai/mixtral-8x7b-instruct-v0.1
- mistralai/mixtral-8x22b-instruct-v0.1
- nvidia/nemotron-4-340b-instruct
- microsoft/phi-3-medium-128k-instruct
- microsoft/phi-3-mini-128k-instruct
- fuyu-8b
- snowflake/arctic

View File

@ -0,0 +1,36 @@
model: meta/llama-3.1-405b-instruct
label:
zh_Hans: meta/llama-3.1-405b-instruct
en_US: meta/llama-3.1-405b-instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 1
default: 0.5
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 4096
default: 1024
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0

View File

@ -0,0 +1,36 @@
model: meta/llama-3.1-70b-instruct
label:
zh_Hans: meta/llama-3.1-70b-instruct
en_US: meta/llama-3.1-70b-instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 1
default: 0.5
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 4096
default: 1024
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0

View File

@ -0,0 +1,36 @@
model: meta/llama-3.1-8b-instruct
label:
zh_Hans: meta/llama-3.1-8b-instruct
en_US: meta/llama-3.1-8b-instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 1
default: 0.5
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 4096
default: 1024
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0

View File

@ -31,8 +31,13 @@ class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
'meta/llama2-70b': '',
'meta/llama3-8b-instruct': '',
'meta/llama3-70b-instruct': '',
'google/recurrentgemma-2b': ''
'meta/llama-3.1-8b-instruct': '',
'meta/llama-3.1-70b-instruct': '',
'meta/llama-3.1-405b-instruct': '',
'google/recurrentgemma-2b': '',
'nvidia/nemotron-4-340b-instruct': '',
'microsoft/phi-3-medium-128k-instruct': '',
'microsoft/phi-3-mini-128k-instruct': ''
}
def _invoke(self, model: str, credentials: dict,

View File

@ -0,0 +1,36 @@
model: nvidia/nemotron-4-340b-instruct
label:
zh_Hans: nvidia/nemotron-4-340b-instruct
en_US: nvidia/nemotron-4-340b-instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 1
default: 0.5
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 4096
default: 1024
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0

View File

@ -0,0 +1,36 @@
model: microsoft/phi-3-medium-128k-instruct
label:
zh_Hans: microsoft/phi-3-medium-128k-instruct
en_US: microsoft/phi-3-medium-128k-instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 1
default: 0.5
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 4096
default: 1024
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0

View File

@ -0,0 +1,36 @@
model: microsoft/phi-3-mini-128k-instruct
label:
zh_Hans: microsoft/phi-3-mini-128k-instruct
en_US: microsoft/phi-3-mini-128k-instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 1
default: 0.5
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 4096
default: 1024
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0

View File

@ -59,7 +59,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
if not endpoint_url.endswith('/'):
endpoint_url += '/'
endpoint_url = urljoin(endpoint_url, 'api/embeddings')
endpoint_url = urljoin(endpoint_url, 'api/embed')
# get model properties
context_size = self._get_context_size(model, credentials)
@ -72,38 +72,34 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
num_tokens = self._get_num_tokens_by_gpt2(text)
if num_tokens >= context_size:
cutoff = int(len(text) * (np.floor(context_size / num_tokens)))
cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
# if num tokens is larger than context length, only use the start
inputs.append(text[0: cutoff])
else:
inputs.append(text)
batched_embeddings = []
# Prepare the payload for the request
payload = {
'input': inputs,
'model': model,
}
for text in inputs:
# Prepare the payload for the request
payload = {
'prompt': text,
'model': model,
}
# Make the request to the OpenAI API
response = requests.post(
endpoint_url,
headers=headers,
data=json.dumps(payload),
timeout=(10, 300)
)
# Make the request to the OpenAI API
response = requests.post(
endpoint_url,
headers=headers,
data=json.dumps(payload),
timeout=(10, 300)
)
response.raise_for_status() # Raise an exception for HTTP errors
response_data = response.json()
response.raise_for_status() # Raise an exception for HTTP errors
response_data = response.json()
# Extract embeddings and used tokens from the response
embeddings = response_data['embeddings']
embedding_used_tokens = self.get_num_tokens(model, credentials, inputs)
# Extract embeddings and used tokens from the response
embeddings = response_data['embedding']
embedding_used_tokens = self.get_num_tokens(model, credentials, [text])
used_tokens += embedding_used_tokens
batched_embeddings.append(embeddings)
used_tokens += embedding_used_tokens
# calc usage
usage = self._calc_response_usage(
@ -113,7 +109,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
)
return TextEmbeddingResult(
embeddings=batched_embeddings,
embeddings=embeddings,
usage=usage,
model=model
)
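
The cutoff rewrite above (repeated in the OpenAI-compatible embedding models further down) fixes an order-of-operations bug: flooring the ratio first yields 0 whenever num_tokens exceeds context_size, truncating the text to an empty string. A toy illustration (made-up sizes):

import numpy as np

text, context_size, num_tokens = 'x' * 1000, 512, 800
old_cutoff = int(len(text) * (np.floor(context_size / num_tokens)))  # 1000 * 0 = 0
new_cutoff = int(np.floor(len(text) * (context_size / num_tokens)))  # floor(640.0) = 640
print(old_cutoff, new_cutoff)  # 0 640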

View File

@ -1,6 +1,7 @@
- gpt-4
- gpt-4o
- gpt-4o-2024-05-13
- gpt-4o-2024-08-06
- gpt-4o-mini
- gpt-4o-mini-2024-07-18
- gpt-4-turbo

View File

@ -37,7 +37,7 @@ parameter_rules:
- text
- json_object
pricing:
input: '0.001'
output: '0.002'
input: '0.0005'
output: '0.0015'
unit: '0.001'
currency: USD

View File

@ -0,0 +1,44 @@
model: gpt-4o-2024-08-06
label:
zh_Hans: gpt-4o-2024-08-06
en_US: gpt-4o-2024-08-06
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 16384
- name: response_format
label:
zh_Hans: 回复格式
en_US: response_format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '2.50'
output: '10.00'
unit: '0.000001'
currency: USD

View File

@ -1,11 +1,7 @@
import concurrent.futures
from functools import reduce
from io import BytesIO
from typing import Optional
from flask import Response
from openai import OpenAI
from pydub import AudioSegment
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.model_runtime.errors.validate import CredentialsValidateFailedError
@ -32,7 +28,8 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
:return: text translated to audio file
"""
if not voice or voice not in [d['value'] for d in self.get_tts_model_voices(model=model, credentials=credentials)]:
if not voice or voice not in [d['value'] for d in
self.get_tts_model_voices(model=model, credentials=credentials)]:
voice = self._get_model_default_voice(model, credentials)
# if streaming:
return self._tts_invoke_streaming(model=model,
@ -50,7 +47,7 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
:return: text translated to audio file
"""
try:
self._tts_invoke(
self._tts_invoke_streaming(
model=model,
credentials=credentials,
content_text='Hello Dify!',
@ -59,46 +56,6 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
def _tts_invoke(self, model: str, credentials: dict, content_text: str, voice: str) -> Response:
"""
_tts_invoke text2speech model
:param model: model name
:param credentials: model credentials
:param content_text: text content to be translated
:param voice: model timbre
:return: text translated to audio file
"""
audio_type = self._get_model_audio_type(model, credentials)
word_limit = self._get_model_word_limit(model, credentials)
max_workers = self._get_model_workers_limit(model, credentials)
try:
sentences = list(self._split_text_into_sentences(org_text=content_text, max_length=word_limit))
audio_bytes_list = []
# Create a thread pool and map the function to the list of sentences
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = [executor.submit(self._process_sentence, sentence=sentence, model=model, voice=voice,
credentials=credentials) for sentence in sentences]
for future in futures:
try:
if future.result():
audio_bytes_list.append(future.result())
except Exception as ex:
raise InvokeBadRequestError(str(ex))
if len(audio_bytes_list) > 0:
audio_segments = [AudioSegment.from_file(BytesIO(audio_bytes), format=audio_type) for audio_bytes in
audio_bytes_list if audio_bytes]
combined_segment = reduce(lambda x, y: x + y, audio_segments)
buffer: BytesIO = BytesIO()
combined_segment.export(buffer, format=audio_type)
buffer.seek(0)
return Response(buffer.read(), status=200, mimetype=f"audio/{audio_type}")
except Exception as ex:
raise InvokeBadRequestError(str(ex))
def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str,
voice: str) -> any:
"""
@ -114,7 +71,8 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
# doc: https://platform.openai.com/docs/guides/text-to-speech
credentials_kwargs = self._to_credential_kwargs(credentials)
client = OpenAI(**credentials_kwargs)
model_support_voice = [x.get("value") for x in self.get_tts_model_voices(model=model, credentials=credentials)]
model_support_voice = [x.get("value") for x in
self.get_tts_model_voices(model=model, credentials=credentials)]
if not voice or voice not in model_support_voice:
voice = self._get_model_default_voice(model, credentials)
word_limit = self._get_model_word_limit(model, credentials)

View File

@ -7,6 +7,7 @@ description:
supported_model_types:
- llm
- text-embedding
- speech2text
configurate_methods:
- customizable-model
model_credential_schema:
@ -61,6 +62,22 @@ model_credential_schema:
zh_Hans: 模型上下文长度
en_US: Model context size
required: true
show_on:
- variable: __model_type
value: llm
type: text-input
default: '4096'
placeholder:
zh_Hans: 在此输入您的模型上下文长度
en_US: Enter your Model context size
- variable: context_size
label:
zh_Hans: 模型上下文长度
en_US: Model context size
required: true
show_on:
- variable: __model_type
value: text-embedding
type: text-input
default: '4096'
placeholder:

View File

@ -0,0 +1,63 @@
from typing import IO, Optional
from urllib.parse import urljoin
import requests
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel
from core.model_runtime.model_providers.openai_api_compatible._common import _CommonOAI_API_Compat
class OAICompatSpeech2TextModel(_CommonOAI_API_Compat, Speech2TextModel):
"""
Model class for OpenAI Compatible Speech to text model.
"""
def _invoke(
self, model: str, credentials: dict, file: IO[bytes], user: Optional[str] = None
) -> str:
"""
Invoke speech2text model
:param model: model name
:param credentials: model credentials
:param file: audio file
:param user: unique user id
:return: text for given audio file
"""
headers = {}
api_key = credentials.get("api_key")
if api_key:
headers["Authorization"] = f"Bearer {api_key}"
endpoint_url = credentials.get("endpoint_url")
if not endpoint_url.endswith("/"):
endpoint_url += "/"
endpoint_url = urljoin(endpoint_url, "audio/transcriptions")
payload = {"model": model}
files = [("file", file)]
response = requests.post(endpoint_url, headers=headers, data=payload, files=files)
if response.status_code != 200:
raise InvokeBadRequestError(response.text)
response_data = response.json()
return response_data["text"]
def validate_credentials(self, model: str, credentials: dict) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return:
"""
try:
audio_file_path = self._get_demo_file_path()
with open(audio_file_path, "rb") as audio_file:
self._invoke(model, credentials, audio_file)
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
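
For reference, a minimal sketch of the multipart request that _invoke above issues, assuming a hypothetical OpenAI-compatible server at http://localhost:8000/v1:

import requests

with open('sample.wav', 'rb') as audio_file:
    resp = requests.post(
        'http://localhost:8000/v1/audio/transcriptions',
        headers={'Authorization': 'Bearer sk-...'},  # omit if the server needs no key
        data={'model': 'whisper-1'},                 # model name is an assumption
        files=[('file', audio_file)],
    )
resp.raise_for_status()
print(resp.json()['text'])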

View File

@ -76,7 +76,7 @@ class OAICompatEmbeddingModel(_CommonOAI_API_Compat, TextEmbeddingModel):
num_tokens = self._get_num_tokens_by_gpt2(text)
if num_tokens >= context_size:
cutoff = int(len(text) * (np.floor(context_size / num_tokens)))
cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
# if num tokens is larger than context length, only use the start
inputs.append(text[0: cutoff])
else:

View File

@ -4,7 +4,7 @@ label:
model_type: llm
model_properties:
mode: chat
context_size: 128000
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
@ -15,9 +15,9 @@ parameter_rules:
required: true
default: 512
min: 1
max: 128000
max: 131072
pricing:
input: "3"
output: "3"
input: "2.7"
output: "2.7"
unit: "0.000001"
currency: USD

View File

@ -4,7 +4,7 @@ label:
model_type: llm
model_properties:
mode: chat
context_size: 128000
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
@ -15,9 +15,9 @@ parameter_rules:
required: true
default: 512
min: 1
max: 128000
max: 131072
pricing:
input: "0.9"
output: "0.9"
input: "0.52"
output: "0.75"
unit: "0.000001"
currency: USD

View File

@ -4,7 +4,7 @@ label:
model_type: llm
model_properties:
mode: chat
context_size: 128000
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
@ -15,9 +15,9 @@ parameter_rules:
required: true
default: 512
min: 1
max: 128000
max: 131072
pricing:
input: "0.2"
output: "0.2"
input: "0.06"
output: "0.06"
unit: "0.000001"
currency: USD

View File

@ -79,7 +79,7 @@ class OAICompatEmbeddingModel(_CommonOAI_API_Compat, TextEmbeddingModel):
num_tokens = self._get_num_tokens_by_gpt2(text)
if num_tokens >= context_size:
cutoff = int(len(text) * (np.floor(context_size / num_tokens)))
cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
# if num tokens is larger than context length, only use the start
inputs.append(text[0: cutoff])
else:

View File

@ -1,8 +1,20 @@
- deepseek-v2-chat
- qwen2-72b-instruct
- qwen2-57b-a14b-instruct
- qwen2-7b-instruct
- yi-1.5-34b-chat
- yi-1.5-9b-chat
- yi-1.5-6b-chat
- glm4-9B-chat
- Qwen/Qwen2-72B-Instruct
- Qwen/Qwen2-57B-A14B-Instruct
- Qwen/Qwen2-7B-Instruct
- Qwen/Qwen2-1.5B-Instruct
- 01-ai/Yi-1.5-34B-Chat
- 01-ai/Yi-1.5-9B-Chat-16K
- 01-ai/Yi-1.5-6B-Chat
- THUDM/glm-4-9b-chat
- deepseek-ai/DeepSeek-V2-Chat
- deepseek-ai/DeepSeek-Coder-V2-Instruct
- internlm/internlm2_5-7b-chat
- google/gemma-2-27b-it
- google/gemma-2-9b-it
- meta-llama/Meta-Llama-3-70B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3.1-405B-Instruct
- meta-llama/Meta-Llama-3.1-70B-Instruct
- meta-llama/Meta-Llama-3.1-8B-Instruct
- mistralai/Mixtral-8x7B-Instruct-v0.1
- mistralai/Mistral-7B-Instruct-v0.2

View File

@ -1,4 +1,4 @@
model: deepseek-ai/deepseek-v2-chat
model: deepseek-ai/DeepSeek-V2-Chat
label:
en_US: deepseek-ai/DeepSeek-V2-Chat
model_type: llm

View File

@ -0,0 +1,30 @@
model: google/gemma-2-27b-it
label:
en_US: google/gemma-2-27b-it
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '1.26'
output: '1.26'
unit: '0.000001'
currency: RMB

View File

@ -0,0 +1,30 @@
model: google/gemma-2-9b-it
label:
en_US: google/gemma-2-9b-it
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '0'
output: '0'
unit: '0.000001'
currency: RMB

View File

@ -1,4 +1,4 @@
model: zhipuai/glm4-9B-chat
model: THUDM/glm-4-9b-chat
label:
en_US: THUDM/glm-4-9b-chat
model_type: llm
@ -24,7 +24,7 @@ parameter_rules:
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '0.6'
output: '0.6'
input: '0'
output: '0'
unit: '0.000001'
currency: RMB

View File

@ -0,0 +1,30 @@
model: internlm/internlm2_5-7b-chat
label:
en_US: internlm/internlm2_5-7b-chat
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '0'
output: '0'
unit: '0.000001'
currency: RMB

View File

@ -0,0 +1,30 @@
model: meta-llama/Meta-Llama-3-70B-Instruct
label:
en_US: meta-llama/Meta-Llama-3-70B-Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '4.13'
output: '4.13'
unit: '0.000001'
currency: RMB

View File

@ -0,0 +1,30 @@
model: meta-llama/Meta-Llama-3-8B-Instruct
label:
en_US: meta-llama/Meta-Llama-3-8B-Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '0'
output: '0'
unit: '0.000001'
currency: RMB

View File

@ -0,0 +1,30 @@
model: meta-llama/Meta-Llama-3.1-405B-Instruct
label:
en_US: meta-llama/Meta-Llama-3.1-405B-Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '21'
output: '21'
unit: '0.000001'
currency: RMB

View File

@ -0,0 +1,30 @@
model: meta-llama/Meta-Llama-3.1-70B-Instruct
label:
en_US: meta-llama/Meta-Llama-3.1-70B-Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '4.13'
output: '4.13'
unit: '0.000001'
currency: RMB

View File

@ -0,0 +1,30 @@
model: meta-llama/Meta-Llama-3.1-8B-Instruct
label:
en_US: meta-llama/Meta-Llama-3.1-8B-Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '0'
output: '0'
unit: '0.000001'
currency: RMB

View File

@ -0,0 +1,30 @@
model: mistralai/Mistral-7B-Instruct-v0.2
label:
en_US: mistralai/Mistral-7B-Instruct-v0.2
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '0'
output: '0'
unit: '0.000001'
currency: RMB

Some files were not shown because too many files have changed in this diff.