Mirror of https://github.com/langgenius/dify.git (synced 2026-04-30 07:28:05 +08:00)

Merge branch 'refs/heads/main' into feat/workflow-parallel-support

# Conflicts:
#	api/core/app/apps/advanced_chat/app_generator.py
#	api/core/app/apps/advanced_chat/app_runner.py
#	api/core/app/apps/advanced_chat/generate_task_pipeline.py
#	api/core/app/apps/workflow/app_runner.py
#	api/core/app/task_pipeline/workflow_cycle_manage.py
#	api/core/workflow/entities/variable_pool.py
#	api/core/workflow/nodes/base_node.py
#	api/core/workflow/workflow_engine_manager.py
@@ -12,6 +12,7 @@ ENV POETRY_CACHE_DIR=/tmp/poetry_cache
ENV POETRY_NO_INTERACTION=1
ENV POETRY_VIRTUALENVS_IN_PROJECT=true
ENV POETRY_VIRTUALENVS_CREATE=true
ENV POETRY_REQUESTS_TIMEOUT=15

FROM base AS packages

@@ -41,8 +42,12 @@ ENV TZ=UTC
WORKDIR /app/api

RUN apt-get update \
    && apt-get install -y --no-install-recommends curl wget vim nodejs ffmpeg libgmp-dev libmpfr-dev libmpc-dev \
    && apt-get autoremove \
    && apt-get install -y --no-install-recommends curl nodejs libgmp-dev libmpfr-dev libmpc-dev \
    && echo "deb http://deb.debian.org/debian testing main" > /etc/apt/sources.list \
    && apt-get update \
    # For Security
    && apt-get install -y --no-install-recommends zlib1g=1:1.3.dfsg+really1.3.1-1 expat=2.6.2-1 libldap-2.5-0=2.5.18+dfsg-2 perl=5.38.2-5 libsqlite3-0=3.46.0-1 \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*

# Copy Python environment and packages
@@ -50,6 +55,9 @@ ENV VIRTUAL_ENV=/app/api/.venv
COPY --from=packages ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Download nltk data
RUN python -c "import nltk; nltk.download('punkt')"

# Copy source code
COPY . /app/api/
@@ -12,19 +12,14 @@ from configs.packaging import PackagingInfo
class DifyConfig(
    # Packaging info
    PackagingInfo,

    # Deployment configs
    DeploymentConfig,

    # Feature configs
    FeatureConfig,

    # Middleware configs
    MiddlewareConfig,

    # Extra service configs
    ExtraServiceConfig,

    # Enterprise feature configs
    # **Before using, please contact business@dify.ai by email to inquire about licensing matters.**
    EnterpriseFeatureConfig,
@@ -36,7 +31,6 @@ class DifyConfig(
        env_file='.env',
        env_file_encoding='utf-8',
        frozen=True,

        # ignore extra attributes
        extra='ignore',
    )
@@ -67,3 +61,5 @@ class DifyConfig(
    SSRF_PROXY_HTTPS_URL: str | None = None

    MODERATION_BUFFER_SIZE: int = Field(default=300, description='The buffer size for moderation.')

    MAX_VARIABLE_SIZE: int = Field(default=5 * 1024, description='The maximum size of a variable. default is 5KB.')
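
For context, a minimal sketch of how these settings resolve, assuming standard pydantic-settings behaviour (the override values below are illustrative, not defaults): each field can be overridden through the .env file declared in model_config above and then read back from the shared dify_config instance.

    # .env (hypothetical overrides)
    # MAX_VARIABLE_SIZE=10240
    # MODERATION_BUFFER_SIZE=500

    from configs import dify_config  # import path as used elsewhere in this diff

    # pydantic-settings reads each field from the environment / .env first,
    # then falls back to the declared default (5 * 1024 for MAX_VARIABLE_SIZE).
    print(dify_config.MAX_VARIABLE_SIZE)
    print(dify_config.MODERATION_BUFFER_SIZE)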
@@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):

    CURRENT_VERSION: str = Field(
        description='Dify version',
        default='0.6.15',
        default='0.6.16',
    )

    COMMIT_SHA: str = Field(

@@ -1,2 +1 @@
# TODO: Update all string in code to use this constant
HIDDEN_VALUE = '[__HIDDEN__]'
@@ -15,6 +15,8 @@ language_timezone_mapping = {
    'ro-RO': 'Europe/Bucharest',
    'pl-PL': 'Europe/Warsaw',
    'hi-IN': 'Asia/Kolkata',
    'tr-TR': 'Europe/Istanbul',
    'fa-IR': 'Asia/Tehran',
}

languages = list(language_timezone_mapping.keys())

@@ -17,6 +17,7 @@ from .app import (
    audio,
    completion,
    conversation,
    conversation_variables,
    generator,
    message,
    model_config,
@@ -23,8 +23,7 @@ class AnnotationReplyActionApi(Resource):
    @account_initialization_required
    @cloud_edition_billing_resource_check('annotation')
    def post(self, app_id, action):
        # The role of the current user in the ta table must be admin or owner
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()

        app_id = str(app_id)
@@ -47,8 +46,7 @@ class AppAnnotationSettingDetailApi(Resource):
    @login_required
    @account_initialization_required
    def get(self, app_id):
        # The role of the current user in the ta table must be admin or owner
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()

        app_id = str(app_id)
@@ -61,8 +59,7 @@ class AppAnnotationSettingUpdateApi(Resource):
    @login_required
    @account_initialization_required
    def post(self, app_id, annotation_setting_id):
        # The role of the current user in the ta table must be admin or owner
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()

        app_id = str(app_id)
@@ -82,8 +79,7 @@ class AnnotationReplyActionStatusApi(Resource):
    @account_initialization_required
    @cloud_edition_billing_resource_check('annotation')
    def get(self, app_id, job_id, action):
        # The role of the current user in the ta table must be admin or owner
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()

        job_id = str(job_id)
@@ -110,8 +106,7 @@ class AnnotationListApi(Resource):
    @login_required
    @account_initialization_required
    def get(self, app_id):
        # The role of the current user in the ta table must be admin or owner
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()

        page = request.args.get('page', default=1, type=int)
@@ -135,8 +130,7 @@ class AnnotationExportApi(Resource):
    @login_required
    @account_initialization_required
    def get(self, app_id):
        # The role of the current user in the ta table must be admin or owner
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()

        app_id = str(app_id)
@@ -154,8 +148,7 @@ class AnnotationCreateApi(Resource):
    @cloud_edition_billing_resource_check('annotation')
    @marshal_with(annotation_fields)
    def post(self, app_id):
        # The role of the current user in the ta table must be admin or owner
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()

        app_id = str(app_id)
@@ -174,8 +167,7 @@ class AnnotationUpdateDeleteApi(Resource):
    @cloud_edition_billing_resource_check('annotation')
    @marshal_with(annotation_fields)
    def post(self, app_id, annotation_id):
        # The role of the current user in the ta table must be admin or owner
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()

        app_id = str(app_id)
@@ -191,8 +183,7 @@ class AnnotationUpdateDeleteApi(Resource):
    @login_required
    @account_initialization_required
    def delete(self, app_id, annotation_id):
        # The role of the current user in the ta table must be admin or owner
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()

        app_id = str(app_id)
@@ -207,8 +198,7 @@ class AnnotationBatchImportApi(Resource):
    @account_initialization_required
    @cloud_edition_billing_resource_check('annotation')
    def post(self, app_id):
        # The role of the current user in the ta table must be admin or owner
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()

        app_id = str(app_id)
@@ -232,8 +222,7 @@ class AnnotationBatchImportStatusApi(Resource):
    @account_initialization_required
    @cloud_edition_billing_resource_check('annotation')
    def get(self, app_id, job_id):
        # The role of the current user in the ta table must be admin or owner
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()

        job_id = str(job_id)
@@ -259,8 +248,7 @@ class AnnotationHitHistoryListApi(Resource):
    @login_required
    @account_initialization_required
    def get(self, app_id, annotation_id):
        # The role of the current user in the table must be admin or owner
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()

        page = request.args.get('page', default=1, type=int)
@@ -143,7 +143,7 @@ class ChatConversationApi(Resource):
    @get_app_model(mode=[AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT])
    @marshal_with(conversation_with_summary_pagination_fields)
    def get(self, app_model):
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()
        parser = reqparse.RequestParser()
        parser.add_argument('keyword', type=str, location='args')
@@ -245,7 +245,7 @@ class ChatConversationDetailApi(Resource):
    @get_app_model(mode=[AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT])
    @marshal_with(conversation_detail_fields)
    def get(self, app_model, conversation_id):
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()
        conversation_id = str(conversation_id)
api/controllers/console/app/conversation_variables.py (new file, 61 lines)
@@ -0,0 +1,61 @@
from flask_restful import Resource, marshal_with, reqparse
from sqlalchemy import select
from sqlalchemy.orm import Session

from controllers.console import api
from controllers.console.app.wraps import get_app_model
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from extensions.ext_database import db
from fields.conversation_variable_fields import paginated_conversation_variable_fields
from libs.login import login_required
from models import ConversationVariable
from models.model import AppMode


class ConversationVariablesApi(Resource):
    @setup_required
    @login_required
    @account_initialization_required
    @get_app_model(mode=AppMode.ADVANCED_CHAT)
    @marshal_with(paginated_conversation_variable_fields)
    def get(self, app_model):
        parser = reqparse.RequestParser()
        parser.add_argument('conversation_id', type=str, location='args')
        args = parser.parse_args()

        stmt = (
            select(ConversationVariable)
            .where(ConversationVariable.app_id == app_model.id)
            .order_by(ConversationVariable.created_at)
        )
        if args['conversation_id']:
            stmt = stmt.where(ConversationVariable.conversation_id == args['conversation_id'])
        else:
            raise ValueError('conversation_id is required')

        # NOTE: This is a temporary solution to avoid performance issues.
        page = 1
        page_size = 100
        stmt = stmt.limit(page_size).offset((page - 1) * page_size)

        with Session(db.engine) as session:
            rows = session.scalars(stmt).all()

        return {
            'page': page,
            'limit': page_size,
            'total': len(rows),
            'has_more': False,
            'data': [
                {
                    'created_at': row.created_at,
                    'updated_at': row.updated_at,
                    **row.to_variable().model_dump(),
                }
                for row in rows
            ],
        }


api.add_resource(ConversationVariablesApi, '/apps/<uuid:app_id>/conversation-variables')
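
A quick way to exercise the new endpoint, assuming a locally running console API and a valid console session token (the base URL, app UUID and token below are all hypothetical); the route and the conversation_id query parameter come from the resource above:

    import requests

    BASE = 'http://localhost:5001/console/api'  # hypothetical local deployment
    APP_ID = '00000000-0000-0000-0000-000000000000'  # hypothetical app UUID

    resp = requests.get(
        f'{BASE}/apps/{APP_ID}/conversation-variables',
        params={'conversation_id': '11111111-1111-1111-1111-111111111111'},
        headers={'Authorization': 'Bearer <console-token>'},  # hypothetical token
    )
    resp.raise_for_status()
    # Pagination is fixed server-side for now: page=1, limit=100, has_more=False.
    for item in resp.json()['data']:
        print(item['created_at'], item['updated_at'])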
@@ -149,8 +149,7 @@ class MessageAnnotationApi(Resource):
    @get_app_model
    @marshal_with(annotation_fields)
    def post(self, app_model):
        # The role of the current user in the ta table must be admin or owner
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()

        parser = reqparse.RequestParser()
@@ -74,6 +74,7 @@ class DraftWorkflowApi(Resource):
            parser.add_argument('hash', type=str, required=False, location='json')
            # TODO: set this to required=True after frontend is updated
            parser.add_argument('environment_variables', type=list, required=False, location='json')
            parser.add_argument('conversation_variables', type=list, required=False, location='json')
            args = parser.parse_args()
        elif 'text/plain' in content_type:
            try:
@@ -88,7 +89,8 @@ class DraftWorkflowApi(Resource):
                    'graph': data.get('graph'),
                    'features': data.get('features'),
                    'hash': data.get('hash'),
                    'environment_variables': data.get('environment_variables')
                    'environment_variables': data.get('environment_variables'),
                    'conversation_variables': data.get('conversation_variables'),
                }
            except json.JSONDecodeError:
                return {'message': 'Invalid JSON data'}, 400
@@ -100,6 +102,8 @@ class DraftWorkflowApi(Resource):
        try:
            environment_variables_list = args.get('environment_variables') or []
            environment_variables = [factory.build_variable_from_mapping(obj) for obj in environment_variables_list]
            conversation_variables_list = args.get('conversation_variables') or []
            conversation_variables = [factory.build_variable_from_mapping(obj) for obj in conversation_variables_list]
            workflow = workflow_service.sync_draft_workflow(
                app_model=app_model,
                graph=args['graph'],
@@ -107,6 +111,7 @@ class DraftWorkflowApi(Resource):
                unique_hash=args.get('hash'),
                account=current_user,
                environment_variables=environment_variables,
                conversation_variables=conversation_variables,
            )
        except WorkflowHashNotEqualError:
            raise DraftWorkflowNotSync()
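
For reference, a sketch of the JSON body DraftWorkflowApi now accepts. The top-level keys come from the parser arguments above; each variable mapping must carry name, value and value_type so that factory.build_variable_from_mapping can dispatch on it (the concrete value_type string 'string' is an assumption here, since SegmentType's values are not shown in this diff):

    # Hypothetical draft-sync body fragment
    draft_payload = {
        'graph': {'nodes': [], 'edges': []},  # placeholder graph
        'features': {},
        'hash': '<unique-hash>',  # hypothetical
        'environment_variables': [
            {'name': 'API_BASE', 'value': 'https://example.com', 'value_type': 'string'},
        ],
        'conversation_variables': [
            {'name': 'memory', 'value': '', 'value_type': 'string'},
        ],
    }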
@@ -17,8 +17,6 @@ from ..wraps import account_initialization_required

def get_oauth_providers():
    with current_app.app_context():
        if not dify_config.NOTION_CLIENT_ID or not dify_config.NOTION_CLIENT_SECRET:
            return {}
        notion_oauth = NotionOAuth(client_id=dify_config.NOTION_CLIENT_ID,
                                   client_secret=dify_config.NOTION_CLIENT_SECRET,
                                   redirect_uri=dify_config.CONSOLE_API_URL + '/console/api/oauth/data-source/callback/notion')

@@ -189,8 +189,6 @@ class DatasetApi(Resource):
        dataset = DatasetService.get_dataset(dataset_id_str)
        if dataset is None:
            raise NotFound("Dataset not found.")
        # check user's model setting
        DatasetService.check_dataset_model_setting(dataset)

        parser = reqparse.RequestParser()
        parser.add_argument('name', nullable=False,
@@ -215,6 +213,13 @@ class DatasetApi(Resource):
        args = parser.parse_args()
        data = request.get_json()

        # check embedding model setting
        if data.get('indexing_technique') == 'high_quality':
            DatasetService.check_embedding_model_setting(dataset.tenant_id,
                                                         data.get('embedding_model_provider'),
                                                         data.get('embedding_model')
                                                         )

        # The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
        DatasetPermissionService.check_permission(
            current_user, dataset, data.get('permission'), data.get('partial_member_list')
@@ -233,7 +238,8 @@ class DatasetApi(Resource):
            DatasetPermissionService.update_partial_member_list(
                tenant_id, dataset_id_str, data.get('partial_member_list')
            )
        else:
        # clear partial member list when permission is only_me or all_team_members
        elif data.get('permission') == 'only_me' or data.get('permission') == 'all_team_members':
            DatasetPermissionService.clear_partial_member_list(dataset_id_str)

        partial_member_list = DatasetPermissionService.get_dataset_partial_member_list(dataset_id_str)
@@ -223,8 +223,7 @@ class DatasetDocumentSegmentAddApi(Resource):
        document = DocumentService.get_document(dataset_id, document_id)
        if not document:
            raise NotFound('Document not found.')
        # The role of the current user in the ta table must be admin or owner
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()
        # check embedding model setting
        if dataset.indexing_technique == 'high_quality':
@@ -347,7 +346,7 @@ class DatasetDocumentSegmentUpdateApi(Resource):
        if not segment:
            raise NotFound('Segment not found.')
        # The role of the current user in the ta table must be admin or owner
        if not current_user.is_admin_or_owner:
        if not current_user.is_editor:
            raise Forbidden()
        try:
            DatasetService.check_dataset_permission(dataset, current_user)

@@ -1,6 +1,7 @@
from flask_login import current_user
from flask_restful import Resource, marshal_with, reqparse

from constants import HIDDEN_VALUE
from controllers.console import api
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
@@ -89,7 +90,7 @@ class APIBasedExtensionDetailAPI(Resource):
        extension_data_from_db.name = args['name']
        extension_data_from_db.api_endpoint = args['api_endpoint']

        if args['api_key'] != '[__HIDDEN__]':
        if args['api_key'] != HIDDEN_VALUE:
            extension_data_from_db.api_key = args['api_key']

        return APIBasedExtensionService.save(extension_data_from_db)
@@ -19,7 +19,7 @@ def inner_api_only(view):
        # get header 'X-Inner-Api-Key'
        inner_api_key = request.headers.get('X-Inner-Api-Key')
        if not inner_api_key or inner_api_key != dify_config.INNER_API_KEY:
            abort(404)
            abort(401)

        return view(*args, **kwargs)

@@ -53,7 +53,7 @@ class ConversationDetailApi(Resource):
            ConversationService.delete(app_model, conversation_id, end_user)
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
        return {"result": "success"}, 204
        return {'result': 'success'}, 200


class ConversationRenameApi(Resource):

@@ -131,7 +131,7 @@ class MessageSuggestedApi(Resource):
        except services.errors.message.MessageNotExistsError:
            raise NotFound("Message Not Exists.")
        except SuggestedQuestionsAfterAnswerDisabledError:
            raise BadRequest("Message Not Exists.")
            raise BadRequest("Suggested Questions Is Disabled.")
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()
@@ -79,6 +79,7 @@ class CotAgentRunner(BaseAgentRunner, ABC):
            llm_usage.completion_tokens += usage.completion_tokens
            llm_usage.prompt_price += usage.prompt_price
            llm_usage.completion_price += usage.completion_price
            llm_usage.total_price += usage.total_price

        model_instance = self.model_instance

@@ -62,6 +62,7 @@ class FunctionCallAgentRunner(BaseAgentRunner):
            llm_usage.completion_tokens += usage.completion_tokens
            llm_usage.prompt_price += usage.prompt_price
            llm_usage.completion_price += usage.completion_price
            llm_usage.total_price += usage.total_price

        model_instance = self.model_instance

@@ -91,7 +91,8 @@ class DatasetConfigManager:
                top_k=dataset_configs.get('top_k', 4),
                score_threshold=dataset_configs.get('score_threshold'),
                reranking_model=dataset_configs.get('reranking_model'),
                weights=dataset_configs.get('weights')
                weights=dataset_configs.get('weights'),
                reranking_enabled=dataset_configs.get('reranking_enabled', True),
            )
        )
@@ -3,8 +3,9 @@ from typing import Any, Optional

from pydantic import BaseModel

from core.file.file_obj import FileExtraConfig
from core.model_runtime.entities.message_entities import PromptMessageRole
from models.model import AppMode
from models import AppMode


class ModelConfigEntity(BaseModel):
@@ -158,10 +159,11 @@ class DatasetRetrieveConfigEntity(BaseModel):

    retrieve_strategy: RetrieveStrategy
    top_k: Optional[int] = None
    score_threshold: Optional[float] = None
    score_threshold: Optional[float] = .0
    rerank_mode: Optional[str] = 'reranking_model'
    reranking_model: Optional[dict] = None
    weights: Optional[dict] = None
    reranking_enabled: Optional[bool] = True

@@ -199,11 +201,6 @@ class TracingConfigEntity(BaseModel):
    tracing_provider: str


class FileExtraConfig(BaseModel):
    """
    File Upload Entity.
    """
    image_config: Optional[dict[str, Any]] = None


class AppAdditionalFeatures(BaseModel):

@@ -1,7 +1,7 @@
from collections.abc import Mapping
from typing import Any, Optional

from core.app.app_config.entities import FileExtraConfig
from core.file.file_obj import FileExtraConfig


class FileUploadConfigManager:
@@ -89,7 +89,8 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
        )

        # get tracing instance
        trace_manager = TraceQueueManager(app_id=app_model.id)
        user_id = user.id if isinstance(user, Account) else user.session_id
        trace_manager = TraceQueueManager(app_model.id, user_id)

        if invoke_from == InvokeFrom.DEBUGGER:
            # always enable retriever resource in debugger mode
@@ -112,7 +113,6 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
        contexts.tenant_id.set(application_generate_entity.app_config.tenant_id)

        return self._generate(
            app_model=app_model,
            workflow=workflow,
            user=user,
            invoke_from=invoke_from,
@@ -121,7 +121,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
            stream=stream
        )

    def _generate(self, app_model: App,
    def _generate(self, *,
                  workflow: Workflow,
                  user: Union[Account, EndUser],
                  invoke_from: InvokeFrom,
@@ -5,7 +5,12 @@ import queue
import re
import threading

from core.app.entities.queue_entities import QueueAgentMessageEvent, QueueLLMChunkEvent, QueueTextChunkEvent
from core.app.entities.queue_entities import (
    QueueAgentMessageEvent,
    QueueLLMChunkEvent,
    QueueNodeSucceededEvent,
    QueueTextChunkEvent,
)
from core.model_manager import ModelManager
from core.model_runtime.entities.model_entities import ModelType

@@ -88,6 +93,8 @@ class AppGeneratorTTSPublisher:
                self.msg_text += message.event.chunk.delta.message.content
            elif isinstance(message.event, QueueTextChunkEvent):
                self.msg_text += message.event.text
            elif isinstance(message.event, QueueNodeSucceededEvent):
                self.msg_text += message.event.outputs.get('output', '')
            self.last_message = message
            sentence_arr, text_tmp = self._extract_sentence(self.msg_text)
            if len(sentence_arr) >= min(self.MAX_SENTENCE, 7):
@@ -3,6 +3,9 @@ import os
from collections.abc import Mapping
from typing import Any, Optional, cast

from sqlalchemy import select
from sqlalchemy.orm import Session

from core.app.apps.advanced_chat.app_config_manager import AdvancedChatAppConfig
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
from core.app.apps.base_app_runner import AppRunner
@@ -32,6 +35,7 @@ from core.app.entities.queue_entities import (
from core.moderation.base import ModerationException
from core.workflow.callbacks.base_workflow_callback import WorkflowCallback
from core.workflow.entities.node_entities import SystemVariable, UserFrom
from core.workflow.entities.variable_pool import VariablePool
from core.workflow.graph_engine.entities.event import (
    GraphEngineEvent,
    GraphRunFailedEvent,
@@ -53,7 +57,7 @@ from core.workflow.graph_engine.entities.event import (
from core.workflow.workflow_entry import WorkflowEntry
from extensions.ext_database import db
from models.model import App, Conversation, EndUser, Message
from models.workflow import Workflow
from models.workflow import ConversationVariable, Workflow

logger = logging.getLogger(__name__)

@@ -91,11 +95,11 @@ class AdvancedChatAppRunner(AppRunner):

        app_record = db.session.query(App).filter(App.id == app_config.app_id).first()
        if not app_record:
            raise ValueError("App not found")
            raise ValueError('App not found')

        workflow = self.get_workflow(app_model=app_record, workflow_id=app_config.workflow_id)
        if not workflow:
            raise ValueError("Workflow not initialized")
            raise ValueError('Workflow not initialized')

        inputs = self.application_generate_entity.inputs
        query = self.application_generate_entity.query
@@ -134,6 +138,38 @@ class AdvancedChatAppRunner(AppRunner):
        if bool(os.environ.get("DEBUG", 'False').lower() == 'true'):
            workflow_callbacks.append(WorkflowLoggingCallback())

        # Init conversation variables
        stmt = select(ConversationVariable).where(
            ConversationVariable.app_id == conversation.app_id, ConversationVariable.conversation_id == conversation.id
        )
        with Session(db.engine) as session:
            conversation_variables = session.scalars(stmt).all()
            if not conversation_variables:
                conversation_variables = [
                    ConversationVariable.from_variable(
                        app_id=conversation.app_id, conversation_id=conversation.id, variable=variable
                    )
                    for variable in workflow.conversation_variables
                ]
                session.add_all(conversation_variables)
                session.commit()
            # Convert database entities to variables
            conversation_variables = [item.to_variable() for item in conversation_variables]

        # Create a variable pool.
        system_inputs = {
            SystemVariable.QUERY: query,
            SystemVariable.FILES: files,
            SystemVariable.CONVERSATION_ID: conversation.id,
            SystemVariable.USER_ID: user_id,
        }
        variable_pool = VariablePool(
            system_variables=system_inputs,
            user_inputs=inputs,
            environment_variables=workflow.environment_variables,
            conversation_variables=conversation_variables,
        )

        # RUN WORKFLOW
        workflow_entry = WorkflowEntry(
            workflow=workflow,
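
The block above is a get-or-create pattern: reuse the conversation's persisted variables when they exist, otherwise seed them once from the workflow's declared defaults. A minimal standalone sketch of the same idea, using the model and helpers named in this diff (the wrapper function itself is hypothetical):

    from sqlalchemy import select
    from sqlalchemy.orm import Session

    from models.workflow import ConversationVariable

    def get_or_create_conversation_variables(session: Session, workflow, conversation):
        # Hypothetical helper distilling the logic above.
        stmt = select(ConversationVariable).where(
            ConversationVariable.app_id == conversation.app_id,
            ConversationVariable.conversation_id == conversation.id,
        )
        rows = session.scalars(stmt).all()
        if not rows:
            # First turn of this conversation: persist the workflow defaults once.
            rows = [
                ConversationVariable.from_variable(
                    app_id=conversation.app_id, conversation_id=conversation.id, variable=v
                )
                for v in workflow.conversation_variables
            ]
            session.add_all(rows)
            session.commit()
        # Hand plain variables (not ORM rows) to the variable pool.
        return [row.to_variable() for row in rows]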
@@ -142,14 +178,8 @@ class AdvancedChatAppRunner(AppRunner):
            if self.application_generate_entity.invoke_from in [InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER]
            else UserFrom.END_USER,
            invoke_from=self.application_generate_entity.invoke_from,
            user_inputs=inputs,
            system_inputs={
                SystemVariable.QUERY: query,
                SystemVariable.FILES: files,
                SystemVariable.CONVERSATION_ID: self.conversation.id,
                SystemVariable.USER_ID: user_id
            },
            call_depth=self.application_generate_entity.call_depth
            call_depth=self.application_generate_entity.call_depth,
            variable_pool=variable_pool,
        )

        generator = workflow_entry.run(
@@ -323,11 +353,13 @@ class AdvancedChatAppRunner(AppRunner):
        Get workflow
        """
        # fetch workflow by workflow_id
        workflow = db.session.query(Workflow).filter(
            Workflow.tenant_id == app_model.tenant_id,
            Workflow.app_id == app_model.id,
            Workflow.id == workflow_id
        ).first()
        workflow = (
            db.session.query(Workflow)
            .filter(
                Workflow.tenant_id == app_model.tenant_id, Workflow.app_id == app_model.id, Workflow.id == workflow_id
            )
            .first()
        )

        # return workflow
        return workflow
@@ -385,7 +417,7 @@ class AdvancedChatAppRunner(AppRunner):
            message=message,
            query=query,
            user_id=app_generate_entity.user_id,
            invoke_from=app_generate_entity.invoke_from
            invoke_from=app_generate_entity.invoke_from,
        )

        if annotation_reply:
@@ -110,7 +110,8 @@ class AgentChatAppGenerator(MessageBasedAppGenerator):
        )

        # get tracing instance
        trace_manager = TraceQueueManager(app_model.id)
        user_id = user.id if isinstance(user, Account) else user.session_id
        trace_manager = TraceQueueManager(app_model.id, user_id)

        # init application generate entity
        application_generate_entity = AgentChatAppGenerateEntity(

@@ -74,7 +74,8 @@ class WorkflowAppGenerator(BaseAppGenerator):
        )

        # get tracing instance
        trace_manager = TraceQueueManager(app_model.id)
        user_id = user.id if isinstance(user, Account) else user.session_id
        trace_manager = TraceQueueManager(app_model.id, user_id)

        # init application generate entity
        application_generate_entity = WorkflowAppGenerateEntity(
@@ -11,6 +11,7 @@ from core.app.entities.app_invoke_entities import (
)
from core.workflow.callbacks.base_workflow_callback import WorkflowCallback
from core.workflow.entities.node_entities import SystemVariable, UserFrom
from core.workflow.entities.variable_pool import VariablePool
from core.workflow.workflow_entry import WorkflowEntry
from extensions.ext_database import db
from models.model import App, EndUser
@@ -24,8 +25,7 @@ class WorkflowAppRunner:
    Workflow Application Runner
    """

    def run(self, application_generate_entity: WorkflowAppGenerateEntity,
            queue_manager: AppQueueManager) -> None:
    def run(self, application_generate_entity: WorkflowAppGenerateEntity, queue_manager: AppQueueManager) -> None:
        """
        Run application
        :param application_generate_entity: application generate entity
@@ -45,11 +45,11 @@ class WorkflowAppRunner:

        app_record = db.session.query(App).filter(App.id == app_config.app_id).first()
        if not app_record:
            raise ValueError("App not found")
            raise ValueError('App not found')

        workflow = self.get_workflow(app_model=app_record, workflow_id=app_config.workflow_id)
        if not workflow:
            raise ValueError("Workflow not initialized")
            raise ValueError('Workflow not initialized')

        inputs = application_generate_entity.inputs
        files = application_generate_entity.files
@@ -58,9 +58,21 @@ class WorkflowAppRunner:

        workflow_callbacks: list[WorkflowCallback] = []

        if bool(os.environ.get("DEBUG", 'False').lower() == 'true'):
        if bool(os.environ.get('DEBUG', 'False').lower() == 'true'):
            workflow_callbacks.append(WorkflowLoggingCallback())

        # Create a variable pool.
        system_inputs = {
            SystemVariable.FILES: files,
            SystemVariable.USER_ID: user_id,
        }
        variable_pool = VariablePool(
            system_variables=system_inputs,
            user_inputs=inputs,
            environment_variables=workflow.environment_variables,
            conversation_variables=[],
        )

        # RUN WORKFLOW
        workflow_entry = WorkflowEntry()
        workflow_entry.run(
@@ -71,26 +83,22 @@ class WorkflowAppRunner:
            else UserFrom.END_USER,
            invoke_from=application_generate_entity.invoke_from,
            callbacks=workflow_callbacks,
            user_inputs=inputs,
            system_inputs={
                SystemVariable.FILES: files,
                SystemVariable.USER_ID: user_id
            },
            call_depth=application_generate_entity.call_depth
            call_depth=application_generate_entity.call_depth,
            variable_pool=variable_pool,
        )

    def single_iteration_run(self, app_id: str, workflow_id: str,
                             queue_manager: AppQueueManager,
                             inputs: dict, node_id: str, user_id: str) -> None:
    def single_iteration_run(
        self, app_id: str, workflow_id: str, queue_manager: AppQueueManager, inputs: dict, node_id: str, user_id: str
    ) -> None:
        """
        Single iteration run
        """
        app_record: App = db.session.query(App).filter(App.id == app_id).first()
        app_record = db.session.query(App).filter(App.id == app_id).first()
        if not app_record:
            raise ValueError("App not found")
            raise ValueError('App not found')

        if not app_record.workflow_id:
            raise ValueError("Workflow not initialized")
            raise ValueError('Workflow not initialized')

        workflow = self.get_workflow(app_model=app_record, workflow_id=workflow_id)
        if not workflow:
@@ -112,11 +120,13 @@ class WorkflowAppRunner:
        Get workflow
        """
        # fetch workflow by workflow_id
        workflow = db.session.query(Workflow).filter(
            Workflow.tenant_id == app_model.tenant_id,
            Workflow.app_id == app_model.id,
            Workflow.id == workflow_id
        ).first()
        workflow = (
            db.session.query(Workflow)
            .filter(
                Workflow.tenant_id == app_model.tenant_id, Workflow.app_id == app_model.id, Workflow.id == workflow_id
            )
            .first()
        )

        # return workflow
        return workflow
@@ -1,6 +1,7 @@
from .segment_group import SegmentGroup
from .segments import (
    ArrayAnySegment,
    ArraySegment,
    FileSegment,
    FloatSegment,
    IntegerSegment,
@@ -50,4 +51,5 @@ __all__ = [
    'ArrayNumberVariable',
    'ArrayObjectVariable',
    'ArrayFileVariable',
    'ArraySegment',
]

api/core/app/segments/exc.py (new file, 2 lines)
@@ -0,0 +1,2 @@
class VariableError(Exception):
    pass
@@ -1,8 +1,10 @@
from collections.abc import Mapping
from typing import Any

from configs import dify_config
from core.file.file_obj import FileVar

from .exc import VariableError
from .segments import (
    ArrayAnySegment,
    FileSegment,
@@ -29,39 +31,43 @@ from .variables import (
)


def build_variable_from_mapping(m: Mapping[str, Any], /) -> Variable:
    if (value_type := m.get('value_type')) is None:
        raise ValueError('missing value type')
    if not m.get('name'):
        raise ValueError('missing name')
    if (value := m.get('value')) is None:
        raise ValueError('missing value')
def build_variable_from_mapping(mapping: Mapping[str, Any], /) -> Variable:
    if (value_type := mapping.get('value_type')) is None:
        raise VariableError('missing value type')
    if not mapping.get('name'):
        raise VariableError('missing name')
    if (value := mapping.get('value')) is None:
        raise VariableError('missing value')
    match value_type:
        case SegmentType.STRING:
            return StringVariable.model_validate(m)
            result = StringVariable.model_validate(mapping)
        case SegmentType.SECRET:
            return SecretVariable.model_validate(m)
            result = SecretVariable.model_validate(mapping)
        case SegmentType.NUMBER if isinstance(value, int):
            return IntegerVariable.model_validate(m)
            result = IntegerVariable.model_validate(mapping)
        case SegmentType.NUMBER if isinstance(value, float):
            return FloatVariable.model_validate(m)
            result = FloatVariable.model_validate(mapping)
        case SegmentType.NUMBER if not isinstance(value, float | int):
            raise ValueError(f'invalid number value {value}')
            raise VariableError(f'invalid number value {value}')
        case SegmentType.FILE:
            return FileVariable.model_validate(m)
            result = FileVariable.model_validate(mapping)
        case SegmentType.OBJECT if isinstance(value, dict):
            return ObjectVariable.model_validate(
                {**m, 'value': {k: build_variable_from_mapping(v) for k, v in value.items()}}
            )
            result = ObjectVariable.model_validate(mapping)
        case SegmentType.ARRAY_STRING if isinstance(value, list):
            return ArrayStringVariable.model_validate({**m, 'value': [build_variable_from_mapping(v) for v in value]})
            result = ArrayStringVariable.model_validate(mapping)
        case SegmentType.ARRAY_NUMBER if isinstance(value, list):
            return ArrayNumberVariable.model_validate({**m, 'value': [build_variable_from_mapping(v) for v in value]})
            result = ArrayNumberVariable.model_validate(mapping)
        case SegmentType.ARRAY_OBJECT if isinstance(value, list):
            return ArrayObjectVariable.model_validate({**m, 'value': [build_variable_from_mapping(v) for v in value]})
            result = ArrayObjectVariable.model_validate(mapping)
        case SegmentType.ARRAY_FILE if isinstance(value, list):
            return ArrayFileVariable.model_validate({**m, 'value': [build_variable_from_mapping(v) for v in value]})
    raise ValueError(f'not supported value type {value_type}')
            mapping = dict(mapping)
            mapping['value'] = [{'value': v} for v in value]
            result = ArrayFileVariable.model_validate(mapping)
        case _:
            raise VariableError(f'not supported value type {value_type}')
    if result.size > dify_config.MAX_VARIABLE_SIZE:
        raise VariableError(f'variable size {result.size} exceeds limit {dify_config.MAX_VARIABLE_SIZE}')
    return result


def build_segment(value: Any, /) -> Segment:
@@ -74,13 +80,9 @@ def build_segment(value: Any, /) -> Segment:
    if isinstance(value, float):
        return FloatSegment(value=value)
    if isinstance(value, dict):
        # TODO: Limit the depth of the object
        obj = {k: build_segment(v) for k, v in value.items()}
        return ObjectSegment(value=obj)
        return ObjectSegment(value=value)
    if isinstance(value, list):
        # TODO: Limit the depth of the array
        elements = [build_segment(v) for v in value]
        return ArrayAnySegment(value=elements)
        return ArrayAnySegment(value=value)
    if isinstance(value, FileVar):
        return FileSegment(value=value)
    raise ValueError(f'not supported value {value}')
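
A usage sketch for the reworked factory. The mapping shape follows the function above; the import paths mirror the api/core/app/segments package shown in this diff (the exact factory import is not shown, so it is an assumption), and the 'string' literal for value_type is an assumption about SegmentType's string values:

    from core.app.segments import factory  # assumed import path
    from core.app.segments.exc import VariableError

    var = factory.build_variable_from_mapping({
        'name': 'greeting',
        'value': 'hello',
        'value_type': 'string',  # assumed value of SegmentType.STRING
    })

    try:
        factory.build_variable_from_mapping({'value': 1})  # no value_type, no name
    except VariableError as exc:
        print(exc)  # 'missing value type'

    # Oversized values are rejected as well: any variable whose size exceeds
    # dify_config.MAX_VARIABLE_SIZE (default 5 KB) raises VariableError.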
@@ -1,4 +1,5 @@
import json
import sys
from collections.abc import Mapping, Sequence
from typing import Any

@@ -37,6 +38,10 @@ class Segment(BaseModel):
    def markdown(self) -> str:
        return str(self.value)

    @property
    def size(self) -> int:
        return sys.getsizeof(self.value)

    def to_object(self) -> Any:
        return self.value

@@ -85,54 +90,45 @@ class FileSegment(Segment):

class ObjectSegment(Segment):
    value_type: SegmentType = SegmentType.OBJECT
    value: Mapping[str, Segment]
    value: Mapping[str, Any]

    @property
    def text(self) -> str:
        # TODO: Process variables.
        return json.dumps(self.model_dump()['value'], ensure_ascii=False)

    @property
    def log(self) -> str:
        # TODO: Process variables.
        return json.dumps(self.model_dump()['value'], ensure_ascii=False, indent=2)

    @property
    def markdown(self) -> str:
        # TODO: Use markdown code block
        return json.dumps(self.model_dump()['value'], ensure_ascii=False, indent=2)

    def to_object(self):
        return {k: v.to_object() for k, v in self.value.items()}


class ArraySegment(Segment):
    @property
    def markdown(self) -> str:
        return '\n'.join(['- ' + item.markdown for item in self.value])

    def to_object(self):
        return [v.to_object() for v in self.value]


class ArrayAnySegment(ArraySegment):
    value_type: SegmentType = SegmentType.ARRAY_ANY
    value: Sequence[Segment]
    value: Sequence[Any]


class ArrayStringSegment(ArraySegment):
    value_type: SegmentType = SegmentType.ARRAY_STRING
    value: Sequence[StringSegment]
    value: Sequence[str]


class ArrayNumberSegment(ArraySegment):
    value_type: SegmentType = SegmentType.ARRAY_NUMBER
    value: Sequence[FloatSegment | IntegerSegment]
    value: Sequence[float | int]


class ArrayObjectSegment(ArraySegment):
    value_type: SegmentType = SegmentType.ARRAY_OBJECT
    value: Sequence[ObjectSegment]
    value: Sequence[Mapping[str, Any]]


class ArrayFileSegment(ArraySegment):
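
One caveat on the new size property: sys.getsizeof is shallow, so for containers it reports the container's own footprint rather than its elements'. A small illustration in plain Python:

    import sys

    s = 'x' * 5000
    print(sys.getsizeof(s))      # ~5049 on CPython: the string payload is counted

    items = ['x' * 5000]
    print(sys.getsizeof(items))  # ~64: only the list header and one pointer

So the MAX_VARIABLE_SIZE check above is a strict guard for strings and numbers but only an approximate one for object and array segments.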
@@ -48,7 +48,8 @@ from core.model_runtime.entities.message_entities import (
)
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.model_runtime.utils.encoders import jsonable_encoder
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask, TraceTaskName
from core.ops.entities.trace_entity import TraceTaskName
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.prompt.utils.prompt_message_util import PromptMessageUtil
from core.prompt.utils.prompt_template_parser import PromptTemplateParser
from events.message_event import message_was_created
@@ -24,7 +24,8 @@ from core.app.entities.task_entities import (
)
from core.file.file_obj import FileVar
from core.model_runtime.utils.encoders import jsonable_encoder
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask, TraceTaskName
from core.ops.entities.trace_entity import TraceTaskName
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.tools.tool_manager import ToolManager
from core.workflow.entities.node_entities import NodeType, SystemVariable
from core.workflow.nodes.tool.entities import ToolNodeData
@@ -42,6 +43,7 @@ from models.workflow import (
    WorkflowRunStatus,
    WorkflowRunTriggeredFrom,
)
from services.workflow_service import WorkflowService


class WorkflowCycleManage:
@@ -50,7 +52,7 @@ class WorkflowCycleManage:
    _user: Union[Account, EndUser]
    _task_state: WorkflowTaskState
    _workflow_system_variables: dict[SystemVariable, Any]

    def _handle_workflow_run_start(self) -> WorkflowRun:
        max_sequence = (
            db.session.query(db.func.max(WorkflowRun.sequence_number))
@@ -71,7 +73,7 @@ class WorkflowCycleManage:
        inputs = WorkflowEntry.handle_special_values(inputs)

        triggered_from = (
            WorkflowRunTriggeredFrom.DEBUGGING
            if self._application_generate_entity.invoke_from == InvokeFrom.DEBUGGER
            else WorkflowRunTriggeredFrom.APP_RUN
        )
@@ -99,7 +101,7 @@ class WorkflowCycleManage:
        db.session.close()

        return workflow_run

    def _handle_workflow_run_success(
        self,
        workflow_run: WorkflowRun,
@@ -121,7 +123,7 @@ class WorkflowCycleManage:
        :return:
        """
        workflow_run = self._refetch_workflow_run(workflow_run.id)

        workflow_run.status = WorkflowRunStatus.SUCCEEDED.value
        workflow_run.outputs = outputs
        workflow_run.elapsed_time = time.perf_counter() - start_at
@@ -138,6 +140,7 @@ class WorkflowCycleManage:
                    TraceTaskName.WORKFLOW_TRACE,
                    workflow_run=workflow_run,
                    conversation_id=conversation_id,
                    user_id=trace_manager.user_id,
                )
            )

@@ -185,11 +188,12 @@ class WorkflowCycleManage:
                    TraceTaskName.WORKFLOW_TRACE,
                    workflow_run=workflow_run,
                    conversation_id=conversation_id,
                    user_id=trace_manager.user_id,
                )
            )

        return workflow_run

    def _handle_node_execution_start(self, workflow_run: WorkflowRun, event: QueueNodeStartedEvent) -> WorkflowNodeExecution:
        # init workflow node execution
        workflow_node_execution = WorkflowNodeExecution()
@@ -250,7 +254,7 @@ class WorkflowCycleManage:
        :return:
        """
        workflow_node_execution = self._refetch_workflow_node_execution(event.node_execution_id)

        inputs = WorkflowEntry.handle_special_values(event.inputs)
        outputs = WorkflowEntry.handle_special_values(event.outputs)

@@ -267,7 +271,7 @@ class WorkflowCycleManage:
        db.session.close()

        return workflow_node_execution

    #################################################
    #             to stream responses               #
    #################################################
@@ -406,10 +410,10 @@ class WorkflowCycleManage:
                files=self._fetch_files_from_node_outputs(workflow_node_execution.outputs_dict or {}),
            ),
        )

    def _workflow_iteration_start_to_stream_response(
        self,
        task_id: str,
        workflow_run: WorkflowRun,
        event: QueueIterationStartEvent
    ) -> IterationNodeStartStreamResponse:
@@ -434,7 +438,7 @@ class WorkflowCycleManage:
                metadata=event.metadata or {}
            )
        )

    def _workflow_iteration_next_to_stream_response(self, task_id: str, workflow_run: WorkflowRun, event: QueueIterationNextEvent) -> IterationNodeNextStreamResponse:
        """
        Workflow iteration next to stream response
@@ -457,7 +461,7 @@ class WorkflowCycleManage:
                extras={}
            )
        )

    def _workflow_iteration_completed_to_stream_response(self, task_id: str, workflow_run: WorkflowRun, event: QueueIterationCompletedEvent) -> IterationNodeCompletedStreamResponse:
        """
        Workflow iteration completed to stream response
@@ -552,10 +556,10 @@ class WorkflowCycleManage:
        """
        workflow_run = db.session.query(WorkflowRun).filter(
            WorkflowRun.id == workflow_run_id).first()

        if not workflow_run:
            raise Exception(f'Workflow run not found: {workflow_run_id}')

        return workflow_run

    def _refetch_workflow_node_execution(self, node_execution_id: str) -> WorkflowNodeExecution:
@@ -578,5 +582,5 @@ class WorkflowCycleManage:

        if not workflow_node_execution:
            raise Exception(f'Workflow node execution not found: {node_execution_id}')

        return workflow_node_execution
@@ -4,7 +4,8 @@ from typing import Any, Optional, TextIO, Union

from pydantic import BaseModel

from core.ops.ops_trace_manager import TraceQueueManager, TraceTask, TraceTaskName
from core.ops.entities.trace_entity import TraceTaskName
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.tools.entities.tool_entities import ToolInvokeMessage

_TEXT_COLOR_MAPPING = {

@@ -8,6 +8,7 @@ from typing import Optional

from pydantic import BaseModel, ConfigDict

from constants import HIDDEN_VALUE
from core.entities.model_entities import ModelStatus, ModelWithProviderEntity, SimpleModelProviderEntity
from core.entities.provider_entities import (
    CustomConfiguration,
@@ -202,7 +203,7 @@ class ProviderConfiguration(BaseModel):
        for key, value in credentials.items():
            if key in provider_credential_secret_variables:
                # if send [__HIDDEN__] in secret input, it will be same as original value
                if value == '[__HIDDEN__]' and key in original_credentials:
                if value == HIDDEN_VALUE and key in original_credentials:
                    credentials[key] = encrypter.decrypt_token(self.tenant_id, original_credentials[key])

        credentials = model_provider_factory.provider_credentials_validate(
@@ -345,7 +346,7 @@ class ProviderConfiguration(BaseModel):
        for key, value in credentials.items():
            if key in provider_credential_secret_variables:
                # if send [__HIDDEN__] in secret input, it will be same as original value
                if value == '[__HIDDEN__]' and key in original_credentials:
                if value == HIDDEN_VALUE and key in original_credentials:
                    credentials[key] = encrypter.decrypt_token(self.tenant_id, original_credentials[key])

        credentials = model_provider_factory.model_credentials_validate(
@@ -1,14 +1,19 @@
import enum
from typing import Optional
from typing import Any, Optional

from pydantic import BaseModel

from core.app.app_config.entities import FileExtraConfig
from core.file.tool_file_parser import ToolFileParser
from core.file.upload_file_parser import UploadFileParser
from core.model_runtime.entities.message_entities import ImagePromptMessageContent
from extensions.ext_database import db
from models.model import UploadFile


class FileExtraConfig(BaseModel):
    """
    File Upload Entity.
    """
    image_config: Optional[dict[str, Any]] = None


class FileType(enum.Enum):
@@ -114,6 +119,7 @@ class FileVar(BaseModel):
        )

    def _get_data(self, force_url: bool = False) -> Optional[str]:
        from models.model import UploadFile
        if self.type == FileType.IMAGE:
            if self.transfer_method == FileTransferMethod.REMOTE_URL:
                return self.url
@@ -1,10 +1,11 @@
import re
from collections.abc import Mapping, Sequence
from typing import Any, Union
from urllib.parse import parse_qs, urlparse

import requests

from core.app.app_config.entities import FileExtraConfig
from core.file.file_obj import FileBelongsTo, FileTransferMethod, FileType, FileVar
from core.file.file_obj import FileBelongsTo, FileExtraConfig, FileTransferMethod, FileType, FileVar
from extensions.ext_database import db
from models.account import Account
from models.model import EndUser, MessageFile, UploadFile
@@ -186,6 +187,30 @@ class MessageFileParser:
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }

        def is_s3_presigned_url(url):
            try:
                parsed_url = urlparse(url)
                if 'amazonaws.com' not in parsed_url.netloc:
                    return False
                query_params = parse_qs(parsed_url.query)
                required_params = ['Signature', 'Expires']
                for param in required_params:
                    if param not in query_params:
                        return False
                if not query_params['Expires'][0].isdigit():
                    return False
                signature = query_params['Signature'][0]
                if not re.match(r'^[A-Za-z0-9+/]+={0,2}$', signature):
                    return False
                return True
            except Exception:
                return False

        if is_s3_presigned_url(url):
            response = requests.get(url, headers=headers, allow_redirects=True)
            if response.status_code in {200, 304}:
                return True, ""

        response = requests.head(url, headers=headers, allow_redirects=True)
        if response.status_code in {200, 304}:
            return True, ""
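
To see what the new checker accepts, here is a standalone demonstration with fabricated URLs (is_s3_presigned_url is a local function inside the method above, so this sketch assumes it has been lifted to module scope):

    presigned = (
        'https://my-bucket.s3.amazonaws.com/avatar.png'
        '?AWSAccessKeyId=AKIA0000&Signature=dGVzdA%3D%3D&Expires=1719999999'
    )
    plain = 'https://example.com/avatar.png'

    print(is_s3_presigned_url(presigned))  # True: amazonaws.com host, base64-like
                                           # Signature, numeric Expires
    print(is_s3_presigned_url(plain))      # False: not an amazonaws.com host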
@@ -107,11 +107,11 @@ class CodeExecutor:
            response = response.json()
        except:
            raise CodeExecutionException('Failed to parse response')

        if (code := response.get('code')) != 0:
            raise CodeExecutionException(f"Got error code: {code}. Got error msg: {response.get('message')}")

        response = CodeExecutionResponse(**response)

        if response.code != 0:
            raise CodeExecutionException(response.message)

        if response.data.error:
            raise CodeExecutionException(response.data.error)
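
The change above swaps ad-hoc dict access for a typed response object, so malformed payloads fail at validation time rather than deep inside the handler. A sketch of what such a model could look like, with field names inferred from the usage above (the real CodeExecutionResponse definition is not part of this diff):

    from typing import Optional

    from pydantic import BaseModel

    class CodeExecutionData(BaseModel):
        # Inferred from `response.data.error` above; other fields omitted.
        error: Optional[str] = None

    class CodeExecutionResponse(BaseModel):
        # Inferred from `response.code` / `response.message` above.
        code: int
        message: Optional[str] = None
        data: CodeExecutionData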
@@ -2,7 +2,6 @@ import base64

from extensions.ext_database import db
from libs import rsa
from models.account import Tenant


def obfuscated_token(token: str):
@@ -14,6 +13,7 @@ def obfuscated_token(token: str):


def encrypt_token(tenant_id: str, token: str):
    from models.account import Tenant
    if not (tenant := db.session.query(Tenant).filter(Tenant.id == tenant_id).first()):
        raise ValueError(f'Tenant with id {tenant_id} not found')
    encrypted_token = rsa.encrypt(token, tenant.encrypt_public_key)

@@ -73,6 +73,8 @@ class HostingConfiguration:
                quota_limit=hosted_quota_limit,
                restrict_models=[
                    RestrictModel(model="gpt-4", base_model_name="gpt-4", model_type=ModelType.LLM),
                    RestrictModel(model="gpt-4o", base_model_name="gpt-4o", model_type=ModelType.LLM),
                    RestrictModel(model="gpt-4o-mini", base_model_name="gpt-4o-mini", model_type=ModelType.LLM),
                    RestrictModel(model="gpt-4-32k", base_model_name="gpt-4-32k", model_type=ModelType.LLM),
                    RestrictModel(model="gpt-4-1106-preview", base_model_name="gpt-4-1106-preview", model_type=ModelType.LLM),
                    RestrictModel(model="gpt-4-vision-preview", base_model_name="gpt-4-vision-preview", model_type=ModelType.LLM),
@@ -14,7 +14,8 @@ from core.model_manager import ModelManager
from core.model_runtime.entities.message_entities import SystemPromptMessage, UserPromptMessage
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask, TraceTaskName
from core.ops.entities.trace_entity import TraceTaskName
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.ops.utils import measure_time
from core.prompt.utils.prompt_template_parser import PromptTemplateParser
@ -1,18 +1,16 @@
|
||||
import hashlib
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
import uuid
|
||||
from abc import abstractmethod
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import ConfigDict
|
||||
|
||||
from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
|
||||
from core.model_runtime.errors.invoke import InvokeBadRequestError
|
||||
from core.model_runtime.model_providers.__base.ai_model import AIModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TTSModel(AIModel):
|
||||
"""
|
||||
Model class for ttstext model.
|
||||
@ -37,8 +35,6 @@ class TTSModel(AIModel):
|
||||
:return: translated audio file
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Invoke TTS model: {model} , invoke content : {content_text}")
|
||||
self._is_ffmpeg_installed()
|
||||
return self._invoke(model=model, credentials=credentials, user=user,
|
||||
content_text=content_text, voice=voice, tenant_id=tenant_id)
|
||||
except Exception as e:
|
||||
@ -75,7 +71,8 @@ class TTSModel(AIModel):
|
||||
if model_schema and ModelPropertyKey.VOICES in model_schema.model_properties:
|
||||
voices = model_schema.model_properties[ModelPropertyKey.VOICES]
|
||||
if language:
|
||||
return [{'name': d['name'], 'value': d['mode']} for d in voices if language and language in d.get('language')]
|
||||
return [{'name': d['name'], 'value': d['mode']} for d in voices if
|
||||
language and language in d.get('language')]
|
||||
else:
|
||||
return [{'name': d['name'], 'value': d['mode']} for d in voices]
|
||||
|
||||
@ -146,28 +143,3 @@ class TTSModel(AIModel):
|
||||
if one_sentence != '':
|
||||
result.append(one_sentence)
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _is_ffmpeg_installed():
|
||||
try:
|
||||
output = subprocess.check_output("ffmpeg -version", shell=True)
|
||||
if "ffmpeg version" in output.decode("utf-8"):
|
||||
return True
|
||||
else:
|
||||
raise InvokeBadRequestError("ffmpeg is not installed, "
|
||||
"details: https://docs.dify.ai/getting-started/install-self-hosted"
|
||||
"/install-faq#id-14.-what-to-do-if-this-error-occurs-in-text-to-speech")
|
||||
except Exception:
|
||||
raise InvokeBadRequestError("ffmpeg is not installed, "
|
||||
"details: https://docs.dify.ai/getting-started/install-self-hosted"
|
||||
"/install-faq#id-14.-what-to-do-if-this-error-occurs-in-text-to-speech")
|
||||
|
||||
# Todo: To improve the streaming function
|
||||
@staticmethod
|
||||
def _get_file_name(file_content: str) -> str:
|
||||
hash_object = hashlib.sha256(file_content.encode())
|
||||
hex_digest = hash_object.hexdigest()
|
||||
|
||||
namespace_uuid = uuid.UUID('a5da6ef9-b303-596f-8e88-bf8fa40f4b31')
|
||||
unique_uuid = uuid.uuid5(namespace_uuid, hex_digest)
|
||||
return str(unique_uuid)
|
||||
|
||||
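The `_get_file_name` helper retained above gives TTS audio files content-addressed names: the SHA-256 digest of the input text is folded into a UUIDv5, so the same text always maps to the same file name. A minimal standalone sketch of the same scheme, reusing the fixed namespace UUID from the code above:

import hashlib
import uuid

# Fixed namespace from TTSModel._get_file_name; any stable UUID works,
# but changing it invalidates previously generated file names.
NAMESPACE_UUID = uuid.UUID('a5da6ef9-b303-596f-8e88-bf8fa40f4b31')

def file_name_for(text: str) -> str:
    digest = hashlib.sha256(text.encode()).hexdigest()
    return str(uuid.uuid5(NAMESPACE_UUID, digest))

# Deterministic across runs and processes:
assert file_name_for('Hello Dify!') == file_name_for('Hello Dify!')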
@@ -6,6 +6,7 @@
- nvidia
- nvidia_nim
- cohere
- upstage
- bedrock
- togetherai
- openrouter
@@ -35,3 +36,4 @@
- hunyuan
- siliconflow
- perfxcloud
- zhinao

@@ -116,7 +116,8 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
        # Add the new header for claude-3-5-sonnet-20240620 model
        extra_headers = {}
        if model == "claude-3-5-sonnet-20240620":
            extra_headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15"
            if model_parameters.get('max_tokens') > 4096:
                extra_headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15"

        if tools:
            extra_model_kwargs['tools'] = [
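Because the +/- markers were lost in extraction, the Anthropic hunk above is easiest to read by its end state: the max-tokens beta header, previously sent unconditionally for claude-3-5-sonnet-20240620, is now sent only when the request asks for more than the standard 4096 output tokens. A sketch of the resulting logic (a default of 0 is added here so a missing max_tokens cannot raise a TypeError; the code above calls .get() without one):

def build_extra_headers(model: str, model_parameters: dict) -> dict:
    # Opt into the extended-output beta only when it is actually needed.
    extra_headers = {}
    if model == "claude-3-5-sonnet-20240620":
        if model_parameters.get('max_tokens', 0) > 4096:
            extra_headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15"
    return extra_headers

assert build_extra_headers("claude-3-5-sonnet-20240620", {'max_tokens': 8192}) == \
    {"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"}
assert build_extra_headers("claude-3-5-sonnet-20240620", {'max_tokens': 1024}) == {}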
@@ -496,6 +496,158 @@ LLM_BASE_MODELS = [
                )
            )
        ),
        AzureBaseModel(
            base_model_name='gpt-4o-mini',
            entity=AIModelEntity(
                model='fake-deployment-name',
                label=I18nObject(
                    en_US='fake-deployment-name-label',
                ),
                model_type=ModelType.LLM,
                features=[
                    ModelFeature.AGENT_THOUGHT,
                    ModelFeature.VISION,
                    ModelFeature.MULTI_TOOL_CALL,
                    ModelFeature.STREAM_TOOL_CALL,
                ],
                fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
                model_properties={
                    ModelPropertyKey.MODE: LLMMode.CHAT.value,
                    ModelPropertyKey.CONTEXT_SIZE: 128000,
                },
                parameter_rules=[
                    ParameterRule(
                        name='temperature',
                        **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
                    ),
                    ParameterRule(
                        name='top_p',
                        **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
                    ),
                    ParameterRule(
                        name='presence_penalty',
                        **PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
                    ),
                    ParameterRule(
                        name='frequency_penalty',
                        **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
                    ),
                    _get_max_tokens(default=512, min_val=1, max_val=16384),
                    ParameterRule(
                        name='seed',
                        label=I18nObject(
                            zh_Hans='种子',
                            en_US='Seed'
                        ),
                        type='int',
                        help=I18nObject(
                            zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint 响应参数来监视变化。',
                            en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.'
                        ),
                        required=False,
                        precision=2,
                        min=0,
                        max=1,
                    ),
                    ParameterRule(
                        name='response_format',
                        label=I18nObject(
                            zh_Hans='回复格式',
                            en_US='response_format'
                        ),
                        type='string',
                        help=I18nObject(
                            zh_Hans='指定模型必须输出的格式',
                            en_US='specifying the format that the model must output'
                        ),
                        required=False,
                        options=['text', 'json_object']
                    ),
                ],
                pricing=PriceConfig(
                    input=0.150,
                    output=0.600,
                    unit=0.000001,
                    currency='USD',
                )
            )
        ),
        AzureBaseModel(
            base_model_name='gpt-4o-mini-2024-07-18',
            entity=AIModelEntity(
                model='fake-deployment-name',
                label=I18nObject(
                    en_US='fake-deployment-name-label',
                ),
                model_type=ModelType.LLM,
                features=[
                    ModelFeature.AGENT_THOUGHT,
                    ModelFeature.VISION,
                    ModelFeature.MULTI_TOOL_CALL,
                    ModelFeature.STREAM_TOOL_CALL,
                ],
                fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
                model_properties={
                    ModelPropertyKey.MODE: LLMMode.CHAT.value,
                    ModelPropertyKey.CONTEXT_SIZE: 128000,
                },
                parameter_rules=[
                    ParameterRule(
                        name='temperature',
                        **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
                    ),
                    ParameterRule(
                        name='top_p',
                        **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
                    ),
                    ParameterRule(
                        name='presence_penalty',
                        **PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
                    ),
                    ParameterRule(
                        name='frequency_penalty',
                        **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
                    ),
                    _get_max_tokens(default=512, min_val=1, max_val=16384),
                    ParameterRule(
                        name='seed',
                        label=I18nObject(
                            zh_Hans='种子',
                            en_US='Seed'
                        ),
                        type='int',
                        help=I18nObject(
                            zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint 响应参数来监视变化。',
                            en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.'
                        ),
                        required=False,
                        precision=2,
                        min=0,
                        max=1,
                    ),
                    ParameterRule(
                        name='response_format',
                        label=I18nObject(
                            zh_Hans='回复格式',
                            en_US='response_format'
                        ),
                        type='string',
                        help=I18nObject(
                            zh_Hans='指定模型必须输出的格式',
                            en_US='specifying the format that the model must output'
                        ),
                        required=False,
                        options=['text', 'json_object']
                    ),
                ],
                pricing=PriceConfig(
                    input=0.150,
                    output=0.600,
                    unit=0.000001,
                    currency='USD',
                )
            )
        ),
        AzureBaseModel(
            base_model_name='gpt-4o',
            entity=AIModelEntity(

@@ -114,6 +114,18 @@ model_credential_schema:
        show_on:
          - variable: __model_type
            value: llm
      - label:
          en_US: gpt-4o-mini
        value: gpt-4o-mini
        show_on:
          - variable: __model_type
            value: llm
      - label:
          en_US: gpt-4o-mini-2024-07-18
        value: gpt-4o-mini-2024-07-18
        show_on:
          - variable: __model_type
            value: llm
      - label:
          en_US: gpt-4o
        value: gpt-4o

@@ -1,12 +1,8 @@
import concurrent.futures
import copy
from functools import reduce
from io import BytesIO
from typing import Optional

from flask import Response
from openai import AzureOpenAI
from pydub import AudioSegment

from core.model_runtime.entities.model_entities import AIModelEntity
from core.model_runtime.errors.invoke import InvokeBadRequestError
@@ -51,7 +47,7 @@ class AzureOpenAIText2SpeechModel(_CommonAzureOpenAI, TTSModel):
        :return: text translated to audio file
        """
        try:
            self._tts_invoke(
            self._tts_invoke_streaming(
                model=model,
                credentials=credentials,
                content_text='Hello Dify!',
@@ -60,45 +56,6 @@ class AzureOpenAIText2SpeechModel(_CommonAzureOpenAI, TTSModel):
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))

    def _tts_invoke(self, model: str, credentials: dict, content_text: str, voice: str) -> Response:
        """
        _tts_invoke text2speech model

        :param model: model name
        :param credentials: model credentials
        :param content_text: text content to be translated
        :param voice: model timbre
        :return: text translated to audio file
        """
        audio_type = self._get_model_audio_type(model, credentials)
        word_limit = self._get_model_word_limit(model, credentials)
        max_workers = self._get_model_workers_limit(model, credentials)
        try:
            sentences = list(self._split_text_into_sentences(org_text=content_text, max_length=word_limit))
            audio_bytes_list = []

            # Create a thread pool and map the function to the list of sentences
            with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
                futures = [executor.submit(self._process_sentence, sentence=sentence, model=model, voice=voice,
                                           credentials=credentials) for sentence in sentences]
                for future in futures:
                    try:
                        if future.result():
                            audio_bytes_list.append(future.result())
                    except Exception as ex:
                        raise InvokeBadRequestError(str(ex))

            if len(audio_bytes_list) > 0:
                audio_segments = [AudioSegment.from_file(BytesIO(audio_bytes), format=audio_type) for audio_bytes in
                                  audio_bytes_list if audio_bytes]
                combined_segment = reduce(lambda x, y: x + y, audio_segments)
                buffer: BytesIO = BytesIO()
                combined_segment.export(buffer, format=audio_type)
                buffer.seek(0)
                return Response(buffer.read(), status=200, mimetype=f"audio/{audio_type}")
        except Exception as ex:
            raise InvokeBadRequestError(str(ex))

    def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str,
                              voice: str) -> any:
        """
@@ -144,7 +101,6 @@ class AzureOpenAIText2SpeechModel(_CommonAzureOpenAI, TTSModel):
        :param sentence: text content to be translated
        :return: text translated to audio file
        """
        # transform credentials to kwargs for model instance
        credentials_kwargs = self._to_credential_kwargs(credentials)
        client = AzureOpenAI(**credentials_kwargs)
        response = client.audio.speech.create(model=model, voice=voice, input=sentence.strip())

@@ -379,8 +379,12 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
                if not message_content.data.startswith("data:"):
                    # fetch image data from url
                    try:
                        image_content = requests.get(message_content.data).content
                        mime_type, _ = mimetypes.guess_type(message_content.data)
                        url = message_content.data
                        image_content = requests.get(url).content
                        if '?' in url:
                            url = url.split('?')[0]
                        mime_type, _ = mimetypes.guess_type(url)
                        base64_data = base64.b64encode(image_content).decode('utf-8')
                    except Exception as ex:
                        raise ValueError(f"Failed to fetch image data from url {message_content.data}, {ex}")
                else:
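The Bedrock change above exists because mimetypes.guess_type keys off the file extension, and the query string of a pre-signed URL hides it. A small demonstration (the URL is a made-up example; CPython's stdlib does not strip query strings for you):

import mimetypes

url = 'https://example.com/images/cat.png?X-Amz-Signature=abc123'
print(mimetypes.guess_type(url))                # (None, None) - extension obscured
print(mimetypes.guess_type(url.split('?')[0]))  # ('image/png', None)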
@@ -5,6 +5,8 @@ label:
model_type: llm
features:
  - agent-thought
  - multi-tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 128000

@@ -5,6 +5,8 @@ label:
model_type: llm
features:
  - agent-thought
  - multi-tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 128000

@@ -19,7 +19,7 @@ parameter_rules:
    min: 1
    max: 8192
pricing:
  input: '0.05'
  output: '0.1'
  input: '0.59'
  output: '0.79'
  unit: '0.000001'
  currency: USD

@@ -19,7 +19,7 @@ parameter_rules:
    min: 1
    max: 8192
pricing:
  input: '0.59'
  output: '0.79'
  input: '0.05'
  output: '0.08'
  unit: '0.000001'
  currency: USD

@@ -0,0 +1,11 @@
import logging

from core.model_runtime.model_providers.__base.model_provider import ModelProvider

logger = logging.getLogger(__name__)


class HuggingfaceTeiProvider(ModelProvider):

    def validate_provider_credentials(self, credentials: dict) -> None:
        pass

@@ -0,0 +1,36 @@
provider: huggingface_tei
label:
  en_US: Text Embedding Inference
description:
  en_US: A blazing fast inference solution for text embeddings models.
  zh_Hans: 用于文本嵌入模型的超快速推理解决方案。
background: "#FFF8DC"
help:
  title:
    en_US: How to deploy Text Embedding Inference
    zh_Hans: 如何部署 Text Embedding Inference
  url:
    en_US: https://github.com/huggingface/text-embeddings-inference
supported_model_types:
  - text-embedding
  - rerank
configurate_methods:
  - customizable-model
model_credential_schema:
  model:
    label:
      en_US: Model Name
      zh_Hans: 模型名称
    placeholder:
      en_US: Enter your model name
      zh_Hans: 输入模型名称
  credential_form_schemas:
    - variable: server_url
      label:
        zh_Hans: 服务器URL
        en_US: Server url
      type: secret-input
      required: true
      placeholder:
        zh_Hans: 在此输入Text Embedding Inference的服务器地址,如 http://192.168.1.100:8080
        en_US: Enter the url of your Text Embedding Inference, e.g. http://192.168.1.100:8080

@@ -0,0 +1,137 @@
from typing import Optional

import httpx

from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType
from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
    InvokeConnectionError,
    InvokeError,
    InvokeRateLimitError,
    InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.rerank_model import RerankModel
from core.model_runtime.model_providers.huggingface_tei.tei_helper import TeiHelper


class HuggingfaceTeiRerankModel(RerankModel):
    """
    Model class for Text Embedding Inference rerank model.
    """

    def _invoke(
        self,
        model: str,
        credentials: dict,
        query: str,
        docs: list[str],
        score_threshold: Optional[float] = None,
        top_n: Optional[int] = None,
        user: Optional[str] = None,
    ) -> RerankResult:
        """
        Invoke rerank model

        :param model: model name
        :param credentials: model credentials
        :param query: search query
        :param docs: docs for reranking
        :param score_threshold: score threshold
        :param top_n: top n
        :param user: unique user id
        :return: rerank result
        """
        if len(docs) == 0:
            return RerankResult(model=model, docs=[])
        server_url = credentials['server_url']

        if server_url.endswith('/'):
            server_url = server_url[:-1]

        try:
            results = TeiHelper.invoke_rerank(server_url, query, docs)

            rerank_documents = []
            for result in results:
                rerank_document = RerankDocument(
                    index=result['index'],
                    text=result['text'],
                    score=result['score'],
                )
                if score_threshold is None or result['score'] >= score_threshold:
                    rerank_documents.append(rerank_document)
                    if top_n is not None and len(rerank_documents) >= top_n:
                        break

            return RerankResult(model=model, docs=rerank_documents)
        except httpx.HTTPStatusError as e:
            raise InvokeServerUnavailableError(str(e))

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials

        :param model: model name
        :param credentials: model credentials
        :return:
        """
        try:
            server_url = credentials['server_url']
            extra_args = TeiHelper.get_tei_extra_parameter(server_url, model)
            if extra_args.model_type != 'reranker':
                raise CredentialsValidateFailedError('Current model is not a rerank model')

            credentials['context_size'] = extra_args.max_input_length

            self.invoke(
                model=model,
                credentials=credentials,
                query='Whose kasumi',
                docs=[
                    'Kasumi is a girl\'s name of Japanese origin meaning "mist".',
                    'Her music is a kawaii bass, a mix of future bass, pop, and kawaii music ',
                    'and she leads a team named PopiParty.',
                ],
                score_threshold=0.8,
            )
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))

    @property
    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
        """
        Map model invoke error to unified error
        The key is the error type thrown to the caller
        The value is the error type thrown by the model,
        which needs to be converted into a unified error type for the caller.

        :return: Invoke error mapping
        """
        return {
            InvokeConnectionError: [InvokeConnectionError],
            InvokeServerUnavailableError: [InvokeServerUnavailableError],
            InvokeRateLimitError: [InvokeRateLimitError],
            InvokeAuthorizationError: [InvokeAuthorizationError],
            InvokeBadRequestError: [InvokeBadRequestError, KeyError, ValueError],
        }

    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
        """
        used to define customizable model schema
        """
        entity = AIModelEntity(
            model=model,
            label=I18nObject(en_US=model),
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_type=ModelType.RERANK,
            model_properties={
                ModelPropertyKey.CONTEXT_SIZE: int(credentials.get('context_size', 512)),
            },
            parameter_rules=[],
        )

        return entity
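The _invoke loop in the rerank model above walks TEI's score-sorted results, keeps only those at or above score_threshold, and stops as soon as top_n documents have been kept. A self-contained sketch of that filtering with made-up results:

# Hypothetical TEI /rerank output, already sorted by descending score.
results = [
    {'index': 0, 'text': 'Deep Learning is ...', 'score': 0.995},
    {'index': 2, 'text': 'Gradient descent ...', 'score': 0.88},
    {'index': 1, 'text': 'Cooking pasta ...', 'score': 0.41},
]

def filter_results(results: list[dict], score_threshold=None, top_n=None) -> list[dict]:
    kept = []
    for result in results:
        if score_threshold is None or result['score'] >= score_threshold:
            kept.append(result)
            if top_n is not None and len(kept) >= top_n:
                break
    return kept

assert filter_results(results, score_threshold=0.5) == results[:2]
assert filter_results(results, top_n=1) == results[:1]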
@@ -0,0 +1,183 @@
from threading import Lock
from time import time
from typing import Optional

import httpx
from requests.adapters import HTTPAdapter
from requests.exceptions import ConnectionError, MissingSchema, Timeout
from requests.sessions import Session
from yarl import URL


class TeiModelExtraParameter:
    model_type: str
    max_input_length: int
    max_client_batch_size: int

    def __init__(self, model_type: str, max_input_length: int, max_client_batch_size: Optional[int] = None) -> None:
        self.model_type = model_type
        self.max_input_length = max_input_length
        self.max_client_batch_size = max_client_batch_size


cache = {}
cache_lock = Lock()


class TeiHelper:
    @staticmethod
    def get_tei_extra_parameter(server_url: str, model_name: str) -> TeiModelExtraParameter:
        TeiHelper._clean_cache()
        with cache_lock:
            if model_name not in cache:
                cache[model_name] = {
                    'expires': time() + 300,
                    'value': TeiHelper._get_tei_extra_parameter(server_url),
                }
            return cache[model_name]['value']

    @staticmethod
    def _clean_cache() -> None:
        try:
            with cache_lock:
                expired_keys = [model_uid for model_uid, model in cache.items() if model['expires'] < time()]
                for model_uid in expired_keys:
                    del cache[model_uid]
        except RuntimeError:
            pass

    @staticmethod
    def _get_tei_extra_parameter(server_url: str) -> TeiModelExtraParameter:
        """
        get tei model extra parameter like model_type, max_input_length, max_batch_requests
        """

        url = str(URL(server_url) / 'info')

        # this method is surrounded by a lock, and the default requests session may hang forever,
        # so mount an HTTPAdapter with max_retries=3
        session = Session()
        session.mount('http://', HTTPAdapter(max_retries=3))
        session.mount('https://', HTTPAdapter(max_retries=3))

        try:
            response = session.get(url, timeout=10)
        except (MissingSchema, ConnectionError, Timeout) as e:
            raise RuntimeError(f'get tei model extra parameter failed, url: {url}, error: {e}')
        if response.status_code != 200:
            raise RuntimeError(
                f'get tei model extra parameter failed, status code: {response.status_code}, response: {response.text}'
            )

        response_json = response.json()

        model_type = response_json.get('model_type', {})
        if len(model_type.keys()) < 1:
            raise RuntimeError('model_type is empty')
        model_type = list(model_type.keys())[0]
        if model_type not in ['embedding', 'reranker']:
            raise RuntimeError(f'invalid model_type: {model_type}')

        max_input_length = response_json.get('max_input_length', 512)
        max_client_batch_size = response_json.get('max_client_batch_size', 1)

        return TeiModelExtraParameter(
            model_type=model_type,
            max_input_length=max_input_length,
            max_client_batch_size=max_client_batch_size
        )

    @staticmethod
    def invoke_tokenize(server_url: str, texts: list[str]) -> list[list[dict]]:
        """
        Invoke tokenize endpoint

        Example response:
        [
            [
                {
                    "id": 0,
                    "text": "<s>",
                    "special": true,
                    "start": null,
                    "stop": null
                },
                {
                    "id": 7704,
                    "text": "str",
                    "special": false,
                    "start": 0,
                    "stop": 3
                },
                < MORE TOKENS >
            ]
        ]

        :param server_url: server url
        :param texts: texts to tokenize
        """
        resp = httpx.post(
            f'{server_url}/tokenize',
            json={'inputs': texts},
        )
        resp.raise_for_status()
        return resp.json()

    @staticmethod
    def invoke_embeddings(server_url: str, texts: list[str]) -> dict:
        """
        Invoke embeddings endpoint

        Example response:
        {
            "object": "list",
            "data": [
                {
                    "object": "embedding",
                    "embedding": [...],
                    "index": 0
                }
            ],
            "model": "MODEL_NAME",
            "usage": {
                "prompt_tokens": 3,
                "total_tokens": 3
            }
        }

        :param server_url: server url
        :param texts: texts to embed
        """
        # Use OpenAI compatible API here, which has usage tracking
        resp = httpx.post(
            f'{server_url}/v1/embeddings',
            json={'input': texts},
        )
        resp.raise_for_status()
        return resp.json()

    @staticmethod
    def invoke_rerank(server_url: str, query: str, docs: list[str]) -> list[dict]:
        """
        Invoke rerank endpoint

        Example response:
        [
            {
                "index": 0,
                "text": "Deep Learning is ...",
                "score": 0.9950755
            }
        ]

        :param server_url: server url
        :param query: search query
        :param docs: docs to rerank
        """
        params = {'query': query, 'texts': docs, 'return_text': True}

        response = httpx.post(
            server_url + '/rerank',
            json=params,
        )
        response.raise_for_status()
        return response.json()
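Taken together, TeiHelper means a provider needs only a server URL: model type, context size, and batch size are discovered from TEI's /info endpoint and cached for five minutes per model name. A usage sketch (the URL is a placeholder for a running TEI container):

server_url = 'http://192.168.1.100:8080'  # placeholder

extra = TeiHelper.get_tei_extra_parameter(server_url, 'my-model')
print(extra.model_type)        # 'embedding' or 'reranker'
print(extra.max_input_length)  # e.g. 512

if extra.model_type == 'embedding':
    result = TeiHelper.invoke_embeddings(server_url, ['hello world'])
    print(result['usage']['total_tokens'])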
@@ -0,0 +1,204 @@
import time
from typing import Optional

from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
    InvokeConnectionError,
    InvokeError,
    InvokeRateLimitError,
    InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
from core.model_runtime.model_providers.huggingface_tei.tei_helper import TeiHelper


class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
    """
    Model class for Text Embedding Inference text embedding model.
    """

    def _invoke(
        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model

        credentials should be like:
        {
            'server_url': 'server url',
            'model_uid': 'model uid',
        }

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :return: embeddings result
        """
        server_url = credentials['server_url']

        if server_url.endswith('/'):
            server_url = server_url[:-1]

        # get model properties
        context_size = self._get_context_size(model, credentials)
        max_chunks = self._get_max_chunks(model, credentials)

        inputs = []
        indices = []
        used_tokens = 0

        # get tokenized results from TEI
        batched_tokenize_result = TeiHelper.invoke_tokenize(server_url, texts)

        for i, (text, tokenize_result) in enumerate(zip(texts, batched_tokenize_result)):

            # Check if the number of tokens is larger than the context size
            num_tokens = len(tokenize_result)

            if num_tokens >= context_size:
                # Find the best cutoff point
                pre_special_token_count = 0
                for token in tokenize_result:
                    if token['special']:
                        pre_special_token_count += 1
                    else:
                        break
                rest_special_token_count = len([token for token in tokenize_result if token['special']]) - pre_special_token_count

                # Calculate the cutoff point, leave 20 extra space to avoid exceeding the limit
                token_cutoff = context_size - rest_special_token_count - 20

                # Find the cutoff index
                cutpoint_token = tokenize_result[token_cutoff]
                cutoff = cutpoint_token['start']

                inputs.append(text[0: cutoff])
            else:
                inputs.append(text)
            indices += [i]

        batched_embeddings = []
        _iter = range(0, len(inputs), max_chunks)

        try:
            used_tokens = 0
            for i in _iter:
                iter_texts = inputs[i : i + max_chunks]
                results = TeiHelper.invoke_embeddings(server_url, iter_texts)
                embeddings = results['data']
                embeddings = [embedding['embedding'] for embedding in embeddings]
                batched_embeddings.extend(embeddings)

                usage = results['usage']
                used_tokens += usage['total_tokens']
        except RuntimeError as e:
            raise InvokeServerUnavailableError(str(e))

        usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)

        result = TextEmbeddingResult(model=model, embeddings=batched_embeddings, usage=usage)

        return result

    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
        """
        Get number of tokens for given prompt messages

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :return:
        """
        num_tokens = 0
        server_url = credentials['server_url']

        if server_url.endswith('/'):
            server_url = server_url[:-1]

        batch_tokens = TeiHelper.invoke_tokenize(server_url, texts)
        num_tokens = sum(len(tokens) for tokens in batch_tokens)
        return num_tokens

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials

        :param model: model name
        :param credentials: model credentials
        :return:
        """
        try:
            server_url = credentials['server_url']
            extra_args = TeiHelper.get_tei_extra_parameter(server_url, model)
            if extra_args.model_type != 'embedding':
                raise CredentialsValidateFailedError('Current model is not an embedding model')

            credentials['context_size'] = extra_args.max_input_length
            credentials['max_chunks'] = extra_args.max_client_batch_size
            self._invoke(model=model, credentials=credentials, texts=['ping'])
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))

    @property
    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
        return {
            InvokeConnectionError: [InvokeConnectionError],
            InvokeServerUnavailableError: [InvokeServerUnavailableError],
            InvokeRateLimitError: [InvokeRateLimitError],
            InvokeAuthorizationError: [InvokeAuthorizationError],
            InvokeBadRequestError: [KeyError],
        }

    def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
        """
        Calculate response usage

        :param model: model name
        :param credentials: model credentials
        :param tokens: input tokens
        :return: usage
        """
        # get input price info
        input_price_info = self.get_price(
            model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
        )

        # transform usage
        usage = EmbeddingUsage(
            tokens=tokens,
            total_tokens=tokens,
            unit_price=input_price_info.unit_price,
            price_unit=input_price_info.unit,
            total_price=input_price_info.total_amount,
            currency=input_price_info.currency,
            latency=time.perf_counter() - self.started_at,
        )

        return usage

    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
        """
        used to define customizable model schema
        """

        entity = AIModelEntity(
            model=model,
            label=I18nObject(en_US=model),
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_type=ModelType.TEXT_EMBEDDING,
            model_properties={
                ModelPropertyKey.MAX_CHUNKS: int(credentials.get('max_chunks', 1)),
                ModelPropertyKey.CONTEXT_SIZE: int(credentials.get('context_size', 512)),
            },
            parameter_rules=[],
        )

        return entity
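The truncation logic in _invoke above is worth unpacking: it counts the special tokens at the head of the sequence, assumes the remaining special tokens trail, reserves room for those plus a 20-token safety margin, and then cuts the text at the character offset (start) of the first token past that budget. A self-contained rehearsal with a made-up tokenize result shaped like TEI's /tokenize output:

text = 'x' * 4000  # pretend each 4-character slice is one token
tokenize_result = [{'id': 0, 'text': '<s>', 'special': True, 'start': None, 'stop': None}]
tokenize_result += [
    {'id': 1, 'text': text[i:i + 4], 'special': False, 'start': i, 'stop': i + 4}
    for i in range(0, len(text), 4)
]
context_size = 512  # hypothetical max_input_length

pre_special_token_count = 0
for token in tokenize_result:
    if token['special']:
        pre_special_token_count += 1
    else:
        break
rest_special_token_count = sum(1 for t in tokenize_result if t['special']) - pre_special_token_count

# Budget: context size minus trailing specials minus the 20-token margin.
token_cutoff = context_size - rest_special_token_count - 20
cutoff = tokenize_result[token_cutoff]['start']
print(len(tokenize_result), token_cutoff, cutoff)  # 1001 492 1964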
@@ -21,6 +21,16 @@ parameter_rules:
    default: 1024
    min: 1
    max: 32000
  - name: enable_enhance
    label:
      zh_Hans: 功能增强
      en_US: Enable Enhancement
    type: boolean
    help:
      zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
      en_US: Allow the model to perform external search to enhance the generation results.
    required: false
    default: true
pricing:
  input: '0.03'
  output: '0.10'

@@ -21,6 +21,16 @@ parameter_rules:
    default: 1024
    min: 1
    max: 256000
  - name: enable_enhance
    label:
      zh_Hans: 功能增强
      en_US: Enable Enhancement
    type: boolean
    help:
      zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
      en_US: Allow the model to perform external search to enhance the generation results.
    required: false
    default: true
pricing:
  input: '0.015'
  output: '0.06'

@@ -21,6 +21,16 @@ parameter_rules:
    default: 1024
    min: 1
    max: 32000
  - name: enable_enhance
    label:
      zh_Hans: 功能增强
      en_US: Enable Enhancement
    type: boolean
    help:
      zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
      en_US: Allow the model to perform external search to enhance the generation results.
    required: false
    default: true
pricing:
  input: '0.0045'
  output: '0.0005'

@@ -36,7 +36,8 @@ class HunyuanLargeLanguageModel(LargeLanguageModel):

        custom_parameters = {
            'Temperature': model_parameters.get('temperature', 0.0),
            'TopP': model_parameters.get('top_p', 1.0)
            'TopP': model_parameters.get('top_p', 1.0),
            'EnableEnhancement': model_parameters.get('enable_enhance', True)
        }

        params = {
@@ -213,7 +214,7 @@ class HunyuanLargeLanguageModel(LargeLanguageModel):
    def _handle_chat_response(self, credentials, model, prompt_messages, response):
        usage = self._calc_response_usage(model, credentials, response.Usage.PromptTokens,
                                          response.Usage.CompletionTokens)
        assistant_prompt_message = PromptMessage(role="assistant")
        assistant_prompt_message = AssistantPromptMessage()
        assistant_prompt_message.content = response.Choices[0].Message.Content
        result = LLMResult(
            model=model,

@@ -1,4 +1,4 @@
model: jina-reranker-v2-base-multilingual
model_type: rerank
model_properties:
  context_size: 8192
  context_size: 1024

@@ -2,10 +2,16 @@
- google/codegemma-7b
- google/recurrentgemma-2b
- meta/llama2-70b
- meta/llama-3.1-8b-instruct
- meta/llama-3.1-70b-instruct
- meta/llama-3.1-405b-instruct
- meta/llama3-8b-instruct
- meta/llama3-70b-instruct
- mistralai/mistral-large
- mistralai/mixtral-8x7b-instruct-v0.1
- mistralai/mixtral-8x22b-instruct-v0.1
- nvidia/nemotron-4-340b-instruct
- microsoft/phi-3-medium-128k-instruct
- microsoft/phi-3-mini-128k-instruct
- fuyu-8b
- snowflake/arctic

@@ -0,0 +1,36 @@
model: meta/llama-3.1-405b-instruct
label:
  zh_Hans: meta/llama-3.1-405b-instruct
  en_US: meta/llama-3.1-405b-instruct
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 1
    default: 0.5
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 4096
    default: 1024
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
@@ -0,0 +1,36 @@
model: meta/llama-3.1-70b-instruct
label:
  zh_Hans: meta/llama-3.1-70b-instruct
  en_US: meta/llama-3.1-70b-instruct
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 1
    default: 0.5
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 4096
    default: 1024
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
@@ -0,0 +1,36 @@
model: meta/llama-3.1-8b-instruct
label:
  zh_Hans: meta/llama-3.1-8b-instruct
  en_US: meta/llama-3.1-8b-instruct
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 1
    default: 0.5
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 4096
    default: 1024
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
@@ -31,8 +31,13 @@ class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
        'meta/llama2-70b': '',
        'meta/llama3-8b-instruct': '',
        'meta/llama3-70b-instruct': '',
        'google/recurrentgemma-2b': ''

        'meta/llama-3.1-8b-instruct': '',
        'meta/llama-3.1-70b-instruct': '',
        'meta/llama-3.1-405b-instruct': '',
        'google/recurrentgemma-2b': '',
        'nvidia/nemotron-4-340b-instruct': '',
        'microsoft/phi-3-medium-128k-instruct': '',
        'microsoft/phi-3-mini-128k-instruct': ''
    }

    def _invoke(self, model: str, credentials: dict,

@@ -0,0 +1,36 @@
model: nvidia/nemotron-4-340b-instruct
label:
  zh_Hans: nvidia/nemotron-4-340b-instruct
  en_US: nvidia/nemotron-4-340b-instruct
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 1
    default: 0.5
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 4096
    default: 1024
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
@@ -0,0 +1,36 @@
model: microsoft/phi-3-medium-128k-instruct
label:
  zh_Hans: microsoft/phi-3-medium-128k-instruct
  en_US: microsoft/phi-3-medium-128k-instruct
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 1
    default: 0.5
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 4096
    default: 1024
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
@@ -0,0 +1,36 @@
model: microsoft/phi-3-mini-128k-instruct
label:
  zh_Hans: microsoft/phi-3-mini-128k-instruct
  en_US: microsoft/phi-3-mini-128k-instruct
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 1
    default: 0.5
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 4096
    default: 1024
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
@@ -59,7 +59,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
        if not endpoint_url.endswith('/'):
            endpoint_url += '/'

        endpoint_url = urljoin(endpoint_url, 'api/embeddings')
        endpoint_url = urljoin(endpoint_url, 'api/embed')

        # get model properties
        context_size = self._get_context_size(model, credentials)
@@ -72,38 +72,34 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
            num_tokens = self._get_num_tokens_by_gpt2(text)

            if num_tokens >= context_size:
                cutoff = int(len(text) * (np.floor(context_size / num_tokens)))
                cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
                # if num tokens is larger than context length, only use the start
                inputs.append(text[0: cutoff])
            else:
                inputs.append(text)

        batched_embeddings = []
        # Prepare the payload for the request
        payload = {
            'input': inputs,
            'model': model,
        }

        for text in inputs:
            # Prepare the payload for the request
            payload = {
                'prompt': text,
                'model': model,
            }
            # Make the request to the OpenAI API
            response = requests.post(
                endpoint_url,
                headers=headers,
                data=json.dumps(payload),
                timeout=(10, 300)
            )

        # Make the request to the OpenAI API
        response = requests.post(
            endpoint_url,
            headers=headers,
            data=json.dumps(payload),
            timeout=(10, 300)
        )
        response.raise_for_status()  # Raise an exception for HTTP errors
        response_data = response.json()

            response.raise_for_status()  # Raise an exception for HTTP errors
            response_data = response.json()
        # Extract embeddings and used tokens from the response
        embeddings = response_data['embeddings']
        embedding_used_tokens = self.get_num_tokens(model, credentials, inputs)

            # Extract embeddings and used tokens from the response
            embeddings = response_data['embedding']
            embedding_used_tokens = self.get_num_tokens(model, credentials, [text])

            used_tokens += embedding_used_tokens
            batched_embeddings.append(embeddings)
        used_tokens += embedding_used_tokens

        # calc usage
        usage = self._calc_response_usage(
@@ -113,7 +109,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
        )

        return TextEmbeddingResult(
            embeddings=batched_embeddings,
            embeddings=embeddings,
            usage=usage,
            model=model
        )
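Two things happen in the Ollama hunks above: the request moves from the per-text api/embeddings endpoint to the batched api/embed endpoint, and a truncation bug is fixed. In the old cutoff expression the floor is applied to context_size / num_tokens, a ratio that is always below 1 on this branch and therefore floors to 0, truncating any over-long text to the empty string; the fix floors the product instead. A worked check:

import numpy as np

text = 'x' * 5000        # pretend this tokenizes to 1250 tokens
num_tokens = 1250
context_size = 512

old_cutoff = int(len(text) * (np.floor(context_size / num_tokens)))
new_cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
print(old_cutoff, new_cutoff)  # 0 2048 -- old kept nothing, new keeps a prefix

The same one-line fix recurs twice more below in the OpenAI-API-compatible embedding models.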
@@ -1,6 +1,7 @@
- gpt-4
- gpt-4o
- gpt-4o-2024-05-13
- gpt-4o-2024-08-06
- gpt-4o-mini
- gpt-4o-mini-2024-07-18
- gpt-4-turbo

@@ -37,7 +37,7 @@ parameter_rules:
      - text
      - json_object
pricing:
  input: '0.001'
  output: '0.002'
  input: '0.0005'
  output: '0.0015'
  unit: '0.001'
  currency: USD

@@ -0,0 +1,44 @@
model: gpt-4o-2024-08-06
label:
  zh_Hans: gpt-4o-2024-08-06
  en_US: gpt-4o-2024-08-06
model_type: llm
features:
  - multi-tool-call
  - agent-thought
  - stream-tool-call
  - vision
model_properties:
  mode: chat
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 16384
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: response_format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
pricing:
  input: '2.50'
  output: '10.00'
  unit: '0.000001'
  currency: USD
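A note on the pricing schema in these YAML files: input and output are prices per unit tokens, so input: '2.50' with unit: '0.000001' reads as $2.50 per million input tokens (and the older unit: '0.001' files are per thousand). A quick check of how a request cost falls out of the gpt-4o-2024-08-06 figures above:

from decimal import Decimal

input_price = Decimal('2.50')   # from the pricing block above
unit = Decimal('0.000001')      # price applies per token * unit
prompt_tokens = 12_000

cost = prompt_tokens * unit * input_price
print(cost)  # 0.0300000 -> $0.03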
@ -1,11 +1,7 @@
|
||||
import concurrent.futures
|
||||
from functools import reduce
|
||||
from io import BytesIO
|
||||
from typing import Optional
|
||||
|
||||
from flask import Response
|
||||
from openai import OpenAI
|
||||
from pydub import AudioSegment
|
||||
|
||||
from core.model_runtime.errors.invoke import InvokeBadRequestError
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
@ -32,7 +28,8 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
|
||||
:return: text translated to audio file
|
||||
"""
|
||||
|
||||
if not voice or voice not in [d['value'] for d in self.get_tts_model_voices(model=model, credentials=credentials)]:
|
||||
if not voice or voice not in [d['value'] for d in
|
||||
self.get_tts_model_voices(model=model, credentials=credentials)]:
|
||||
voice = self._get_model_default_voice(model, credentials)
|
||||
# if streaming:
|
||||
return self._tts_invoke_streaming(model=model,
|
||||
@ -50,7 +47,7 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
|
||||
:return: text translated to audio file
|
||||
"""
|
||||
try:
|
||||
self._tts_invoke(
|
||||
self._tts_invoke_streaming(
|
||||
model=model,
|
||||
credentials=credentials,
|
||||
content_text='Hello Dify!',
|
||||
@ -59,46 +56,6 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
def _tts_invoke(self, model: str, credentials: dict, content_text: str, voice: str) -> Response:
|
||||
"""
|
||||
_tts_invoke text2speech model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param content_text: text content to be translated
|
||||
:param voice: model timbre
|
||||
:return: text translated to audio file
|
||||
"""
|
||||
audio_type = self._get_model_audio_type(model, credentials)
|
||||
word_limit = self._get_model_word_limit(model, credentials)
|
||||
max_workers = self._get_model_workers_limit(model, credentials)
|
||||
try:
|
||||
sentences = list(self._split_text_into_sentences(org_text=content_text, max_length=word_limit))
|
||||
audio_bytes_list = []
|
||||
|
||||
# Create a thread pool and map the function to the list of sentences
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||
futures = [executor.submit(self._process_sentence, sentence=sentence, model=model, voice=voice,
|
||||
credentials=credentials) for sentence in sentences]
|
||||
for future in futures:
|
||||
try:
|
||||
if future.result():
|
||||
audio_bytes_list.append(future.result())
|
||||
except Exception as ex:
|
||||
raise InvokeBadRequestError(str(ex))
|
||||
|
||||
if len(audio_bytes_list) > 0:
|
||||
audio_segments = [AudioSegment.from_file(BytesIO(audio_bytes), format=audio_type) for audio_bytes in
|
||||
audio_bytes_list if audio_bytes]
|
||||
combined_segment = reduce(lambda x, y: x + y, audio_segments)
|
||||
buffer: BytesIO = BytesIO()
|
||||
combined_segment.export(buffer, format=audio_type)
|
||||
buffer.seek(0)
|
||||
return Response(buffer.read(), status=200, mimetype=f"audio/{audio_type}")
|
||||
except Exception as ex:
|
||||
raise InvokeBadRequestError(str(ex))
|
||||
|
||||
|
||||
def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str,
|
||||
voice: str) -> any:
|
||||
"""
|
||||
@ -114,7 +71,8 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
|
||||
# doc: https://platform.openai.com/docs/guides/text-to-speech
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
client = OpenAI(**credentials_kwargs)
|
||||
model_support_voice = [x.get("value") for x in self.get_tts_model_voices(model=model, credentials=credentials)]
|
||||
model_support_voice = [x.get("value") for x in
|
||||
self.get_tts_model_voices(model=model, credentials=credentials)]
|
||||
if not voice or voice not in model_support_voice:
|
||||
voice = self._get_model_default_voice(model, credentials)
|
||||
word_limit = self._get_model_word_limit(model, credentials)
|
||||
|
||||
@ -7,6 +7,7 @@ description:
|
||||
supported_model_types:
|
||||
- llm
|
||||
- text-embedding
|
||||
- speech2text
|
||||
configurate_methods:
|
||||
- customizable-model
|
||||
model_credential_schema:
|
||||
@ -61,6 +62,22 @@ model_credential_schema:
|
||||
zh_Hans: 模型上下文长度
|
||||
en_US: Model context size
|
||||
required: true
|
||||
show_on:
|
||||
- variable: __model_type
|
||||
value: llm
|
||||
type: text-input
|
||||
default: '4096'
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的模型上下文长度
|
||||
en_US: Enter your Model context size
|
||||
- variable: context_size
|
||||
label:
|
||||
zh_Hans: 模型上下文长度
|
||||
en_US: Model context size
|
||||
required: true
|
||||
show_on:
|
||||
- variable: __model_type
|
||||
value: text-embedding
|
||||
type: text-input
|
||||
default: '4096'
|
||||
placeholder:
|
||||
|
||||
@ -0,0 +1,63 @@
|
||||
from typing import IO, Optional
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import requests
|
||||
|
||||
from core.model_runtime.errors.invoke import InvokeBadRequestError
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel
|
||||
from core.model_runtime.model_providers.openai_api_compatible._common import _CommonOAI_API_Compat
|
||||
|
||||
|
||||
class OAICompatSpeech2TextModel(_CommonOAI_API_Compat, Speech2TextModel):
|
||||
"""
|
||||
Model class for OpenAI Compatible Speech to text model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, file: IO[bytes], user: Optional[str] = None
|
||||
) -> str:
|
||||
"""
|
||||
Invoke speech2text model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param file: audio file
|
||||
:param user: unique user id
|
||||
:return: text for given audio file
|
||||
"""
|
||||
headers = {}
|
||||
|
||||
api_key = credentials.get("api_key")
|
||||
if api_key:
|
||||
headers["Authorization"] = f"Bearer {api_key}"
|
||||
|
||||
endpoint_url = credentials.get("endpoint_url")
|
||||
if not endpoint_url.endswith("/"):
|
||||
endpoint_url += "/"
|
||||
endpoint_url = urljoin(endpoint_url, "audio/transcriptions")
|
||||
|
||||
payload = {"model": model}
|
||||
files = [("file", file)]
|
||||
response = requests.post(endpoint_url, headers=headers, data=payload, files=files)
|
||||
|
||||
if response.status_code != 200:
|
||||
raise InvokeBadRequestError(response.text)
|
||||
response_data = response.json()
|
||||
return response_data["text"]
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
audio_file_path = self._get_demo_file_path()
|
||||
|
||||
with open(audio_file_path, "rb") as audio_file:
|
||||
self._invoke(model, credentials, audio_file)
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
@ -76,7 +76,7 @@ class OAICompatEmbeddingModel(_CommonOAI_API_Compat, TextEmbeddingModel):
|
||||
num_tokens = self._get_num_tokens_by_gpt2(text)
|
||||
|
||||
if num_tokens >= context_size:
|
||||
cutoff = int(len(text) * (np.floor(context_size / num_tokens)))
|
||||
cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
|
||||
# if num tokens is larger than context length, only use the start
|
||||
inputs.append(text[0: cutoff])
|
||||
else:
|
||||
|
||||
@ -4,7 +4,7 @@ label:
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
@ -15,9 +15,9 @@ parameter_rules:
|
||||
required: true
|
||||
default: 512
|
||||
min: 1
|
||||
max: 128000
|
||||
max: 131072
|
||||
pricing:
|
||||
input: "3"
|
||||
output: "3"
|
||||
input: "2.7"
|
||||
output: "2.7"
|
||||
unit: "0.000001"
|
||||
currency: USD
|
||||
|
||||
@ -4,7 +4,7 @@ label:
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
@ -15,9 +15,9 @@ parameter_rules:
|
||||
required: true
|
||||
default: 512
|
||||
min: 1
|
||||
max: 128000
|
||||
max: 131072
|
||||
pricing:
|
||||
input: "0.9"
|
||||
output: "0.9"
|
||||
input: "0.52"
|
||||
output: "0.75"
|
||||
unit: "0.000001"
|
||||
currency: USD
|
||||
|
||||
@ -4,7 +4,7 @@ label:
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
@ -15,9 +15,9 @@ parameter_rules:
|
||||
required: true
|
||||
default: 512
|
||||
min: 1
|
||||
max: 128000
|
||||
max: 131072
|
||||
pricing:
|
||||
input: "0.2"
|
||||
output: "0.2"
|
||||
input: "0.06"
|
||||
output: "0.06"
|
||||
unit: "0.000001"
|
||||
currency: USD
|
||||
|
||||
@ -79,7 +79,7 @@ class OAICompatEmbeddingModel(_CommonOAI_API_Compat, TextEmbeddingModel):
|
||||
num_tokens = self._get_num_tokens_by_gpt2(text)
|
||||
|
||||
if num_tokens >= context_size:
|
||||
cutoff = int(len(text) * (np.floor(context_size / num_tokens)))
|
||||
cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
|
||||
# if num tokens is larger than context length, only use the start
|
||||
inputs.append(text[0: cutoff])
|
||||
else:
|
||||
|
||||
@ -1,8 +1,20 @@
|
||||
- deepseek-v2-chat
|
||||
- qwen2-72b-instruct
|
||||
- qwen2-57b-a14b-instruct
|
||||
- qwen2-7b-instruct
|
||||
- yi-1.5-34b-chat
|
||||
- yi-1.5-9b-chat
|
||||
- yi-1.5-6b-chat
|
||||
- glm4-9B-chat
|
||||
- Qwen/Qwen2-72B-Instruct
|
||||
- Qwen/Qwen2-57B-A14B-Instruct
|
||||
- Qwen/Qwen2-7B-Instruct
|
||||
- Qwen/Qwen2-1.5B-Instruct
|
||||
- 01-ai/Yi-1.5-34B-Chat
|
||||
- 01-ai/Yi-1.5-9B-Chat-16K
|
||||
- 01-ai/Yi-1.5-6B-Chat
|
||||
- THUDM/glm-4-9b-chat
|
||||
- deepseek-ai/DeepSeek-V2-Chat
|
||||
- deepseek-ai/DeepSeek-Coder-V2-Instruct
|
||||
- internlm/internlm2_5-7b-chat
|
||||
- google/gemma-2-27b-it
|
||||
- google/gemma-2-9b-it
|
||||
- meta-llama/Meta-Llama-3-70B-Instruct
|
||||
- meta-llama/Meta-Llama-3-8B-Instruct
|
||||
- meta-llama/Meta-Llama-3.1-405B-Instruct
|
||||
- meta-llama/Meta-Llama-3.1-70B-Instruct
|
||||
- meta-llama/Meta-Llama-3.1-8B-Instruct
|
||||
- mistralai/Mixtral-8x7B-Instruct-v0.1
|
||||
- mistralai/Mistral-7B-Instruct-v0.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
model: deepseek-ai/deepseek-v2-chat
|
||||
model: deepseek-ai/DeepSeek-V2-Chat
|
||||
label:
|
||||
en_US: deepseek-ai/DeepSeek-V2-Chat
|
||||
model_type: llm
|
||||
|
||||
@ -0,0 +1,30 @@
|
||||
model: google/gemma-2-27b-it
|
||||
label:
|
||||
en_US: google/gemma-2-27b-it
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8196
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
type: int
|
||||
default: 512
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
|
||||
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
pricing:
|
||||
input: '1.26'
|
||||
output: '1.26'
|
||||
unit: '0.000001'
|
||||
currency: RMB
|
||||
@ -0,0 +1,30 @@
|
||||
model: google/gemma-2-9b-it
|
||||
label:
|
||||
en_US: google/gemma-2-9b-it
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8196
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
type: int
|
||||
default: 512
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
|
||||
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
pricing:
|
||||
input: '0'
|
||||
output: '0'
|
||||
unit: '0.000001'
|
||||
currency: RMB
|
||||
@ -1,4 +1,4 @@
-model: zhipuai/glm4-9B-chat
+model: THUDM/glm-4-9b-chat
 label:
   en_US: THUDM/glm-4-9b-chat
 model_type: llm
@ -24,7 +24,7 @@ parameter_rules:
   - name: frequency_penalty
     use_template: frequency_penalty
 pricing:
-  input: '0.6'
-  output: '0.6'
+  input: '0'
+  output: '0'
   unit: '0.000001'
   currency: RMB
@ -0,0 +1,30 @@
model: internlm/internlm2_5-7b-chat
label:
  en_US: internlm/internlm2_5-7b-chat
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32768
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 512
    min: 1
    max: 4096
    help:
      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
  - name: frequency_penalty
    use_template: frequency_penalty
pricing:
  input: '0'
  output: '0'
  unit: '0.000001'
  currency: RMB
@ -0,0 +1,30 @@
model: meta-llama/Meta-Llama-3-70B-Instruct
label:
  en_US: meta-llama/Meta-Llama-3-70B-Instruct
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32768
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 512
    min: 1
    max: 4096
    help:
      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
  - name: frequency_penalty
    use_template: frequency_penalty
pricing:
  input: '4.13'
  output: '4.13'
  unit: '0.000001'
  currency: RMB
@ -0,0 +1,30 @@
model: meta-llama/Meta-Llama-3-8B-Instruct
label:
  en_US: meta-llama/Meta-Llama-3-8B-Instruct
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 512
    min: 1
    max: 4096
    help:
      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
  - name: frequency_penalty
    use_template: frequency_penalty
pricing:
  input: '0'
  output: '0'
  unit: '0.000001'
  currency: RMB
@ -0,0 +1,30 @@
model: meta-llama/Meta-Llama-3.1-405B-Instruct
label:
  en_US: meta-llama/Meta-Llama-3.1-405B-Instruct
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32768
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 512
    min: 1
    max: 4096
    help:
      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
  - name: frequency_penalty
    use_template: frequency_penalty
pricing:
  input: '21'
  output: '21'
  unit: '0.000001'
  currency: RMB
@ -0,0 +1,30 @@
model: meta-llama/Meta-Llama-3.1-70B-Instruct
label:
  en_US: meta-llama/Meta-Llama-3.1-70B-Instruct
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32768
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 512
    min: 1
    max: 4096
    help:
      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
  - name: frequency_penalty
    use_template: frequency_penalty
pricing:
  input: '4.13'
  output: '4.13'
  unit: '0.000001'
  currency: RMB
@ -0,0 +1,30 @@
model: meta-llama/Meta-Llama-3.1-8B-Instruct
label:
  en_US: meta-llama/Meta-Llama-3.1-8B-Instruct
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 512
    min: 1
    max: 4096
    help:
      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
  - name: frequency_penalty
    use_template: frequency_penalty
pricing:
  input: '0'
  output: '0'
  unit: '0.000001'
  currency: RMB
@ -0,0 +1,30 @@
model: mistralai/Mistral-7B-Instruct-v0.2
label:
  en_US: mistralai/Mistral-7B-Instruct-v0.2
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32768
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 512
    min: 1
    max: 4096
    help:
      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
  - name: top_p
    use_template: top_p
  - name: frequency_penalty
    use_template: frequency_penalty
pricing:
  input: '0'
  output: '0'
  unit: '0.000001'
  currency: RMB
Some files were not shown because too many files have changed in this diff.