Compare commits

..

116 Commits

Author SHA1 Message Date
7001a29739 update action config. 2025-04-03 11:09:11 +08:00
1045f6db7a fix: wrong arg parsing 2025-03-26 01:37:45 -04:00
50d36612f0 fix: bad import 2025-03-26 00:34:04 -04:00
e38631db8a feat: add inner mail api 2025-03-25 21:47:30 -04:00
7f63cd52a2 update. 2025-03-24 23:08:54 +08:00
5b357fdbf0 Merge branch 'release/0.15.5' into e-0154 2025-03-24 16:42:11 +08:00
9283a5414f fix: update yarn.lock 2025-03-24 16:41:07 +08:00
8923e64b8d Merge branch 'release/0.15.5' into e-0154 2025-03-24 15:40:32 +08:00
2a2a0e9be9 fix: update DifySandbox image version to 0.2.11 in docker-compose files
Signed-off-by: -LAN- <laipz8200@outlook.com>
2025-03-24 15:37:55 +08:00
061a765b7d fix: sanitizer svg to avoid xss (#16608) 2025-03-24 14:48:40 +08:00
acd7fead87 feat: remove Vanna provider and associated assets from the project
Signed-off-by: -LAN- <laipz8200@outlook.com>
2025-03-24 14:34:03 +08:00
64e9d96d84 chore: compatible with es5 (#14268) 2025-03-24 13:17:48 +08:00
d27de3818c Merge branch 'release/0.15.5' into e-0154 2025-03-24 11:46:30 +08:00
bbb080d5b2 fix: update chatbot help doc link on the create app form 2025-03-24 11:28:35 +08:00
8c025abb3b Merge branch 'release/0.15.5' into e-0154 2025-03-24 10:32:56 +08:00
c01d8a70f3 fix: upgrade nextjs to v14.2.25. a security patch for CVE-2025-29927. 2025-03-24 10:32:18 +08:00
98606ca558 fix: upgrade nextjs to v14.2.25 2025-03-24 10:12:21 +08:00
adf3e18ebd Merge tag '0.15.4' into e-0154 2025-03-21 18:29:43 +08:00
1ca15989e0 chore: update version to 0.15.4 in configuration and docker files
Signed-off-by: -LAN- <laipz8200@outlook.com>
2025-03-21 16:39:06 +08:00
8b5a3a9424 Merge branch 'release/0.15.4' of github.com:langgenius/dify into release/0.15.4 2025-03-21 16:31:06 +08:00
42ddcf1edd chore: remove 0.15.3 branch config in the build action
Signed-off-by: -LAN- <laipz8200@outlook.com>
2025-03-21 16:30:33 +08:00
21561df10f fix: xss in render svg (#16437) 2025-03-21 15:24:58 +08:00
4327ec8c4c fix license expireAt field typo (#16428) 2025-03-21 13:43:43 +08:00
bbc5ec8301 fix: expired date calc error 2025-03-21 11:00:07 +08:00
4a51a72c1d Merge branch 'e-0154' into deploy/enterprise 2025-03-20 17:34:52 +08:00
4b6adffa8e fix: hide copyright on forgot-password/install/reset-password page 2025-03-20 17:34:19 +08:00
c7fd73d330 Merge branch 'e-0154' into deploy/enterprise 2025-03-20 10:13:09 +08:00
8a709e445a fix: remove Dify from Service API doc 2025-03-20 10:12:27 +08:00
f02b77b99f fix: Decouple login page logo component to avoid conflict with internal logo 2025-03-20 10:11:26 +08:00
abc625bcce Merge branch 'e-0154' into deploy/enterprise 2025-03-18 22:35:39 -04:00
b6bc1f8bc4 fix: adjust logic for branding toggle 2025-03-18 22:35:27 -04:00
b8f9037cd3 Merge branch 'e-0154' into deploy/enterprise 2025-03-18 16:13:14 +08:00
02606ba3c7 fix: cannot update webapp copyright info 2025-03-18 16:12:52 +08:00
79311d3fb5 Merge branch 'e-0154' into deploy/enterprise 2025-03-18 03:53:18 -04:00
31086a1fbf feat: add webapp copyright feature 2025-03-18 03:53:07 -04:00
6ae5d052e5 Merge branch 'e-0154' into deploy/enterprise 2025-03-18 14:55:36 +08:00
c794ecf101 fix: user can edit webapp copyright info only if webapp_copyright_enabled is true 2025-03-18 14:54:34 +08:00
d887aae012 Merge branch 'e-0154' into deploy/enterprise 2025-03-18 01:55:38 -04:00
1b1e96eff7 fix: typo 2025-03-18 01:55:27 -04:00
eecd091063 Merge branch 'e-0154' into deploy/enterprise 2025-03-17 15:34:49 -04:00
d38f2cb380 fix: change subject title 2025-03-17 15:34:28 -04:00
56aaee5558 fix: wrong branding title 2025-03-17 15:01:31 -04:00
d72b4752c9 fix: wrong title location 2025-03-17 15:00:04 -04:00
ea769c6483 Merge branch 'e-0154' into deploy/enterprise 2025-03-17 14:24:00 -04:00
ec194fa3d4 fix: invalid email template variables 2025-03-17 14:23:46 -04:00
b877039859 Merge branch 'e-0154' into deploy/enterprise 2025-03-17 10:37:20 +08:00
54634f26d2 fix: show copyright in webapp 2025-03-17 10:36:51 +08:00
3bef91a2cd fix: show loading icon when fetching system features 2025-03-15 12:01:30 +08:00
7da45ba589 fix: show loading icon when fetching system features 2025-03-15 12:00:22 +08:00
e0232c67cc fix: update document title and favicon in client side 2025-03-15 12:00:22 +08:00
1dc4a229d4 Merge branch 'e-0154' into deploy/enterprise 2025-03-14 16:37:02 -04:00
0e0bada1f3 fix: missing json keys 2025-03-14 16:36:49 -04:00
5366a814f9 fix: update json keys 2025-03-14 16:35:05 -04:00
f1240a22db fix: remove default value 2025-03-14 13:26:44 -04:00
66f35c2b7e Merge branch 'e-0154' into deploy/enterprise 2025-03-15 01:25:15 +08:00
766ee48531 fix: update document title and favicon in client side 2025-03-15 01:25:04 +08:00
083045f45c Merge branch 'e-0154' into deploy/enterprise 2025-03-14 20:49:17 +08:00
fe237802c9 fix: update Dify text 2025-03-14 19:10:03 +08:00
00b923651f fix: update document title with system features config 2025-03-14 19:10:03 +08:00
24fce3cc64 chore: use global zustand manage systemFeatures and share between all pages 2025-03-14 19:10:03 +08:00
8ba969f67d fix: add ci workflow 2025-03-13 17:15:11 -04:00
6844d59371 fix: add default title name 2025-03-13 17:07:45 -04:00
fe5529db85 Trigger workflow 2025-03-13 17:04:13 -04:00
d89034d913 feat: add application title 2025-03-13 15:49:04 -04:00
360fbeb108 fix: update email template, add application_title 2025-03-13 17:28:49 +08:00
e7c2fa1cfa fix: remove system feature is_branding 2025-03-12 10:48:58 -04:00
735f09d977 fix: build failed due to getPrevChatList no longer exists (#13383) 2025-03-12 10:22:33 +08:00
f83a5e3e49 fix: wrong type 2025-03-11 07:46:48 -04:00
01a8d4efcc fix: remove dify from invite template 2025-03-11 19:25:30 +08:00
fdb1e649d4 feat: add branding support 2025-03-11 07:14:52 -04:00
0856792a57 fix: add email templates that are no brands or logo 2025-03-11 16:03:15 +08:00
0e33a3aa5f chore: add ci 2025-02-19 14:34:36 +08:00
d3895bcd6b revert 2025-02-19 14:32:28 +08:00
eeb390650b fix: build failed 2025-02-19 14:32:28 +08:00
ca19bd31d4 chore(*): Bump version to 0.15.3 (#13308)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2025-02-07 15:20:05 +08:00
413dfd5628 feat: add completion mode and context size options for LLM configuration (#13325)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2025-02-07 15:08:53 +08:00
f9515901cc fix: Azure AI Foundry model cannot be used in the workflow (#13323)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2025-02-07 14:52:57 +08:00
3f42fabff8 chore:improve thinking display for llm from xinference and ollama pro… (#13318) 2025-02-07 14:29:29 +08:00
1caa578771 chore(*): Update style of thinking (#13319)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2025-02-07 14:06:35 +08:00
b7c11c1818 Fix the problem of Workflow terminates after parallel tasks execution, merge node not triggered (#12498)
Co-authored-by: Novice Lee <novicelee@NoviPro.local>
2025-02-07 13:56:08 +08:00
3eb3db0663 chore: refactor the OpenAICompatible and improve thinking display (#13299) 2025-02-07 13:28:46 +08:00
be46f32056 fix(credits): require model name equals (#13314)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2025-02-07 13:28:17 +08:00
6e5c915f96 feat(model): add deepseek-r1 for openrouter (#13312) 2025-02-07 12:39:13 +08:00
04d13a8116 feat(credits): Allow to configure model-credit mapping (#13274)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2025-02-07 11:01:31 +08:00
e638ede3f2 Update README_TR.md (#13294) 2025-02-07 09:11:39 +08:00
2348abe4bf feat: added a couple of models not defined in vertex ai, that were already … (#13296) 2025-02-07 09:11:25 +08:00
f7e7a399d9 feat:add think tag display for xinference deepseek r1 (#13291) 2025-02-06 22:04:58 +08:00
ba91f34636 fix: incorrect transferMethod assignment for remote file (#13286) 2025-02-06 19:32:21 +08:00
16865d43a8 feat: add deepseek models for volcengine provider (#13283)
Co-authored-by: zhaoqingyu.1075 <zhaoqingyu.1075@bytedance.com>
2025-02-06 18:20:03 +08:00
0d13aee15c feat:add deepseek r1 think display for ollama provider (#13272) 2025-02-06 15:32:10 +08:00
49b4144ffd fix: add dataset edit permissions (#13223) 2025-02-06 14:26:16 +08:00
186e2d972e chore(deps): bump katex from 0.16.10 to 0.16.21 in /web (#13270)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-06 13:27:07 +08:00
40dd63ecef Upgrade oracle models (#13174)
Co-authored-by: engchina <atjapan2015@gmail.com>
2025-02-06 13:24:27 +08:00
6d66d6da15 feat(model_providers): Support deepseek-r1 for Nvidia Catalog (#13269)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2025-02-06 13:03:19 +08:00
03ec3513f3 Fix bug large data no render (#12683)
Co-authored-by: ex_wenyan.wei <ex_wenyan.wei@tcl.com>
2025-02-06 13:00:04 +08:00
87763fc234 feat(model_providers): Support deepseek for Azure AI Foundry (#13267)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2025-02-06 12:45:48 +08:00
f6c44cae2e feat(model): add gemini-2.0 model (#13266) 2025-02-06 12:28:59 +08:00
xhe da2ee04fce fix: correct linewrap think display in generic openai api (#13260)
Signed-off-by: xhe <xw897002528@gmail.com>
2025-02-06 10:53:08 +08:00
7673c36af3 feat(model): add gemini-2.0-flash-thinking-exp-01-21 (#13230) 2025-02-06 10:01:00 +08:00
9457b2af2f feat: added models :gemini 2.0 flash 001 and gemini 2.0 pro exp 02-05 (#13247) 2025-02-06 09:58:39 +08:00
7203991032 feat: add parameter "reasoning_effort" and Openai o3-mini (#13243) 2025-02-06 09:29:48 +08:00
xhe 5a685f7156 feat: add think display for volcengine and generic openapi (#13234)
Signed-off-by: xhe <xw897002528@gmail.com>
2025-02-06 09:24:40 +08:00
a6a25030ad fix: updated _position.yaml to include the latest model already integ… (#13245) 2025-02-06 09:21:51 +08:00
00458a31d5 feat: added deepseek r1 and v3 to siliconflow (#13238) 2025-02-05 21:59:18 +08:00
c6ddf6d6cc feat(model_providers): Add Groq DeepSeek-R1-Distill-Llama-70b (#13229)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2025-02-05 19:15:29 +08:00
34b21b3065 feat: Add o3-mini and o3-mini-2025-01-31 model variants (#13129)
Co-authored-by: crazywoola <427733928@qq.com>
2025-02-05 17:04:45 +08:00
8fbb355cd2 chore: squash system dependencies installation steps (#13206) 2025-02-05 16:42:53 +08:00
e8b3b7e578 Fix new variables in the conversation opener would override prompt_variables (#13191) 2025-02-05 16:16:00 +08:00
59ca44f493 chore(model_runtime): Move deepseek ahead in the providers list. (#13197)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2025-02-05 16:08:28 +08:00
9e1457c2c3 fix: mypy checks violation in AzureBlobStorage (#13215) 2025-02-05 15:56:23 +08:00
fac83e14bc Use DefaultAzureCredential for managed identity in azure blob extention (#11559) 2025-02-05 13:43:43 +08:00
a97cec57e4 fix: SSRF proxy file descriptor leak in concurrent requests (#13108) 2025-02-05 13:10:27 +08:00
38c10b47d3 Feat: add linkedin to readme (#13203) 2025-02-05 12:27:58 +08:00
1a2523fd15 feat: bedrock_endpoint_url (#12838) 2025-02-05 12:24:24 +08:00
03243cb422 Modify params for bedrock retrieve generate (#13182) 2025-02-05 12:17:42 +08:00
2ad7ee0344 chore: add tests for build docker image when dockerfile changed (#10732) 2025-02-05 11:40:22 +08:00
194 changed files with 7468 additions and 6715 deletions

View File

@ -5,6 +5,8 @@ on:
branches:
- "main"
- "deploy/dev"
- "deploy/enterprise"
- "e-0156"
release:
types: [published]

29
.github/workflows/deploy-enterprise.yml vendored Normal file
View File

@ -0,0 +1,29 @@
name: Deploy Enterprise
permissions:
contents: read
on:
workflow_run:
workflows: ["Build and Push API & Web"]
branches:
- "deploy/enterprise"
types:
- completed
jobs:
deploy:
runs-on: ubuntu-latest
if: |
github.event.workflow_run.conclusion == 'success' &&
github.event.workflow_run.head_branch == 'deploy/enterprise'
steps:
- name: Deploy to server
uses: appleboy/ssh-action@v0.1.8
with:
host: ${{ secrets.ENTERPRISE_SSH_HOST }}
username: ${{ secrets.ENTERPRISE_SSH_USER }}
password: ${{ secrets.ENTERPRISE_SSH_PASSWORD }}
script: |
${{ vars.ENTERPRISE_SSH_SCRIPT || secrets.ENTERPRISE_SSH_SCRIPT }}

47
.github/workflows/docker-build.yml vendored Normal file
View File

@ -0,0 +1,47 @@
name: Build docker image
on:
pull_request:
branches:
- "main"
paths:
- api/Dockerfile
- web/Dockerfile
concurrency:
group: docker-build-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
build-docker:
runs-on: ubuntu-latest
strategy:
matrix:
include:
- service_name: "api-amd64"
platform: linux/amd64
context: "api"
- service_name: "api-arm64"
platform: linux/arm64
context: "api"
- service_name: "web-amd64"
platform: linux/amd64
context: "web"
- service_name: "web-arm64"
platform: linux/arm64
context: "web"
steps:
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build Docker Image
uses: docker/build-push-action@v6
with:
push: false
context: "{{defaultContext}}:${{ matrix.context }}"
platforms: ${{ matrix.platform }}
cache-from: type=gha
cache-to: type=gha,mode=max

View File

@ -25,6 +25,9 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="follow on X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="follow on LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@ -21,6 +21,9 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="follow on X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="follow on LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@ -21,6 +21,9 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="follow on X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="follow on LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@ -21,6 +21,9 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="seguir en X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="seguir en LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Descargas de Docker" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@ -21,6 +21,9 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="suivre sur X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="suivre sur LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Tirages Docker" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@ -21,6 +21,9 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="X(Twitter)でフォロー"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="LinkedInでフォロー"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@ -21,6 +21,9 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="follow on X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="follow on LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@ -21,6 +21,9 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="follow on X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="follow on LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@ -25,6 +25,9 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="follow on X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="follow on LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@ -22,6 +22,9 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="follow on X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="follow on LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@ -21,6 +21,9 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="X(Twitter)'da takip et"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="LinkedIn'da takip et"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Çekmeleri" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
@ -62,8 +65,6 @@ Görsel bir arayüz üzerinde güçlü AI iş akışları oluşturun ve test edi
![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3)
Özür dilerim, haklısınız. Daha anlamlı ve akıcı bir çeviri yapmaya çalışayım. İşte güncellenmiş çeviri:
**3. Prompt IDE**:
Komut istemlerini oluşturmak, model performansını karşılaştırmak ve sohbet tabanlı uygulamalara metin-konuşma gibi ek özellikler eklemek için kullanıcı dostu bir arayüz.
@ -150,8 +151,6 @@ Görsel bir arayüz üzerinde güçlü AI iş akışları oluşturun ve test edi
## Dify'ı Kullanma
- **Cloud </br>**
İşte verdiğiniz metnin Türkçe çevirisi, kod bloğu içinde:
-
Herkesin sıfır kurulumla denemesi için bir [Dify Cloud](https://dify.ai) hizmeti sunuyoruz. Bu hizmet, kendi kendine dağıtılan versiyonun tüm yeteneklerini sağlar ve sandbox planında 200 ücretsiz GPT-4 çağrısı içerir.
- **Dify Topluluk Sürümünü Kendi Sunucunuzda Barındırma</br>**
@ -177,8 +176,6 @@ GitHub'da Dify'a yıldız verin ve yeni sürümlerden anında haberdar olun.
>- RAM >= 4GB
</br>
İşte verdiğiniz metnin Türkçe çevirisi, kod bloğu içinde:
Dify sunucusunu başlatmanın en kolay yolu, [docker-compose.yml](docker/docker-compose.yaml) dosyamızı çalıştırmaktır. Kurulum komutunu çalıştırmadan önce, makinenizde [Docker](https://docs.docker.com/get-docker/) ve [Docker Compose](https://docs.docker.com/compose/install/)'un kurulu olduğundan emin olun:
```bash

View File

@ -21,6 +21,9 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="theo dõi trên X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="theo dõi trên LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@ -48,18 +48,18 @@ ENV TZ=UTC
WORKDIR /app/api
RUN apt-get update \
&& apt-get install -y --no-install-recommends curl nodejs libgmp-dev libmpfr-dev libmpc-dev \
# if you located in China, you can use aliyun mirror to speed up
# && echo "deb http://mirrors.aliyun.com/debian testing main" > /etc/apt/sources.list \
&& echo "deb http://deb.debian.org/debian bookworm main" > /etc/apt/sources.list \
&& apt-get update \
# For Security
&& apt-get install -y --no-install-recommends expat libldap-2.5-0 perl libsqlite3-0 zlib1g \
# install a chinese font to support the use of tools like matplotlib
&& apt-get install -y fonts-noto-cjk \
# install libmagic to support the use of python-magic guess MIMETYPE
&& apt-get install -y libmagic1 \
RUN \
apt-get update \
# Install dependencies
&& apt-get install -y --no-install-recommends \
# basic environment
curl nodejs libgmp-dev libmpfr-dev libmpc-dev \
# For Security
expat libldap-2.5-0 perl libsqlite3-0 zlib1g \
# install a chinese font to support the use of tools like matplotlib
fonts-noto-cjk \
# install libmagic to support the use of python-magic guess MIMETYPE
libmagic1 \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
@ -78,7 +78,6 @@ COPY . /app/api/
COPY docker/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ARG COMMIT_SHA
ENV COMMIT_SHA=${COMMIT_SHA}

View File

@ -1,9 +1,40 @@
from typing import Optional
from pydantic import Field, NonNegativeInt
from pydantic import Field, NonNegativeInt, computed_field
from pydantic_settings import BaseSettings
class HostedCreditConfig(BaseSettings):
HOSTED_MODEL_CREDIT_CONFIG: str = Field(
description="Model credit configuration in format 'model:credits,model:credits', e.g., 'gpt-4:20,gpt-4o:10'",
default="",
)
def get_model_credits(self, model_name: str) -> int:
"""
Get credit value for a specific model name.
Returns 1 if model is not found in configuration (default credit).
:param model_name: The name of the model to search for
:return: The credit value for the model
"""
if not self.HOSTED_MODEL_CREDIT_CONFIG:
return 1
try:
credit_map = dict(
item.strip().split(":", 1) for item in self.HOSTED_MODEL_CREDIT_CONFIG.split(",") if ":" in item
)
# Search for matching model pattern
for pattern, credit in credit_map.items():
if pattern.strip() == model_name:
return int(credit)
return 1 # Default quota if no match found
except (ValueError, AttributeError):
return 1 # Return default quota if parsing fails
class HostedOpenAiConfig(BaseSettings):
"""
Configuration for hosted OpenAI service
@ -202,5 +233,7 @@ class HostedServiceConfig(
HostedZhipuAIConfig,
# moderation
HostedModerationConfig,
# credit config
HostedCreditConfig,
):
pass
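Illustrative sketch (not from the changeset) of how the new credit mapping resolves credits; the mapping string mirrors the documented example and the values are placeholders:

```python
# Assumes HostedCreditConfig from the hunk above is importable.
config = HostedCreditConfig(HOSTED_MODEL_CREDIT_CONFIG="gpt-4:20,gpt-4o:10")

assert config.get_model_credits("gpt-4") == 20     # exact model-name match
assert config.get_model_credits("gpt-4o") == 10
assert config.get_model_credits("claude-3") == 1   # unmatched models fall back to the default credit
```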

View File

@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):
CURRENT_VERSION: str = Field(
description="Dify version",
default="0.15.2",
default="0.15.4",
)
COMMIT_SHA: str = Field(

View File

@ -39,6 +39,17 @@ def only_edition_cloud(view):
return decorated
def only_enterprise_edition(view):
@wraps(view)
def decorated(*args, **kwargs):
if not dify_config.ENTERPRISE_ENABLED:
abort(404)
return view(*args, **kwargs)
return decorated
def only_edition_self_hosted(view):
@wraps(view)
def decorated(*args, **kwargs):

View File

@ -6,3 +6,4 @@ bp = Blueprint("inner_api", __name__, url_prefix="/inner/api")
api = ExternalApi(bp)
from .workspace import workspace
from . import mail

View File

@ -0,0 +1,25 @@
from flask_restful import Resource # type: ignore
from flask_restful import reqparse
from controllers.console.wraps import setup_required
from controllers.inner_api import api
from controllers.inner_api.wraps import inner_api_only
from services.enterprise.mail_service import DifyMail, EnterpriseMailService
class EnterpriseMail(Resource):
@setup_required
@inner_api_only
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("to", type=str, action='append', required=True)
parser.add_argument("subject", type=str, required=True)
parser.add_argument("body", type=str, required=True)
parser.add_argument("substitutions", type=dict, required=False)
args = parser.parse_args()
EnterpriseMailService.send_mail(DifyMail(**args))
return {"message": "success"}, 200
api.add_resource(EnterpriseMail, "/enterprise/mail")
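Hedged sketch (not from the changeset) of exercising the new endpoint; the base URL is a placeholder, and the setup_required / inner_api_only decorators enforce setup and inner-API authentication that is omitted here:

```python
import requests

payload = {
    "to": ["user@example.com"],          # action='append' accepts a list of recipients
    "subject": "Welcome",
    "body": "Hello",
    "substitutions": {"name": "Alice"},  # optional template variables
}

# The blueprint is mounted at /inner/api; host and port are placeholders.
resp = requests.post("http://localhost:5001/inner/api/enterprise/mail", json=payload)
print(resp.status_code, resp.json())    # expected: 200 {'message': 'success'}
```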

View File

@ -11,15 +11,6 @@ from configs import dify_config
SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES
proxy_mounts = (
{
"http://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTP_URL),
"https://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTPS_URL),
}
if dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL
else None
)
BACKOFF_FACTOR = 0.5
STATUS_FORCELIST = [429, 500, 502, 503, 504]
@ -51,7 +42,11 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
if dify_config.SSRF_PROXY_ALL_URL:
with httpx.Client(proxy=dify_config.SSRF_PROXY_ALL_URL) as client:
response = client.request(method=method, url=url, **kwargs)
elif proxy_mounts:
elif dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL:
proxy_mounts = {
"http://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTP_URL),
"https://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTPS_URL),
}
with httpx.Client(mounts=proxy_mounts) as client:
response = client.request(method=method, url=url, **kwargs)
else:

View File

@ -1,4 +1,4 @@
from .llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from .llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from .message_entities import (
AssistantPromptMessage,
AudioPromptMessageContent,
@ -23,6 +23,7 @@ __all__ = [
"AudioPromptMessageContent",
"DocumentPromptMessageContent",
"ImagePromptMessageContent",
"LLMMode",
"LLMResult",
"LLMResultChunk",
"LLMResultChunkDelta",

View File

@ -1,5 +1,5 @@
from decimal import Decimal
from enum import Enum
from enum import StrEnum
from typing import Optional
from pydantic import BaseModel
@ -8,7 +8,7 @@ from core.model_runtime.entities.message_entities import AssistantPromptMessage,
from core.model_runtime.entities.model_entities import ModelUsage, PriceInfo
class LLMMode(Enum):
class LLMMode(StrEnum):
"""
Enum class for large language model mode.
"""

View File

@ -30,6 +30,11 @@ from core.model_runtime.model_providers.__base.ai_model import AIModel
logger = logging.getLogger(__name__)
HTML_THINKING_TAG = (
'<details style="color:gray;background-color: #f8f8f8;padding: 8px;border-radius: 4px;" open> '
"<summary> Thinking... </summary>"
)
class LargeLanguageModel(AIModel):
"""
@ -400,6 +405,40 @@ if you are not sure about the structure.
),
)
def _wrap_thinking_by_reasoning_content(self, delta: dict, is_reasoning: bool) -> tuple[str, bool]:
"""
If the reasoning response is from delta.get("reasoning_content"), we wrap
it with HTML details tag.
:param delta: delta dictionary from LLM streaming response
:param is_reasoning: is reasoning
:return: tuple of (processed_content, is_reasoning)
"""
content = delta.get("content") or ""
reasoning_content = delta.get("reasoning_content")
if reasoning_content:
if not is_reasoning:
content = HTML_THINKING_TAG + reasoning_content
is_reasoning = True
else:
content = reasoning_content
elif is_reasoning:
content = "</details>" + content
is_reasoning = False
return content, is_reasoning
def _wrap_thinking_by_tag(self, content: str) -> str:
"""
If the reasoning response is a <think>...</think> block from delta.get("content"),
we replace <think> with <details>.
:param content: delta.get("content")
:return: processed_content
"""
return content.replace("<think>", HTML_THINKING_TAG).replace("</think>", "</details>")
def _invoke_result_generator(
self,
model: str,

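Illustrative only (not from the changeset): how the two helpers above present reasoning output, with `llm` standing in for any LargeLanguageModel subclass instance:

```python
# Providers that stream <think>...</think> inline (e.g. deepseek-r1 style output):
chunk = "<think>compare the two options</think>Option A is better."
wrapped = llm._wrap_thinking_by_tag(chunk)
# -> HTML_THINKING_TAG + "compare the two options" + "</details>" + "Option A is better."

# Providers that stream a separate reasoning_content field in the delta:
content, is_reasoning = llm._wrap_thinking_by_reasoning_content(
    {"reasoning_content": "compare the two options"}, is_reasoning=False
)
# content starts with HTML_THINKING_TAG and is_reasoning flips to True; a later
# delta carrying only regular "content" closes the block with "</details>".
```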
View File

@ -1,4 +1,5 @@
- openai
- deepseek
- anthropic
- azure_openai
- google
@ -32,7 +33,6 @@
- localai
- volcengine_maas
- openai_api_compatible
- deepseek
- hunyuan
- siliconflow
- perfxcloud

View File

@ -51,6 +51,40 @@ model_credential_schema:
show_on:
- variable: __model_type
value: llm
- variable: mode
show_on:
- variable: __model_type
value: llm
label:
en_US: Completion mode
type: select
required: false
default: chat
placeholder:
zh_Hans: 选择对话类型
en_US: Select completion mode
options:
- value: completion
label:
en_US: Completion
zh_Hans: 补全
- value: chat
label:
en_US: Chat
zh_Hans: 对话
- variable: context_size
label:
zh_Hans: 模型上下文长度
en_US: Model context size
required: true
show_on:
- variable: __model_type
value: llm
type: text-input
default: "4096"
placeholder:
zh_Hans: 在此输入您的模型上下文长度
en_US: Enter your Model context size
- variable: jwt_token
required: true
label:

View File

@ -1,9 +1,9 @@
import logging
from collections.abc import Generator
from collections.abc import Generator, Sequence
from typing import Any, Optional, Union
from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import StreamingChatCompletionsUpdate
from azure.ai.inference.models import StreamingChatCompletionsUpdate, SystemMessage, UserMessage
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import (
ClientAuthenticationError,
@ -20,7 +20,7 @@ from azure.core.exceptions import (
)
from core.model_runtime.callbacks.base_callback import Callback
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
PromptMessage,
@ -30,6 +30,7 @@ from core.model_runtime.entities.model_entities import (
AIModelEntity,
FetchFrom,
I18nObject,
ModelPropertyKey,
ModelType,
ParameterRule,
ParameterType,
@ -60,10 +61,10 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
self,
model: str,
credentials: dict,
prompt_messages: list[PromptMessage],
prompt_messages: Sequence[PromptMessage],
model_parameters: dict,
tools: Optional[list[PromptMessageTool]] = None,
stop: Optional[list[str]] = None,
tools: Optional[Sequence[PromptMessageTool]] = None,
stop: Optional[Sequence[str]] = None,
stream: bool = True,
user: Optional[str] = None,
) -> Union[LLMResult, Generator]:
@ -82,8 +83,8 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
"""
if not self.client:
endpoint = credentials.get("endpoint")
api_key = credentials.get("api_key")
endpoint = str(credentials.get("endpoint"))
api_key = str(credentials.get("api_key"))
self.client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))
messages = [{"role": msg.role.value, "content": msg.content} for msg in prompt_messages]
@ -94,6 +95,7 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
"temperature": model_parameters.get("temperature", 0),
"top_p": model_parameters.get("top_p", 1),
"stream": stream,
"model": model,
}
if stop:
@ -255,10 +257,16 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
:return:
"""
try:
endpoint = credentials.get("endpoint")
api_key = credentials.get("api_key")
endpoint = str(credentials.get("endpoint"))
api_key = str(credentials.get("api_key"))
client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))
client.get_model_info()
client.complete(
messages=[
SystemMessage(content="I say 'ping', you say 'pong'"),
UserMessage(content="ping"),
],
model=model,
)
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
@ -327,7 +335,10 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_type=ModelType.LLM,
features=[],
model_properties={},
model_properties={
ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "4096")),
ModelPropertyKey.MODE: credentials.get("mode", LLMMode.CHAT),
},
parameter_rules=rules,
)

View File

@ -138,6 +138,18 @@ model_credential_schema:
show_on:
- variable: __model_type
value: llm
- label:
en_US: o3-mini
value: o3-mini
show_on:
- variable: __model_type
value: llm
- label:
en_US: o3-mini-2025-01-31
value: o3-mini-2025-01-31
show_on:
- variable: __model_type
value: llm
- label:
en_US: o1-preview
value: o1-preview

View File

@ -123,6 +123,15 @@ provider_credential_schema:
en_US: AWS GovCloud (US-West)
zh_Hans: AWS GovCloud (US-West)
ja_JP: AWS GovCloud (米国西部)
- variable: bedrock_endpoint_url
label:
zh_Hans: Bedrock Endpoint URL
en_US: Bedrock Endpoint URL
type: text-input
required: false
placeholder:
zh_Hans: 在此输入您的 Bedrock Endpoint URL, 如https://123456.cloudfront.net
en_US: Enter your Bedrock Endpoint URL, e.g. https://123456.cloudfront.net
- variable: model_for_validation
required: false
label:

View File

@ -13,6 +13,7 @@ def get_bedrock_client(service_name: str, credentials: Mapping[str, str]):
client_config = Config(region_name=region_name)
aws_access_key_id = credentials.get("aws_access_key_id")
aws_secret_access_key = credentials.get("aws_secret_access_key")
bedrock_endpoint_url = credentials.get("bedrock_endpoint_url")
if aws_access_key_id and aws_secret_access_key:
# use aksk to call bedrock
@ -21,6 +22,7 @@ def get_bedrock_client(service_name: str, credentials: Mapping[str, str]):
config=client_config,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
**({"endpoint_url": bedrock_endpoint_url} if bedrock_endpoint_url else {}),
)
else:
# use iam without aksk to call

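Hedged sketch (not from the changeset) of the new optional endpoint override; the credential keys mirror the hunk and the YAML placeholder above, while the service name and the region credential (not shown in this hunk) are assumptions:

```python
credentials = {
    "aws_access_key_id": "AKIA...",                           # placeholder values
    "aws_secret_access_key": "...",
    "bedrock_endpoint_url": "https://123456.cloudfront.net",  # the new optional key
    # a region credential is also read by this helper but is not shown in the hunk
}

# With bedrock_endpoint_url set, the boto3 client is pointed at the custom endpoint;
# without it, the default regional Bedrock endpoint is used.
client = get_bedrock_client("bedrock-runtime", credentials)
```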
View File

@ -1,13 +1,10 @@
import json
from collections.abc import Generator
from typing import Optional, Union
import requests
from yarl import URL
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
PromptMessage,
PromptMessageTool,
)
@ -39,208 +36,3 @@ class DeepseekLargeLanguageModel(OAIAPICompatLargeLanguageModel):
credentials["mode"] = LLMMode.CHAT.value
credentials["function_calling_type"] = "tool_call"
credentials["stream_function_calling"] = "support"
def _handle_generate_stream_response(
self, model: str, credentials: dict, response: requests.Response, prompt_messages: list[PromptMessage]
) -> Generator:
"""
Handle llm stream response
:param model: model name
:param credentials: model credentials
:param response: streamed response
:param prompt_messages: prompt messages
:return: llm response chunk generator
"""
full_assistant_content = ""
chunk_index = 0
is_reasoning_started = False # Add flag to track reasoning state
def create_final_llm_result_chunk(
id: Optional[str], index: int, message: AssistantPromptMessage, finish_reason: str, usage: dict
) -> LLMResultChunk:
# calculate num tokens
prompt_tokens = usage and usage.get("prompt_tokens")
if prompt_tokens is None:
prompt_tokens = self._num_tokens_from_string(model, prompt_messages[0].content)
completion_tokens = usage and usage.get("completion_tokens")
if completion_tokens is None:
completion_tokens = self._num_tokens_from_string(model, full_assistant_content)
# transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
return LLMResultChunk(
id=id,
model=model,
prompt_messages=prompt_messages,
delta=LLMResultChunkDelta(index=index, message=message, finish_reason=finish_reason, usage=usage),
)
# delimiter for stream response, need unicode_escape
import codecs
delimiter = credentials.get("stream_mode_delimiter", "\n\n")
delimiter = codecs.decode(delimiter, "unicode_escape")
tools_calls: list[AssistantPromptMessage.ToolCall] = []
def increase_tool_call(new_tool_calls: list[AssistantPromptMessage.ToolCall]):
def get_tool_call(tool_call_id: str):
if not tool_call_id:
return tools_calls[-1]
tool_call = next((tool_call for tool_call in tools_calls if tool_call.id == tool_call_id), None)
if tool_call is None:
tool_call = AssistantPromptMessage.ToolCall(
id=tool_call_id,
type="function",
function=AssistantPromptMessage.ToolCall.ToolCallFunction(name="", arguments=""),
)
tools_calls.append(tool_call)
return tool_call
for new_tool_call in new_tool_calls:
# get tool call
tool_call = get_tool_call(new_tool_call.function.name)
# update tool call
if new_tool_call.id:
tool_call.id = new_tool_call.id
if new_tool_call.type:
tool_call.type = new_tool_call.type
if new_tool_call.function.name:
tool_call.function.name = new_tool_call.function.name
if new_tool_call.function.arguments:
tool_call.function.arguments += new_tool_call.function.arguments
finish_reason = None # The default value of finish_reason is None
message_id, usage = None, None
for chunk in response.iter_lines(decode_unicode=True, delimiter=delimiter):
chunk = chunk.strip()
if chunk:
# ignore sse comments
if chunk.startswith(":"):
continue
decoded_chunk = chunk.strip().removeprefix("data:").lstrip()
if decoded_chunk == "[DONE]": # Some provider returns "data: [DONE]"
continue
try:
chunk_json: dict = json.loads(decoded_chunk)
# stream ended
except json.JSONDecodeError as e:
yield create_final_llm_result_chunk(
id=message_id,
index=chunk_index + 1,
message=AssistantPromptMessage(content=""),
finish_reason="Non-JSON encountered.",
usage=usage,
)
break
# handle the error here. for issue #11629
if chunk_json.get("error") and chunk_json.get("choices") is None:
raise ValueError(chunk_json.get("error"))
if chunk_json:
if u := chunk_json.get("usage"):
usage = u
if not chunk_json or len(chunk_json["choices"]) == 0:
continue
choice = chunk_json["choices"][0]
finish_reason = chunk_json["choices"][0].get("finish_reason")
message_id = chunk_json.get("id")
chunk_index += 1
if "delta" in choice:
delta = choice["delta"]
is_reasoning = delta.get("reasoning_content")
delta_content = delta.get("content") or delta.get("reasoning_content")
assistant_message_tool_calls = None
if "tool_calls" in delta and credentials.get("function_calling_type", "no_call") == "tool_call":
assistant_message_tool_calls = delta.get("tool_calls", None)
elif (
"function_call" in delta
and credentials.get("function_calling_type", "no_call") == "function_call"
):
assistant_message_tool_calls = [
{"id": "tool_call_id", "type": "function", "function": delta.get("function_call", {})}
]
# assistant_message_function_call = delta.delta.function_call
# extract tool calls from response
if assistant_message_tool_calls:
tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
increase_tool_call(tool_calls)
if delta_content is None or delta_content == "":
continue
# Add markdown quote markers for reasoning content
if is_reasoning:
if not is_reasoning_started:
delta_content = "> 💭 " + delta_content
is_reasoning_started = True
elif "\n\n" in delta_content:
delta_content = delta_content.replace("\n\n", "\n> ")
elif "\n" in delta_content:
delta_content = delta_content.replace("\n", "\n> ")
elif is_reasoning_started:
# If we were in reasoning mode but now getting regular content,
# add \n\n to close the reasoning block
delta_content = "\n\n" + delta_content
is_reasoning_started = False
# transform assistant message to prompt message
assistant_prompt_message = AssistantPromptMessage(
content=delta_content,
)
# reset tool calls
tool_calls = []
full_assistant_content += delta_content
elif "text" in choice:
choice_text = choice.get("text", "")
if choice_text == "":
continue
# transform assistant message to prompt message
assistant_prompt_message = AssistantPromptMessage(content=choice_text)
full_assistant_content += choice_text
else:
continue
yield LLMResultChunk(
id=message_id,
model=model,
prompt_messages=prompt_messages,
delta=LLMResultChunkDelta(
index=chunk_index,
message=assistant_prompt_message,
),
)
chunk_index += 1
if tools_calls:
yield LLMResultChunk(
id=message_id,
model=model,
prompt_messages=prompt_messages,
delta=LLMResultChunkDelta(
index=chunk_index,
message=AssistantPromptMessage(tool_calls=tools_calls, content=""),
),
)
yield create_final_llm_result_chunk(
id=message_id,
index=chunk_index,
message=AssistantPromptMessage(content=""),
finish_reason=finish_reason,
usage=usage,
)

View File

@ -1,4 +1,6 @@
- gemini-2.0-flash-001
- gemini-2.0-flash-exp
- gemini-2.0-pro-exp-02-05
- gemini-2.0-flash-thinking-exp-1219
- gemini-2.0-flash-thinking-exp-01-21
- gemini-1.5-pro

View File

@ -0,0 +1,41 @@
model: gemini-2.0-flash-001
label:
en_US: Gemini 2.0 Flash 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,41 @@
model: gemini-2.0-pro-exp-02-05
label:
en_US: Gemini 2.0 pro exp 02-05
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -1,3 +1,4 @@
- deepseek-r1-distill-llama-70b
- llama-3.1-405b-reasoning
- llama-3.3-70b-versatile
- llama-3.1-70b-versatile

View File

@ -0,0 +1,36 @@
model: deepseek-r1-distill-llama-70b
label:
en_US: DeepSeek R1 Distill Llama 70b
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '3.00'
output: '3.00'
unit: '0.000001'
currency: USD

View File

@ -1,3 +1,4 @@
- deepseek-ai/deepseek-r1
- google/gemma-7b
- google/codegemma-7b
- google/recurrentgemma-2b

View File

@ -0,0 +1,35 @@
model: deepseek-ai/deepseek-r1
label:
en_US: deepseek-ai/deepseek-r1
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 1
default: 0.5
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 1024
default: 1024
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0

View File

@ -83,7 +83,7 @@ class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
def _add_custom_parameters(self, credentials: dict, model: str) -> None:
credentials["mode"] = "chat"
if self.MODEL_SUFFIX_MAP[model]:
if self.MODEL_SUFFIX_MAP.get(model):
credentials["server_url"] = f"https://ai.api.nvidia.com/v1/{self.MODEL_SUFFIX_MAP[model]}"
credentials.pop("endpoint_url")
else:

View File

@ -0,0 +1,52 @@
model: cohere.command-r-08-2024
label:
en_US: cohere.command-r-08-2024 v1.7
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 1
max: 1.0
- name: topP
use_template: top_p
default: 0.75
min: 0
max: 1
- name: topK
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
default: 0
min: 0
max: 500
- name: presencePenalty
use_template: presence_penalty
min: 0
max: 1
default: 0
- name: frequencyPenalty
use_template: frequency_penalty
min: 0
max: 1
default: 0
- name: maxTokens
use_template: max_tokens
default: 600
max: 4000
pricing:
input: '0.0009'
output: '0.0009'
unit: '0.0001'
currency: USD

View File

@ -50,3 +50,4 @@ pricing:
output: '0.004'
unit: '0.0001'
currency: USD
deprecated: true

View File

@ -0,0 +1,52 @@
model: cohere.command-r-plus-08-2024
label:
en_US: cohere.command-r-plus-08-2024 v1.6
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 1
max: 1.0
- name: topP
use_template: top_p
default: 0.75
min: 0
max: 1
- name: topK
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
default: 0
min: 0
max: 500
- name: presencePenalty
use_template: presence_penalty
min: 0
max: 1
default: 0
- name: frequencyPenalty
use_template: frequency_penalty
min: 0
max: 1
default: 0
- name: maxTokens
use_template: max_tokens
default: 600
max: 4000
pricing:
input: '0.0156'
output: '0.0156'
unit: '0.0001'
currency: USD

View File

@ -50,3 +50,4 @@ pricing:
output: '0.0219'
unit: '0.0001'
currency: USD
deprecated: true

View File

@ -33,7 +33,7 @@ logger = logging.getLogger(__name__)
request_template = {
"compartmentId": "",
"servingMode": {"modelId": "cohere.command-r-plus", "servingType": "ON_DEMAND"},
"servingMode": {"modelId": "cohere.command-r-plus-08-2024", "servingType": "ON_DEMAND"},
"chatRequest": {
"apiFormat": "COHERE",
# "preambleOverride": "You are a helpful assistant.",
@ -60,19 +60,19 @@ oci_config_template = {
class OCILargeLanguageModel(LargeLanguageModel):
# https://docs.oracle.com/en-us/iaas/Content/generative-ai/pretrained-models.htm
_supported_models = {
"meta.llama-3-70b-instruct": {
"meta.llama-3.1-70b-instruct": {
"system": True,
"multimodal": False,
"tool_call": False,
"stream_tool_call": False,
},
"cohere.command-r-16k": {
"cohere.command-r-08-2024": {
"system": True,
"multimodal": False,
"tool_call": True,
"stream_tool_call": False,
},
"cohere.command-r-plus": {
"cohere.command-r-plus-08-2024": {
"system": True,
"multimodal": False,
"tool_call": True,

View File

@ -49,3 +49,4 @@ pricing:
output: '0.015'
unit: '0.0001'
currency: USD
deprecated: true

View File

@ -0,0 +1,51 @@
model: meta.llama-3.1-70b-instruct
label:
zh_Hans: meta.llama-3.1-70b-instruct
en_US: meta.llama-3.1-70b-instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
default: 1
max: 2.0
- name: topP
use_template: top_p
default: 0.75
min: 0
max: 1
- name: topK
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
default: 0
min: 0
max: 500
- name: presencePenalty
use_template: presence_penalty
min: -2
max: 2
default: 0
- name: frequencyPenalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: maxTokens
use_template: max_tokens
default: 600
max: 4000
pricing:
input: '0.0075'
output: '0.0075'
unit: '0.0001'
currency: USD

View File

@ -19,8 +19,8 @@ class OCIGENAIProvider(ModelProvider):
try:
model_instance = self.get_model_instance(ModelType.LLM)
# Use `cohere.command-r-plus` model for validate,
model_instance.validate_credentials(model="cohere.command-r-plus", credentials=credentials)
# Use `cohere.command-r-plus-08-2024` model for validate,
model_instance.validate_credentials(model="cohere.command-r-plus-08-2024", credentials=credentials)
except CredentialsValidateFailedError as ex:
raise ex
except Exception as ex:

View File

@ -367,6 +367,7 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
# transform assistant message to prompt message
text = chunk_json["response"]
text = self._wrap_thinking_by_tag(text)
assistant_prompt_message = AssistantPromptMessage(content=text)

View File

@ -2,6 +2,8 @@
- o1-2024-12-17
- o1-mini
- o1-mini-2024-09-12
- o3-mini
- o3-mini-2025-01-31
- gpt-4
- gpt-4o
- gpt-4o-2024-05-13

View File

@ -619,9 +619,9 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
# clear illegal prompt messages
prompt_messages = self._clear_illegal_prompt_messages(model, prompt_messages)
# o1 compatibility
# o1, o3 compatibility
block_as_stream = False
if model.startswith("o1"):
if model.startswith(("o1", "o3")):
if "max_tokens" in model_parameters:
model_parameters["max_completion_tokens"] = model_parameters["max_tokens"]
del model_parameters["max_tokens"]
@ -941,7 +941,7 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
]
)
if model.startswith("o1"):
if model.startswith(("o1", "o3")):
system_message_count = len([m for m in prompt_messages if isinstance(m, SystemPromptMessage)])
if system_message_count > 0:
new_prompt_messages = []
@ -1053,7 +1053,7 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
model = model.split(":")[1]
# Currently, we can use gpt4o to calculate chatgpt-4o-latest's token.
if model == "chatgpt-4o-latest" or model.startswith("o1"):
if model == "chatgpt-4o-latest" or model.startswith(("o1", "o3")):
model = "gpt-4o"
try:
@ -1068,7 +1068,7 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
tokens_per_message = 4
# if there's a name, the role is omitted
tokens_per_name = -1
elif model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4") or model.startswith("o1"):
elif model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4") or model.startswith(("o1", "o3")):
tokens_per_message = 3
tokens_per_name = 1
else:

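For clarity, a hedged sketch (not from the changeset) of the o-series parameter rewrite shown above: o1/o3 models accept max_completion_tokens rather than max_tokens, so the value is moved before the request is sent:

```python
# Illustrative only; mirrors the branch added in the hunk above.
model = "o3-mini"
model_parameters = {"max_tokens": 1024}

if model.startswith(("o1", "o3")):
    model_parameters["max_completion_tokens"] = model_parameters.pop("max_tokens")

print(model_parameters)  # {'max_completion_tokens': 1024}
```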
View File

@ -16,6 +16,19 @@ parameter_rules:
default: 50000
min: 1
max: 50000
- name: reasoning_effort
label:
zh_Hans: 推理工作
en_US: reasoning_effort
type: string
help:
zh_Hans: 限制推理模型的推理工作
en_US: constrains effort on reasoning for reasoning models
required: false
options:
- low
- medium
- high
- name: response_format
label:
zh_Hans: 回复格式

View File

@ -17,6 +17,19 @@ parameter_rules:
default: 50000
min: 1
max: 50000
- name: reasoning_effort
label:
zh_Hans: 推理工作
en_US: reasoning_effort
type: string
help:
zh_Hans: 限制推理模型的推理工作
en_US: constrains effort on reasoning for reasoning models
required: false
options:
- low
- medium
- high
- name: response_format
label:
zh_Hans: 回复格式

View File

@ -0,0 +1,46 @@
model: o3-mini-2025-01-31
label:
zh_Hans: o3-mini-2025-01-31
en_US: o3-mini-2025-01-31
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: max_tokens
use_template: max_tokens
default: 100000
min: 1
max: 100000
- name: reasoning_effort
label:
zh_Hans: 推理工作
en_US: reasoning_effort
type: string
help:
zh_Hans: 限制推理模型的推理工作
en_US: constrains effort on reasoning for reasoning models
required: false
options:
- low
- medium
- high
- name: response_format
label:
zh_Hans: 回复格式
en_US: response_format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '1.10'
output: '4.40'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,46 @@
model: o3-mini
label:
zh_Hans: o3-mini
en_US: o3-mini
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: max_tokens
use_template: max_tokens
default: 100000
min: 1
max: 100000
- name: reasoning_effort
label:
zh_Hans: 推理工作
en_US: reasoning_effort
type: string
help:
zh_Hans: 限制推理模型的推理工作
en_US: constrains effort on reasoning for reasoning models
required: false
options:
- low
- medium
- high
- name: response_format
label:
zh_Hans: 回复格式
en_US: response_format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '1.10'
output: '4.40'
unit: '0.000001'
currency: USD

View File

@ -1,5 +1,5 @@
import codecs
import json
import logging
from collections.abc import Generator
from decimal import Decimal
from typing import Optional, Union, cast
@ -38,8 +38,6 @@ from core.model_runtime.model_providers.__base.large_language_model import Large
from core.model_runtime.model_providers.openai_api_compatible._common import _CommonOaiApiCompat
from core.model_runtime.utils import helper
logger = logging.getLogger(__name__)
class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
"""
@ -99,7 +97,7 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
:param tools: tools for tool calling
:return:
"""
return self._num_tokens_from_messages(model, prompt_messages, tools, credentials)
return self._num_tokens_from_messages(prompt_messages, tools, credentials)
def validate_credentials(self, model: str, credentials: dict) -> None:
"""
@ -398,6 +396,73 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
return self._handle_generate_response(model, credentials, response, prompt_messages)
def _create_final_llm_result_chunk(
self,
index: int,
message: AssistantPromptMessage,
finish_reason: str,
usage: dict,
model: str,
prompt_messages: list[PromptMessage],
credentials: dict,
full_content: str,
) -> LLMResultChunk:
# calculate num tokens
prompt_tokens = usage and usage.get("prompt_tokens")
if prompt_tokens is None:
prompt_tokens = self._num_tokens_from_string(text=prompt_messages[0].content)
completion_tokens = usage and usage.get("completion_tokens")
if completion_tokens is None:
completion_tokens = self._num_tokens_from_string(text=full_content)
# transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
return LLMResultChunk(
model=model,
prompt_messages=prompt_messages,
delta=LLMResultChunkDelta(index=index, message=message, finish_reason=finish_reason, usage=usage),
)
def _get_tool_call(self, tool_call_id: str, tools_calls: list[AssistantPromptMessage.ToolCall]):
"""
Get or create a tool call by ID
:param tool_call_id: tool call ID
:param tools_calls: list of existing tool calls
:return: existing or new tool call, updated tools_calls
"""
if not tool_call_id:
return tools_calls[-1], tools_calls
tool_call = next((tool_call for tool_call in tools_calls if tool_call.id == tool_call_id), None)
if tool_call is None:
tool_call = AssistantPromptMessage.ToolCall(
id=tool_call_id,
type="function",
function=AssistantPromptMessage.ToolCall.ToolCallFunction(name="", arguments=""),
)
tools_calls.append(tool_call)
return tool_call, tools_calls
def _increase_tool_call(
self, new_tool_calls: list[AssistantPromptMessage.ToolCall], tools_calls: list[AssistantPromptMessage.ToolCall]
) -> list[AssistantPromptMessage.ToolCall]:
for new_tool_call in new_tool_calls:
# get tool call
tool_call, tools_calls = self._get_tool_call(new_tool_call.function.name, tools_calls)
# update tool call
if new_tool_call.id:
tool_call.id = new_tool_call.id
if new_tool_call.type:
tool_call.type = new_tool_call.type
if new_tool_call.function.name:
tool_call.function.name = new_tool_call.function.name
if new_tool_call.function.arguments:
tool_call.function.arguments += new_tool_call.function.arguments
return tools_calls
def _handle_generate_stream_response(
self, model: str, credentials: dict, response: requests.Response, prompt_messages: list[PromptMessage]
) -> Generator:
@ -410,69 +475,15 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
:param prompt_messages: prompt messages
:return: llm response chunk generator
"""
full_assistant_content = ""
chunk_index = 0
def create_final_llm_result_chunk(
id: Optional[str], index: int, message: AssistantPromptMessage, finish_reason: str, usage: dict
) -> LLMResultChunk:
# calculate num tokens
prompt_tokens = usage and usage.get("prompt_tokens")
if prompt_tokens is None:
prompt_tokens = self._num_tokens_from_string(model, prompt_messages[0].content)
completion_tokens = usage and usage.get("completion_tokens")
if completion_tokens is None:
completion_tokens = self._num_tokens_from_string(model, full_assistant_content)
# transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
return LLMResultChunk(
id=id,
model=model,
prompt_messages=prompt_messages,
delta=LLMResultChunkDelta(index=index, message=message, finish_reason=finish_reason, usage=usage),
)
full_assistant_content = ""
tools_calls: list[AssistantPromptMessage.ToolCall] = []
finish_reason = None
usage = None
is_reasoning_started = False
# delimiter for stream response, need unicode_escape
import codecs
delimiter = credentials.get("stream_mode_delimiter", "\n\n")
delimiter = codecs.decode(delimiter, "unicode_escape")
tools_calls: list[AssistantPromptMessage.ToolCall] = []
def increase_tool_call(new_tool_calls: list[AssistantPromptMessage.ToolCall]):
def get_tool_call(tool_call_id: str):
if not tool_call_id:
return tools_calls[-1]
tool_call = next((tool_call for tool_call in tools_calls if tool_call.id == tool_call_id), None)
if tool_call is None:
tool_call = AssistantPromptMessage.ToolCall(
id=tool_call_id,
type="function",
function=AssistantPromptMessage.ToolCall.ToolCallFunction(name="", arguments=""),
)
tools_calls.append(tool_call)
return tool_call
for new_tool_call in new_tool_calls:
# get tool call
tool_call = get_tool_call(new_tool_call.function.name)
# update tool call
if new_tool_call.id:
tool_call.id = new_tool_call.id
if new_tool_call.type:
tool_call.type = new_tool_call.type
if new_tool_call.function.name:
tool_call.function.name = new_tool_call.function.name
if new_tool_call.function.arguments:
tool_call.function.arguments += new_tool_call.function.arguments
finish_reason = None # The default value of finish_reason is None
message_id, usage = None, None
for chunk in response.iter_lines(decode_unicode=True, delimiter=delimiter):
chunk = chunk.strip()
if chunk:
@ -487,12 +498,15 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
chunk_json: dict = json.loads(decoded_chunk)
# stream ended
except json.JSONDecodeError as e:
yield create_final_llm_result_chunk(
id=message_id,
yield self._create_final_llm_result_chunk(
index=chunk_index + 1,
message=AssistantPromptMessage(content=""),
finish_reason="Non-JSON encountered.",
usage=usage,
model=model,
credentials=credentials,
prompt_messages=prompt_messages,
full_content=full_assistant_content,
)
break
# handle the error here. for issue #11629
@ -507,12 +521,14 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
choice = chunk_json["choices"][0]
finish_reason = chunk_json["choices"][0].get("finish_reason")
message_id = chunk_json.get("id")
chunk_index += 1
if "delta" in choice:
delta = choice["delta"]
delta_content = delta.get("content")
delta_content, is_reasoning_started = self._wrap_thinking_by_reasoning_content(
delta, is_reasoning_started
)
delta_content = self._wrap_thinking_by_tag(delta_content)
assistant_message_tool_calls = None
@ -526,12 +542,10 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
{"id": "tool_call_id", "type": "function", "function": delta.get("function_call", {})}
]
# assistant_message_function_call = delta.delta.function_call
# extract tool calls from response
if assistant_message_tool_calls:
tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
increase_tool_call(tool_calls)
tools_calls = self._increase_tool_call(tool_calls, tools_calls)
if delta_content is None or delta_content == "":
continue
@ -556,7 +570,6 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
continue
yield LLMResultChunk(
id=message_id,
model=model,
prompt_messages=prompt_messages,
delta=LLMResultChunkDelta(
@ -569,7 +582,6 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
if tools_calls:
yield LLMResultChunk(
id=message_id,
model=model,
prompt_messages=prompt_messages,
delta=LLMResultChunkDelta(
@ -578,12 +590,15 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
),
)
yield create_final_llm_result_chunk(
id=message_id,
yield self._create_final_llm_result_chunk(
index=chunk_index,
message=AssistantPromptMessage(content=""),
finish_reason=finish_reason,
usage=usage,
model=model,
credentials=credentials,
prompt_messages=prompt_messages,
full_content=full_assistant_content,
)
def _handle_generate_response(
@ -697,12 +712,11 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
return message_dict
def _num_tokens_from_string(
self, model: str, text: Union[str, list[PromptMessageContent]], tools: Optional[list[PromptMessageTool]] = None
self, text: Union[str, list[PromptMessageContent]], tools: Optional[list[PromptMessageTool]] = None
) -> int:
"""
Approximate num tokens for model with gpt2 tokenizer.
:param model: model name
:param text: prompt text
:param tools: tools for tool calling
:return: number of tokens
@ -725,7 +739,6 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
def _num_tokens_from_messages(
self,
model: str,
messages: list[PromptMessage],
tools: Optional[list[PromptMessageTool]] = None,
credentials: Optional[dict] = None,

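This hunk promotes the stream handler's nested closures (create_final_llm_result_chunk, get_tool_call, increase_tool_call) to instance methods, so the base streaming handler is easier to share with providers such as SiliconFlow, whose custom override is removed later in this diff. A standalone sketch of the tool-call merge logic, using plain dataclasses instead of Dify's AssistantPromptMessage.ToolCall (names and sample data are illustrative):

# Simplified stand-in for AssistantPromptMessage.ToolCall, for illustration only.
from dataclasses import dataclass, field

@dataclass
class ToolCallFunction:
    name: str = ""
    arguments: str = ""

@dataclass
class ToolCall:
    id: str = ""
    type: str = ""
    function: ToolCallFunction = field(default_factory=ToolCallFunction)

def get_tool_call(tool_call_id: str, calls: list[ToolCall]) -> ToolCall:
    # Empty key means "continue the most recent call" (streamed argument fragments).
    if not tool_call_id:
        return calls[-1]
    for call in calls:
        if call.id == tool_call_id:
            return call
    call = ToolCall(id=tool_call_id, type="function")
    calls.append(call)
    return call

def increase_tool_call(new_calls: list[ToolCall], calls: list[ToolCall]) -> list[ToolCall]:
    # Mirrors _increase_tool_call: ids/names fill in once, argument text concatenates.
    for new in new_calls:
        call = get_tool_call(new.function.name, calls)
        if new.id:
            call.id = new.id
        if new.type:
            call.type = new.type
        if new.function.name:
            call.function.name = new.function.name
        if new.function.arguments:
            call.function.arguments += new.function.arguments
    return calls

calls: list[ToolCall] = []
calls = increase_tool_call(
    [ToolCall(id="call_1", type="function",
              function=ToolCallFunction(name="get_weather", arguments='{"city": '))], calls)
calls = increase_tool_call(
    [ToolCall(function=ToolCallFunction(arguments='"Paris"}'))], calls)
assert calls[0].function.arguments == '{"city": "Paris"}'

As in the diff, the lookup key is the streamed function name rather than the tool call id, so fragments that arrive with an empty name simply extend the most recent call.
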
View File

@ -1,5 +1,7 @@
- openai/o1-preview
- openai/o1-mini
- openai/o3-mini
- openai/o3-mini-2025-01-31
- openai/gpt-4o
- openai/gpt-4o-mini
- openai/gpt-4
@ -28,5 +30,6 @@
- mistralai/mistral-7b-instruct
- qwen/qwen-2.5-72b-instruct
- qwen/qwen-2-72b-instruct
- deepseek/deepseek-r1
- deepseek/deepseek-chat
- deepseek/deepseek-coder

View File

@ -53,7 +53,7 @@ parameter_rules:
zh_Hans: 介于 -2.0 和 2.0 之间的数字。如果该值为正,那么新 token 会根据其在已有文本中的出现频率受到相应的惩罚,降低模型重复相同内容的可能性。
en_US: A number between -2.0 and 2.0. If the value is positive, new tokens are penalized based on their frequency of occurrence in existing text, reducing the likelihood that the model will repeat the same content.
pricing:
input: "0.14"
output: "0.28"
input: "0.49"
output: "0.89"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,59 @@
model: deepseek/deepseek-r1
label:
en_US: deepseek-r1
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 163840
parameter_rules:
- name: temperature
use_template: temperature
type: float
default: 1
min: 0.0
max: 2.0
help:
zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
- name: max_tokens
use_template: max_tokens
type: int
default: 4096
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
type: float
default: 1
min: 0.01
max: 1.00
help:
zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
default: 0
min: -2.0
max: 2.0
help:
zh_Hans: 介于 -2.0 和 2.0 之间的数字。如果该值为正,那么新 token 会根据其在已有文本中的出现频率受到相应的惩罚,降低模型重复相同内容的可能性。
en_US: A number between -2.0 and 2.0. If the value is positive, new tokens are penalized based on their frequency of occurrence in existing text, reducing the likelihood that the model will repeat the same content.
pricing:
input: "3"
output: "8"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,49 @@
model: openai/o3-mini-2025-01-31
label:
en_US: o3-mini-2025-01-31
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 100000
- name: response_format
label:
zh_Hans: 回复格式
en_US: response_format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: "1.10"
output: "4.40"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,49 @@
model: openai/o3-mini
label:
en_US: o3-mini
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 100000
- name: response_format
label:
zh_Hans: 回复格式
en_US: response_format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: "1.10"
output: "4.40"
unit: "0.000001"
currency: USD

View File

@ -12,7 +12,11 @@
- Pro/Qwen/Qwen2-VL-7B-Instruct
- OpenGVLab/InternVL2-26B
- Pro/OpenGVLab/InternVL2-8B
- deepseek-ai/DeepSeek-R1
- deepseek-ai/DeepSeek-V2-Chat
- deepseek-ai/DeepSeek-V2.5
- deepseek-ai/DeepSeek-V3
- deepseek-ai/DeepSeek-Coder-V2-Instruct
- THUDM/glm-4-9b-chat
- 01-ai/Yi-1.5-34B-Chat-16K
- 01-ai/Yi-1.5-9B-Chat-16K
@ -25,3 +29,4 @@
- meta-llama/Meta-Llama-3.1-8B-Instruct
- google/gemma-2-27b-it
- google/gemma-2-9b-it
- Tencent/Hunyuan-A52B-Instruct

View File

@ -0,0 +1,21 @@
model: deepseek-ai/DeepSeek-R1
label:
zh_Hans: deepseek-ai/DeepSeek-R1
en_US: deepseek-ai/DeepSeek-R1
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 64000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "4"
output: "16"
unit: "0.000001"
currency: RMB

View File

@ -0,0 +1,53 @@
model: deepseek-ai/DeepSeek-V3
label:
en_US: deepseek-ai/DeepSeek-V3
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 64000
parameter_rules:
- name: temperature
use_template: temperature
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: "1"
output: "2"
unit: "0.000001"
currency: RMB

View File

@ -1,13 +1,9 @@
import json
from collections.abc import Generator
from typing import Optional, Union
import requests
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
PromptMessage,
PromptMessageTool,
)
@ -96,208 +92,3 @@ class SiliconflowLargeLanguageModel(OAIAPICompatLargeLanguageModel):
),
],
)
def _handle_generate_stream_response(
self, model: str, credentials: dict, response: requests.Response, prompt_messages: list[PromptMessage]
) -> Generator:
"""
Handle llm stream response
:param model: model name
:param credentials: model credentials
:param response: streamed response
:param prompt_messages: prompt messages
:return: llm response chunk generator
"""
full_assistant_content = ""
chunk_index = 0
is_reasoning_started = False # Add flag to track reasoning state
def create_final_llm_result_chunk(
id: Optional[str], index: int, message: AssistantPromptMessage, finish_reason: str, usage: dict
) -> LLMResultChunk:
# calculate num tokens
prompt_tokens = usage and usage.get("prompt_tokens")
if prompt_tokens is None:
prompt_tokens = self._num_tokens_from_string(model, prompt_messages[0].content)
completion_tokens = usage and usage.get("completion_tokens")
if completion_tokens is None:
completion_tokens = self._num_tokens_from_string(model, full_assistant_content)
# transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
return LLMResultChunk(
id=id,
model=model,
prompt_messages=prompt_messages,
delta=LLMResultChunkDelta(index=index, message=message, finish_reason=finish_reason, usage=usage),
)
# delimiter for stream response, need unicode_escape
import codecs
delimiter = credentials.get("stream_mode_delimiter", "\n\n")
delimiter = codecs.decode(delimiter, "unicode_escape")
tools_calls: list[AssistantPromptMessage.ToolCall] = []
def increase_tool_call(new_tool_calls: list[AssistantPromptMessage.ToolCall]):
def get_tool_call(tool_call_id: str):
if not tool_call_id:
return tools_calls[-1]
tool_call = next((tool_call for tool_call in tools_calls if tool_call.id == tool_call_id), None)
if tool_call is None:
tool_call = AssistantPromptMessage.ToolCall(
id=tool_call_id,
type="function",
function=AssistantPromptMessage.ToolCall.ToolCallFunction(name="", arguments=""),
)
tools_calls.append(tool_call)
return tool_call
for new_tool_call in new_tool_calls:
# get tool call
tool_call = get_tool_call(new_tool_call.function.name)
# update tool call
if new_tool_call.id:
tool_call.id = new_tool_call.id
if new_tool_call.type:
tool_call.type = new_tool_call.type
if new_tool_call.function.name:
tool_call.function.name = new_tool_call.function.name
if new_tool_call.function.arguments:
tool_call.function.arguments += new_tool_call.function.arguments
finish_reason = None # The default value of finish_reason is None
message_id, usage = None, None
for chunk in response.iter_lines(decode_unicode=True, delimiter=delimiter):
chunk = chunk.strip()
if chunk:
# ignore sse comments
if chunk.startswith(":"):
continue
decoded_chunk = chunk.strip().removeprefix("data:").lstrip()
if decoded_chunk == "[DONE]": # Some provider returns "data: [DONE]"
continue
try:
chunk_json: dict = json.loads(decoded_chunk)
# stream ended
except json.JSONDecodeError as e:
yield create_final_llm_result_chunk(
id=message_id,
index=chunk_index + 1,
message=AssistantPromptMessage(content=""),
finish_reason="Non-JSON encountered.",
usage=usage,
)
break
# handle the error here. for issue #11629
if chunk_json.get("error") and chunk_json.get("choices") is None:
raise ValueError(chunk_json.get("error"))
if chunk_json:
if u := chunk_json.get("usage"):
usage = u
if not chunk_json or len(chunk_json["choices"]) == 0:
continue
choice = chunk_json["choices"][0]
finish_reason = chunk_json["choices"][0].get("finish_reason")
message_id = chunk_json.get("id")
chunk_index += 1
if "delta" in choice:
delta = choice["delta"]
delta_content = delta.get("content")
assistant_message_tool_calls = None
if "tool_calls" in delta and credentials.get("function_calling_type", "no_call") == "tool_call":
assistant_message_tool_calls = delta.get("tool_calls", None)
elif (
"function_call" in delta
and credentials.get("function_calling_type", "no_call") == "function_call"
):
assistant_message_tool_calls = [
{"id": "tool_call_id", "type": "function", "function": delta.get("function_call", {})}
]
# assistant_message_function_call = delta.delta.function_call
# extract tool calls from response
if assistant_message_tool_calls:
tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
increase_tool_call(tool_calls)
if delta_content is None or delta_content == "":
continue
# Check for think tags
if "<think>" in delta_content:
is_reasoning_started = True
# Remove <think> tag and add markdown quote
delta_content = "> 💭 " + delta_content.replace("<think>", "")
elif "</think>" in delta_content:
# Remove </think> tag and add newlines to end quote block
delta_content = delta_content.replace("</think>", "") + "\n\n"
is_reasoning_started = False
elif is_reasoning_started:
# Add quote markers for content within thinking block
if "\n\n" in delta_content:
delta_content = delta_content.replace("\n\n", "\n> ")
elif "\n" in delta_content:
delta_content = delta_content.replace("\n", "\n> ")
# transform assistant message to prompt message
assistant_prompt_message = AssistantPromptMessage(
content=delta_content,
)
# reset tool calls
tool_calls = []
full_assistant_content += delta_content
elif "text" in choice:
choice_text = choice.get("text", "")
if choice_text == "":
continue
# transform assistant message to prompt message
assistant_prompt_message = AssistantPromptMessage(content=choice_text)
full_assistant_content += choice_text
else:
continue
yield LLMResultChunk(
id=message_id,
model=model,
prompt_messages=prompt_messages,
delta=LLMResultChunkDelta(
index=chunk_index,
message=assistant_prompt_message,
),
)
chunk_index += 1
if tools_calls:
yield LLMResultChunk(
id=message_id,
model=model,
prompt_messages=prompt_messages,
delta=LLMResultChunkDelta(
index=chunk_index,
message=AssistantPromptMessage(tool_calls=tools_calls, content=""),
),
)
yield create_final_llm_result_chunk(
id=message_id,
index=chunk_index,
message=AssistantPromptMessage(content=""),
finish_reason=finish_reason,
usage=usage,
)

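The SiliconFlow-specific stream handler above is deleted; the shared OpenAI-compatible handler (with _wrap_thinking_by_reasoning_content and _wrap_thinking_by_tag) now covers it. A minimal standalone sketch of the think-tag rendering, reconstructed from the removed lines; the shared helper's actual implementation is not shown in this diff, so treat this as an approximation:

# Approximation of the removed <think> handling: reasoning text is rendered
# as a markdown quote block, chunk by chunk, while normal content passes through.
def wrap_thinking(delta_content: str, in_reasoning: bool) -> tuple[str, bool]:
    if "<think>" in delta_content:
        # Opening tag starts the quote block.
        return "> 💭 " + delta_content.replace("<think>", ""), True
    if "</think>" in delta_content:
        # Closing tag ends the block with a blank line.
        return delta_content.replace("</think>", "") + "\n\n", False
    if in_reasoning:
        # Keep every new line inside the block quoted.
        if "\n\n" in delta_content:
            return delta_content.replace("\n\n", "\n> "), True
        if "\n" in delta_content:
            return delta_content.replace("\n", "\n> "), True
    return delta_content, in_reasoning

state = False
out, state = wrap_thinking("<think>step one", state)   # "> 💭 step one"
out, state = wrap_thinking("\nstep two", state)        # "\n> step two"
out, state = wrap_thinking("done</think>", state)      # "done\n\n"
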
View File

@ -0,0 +1,41 @@
model: gemini-2.0-flash-001
label:
en_US: Gemini 2.0 Flash 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,41 @@
model: gemini-2.0-flash-lite-preview-02-05
label:
en_US: Gemini 2.0 Flash Lite Preview 0205
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,39 @@
model: gemini-2.0-flash-thinking-exp-01-21
label:
en_US: Gemini 2.0 Flash Thinking Exp 0121
model_type: llm
features:
- agent-thought
- vision
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,39 @@
model: gemini-2.0-flash-thinking-exp-1219
label:
en_US: Gemini 2.0 Flash Thinking Exp 1219
model_type: llm
features:
- agent-thought
- vision
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,37 @@
model: gemini-2.0-pro-exp-02-05
label:
en_US: Gemini 2.0 Pro Exp 0205
model_type: llm
features:
- agent-thought
- document
model_properties:
mode: chat
context_size: 2000000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
en_US: Top k
type: int
help:
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
- name: max_output_tokens
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,41 @@
model: gemini-exp-1114
label:
en_US: Gemini exp 1114
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,41 @@
model: gemini-exp-1121
label:
en_US: Gemini exp 1121
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,41 @@
model: gemini-exp-1206
label:
en_US: Gemini exp 1206
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -1,4 +1,5 @@
import logging
import re
from collections.abc import Generator
from typing import Optional
@ -247,15 +248,34 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
req_params["tools"] = tools
def _handle_stream_chat_response(chunks: Generator[ChatCompletionChunk]) -> Generator:
is_reasoning_started = False
for chunk in chunks:
content = ""
if chunk.choices:
delta = chunk.choices[0].delta
if is_reasoning_started and not hasattr(delta, "reasoning_content") and not delta.content:
content = ""
elif hasattr(delta, "reasoning_content"):
if not is_reasoning_started:
is_reasoning_started = True
content = "> 💭 " + delta.reasoning_content
else:
content = delta.reasoning_content
if "\n" in content:
content = re.sub(r"\n(?!(>|\n))", "\n> ", content)
elif is_reasoning_started:
content = "\n\n" + delta.content
is_reasoning_started = False
else:
content = delta.content
yield LLMResultChunk(
model=model,
prompt_messages=prompt_messages,
delta=LLMResultChunkDelta(
index=0,
message=AssistantPromptMessage(
content=chunk.choices[0].delta.content if chunk.choices else "", tool_calls=[]
),
message=AssistantPromptMessage(content=content, tool_calls=[]),
usage=self._calc_response_usage(
model=model,
credentials=credentials,

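For Volcengine, reasoning arrives in a separate reasoning_content field rather than <think> tags; the handler above quotes the first reasoning chunk with "> 💭 " and uses the regex to keep any following lines quoted. A quick isolated check of that regex (the sample text is made up):

# Every newline that is not already followed by ">" or another newline
# gets a "> " prefix, so multi-line reasoning stays inside the quote block.
import re

sample = "first thought\nsecond thought\n> already quoted\n\nfinal"
print(re.sub(r"\n(?!(>|\n))", "\n> ", sample))
# first thought
# > second thought
# > already quoted
#
# > final
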
View File

@ -18,6 +18,22 @@ class ModelConfig(BaseModel):
configs: dict[str, ModelConfig] = {
"DeepSeek-R1-Distill-Qwen-32B": ModelConfig(
properties=ModelProperties(context_size=64000, max_tokens=8192, mode=LLMMode.CHAT),
features=[ModelFeature.AGENT_THOUGHT],
),
"DeepSeek-R1-Distill-Qwen-7B": ModelConfig(
properties=ModelProperties(context_size=64000, max_tokens=8192, mode=LLMMode.CHAT),
features=[ModelFeature.AGENT_THOUGHT],
),
"DeepSeek-R1": ModelConfig(
properties=ModelProperties(context_size=64000, max_tokens=8192, mode=LLMMode.CHAT),
features=[ModelFeature.AGENT_THOUGHT],
),
"DeepSeek-V3": ModelConfig(
properties=ModelProperties(context_size=64000, max_tokens=8192, mode=LLMMode.CHAT),
features=[ModelFeature.AGENT_THOUGHT, ModelFeature.TOOL_CALL, ModelFeature.STREAM_TOOL_CALL],
),
"Doubao-1.5-vision-pro-32k": ModelConfig(
properties=ModelProperties(context_size=32768, max_tokens=12288, mode=LLMMode.CHAT),
features=[ModelFeature.AGENT_THOUGHT, ModelFeature.VISION],

View File

@ -118,6 +118,30 @@ model_credential_schema:
type: select
required: true
options:
- label:
en_US: DeepSeek-R1-Distill-Qwen-32B
value: DeepSeek-R1-Distill-Qwen-32B
show_on:
- variable: __model_type
value: llm
- label:
en_US: DeepSeek-R1-Distill-Qwen-7B
value: DeepSeek-R1-Distill-Qwen-7B
show_on:
- variable: __model_type
value: llm
- label:
en_US: DeepSeek-R1
value: DeepSeek-R1
show_on:
- variable: __model_type
value: llm
- label:
en_US: DeepSeek-V3
value: DeepSeek-V3
show_on:
- variable: __model_type
value: llm
- label:
en_US: Doubao-1.5-vision-pro-32k
value: Doubao-1.5-vision-pro-32k

View File

@ -635,16 +635,13 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
handle stream chat generate response
"""
full_response = ""
for chunk in resp:
if len(chunk.choices) == 0:
continue
delta = chunk.choices[0]
if delta.finish_reason is None and (delta.delta.content is None or delta.delta.content == ""):
continue
delta_content = delta.delta.content or ""
# check if there is a tool call in the response
function_call = None
tool_calls = []
@ -657,9 +654,10 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
if function_call:
assistant_message_tool_calls += [self._extract_response_function_call(function_call)]
delta_content = self._wrap_thinking_by_tag(delta_content)
# transform assistant message to prompt message
assistant_prompt_message = AssistantPromptMessage(
content=delta.delta.content or "", tool_calls=assistant_message_tool_calls
content=delta_content or "", tool_calls=assistant_message_tool_calls
)
if delta.finish_reason is not None:
@ -697,7 +695,7 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
),
)
full_response += delta.delta.content
full_response += delta_content
def _handle_completion_generate_response(
self,

View File

@ -77,5 +77,4 @@
- onebot
- regex
- trello
- vanna
- fal

View File

@ -1,114 +0,0 @@
"""
Configuration classes for AWS Bedrock retrieve and generate API
"""
from dataclasses import dataclass
from typing import Any, Literal, Optional
@dataclass
class TextInferenceConfig:
"""Text inference configuration"""
maxTokens: Optional[int] = None
stopSequences: Optional[list[str]] = None
temperature: Optional[float] = None
topP: Optional[float] = None
@dataclass
class PerformanceConfig:
"""Performance configuration"""
latency: Literal["standard", "optimized"]
@dataclass
class PromptTemplate:
"""Prompt template configuration"""
textPromptTemplate: str
@dataclass
class GuardrailConfig:
"""Guardrail configuration"""
guardrailId: str
guardrailVersion: str
@dataclass
class GenerationConfig:
"""Generation configuration"""
additionalModelRequestFields: Optional[dict[str, Any]] = None
guardrailConfiguration: Optional[GuardrailConfig] = None
inferenceConfig: Optional[dict[str, TextInferenceConfig]] = None
performanceConfig: Optional[PerformanceConfig] = None
promptTemplate: Optional[PromptTemplate] = None
@dataclass
class VectorSearchConfig:
"""Vector search configuration"""
filter: Optional[dict[str, Any]] = None
numberOfResults: Optional[int] = None
overrideSearchType: Optional[Literal["HYBRID", "SEMANTIC"]] = None
@dataclass
class RetrievalConfig:
"""Retrieval configuration"""
vectorSearchConfiguration: VectorSearchConfig
@dataclass
class OrchestrationConfig:
"""Orchestration configuration"""
additionalModelRequestFields: Optional[dict[str, Any]] = None
inferenceConfig: Optional[dict[str, TextInferenceConfig]] = None
performanceConfig: Optional[PerformanceConfig] = None
promptTemplate: Optional[PromptTemplate] = None
@dataclass
class KnowledgeBaseConfig:
"""Knowledge base configuration"""
generationConfiguration: GenerationConfig
knowledgeBaseId: str
modelArn: str
orchestrationConfiguration: Optional[OrchestrationConfig] = None
retrievalConfiguration: Optional[RetrievalConfig] = None
@dataclass
class SessionConfig:
"""Session configuration"""
kmsKeyArn: Optional[str] = None
sessionId: Optional[str] = None
@dataclass
class RetrieveAndGenerateConfiguration:
"""Retrieve and generate configuration
The use of knowledgeBaseConfiguration or externalSourcesConfiguration depends on the type value
"""
type: str = "KNOWLEDGE_BASE"
knowledgeBaseConfiguration: Optional[KnowledgeBaseConfig] = None
@dataclass
class RetrieveAndGenerateConfig:
"""Retrieve and generate main configuration"""
input: dict[str, str]
retrieveAndGenerateConfiguration: RetrieveAndGenerateConfiguration
sessionConfiguration: Optional[SessionConfig] = None
sessionId: Optional[str] = None

View File

@ -77,15 +77,27 @@ class BedrockRetrieveTool(BuiltinTool):
"""
invoke tools
"""
line = 0
try:
line = 0
# Initialize Bedrock client if not already initialized
if not self.bedrock_client:
aws_region = tool_parameters.get("aws_region")
if aws_region:
self.bedrock_client = boto3.client("bedrock-agent-runtime", region_name=aws_region)
else:
self.bedrock_client = boto3.client("bedrock-agent-runtime")
aws_access_key_id = tool_parameters.get("aws_access_key_id")
aws_secret_access_key = tool_parameters.get("aws_secret_access_key")
client_kwargs = {"service_name": "bedrock-agent-runtime", "region_name": aws_region or None}
# Only add credentials if both access key and secret key are provided
if aws_access_key_id and aws_secret_access_key:
client_kwargs.update(
{"aws_access_key_id": aws_access_key_id, "aws_secret_access_key": aws_secret_access_key}
)
self.bedrock_client = boto3.client(**client_kwargs)
except Exception as e:
return self.create_text_message(f"Failed to initialize Bedrock client: {str(e)}")
try:
line = 1
if not self.knowledge_base_id:
self.knowledge_base_id = tool_parameters.get("knowledge_base_id")
@ -123,7 +135,14 @@ class BedrockRetrieveTool(BuiltinTool):
sorted_docs = sorted(retrieved_docs, key=operator.itemgetter("score"), reverse=True)
line = 6
return [self.create_json_message(res) for res in sorted_docs]
result_type = tool_parameters.get("result_type")
if result_type == "json":
return [self.create_json_message(res) for res in sorted_docs]
else:
text = ""
for i, res in enumerate(sorted_docs):
text += f"{i + 1}: {res['content']}\n"
return self.create_text_message(text)
except Exception as e:
return self.create_text_message(f"Exception {str(e)}, line : {line}")
@ -138,7 +157,6 @@ class BedrockRetrieveTool(BuiltinTool):
if not parameters.get("query"):
raise ValueError("query is required")
# Optional: validate that the metadata filter, if provided, is a valid JSON string
metadata_filter_str = parameters.get("metadata_filter")
if metadata_filter_str and not isinstance(json.loads(metadata_filter_str), dict):
raise ValueError("metadata_filter must be a valid JSON object")

View File

@ -15,6 +15,60 @@ description:
llm: A tool for retrieving relevant information from Amazon Bedrock Knowledge Base. You can find deploy instructions on Github Repo - https://github.com/aws-samples/dify-aws-tool
parameters:
- name: aws_region
type: string
required: false
label:
en_US: AWS Region
zh_Hans: AWS区域
human_description:
en_US: AWS region for the Bedrock service
zh_Hans: Bedrock服务的AWS区域
form: form
- name: aws_access_key_id
type: string
required: false
label:
en_US: AWS Access Key ID
zh_Hans: AWS访问密钥ID
human_description:
en_US: AWS access key ID for authentication (optional)
zh_Hans: 用于身份验证的AWS访问密钥ID(可选)
form: form
- name: aws_secret_access_key
type: string
required: false
label:
en_US: AWS Secret Access Key
zh_Hans: AWS秘密访问密钥
human_description:
en_US: AWS secret access key for authentication (optional)
zh_Hans: 用于身份验证的AWS秘密访问密钥(可选)
form: form
- name: result_type
type: select
required: true
label:
en_US: result type
zh_Hans: 结果类型
human_description:
en_US: return a list of json or texts
zh_Hans: 返回一个列表,内容是json还是纯文本
default: text
options:
- value: json
label:
en_US: JSON
zh_Hans: JSON
- value: text
label:
en_US: Text
zh_Hans: 文本
form: form
- name: knowledge_base_id
type: string
required: true
@ -95,6 +149,7 @@ parameters:
zh_Hans: 重排模型ID
pt_BR: rerank model id
llm_description: rerank model id
default: default
options:
- value: default
label:
@ -110,20 +165,6 @@ parameters:
zh_Hans: amazon.rerank-v1:0
form: form
- name: aws_region
type: string
required: false
label:
en_US: AWS Region
zh_Hans: AWS 区域
pt_BR: AWS Region
human_description:
en_US: AWS region where the Bedrock Knowledge Base is located
zh_Hans: Bedrock知识库所在的AWS区域
pt_BR: AWS region where the Bedrock Knowledge Base is located
llm_description: AWS region where the Bedrock Knowledge Base is located
form: form
- name: metadata_filter # Additional parameter for metadata filtering
type: string # String type, expects JSON-formatted filter conditions
required: false # Optional field - can be omitted

View File

@ -1,5 +1,5 @@
import json
from typing import Any, Optional
from typing import Any
import boto3
@ -10,193 +10,63 @@ from core.tools.tool.builtin_tool import BuiltinTool
class BedrockRetrieveAndGenerateTool(BuiltinTool):
bedrock_client: Any = None
def _create_text_inference_config(
def _invoke(
self,
max_tokens: Optional[int] = None,
stop_sequences: Optional[str] = None,
temperature: Optional[float] = None,
top_p: Optional[float] = None,
) -> Optional[dict]:
"""Create text inference configuration"""
if any([max_tokens, stop_sequences, temperature, top_p]):
config = {}
if max_tokens is not None:
config["maxTokens"] = max_tokens
if stop_sequences:
try:
config["stopSequences"] = json.loads(stop_sequences)
except json.JSONDecodeError:
config["stopSequences"] = []
if temperature is not None:
config["temperature"] = temperature
if top_p is not None:
config["topP"] = top_p
return config
return None
def _create_guardrail_config(
self,
guardrail_id: Optional[str] = None,
guardrail_version: Optional[str] = None,
) -> Optional[dict]:
"""Create guardrail configuration"""
if guardrail_id and guardrail_version:
return {"guardrailId": guardrail_id, "guardrailVersion": guardrail_version}
return None
def _create_generation_config(
self,
additional_model_fields: Optional[str] = None,
guardrail_config: Optional[dict] = None,
text_inference_config: Optional[dict] = None,
performance_mode: Optional[str] = None,
prompt_template: Optional[str] = None,
) -> dict:
"""Create generation configuration"""
config = {}
if additional_model_fields:
try:
config["additionalModelRequestFields"] = json.loads(additional_model_fields)
except json.JSONDecodeError:
pass
if guardrail_config:
config["guardrailConfiguration"] = guardrail_config
if text_inference_config:
config["inferenceConfig"] = {"textInferenceConfig": text_inference_config}
if performance_mode:
config["performanceConfig"] = {"latency": performance_mode}
if prompt_template:
config["promptTemplate"] = {"textPromptTemplate": prompt_template}
return config
def _create_orchestration_config(
self,
orchestration_additional_model_fields: Optional[str] = None,
orchestration_text_inference_config: Optional[dict] = None,
orchestration_performance_mode: Optional[str] = None,
orchestration_prompt_template: Optional[str] = None,
) -> dict:
"""Create orchestration configuration"""
config = {}
if orchestration_additional_model_fields:
try:
config["additionalModelRequestFields"] = json.loads(orchestration_additional_model_fields)
except json.JSONDecodeError:
pass
if orchestration_text_inference_config:
config["inferenceConfig"] = {"textInferenceConfig": orchestration_text_inference_config}
if orchestration_performance_mode:
config["performanceConfig"] = {"latency": orchestration_performance_mode}
if orchestration_prompt_template:
config["promptTemplate"] = {"textPromptTemplate": orchestration_prompt_template}
return config
def _create_vector_search_config(
self,
number_of_results: int = 5,
search_type: str = "SEMANTIC",
metadata_filter: Optional[dict] = None,
) -> dict:
"""Create vector search configuration"""
config = {
"numberOfResults": number_of_results,
"overrideSearchType": search_type,
}
# Only add filter if metadata_filter is not empty
if metadata_filter:
config["filter"] = metadata_filter
return config
def _bedrock_retrieve_and_generate(
self,
query: str,
knowledge_base_id: str,
model_arn: str,
# Generation Configuration
additional_model_fields: Optional[str] = None,
guardrail_id: Optional[str] = None,
guardrail_version: Optional[str] = None,
max_tokens: Optional[int] = None,
stop_sequences: Optional[str] = None,
temperature: Optional[float] = None,
top_p: Optional[float] = None,
performance_mode: str = "standard",
prompt_template: Optional[str] = None,
# Orchestration Configuration
orchestration_additional_model_fields: Optional[str] = None,
orchestration_max_tokens: Optional[int] = None,
orchestration_stop_sequences: Optional[str] = None,
orchestration_temperature: Optional[float] = None,
orchestration_top_p: Optional[float] = None,
orchestration_performance_mode: Optional[str] = None,
orchestration_prompt_template: Optional[str] = None,
# Retrieval Configuration
number_of_results: int = 5,
search_type: str = "SEMANTIC",
metadata_filter: Optional[dict] = None,
# Additional Configuration
session_id: Optional[str] = None,
) -> dict[str, Any]:
user_id: str,
tool_parameters: dict[str, Any],
) -> ToolInvokeMessage:
try:
# Create text inference configurations
text_inference_config = self._create_text_inference_config(max_tokens, stop_sequences, temperature, top_p)
orchestration_text_inference_config = self._create_text_inference_config(
orchestration_max_tokens, orchestration_stop_sequences, orchestration_temperature, orchestration_top_p
)
# Initialize Bedrock client if not already initialized
if not self.bedrock_client:
aws_region = tool_parameters.get("aws_region")
aws_access_key_id = tool_parameters.get("aws_access_key_id")
aws_secret_access_key = tool_parameters.get("aws_secret_access_key")
# Create guardrail configuration
guardrail_config = self._create_guardrail_config(guardrail_id, guardrail_version)
client_kwargs = {"service_name": "bedrock-agent-runtime", "region_name": aws_region or None}
# Create vector search configuration
vector_search_config = self._create_vector_search_config(number_of_results, search_type, metadata_filter)
# Only add credentials if both access key and secret key are provided
if aws_access_key_id and aws_secret_access_key:
client_kwargs.update(
{"aws_access_key_id": aws_access_key_id, "aws_secret_access_key": aws_secret_access_key}
)
# Create generation configuration
generation_config = self._create_generation_config(
additional_model_fields, guardrail_config, text_inference_config, performance_mode, prompt_template
)
self.bedrock_client = boto3.client(**client_kwargs)
except Exception as e:
return self.create_text_message(f"Failed to initialize Bedrock client: {str(e)}")
# Create orchestration configuration
orchestration_config = self._create_orchestration_config(
orchestration_additional_model_fields,
orchestration_text_inference_config,
orchestration_performance_mode,
orchestration_prompt_template,
)
try:
request_config = {}
# Create knowledge base configuration
knowledge_base_config = {
"knowledgeBaseId": knowledge_base_id,
"modelArn": model_arn,
"generationConfiguration": generation_config,
"orchestrationConfiguration": orchestration_config,
"retrievalConfiguration": {"vectorSearchConfiguration": vector_search_config},
}
# Set input configuration
input_text = tool_parameters.get("input")
if input_text:
request_config["input"] = {"text": input_text}
# Create request configuration
request_config = {
"input": {"text": query},
"retrieveAndGenerateConfiguration": {
"type": "KNOWLEDGE_BASE",
"knowledgeBaseConfiguration": knowledge_base_config,
},
}
# Build retrieve and generate configuration
config_type = tool_parameters.get("type")
retrieve_generate_config = {"type": config_type}
# Add session configuration if provided
if session_id and len(session_id) >= 2:
request_config["sessionConfiguration"] = {"sessionId": session_id}
# Add configuration based on type
if config_type == "KNOWLEDGE_BASE":
kb_config_str = tool_parameters.get("knowledge_base_configuration")
kb_config = json.loads(kb_config_str) if kb_config_str else None
retrieve_generate_config["knowledgeBaseConfiguration"] = kb_config
else: # EXTERNAL_SOURCES
es_config_str = tool_parameters.get("external_sources_configuration")
es_config = json.loads(es_config_str) if es_config_str else None
retrieve_generate_config["externalSourcesConfiguration"] = es_config
request_config["retrieveAndGenerateConfiguration"] = retrieve_generate_config
# Parse session configuration
session_config_str = tool_parameters.get("session_configuration")
session_config = json.loads(session_config_str) if session_config_str else None
if session_config:
request_config["sessionConfiguration"] = session_config
# Add session ID if provided
session_id = tool_parameters.get("session_id")
if session_id:
request_config["sessionId"] = session_id
# Send request
@ -226,99 +96,42 @@ class BedrockRetrieveAndGenerateTool(BuiltinTool):
citation_info["references"].append(reference)
result["citations"].append(citation_info)
return result
result_type = tool_parameters.get("result_type")
if result_type == "json":
return self.create_json_message(result)
elif result_type == "text-with-citations":
return self.create_text_message(result)
else:
return self.create_text_message(result.get("output"))
except json.JSONDecodeError as e:
return self.create_text_message(f"Invalid JSON format: {str(e)}")
except Exception as e:
raise Exception(f"Error calling Bedrock service: {str(e)}")
def _invoke(
self,
user_id: str,
tool_parameters: dict[str, Any],
) -> ToolInvokeMessage:
try:
# Initialize Bedrock client if not already initialized
if not self.bedrock_client:
aws_region = tool_parameters.get("aws_region")
aws_access_key_id = tool_parameters.get("aws_access_key_id")
aws_secret_access_key = tool_parameters.get("aws_secret_access_key")
client_kwargs = {
"service_name": "bedrock-agent-runtime",
}
if aws_region:
client_kwargs["region_name"] = aws_region
# Only add credentials if both access key and secret key are provided
if aws_access_key_id and aws_secret_access_key:
client_kwargs.update(
{"aws_access_key_id": aws_access_key_id, "aws_secret_access_key": aws_secret_access_key}
)
try:
self.bedrock_client = boto3.client(**client_kwargs)
except Exception as e:
return self.create_text_message(f"Failed to initialize Bedrock client: {str(e)}")
# Parse metadata filter if provided
metadata_filter = None
if metadata_filter_str := tool_parameters.get("metadata_filter"):
try:
parsed_filter = json.loads(metadata_filter_str)
if parsed_filter: # Only set if not empty
metadata_filter = parsed_filter
except json.JSONDecodeError:
return self.create_text_message("metadata_filter must be a valid JSON string")
try:
response = self._bedrock_retrieve_and_generate(
query=tool_parameters["query"],
knowledge_base_id=tool_parameters["knowledge_base_id"],
model_arn=tool_parameters["model_arn"],
# Generation Configuration
additional_model_fields=tool_parameters.get("additional_model_fields"),
guardrail_id=tool_parameters.get("guardrail_id"),
guardrail_version=tool_parameters.get("guardrail_version"),
max_tokens=tool_parameters.get("max_tokens"),
stop_sequences=tool_parameters.get("stop_sequences"),
temperature=tool_parameters.get("temperature"),
top_p=tool_parameters.get("top_p"),
performance_mode=tool_parameters.get("performance_mode", "standard"),
prompt_template=tool_parameters.get("prompt_template"),
# Orchestration Configuration
orchestration_additional_model_fields=tool_parameters.get("orchestration_additional_model_fields"),
orchestration_max_tokens=tool_parameters.get("orchestration_max_tokens"),
orchestration_stop_sequences=tool_parameters.get("orchestration_stop_sequences"),
orchestration_temperature=tool_parameters.get("orchestration_temperature"),
orchestration_top_p=tool_parameters.get("orchestration_top_p"),
orchestration_performance_mode=tool_parameters.get("orchestration_performance_mode"),
orchestration_prompt_template=tool_parameters.get("orchestration_prompt_template"),
# Retrieval Configuration
number_of_results=tool_parameters.get("number_of_results", 5),
search_type=tool_parameters.get("search_type", "SEMANTIC"),
metadata_filter=metadata_filter,
# Additional Configuration
session_id=tool_parameters.get("session_id"),
)
return self.create_json_message(response)
except Exception as e:
return self.create_text_message(f"Tool invocation error: {str(e)}")
except Exception as e:
return self.create_text_message(f"Tool execution error: {str(e)}")
return self.create_text_message(f"Tool invocation error: {str(e)}")
def validate_parameters(self, parameters: dict[str, Any]) -> None:
"""Validate the parameters"""
required_params = ["query", "model_arn", "knowledge_base_id"]
for param in required_params:
if not parameters.get(param):
raise ValueError(f"{param} is required")
# Validate required parameters
if not parameters.get("input"):
raise ValueError("input is required")
if not parameters.get("type"):
raise ValueError("type is required")
# Validate metadata filter if provided
if metadata_filter_str := parameters.get("metadata_filter"):
try:
if not isinstance(json.loads(metadata_filter_str), dict):
raise ValueError("metadata_filter must be a valid JSON object")
except json.JSONDecodeError:
raise ValueError("metadata_filter must be a valid JSON string")
# Validate JSON configurations
json_configs = ["knowledge_base_configuration", "external_sources_configuration", "session_configuration"]
for config in json_configs:
if config_value := parameters.get(config):
try:
json.loads(config_value)
except json.JSONDecodeError:
raise ValueError(f"{config} must be a valid JSON string")
# Validate configuration type
config_type = parameters.get("type")
if config_type not in ["KNOWLEDGE_BASE", "EXTERNAL_SOURCES"]:
raise ValueError("type must be either KNOWLEDGE_BASE or EXTERNAL_SOURCES")
# Validate type-specific configuration
if config_type == "KNOWLEDGE_BASE" and not parameters.get("knowledge_base_configuration"):
raise ValueError("knowledge_base_configuration is required when type is KNOWLEDGE_BASE")
elif config_type == "EXTERNAL_SOURCES" and not parameters.get("external_sources_configuration"):
raise ValueError("external_sources_configuration is required when type is EXTERNAL_SOURCES")

View File

@ -8,24 +8,11 @@ identity:
description:
human:
en_US: A tool for retrieving and generating information using Amazon Bedrock Knowledge Base
zh_Hans: 使用Amazon Bedrock知识库进行信息检索和生成的工具
en_US: "This is an advanced usage of Bedrock Retrieve. Please refer to the API documentation for detailed parameters and paste them into the corresponding Knowledge Base Configuration or External Sources Configuration"
zh_Hans: "这个工具为Bedrock Retrieve的高级用法请参考API设置详细的参数并粘贴到对应的知识库配置或者外部源配置"
llm: A tool for retrieving and generating information using Amazon Bedrock Knowledge Base
parameters:
# Additional Configuration
- name: session_id
type: string
required: false
label:
en_US: Session ID
zh_Hans: 会话ID
human_description:
en_US: Optional session ID for continuous conversations
zh_Hans: 用于连续对话的可选会话ID
form: form
# AWS Configuration
- name: aws_region
type: string
required: false
@ -59,300 +46,103 @@ parameters:
zh_Hans: 用于身份验证的AWS秘密访问密钥可选
form: form
# Knowledge Base Configuration
- name: knowledge_base_id
type: string
- name: result_type
type: select
required: true
label:
en_US: Knowledge Base ID
zh_Hans: 知识库ID
en_US: result type
zh_Hans: 结果类型
human_description:
en_US: ID of the Bedrock Knowledge Base
zh_Hans: Bedrock知识库的ID
en_US: return a list of json or texts
zh_Hans: 返回一个列表,内容是json还是纯文本
default: text
options:
- value: json
label:
en_US: JSON
zh_Hans: JSON
- value: text
label:
en_US: Text
zh_Hans: 文本
- value: text-with-citations
label:
en_US: Text With Citations
zh_Hans: 文本(包含引用)
form: form
- name: model_arn
- name: input
type: string
required: true
label:
en_US: Model ARN
zh_Hans: 模型ARN
en_US: Input Text
zh_Hans: 输入文本
human_description:
en_US: The ARN of the model to use
zh_Hans: 要使用的模型ARN
form: form
# Retrieval Configuration
- name: query
type: string
required: true
label:
en_US: Query
zh_Hans: 查询
human_description:
en_US: The search query to retrieve information
zh_Hans: 用于检索信息的查询语句
en_US: The text query to retrieve information
zh_Hans: 用于检索信息的文本查询
form: llm
- name: number_of_results
type: number
required: false
label:
en_US: Number of Results
zh_Hans: 结果数量
human_description:
en_US: Number of results to retrieve (1-10)
zh_Hans: 要检索的结果数量1-10
default: 5
min: 1
max: 10
form: form
- name: search_type
- name: type
type: select
required: false
required: true
label:
en_US: Search Type
zh_Hans: 搜索类型
en_US: Configuration Type
zh_Hans: 配置类型
human_description:
en_US: Type of search to perform
zh_Hans: 要执行的搜索类型
default: SEMANTIC
en_US: Type of retrieve and generate configuration
zh_Hans: 检索和生成配置的类型
options:
- value: SEMANTIC
- value: KNOWLEDGE_BASE
label:
en_US: Semantic Search
zh_Hans: 语义搜索
- value: HYBRID
en_US: Knowledge Base
zh_Hans: 知识库
- value: EXTERNAL_SOURCES
label:
en_US: Hybrid Search
zh_Hans: 混合搜索
en_US: External Sources
zh_Hans: 外部源
form: form
- name: metadata_filter
- name: knowledge_base_configuration
type: string
required: false
label:
en_US: Metadata Filter
zh_Hans: 元数据过滤器
en_US: Knowledge Base Configuration
zh_Hans: 知识库配置
human_description:
en_US: JSON formatted filter conditions for metadata, supporting operations like equals, greaterThan, lessThan, etc.
zh_Hans: 元数据的JSON格式过滤条件支持等于、大于、小于等操作
default: "{}"
en_US: Please refer to https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-agent-runtime/client/retrieve_and_generate.html#retrieve-and-generate for complete parameters and paste them here
zh_Hans: 请参考 https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-agent-runtime/client/retrieve_and_generate.html#retrieve-and-generate 配置完整的参数并粘贴到这里
form: form
# Generation Configuration
- name: guardrail_id
- name: external_sources_configuration
type: string
required: false
label:
en_US: Guardrail ID
zh_Hans: 防护栏ID
en_US: External Sources Configuration
zh_Hans: 外部源配置
human_description:
en_US: ID of the guardrail to apply
zh_Hans: 要应用的防护栏ID
en_US: Please refer to https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-agent-runtime/client/retrieve_and_generate.html#retrieve-and-generate for complete parameters and paste them here
zh_Hans: 请参考 https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-agent-runtime/client/retrieve_and_generate.html#retrieve-and-generate 配置完整的参数并粘贴到这里
form: form
- name: guardrail_version
- name: session_configuration
type: string
required: false
label:
en_US: Guardrail Version
zh_Hans: 防护栏版本
en_US: Session Configuration
zh_Hans: 会话配置
human_description:
en_US: Version of the guardrail to apply
zh_Hans: 要应用的防护栏版本
en_US: JSON formatted session configuration
zh_Hans: JSON格式的会话配置
default: ""
form: form
- name: max_tokens
type: number
required: false
label:
en_US: Maximum Tokens
zh_Hans: 最大令牌数
human_description:
en_US: Maximum number of tokens to generate
zh_Hans: 生成的最大令牌数
default: 2048
form: form
- name: stop_sequences
- name: session_id
type: string
required: false
label:
en_US: Stop Sequences
zh_Hans: 停止序列
en_US: Session ID
zh_Hans: 会话ID
human_description:
en_US: JSON array of strings that will stop generation when encountered
zh_Hans: JSON数组格式的字符串遇到这些序列时将停止生成
default: "[]"
form: form
- name: temperature
type: number
required: false
label:
en_US: Temperature
zh_Hans: 温度
human_description:
en_US: Controls randomness in the output (0-1)
zh_Hans: 控制输出的随机性0-1
default: 0.7
min: 0
max: 1
form: form
- name: top_p
type: number
required: false
label:
en_US: Top P
zh_Hans: Top P值
human_description:
en_US: Controls diversity via nucleus sampling (0-1)
zh_Hans: 通过核采样控制多样性0-1
default: 0.95
min: 0
max: 1
form: form
- name: performance_mode
type: select
required: false
label:
en_US: Performance Mode
zh_Hans: 性能模式
human_description:
en_US: Select performance optimization mode(performanceConfig.latency)
zh_Hans: 选择性能优化模式(performanceConfig.latency)
default: standard
options:
- value: standard
label:
en_US: Standard
zh_Hans: 标准
- value: optimized
label:
en_US: Optimized
zh_Hans: 优化
form: form
- name: prompt_template
type: string
required: false
label:
en_US: Prompt Template
zh_Hans: 提示模板
human_description:
en_US: Custom prompt template for generation
zh_Hans: 用于生成的自定义提示模板
form: form
- name: additional_model_fields
type: string
required: false
label:
en_US: Additional Model Fields
zh_Hans: 额外模型字段
human_description:
en_US: JSON formatted additional fields for model configuration
zh_Hans: JSON格式的额外模型配置字段
default: "{}"
form: form
# Orchestration Configuration
- name: orchestration_max_tokens
type: number
required: false
label:
en_US: Orchestration Maximum Tokens
zh_Hans: 编排最大令牌数
human_description:
en_US: Maximum number of tokens for orchestration
zh_Hans: 编排过程的最大令牌数
default: 2048
form: form
- name: orchestration_stop_sequences
type: string
required: false
label:
en_US: Orchestration Stop Sequences
zh_Hans: 编排停止序列
human_description:
en_US: JSON array of strings that will stop orchestration when encountered
zh_Hans: JSON数组格式的字符串遇到这些序列时将停止编排
default: "[]"
form: form
- name: orchestration_temperature
type: number
required: false
label:
en_US: Orchestration Temperature
zh_Hans: 编排温度
human_description:
en_US: Controls randomness in the orchestration output (0-1)
zh_Hans: 控制编排输出的随机性0-1
default: 0.7
min: 0
max: 1
form: form
- name: orchestration_top_p
type: number
required: false
label:
en_US: Orchestration Top P
zh_Hans: 编排Top P值
human_description:
en_US: Controls diversity via nucleus sampling in orchestration (0-1)
zh_Hans: 通过核采样控制编排的多样性0-1
default: 0.95
min: 0
max: 1
form: form
- name: orchestration_performance_mode
type: select
required: false
label:
en_US: Orchestration Performance Mode
zh_Hans: 编排性能模式
human_description:
en_US: Select performance optimization mode for orchestration
zh_Hans: 选择编排的性能优化模式
default: standard
options:
- value: standard
label:
en_US: Standard
zh_Hans: 标准
- value: optimized
label:
en_US: Optimized
zh_Hans: 优化
form: form
- name: orchestration_prompt_template
type: string
required: false
label:
en_US: Orchestration Prompt Template
zh_Hans: 编排提示模板
human_description:
en_US: Custom prompt template for orchestration
zh_Hans: 用于编排的自定义提示模板
form: form
- name: orchestration_additional_model_fields
type: string
required: false
label:
en_US: Orchestration Additional Model Fields
zh_Hans: 编排额外模型字段
human_description:
en_US: JSON formatted additional fields for orchestration model configuration
zh_Hans: JSON格式的编排模型额外配置字段
default: "{}"
en_US: Session ID for continuous conversations
zh_Hans: 用于连续对话的会话ID
form: form
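
The parameters above map onto the boto3 bedrock-agent-runtime retrieve_and_generate call documented at the linked reference. Below is a minimal sketch of how a few of the form values could be assembled into that call; the knowledge base ID, model ARN, query, and filter values are hypothetical, and the exact nesting should be checked against the boto3 reference linked above.

# Hedged sketch: assembling a retrieve_and_generate call from the form values above.
# Knowledge base ID, model ARN, and filter values are hypothetical placeholders.
import json

import boto3

client = boto3.client("bedrock-agent-runtime", region_name="us-east-1")

# Corresponds to the "Metadata Filter" field (JSON formatted filter conditions)
metadata_filter = json.loads('{"equals": {"key": "category", "value": "faq"}}')

response = client.retrieve_and_generate(
    input={"text": "How do I rotate my API key?"},
    retrieveAndGenerateConfiguration={
        "type": "KNOWLEDGE_BASE",
        "knowledgeBaseConfiguration": {
            "knowledgeBaseId": "KB1234567890",  # hypothetical
            "modelArn": "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0",
            "retrievalConfiguration": {
                "vectorSearchConfiguration": {"filter": metadata_filter}
            },
            "generationConfiguration": {
                "inferenceConfig": {
                    "textInferenceConfig": {
                        "temperature": 0.7,      # Temperature
                        "topP": 0.95,            # Top P
                        "maxTokens": 2048,       # Maximum Tokens
                        "stopSequences": [],     # Stop Sequences
                    }
                }
            },
        },
    },
)
print(response["output"]["text"])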

Binary image file not shown (4.5 KiB).

View File

@ -1,134 +0,0 @@
from typing import Any, Union

from vanna.remote import VannaDefault  # type: ignore

from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.errors import ToolProviderCredentialValidationError
from core.tools.tool.builtin_tool import BuiltinTool


class VannaTool(BuiltinTool):
    def _invoke(
        self, user_id: str, tool_parameters: dict[str, Any]
    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """
        invoke tools
        """
        # Ensure runtime and credentials
        if not self.runtime or not self.runtime.credentials:
            raise ToolProviderCredentialValidationError("Tool runtime or credentials are missing")

        api_key = self.runtime.credentials.get("api_key", None)
        if not api_key:
            raise ToolProviderCredentialValidationError("Please input api key")

        model = tool_parameters.get("model", "")
        if not model:
            return self.create_text_message("Please input RAG model")

        prompt = tool_parameters.get("prompt", "")
        if not prompt:
            return self.create_text_message("Please input prompt")

        url = tool_parameters.get("url", "")
        if not url:
            return self.create_text_message("Please input URL/Host/DSN")

        db_name = tool_parameters.get("db_name", "")
        username = tool_parameters.get("username", "")
        password = tool_parameters.get("password", "")
        port = tool_parameters.get("port", 0)

        base_url = self.runtime.credentials.get("base_url", None)
        vn = VannaDefault(model=model, api_key=api_key, config={"endpoint": base_url})

        db_type = tool_parameters.get("db_type", "")
        if db_type in {"Postgres", "MySQL", "Hive", "ClickHouse"}:
            if not db_name:
                return self.create_text_message("Please input database name")
            if not username:
                return self.create_text_message("Please input username")
            if port < 1:
                return self.create_text_message("Please input port")

        schema_sql = "SELECT * FROM INFORMATION_SCHEMA.COLUMNS"
        match db_type:
            case "SQLite":
                schema_sql = "SELECT type, sql FROM sqlite_master WHERE sql is not null"
                vn.connect_to_sqlite(url)
            case "Postgres":
                vn.connect_to_postgres(host=url, dbname=db_name, user=username, password=password, port=port)
            case "DuckDB":
                vn.connect_to_duckdb(url=url)
            case "SQLServer":
                vn.connect_to_mssql(url)
            case "MySQL":
                vn.connect_to_mysql(host=url, dbname=db_name, user=username, password=password, port=port)
            case "Oracle":
                vn.connect_to_oracle(user=username, password=password, dsn=url)
            case "Hive":
                vn.connect_to_hive(host=url, dbname=db_name, user=username, password=password, port=port)
            case "ClickHouse":
                vn.connect_to_clickhouse(host=url, dbname=db_name, user=username, password=password, port=port)

        enable_training = tool_parameters.get("enable_training", False)
        reset_training_data = tool_parameters.get("reset_training_data", False)
        if enable_training:
            if reset_training_data:
                existing_training_data = vn.get_training_data()
                if len(existing_training_data) > 0:
                    for _, training_data in existing_training_data.iterrows():
                        vn.remove_training_data(training_data["id"])

            ddl = tool_parameters.get("ddl", "")
            question = tool_parameters.get("question", "")
            sql = tool_parameters.get("sql", "")
            memos = tool_parameters.get("memos", "")
            training_metadata = tool_parameters.get("training_metadata", False)

            if training_metadata:
                if db_type == "SQLite":
                    df_ddl = vn.run_sql(schema_sql)
                    for ddl in df_ddl["sql"].to_list():
                        vn.train(ddl=ddl)
                else:
                    df_information_schema = vn.run_sql(schema_sql)
                    plan = vn.get_training_plan_generic(df_information_schema)
                    vn.train(plan=plan)

            if ddl:
                vn.train(ddl=ddl)

            if sql:
                if question:
                    vn.train(question=question, sql=sql)
                else:
                    vn.train(sql=sql)

            if memos:
                vn.train(documentation=memos)

        #########################################################################################
        # Due to CVE-2024-5565, we have to disable the chart generation feature.
        # The Vanna library uses a prompt function to present the user with visualized results;
        # it is possible to alter the prompt using prompt injection and run arbitrary Python code
        # instead of the intended visualization code.
        # Specifically, allowing external input to the library's "ask" method
        # with "visualize" set to True (default behavior) leads to remote code execution.
        # Affected versions: <= 0.5.5
        #########################################################################################
        allow_llm_to_see_data = tool_parameters.get("allow_llm_to_see_data", False)
        res = vn.ask(
            prompt, print_results=False, auto_train=True, visualize=False, allow_llm_to_see_data=allow_llm_to_see_data
        )

        result = []
        if res is not None:
            result.append(self.create_text_message(res[0]))
            if len(res) > 1 and res[1] is not None:
                result.append(self.create_text_message(res[1].to_markdown()))
            if len(res) > 2 and res[2] is not None:
                result.append(
                    self.create_blob_message(blob=res[2].to_image(format="svg"), meta={"mime_type": "image/svg+xml"})
                )

        return result
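
For context on the vn.ask(...) handling above: with print_results=False the call is expected to return a tuple of (generated SQL, result DataFrame, figure), which is why the tool indexes res[0] through res[2]. A standalone sketch of the same CVE-safe call pattern follows; the model name, API key, and demo database URL are illustrative, not taken from this diff.

# Standalone sketch of the safe call pattern used above; credentials and URLs are illustrative.
# visualize=False keeps chart generation off, mitigating the CVE-2024-5565 prompt injection path.
from vanna.remote import VannaDefault

vn = VannaDefault(model="chinook", api_key="YOUR_VANNA_API_KEY")
vn.connect_to_sqlite("https://vanna.ai/Chinook.sqlite")  # hosted demo SQLite database

res = vn.ask(
    "What are the top 10 customers by sales?",
    print_results=False,
    auto_train=True,
    visualize=False,
    allow_llm_to_see_data=False,
)
if res is not None:
    sql, df, fig = res  # fig stays None because visualize=False
    print(sql)
    if df is not None:
        print(df.to_markdown())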

View File

@ -1,213 +0,0 @@
identity:
name: vanna
author: QCTC
label:
en_US: Vanna.AI
zh_Hans: Vanna.AI
description:
human:
en_US: The fastest way to get actionable insights from your database just by asking questions.
zh_Hans: 一个基于大模型和RAG的Text2SQL工具。
llm: A tool for converting text to SQL.
parameters:
- name: prompt
type: string
required: true
label:
en_US: Prompt
zh_Hans: 提示词
pt_BR: Prompt
human_description:
en_US: used for generating SQL
zh_Hans: 用于生成SQL
llm_description: key words for generating SQL
form: llm
- name: model
type: string
required: true
label:
en_US: RAG Model
zh_Hans: RAG模型
human_description:
en_US: RAG Model for your database DDL
zh_Hans: 存储数据库训练数据的RAG模型
llm_description: RAG Model for generating SQL
form: llm
- name: db_type
type: select
required: true
options:
- value: SQLite
label:
en_US: SQLite
zh_Hans: SQLite
- value: Postgres
label:
en_US: Postgres
zh_Hans: Postgres
- value: DuckDB
label:
en_US: DuckDB
zh_Hans: DuckDB
- value: SQLServer
label:
en_US: Microsoft SQL Server
zh_Hans: 微软 SQL Server
- value: MySQL
label:
en_US: MySQL
zh_Hans: MySQL
- value: Oracle
label:
en_US: Oracle
zh_Hans: Oracle
- value: Hive
label:
en_US: Hive
zh_Hans: Hive
- value: ClickHouse
label:
en_US: ClickHouse
zh_Hans: ClickHouse
default: SQLite
label:
en_US: DB Type
zh_Hans: 数据库类型
human_description:
en_US: Database type.
zh_Hans: 选择要链接的数据库类型。
form: form
- name: url
type: string
required: true
label:
en_US: URL/Host/DSN
zh_Hans: URL/Host/DSN
human_description:
en_US: Please input a value appropriate to the DB type; visit https://vanna.ai/docs/ for details
zh_Hans: 请根据数据库类型填入对应值详情参考https://vanna.ai/docs/
form: form
- name: db_name
type: string
required: false
label:
en_US: DB name
zh_Hans: 数据库名
human_description:
en_US: Database name
zh_Hans: 数据库名
form: form
- name: username
type: string
required: false
label:
en_US: Username
zh_Hans: 用户名
human_description:
en_US: Username
zh_Hans: 用户名
form: form
- name: password
type: secret-input
required: false
label:
en_US: Password
zh_Hans: 密码
human_description:
en_US: Password
zh_Hans: 密码
form: form
- name: port
type: number
required: false
label:
en_US: Port
zh_Hans: 端口
human_description:
en_US: Port
zh_Hans: 端口
form: form
- name: ddl
type: string
required: false
label:
en_US: Training DDL
zh_Hans: 训练DDL
human_description:
en_US: DDL statements for training data
zh_Hans: 用于训练RAG Model的建表语句
form: llm
- name: question
type: string
required: false
label:
en_US: Training Question
zh_Hans: 训练问题
human_description:
en_US: Question-SQL Pairs
zh_Hans: Question-SQL中的问题
form: llm
- name: sql
type: string
required: false
label:
en_US: Training SQL
zh_Hans: 训练SQL
human_description:
en_US: SQL queries for your training data
zh_Hans: 用于训练RAG Model的SQL语句
form: llm
- name: memos
type: string
required: false
label:
en_US: Training Memos
zh_Hans: 训练说明
human_description:
en_US: Sometimes you may want to add documentation about your business terminology or definitions
zh_Hans: 添加更多关于数据库的业务说明
form: llm
- name: enable_training
type: boolean
required: false
default: false
label:
en_US: Training Data
zh_Hans: 训练数据
human_description:
en_US: You only need to train once. Do not train again unless you want to add more training data
zh_Hans: 训练数据无更新时,训练一次即可
form: form
- name: reset_training_data
type: boolean
required: false
default: false
label:
en_US: Reset Training Data
zh_Hans: 重置训练数据
human_description:
en_US: Remove all training data in the current RAG Model
zh_Hans: 删除当前RAG Model中的所有训练数据
form: form
- name: training_metadata
type: boolean
required: false
default: false
label:
en_US: Training Metadata
zh_Hans: 训练元数据
human_description:
en_US: If enabled, it will attempt to train on the metadata of that database
zh_Hans: 是否自动从数据库获取元数据来训练
form: form
- name: allow_llm_to_see_data
type: boolean
required: false
default: false
label:
en_US: Whether to allow the LLM to see the data
zh_Hans: 是否允许LLM查看数据
human_description:
en_US: Whether to allow the LLM to see the data
zh_Hans: 是否允许LLM查看数据
form: form

View File

@ -1,46 +0,0 @@
import re
from typing import Any
from urllib.parse import urlparse

from core.tools.errors import ToolProviderCredentialValidationError
from core.tools.provider.builtin.vanna.tools.vanna import VannaTool
from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController


class VannaProvider(BuiltinToolProviderController):
    def _get_protocol_and_main_domain(self, url):
        parsed_url = urlparse(url)
        protocol = parsed_url.scheme
        hostname = parsed_url.hostname
        port = f":{parsed_url.port}" if parsed_url.port else ""

        # Check if the hostname is an IP address
        is_ip = re.match(r"^\d{1,3}(\.\d{1,3}){3}$", hostname) is not None

        # Return the full hostname (with port if present) for IP addresses, otherwise return the main domain
        main_domain = f"{hostname}{port}" if is_ip else ".".join(hostname.split(".")[-2:]) + port
        return f"{protocol}://{main_domain}"

    def _validate_credentials(self, credentials: dict[str, Any]) -> None:
        base_url = credentials.get("base_url")
        if not base_url:
            base_url = "https://ask.vanna.ai/rpc"
        else:
            base_url = base_url.removesuffix("/")
        credentials["base_url"] = base_url
        try:
            VannaTool().fork_tool_runtime(
                runtime={
                    "credentials": credentials,
                }
            ).invoke(
                user_id="",
                tool_parameters={
                    "model": "chinook",
                    "db_type": "SQLite",
                    "url": f"{self._get_protocol_and_main_domain(credentials['base_url'])}/Chinook.sqlite",
                    "query": "What are the top 10 customers by sales?",
                },
            )
        except Exception as e:
            raise ToolProviderCredentialValidationError(str(e))

View File

@ -1,35 +0,0 @@
identity:
  author: QCTC
  name: vanna
  label:
    en_US: Vanna.AI
    zh_Hans: Vanna.AI
  description:
    en_US: The fastest way to get actionable insights from your database just by asking questions.
    zh_Hans: 一个基于大模型和RAG的Text2SQL工具。
  icon: icon.png
  tags:
    - utilities
    - productivity
credentials_for_provider:
  api_key:
    type: secret-input
    required: true
    label:
      en_US: API key
      zh_Hans: API key
    placeholder:
      en_US: Please input your API key
      zh_Hans: 请输入你的 API key
      pt_BR: Please input your API key
    help:
      en_US: Get your API key from Vanna.AI
      zh_Hans: 从 Vanna.AI 获取你的 API key
    url: https://vanna.ai/account/profile
  base_url:
    type: text-input
    required: false
    label:
      en_US: Vanna.AI Endpoint Base URL
    placeholder:
      en_US: https://ask.vanna.ai/rpc

View File

@ -590,6 +590,8 @@ class Graph(BaseModel):
start_node_id=node_id,
routes_node_ids=routes_node_ids,
)
# Exclude conditional branch nodes
and all(edge.run_condition is None for edge in reverse_edge_mapping.get(node_id, []))
):
if node_id not in merge_branch_node_ids:
merge_branch_node_ids[node_id] = []

View File

@ -3,7 +3,7 @@ from typing import Any, Optional
from pydantic import BaseModel, Field, field_validator
from core.model_runtime.entities import ImagePromptMessageContent
from core.model_runtime.entities import ImagePromptMessageContent, LLMMode
from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate, MemoryConfig
from core.workflow.entities.variable_entities import VariableSelector
from core.workflow.nodes.base import BaseNodeData
@ -12,7 +12,7 @@ from core.workflow.nodes.base import BaseNodeData
class ModelConfig(BaseModel):
provider: str
name: str
mode: str
mode: LLMMode
completion_params: dict[str, Any] = {}
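
The hunk above narrows ModelConfig.mode from a plain str to the LLMMode enum, so an unsupported mode is rejected when the node data is parsed rather than failing later at invocation time. A minimal sketch of the effect follows; it assumes LLMMode is a string-backed enum with chat/completion values (a stand-in for core.model_runtime.entities.LLMMode), and the model names are hypothetical.

# Sketch of why an enum-typed field helps: pydantic rejects unknown modes at parse time.
from enum import Enum
from typing import Any

from pydantic import BaseModel, ValidationError

class LLMMode(str, Enum):  # stand-in for the imported LLMMode
    CHAT = "chat"
    COMPLETION = "completion"

class ModelConfig(BaseModel):
    provider: str
    name: str
    mode: LLMMode
    completion_params: dict[str, Any] = {}

ModelConfig(provider="openai", name="gpt-4o", mode="chat")  # ok, coerced to LLMMode.CHAT
try:
    ModelConfig(provider="openai", name="gpt-4o", mode="chatty")  # typo now fails fast
except ValidationError as e:
    print(e)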

View File

@ -3,6 +3,7 @@ import logging
from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Optional, cast
from configs import dify_config
from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
from core.entities.model_entities import ModelStatus
from core.entities.provider_entities import QuotaUnit
@ -732,10 +733,7 @@ class LLMNode(BaseNode[LLMNodeData]):
if quota_unit == QuotaUnit.TOKENS:
used_quota = usage.total_tokens
elif quota_unit == QuotaUnit.CREDITS:
used_quota = 1
if "gpt-4" in model_instance.model:
used_quota = 20
used_quota = dify_config.get_model_credits(model_instance.model)
else:
used_quota = 1
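
This hunk replaces the hardcoded rule (any model name containing "gpt-4" bills 20 credits, everything else 1) with a lookup via dify_config.get_model_credits(...), so per-model credit pricing lives in configuration. The real implementation is not part of this diff; the sketch below is a hypothetical illustration of such a lookup.

# Hypothetical sketch of a config-driven credit lookup; the real
# dify_config.get_model_credits implementation is not shown in this diff.
MODEL_CREDIT_RULES = [
    ("gpt-4", 20),  # any model name containing "gpt-4" costs 20 credits per call
]
DEFAULT_CREDITS = 1

def get_model_credits(model_name: str) -> int:
    for pattern, credits in MODEL_CREDIT_RULES:
        if pattern in model_name:
            return credits
    return DEFAULT_CREDITS

assert get_model_credits("gpt-4o-mini") == 20
assert get_model_credits("claude-3-haiku") == 1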

View File

@ -1,3 +1,4 @@
from configs import dify_config
from core.app.entities.app_invoke_entities import AgentChatAppGenerateEntity, ChatAppGenerateEntity
from core.entities.provider_entities import QuotaUnit
from events.message_event import message_was_created
@ -37,10 +38,7 @@ def handle(sender, **kwargs):
if quota_unit == QuotaUnit.TOKENS:
used_quota = message.message_tokens + message.answer_tokens
elif quota_unit == QuotaUnit.CREDITS:
used_quota = 1
if "gpt-4" in model_config.model:
used_quota = 20
used_quota = dify_config.get_model_credits(model_config.model)
else:
used_quota = 1

View File

@ -1,6 +1,8 @@
from collections.abc import Generator
from datetime import UTC, datetime, timedelta
from typing import Optional
from azure.identity import ChainedTokenCredential, DefaultAzureCredential
from azure.storage.blob import AccountSasPermissions, BlobServiceClient, ResourceTypes, generate_account_sas
from configs import dify_config
@ -18,6 +20,12 @@ class AzureBlobStorage(BaseStorage):
self.account_name = dify_config.AZURE_BLOB_ACCOUNT_NAME
self.account_key = dify_config.AZURE_BLOB_ACCOUNT_KEY
self.credential: Optional[ChainedTokenCredential] = None
if self.account_key == "managedidentity":
self.credential = DefaultAzureCredential()
else:
self.credential = None
def save(self, filename, data):
client = self._sync_client()
blob_container = client.get_container_client(container=self.bucket_name)
@ -57,6 +65,9 @@ class AzureBlobStorage(BaseStorage):
blob_container.delete_blob(filename)
def _sync_client(self):
if self.account_key == "managedidentity":
return BlobServiceClient(account_url=self.account_url, credential=self.credential) # type: ignore
cache_key = "azure_blob_sas_token_{}_{}".format(self.account_name, self.account_key)
cache_result = redis_client.get(cache_key)
if cache_result is not None:
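
The storage change above treats the sentinel value "managedidentity" in AZURE_BLOB_ACCOUNT_KEY as "authenticate with DefaultAzureCredential instead of an account-key SAS token". Below is a minimal standalone sketch of that branch, assuming the public azure-identity and azure-storage-blob client APIs; the account and container names are hypothetical.

# Minimal sketch of the managed-identity branch; account and container are hypothetical.
from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient

account_url = "https://myaccount.blob.core.windows.net"
client = BlobServiceClient(account_url=account_url, credential=DefaultAzureCredential())

container = client.get_container_client(container="dify")
for blob in container.list_blobs(name_starts_with="upload_files/"):
    print(blob.name)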

api/poetry.lock generated
View File

@ -627,15 +627,15 @@ cryptography = "*"
[[package]]
name = "azure-ai-inference"
version = "1.0.0b6"
version = "1.0.0b8"
description = "Microsoft Azure AI Inference Client Library for Python"
optional = false
python-versions = ">=3.8"
groups = ["main"]
markers = "python_version == \"3.11\" or python_version >= \"3.12\""
files = [
{file = "azure_ai_inference-1.0.0b6-py3-none-any.whl", hash = "sha256:5699ad78d70ec2d227a5eff2c1bafc845018f6624edc5b03589dfff861c54958"},
{file = "azure_ai_inference-1.0.0b6.tar.gz", hash = "sha256:b8ac941de1e69151bad464191e18856d4e74f962ae03235da137a9a326143676"},
{file = "azure_ai_inference-1.0.0b8-py3-none-any.whl", hash = "sha256:9bfcfe6ef5b1699fed6c70058027c253bcbc88f4730e7409fbfc675636ec05e4"},
{file = "azure_ai_inference-1.0.0b8.tar.gz", hash = "sha256:b7bcaaac5f53f2be06804ac6c755be9583ac6ba99df533a3970da081838b4cc1"},
]
[package.dependencies]
@ -931,19 +931,19 @@ files = [
[[package]]
name = "boto3"
version = "1.36.4"
version = "1.36.12"
description = "The AWS SDK for Python"
optional = false
python-versions = ">=3.8"
groups = ["main"]
markers = "python_version == \"3.11\" or python_version >= \"3.12\""
files = [
{file = "boto3-1.36.4-py3-none-any.whl", hash = "sha256:9f8f699e75ec63fcc98c4dd7290997c7c06c68d3ac8161ad4735fe71f5fe945c"},
{file = "boto3-1.36.4.tar.gz", hash = "sha256:eeceeb74ef8b65634d358c27aa074917f4449dc828f79301f1075232618eb502"},
{file = "boto3-1.36.12-py3-none-any.whl", hash = "sha256:32cdf0967287f3ec25a9dc09df0d29cb86b8900c3e0546a63d672775d8127abf"},
{file = "boto3-1.36.12.tar.gz", hash = "sha256:287d84f49bba3255a17b374578127d42b6251e72f55914a62e0ad9ca78c0954b"},
]
[package.dependencies]
botocore = ">=1.36.4,<1.37.0"
botocore = ">=1.36.12,<1.37.0"
jmespath = ">=0.7.1,<2.0.0"
s3transfer = ">=0.11.0,<0.12.0"
@ -952,15 +952,15 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
[[package]]
name = "botocore"
version = "1.36.5"
version = "1.36.12"
description = "Low-level, data-driven core of boto 3."
optional = false
python-versions = ">=3.8"
groups = ["main"]
markers = "python_version == \"3.11\" or python_version >= \"3.12\""
files = [
{file = "botocore-1.36.5-py3-none-any.whl", hash = "sha256:6d9f70afa9bf9d21407089dc22b8cc8ec6fa44866d4660858c062c74fc8555eb"},
{file = "botocore-1.36.5.tar.gz", hash = "sha256:234ed3d29a8954c37a551c933453bf14c6ae44a69a4f273ffef377a2612ca6a6"},
{file = "botocore-1.36.12-py3-none-any.whl", hash = "sha256:5ae1ed362c8ed908a6ced8cdd12b21e2196c100bc79f9e95c9c1fc7f9ea74f5a"},
{file = "botocore-1.36.12.tar.gz", hash = "sha256:86ed88beb4f244c96529435c868d3940073c2774116f0023fb7691f6e7053bd9"},
]
[package.dependencies]
@ -969,7 +969,7 @@ python-dateutil = ">=2.1,<3.0.0"
urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}
[package.extras]
crt = ["awscrt (==0.23.4)"]
crt = ["awscrt (==0.23.8)"]
[[package]]
name = "bottleneck"
@ -6522,15 +6522,15 @@ sympy = "*"
[[package]]
name = "openai"
version = "1.52.2"
version = "1.61.0"
description = "The official Python library for the openai API"
optional = false
python-versions = ">=3.7.1"
python-versions = ">=3.8"
groups = ["main"]
markers = "python_version == \"3.11\" or python_version >= \"3.12\""
files = [
{file = "openai-1.52.2-py3-none-any.whl", hash = "sha256:57e9e37bc407f39bb6ec3a27d7e8fb9728b2779936daa1fcf95df17d3edfaccc"},
{file = "openai-1.52.2.tar.gz", hash = "sha256:87b7d0f69d85f5641678d414b7ee3082363647a5c66a462ed7f3ccb59582da0d"},
{file = "openai-1.61.0-py3-none-any.whl", hash = "sha256:e8c512c0743accbdbe77f3429a1490d862f8352045de8dc81969301eb4a4f666"},
{file = "openai-1.61.0.tar.gz", hash = "sha256:216f325a24ed8578e929b0f1b3fb2052165f3b04b0461818adaa51aa29c71f8a"},
]
[package.dependencies]
@ -6545,6 +6545,7 @@ typing-extensions = ">=4.11,<5"
[package.extras]
datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
realtime = ["websockets (>=13,<15)"]
[[package]]
name = "opencensus"
@ -12388,4 +12389,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.11,<3.13"
content-hash = "6243573a26b9aa03558eb2c176d2477a08b1033a17065e870e4be83af0af644d"
content-hash = "d197cdff507a70323c1d6aca11609188f54970f67715af744fe6def15b7776fd"

View File

@ -17,11 +17,11 @@ package-mode = false
[tool.poetry.dependencies]
anthropic = "~0.23.1"
authlib = "1.3.1"
azure-ai-inference = "~1.0.0b3"
azure-ai-inference = "~1.0.0b8"
azure-ai-ml = "~1.20.0"
azure-identity = "1.16.1"
beautifulsoup4 = "4.12.2"
boto3 = "1.36.4"
boto3 = "1.36.12"
bs4 = "~0.0.1"
cachetools = "~5.3.0"
celery = "~5.4.0"
@ -58,7 +58,7 @@ nomic = "~3.1.2"
novita-client = "~0.5.7"
numpy = "~1.26.4"
oci = "~2.135.1"
openai = "~1.52.0"
openai = "~1.61.0"
openpyxl = "~3.1.5"
opik = "~1.3.4"
pandas = { version = "~2.2.2", extras = ["performance", "excel"] }

View File

@ -0,0 +1,26 @@
from typing import Dict, List

from pydantic import BaseModel

from tasks.mail_enterprise_task import send_enterprise_email_task


class DifyMail(BaseModel):
    to: List[str]
    subject: str
    body: str
    substitutions: Dict[str, str] = {}


class EnterpriseMailService:
    @classmethod
    def send_mail(cls, mail: DifyMail):
        send_enterprise_email_task.delay(
            to=mail.to,
            subject=mail.subject,
            body=mail.body,
            substitutions=mail.substitutions,
        )
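
A hedged usage sketch for this new inner mail API follows; the import path, recipient, and template body are hypothetical. The body is rendered with render_template_string in the Celery task, so substitutions fills the Jinja placeholders.

# Hedged usage sketch; module path and values are illustrative, not part of this diff.
from services.enterprise.mail_service import DifyMail, EnterpriseMailService  # assumed path

mail = DifyMail(
    to=["ops@example.com"],
    subject="License expiring soon",
    body="<p>Hello {{ name }}, your license expires on {{ date }}.</p>",
    substitutions={"name": "Admin", "date": "2025-04-30"},
)
EnterpriseMailService.send_mail(mail)  # queues send_enterprise_email_task on the "mail" queue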

View File

@ -36,6 +36,14 @@ class LicenseModel(BaseModel):
expired_at: str = ""
class BrandingModel(BaseModel):
enabled: bool = False
application_title: str = ""
login_page_logo: str = ""
workspace_logo: str = ""
favicon: str = ""
class FeatureModel(BaseModel):
billing: BillingModel = BillingModel()
members: LimitationModel = LimitationModel(size=0, limit=1)
@ -47,6 +55,7 @@ class FeatureModel(BaseModel):
can_replace_logo: bool = False
model_load_balancing_enabled: bool = False
dataset_operator_enabled: bool = False
webapp_copyright_enabled: bool = False
# pydantic configs
model_config = ConfigDict(protected_namespaces=())
@ -65,6 +74,7 @@ class SystemFeatureModel(BaseModel):
is_allow_create_workspace: bool = False
is_email_setup: bool = False
license: LicenseModel = LicenseModel()
branding: BrandingModel = BrandingModel()
class FeatureService:
@ -77,6 +87,9 @@ class FeatureService:
if dify_config.BILLING_ENABLED and tenant_id:
cls._fulfill_params_from_billing_api(features, tenant_id)
if dify_config.ENTERPRISE_ENABLED:
features.webapp_copyright_enabled = True
return features
@classmethod
@ -87,7 +100,7 @@ class FeatureService:
if dify_config.ENTERPRISE_ENABLED:
system_features.enable_web_sso_switch_component = True
system_features.branding.enabled = True
cls._fulfill_params_from_enterprise(system_features)
return system_features
@ -115,6 +128,9 @@ class FeatureService:
features.billing.subscription.plan = billing_info["subscription"]["plan"]
features.billing.subscription.interval = billing_info["subscription"]["interval"]
if features.billing.subscription.plan != "sandbox":
features.webapp_copyright_enabled = True
if "members" in billing_info:
features.members.size = billing_info["members"]["size"]
features.members.limit = billing_info["members"]["limit"]
@ -148,35 +164,41 @@ class FeatureService:
def _fulfill_params_from_enterprise(cls, features):
enterprise_info = EnterpriseService.get_info()
if "sso_enforced_for_signin" in enterprise_info:
features.sso_enforced_for_signin = enterprise_info["sso_enforced_for_signin"]
if "SSOEnforcedForSignin" in enterprise_info:
features.sso_enforced_for_signin = enterprise_info["SSOEnforcedForSignin"]
if "sso_enforced_for_signin_protocol" in enterprise_info:
features.sso_enforced_for_signin_protocol = enterprise_info["sso_enforced_for_signin_protocol"]
if "SSOEnforcedForSigninProtocol" in enterprise_info:
features.sso_enforced_for_signin_protocol = enterprise_info["SSOEnforcedForSigninProtocol"]
if "sso_enforced_for_web" in enterprise_info:
features.sso_enforced_for_web = enterprise_info["sso_enforced_for_web"]
if "SSOEnforcedForWeb" in enterprise_info:
features.sso_enforced_for_web = enterprise_info["SSOEnforcedForWeb"]
if "sso_enforced_for_web_protocol" in enterprise_info:
features.sso_enforced_for_web_protocol = enterprise_info["sso_enforced_for_web_protocol"]
if "SSOEnforcedForWebProtocol" in enterprise_info:
features.sso_enforced_for_web_protocol = enterprise_info["SSOEnforcedForWebProtocol"]
if "enable_email_code_login" in enterprise_info:
features.enable_email_code_login = enterprise_info["enable_email_code_login"]
if "EnableEmailCodeLogin" in enterprise_info:
features.enable_email_code_login = enterprise_info["EnableEmailCodeLogin"]
if "enable_email_password_login" in enterprise_info:
features.enable_email_password_login = enterprise_info["enable_email_password_login"]
if "EnableEmailPasswordLogin" in enterprise_info:
features.enable_email_password_login = enterprise_info["EnableEmailPasswordLogin"]
if "is_allow_register" in enterprise_info:
features.is_allow_register = enterprise_info["is_allow_register"]
if "IsAllowRegister" in enterprise_info:
features.is_allow_register = enterprise_info["IsAllowRegister"]
if "is_allow_create_workspace" in enterprise_info:
features.is_allow_create_workspace = enterprise_info["is_allow_create_workspace"]
if "IsAllowCreateWorkspace" in enterprise_info:
features.is_allow_create_workspace = enterprise_info["IsAllowCreateWorkspace"]
if "license" in enterprise_info:
license_info = enterprise_info["license"]
if "Branding" in enterprise_info:
features.branding.application_title = enterprise_info["Branding"].get("applicationTitle", "")
features.branding.login_page_logo = enterprise_info["Branding"].get("loginPageLogo", "")
features.branding.workspace_logo = enterprise_info["Branding"].get("workspaceLogo", "")
features.branding.favicon = enterprise_info["Branding"].get("favicon", "")
if "License" in enterprise_info:
license_info = enterprise_info["License"]
if "status" in license_info:
features.license.status = LicenseStatus(license_info.get("status", LicenseStatus.INACTIVE))
if "expired_at" in license_info:
features.license.expired_at = license_info["expired_at"]
if "expiredAt" in license_info:
features.license.expired_at = license_info["expiredAt"]
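
The hunk above switches the keys expected in the enterprise info payload from snake_case to PascalCase and adds parsing for Branding and License. Below is an illustrative payload built only from the keys read in this hunk; all values are made up.

# Illustrative enterprise_info payload the updated parser accepts; values are hypothetical.
enterprise_info = {
    "SSOEnforcedForSignin": True,
    "SSOEnforcedForSigninProtocol": "oidc",
    "SSOEnforcedForWeb": False,
    "EnableEmailCodeLogin": True,
    "EnableEmailPasswordLogin": True,
    "IsAllowRegister": False,
    "IsAllowCreateWorkspace": False,
    "Branding": {
        "applicationTitle": "Acme AI",
        "loginPageLogo": "https://cdn.example.com/logo.png",
        "workspaceLogo": "https://cdn.example.com/workspace.png",
        "favicon": "https://cdn.example.com/favicon.ico",
    },
    "License": {"status": "active", "expiredAt": "2026-01-01"},
}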

View File

@ -6,6 +6,7 @@ from celery import shared_task # type: ignore
from flask import render_template
from extensions.ext_mail import mail
from services.feature_service import FeatureService
@shared_task(queue="mail")
@ -25,10 +26,24 @@ def send_email_code_login_mail_task(language: str, to: str, code: str):
# send email code login mail using different languages
try:
if language == "zh-Hans":
html_content = render_template("email_code_login_mail_template_zh-CN.html", to=to, code=code)
template = "email_code_login_mail_template_zh-CN.html"
system_features = FeatureService.get_system_features()
if system_features.branding.enabled:
application_title = system_features.branding.application_title
template = "without-brand/email_code_login_mail_template_zh-CN.html"
html_content = render_template(template, to=to, code=code, application_title=application_title)
else:
html_content = render_template(template, to=to, code=code)
mail.send(to=to, subject="邮箱验证码", html=html_content)
else:
html_content = render_template("email_code_login_mail_template_en-US.html", to=to, code=code)
template = "email_code_login_mail_template_en-US.html"
system_features = FeatureService.get_system_features()
if system_features.branding.enabled:
application_title = system_features.branding.application_title
template = "without-brand/email_code_login_mail_template_en-US.html"
html_content = render_template(template, to=to, code=code, application_title=application_title)
else:
html_content = render_template(template, to=to, code=code)
mail.send(to=to, subject="Email Code", html=html_content)
end_at = time.perf_counter()
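
Both language branches above now repeat the same branding lookup and template switch. The sketch below shows that pattern factored into a small helper; the helper and its call site are hypothetical and not part of this diff.

# Hypothetical refactor sketch of the template-selection pattern applied above.
from flask import render_template

from services.feature_service import FeatureService

def resolve_branding(template: str) -> tuple[str, str]:
    """Return (template_path, application_title) honouring the branding toggle."""
    system_features = FeatureService.get_system_features()
    if system_features.branding.enabled:
        return f"without-brand/{template}", system_features.branding.application_title
    return template, "Dify"

def build_code_login_mail(to: str, code: str) -> str:
    template, application_title = resolve_branding("email_code_login_mail_template_en-US.html")
    return render_template(template, to=to, code=code, application_title=application_title)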

View File

@ -0,0 +1,37 @@
import logging
import time

import click
from celery import shared_task  # type: ignore
from flask import render_template_string

from extensions.ext_mail import mail


@shared_task(queue="mail")
def send_enterprise_email_task(to, subject, body, substitutions):
    if not mail.is_inited():
        return

    logging.info(
        click.style("Start enterprise mail to {} with subject {}".format(to, subject), fg="green")
    )
    start_at = time.perf_counter()

    try:
        html_content = render_template_string(body, **substitutions)

        if isinstance(to, list):
            for t in to:
                mail.send(to=t, subject=subject, html=html_content)
        else:
            mail.send(to=to, subject=subject, html=html_content)

        end_at = time.perf_counter()
        logging.info(
            click.style(
                "Send enterprise mail to {} succeeded: latency: {}".format(to, end_at - start_at), fg="green"
            )
        )
    except Exception:
        logging.exception("Send enterprise mail to {} failed".format(to))

View File

@ -7,6 +7,7 @@ from flask import render_template
from configs import dify_config
from extensions.ext_mail import mail
from services.feature_service import FeatureService
@shared_task(queue="mail")
@ -33,23 +34,45 @@ def send_invite_member_mail_task(language: str, to: str, token: str, inviter_nam
try:
url = f"{dify_config.CONSOLE_WEB_URL}/activate?token={token}"
if language == "zh-Hans":
html_content = render_template(
"invite_member_mail_template_zh-CN.html",
to=to,
inviter_name=inviter_name,
workspace_name=workspace_name,
url=url,
)
mail.send(to=to, subject="立即加入 Dify 工作空间", html=html_content)
template = "invite_member_mail_template_zh-CN.html"
system_features = FeatureService.get_system_features()
if system_features.branding.enabled:
application_title = system_features.branding.application_title
template = "without-brand/invite_member_mail_template_zh-CN.html"
html_content = render_template(
template,
to=to,
inviter_name=inviter_name,
workspace_name=workspace_name,
url=url,
application_title=application_title,
)
mail.send(to=to, subject=f"立即加入 {application_title} 工作空间", html=html_content)
else:
html_content = render_template(
template, to=to, inviter_name=inviter_name, workspace_name=workspace_name, url=url
)
mail.send(to=to, subject="立即加入 Dify 工作空间", html=html_content)
else:
html_content = render_template(
"invite_member_mail_template_en-US.html",
to=to,
inviter_name=inviter_name,
workspace_name=workspace_name,
url=url,
)
mail.send(to=to, subject="Join Dify Workspace Now", html=html_content)
template = "invite_member_mail_template_en-US.html"
system_features = FeatureService.get_system_features()
if system_features.branding.enabled:
application_title = system_features.branding.application_title
template = "without-brand/invite_member_mail_template_en-US.html"
html_content = render_template(
template,
to=to,
inviter_name=inviter_name,
workspace_name=workspace_name,
url=url,
application_title=application_title,
)
mail.send(to=to, subject=f"Join {application_title} Workspace Now", html=html_content)
else:
html_content = render_template(
template, to=to, inviter_name=inviter_name, workspace_name=workspace_name, url=url
)
mail.send(to=to, subject="Join Dify Workspace Now", html=html_content)
end_at = time.perf_counter()
logging.info(

Some files were not shown because too many files have changed in this diff.