Mirror of https://github.com/langgenius/dify.git (synced 2026-02-08 12:35:27 +08:00)
Compare commits
106 Commits
| SHA1 | Author | Date | |
|---|---|---|---|
| 1387f9b23e | |||
| 6817eab5f1 | |||
| 218f591a5d | |||
| 17af0de7b6 | |||
| 9d962053a2 | |||
| 59909b5ca7 | |||
| a6cd0f0e73 | |||
| 2c43393bf1 | |||
| 669c8c3cca | |||
| b0b0cc045f | |||
| 20d16d7b31 | |||
| 714722bb2d | |||
| 830495a607 | |||
| 41a4593b6d | |||
| 08b727833e | |||
| c8b82b9d08 | |||
| 5becb4c43a | |||
| 13694293e3 | |||
| 815beac356 | |||
| 5e60204832 | |||
| d2624b13a0 | |||
| 61f5de9662 | |||
| 40dbf30784 | |||
| afd77c4745 | |||
| d70bd4aaa4 | |||
| 8e05261588 | |||
| a676d4387c | |||
| 08a5afcf9f | |||
| eeaa3c1643 | |||
| 7c8c233cf4 | |||
| 129a9850eb | |||
| 1f98a4fff3 | |||
| 58e4702b14 | |||
| c60749678b | |||
| d5214e4644 | |||
| 52804ca6d1 | |||
| 4fb9606361 | |||
| c534d95972 | |||
| 46ccfda493 | |||
| 6dc62334d6 | |||
| c7d003d551 | |||
| cc754122fc | |||
| 240a94182e | |||
| 16af509c46 | |||
| 86e474fff1 | |||
| 9a3d5729bb | |||
| 5a1c29fd8c | |||
| 180775a0ec | |||
| d018e279f8 | |||
| 11636bc7c7 | |||
| 518c1ceb94 | |||
| 696efe494e | |||
| 4419d357c4 | |||
| fbbba6db92 | |||
| 53d428907b | |||
| 8133ba16b1 | |||
| e9aa0e89d3 | |||
| 7e3c59e53e | |||
| f6314f8e73 | |||
| 3bcfd84fba | |||
| 7c0ae76cd0 | |||
| 2dee8a25d5 | |||
| 507aa6d949 | |||
| 59f173f2e6 | |||
| c3790c239c | |||
| 45e51e7730 | |||
| 4834eae887 | |||
| 01108e6172 | |||
| 95b74c211d | |||
| cb79a90031 | |||
| 4502436c47 | |||
| c3d0cf940c | |||
| e7343cc67c | |||
| 83145486b0 | |||
| 6fd1795d25 | |||
| f770232b63 | |||
| a8e694c235 | |||
| 15a6d94953 | |||
| 056331981e | |||
| cef16862da | |||
| 8a4015722d | |||
| 156345cb4b | |||
| f29280ba5c | |||
| 742be06ea9 | |||
| af98954fc1 | |||
| 4d63770189 | |||
| bbea3a6b84 | |||
| 19d3a56194 | |||
| 5cab2b711f | |||
| 1e5455e266 | |||
| 4fe585acc2 | |||
| e52448b84b | |||
| 1f92b55f58 | |||
| 8b15b742ad | |||
| 849dc0560b | |||
| a026c5fd08 | |||
| fd7aade26b | |||
| 510f8ede10 | |||
| 8f9125b08a | |||
| e5e97c0a0a | |||
| 870ca713df | |||
| 6854a3fd26 | |||
| 620360d41a | |||
| 20bd49285b | |||
| 6bd2730317 | |||
| f734cca337 |
2 .github/pull_request_template.md vendored

@@ -12,6 +12,8 @@ Please delete options that are not relevant.
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] This change requires a documentation update, included: [Dify Document](https://github.com/langgenius/dify-docs)
- [ ] Improvement,including but not limited to code refactoring, performance optimization, and UI/UX improvement
- [ ] Dependency upgrade

# How Has This Been Tested?
@ -1,17 +1,32 @@
|
||||
name: Build and Push API Image
|
||||
name: Build and Push API & Web
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- 'main'
|
||||
- 'deploy/dev'
|
||||
- "main"
|
||||
- "deploy/dev"
|
||||
release:
|
||||
types: [ published ]
|
||||
types: [published]
|
||||
|
||||
env:
|
||||
DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USER }}
|
||||
DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
DIFY_WEB_IMAGE_NAME: ${{ vars.DIFY_WEB_IMAGE_NAME || 'langgenius/dify-web' }}
|
||||
DIFY_API_IMAGE_NAME: ${{ vars.DIFY_API_IMAGE_NAME || 'langgenius/dify-api' }}
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event.pull_request.draft == false
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- service_name: "web"
|
||||
image_name_env: "DIFY_WEB_IMAGE_NAME"
|
||||
context: "web"
|
||||
- service_name: "api"
|
||||
image_name_env: "DIFY_API_IMAGE_NAME"
|
||||
context: "api"
|
||||
steps:
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
@ -22,14 +37,14 @@ jobs:
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
username: ${{ env.DOCKERHUB_USER }}
|
||||
password: ${{ env.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Extract metadata (tags, labels) for Docker
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: langgenius/dify-api
|
||||
images: ${{ env[matrix.image_name_env] }}
|
||||
tags: |
|
||||
type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') }}
|
||||
type=ref,event=branch
|
||||
@ -39,22 +54,11 @@ jobs:
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: "{{defaultContext}}:api"
|
||||
context: "{{defaultContext}}:${{ matrix.context }}"
|
||||
platforms: ${{ startsWith(github.ref, 'refs/tags/') && 'linux/amd64,linux/arm64' || 'linux/amd64' }}
|
||||
build-args: |
|
||||
COMMIT_SHA=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.revision'] }}
|
||||
build-args: COMMIT_SHA=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.revision'] }}
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
- name: Deploy to server
|
||||
if: github.ref == 'refs/heads/deploy/dev'
|
||||
uses: appleboy/ssh-action@v0.1.8
|
||||
with:
|
||||
host: ${{ secrets.SSH_HOST }}
|
||||
username: ${{ secrets.SSH_USER }}
|
||||
key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||
script: |
|
||||
${{ secrets.SSH_SCRIPT }}
|
||||
60
.github/workflows/build-web-image.yml
vendored
60
.github/workflows/build-web-image.yml
vendored
@ -1,60 +0,0 @@
|
||||
name: Build and Push WEB Image
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- 'main'
|
||||
- 'deploy/dev'
|
||||
release:
|
||||
types: [ published ]
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event.pull_request.draft == false
|
||||
steps:
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Extract metadata (tags, labels) for Docker
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: langgenius/dify-web
|
||||
tags: |
|
||||
type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') }}
|
||||
type=ref,event=branch
|
||||
type=sha,enable=true,priority=100,prefix=,suffix=,format=long
|
||||
type=raw,value=${{ github.ref_name }},enable=${{ startsWith(github.ref, 'refs/tags/') }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: "{{defaultContext}}:web"
|
||||
platforms: ${{ startsWith(github.ref, 'refs/tags/') && 'linux/amd64,linux/arm64' || 'linux/amd64' }}
|
||||
build-args: |
|
||||
COMMIT_SHA=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.revision'] }}
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
- name: Deploy to server
|
||||
if: github.ref == 'refs/heads/deploy/dev'
|
||||
uses: appleboy/ssh-action@v0.1.8
|
||||
with:
|
||||
host: ${{ secrets.SSH_HOST }}
|
||||
username: ${{ secrets.SSH_USER }}
|
||||
key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||
script: |
|
||||
${{ secrets.SSH_SCRIPT }}
|
||||
24 .github/workflows/deploy-dev.yml vendored Normal file

@@ -0,0 +1,24 @@
name: Deploy Dev

on:
workflow_run:
workflows: ["Build and Push API & Web"]
branches:
- "deploy/dev"
types:
- completed

jobs:
deploy:
runs-on: ubuntu-latest
if: |
github.event.workflow_run.conclusion == 'success'
steps:
- name: Deploy to server
uses: appleboy/ssh-action@v0.1.8
with:
host: ${{ secrets.SSH_HOST }}
username: ${{ secrets.SSH_USER }}
key: ${{ secrets.SSH_PRIVATE_KEY }}
script: |
${{ vars.SSH_SCRIPT || secrets.SSH_SCRIPT }}
3 .gitignore vendored

@@ -154,4 +154,5 @@ sdks/python-client/dist
sdks/python-client/dify_client.egg-info

.vscode/*
!.vscode/launch.json
!.vscode/launch.json
pyrightconfig.json
@@ -155,4 +155,4 @@ And that's it! Once your PR is merged, you will be featured as a contributor in

## Getting Help

If you ever get stuck or got a burning question while contributing, simply shoot your queries our way via the related GitHub issue, or hop onto our [Discord](https://discord.gg/AhzKf7dNgk) for a quick chat.
If you ever get stuck or got a burning question while contributing, simply shoot your queries our way via the related GitHub issue, or hop onto our [Discord](https://discord.gg/8Tpq4AcN9c) for a quick chat.

@@ -152,4 +152,4 @@ Dify的后端使用Python编写,使用[Flask](https://flask.palletsprojects.co

## 获取帮助

如果你在贡献过程中遇到困难或者有任何问题,可以通过相关的 GitHub 问题提出你的疑问,或者加入我们的 [Discord](https://discord.gg/AhzKf7dNgk) 进行快速交流。
如果你在贡献过程中遇到困难或者有任何问题,可以通过相关的 GitHub 问题提出你的疑问,或者加入我们的 [Discord](https://discord.gg/8Tpq4AcN9c) 进行快速交流。
43 Makefile Normal file

@@ -0,0 +1,43 @@
# Variables
DOCKER_REGISTRY=langgenius
WEB_IMAGE=$(DOCKER_REGISTRY)/dify-web
API_IMAGE=$(DOCKER_REGISTRY)/dify-api
VERSION=latest

# Build Docker images
build-web:
@echo "Building web Docker image: $(WEB_IMAGE):$(VERSION)..."
docker build -t $(WEB_IMAGE):$(VERSION) ./web
@echo "Web Docker image built successfully: $(WEB_IMAGE):$(VERSION)"

build-api:
@echo "Building API Docker image: $(API_IMAGE):$(VERSION)..."
docker build -t $(API_IMAGE):$(VERSION) ./api
@echo "API Docker image built successfully: $(API_IMAGE):$(VERSION)"

# Push Docker images
push-web:
@echo "Pushing web Docker image: $(WEB_IMAGE):$(VERSION)..."
docker push $(WEB_IMAGE):$(VERSION)
@echo "Web Docker image pushed successfully: $(WEB_IMAGE):$(VERSION)"

push-api:
@echo "Pushing API Docker image: $(API_IMAGE):$(VERSION)..."
docker push $(API_IMAGE):$(VERSION)
@echo "API Docker image pushed successfully: $(API_IMAGE):$(VERSION)"

# Build all images
build-all: build-web build-api

# Push all images
push-all: push-web push-api

build-push-api: build-api push-api
build-push-web: build-web push-web

# Build and push all images
build-push-all: build-all push-all
@echo "All Docker images have been built and pushed."

# Phony targets
.PHONY: build-web build-api push-web push-api build-all push-all build-push-all
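As a usage note (an assumption based on the targets above, not documented elsewhere in this changeset): with Docker Hub credentials already configured via `docker login`, `make build-all` builds both images, `make push-all` pushes them, and `make build-push-all VERSION=0.5.11` does both with a custom tag in place of the default `latest`.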
18
README.md
18
README.md
@ -22,8 +22,8 @@
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://dify.ai/blog/dify-ai-unveils-ai-agent-creating-gpts-and-assistants-with-various-llms" target="_blank">
|
||||
Dify.AI Unveils AI Agent: Creating GPTs and Assistants with Various LLMs
|
||||
<a href="https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6" target="_blank">
|
||||
📌 Check out Dify Premium on AWS and deploy it to your own AWS VPC with one-click.
|
||||
</a>
|
||||
</p>
|
||||
|
||||
@ -37,6 +37,9 @@
|
||||
|
||||
You can try out [Dify.AI Cloud](https://dify.ai) now. It provides all the capabilities of the self-deployed version, and includes 200 free requests to OpenAI GPT-3.5.
|
||||
|
||||
### Looking to purchase via AWS?
|
||||
Check out [Dify Premium on AWS](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) and deploy it to your own AWS VPC with one-click.
|
||||
|
||||
## Dify vs. LangChain vs. Assistants API
|
||||
|
||||
| Feature | Dify.AI | Assistants API | LangChain |
|
||||
@ -97,10 +100,12 @@ docker compose up -d
|
||||
|
||||
After running, you can access the Dify dashboard in your browser at [http://localhost/install](http://localhost/install) and start the initialization installation process.
|
||||
|
||||
### Helm Chart
|
||||
#### Deploy with Helm Chart
|
||||
|
||||
Big thanks to @BorisPolonsky for providing us with a [Helm Chart](https://helm.sh/) version, which allows Dify to be deployed on Kubernetes.
|
||||
You can go to https://github.com/BorisPolonsky/dify-helm for deployment information.
|
||||
[Helm Chart](https://helm.sh/) version, which allows Dify to be deployed on Kubernetes.
|
||||
|
||||
- [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
|
||||
- [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
|
||||
|
||||
### Configuration
|
||||
|
||||
@ -117,6 +122,7 @@ For those who'd like to contribute code, see our [Contribution Guide](https://gi
|
||||
|
||||
At the same time, please consider supporting Dify by sharing it on social media and at events and conferences.
|
||||
|
||||
|
||||
### Contributors
|
||||
|
||||
<a href="https://github.com/langgenius/dify/graphs/contributors">
|
||||
@ -125,7 +131,7 @@ At the same time, please consider supporting Dify by sharing it on social media
|
||||
|
||||
### Translations
|
||||
|
||||
We are looking for contributors to help with translating Dify to languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README_EN.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/AhzKf7dNgk).
|
||||
We are looking for contributors to help with translating Dify to languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c).
|
||||
|
||||
## Community & Support
|
||||
|
||||
|
||||
@ -94,10 +94,12 @@ docker compose up -d
|
||||
|
||||
运行后,可以在浏览器上访问 [http://localhost/install](http://localhost/install) 进入 Dify 控制台并开始初始化安装操作。
|
||||
|
||||
### Helm Chart
|
||||
#### 使用 Helm Chart 部署
|
||||
|
||||
非常感谢 @BorisPolonsky 为我们提供了一个 [Helm Chart](https://helm.sh/) 版本,可以在 Kubernetes 上部署 Dify。
|
||||
您可以前往 https://github.com/BorisPolonsky/dify-helm 来获取部署信息。
|
||||
使用 [Helm Chart](https://helm.sh/) 版本,可以在 Kubernetes 上部署 Dify。
|
||||
|
||||
- [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
|
||||
- [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
|
||||
|
||||
### 配置
|
||||
|
||||
|
||||
@ -39,7 +39,7 @@ DB_DATABASE=dify
|
||||
|
||||
# Storage configuration
|
||||
# use for store upload files, private keys...
|
||||
# storage type: local, s3
|
||||
# storage type: local, s3, azure-blob
|
||||
STORAGE_TYPE=local
|
||||
STORAGE_LOCAL_PATH=storage
|
||||
S3_ENDPOINT=https://your-bucket-name.storage.s3.clooudflare.com
|
||||
@ -47,6 +47,11 @@ S3_BUCKET_NAME=your-bucket-name
|
||||
S3_ACCESS_KEY=your-access-key
|
||||
S3_SECRET_KEY=your-secret-key
|
||||
S3_REGION=your-region
|
||||
# Azure Blob Storage configuration
|
||||
AZURE_BLOB_ACCOUNT_NAME=your-account-name
|
||||
AZURE_BLOB_ACCOUNT_KEY=your-account-key
|
||||
AZURE_BLOB_CONTAINER_NAME=yout-container-name
|
||||
AZURE_BLOB_ACCOUNT_URL=https://<your_account_name>.blob.core.windows.net
|
||||
|
||||
# CORS configuration
|
||||
WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
|
||||
|
||||
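The hunk above adds Azure Blob Storage settings alongside the existing S3 ones. As a rough illustration of how these variables map onto a client (a sketch assuming the `azure-storage-blob` SDK; the storage extension Dify actually ships may wire this differently):

```python
import os

from azure.storage.blob import BlobServiceClient  # assumes azure-storage-blob is installed


def get_blob_container():
    # Hypothetical helper: build a container client from the .env values above.
    account_url = os.environ["AZURE_BLOB_ACCOUNT_URL"]   # e.g. https://<your_account_name>.blob.core.windows.net
    account_key = os.environ["AZURE_BLOB_ACCOUNT_KEY"]
    container = os.environ["AZURE_BLOB_CONTAINER_NAME"]

    service = BlobServiceClient(account_url=account_url, credential=account_key)
    return service.get_container_client(container)
```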
@ -109,19 +109,20 @@ def reset_encrypt_key_pair():
|
||||
click.echo(click.style('Sorry, only support SELF_HOSTED mode.', fg='red'))
|
||||
return
|
||||
|
||||
tenant = db.session.query(Tenant).first()
|
||||
if not tenant:
|
||||
click.echo(click.style('Sorry, no workspace found. Please enter /install to initialize.', fg='red'))
|
||||
return
|
||||
tenants = db.session.query(Tenant).all()
|
||||
for tenant in tenants:
|
||||
if not tenant:
|
||||
click.echo(click.style('Sorry, no workspace found. Please enter /install to initialize.', fg='red'))
|
||||
return
|
||||
|
||||
tenant.encrypt_public_key = generate_key_pair(tenant.id)
|
||||
tenant.encrypt_public_key = generate_key_pair(tenant.id)
|
||||
|
||||
db.session.query(Provider).filter(Provider.provider_type == 'custom').delete()
|
||||
db.session.query(ProviderModel).delete()
|
||||
db.session.commit()
|
||||
db.session.query(Provider).filter(Provider.provider_type == 'custom', Provider.tenant_id == tenant.id).delete()
|
||||
db.session.query(ProviderModel).filter(ProviderModel.tenant_id == tenant.id).delete()
|
||||
db.session.commit()
|
||||
|
||||
click.echo(click.style('Congratulations! '
|
||||
'the asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green'))
|
||||
click.echo(click.style('Congratulations! '
|
||||
'the asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green'))
|
||||
|
||||
|
||||
@click.command('vdb-migrate', help='migrate vector db.')
|
||||
@ -254,7 +255,7 @@ def migrate_knowledge_vector_database():
|
||||
for dataset in datasets:
|
||||
total_count = total_count + 1
|
||||
click.echo(f'Processing the {total_count} dataset {dataset.id}. '
|
||||
+ f'{create_count} created, ${skipped_count} skipped.')
|
||||
+ f'{create_count} created, {skipped_count} skipped.')
|
||||
try:
|
||||
click.echo('Create dataset vdb index: {}'.format(dataset.id))
|
||||
if dataset.index_struct_dict:
|
||||
|
||||
@ -22,6 +22,7 @@ DEFAULTS = {
|
||||
'SERVICE_API_URL': 'https://api.dify.ai',
|
||||
'APP_WEB_URL': 'https://udify.app',
|
||||
'FILES_URL': '',
|
||||
'S3_ADDRESS_STYLE': 'auto',
|
||||
'STORAGE_TYPE': 'local',
|
||||
'STORAGE_LOCAL_PATH': 'storage',
|
||||
'CHECK_UPDATE_URL': 'https://updates.dify.ai',
|
||||
@ -59,7 +60,8 @@ DEFAULTS = {
|
||||
'CAN_REPLACE_LOGO': 'False',
|
||||
'ETL_TYPE': 'dify',
|
||||
'KEYWORD_STORE': 'jieba',
|
||||
'BATCH_UPLOAD_LIMIT': 20
|
||||
'BATCH_UPLOAD_LIMIT': 20,
|
||||
'TOOL_ICON_CACHE_MAX_AGE': 3600,
|
||||
}
|
||||
|
||||
|
||||
@ -90,7 +92,7 @@ class Config:
|
||||
# ------------------------
|
||||
# General Configurations.
|
||||
# ------------------------
|
||||
self.CURRENT_VERSION = "0.5.9"
|
||||
self.CURRENT_VERSION = "0.5.11"
|
||||
self.COMMIT_SHA = get_env('COMMIT_SHA')
|
||||
self.EDITION = "SELF_HOSTED"
|
||||
self.DEPLOY_ENV = get_env('DEPLOY_ENV')
|
||||
@ -180,6 +182,11 @@ class Config:
|
||||
self.S3_ACCESS_KEY = get_env('S3_ACCESS_KEY')
|
||||
self.S3_SECRET_KEY = get_env('S3_SECRET_KEY')
|
||||
self.S3_REGION = get_env('S3_REGION')
|
||||
self.S3_ADDRESS_STYLE = get_env('S3_ADDRESS_STYLE')
|
||||
self.AZURE_BLOB_ACCOUNT_NAME = get_env('AZURE_BLOB_ACCOUNT_NAME')
|
||||
self.AZURE_BLOB_ACCOUNT_KEY = get_env('AZURE_BLOB_ACCOUNT_KEY')
|
||||
self.AZURE_BLOB_CONTAINER_NAME = get_env('AZURE_BLOB_CONTAINER_NAME')
|
||||
self.AZURE_BLOB_ACCOUNT_URL = get_env('AZURE_BLOB_ACCOUNT_URL')
|
||||
|
||||
# ------------------------
|
||||
# Vector Store Configurations.
|
||||
@ -294,6 +301,7 @@ class Config:
|
||||
self.BATCH_UPLOAD_LIMIT = get_env('BATCH_UPLOAD_LIMIT')
|
||||
|
||||
self.API_COMPRESSION_ENABLED = get_bool_env('API_COMPRESSION_ENABLED')
|
||||
self.TOOL_ICON_CACHE_MAX_AGE = get_env('TOOL_ICON_CACHE_MAX_AGE')
|
||||
|
||||
|
||||
class CloudEditionConfig(Config):
|
||||
|
||||
@ -2,7 +2,7 @@ import json
|
||||
|
||||
from models.model import AppModelConfig
|
||||
|
||||
languages = ['en-US', 'zh-Hans', 'pt-BR', 'es-ES', 'fr-FR', 'de-DE', 'ja-JP', 'ko-KR', 'ru-RU', 'it-IT', 'uk-UA']
|
||||
languages = ['en-US', 'zh-Hans', 'pt-BR', 'es-ES', 'fr-FR', 'de-DE', 'ja-JP', 'ko-KR', 'ru-RU', 'it-IT', 'uk-UA', 'vi-VN']
|
||||
|
||||
language_timezone_mapping = {
|
||||
'en-US': 'America/New_York',
|
||||
@ -16,6 +16,7 @@ language_timezone_mapping = {
|
||||
'ru-RU': 'Europe/Moscow',
|
||||
'it-IT': 'Europe/Rome',
|
||||
'uk-UA': 'Europe/Kyiv',
|
||||
'vi-VN': 'Asia/Ho_Chi_Minh',
|
||||
}
|
||||
|
||||
|
||||
@ -79,6 +80,16 @@ user_input_form_template = {
|
||||
}
|
||||
}
|
||||
],
|
||||
"vi-VN": [
|
||||
{
|
||||
"paragraph": {
|
||||
"label": "Nội dung truy vấn",
|
||||
"variable": "default_input",
|
||||
"required": False,
|
||||
"default": ""
|
||||
}
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
demo_model_templates = {
|
||||
@ -208,7 +219,6 @@ demo_model_templates = {
|
||||
)
|
||||
}
|
||||
],
|
||||
|
||||
'zh-Hans': [
|
||||
{
|
||||
'name': '翻译助手',
|
||||
@ -335,91 +345,92 @@ demo_model_templates = {
|
||||
)
|
||||
}
|
||||
],
|
||||
'uk-UA': [{
|
||||
"name": "Помічник перекладу",
|
||||
"icon": "",
|
||||
"icon_background": "",
|
||||
"description": "Багатомовний перекладач, який надає можливості перекладу різними мовами, перекладаючи введені користувачем дані на потрібну мову.",
|
||||
"mode": "completion",
|
||||
"model_config": AppModelConfig(
|
||||
provider="openai",
|
||||
model_id="gpt-3.5-turbo-instruct",
|
||||
configs={
|
||||
"prompt_template": "Будь ласка, перекладіть наступний текст на {{target_language}}:\n",
|
||||
"prompt_variables": [
|
||||
{
|
||||
"key": "target_language",
|
||||
"name": "Цільова мова",
|
||||
"description": "Мова, на яку ви хочете перекласти.",
|
||||
"type": "select",
|
||||
"default": "Ukrainian",
|
||||
"options": [
|
||||
"Chinese",
|
||||
"English",
|
||||
"Japanese",
|
||||
"French",
|
||||
"Russian",
|
||||
"German",
|
||||
"Spanish",
|
||||
"Korean",
|
||||
"Italian",
|
||||
],
|
||||
'uk-UA': [
|
||||
{
|
||||
"name": "Помічник перекладу",
|
||||
"icon": "",
|
||||
"icon_background": "",
|
||||
"description": "Багатомовний перекладач, який надає можливості перекладу різними мовами, перекладаючи введені користувачем дані на потрібну мову.",
|
||||
"mode": "completion",
|
||||
"model_config": AppModelConfig(
|
||||
provider="openai",
|
||||
model_id="gpt-3.5-turbo-instruct",
|
||||
configs={
|
||||
"prompt_template": "Будь ласка, перекладіть наступний текст на {{target_language}}:\n",
|
||||
"prompt_variables": [
|
||||
{
|
||||
"key": "target_language",
|
||||
"name": "Цільова мова",
|
||||
"description": "Мова, на яку ви хочете перекласти.",
|
||||
"type": "select",
|
||||
"default": "Ukrainian",
|
||||
"options": [
|
||||
"Chinese",
|
||||
"English",
|
||||
"Japanese",
|
||||
"French",
|
||||
"Russian",
|
||||
"German",
|
||||
"Spanish",
|
||||
"Korean",
|
||||
"Italian",
|
||||
],
|
||||
},
|
||||
],
|
||||
"completion_params": {
|
||||
"max_token": 1000,
|
||||
"temperature": 0,
|
||||
"top_p": 0,
|
||||
"presence_penalty": 0.1,
|
||||
"frequency_penalty": 0.1,
|
||||
},
|
||||
],
|
||||
"completion_params": {
|
||||
"max_token": 1000,
|
||||
"temperature": 0,
|
||||
"top_p": 0,
|
||||
"presence_penalty": 0.1,
|
||||
"frequency_penalty": 0.1,
|
||||
},
|
||||
},
|
||||
opening_statement="",
|
||||
suggested_questions=None,
|
||||
pre_prompt="Будь ласка, перекладіть наступний текст на {{target_language}}:\n{{query}}\ntranslate:",
|
||||
model=json.dumps({
|
||||
"provider": "openai",
|
||||
"name": "gpt-3.5-turbo-instruct",
|
||||
"mode": "completion",
|
||||
"completion_params": {
|
||||
"max_tokens": 1000,
|
||||
"temperature": 0,
|
||||
"top_p": 0,
|
||||
"presence_penalty": 0.1,
|
||||
"frequency_penalty": 0.1,
|
||||
},
|
||||
}),
|
||||
user_input_form=json.dumps([
|
||||
{
|
||||
"select": {
|
||||
"label": "Цільова мова",
|
||||
"variable": "target_language",
|
||||
"description": "Мова, на яку ви хочете перекласти.",
|
||||
"default": "Chinese",
|
||||
"required": True,
|
||||
'options': [
|
||||
'Chinese',
|
||||
'English',
|
||||
'Japanese',
|
||||
'French',
|
||||
'Russian',
|
||||
'German',
|
||||
'Spanish',
|
||||
'Korean',
|
||||
'Italian',
|
||||
]
|
||||
opening_statement="",
|
||||
suggested_questions=None,
|
||||
pre_prompt="Будь ласка, перекладіть наступний текст на {{target_language}}:\n{{query}}\ntranslate:",
|
||||
model=json.dumps({
|
||||
"provider": "openai",
|
||||
"name": "gpt-3.5-turbo-instruct",
|
||||
"mode": "completion",
|
||||
"completion_params": {
|
||||
"max_tokens": 1000,
|
||||
"temperature": 0,
|
||||
"top_p": 0,
|
||||
"presence_penalty": 0.1,
|
||||
"frequency_penalty": 0.1,
|
||||
},
|
||||
}),
|
||||
user_input_form=json.dumps([
|
||||
{
|
||||
"select": {
|
||||
"label": "Цільова мова",
|
||||
"variable": "target_language",
|
||||
"description": "Мова, на яку ви хочете перекласти.",
|
||||
"default": "Chinese",
|
||||
"required": True,
|
||||
'options': [
|
||||
'Chinese',
|
||||
'English',
|
||||
'Japanese',
|
||||
'French',
|
||||
'Russian',
|
||||
'German',
|
||||
'Spanish',
|
||||
'Korean',
|
||||
'Italian',
|
||||
]
|
||||
}
|
||||
}, {
|
||||
"paragraph": {
|
||||
"label": "Запит",
|
||||
"variable": "query",
|
||||
"required": True,
|
||||
"default": ""
|
||||
}
|
||||
}
|
||||
}, {
|
||||
"paragraph": {
|
||||
"label": "Запит",
|
||||
"variable": "query",
|
||||
"required": True,
|
||||
"default": ""
|
||||
}
|
||||
}
|
||||
])
|
||||
)
|
||||
},
|
||||
])
|
||||
)
|
||||
},
|
||||
{
|
||||
"name": "AI інтерв’юер фронтенду",
|
||||
"icon": "",
|
||||
@ -460,5 +471,132 @@ demo_model_templates = {
|
||||
),
|
||||
}
|
||||
],
|
||||
|
||||
'vi-VN': [
|
||||
{
|
||||
'name': 'Trợ lý dịch thuật',
|
||||
'icon': '',
|
||||
'icon_background': '',
|
||||
'description': 'Trình dịch đa ngôn ngữ cung cấp khả năng dịch bằng nhiều ngôn ngữ, dịch thông tin đầu vào của người dùng sang ngôn ngữ họ cần.',
|
||||
'mode': 'completion',
|
||||
'model_config': AppModelConfig(
|
||||
provider='openai',
|
||||
model_id='gpt-3.5-turbo-instruct',
|
||||
configs={
|
||||
'prompt_template': "Hãy dịch đoạn văn bản sau sang ngôn ngữ {{target_language}}:\n",
|
||||
'prompt_variables': [
|
||||
{
|
||||
"key": "target_language",
|
||||
"name": "Ngôn ngữ đích",
|
||||
"description": "Ngôn ngữ bạn muốn dịch sang.",
|
||||
"type": "select",
|
||||
"default": "Vietnamese",
|
||||
'options': [
|
||||
'Chinese',
|
||||
'English',
|
||||
'Japanese',
|
||||
'French',
|
||||
'Russian',
|
||||
'German',
|
||||
'Spanish',
|
||||
'Korean',
|
||||
'Italian',
|
||||
'Vietnamese',
|
||||
]
|
||||
}
|
||||
],
|
||||
'completion_params': {
|
||||
'max_token': 1000,
|
||||
'temperature': 0,
|
||||
'top_p': 0,
|
||||
'presence_penalty': 0.1,
|
||||
'frequency_penalty': 0.1,
|
||||
}
|
||||
},
|
||||
opening_statement='',
|
||||
suggested_questions=None,
|
||||
pre_prompt="Hãy dịch đoạn văn bản sau sang {{target_language}}:\n{{query}}\ndịch:",
|
||||
model=json.dumps({
|
||||
"provider": "openai",
|
||||
"name": "gpt-3.5-turbo-instruct",
|
||||
"mode": "completion",
|
||||
"completion_params": {
|
||||
"max_tokens": 1000,
|
||||
"temperature": 0,
|
||||
"top_p": 0,
|
||||
"presence_penalty": 0.1,
|
||||
"frequency_penalty": 0.1
|
||||
}
|
||||
}),
|
||||
user_input_form=json.dumps([
|
||||
{
|
||||
"select": {
|
||||
"label": "Ngôn ngữ đích",
|
||||
"variable": "target_language",
|
||||
"description": "Ngôn ngữ bạn muốn dịch sang.",
|
||||
"default": "Vietnamese",
|
||||
"required": True,
|
||||
'options': [
|
||||
'Chinese',
|
||||
'English',
|
||||
'Japanese',
|
||||
'French',
|
||||
'Russian',
|
||||
'German',
|
||||
'Spanish',
|
||||
'Korean',
|
||||
'Italian',
|
||||
'Vietnamese',
|
||||
]
|
||||
}
|
||||
}, {
|
||||
"paragraph": {
|
||||
"label": "Query",
|
||||
"variable": "query",
|
||||
"required": True,
|
||||
"default": ""
|
||||
}
|
||||
}
|
||||
])
|
||||
)
|
||||
},
|
||||
{
|
||||
'name': 'Phỏng vấn front-end AI',
|
||||
'icon': '',
|
||||
'icon_background': '',
|
||||
'description': 'Một người phỏng vấn front-end mô phỏng để kiểm tra mức độ kỹ năng phát triển front-end thông qua việc đặt câu hỏi.',
|
||||
'mode': 'chat',
|
||||
'model_config': AppModelConfig(
|
||||
provider='openai',
|
||||
model_id='gpt-3.5-turbo',
|
||||
configs={
|
||||
'introduction': 'Xin chào, chào mừng đến với cuộc phỏng vấn của chúng tôi. Tôi là người phỏng vấn cho công ty công nghệ này và tôi sẽ kiểm tra kỹ năng phát triển web front-end của bạn. Tiếp theo, tôi sẽ hỏi bạn một số câu hỏi kỹ thuật. Hãy trả lời chúng càng kỹ lưỡng càng tốt. ',
|
||||
'prompt_template': "Bạn sẽ đóng vai người phỏng vấn cho một công ty công nghệ, kiểm tra kỹ năng phát triển web front-end của người dùng và đặt ra 5-10 câu hỏi kỹ thuật sắc bén.\n\nXin lưu ý:\n- Mỗi lần chỉ hỏi một câu hỏi.\n - Sau khi người dùng trả lời một câu hỏi, hãy hỏi trực tiếp câu hỏi tiếp theo mà không cố gắng sửa bất kỳ lỗi nào mà thí sinh mắc phải.\n- Nếu bạn cho rằng người dùng đã không trả lời đúng cho một số câu hỏi liên tiếp, hãy hỏi ít câu hỏi hơn.\n- Sau đặt câu hỏi cuối cùng, bạn có thể hỏi câu hỏi này: Tại sao bạn lại rời bỏ công việc cuối cùng của mình? Sau khi người dùng trả lời câu hỏi này, vui lòng bày tỏ sự hiểu biết và ủng hộ của bạn.\n",
|
||||
'prompt_variables': [],
|
||||
'completion_params': {
|
||||
'max_token': 300,
|
||||
'temperature': 0.8,
|
||||
'top_p': 0.9,
|
||||
'presence_penalty': 0.1,
|
||||
'frequency_penalty': 0.1,
|
||||
}
|
||||
},
|
||||
opening_statement='Xin chào, chào mừng đến với cuộc phỏng vấn của chúng tôi. Tôi là người phỏng vấn cho công ty công nghệ này và tôi sẽ kiểm tra kỹ năng phát triển web front-end của bạn. Tiếp theo, tôi sẽ hỏi bạn một số câu hỏi kỹ thuật. Hãy trả lời chúng càng kỹ lưỡng càng tốt. ',
|
||||
suggested_questions=None,
|
||||
pre_prompt="Bạn sẽ đóng vai người phỏng vấn cho một công ty công nghệ, kiểm tra kỹ năng phát triển web front-end của người dùng và đặt ra 5-10 câu hỏi kỹ thuật sắc bén.\n\nXin lưu ý:\n- Mỗi lần chỉ hỏi một câu hỏi.\n - Sau khi người dùng trả lời một câu hỏi, hãy hỏi trực tiếp câu hỏi tiếp theo mà không cố gắng sửa bất kỳ lỗi nào mà thí sinh mắc phải.\n- Nếu bạn cho rằng người dùng đã không trả lời đúng cho một số câu hỏi liên tiếp, hãy hỏi ít câu hỏi hơn.\n- Sau đặt câu hỏi cuối cùng, bạn có thể hỏi câu hỏi này: Tại sao bạn lại rời bỏ công việc cuối cùng của mình? Sau khi người dùng trả lời câu hỏi này, vui lòng bày tỏ sự hiểu biết và ủng hộ của bạn.\n",
|
||||
model=json.dumps({
|
||||
"provider": "openai",
|
||||
"name": "gpt-3.5-turbo",
|
||||
"mode": "chat",
|
||||
"completion_params": {
|
||||
"max_tokens": 300,
|
||||
"temperature": 0.8,
|
||||
"top_p": 0.9,
|
||||
"presence_penalty": 0.1,
|
||||
"frequency_penalty": 0.1
|
||||
}
|
||||
}),
|
||||
user_input_form=None
|
||||
)
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
@ -245,6 +245,8 @@ class AppApi(Resource):
|
||||
agent_mode = model_config.agent_mode_dict
|
||||
# decrypt agent tool parameters if it's secret-input
|
||||
for tool in agent_mode.get('tools') or []:
|
||||
if not isinstance(tool, dict) or len(tool.keys()) <= 3:
|
||||
continue
|
||||
agent_tool_entity = AgentToolEntity(**tool)
|
||||
# get tool
|
||||
try:
|
||||
|
||||
@ -52,6 +52,9 @@ class ModelConfigResource(Resource):
|
||||
masked_parameter_map = {}
|
||||
tool_map = {}
|
||||
for tool in agent_mode.get('tools') or []:
|
||||
if not isinstance(tool, dict) or len(tool.keys()) <= 3:
|
||||
continue
|
||||
|
||||
agent_tool_entity = AgentToolEntity(**tool)
|
||||
# get tool
|
||||
try:
|
||||
|
||||
@@ -1,6 +1,6 @@
import io

from flask import send_file
from flask import current_app, send_file
from flask_login import current_user
from flask_restful import Resource, reqparse
from werkzeug.exceptions import Forbidden
@@ -80,7 +80,8 @@ class ToolBuiltinProviderIconApi(Resource):
@setup_required
def get(self, provider):
icon_bytes, minetype = ToolManageService.get_builtin_tool_provider_icon(provider)
return send_file(io.BytesIO(icon_bytes), mimetype=minetype)
icon_cache_max_age = int(current_app.config.get('TOOL_ICON_CACHE_MAX_AGE'))
return send_file(io.BytesIO(icon_bytes), mimetype=minetype, max_age=icon_cache_max_age)

class ToolModelProviderIconApi(Resource):
@setup_required
@ -44,7 +44,7 @@ class AudioApi(Resource):
|
||||
response = AudioService.transcript_asr(
|
||||
tenant_id=app_model.tenant_id,
|
||||
file=file,
|
||||
end_user=end_user
|
||||
end_user=end_user.get_id()
|
||||
)
|
||||
|
||||
return response
|
||||
@ -75,7 +75,7 @@ class AudioApi(Resource):
|
||||
|
||||
|
||||
class TextApi(Resource):
|
||||
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True))
|
||||
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON))
|
||||
def post(self, app_model: App, end_user: EndUser):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument('text', type=str, required=True, nullable=False, location='json')
|
||||
@ -86,8 +86,8 @@ class TextApi(Resource):
|
||||
response = AudioService.transcript_tts(
|
||||
tenant_id=app_model.tenant_id,
|
||||
text=args['text'],
|
||||
end_user=end_user,
|
||||
voice=args['voice'] if args['voice'] else app_model.app_model_config.text_to_speech_dict.get('voice'),
|
||||
end_user=end_user.get_id(),
|
||||
voice=app_model.app_model_config.text_to_speech_dict.get('voice'),
|
||||
streaming=args['streaming']
|
||||
)
|
||||
|
||||
|
||||
@ -197,11 +197,11 @@ class DatasetSegmentApi(DatasetApiResource):
|
||||
|
||||
# validate args
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument('segments', type=dict, required=False, nullable=True, location='json')
|
||||
parser.add_argument('segment', type=dict, required=False, nullable=True, location='json')
|
||||
args = parser.parse_args()
|
||||
|
||||
SegmentService.segment_create_args_validate(args, document)
|
||||
segment = SegmentService.update_segment(args, segment, document, dataset)
|
||||
SegmentService.segment_create_args_validate(args['segment'], document)
|
||||
segment = SegmentService.update_segment(args['segment'], segment, document, dataset)
|
||||
return {
|
||||
'data': marshal(segment, segment_fields),
|
||||
'doc_form': document.doc_form
|
||||
|
||||
@ -35,7 +35,7 @@ from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotIni
|
||||
from core.file.file_obj import FileObj
|
||||
from core.model_runtime.entities.message_entities import PromptMessageRole
|
||||
from core.model_runtime.entities.model_entities import ModelType
|
||||
from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
|
||||
from core.model_runtime.errors.invoke import InvokeAuthorizationError
|
||||
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
|
||||
from core.prompt.prompt_template import PromptTemplateParser
|
||||
from core.provider_manager import ProviderManager
|
||||
@ -195,8 +195,6 @@ class ApplicationManager:
|
||||
except ValidationError as e:
|
||||
logger.exception("Validation Error when generating")
|
||||
queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
|
||||
except (ValueError, InvokeError) as e:
|
||||
queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
|
||||
except Exception as e:
|
||||
logger.exception("Unknown Error when generating")
|
||||
queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
|
||||
|
||||
@ -1,13 +1,14 @@
|
||||
import enum
|
||||
import importlib.util
|
||||
import importlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from collections import OrderedDict
|
||||
from typing import Any, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from core.utils.position_helper import sort_to_dict_by_position_map
|
||||
|
||||
|
||||
class ExtensionModule(enum.Enum):
|
||||
MODERATION = 'moderation'
|
||||
@ -36,7 +37,8 @@ class Extensible:
|
||||
|
||||
@classmethod
|
||||
def scan_extensions(cls):
|
||||
extensions = {}
|
||||
extensions: list[ModuleExtension] = []
|
||||
position_map = {}
|
||||
|
||||
# get the path of the current class
|
||||
current_path = os.path.abspath(cls.__module__.replace(".", os.path.sep) + '.py')
|
||||
@ -63,6 +65,7 @@ class Extensible:
|
||||
if os.path.exists(builtin_file_path):
|
||||
with open(builtin_file_path, encoding='utf-8') as f:
|
||||
position = int(f.read().strip())
|
||||
position_map[extension_name] = position
|
||||
|
||||
if (extension_name + '.py') not in file_names:
|
||||
logging.warning(f"Missing {extension_name}.py file in {subdir_path}, Skip.")
|
||||
@ -96,16 +99,15 @@ class Extensible:
|
||||
with open(json_path, encoding='utf-8') as f:
|
||||
json_data = json.load(f)
|
||||
|
||||
extensions[extension_name] = ModuleExtension(
|
||||
extensions.append(ModuleExtension(
|
||||
extension_class=extension_class,
|
||||
name=extension_name,
|
||||
label=json_data.get('label'),
|
||||
form_schema=json_data.get('form_schema'),
|
||||
builtin=builtin,
|
||||
position=position
|
||||
)
|
||||
))
|
||||
|
||||
sorted_items = sorted(extensions.items(), key=lambda x: (x[1].position is None, x[1].position))
|
||||
sorted_extensions = OrderedDict(sorted_items)
|
||||
sorted_extensions = sort_to_dict_by_position_map(position_map, extensions, lambda x: x.name)
|
||||
|
||||
return sorted_extensions
|
||||
|
||||
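The `scan_extensions` hunk above replaces the inline dict-and-sort logic with a `sort_to_dict_by_position_map` helper from `core.utils.position_helper`. The helper's implementation is not part of this comparison; a plausible sketch of what such a function does (hypothetical, for illustration only):

```python
from collections import OrderedDict
from collections.abc import Callable
from typing import Any


def sort_to_dict_by_position_map(position_map: dict[str, int], items: list[Any],
                                 name_func: Callable[[Any], str]) -> OrderedDict:
    # Items with a known position come first, ordered by that position;
    # items missing from the map keep a stable order at the end.
    def sort_key(item: Any) -> tuple[bool, int]:
        position = position_map.get(name_func(item))
        return (position is None, position if position is not None else 0)

    return OrderedDict((name_func(item), item) for item in sorted(items, key=sort_key))
```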
@ -145,7 +145,7 @@ class BaseAssistantApplicationRunner(AppRunner):
|
||||
result += f"result link: {response.message}. please tell user to check it."
|
||||
elif response.type == ToolInvokeMessage.MessageType.IMAGE_LINK or \
|
||||
response.type == ToolInvokeMessage.MessageType.IMAGE:
|
||||
result += "image has been created and sent to user already, you should tell user to check it now."
|
||||
result += "image has been created and sent to user already, you do not need to create it, just tell the user to check it now."
|
||||
else:
|
||||
result += f"tool response: {response.message}."
|
||||
|
||||
@ -566,7 +566,11 @@ class BaseAssistantApplicationRunner(AppRunner):
|
||||
tools = tools.split(';')
|
||||
tool_calls: list[AssistantPromptMessage.ToolCall] = []
|
||||
tool_call_response: list[ToolPromptMessage] = []
|
||||
tool_inputs = json.loads(agent_thought.tool_input)
|
||||
try:
|
||||
tool_inputs = json.loads(agent_thought.tool_input)
|
||||
except Exception as e:
|
||||
logging.warning("tool execution error: {}, tool_input: {}.".format(str(e), agent_thought.tool_input))
|
||||
tool_inputs = { agent_thought.tool: agent_thought.tool_input }
|
||||
for tool in tools:
|
||||
# generate a uuid for tool call
|
||||
tool_call_id = str(uuid.uuid4())
|
||||
@ -599,4 +603,4 @@ class BaseAssistantApplicationRunner(AppRunner):
|
||||
|
||||
db.session.close()
|
||||
|
||||
return result
|
||||
return result
|
||||
|
||||
@ -182,7 +182,7 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
|
||||
delta=LLMResultChunkDelta(
|
||||
index=0,
|
||||
message=AssistantPromptMessage(
|
||||
content=json.dumps(chunk)
|
||||
content=json.dumps(chunk, ensure_ascii=False) # if ensure_ascii=True, the text in webui maybe garbled text
|
||||
),
|
||||
usage=None
|
||||
)
|
||||
@ -667,4 +667,4 @@ class AssistantCotApplicationRunner(BaseAssistantApplicationRunner):
|
||||
try:
|
||||
return json.dumps(tools, ensure_ascii=False)
|
||||
except json.JSONDecodeError:
|
||||
return json.dumps(tools)
|
||||
return json.dumps(tools)
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import concurrent.futures
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
@ -650,17 +651,44 @@ class IndexingRunner:
|
||||
# chunk nodes by chunk size
|
||||
indexing_start_at = time.perf_counter()
|
||||
tokens = 0
|
||||
chunk_size = 100
|
||||
chunk_size = 10
|
||||
|
||||
embedding_model_type_instance = None
|
||||
if embedding_model_instance:
|
||||
embedding_model_type_instance = embedding_model_instance.model_type_instance
|
||||
embedding_model_type_instance = cast(TextEmbeddingModel, embedding_model_type_instance)
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
|
||||
futures = []
|
||||
for i in range(0, len(documents), chunk_size):
|
||||
chunk_documents = documents[i:i + chunk_size]
|
||||
futures.append(executor.submit(self._process_chunk, current_app._get_current_object(), index_processor,
|
||||
chunk_documents, dataset,
|
||||
dataset_document, embedding_model_instance,
|
||||
embedding_model_type_instance))
|
||||
|
||||
for i in range(0, len(documents), chunk_size):
|
||||
for future in futures:
|
||||
tokens += future.result()
|
||||
|
||||
indexing_end_at = time.perf_counter()
|
||||
|
||||
# update document status to completed
|
||||
self._update_document_index_status(
|
||||
document_id=dataset_document.id,
|
||||
after_indexing_status="completed",
|
||||
extra_update_params={
|
||||
DatasetDocument.tokens: tokens,
|
||||
DatasetDocument.completed_at: datetime.datetime.utcnow(),
|
||||
DatasetDocument.indexing_latency: indexing_end_at - indexing_start_at,
|
||||
}
|
||||
)
|
||||
|
||||
def _process_chunk(self, flask_app, index_processor, chunk_documents, dataset, dataset_document,
|
||||
embedding_model_instance, embedding_model_type_instance):
|
||||
with flask_app.app_context():
|
||||
# check document is paused
|
||||
self._check_document_paused_status(dataset_document.id)
|
||||
chunk_documents = documents[i:i + chunk_size]
|
||||
|
||||
tokens = 0
|
||||
if dataset.indexing_technique == 'high_quality' or embedding_model_type_instance:
|
||||
tokens += sum(
|
||||
embedding_model_type_instance.get_num_tokens(
|
||||
@ -670,9 +698,9 @@ class IndexingRunner:
|
||||
)
|
||||
for document in chunk_documents
|
||||
)
|
||||
|
||||
# load index
|
||||
index_processor.load(dataset, chunk_documents)
|
||||
db.session.add(dataset)
|
||||
|
||||
document_ids = [document.metadata['doc_id'] for document in chunk_documents]
|
||||
db.session.query(DocumentSegment).filter(
|
||||
@ -687,18 +715,7 @@ class IndexingRunner:
|
||||
|
||||
db.session.commit()
|
||||
|
||||
indexing_end_at = time.perf_counter()
|
||||
|
||||
# update document status to completed
|
||||
self._update_document_index_status(
|
||||
document_id=dataset_document.id,
|
||||
after_indexing_status="completed",
|
||||
extra_update_params={
|
||||
DatasetDocument.tokens: tokens,
|
||||
DatasetDocument.completed_at: datetime.datetime.utcnow(),
|
||||
DatasetDocument.indexing_latency: indexing_end_at - indexing_start_at,
|
||||
}
|
||||
)
|
||||
return tokens
|
||||
|
||||
def _check_document_paused_status(self, document_id: str):
|
||||
indexing_cache_key = 'document_{}_is_paused'.format(document_id)
|
||||
|
||||
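The `indexing_runner.py` hunks above split the document list into chunks of 10 and hand each chunk to a thread pool, with each worker re-entering the Flask app context before touching the database. A minimal, self-contained sketch of just the chunking-and-aggregation pattern, with a trivial token estimate standing in for the real embedding model call:

```python
import concurrent.futures

CHUNK_SIZE = 10    # documents handed to each worker, matching the new default above
MAX_WORKERS = 10   # matches ThreadPoolExecutor(max_workers=10) in the diff


def process_chunk(chunk: list[str]) -> int:
    # Stand-in for _process_chunk: the real code wraps this in flask_app.app_context()
    # and asks the embedding model for token counts before loading the index.
    return sum(len(doc.split()) for doc in chunk)


def index_documents(documents: list[str]) -> int:
    tokens = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = [
            executor.submit(process_chunk, documents[i:i + CHUNK_SIZE])
            for i in range(0, len(documents), CHUNK_SIZE)
        ]
        for future in futures:
            tokens += future.result()
    return tokens


# Example: index_documents(["chunk one text", "chunk two text"]) returns 6
```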
@@ -147,7 +147,7 @@

- `input` (float) Input price, i.e., Prompt price
- `output` (float) Output price, i.e., returned content price
- `unit` (float) Pricing unit, e.g., per 100K price is `0.000001`
- `unit` (float) Pricing unit, e.g., if the price is meausred in 1M tokens, the corresponding token amount for the unit price is `0.000001`.
- `currency` (string) Currency unit

### ProviderCredentialSchema

@@ -149,7 +149,7 @@

- `input` (float) 输入单价,即 Prompt 单价
- `output` (float) 输出单价,即返回内容单价
- `unit` (float) 价格单位,如:每 100K 的单价为 `0.000001`
- `unit` (float) 价格单位,如以 1M tokens 计价,则单价对应的单位 token 数为 `0.000001`
- `currency` (string) 货币单位

### ProviderCredentialSchema
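A small worked example of how these pricing fields combine (an assumption drawn from the description above, not a quote of Dify's billing code): with `unit = 0.000001` the listed prices are effectively per 1M tokens, so a charge is `tokens × unit × price`.

```python
def token_cost(tokens: int, price: float, unit: float = 0.000001) -> float:
    # price is the per-unit figure from a model YAML (e.g. input: '0.25', unit: '0.000001'),
    # so with unit = 1e-6 the price reads as currency per 1M tokens.
    return tokens * unit * price


# 2,000,000 prompt tokens at an input price of 0.25 per 1M tokens -> 0.50
assert abs(token_cost(2_000_000, 0.25) - 0.50) < 1e-9
```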
@@ -73,8 +73,8 @@ PARAMETER_RULE_TEMPLATE: dict[DefaultParameterName, dict] = {
},
'type': 'int',
'help': {
'en_US': 'The maximum number of tokens to generate. Requests can use up to 2048 tokens shared between prompt and completion.',
'zh_Hans': '要生成的标记的最大数量。请求可以使用最多2048个标记,这些标记在提示和完成之间共享。',
'en_US': 'Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.',
'zh_Hans': '指定生成结果长度的上限。如果生成结果截断,可以调大该参数。',
},
'required': False,
'default': 64,
@@ -133,7 +133,7 @@ class ModelPropertyKey(Enum):
DEFAULT_VOICE = "default_voice"
VOICES = "voices"
WORD_LIMIT = "word_limit"
AUDOI_TYPE = "audio_type"
AUDIO_TYPE = "audio_type"
MAX_WORKERS = "max_workers"
@ -18,6 +18,7 @@ from core.model_runtime.entities.model_entities import (
|
||||
)
|
||||
from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
|
||||
from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer
|
||||
from core.utils.position_helper import get_position_map, sort_by_position_map
|
||||
|
||||
|
||||
class AIModel(ABC):
|
||||
@ -148,15 +149,7 @@ class AIModel(ABC):
|
||||
]
|
||||
|
||||
# get _position.yaml file path
|
||||
position_file_path = os.path.join(provider_model_type_path, '_position.yaml')
|
||||
|
||||
# read _position.yaml file
|
||||
position_map = {}
|
||||
if os.path.exists(position_file_path):
|
||||
with open(position_file_path, encoding='utf-8') as f:
|
||||
positions = yaml.safe_load(f)
|
||||
# convert list to dict with key as model provider name, value as index
|
||||
position_map = {position: index for index, position in enumerate(positions)}
|
||||
position_map = get_position_map(provider_model_type_path)
|
||||
|
||||
# traverse all model_schema_yaml_paths
|
||||
for model_schema_yaml_path in model_schema_yaml_paths:
|
||||
@ -206,8 +199,7 @@ class AIModel(ABC):
|
||||
model_schemas.append(model_schema)
|
||||
|
||||
# resort model schemas by position
|
||||
if position_map:
|
||||
model_schemas.sort(key=lambda x: position_map.get(x.model, 999))
|
||||
model_schemas = sort_by_position_map(position_map, model_schemas, lambda x: x.model)
|
||||
|
||||
# cache model schemas
|
||||
self.model_schemas = model_schemas
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
import importlib
|
||||
import os
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
@ -7,6 +6,7 @@ import yaml
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, ModelType
|
||||
from core.model_runtime.entities.provider_entities import ProviderEntity
|
||||
from core.model_runtime.model_providers.__base.ai_model import AIModel
|
||||
from core.utils.module_import_helper import get_subclasses_from_module, import_module_from_source
|
||||
|
||||
|
||||
class ModelProvider(ABC):
|
||||
@ -104,17 +104,10 @@ class ModelProvider(ABC):
|
||||
|
||||
# Dynamic loading {model_type_name}.py file and find the subclass of AIModel
|
||||
parent_module = '.'.join(self.__class__.__module__.split('.')[:-1])
|
||||
spec = importlib.util.spec_from_file_location(f"{parent_module}.{model_type_name}.{model_type_name}", model_type_py_path)
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(mod)
|
||||
|
||||
model_class = None
|
||||
for name, obj in vars(mod).items():
|
||||
if (isinstance(obj, type) and issubclass(obj, AIModel) and not obj.__abstractmethods__
|
||||
and obj != AIModel and obj.__module__ == mod.__name__):
|
||||
model_class = obj
|
||||
break
|
||||
|
||||
mod = import_module_from_source(
|
||||
f'{parent_module}.{model_type_name}.{model_type_name}', model_type_py_path)
|
||||
model_class = next(filter(lambda x: x.__module__ == mod.__name__ and not x.__abstractmethods__,
|
||||
get_subclasses_from_module(mod, AIModel)), None)
|
||||
if not model_class:
|
||||
raise Exception(f'Missing AIModel Class for model type {model_type} in {model_type_py_path}')
|
||||
|
||||
|
||||
@ -94,8 +94,8 @@ class TTSModel(AIModel):
|
||||
"""
|
||||
model_schema = self.get_model_schema(model, credentials)
|
||||
|
||||
if model_schema and ModelPropertyKey.AUDOI_TYPE in model_schema.model_properties:
|
||||
return model_schema.model_properties[ModelPropertyKey.AUDOI_TYPE]
|
||||
if model_schema and ModelPropertyKey.AUDIO_TYPE in model_schema.model_properties:
|
||||
return model_schema.model_properties[ModelPropertyKey.AUDIO_TYPE]
|
||||
|
||||
def _get_model_word_limit(self, model: str, credentials: dict) -> int:
|
||||
"""
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
- anthropic
|
||||
- azure_openai
|
||||
- google
|
||||
- nvidia
|
||||
- cohere
|
||||
- bedrock
|
||||
- togetherai
|
||||
@ -10,6 +11,8 @@
|
||||
- groq
|
||||
- replicate
|
||||
- huggingface_hub
|
||||
- xinference
|
||||
- triton_inference_server
|
||||
- zhipuai
|
||||
- baichuan
|
||||
- spark
|
||||
@ -19,7 +22,7 @@
|
||||
- moonshot
|
||||
- jina
|
||||
- chatglm
|
||||
- xinference
|
||||
- yi
|
||||
- openllm
|
||||
- localai
|
||||
- openai_api_compatible
|
||||
|
||||
@ -0,0 +1,37 @@
|
||||
model: claude-3-haiku-20240307
|
||||
label:
|
||||
en_US: claude-3-haiku-20240307
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.25'
|
||||
output: '1.25'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@ -342,12 +342,20 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
|
||||
Convert prompt messages to dict list and system
|
||||
"""
|
||||
system = ""
|
||||
prompt_message_dicts = []
|
||||
|
||||
first_loop = True
|
||||
for message in prompt_messages:
|
||||
if isinstance(message, SystemPromptMessage):
|
||||
system += message.content + ("\n" if not system else "")
|
||||
else:
|
||||
message.content=message.content.strip()
|
||||
if first_loop:
|
||||
system=message.content
|
||||
first_loop=False
|
||||
else:
|
||||
system+="\n"
|
||||
system+=message.content
|
||||
|
||||
prompt_message_dicts = []
|
||||
for message in prompt_messages:
|
||||
if not isinstance(message, SystemPromptMessage):
|
||||
prompt_message_dicts.append(self._convert_prompt_message_to_dict(message))
|
||||
|
||||
return system, prompt_message_dicts
|
||||
|
||||
@@ -15,10 +15,11 @@ from core.model_runtime.model_providers.azure_openai._constant import AZURE_OPEN
class _CommonAzureOpenAI:
@staticmethod
def _to_credential_kwargs(credentials: dict) -> dict:
api_version = credentials.get('openai_api_version', AZURE_OPENAI_API_VERSION)
credentials_kwargs = {
"api_key": credentials['openai_api_key'],
"azure_endpoint": credentials['openai_api_base'],
"api_version": AZURE_OPENAI_API_VERSION,
"api_version": api_version,
"timeout": Timeout(315.0, read=300.0, write=10.0, connect=5.0),
"max_retries": 1,
}
@ -14,8 +14,7 @@ from core.model_runtime.entities.model_entities import (
|
||||
PriceConfig,
|
||||
)
|
||||
|
||||
AZURE_OPENAI_API_VERSION = '2023-12-01-preview'
|
||||
|
||||
AZURE_OPENAI_API_VERSION = '2024-02-15-preview'
|
||||
|
||||
def _get_max_tokens(default: int, min_val: int, max_val: int) -> ParameterRule:
|
||||
rule = ParameterRule(
|
||||
@ -124,6 +123,65 @@ LLM_BASE_MODELS = [
|
||||
)
|
||||
)
|
||||
),
|
||||
AzureBaseModel(
|
||||
base_model_name='gpt-35-turbo-0125',
|
||||
entity=AIModelEntity(
|
||||
model='fake-deployment-name',
|
||||
label=I18nObject(
|
||||
en_US='fake-deployment-name-label',
|
||||
),
|
||||
model_type=ModelType.LLM,
|
||||
features=[
|
||||
ModelFeature.AGENT_THOUGHT,
|
||||
ModelFeature.MULTI_TOOL_CALL,
|
||||
ModelFeature.STREAM_TOOL_CALL,
|
||||
],
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_properties={
|
||||
ModelPropertyKey.MODE: LLMMode.CHAT.value,
|
||||
ModelPropertyKey.CONTEXT_SIZE: 16385,
|
||||
},
|
||||
parameter_rules=[
|
||||
ParameterRule(
|
||||
name='temperature',
|
||||
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
|
||||
),
|
||||
ParameterRule(
|
||||
name='top_p',
|
||||
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
|
||||
),
|
||||
ParameterRule(
|
||||
name='presence_penalty',
|
||||
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
|
||||
),
|
||||
ParameterRule(
|
||||
name='frequency_penalty',
|
||||
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
|
||||
),
|
||||
_get_max_tokens(default=512, min_val=1, max_val=4096),
|
||||
ParameterRule(
|
||||
name='response_format',
|
||||
label=I18nObject(
|
||||
zh_Hans='回复格式',
|
||||
en_US='response_format'
|
||||
),
|
||||
type='string',
|
||||
help=I18nObject(
|
||||
zh_Hans='指定模型必须输出的格式',
|
||||
en_US='specifying the format that the model must output'
|
||||
),
|
||||
required=False,
|
||||
options=['text', 'json_object']
|
||||
),
|
||||
],
|
||||
pricing=PriceConfig(
|
||||
input=0.0005,
|
||||
output=0.0015,
|
||||
unit=0.001,
|
||||
currency='USD',
|
||||
)
|
||||
)
|
||||
),
|
||||
AzureBaseModel(
|
||||
base_model_name='gpt-4',
|
||||
entity=AIModelEntity(
|
||||
@ -274,6 +332,81 @@ LLM_BASE_MODELS = [
|
||||
)
|
||||
)
|
||||
),
|
||||
AzureBaseModel(
|
||||
base_model_name='gpt-4-0125-preview',
|
||||
entity=AIModelEntity(
|
||||
model='fake-deployment-name',
|
||||
label=I18nObject(
|
||||
en_US='fake-deployment-name-label',
|
||||
),
|
||||
model_type=ModelType.LLM,
|
||||
features=[
|
||||
ModelFeature.AGENT_THOUGHT,
|
||||
ModelFeature.MULTI_TOOL_CALL,
|
||||
ModelFeature.STREAM_TOOL_CALL,
|
||||
],
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_properties={
|
||||
ModelPropertyKey.MODE: LLMMode.CHAT.value,
|
||||
ModelPropertyKey.CONTEXT_SIZE: 128000,
|
||||
},
|
||||
parameter_rules=[
|
||||
ParameterRule(
|
||||
name='temperature',
|
||||
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
|
||||
),
|
||||
ParameterRule(
|
||||
name='top_p',
|
||||
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
|
||||
),
|
||||
ParameterRule(
|
||||
name='presence_penalty',
|
||||
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
|
||||
),
|
||||
ParameterRule(
|
||||
name='frequency_penalty',
|
||||
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
|
||||
),
|
||||
_get_max_tokens(default=512, min_val=1, max_val=4096),
|
||||
ParameterRule(
|
||||
name='seed',
|
||||
label=I18nObject(
|
||||
zh_Hans='种子',
|
||||
en_US='Seed'
|
||||
),
|
||||
type='int',
|
||||
help=I18nObject(
|
||||
zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint 响应参数来监视变化。',
|
||||
en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.'
|
||||
),
|
||||
required=False,
|
||||
precision=2,
|
||||
min=0,
|
||||
max=1,
|
||||
),
|
||||
ParameterRule(
|
||||
name='response_format',
|
||||
label=I18nObject(
|
||||
zh_Hans='回复格式',
|
||||
en_US='response_format'
|
||||
),
|
||||
type='string',
|
||||
help=I18nObject(
|
||||
zh_Hans='指定模型必须输出的格式',
|
||||
en_US='specifying the format that the model must output'
|
||||
),
|
||||
required=False,
|
||||
options=['text', 'json_object']
|
||||
),
|
||||
],
|
||||
pricing=PriceConfig(
|
||||
input=0.01,
|
||||
output=0.03,
|
||||
unit=0.001,
|
||||
currency='USD',
|
||||
)
|
||||
)
|
||||
),
|
||||
AzureBaseModel(
|
||||
base_model_name='gpt-4-1106-preview',
|
||||
entity=AIModelEntity(
|
||||
@ -628,7 +761,7 @@ TTS_BASE_MODELS = [
|
||||
},
|
||||
],
|
||||
ModelPropertyKey.WORD_LIMIT: 120,
|
||||
ModelPropertyKey.AUDOI_TYPE: 'mp3',
|
||||
ModelPropertyKey.AUDIO_TYPE: 'mp3',
|
||||
ModelPropertyKey.MAX_WORKERS: 5
|
||||
},
|
||||
pricing=PriceConfig(
|
||||
@ -682,7 +815,7 @@ TTS_BASE_MODELS = [
|
||||
},
|
||||
],
|
||||
ModelPropertyKey.WORD_LIMIT: 120,
|
||||
ModelPropertyKey.AUDOI_TYPE: 'mp3',
|
||||
ModelPropertyKey.AUDIO_TYPE: 'mp3',
|
||||
ModelPropertyKey.MAX_WORKERS: 5
|
||||
},
|
||||
pricing=PriceConfig(
|
||||
|
||||
@@ -46,6 +46,22 @@ model_credential_schema:
      placeholder:
        zh_Hans: 在此输入您的 API Key
        en_US: Enter your API key here
    - variable: openai_api_version
      label:
        zh_Hans: API 版本
        en_US: API Version
      type: select
      required: true
      options:
        - label:
            en_US: 2024-02-15-preview
          value: 2024-02-15-preview
        - label:
            en_US: 2023-12-01-preview
          value: 2023-12-01-preview
      placeholder:
        zh_Hans: 在此选择您的 API 版本
        en_US: Select your API Version here
    - variable: base_model_name
      label:
        en_US: Base Model
@@ -59,6 +75,12 @@ model_credential_schema:
          show_on:
            - variable: __model_type
              value: llm
        - label:
            en_US: gpt-35-turbo-0125
          value: gpt-35-turbo-0125
          show_on:
            - variable: __model_type
              value: llm
        - label:
            en_US: gpt-35-turbo-16k
          value: gpt-35-turbo-16k
@@ -77,6 +99,12 @@ model_credential_schema:
          show_on:
            - variable: __model_type
              value: llm
        - label:
            en_US: gpt-4-0125-preview
          value: gpt-4-0125-preview
          show_on:
            - variable: __model_type
              value: llm
        - label:
            en_US: gpt-4-1106-preview
          value: gpt-4-1106-preview
@@ -124,7 +124,7 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
            elif err == 'insufficient_quota':
                raise InsufficientAccountBalance(msg)
            elif err == 'invalid_authentication':
-                   raise InvalidAuthenticationError(msg)
+               raise InvalidAuthenticationError(msg)
            elif err and 'rate' in err:
                raise RateLimitReachedError(msg)
            elif err and 'internal' in err:
@@ -18,9 +18,10 @@ class BedrockProvider(ModelProvider):
        try:
            model_instance = self.get_model_instance(ModelType.LLM)

-           # Use `gemini-pro` model for validate,
+           # Use `amazon.titan-text-lite-v1` model by default for validating credentials
+           model_for_validation = credentials.get('model_for_validation', 'amazon.titan-text-lite-v1')
            model_instance.validate_credentials(
-               model='amazon.titan-text-lite-v1',
+               model=model_for_validation,
                credentials=credentials
            )
        except CredentialsValidateFailedError as ex:
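Note on the hunk above: credential validation is no longer hard-wired to amazon.titan-text-lite-v1; it uses whatever the user supplies as model_for_validation and only falls back to the Titan model. A minimal illustration of how a caller might exercise this (key names follow the provider_credential_schema in this change set; the import path is assumed from the provider loading convention, and every value below is a placeholder, not a real credential):

# Illustrative sketch only - not part of the diff.
from core.model_runtime.model_providers.bedrock.bedrock import BedrockProvider  # path assumed

credentials = {
    "aws_access_key_id": "AKIA...",        # placeholder
    "aws_secret_access_key": "...",        # placeholder
    "aws_region": "us-east-1",
    # optional: a model this account can actually invoke; validation falls back
    # to 'amazon.titan-text-lite-v1' when this key is omitted
    "model_for_validation": "anthropic.claude-3-sonnet-20240229-v1:0",
}

BedrockProvider().validate_provider_credentials(credentials)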
@@ -48,24 +48,33 @@ provider_credential_schema:
      - value: us-east-1
        label:
          en_US: US East (N. Virginia)
-         zh_Hans: US East (N. Virginia)
+         zh_Hans: 美国东部 (弗吉尼亚北部)
      - value: us-west-2
        label:
          en_US: US West (Oregon)
-         zh_Hans: US West (Oregon)
+         zh_Hans: 美国西部 (俄勒冈州)
      - value: ap-southeast-1
        label:
          en_US: Asia Pacific (Singapore)
-         zh_Hans: Asia Pacific (Singapore)
+         zh_Hans: 亚太地区 (新加坡)
      - value: ap-northeast-1
        label:
          en_US: Asia Pacific (Tokyo)
-         zh_Hans: Asia Pacific (Tokyo)
+         zh_Hans: 亚太地区 (东京)
      - value: eu-central-1
        label:
          en_US: Europe (Frankfurt)
-         zh_Hans: Europe (Frankfurt)
+         zh_Hans: 欧洲 (法兰克福)
      - value: us-gov-west-1
        label:
          en_US: AWS GovCloud (US-West)
          zh_Hans: AWS GovCloud (US-West)
+   - variable: model_for_validation
+     required: false
+     label:
+       en_US: Available Model Name
+       zh_Hans: 可用模型名称
+     type: secret-input
+     placeholder:
+       en_US: A model you have access to (e.g. amazon.titan-text-lite-v1) for validation.
+       zh_Hans: 为了进行验证,请输入一个您可用的模型名称 (例如:amazon.titan-text-lite-v1)
@@ -4,6 +4,8 @@
 - anthropic.claude-v1
 - anthropic.claude-v2
 - anthropic.claude-v2:1
+- anthropic.claude-3-sonnet-v1:0
+- anthropic.claude-3-haiku-v1:0
 - cohere.command-light-text-v14
 - cohere.command-text-v14
 - meta.llama2-13b-chat-v1
@@ -0,0 +1,57 @@
model: anthropic.claude-3-haiku-20240307-v1:0
label:
  en_US: Claude 3 Haiku
model_type: llm
features:
  - agent-thought
  - vision
model_properties:
  mode: chat
  context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
  - name: max_tokens
    use_template: max_tokens
    required: true
    type: int
    default: 4096
    min: 1
    max: 4096
    help:
      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
  # docs: https://docs.anthropic.com/claude/docs/system-prompts
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip docs from aws has error, max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
  input: '0.003'
  output: '0.015'
  unit: '0.001'
  currency: USD
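The YAML above is declarative: each parameter_rules entry gives the runtime the type, bounds and default it needs to sanitise a user-supplied value before the Bedrock call. A hypothetical clamping helper, only to make the min/max/default semantics concrete (this is not the runtime's actual validator; names are illustrative):

def apply_rule(rule: dict, value):
    # Fall back to the declared default when the user supplied nothing.
    if value is None:
        return rule.get("default")
    # Coerce to the declared type, then clamp into the declared range.
    value = int(value) if rule["type"] == "int" else float(value)
    if "min" in rule:
        value = max(rule["min"], value)
    if "max" in rule:
        value = min(rule["max"], value)
    return value

top_k_rule = {"name": "top_k", "type": "int", "default": 0, "min": 0, "max": 500}
print(apply_rule(top_k_rule, 800))  # -> 500, clamped to the documented Bedrock limit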
@ -0,0 +1,56 @@
|
||||
model: anthropic.claude-3-sonnet-20240229-v1:0
|
||||
label:
|
||||
en_US: Claude 3 Sonnet
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.00025'
|
||||
output: '0.00125'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@ -1,33 +1,50 @@
|
||||
model: anthropic.claude-instant-v1
|
||||
label:
|
||||
en_US: Claude Instant V1
|
||||
en_US: Claude Instant 1
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 100000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: topP
|
||||
use_template: top_p
|
||||
- name: topK
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top K
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
default: 250
|
||||
min: 0
|
||||
max: 500
|
||||
- name: max_tokens_to_sample
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.0008'
|
||||
output: '0.0024'
|
||||
|
||||
@ -1,33 +1,50 @@
|
||||
model: anthropic.claude-v1
|
||||
label:
|
||||
en_US: Claude V1
|
||||
en_US: Claude 1
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 100000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top K
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
default: 250
|
||||
min: 0
|
||||
max: 500
|
||||
- name: max_tokens_to_sample
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.008'
|
||||
output: '0.024'
|
||||
|
||||
@ -1,33 +1,50 @@
|
||||
model: anthropic.claude-v2:1
|
||||
label:
|
||||
en_US: Claude V2.1
|
||||
en_US: Claude 2.1
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top K
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
default: 250
|
||||
min: 0
|
||||
max: 500
|
||||
- name: max_tokens_to_sample
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.008'
|
||||
output: '0.024'
|
||||
|
||||
@ -1,33 +1,50 @@
|
||||
model: anthropic.claude-v2
|
||||
label:
|
||||
en_US: Claude V2
|
||||
en_US: Claude 2
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 100000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top K
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
default: 250
|
||||
min: 0
|
||||
max: 500
|
||||
- name: max_tokens_to_sample
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.008'
|
||||
output: '0.024'
|
||||
|
||||
@@ -1,9 +1,22 @@
import base64
import json
import logging
import mimetypes
import time
from collections.abc import Generator
-from typing import Optional, Union
+from typing import Optional, Union, cast

import boto3
import requests
from anthropic import AnthropicBedrock, Stream
from anthropic.types import (
    ContentBlockDeltaEvent,
    Message,
    MessageDeltaEvent,
    MessageStartEvent,
    MessageStopEvent,
    MessageStreamEvent,
)
from botocore.config import Config
from botocore.exceptions import (
    ClientError,
@@ -13,14 +26,18 @@ from botocore.exceptions import (
    UnknownServiceError,
)

-from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import (
    AssistantPromptMessage,
    ImagePromptMessageContent,
    PromptMessage,
    PromptMessageContentType,
    PromptMessageTool,
    SystemPromptMessage,
    TextPromptMessageContent,
    UserPromptMessage,
)
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
@@ -54,9 +71,293 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
        :param user: unique user id
        :return: full response or stream response chunk generator result
        """
-       # invoke model
+
+       # invoke anthropic models via anthropic official SDK
+       if "anthropic" in model:
+           return self._generate_anthropic(model, credentials, prompt_messages, model_parameters, stop, stream, user)
+       # invoke other models via boto3 client
        return self._generate(model, credentials, prompt_messages, model_parameters, stop, stream, user)

    def _generate_anthropic(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict,
                            stop: Optional[list[str]] = None, stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
        """
        Invoke Anthropic large language model

        :param model: model name
        :param credentials: model credentials
        :param prompt_messages: prompt messages
        :param model_parameters: model parameters
        :param stop: stop words
        :param stream: is stream response
        :return: full response or stream response chunk generator result
        """
        # use Anthropic official SDK references
        # - https://docs.anthropic.com/claude/reference/claude-on-amazon-bedrock
        # - https://github.com/anthropics/anthropic-sdk-python
        client = AnthropicBedrock(
            aws_access_key=credentials["aws_access_key_id"],
            aws_secret_key=credentials["aws_secret_access_key"],
            aws_region=credentials["aws_region"],
        )

        extra_model_kwargs = {}
        if stop:
            extra_model_kwargs['stop_sequences'] = stop

        # Notice: If you request the current version of the SDK to the bedrock server,
        # you will get the following error message and you need to wait for the service or SDK to be updated.
        # Response: Error code: 400
        # {'message': 'Malformed input request: #: subject must not be valid against schema
        # {"required":["messages"]}#: extraneous key [metadata] is not permitted, please reformat your input and try again.'}
        # TODO: Open in the future when the interface is properly supported
        # if user:
        #     ref: https://github.com/anthropics/anthropic-sdk-python/blob/e84645b07ca5267066700a104b4d8d6a8da1383d/src/anthropic/resources/messages.py#L465
        #     extra_model_kwargs['metadata'] = message_create_params.Metadata(user_id=user)

        system, prompt_message_dicts = self._convert_claude_prompt_messages(prompt_messages)

        if system:
            extra_model_kwargs['system'] = system

        response = client.messages.create(
            model=model,
            messages=prompt_message_dicts,
            stream=stream,
            **model_parameters,
            **extra_model_kwargs
        )

        if stream:
            return self._handle_claude_stream_response(model, credentials, response, prompt_messages)

        return self._handle_claude_response(model, credentials, response, prompt_messages)
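_convert_claude_prompt_messages (defined further down in this file) is what produces the `system` string and `prompt_message_dicts` passed to client.messages.create above: system prompts are concatenated into a single string, and every other message becomes an Anthropic Messages API dict. A small illustration of the resulting shapes, with placeholder values:

# Illustrative shapes only; values are placeholders.
system = "You are a helpful assistant."
prompt_message_dicts = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this picture?"},
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/png",
                    "data": "iVBORw0KGgo...",  # placeholder base64 payload
                },
            },
        ],
    },
    {"role": "assistant", "content": "It looks like a bar chart."},
]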
|
||||
def _handle_claude_response(self, model: str, credentials: dict, response: Message,
|
||||
prompt_messages: list[PromptMessage]) -> LLMResult:
|
||||
"""
|
||||
Handle llm chat response
|
||||
|
||||
:param model: model name
|
||||
:param credentials: credentials
|
||||
:param response: response
|
||||
:param prompt_messages: prompt messages
|
||||
:return: full response chunk generator result
|
||||
"""
|
||||
|
||||
# transform assistant message to prompt message
|
||||
assistant_prompt_message = AssistantPromptMessage(
|
||||
content=response.content[0].text
|
||||
)
|
||||
|
||||
# calculate num tokens
|
||||
if response.usage:
|
||||
# transform usage
|
||||
prompt_tokens = response.usage.input_tokens
|
||||
completion_tokens = response.usage.output_tokens
|
||||
else:
|
||||
# calculate num tokens
|
||||
prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
|
||||
completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
|
||||
|
||||
# transform usage
|
||||
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
|
||||
|
||||
# transform response
|
||||
response = LLMResult(
|
||||
model=response.model,
|
||||
prompt_messages=prompt_messages,
|
||||
message=assistant_prompt_message,
|
||||
usage=usage
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def _handle_claude_stream_response(self, model: str, credentials: dict, response: Stream[MessageStreamEvent],
|
||||
prompt_messages: list[PromptMessage], ) -> Generator:
|
||||
"""
|
||||
Handle llm chat stream response
|
||||
|
||||
:param model: model name
|
||||
:param credentials: credentials
|
||||
:param response: response
|
||||
:param prompt_messages: prompt messages
|
||||
:return: full response or stream response chunk generator result
|
||||
"""
|
||||
|
||||
try:
|
||||
full_assistant_content = ''
|
||||
return_model = None
|
||||
input_tokens = 0
|
||||
output_tokens = 0
|
||||
finish_reason = None
|
||||
index = 0
|
||||
|
||||
for chunk in response:
|
||||
if isinstance(chunk, MessageStartEvent):
|
||||
return_model = chunk.message.model
|
||||
input_tokens = chunk.message.usage.input_tokens
|
||||
elif isinstance(chunk, MessageDeltaEvent):
|
||||
output_tokens = chunk.usage.output_tokens
|
||||
finish_reason = chunk.delta.stop_reason
|
||||
elif isinstance(chunk, MessageStopEvent):
|
||||
usage = self._calc_response_usage(model, credentials, input_tokens, output_tokens)
|
||||
yield LLMResultChunk(
|
||||
model=return_model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(
|
||||
index=index + 1,
|
||||
message=AssistantPromptMessage(
|
||||
content=''
|
||||
),
|
||||
finish_reason=finish_reason,
|
||||
usage=usage
|
||||
)
|
||||
)
|
||||
elif isinstance(chunk, ContentBlockDeltaEvent):
|
||||
chunk_text = chunk.delta.text if chunk.delta.text else ''
|
||||
full_assistant_content += chunk_text
|
||||
assistant_prompt_message = AssistantPromptMessage(
|
||||
content=chunk_text if chunk_text else '',
|
||||
)
|
||||
index = chunk.index
|
||||
yield LLMResultChunk(
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(
|
||||
index=index,
|
||||
message=assistant_prompt_message,
|
||||
)
|
||||
)
|
||||
except Exception as ex:
|
||||
raise InvokeError(str(ex))
|
||||
|
||||
def _calc_claude_response_usage(self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int) -> LLMUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param prompt_tokens: prompt tokens
|
||||
:param completion_tokens: completion tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get prompt price info
|
||||
prompt_price_info = self.get_price(
|
||||
model=model,
|
||||
credentials=credentials,
|
||||
price_type=PriceType.INPUT,
|
||||
tokens=prompt_tokens,
|
||||
)
|
||||
|
||||
# get completion price info
|
||||
completion_price_info = self.get_price(
|
||||
model=model,
|
||||
credentials=credentials,
|
||||
price_type=PriceType.OUTPUT,
|
||||
tokens=completion_tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = LLMUsage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
prompt_unit_price=prompt_price_info.unit_price,
|
||||
prompt_price_unit=prompt_price_info.unit,
|
||||
prompt_price=prompt_price_info.total_amount,
|
||||
completion_tokens=completion_tokens,
|
||||
completion_unit_price=completion_price_info.unit_price,
|
||||
completion_price_unit=completion_price_info.unit,
|
||||
completion_price=completion_price_info.total_amount,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
total_price=prompt_price_info.total_amount + completion_price_info.total_amount,
|
||||
currency=prompt_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at
|
||||
)
|
||||
|
||||
return usage
|
||||
|
||||
def _convert_claude_prompt_messages(self, prompt_messages: list[PromptMessage]) -> tuple[str, list[dict]]:
|
||||
"""
|
||||
Convert prompt messages to dict list and system
|
||||
"""
|
||||
|
||||
system = ""
|
||||
first_loop = True
|
||||
for message in prompt_messages:
|
||||
if isinstance(message, SystemPromptMessage):
|
||||
message.content=message.content.strip()
|
||||
if first_loop:
|
||||
system=message.content
|
||||
first_loop=False
|
||||
else:
|
||||
system+="\n"
|
||||
system+=message.content
|
||||
|
||||
prompt_message_dicts = []
|
||||
for message in prompt_messages:
|
||||
if not isinstance(message, SystemPromptMessage):
|
||||
prompt_message_dicts.append(self._convert_claude_prompt_message_to_dict(message))
|
||||
|
||||
return system, prompt_message_dicts
|
||||
|
||||
def _convert_claude_prompt_message_to_dict(self, message: PromptMessage) -> dict:
|
||||
"""
|
||||
Convert PromptMessage to dict
|
||||
"""
|
||||
if isinstance(message, UserPromptMessage):
|
||||
message = cast(UserPromptMessage, message)
|
||||
if isinstance(message.content, str):
|
||||
message_dict = {"role": "user", "content": message.content}
|
||||
else:
|
||||
sub_messages = []
|
||||
for message_content in message.content:
|
||||
if message_content.type == PromptMessageContentType.TEXT:
|
||||
message_content = cast(TextPromptMessageContent, message_content)
|
||||
sub_message_dict = {
|
||||
"type": "text",
|
||||
"text": message_content.data
|
||||
}
|
||||
sub_messages.append(sub_message_dict)
|
||||
elif message_content.type == PromptMessageContentType.IMAGE:
|
||||
message_content = cast(ImagePromptMessageContent, message_content)
|
||||
if not message_content.data.startswith("data:"):
|
||||
# fetch image data from url
|
||||
try:
|
||||
image_content = requests.get(message_content.data).content
|
||||
mime_type, _ = mimetypes.guess_type(message_content.data)
|
||||
base64_data = base64.b64encode(image_content).decode('utf-8')
|
||||
except Exception as ex:
|
||||
raise ValueError(f"Failed to fetch image data from url {message_content.data}, {ex}")
|
||||
else:
|
||||
data_split = message_content.data.split(";base64,")
|
||||
mime_type = data_split[0].replace("data:", "")
|
||||
base64_data = data_split[1]
|
||||
|
||||
if mime_type not in ["image/jpeg", "image/png", "image/gif", "image/webp"]:
|
||||
raise ValueError(f"Unsupported image type {mime_type}, "
|
||||
f"only support image/jpeg, image/png, image/gif, and image/webp")
|
||||
|
||||
sub_message_dict = {
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": mime_type,
|
||||
"data": base64_data
|
||||
}
|
||||
}
|
||||
sub_messages.append(sub_message_dict)
|
||||
|
||||
message_dict = {"role": "user", "content": sub_messages}
|
||||
elif isinstance(message, AssistantPromptMessage):
|
||||
message = cast(AssistantPromptMessage, message)
|
||||
message_dict = {"role": "assistant", "content": message.content}
|
||||
elif isinstance(message, SystemPromptMessage):
|
||||
message = cast(SystemPromptMessage, message)
|
||||
message_dict = {"role": "system", "content": message.content}
|
||||
else:
|
||||
raise ValueError(f"Got unknown type {message}")
|
||||
|
||||
return message_dict
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, messages: list[PromptMessage] | str,
|
||||
tools: Optional[list[PromptMessageTool]] = None) -> int:
|
||||
"""
|
||||
@@ -101,7 +402,19 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
        :param credentials: model credentials
        :return:
        """

        if "anthropic.claude-3" in model:
            try:
                self._invoke_claude(model=model,
                                    credentials=credentials,
                                    prompt_messages=[{"role": "user", "content": "ping"}],
                                    model_parameters={},
                                    stop=None,
                                    stream=False)

            except Exception as ex:
                raise CredentialsValidateFailedError(str(ex))

        try:
            ping_message = UserPromptMessage(content="ping")
            self._generate(model=model,
@@ -0,0 +1,4 @@
model: jina-colbert-v1-en
model_type: rerank
model_properties:
  context_size: 8192
@@ -1,6 +1,5 @@
 from collections.abc import Generator
 from typing import cast
-from urllib.parse import urljoin

 from httpx import Timeout
 from openai import (
@@ -19,6 +18,7 @@ from openai import (
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
 from openai.types.chat.chat_completion_message import FunctionCall
 from openai.types.completion import Completion
+from yarl import URL

 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
@@ -181,7 +181,7 @@ class LocalAILarguageModel(LargeLanguageModel):
                UserPromptMessage(content='ping')
            ], model_parameters={
                'max_tokens': 10,
-           }, stop=[])
+           }, stop=[], stream=False)
        except Exception as ex:
            raise CredentialsValidateFailedError(f'Invalid credentials {str(ex)}')
@@ -227,6 +227,12 @@ class LocalAILarguageModel(LargeLanguageModel):
                )
            ]

        model_properties = {
            ModelPropertyKey.MODE: completion_model,
        } if completion_model else {}

        model_properties[ModelPropertyKey.CONTEXT_SIZE] = int(credentials.get('context_size', '2048'))

        entity = AIModelEntity(
            model=model,
            label=I18nObject(
@@ -234,7 +240,7 @@ class LocalAILarguageModel(LargeLanguageModel):
            ),
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_type=ModelType.LLM,
-           model_properties={ ModelPropertyKey.MODE: completion_model } if completion_model else {},
+           model_properties=model_properties,
            parameter_rules=rules
        )

@@ -319,7 +325,7 @@ class LocalAILarguageModel(LargeLanguageModel):
        client_kwargs = {
            "timeout": Timeout(315.0, read=300.0, write=10.0, connect=5.0),
            "api_key": "1",
-           "base_url": urljoin(credentials['server_url'], 'v1'),
+           "base_url": str(URL(credentials['server_url']) / 'v1'),
        }

        return client_kwargs
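The move from urllib.parse.urljoin to yarl's URL in client_kwargs (and in the LocalAI embedding model below) is behavioural, not cosmetic: urljoin replaces the last path segment of a base URL that has no trailing slash, while yarl's / operator always appends. A quick comparison, where the server address is a placeholder:

from urllib.parse import urljoin
from yarl import URL

base = "http://192.168.1.100:8080/localai"   # note: no trailing slash

print(urljoin(base, 'v1'))      # http://192.168.1.100:8080/v1  ('localai' is dropped)
print(str(URL(base) / 'v1'))    # http://192.168.1.100:8080/localai/v1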
@@ -56,3 +56,12 @@ model_credential_schema:
      placeholder:
        zh_Hans: 在此输入LocalAI的服务器地址,如 http://192.168.1.100:8080
        en_US: Enter the url of your LocalAI, e.g. http://192.168.1.100:8080
    - variable: context_size
      label:
        zh_Hans: 上下文大小
        en_US: Context size
      placeholder:
        zh_Hans: 输入上下文大小
        en_US: Enter context size
      required: false
      type: text-input
@@ -1,11 +1,12 @@
 import time
 from json import JSONDecodeError, dumps
 from os.path import join
 from typing import Optional

 from requests import post
+from yarl import URL

-from core.model_runtime.entities.model_entities import PriceType
+from core.model_runtime.entities.common_entities import I18nObject
+from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
     InvokeAuthorizationError,
@@ -57,7 +58,7 @@ class LocalAITextEmbeddingModel(TextEmbeddingModel):
        }

        try:
-           response = post(join(url, 'embeddings'), headers=headers, data=dumps(data), timeout=10)
+           response = post(str(URL(url) / 'embeddings'), headers=headers, data=dumps(data), timeout=10)
        except Exception as e:
            raise InvokeConnectionError(str(e))

@@ -113,6 +114,27 @@ class LocalAITextEmbeddingModel(TextEmbeddingModel):
            # use GPT2Tokenizer to get num tokens
            num_tokens += self._get_num_tokens_by_gpt2(text)
        return num_tokens

    def _get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
        """
        Get customizable model schema

        :param model: model name
        :param credentials: model credentials
        :return: model schema
        """
        return AIModelEntity(
            model=model,
            label=I18nObject(zh_Hans=model, en_US=model),
            model_type=ModelType.TEXT_EMBEDDING,
            features=[],
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_properties={
                ModelPropertyKey.CONTEXT_SIZE: int(credentials.get('context_size', '512')),
                ModelPropertyKey.MAX_CHUNKS: 1,
            },
            parameter_rules=[]
        )

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
@ -1,10 +1,7 @@
|
||||
import importlib
|
||||
import logging
|
||||
import os
|
||||
from collections import OrderedDict
|
||||
from typing import Optional
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel
|
||||
|
||||
from core.model_runtime.entities.model_entities import ModelType
|
||||
@ -12,6 +9,8 @@ from core.model_runtime.entities.provider_entities import ProviderConfig, Provid
|
||||
from core.model_runtime.model_providers.__base.model_provider import ModelProvider
|
||||
from core.model_runtime.schema_validators.model_credential_schema_validator import ModelCredentialSchemaValidator
|
||||
from core.model_runtime.schema_validators.provider_credential_schema_validator import ProviderCredentialSchemaValidator
|
||||
from core.utils.module_import_helper import load_single_subclass_from_source
|
||||
from core.utils.position_helper import get_position_map, sort_to_dict_by_position_map
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -200,7 +199,6 @@ class ModelProviderFactory:
|
||||
if self.model_provider_extensions:
|
||||
return self.model_provider_extensions
|
||||
|
||||
model_providers = {}
|
||||
|
||||
# get the path of current classes
|
||||
current_path = os.path.abspath(__file__)
|
||||
@ -215,17 +213,10 @@ class ModelProviderFactory:
|
||||
]
|
||||
|
||||
# get _position.yaml file path
|
||||
position_file_path = os.path.join(model_providers_path, '_position.yaml')
|
||||
|
||||
# read _position.yaml file
|
||||
position_map = {}
|
||||
if os.path.exists(position_file_path):
|
||||
with open(position_file_path, encoding='utf-8') as f:
|
||||
positions = yaml.safe_load(f)
|
||||
# convert list to dict with key as model provider name, value as index
|
||||
position_map = {position: index for index, position in enumerate(positions)}
|
||||
position_map = get_position_map(model_providers_path)
|
||||
|
||||
# traverse all model_provider_dir_paths
|
||||
model_providers: list[ModelProviderExtension] = []
|
||||
for model_provider_dir_path in model_provider_dir_paths:
|
||||
# get model_provider dir name
|
||||
model_provider_name = os.path.basename(model_provider_dir_path)
|
||||
@ -238,15 +229,10 @@ class ModelProviderFactory:
|
||||
|
||||
# Dynamic loading {model_provider_name}.py file and find the subclass of ModelProvider
|
||||
py_path = os.path.join(model_provider_dir_path, model_provider_name + '.py')
|
||||
spec = importlib.util.spec_from_file_location(f'core.model_runtime.model_providers.{model_provider_name}.{model_provider_name}', py_path)
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(mod)
|
||||
|
||||
model_provider_class = None
|
||||
for name, obj in vars(mod).items():
|
||||
if isinstance(obj, type) and issubclass(obj, ModelProvider) and obj != ModelProvider:
|
||||
model_provider_class = obj
|
||||
break
|
||||
model_provider_class = load_single_subclass_from_source(
|
||||
module_name=f'core.model_runtime.model_providers.{model_provider_name}.{model_provider_name}',
|
||||
script_path=py_path,
|
||||
parent_type=ModelProvider)
|
||||
|
||||
if not model_provider_class:
|
||||
logger.warning(f"Missing Model Provider Class that extends ModelProvider in {py_path}, Skip.")
|
||||
@ -256,14 +242,13 @@ class ModelProviderFactory:
|
||||
logger.warning(f"Missing {model_provider_name}.yaml file in {model_provider_dir_path}, Skip.")
|
||||
continue
|
||||
|
||||
model_providers[model_provider_name] = ModelProviderExtension(
|
||||
model_providers.append(ModelProviderExtension(
|
||||
name=model_provider_name,
|
||||
provider_instance=model_provider_class(),
|
||||
position=position_map.get(model_provider_name)
|
||||
)
|
||||
))
|
||||
|
||||
sorted_items = sorted(model_providers.items(), key=lambda x: (x[1].position is None, x[1].position))
|
||||
sorted_extensions = OrderedDict(sorted_items)
|
||||
sorted_extensions = sort_to_dict_by_position_map(position_map, model_providers, lambda x: x.name)
|
||||
|
||||
self.model_provider_extensions = sorted_extensions
|
||||
|
||||
|
||||
Binary file not shown. New image: 110 KiB (a provider icon, presumably the icon_l_en.png referenced by nvidia.yaml below).
New SVG file: 1.5 KiB, 3 lines — a single green (#77B900) NVIDIA logo path on a 567x376 viewBox (presumably icon_s_en.svg); markup omitted.
@@ -0,0 +1,4 @@
- google/gemma-7b
- meta/llama2-70b
- mistralai/mixtral-8x7b-instruct-v0.1
- fuyu-8b
@@ -0,0 +1,27 @@
model: fuyu-8b
label:
  zh_Hans: fuyu-8b
  en_US: fuyu-8b
model_type: llm
features:
  - agent-thought
  - vision
model_properties:
  mode: chat
  context_size: 16000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.2
    min: 0.1
    max: 1
  - name: top_p
    use_template: top_p
    default: 0.7
    min: 0.1
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 1024
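fuyu-8b is the only model in this NVIDIA batch flagged with the vision feature. The provider's LLM wrapper (llm.py below) does not send structured image parts; it flattens a multimodal user message into plain text, inlining each image as an <img> tag before delegating to the OpenAI-compatible base class. A compressed sketch of that transformation, simplified from _transform_prompt_messages in this change set:

from core.model_runtime.entities.message_entities import PromptMessageContentType

def flatten_user_content(content_parts) -> str:
    # Text parts are concatenated as-is; image parts are inlined as <img> tags,
    # which is how this integration feeds images to the vlm/adept/fuyu-8b endpoint.
    text = ''
    for part in content_parts:
        if part.type == PromptMessageContentType.TEXT:
            text += part.data
        else:
            text += f' <img src="{part.data}" />'
    return text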
@ -0,0 +1,30 @@
|
||||
model: google/gemma-7b
|
||||
label:
|
||||
zh_Hans: google/gemma-7b
|
||||
en_US: google/gemma-7b
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8192
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 1024
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
@ -0,0 +1,30 @@
|
||||
model: meta/llama2-70b
|
||||
label:
|
||||
zh_Hans: meta/llama2-70b
|
||||
en_US: meta/llama2-70b
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32768
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 1024
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
api/core/model_runtime/model_providers/nvidia/llm/llm.py (new file, 247 lines)
@@ -0,0 +1,247 @@
|
||||
import json
|
||||
from collections.abc import Generator
|
||||
from typing import Optional, Union
|
||||
|
||||
import requests
|
||||
from yarl import URL
|
||||
|
||||
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult
|
||||
from core.model_runtime.entities.message_entities import (
|
||||
PromptMessage,
|
||||
PromptMessageContentType,
|
||||
PromptMessageFunction,
|
||||
PromptMessageTool,
|
||||
UserPromptMessage,
|
||||
)
|
||||
from core.model_runtime.errors.invoke import InvokeError
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
|
||||
from core.model_runtime.utils import helper
|
||||
|
||||
|
||||
class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
|
||||
MODEL_SUFFIX_MAP = {
|
||||
'fuyu-8b': 'vlm/adept/fuyu-8b',
|
||||
'mistralai/mixtral-8x7b-instruct-v0.1': '',
|
||||
'google/gemma-7b': '',
|
||||
'meta/llama2-70b': ''
|
||||
}
|
||||
|
||||
def _invoke(self, model: str, credentials: dict,
|
||||
prompt_messages: list[PromptMessage], model_parameters: dict,
|
||||
tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
|
||||
stream: bool = True, user: Optional[str] = None) \
|
||||
-> Union[LLMResult, Generator]:
|
||||
|
||||
self._add_custom_parameters(credentials, model)
|
||||
prompt_messages = self._transform_prompt_messages(prompt_messages)
|
||||
stop = []
|
||||
user = None
|
||||
|
||||
return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
|
||||
|
||||
def _transform_prompt_messages(self, prompt_messages: list[PromptMessage]) -> list[PromptMessage]:
|
||||
"""
|
||||
Handle Image transform
|
||||
"""
|
||||
for i, p in enumerate(prompt_messages):
|
||||
if isinstance(p, UserPromptMessage) and isinstance(p.content, list):
|
||||
content = p.content
|
||||
content_text = ''
|
||||
for prompt_content in content:
|
||||
if prompt_content.type == PromptMessageContentType.TEXT:
|
||||
content_text += prompt_content.data
|
||||
else:
|
||||
content_text += f' <img src="{prompt_content.data}" />'
|
||||
|
||||
prompt_message = UserPromptMessage(
|
||||
content=content_text
|
||||
)
|
||||
prompt_messages[i] = prompt_message
|
||||
return prompt_messages
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
self._add_custom_parameters(credentials, model)
|
||||
self._validate_credentials(model, credentials)
|
||||
|
||||
def _add_custom_parameters(self, credentials: dict, model: str) -> None:
|
||||
credentials['mode'] = 'chat'
|
||||
|
||||
if self.MODEL_SUFFIX_MAP[model]:
|
||||
credentials['server_url'] = f'https://ai.api.nvidia.com/v1/{self.MODEL_SUFFIX_MAP[model]}'
|
||||
credentials.pop('endpoint_url')
|
||||
else:
|
||||
credentials['endpoint_url'] = 'https://integrate.api.nvidia.com/v1'
|
||||
|
||||
credentials['stream_mode_delimiter'] = '\n'
|
||||
|
||||
def _validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials using requests to ensure compatibility with all providers following OpenAI's API standard.
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
headers = {
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
|
||||
api_key = credentials.get('api_key')
|
||||
if api_key:
|
||||
headers["Authorization"] = f"Bearer {api_key}"
|
||||
|
||||
endpoint_url = credentials['endpoint_url'] if 'endpoint_url' in credentials else None
|
||||
if endpoint_url and not endpoint_url.endswith('/'):
|
||||
endpoint_url += '/'
|
||||
server_url = credentials['server_url'] if 'server_url' in credentials else None
|
||||
|
||||
# prepare the payload for a simple ping to the model
|
||||
data = {
|
||||
'model': model,
|
||||
'max_tokens': 5
|
||||
}
|
||||
|
||||
completion_type = LLMMode.value_of(credentials['mode'])
|
||||
|
||||
if completion_type is LLMMode.CHAT:
|
||||
data['messages'] = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "ping"
|
||||
},
|
||||
]
|
||||
if 'endpoint_url' in credentials:
|
||||
endpoint_url = str(URL(endpoint_url) / 'chat' / 'completions')
|
||||
elif 'server_url' in credentials:
|
||||
endpoint_url = server_url
|
||||
elif completion_type is LLMMode.COMPLETION:
|
||||
data['prompt'] = 'ping'
|
||||
if 'endpoint_url' in credentials:
|
||||
endpoint_url = str(URL(endpoint_url) / 'completions')
|
||||
elif 'server_url' in credentials:
|
||||
endpoint_url = server_url
|
||||
else:
|
||||
raise ValueError("Unsupported completion type for model configuration.")
|
||||
|
||||
# send a post request to validate the credentials
|
||||
response = requests.post(
|
||||
endpoint_url,
|
||||
headers=headers,
|
||||
json=data,
|
||||
timeout=(10, 60)
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
raise CredentialsValidateFailedError(
|
||||
f'Credentials validation failed with status code {response.status_code}')
|
||||
|
||||
try:
|
||||
json_result = response.json()
|
||||
except json.JSONDecodeError as e:
|
||||
raise CredentialsValidateFailedError('Credentials validation failed: JSON decode error')
|
||||
except CredentialsValidateFailedError:
|
||||
raise
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(f'An error occurred during credentials validation: {str(ex)}')
|
||||
|
||||
def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict,
|
||||
tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
|
||||
stream: bool = True, \
|
||||
user: Optional[str] = None) -> Union[LLMResult, Generator]:
|
||||
"""
|
||||
Invoke llm completion model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: credentials
|
||||
:param prompt_messages: prompt messages
|
||||
:param model_parameters: model parameters
|
||||
:param stop: stop words
|
||||
:param stream: is stream response
|
||||
:param user: unique user id
|
||||
:return: full response or stream response chunk generator result
|
||||
"""
|
||||
headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Accept-Charset': 'utf-8',
|
||||
}
|
||||
|
||||
api_key = credentials.get('api_key')
|
||||
if api_key:
|
||||
headers['Authorization'] = f'Bearer {api_key}'
|
||||
|
||||
if stream:
|
||||
headers['Accept'] = 'text/event-stream'
|
||||
|
||||
endpoint_url = credentials['endpoint_url'] if 'endpoint_url' in credentials else None
|
||||
if endpoint_url and not endpoint_url.endswith('/'):
|
||||
endpoint_url += '/'
|
||||
server_url = credentials['server_url'] if 'server_url' in credentials else None
|
||||
|
||||
data = {
|
||||
"model": model,
|
||||
"stream": stream,
|
||||
**model_parameters
|
||||
}
|
||||
|
||||
completion_type = LLMMode.value_of(credentials['mode'])
|
||||
|
||||
if completion_type is LLMMode.CHAT:
|
||||
if 'endpoint_url' in credentials:
|
||||
endpoint_url = str(URL(endpoint_url) / 'chat' / 'completions')
|
||||
elif 'server_url' in credentials:
|
||||
endpoint_url = server_url
|
||||
data['messages'] = [self._convert_prompt_message_to_dict(m) for m in prompt_messages]
|
||||
elif completion_type is LLMMode.COMPLETION:
|
||||
data['prompt'] = 'ping'
|
||||
if 'endpoint_url' in credentials:
|
||||
endpoint_url = str(URL(endpoint_url) / 'completions')
|
||||
elif 'server_url' in credentials:
|
||||
endpoint_url = server_url
|
||||
else:
|
||||
raise ValueError("Unsupported completion type for model configuration.")
|
||||
|
||||
|
||||
# annotate tools with names, descriptions, etc.
|
||||
function_calling_type = credentials.get('function_calling_type', 'no_call')
|
||||
formatted_tools = []
|
||||
if tools:
|
||||
if function_calling_type == 'function_call':
|
||||
data['functions'] = [{
|
||||
"name": tool.name,
|
||||
"description": tool.description,
|
||||
"parameters": tool.parameters
|
||||
} for tool in tools]
|
||||
elif function_calling_type == 'tool_call':
|
||||
data["tool_choice"] = "auto"
|
||||
|
||||
for tool in tools:
|
||||
formatted_tools.append(helper.dump_model(PromptMessageFunction(function=tool)))
|
||||
|
||||
data["tools"] = formatted_tools
|
||||
|
||||
if stop:
|
||||
data["stop"] = stop
|
||||
|
||||
if user:
|
||||
data["user"] = user
|
||||
|
||||
response = requests.post(
|
||||
endpoint_url,
|
||||
headers=headers,
|
||||
json=data,
|
||||
timeout=(10, 60),
|
||||
stream=stream
|
||||
)
|
||||
|
||||
if response.encoding is None or response.encoding == 'ISO-8859-1':
|
||||
response.encoding = 'utf-8'
|
||||
|
||||
if not response.ok:
|
||||
raise InvokeError(f"API request failed with status code {response.status_code}: {response.text}")
|
||||
|
||||
if stream:
|
||||
return self._handle_generate_stream_response(model, credentials, response, prompt_messages)
|
||||
|
||||
return self._handle_generate_response(model, credentials, response, prompt_messages)
|
||||
@ -0,0 +1,30 @@
|
||||
model: mistralai/mixtral-8x7b-instruct-v0.1
|
||||
label:
|
||||
zh_Hans: mistralai/mixtral-8x7b-instruct-v0.1
|
||||
en_US: mistralai/mixtral-8x7b-instruct-v0.1
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32768
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 1024
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
api/core/model_runtime/model_providers/nvidia/nvidia.py (new file, 30 lines)
@@ -0,0 +1,30 @@
import logging

from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.model_provider import ModelProvider

logger = logging.getLogger(__name__)


class MistralAIProvider(ModelProvider):

    def validate_provider_credentials(self, credentials: dict) -> None:
        """
        Validate provider credentials
        if validate failed, raise exception

        :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
        """
        try:
            model_instance = self.get_model_instance(ModelType.LLM)

            model_instance.validate_credentials(
                model='mistralai/mixtral-8x7b-instruct-v0.1',
                credentials=credentials
            )
        except CredentialsValidateFailedError as ex:
            raise ex
        except Exception as ex:
            logger.exception(f'{self.get_provider_schema().provider} credentials validate failed')
            raise ex
api/core/model_runtime/model_providers/nvidia/nvidia.yaml (new file, 30 lines)
@@ -0,0 +1,30 @@
provider: nvidia
label:
  en_US: NVIDIA
icon_small:
  en_US: icon_s_en.svg
icon_large:
  en_US: icon_l_en.png
background: "#FFFFFF"
help:
  title:
    en_US: Get your API Key from NVIDIA
    zh_Hans: 从 NVIDIA 获取 API Key
  url:
    en_US: https://build.nvidia.com/explore/discover
supported_model_types:
  - llm
  - text-embedding
  - rerank
configurate_methods:
  - predefined-model
provider_credential_schema:
  credential_form_schemas:
    - variable: api_key
      label:
        en_US: API Key
      type: secret-input
      required: true
      placeholder:
        zh_Hans: 在此输入您的 API Key
        en_US: Enter your API Key
@@ -0,0 +1,4 @@
model: nv-rerank-qa-mistral-4b:1
model_type: rerank
model_properties:
  context_size: 8192
api/core/model_runtime/model_providers/nvidia/rerank/rerank.py (new file, 112 lines)
@@ -0,0 +1,112 @@
from math import exp
from typing import Optional

import requests

from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
    InvokeConnectionError,
    InvokeError,
    InvokeRateLimitError,
    InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.rerank_model import RerankModel


class NvidiaRerankModel(RerankModel):
    """
    Model class for NVIDIA rerank model.
    """

    def _sigmoid(self, logit: float) -> float:
        return 1 / (1 + exp(-logit))

    def _invoke(self, model: str, credentials: dict,
                query: str, docs: list[str], score_threshold: Optional[float] = None, top_n: Optional[int] = None,
                user: Optional[str] = None) -> RerankResult:
        """
        Invoke rerank model

        :param model: model name
        :param credentials: model credentials
        :param query: search query
        :param docs: docs for reranking
        :param score_threshold: score threshold
        :param top_n: top n documents to return
        :param user: unique user id
        :return: rerank result
        """
        if len(docs) == 0:
            return RerankResult(model=model, docs=[])

        try:
            invoke_url = "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking"

            headers = {
                "Authorization": f"Bearer {credentials.get('api_key')}",
                "Accept": "application/json",
            }
            payload = {
                "model": model,
                "query": {"text": query},
                "passages": [{"text": doc} for doc in docs],
            }

            session = requests.Session()
            response = session.post(invoke_url, headers=headers, json=payload)
            response.raise_for_status()
            results = response.json()

            rerank_documents = []
            for result in results['rankings']:
                index = result['index']
                logit = result['logit']
                rerank_document = RerankDocument(
                    index=index,
                    text=docs[index],
                    score=self._sigmoid(logit),
                )

                rerank_documents.append(rerank_document)

            return RerankResult(model=model, docs=rerank_documents)
        except requests.HTTPError as e:
            raise InvokeServerUnavailableError(str(e))

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials

        :param model: model name
        :param credentials: model credentials
        :return:
        """
        try:
            self._invoke(
                model=model,
                credentials=credentials,
                query="What is the GPU memory bandwidth of H100 SXM?",
                docs=[
                    "Example doc 1",
                    "Example doc 2",
                    "Example doc 3",
                ],
            )
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))

    @property
    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
        """
        Map model invoke error to unified error
        """
        return {
            InvokeConnectionError: [requests.ConnectionError],
            InvokeServerUnavailableError: [requests.HTTPError],
            InvokeRateLimitError: [],
            InvokeAuthorizationError: [requests.HTTPError],
            InvokeBadRequestError: [requests.RequestException]
        }
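For context, a minimal standalone sketch of the request/response shape this class relies on: the endpoint, payload fields, and the `rankings`/`index`/`logit` keys are taken from the code above; the API key and passages are placeholders. The class squashes each raw logit into a 0-1 score with a sigmoid:

    # Sketch only: mirrors the call made by NvidiaRerankModel._invoke above.
    import requests
    from math import exp

    resp = requests.post(
        "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking",
        headers={"Authorization": "Bearer nvapi-...", "Accept": "application/json"},  # placeholder key
        json={
            "model": "nv-rerank-qa-mistral-4b:1",
            "query": {"text": "What is the GPU memory bandwidth of H100 SXM?"},
            "passages": [{"text": "Doc A"}, {"text": "Doc B"}],  # placeholder documents
        },
    )
    resp.raise_for_status()
    # each ranking carries the original passage index and a raw logit
    scores = {r["index"]: 1 / (1 + exp(-r["logit"])) for r in resp.json()["rankings"]}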
@@ -0,0 +1,5 @@
model: NV-Embed-QA
model_type: text-embedding
model_properties:
  context_size: 512
  max_chunks: 1
@@ -0,0 +1,172 @@
import time
from json import JSONDecodeError, dumps
from typing import Optional

from requests import post

from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
    InvokeConnectionError,
    InvokeError,
    InvokeRateLimitError,
    InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel


class NvidiaTextEmbeddingModel(TextEmbeddingModel):
    """
    Model class for Nvidia text embedding model.
    """
    api_base: str = 'https://ai.api.nvidia.com/v1/retrieval/nvidia/embeddings'
    models: list[str] = ['NV-Embed-QA']

    def _invoke(self, model: str, credentials: dict,
                texts: list[str], user: Optional[str] = None) \
            -> TextEmbeddingResult:
        """
        Invoke text embedding model

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :return: embeddings result
        """
        api_key = credentials['api_key']
        if model not in self.models:
            raise InvokeBadRequestError('Invalid model name')
        if not api_key:
            raise CredentialsValidateFailedError('api_key is required')
        url = self.api_base
        headers = {
            'Authorization': 'Bearer ' + api_key,
            'Content-Type': 'application/json'
        }

        data = {
            'model': model,
            'input': texts[0],
            'input_type': 'query'
        }

        try:
            response = post(url, headers=headers, data=dumps(data))
        except Exception as e:
            raise InvokeConnectionError(str(e))

        if response.status_code != 200:
            try:
                resp = response.json()
                msg = resp['detail']
                if response.status_code == 401:
                    raise InvokeAuthorizationError(msg)
                elif response.status_code == 429:
                    raise InvokeRateLimitError(msg)
                elif response.status_code == 500:
                    raise InvokeServerUnavailableError(msg)
                else:
                    raise InvokeError(msg)
            except JSONDecodeError as e:
                raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")

        try:
            resp = response.json()
            embeddings = resp['data']
            usage = resp['usage']
        except Exception as e:
            raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")

        usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage['total_tokens'])

        result = TextEmbeddingResult(
            model=model,
            embeddings=[[
                float(data) for data in x['embedding']
            ] for x in embeddings],
            usage=usage
        )

        return result

    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
        """
        Get number of tokens for given prompt messages

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :return:
        """
        num_tokens = 0
        for text in texts:
            # use the GPT-2 tokenizer to get num tokens
            num_tokens += self._get_num_tokens_by_gpt2(text)
        return num_tokens

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials

        :param model: model name
        :param credentials: model credentials
        :return:
        """
        try:
            self._invoke(model=model, credentials=credentials, texts=['ping'])
        except InvokeAuthorizationError:
            raise CredentialsValidateFailedError('Invalid api key')

    @property
    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
        return {
            InvokeConnectionError: [
                InvokeConnectionError
            ],
            InvokeServerUnavailableError: [
                InvokeServerUnavailableError
            ],
            InvokeRateLimitError: [
                InvokeRateLimitError
            ],
            InvokeAuthorizationError: [
                InvokeAuthorizationError
            ],
            InvokeBadRequestError: [
                KeyError
            ]
        }

    def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
        """
        Calculate response usage

        :param model: model name
        :param credentials: model credentials
        :param tokens: input tokens
        :return: usage
        """
        # get input price info
        input_price_info = self.get_price(
            model=model,
            credentials=credentials,
            price_type=PriceType.INPUT,
            tokens=tokens
        )

        # transform usage
        usage = EmbeddingUsage(
            tokens=tokens,
            total_tokens=tokens,
            unit_price=input_price_info.unit_price,
            price_unit=input_price_info.unit,
            total_price=input_price_info.total_amount,
            currency=input_price_info.currency,
            latency=time.perf_counter() - self.started_at
        )

        return usage
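As a quick reference, a minimal sketch of the request this class issues and how the response is unpacked. The payload shape and the `data`/`usage` response keys follow the code above; the API key is a placeholder. Note that the payload carries only `texts[0]`, so only the first text of a batch is actually embedded by this implementation:

    # Sketch only: mirrors NvidiaTextEmbeddingModel._invoke above.
    import requests

    resp = requests.post(
        "https://ai.api.nvidia.com/v1/retrieval/nvidia/embeddings",
        headers={"Authorization": "Bearer nvapi-...", "Content-Type": "application/json"},  # placeholder key
        json={"model": "NV-Embed-QA", "input": "ping", "input_type": "query"},
    )
    body = resp.json()
    vector = [float(v) for v in body["data"][0]["embedding"]]
    total_tokens = body["usage"]["total_tokens"]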
@@ -449,7 +449,7 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                help=I18nObject(en_US="The temperature of the model. "
                                "Increasing the temperature will make the model answer "
                                "more creatively. (Default: 0.8)"),
                default=0.8,
                default=0.1,
                min=0,
                max=2
            ),
@@ -472,7 +472,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                help=I18nObject(en_US="Reduces the probability of generating nonsense. "
                                "A higher value (e.g. 100) will give more diverse answers, "
                                "while a lower value (e.g. 10) will be more conservative. (Default: 40)"),
                default=40,
                min=1,
                max=100
            ),
@@ -483,7 +482,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                help=I18nObject(en_US="Sets how strongly to penalize repetitions. "
                                "A higher value (e.g., 1.5) will penalize repetitions more strongly, "
                                "while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)"),
                default=1.1,
                min=-2,
                max=2
            ),
@@ -494,7 +492,7 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                type=ParameterType.INT,
                help=I18nObject(en_US="Maximum number of tokens to predict when generating text. "
                                "(Default: 128, -1 = infinite generation, -2 = fill context)"),
                default=128,
                default=512 if int(credentials.get('max_tokens', 4096)) >= 768 else 128,
                min=-2,
                max=int(credentials.get('max_tokens', 4096)),
            ),
@@ -504,7 +502,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                type=ParameterType.INT,
                help=I18nObject(en_US="Enable Mirostat sampling for controlling perplexity. "
                                "(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"),
                default=0,
                min=0,
                max=2
            ),
@@ -516,7 +513,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                                "the generated text. A lower learning rate will result in slower adjustments, "
                                "while a higher learning rate will make the algorithm more responsive. "
                                "(Default: 0.1)"),
                default=0.1,
                precision=1
            ),
            ParameterRule(
@@ -525,7 +521,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                type=ParameterType.FLOAT,
                help=I18nObject(en_US="Controls the balance between coherence and diversity of the output. "
                                "A lower value will result in more focused and coherent text. (Default: 5.0)"),
                default=5.0,
                precision=1
            ),
            ParameterRule(
@@ -543,7 +538,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                type=ParameterType.INT,
                help=I18nObject(en_US="The number of layers to send to the GPU(s). "
                                "On macOS it defaults to 1 to enable metal support, 0 to disable."),
                default=1,
                min=0,
                max=1
            ),
@@ -563,7 +557,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                type=ParameterType.INT,
                help=I18nObject(en_US="Sets how far back for the model to look back to prevent repetition. "
                                "(Default: 64, 0 = disabled, -1 = num_ctx)"),
                default=64,
                min=-1
            ),
            ParameterRule(
@@ -573,7 +566,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                help=I18nObject(en_US="Tail free sampling is used to reduce the impact of less probable tokens "
                                "from the output. A higher value (e.g., 2.0) will reduce the impact more, "
                                "while a value of 1.0 disables this setting. (default: 1)"),
                default=1,
                precision=1
            ),
            ParameterRule(
@@ -583,7 +575,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                help=I18nObject(en_US="Sets the random number seed to use for generation. Setting this to "
                                "a specific number will make the model generate the same text for "
                                "the same prompt. (Default: 0)"),
                default=0
            ),
            ParameterRule(
                name='format',
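The one behavioural change worth calling out in these hunks is the num_predict default, which now scales with the configured context size instead of always being 128; a minimal sketch of the new expression (the credential dicts are placeholders):

    # Sketch only: the new default from the hunk above.
    large = {'max_tokens': 4096}
    small = {'max_tokens': 512}
    assert (512 if int(large.get('max_tokens', 4096)) >= 768 else 128) == 512
    assert (512 if int(small.get('max_tokens', 4096)) >= 768 else 128) == 128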
@@ -656,6 +656,8 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
                if assistant_message_function_call:
                    # start of stream function call
                    delta_assistant_message_function_call_storage = assistant_message_function_call
                    if delta_assistant_message_function_call_storage.arguments is None:
                        delta_assistant_message_function_call_storage.arguments = ''
                    if not has_finish_reason:
                        continue

@@ -148,7 +148,8 @@ class SparkLLMClient:
        data = {
            "header": {
                "app_id": self.app_id,
                "uid": user_id
                # resolve this error message => $.header.uid' length must be less or equal than 32
                "uid": user_id[:32] if user_id else None
            },
            "parameter": {
                "chat": {
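In other words, the Spark API rejects header uids longer than 32 characters, so the client now clips the value before sending; a minimal sketch of that guard (the sample id is made up):

    # Sketch only: the Spark endpoint enforces len(header.uid) <= 32.
    user_id = "web-session-4f6f1f0a-8f2d-4a57-9f3e-2b9c0d7e1a55"  # placeholder id
    uid = user_id[:32] if user_id else None
    assert uid is None or len(uid) <= 32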
@@ -8,54 +8,70 @@ model_properties:
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 1.0
    type: float
    default: 0.85
    min: 0.0
    max: 2.0
    help:
      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 2000
    min: 1
    max: 2000
    help:
      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
  - name: top_p
    use_template: top_p
    type: float
    default: 0.8
    min: 0.1
    max: 0.9
    help:
      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
  - name: max_tokens
    use_template: max_tokens
    default: 1500
    min: 1
    max: 6000
    help:
      zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。
      en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
  - name: top_k
    type: int
    min: 0
    max: 99
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。默认不传递该参数,取值为None或当top_k大于100时,表示不启用top_k策略,此时,仅有top_p策略生效。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect.
    required: false
      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
  - name: seed
    required: false
    type: int
    default: 1234
    label:
      zh_Hans: 随机种子
      en_US: Random seed
    type: int
    help:
      zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。
      en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
    required: false
      zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
  - name: repetition_penalty
    label:
      en_US: Repetition penalty
    required: false
    type: float
    default: 1.1
    label:
      en_US: Repetition penalty
    help:
      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
      en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
    required: false
      en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
  - name: enable_search
    type: boolean
    default: false
    help:
      zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
      en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.12'
  output: '0.12'
  unit: '0.001'
  currency: RMB

@@ -4,58 +4,74 @@ label:
model_type: llm
model_properties:
  mode: chat
  context_size: 30000
  context_size: 32768
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 1.0
    type: float
    default: 0.85
    min: 0.0
    max: 2.0
    help:
      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 2000
    min: 1
    max: 2000
    help:
      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
  - name: top_p
    use_template: top_p
    type: float
    default: 0.8
    min: 0.1
    max: 0.9
    help:
      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
  - name: max_tokens
    use_template: max_tokens
    default: 2000
    min: 1
    max: 28000
    help:
      zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。
      en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
  - name: top_k
    type: int
    min: 0
    max: 99
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。默认不传递该参数,取值为None或当top_k大于100时,表示不启用top_k策略,此时,仅有top_p策略生效。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect.
    required: false
      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
  - name: seed
    required: false
    type: int
    default: 1234
    label:
      zh_Hans: 随机种子
      en_US: Random seed
    type: int
    help:
      zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。
      en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
    required: false
      zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
  - name: repetition_penalty
    label:
      en_US: Repetition penalty
    required: false
    type: float
    default: 1.1
    label:
      en_US: Repetition penalty
    help:
      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
      en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
    required: false
      en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
  - name: enable_search
    type: boolean
    default: false
    help:
      zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
      en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.12'
  output: '0.12'
  unit: '0.001'
  currency: RMB

@@ -8,54 +8,70 @@ model_properties:
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 1.0
    type: float
    default: 0.85
    min: 0.0
    max: 2.0
    help:
      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 2000
    min: 1
    max: 2000
    help:
      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
  - name: top_p
    use_template: top_p
    type: float
    default: 0.8
    min: 0.1
    max: 0.9
    help:
      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
  - name: max_tokens
    use_template: max_tokens
    default: 1500
    min: 1
    max: 6000
    help:
      zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。
      en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
  - name: top_k
    type: int
    min: 0
    max: 99
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。默认不传递该参数,取值为None或当top_k大于100时,表示不启用top_k策略,此时,仅有top_p策略生效。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect.
    required: false
      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
  - name: seed
    required: false
    type: int
    default: 1234
    label:
      zh_Hans: 随机种子
      en_US: Random seed
    type: int
    help:
      zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。
      en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
    required: false
      zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
  - name: repetition_penalty
    label:
      en_US: Repetition penalty
    required: false
    type: float
    default: 1.1
    label:
      en_US: Repetition penalty
    help:
      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
      en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
    required: false
      en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
  - name: enable_search
    type: boolean
    default: false
    help:
      zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
      en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.12'
  output: '0.12'
  unit: '0.001'
  currency: RMB

@@ -4,58 +4,70 @@ label:
model_type: llm
model_properties:
  mode: completion
  context_size: 32000
  context_size: 32768
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 1.0
    type: float
    default: 0.85
    min: 0.0
    max: 2.0
    help:
      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 1500
    min: 1
    max: 1500
    help:
      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
  - name: top_p
    use_template: top_p
    type: float
    default: 0.8
    min: 0.1
    max: 0.9
    help:
      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
  - name: max_tokens
    use_template: max_tokens
    default: 2000
    min: 1
    max: 30000
    help:
      zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。
      en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
  - name: top_k
    type: int
    min: 0
    max: 99
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。默认不传递该参数,取值为None或当top_k大于100时,表示不启用top_k策略,此时,仅有top_p策略生效。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect.
    required: false
      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
  - name: seed
    required: false
    type: int
    default: 1234
    label:
      zh_Hans: 随机种子
      en_US: Random seed
    type: int
    help:
      zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。
      en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
    required: false
      zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
  - name: repetition_penalty
    label:
      en_US: Repetition penalty
    required: false
    type: float
    default: 1.1
    label:
      en_US: Repetition penalty
    help:
      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
      en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
      en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
  - name: enable_search
    type: boolean
    default: false
    help:
      zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
      en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
  - name: response_format
    use_template: response_format
pricing:

@@ -8,55 +8,66 @@ model_properties:
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 1.0
    type: float
    default: 0.85
    min: 0.0
    max: 2.0
    help:
      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 1500
    min: 1
    max: 1500
    help:
      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
  - name: top_p
    use_template: top_p
    type: float
    default: 0.8
    min: 0.1
    max: 0.9
    help:
      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
  - name: max_tokens
    use_template: max_tokens
    default: 1500
    min: 1
    max: 6000
    help:
      zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。
      en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
  - name: top_k
    type: int
    min: 0
    max: 99
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。默认不传递该参数,取值为None或当top_k大于100时,表示不启用top_k策略,此时,仅有top_p策略生效。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect.
    required: false
      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
  - name: seed
    required: false
    type: int
    default: 1234
    label:
      zh_Hans: 随机种子
      en_US: Random seed
    type: int
    help:
      zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。
      en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
    required: false
      zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
  - name: repetition_penalty
    label:
      en_US: Repetition penalty
    required: false
    type: float
    default: 1.1
    label:
      en_US: Repetition penalty
    help:
      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
      en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
    required: false
      en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
  - name: enable_search
    type: boolean
    default: false
    help:
      zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
      en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
  - name: response_format
    use_template: response_format
pricing:

@@ -0,0 +1,4 @@
model: text-embedding-v1
model_type: text-embedding
model_properties:
  context_size: 2048
@@ -0,0 +1,4 @@
model: text-embedding-v2
model_type: text-embedding
model_properties:
  context_size: 2048
@@ -0,0 +1,132 @@
import time
from typing import Optional

import dashscope

from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import (
    EmbeddingUsage,
    TextEmbeddingResult,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.text_embedding_model import (
    TextEmbeddingModel,
)
from core.model_runtime.model_providers.tongyi._common import _CommonTongyi


class TongyiTextEmbeddingModel(_CommonTongyi, TextEmbeddingModel):
    """
    Model class for Tongyi text embedding model.
    """

    def _invoke(
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :return: embeddings result
        """
        credentials_kwargs = self._to_credential_kwargs(credentials)
        dashscope.api_key = credentials_kwargs["dashscope_api_key"]
        embeddings, embedding_used_tokens = self.embed_documents(model, texts)

        return TextEmbeddingResult(
            embeddings=embeddings,
            usage=self._calc_response_usage(model, credentials_kwargs, embedding_used_tokens),
            model=model
        )

    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
        """
        Get number of tokens for given prompt messages

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :return:
        """
        if len(texts) == 0:
            return 0
        total_num_tokens = 0
        for text in texts:
            total_num_tokens += self._get_num_tokens_by_gpt2(text)

        return total_num_tokens

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials

        :param model: model name
        :param credentials: model credentials
        :return:
        """
        try:
            # transform credentials to kwargs for model instance
            credentials_kwargs = self._to_credential_kwargs(credentials)
            dashscope.api_key = credentials_kwargs["dashscope_api_key"]
            # call embedding model
            self.embed_documents(model=model, texts=["ping"])
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))

    @staticmethod
    def embed_documents(model: str, texts: list[str]) -> tuple[list[list[float]], int]:
        """Call out to Tongyi's embedding endpoint.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text, and tokens usage.
        """
        embeddings = []
        embedding_used_tokens = 0
        for text in texts:
            response = dashscope.TextEmbedding.call(model=model, input=text, text_type="document")
            data = response.output["embeddings"][0]
            embeddings.append(data["embedding"])
            embedding_used_tokens += response.usage["total_tokens"]

        return [list(map(float, e)) for e in embeddings], embedding_used_tokens

    def _calc_response_usage(
        self, model: str, credentials: dict, tokens: int
    ) -> EmbeddingUsage:
        """
        Calculate response usage

        :param model: model name
        :param tokens: input tokens
        :return: usage
        """
        # get input price info
        input_price_info = self.get_price(
            model=model,
            credentials=credentials,
            price_type=PriceType.INPUT,
            tokens=tokens
        )

        # transform usage
        usage = EmbeddingUsage(
            tokens=tokens,
            total_tokens=tokens,
            unit_price=input_price_info.unit_price,
            price_unit=input_price_info.unit,
            total_price=input_price_info.total_amount,
            currency=input_price_info.currency,
            latency=time.perf_counter() - self.started_at
        )

        return usage
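For reference, a minimal sketch of the per-text DashScope call that the static `embed_documents` helper above wraps; the call signature and response keys are taken from the code in this diff, and the API key is a placeholder:

    # Sketch only: one text embedded via dashscope, as in embed_documents above.
    import dashscope

    dashscope.api_key = "sk-..."  # placeholder key
    resp = dashscope.TextEmbedding.call(model="text-embedding-v2", input="hello", text_type="document")
    vector = resp.output["embeddings"][0]["embedding"]
    used_tokens = resp.usage["total_tokens"]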
@@ -17,15 +17,16 @@ help:
supported_model_types:
  - llm
  - tts
  - text-embedding
configurate_methods:
  - predefined-model
provider_credential_schema:
  credential_form_schemas:
    - variable: dashscope_api_key
      label:
        en_US: APIKey
        en_US: API Key
      type: secret-input
      required: true
      placeholder:
        zh_Hans: 在此输入您的 APIKey
        en_US: Enter your APIKey
        zh_Hans: 在此输入您的 API Key
        en_US: Enter your API Key

Binary file not shown. (new image, 78 KiB)
@@ -0,0 +1,3 @@
<svg width="567" height="376" viewBox="0 0 567 376" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M58.0366 161.868C58.0366 161.868 109.261 86.2912 211.538 78.4724V51.053C98.2528 60.1511 0.152344 156.098 0.152344 156.098C0.152344 156.098 55.7148 316.717 211.538 331.426V302.282C97.1876 287.896 58.0366 161.868 58.0366 161.868ZM211.538 244.32V271.013C125.114 255.603 101.125 165.768 101.125 165.768C101.125 165.768 142.621 119.799 211.538 112.345V141.633C211.486 141.633 211.449 141.617 211.406 141.617C175.235 137.276 146.978 171.067 146.978 171.067C146.978 171.067 162.816 227.949 211.538 244.32ZM211.538 0.47998V51.053C214.864 50.7981 218.189 50.5818 221.533 50.468C350.326 46.1273 434.243 156.098 434.243 156.098C434.243 156.098 337.861 273.296 237.448 273.296C228.245 273.296 219.63 272.443 211.538 271.009V302.282C218.695 303.201 225.903 303.667 233.119 303.675C326.56 303.675 394.134 255.954 459.566 199.474C470.415 208.162 514.828 229.299 523.958 238.55C461.745 290.639 316.752 332.626 234.551 332.626C226.627 332.626 219.018 332.148 211.538 331.426V375.369H566.701V0.47998H211.538ZM211.538 112.345V78.4724C214.829 78.2425 218.146 78.0672 221.533 77.9602C314.148 75.0512 374.909 157.548 374.909 157.548C374.909 157.548 309.281 248.693 238.914 248.693C228.787 248.693 219.707 247.065 211.536 244.318V141.631C247.591 145.987 254.848 161.914 276.524 198.049L324.737 157.398C324.737 157.398 289.544 111.243 230.219 111.243C223.768 111.241 217.597 111.696 211.538 112.345Z" fill="#77B900"/>
</svg>
@@ -0,0 +1,267 @@
from collections.abc import Generator

from httpx import Response, post
from yarl import URL

from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import (
    AssistantPromptMessage,
    PromptMessage,
    PromptMessageTool,
    SystemPromptMessage,
    UserPromptMessage,
)
from core.model_runtime.entities.model_entities import (
    AIModelEntity,
    FetchFrom,
    ModelPropertyKey,
    ModelType,
    ParameterRule,
    ParameterType,
)
from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
    InvokeConnectionError,
    InvokeError,
    InvokeRateLimitError,
    InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel


class TritonInferenceAILargeLanguageModel(LargeLanguageModel):
    def _invoke(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                model_parameters: dict, tools: list[PromptMessageTool] | None = None,
                stop: list[str] | None = None, stream: bool = True, user: str | None = None) \
            -> LLMResult | Generator:
        """
        invoke LLM

        see `core.model_runtime.model_providers.__base.large_language_model.LargeLanguageModel._invoke`
        """
        return self._generate(
            model=model, credentials=credentials, prompt_messages=prompt_messages, model_parameters=model_parameters,
            tools=tools, stop=stop, stream=stream, user=user,
        )

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        validate credentials
        """
        if 'server_url' not in credentials:
            raise CredentialsValidateFailedError('server_url is required in credentials')

        try:
            self._invoke(model=model, credentials=credentials, prompt_messages=[
                UserPromptMessage(content='ping')
            ], model_parameters={}, stream=False)
        except InvokeError as ex:
            raise CredentialsValidateFailedError(f'An error occurred during connection: {str(ex)}')

    def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                       tools: list[PromptMessageTool] | None = None) -> int:
        """
        get number of tokens

        because TritonInference LLM is a customized model, we could not detect which tokenizer to use
        so we just take the GPT2 tokenizer as default
        """
        return self._get_num_tokens_by_gpt2(self._convert_prompt_message_to_text(prompt_messages))

    def _convert_prompt_message_to_text(self, message: list[PromptMessage]) -> str:
        """
        convert prompt message to text
        """
        text = ''
        for item in message:
            if isinstance(item, UserPromptMessage):
                text += f'User: {item.content}'
            elif isinstance(item, SystemPromptMessage):
                text += f'System: {item.content}'
            elif isinstance(item, AssistantPromptMessage):
                text += f'Assistant: {item.content}'
            else:
                raise NotImplementedError(f'PromptMessage type {type(item)} is not supported')
        return text

    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
        """
        used to define customizable model schema
        """
        rules = [
            ParameterRule(
                name='temperature',
                type=ParameterType.FLOAT,
                use_template='temperature',
                label=I18nObject(
                    zh_Hans='温度',
                    en_US='Temperature'
                ),
            ),
            ParameterRule(
                name='top_p',
                type=ParameterType.FLOAT,
                use_template='top_p',
                label=I18nObject(
                    zh_Hans='Top P',
                    en_US='Top P'
                )
            ),
            ParameterRule(
                name='max_tokens',
                type=ParameterType.INT,
                use_template='max_tokens',
                min=1,
                max=int(credentials.get('context_length', 2048)),
                default=min(512, int(credentials.get('context_length', 2048))),
                label=I18nObject(
                    zh_Hans='最大生成长度',
                    en_US='Max Tokens'
                )
            )
        ]

        completion_type = None

        if 'completion_type' in credentials:
            if credentials['completion_type'] == 'chat':
                completion_type = LLMMode.CHAT.value
            elif credentials['completion_type'] == 'completion':
                completion_type = LLMMode.COMPLETION.value
            else:
                raise ValueError(f'completion_type {credentials["completion_type"]} is not supported')

        entity = AIModelEntity(
            model=model,
            label=I18nObject(
                en_US=model
            ),
            parameter_rules=rules,
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_type=ModelType.LLM,
            model_properties={
                ModelPropertyKey.MODE: completion_type,
                ModelPropertyKey.CONTEXT_SIZE: int(credentials.get('context_length', 2048)),
            },
        )

        return entity

    def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                  model_parameters: dict,
                  tools: list[PromptMessageTool] | None = None,
                  stop: list[str] | None = None, stream: bool = True, user: str | None = None) \
            -> LLMResult | Generator:
        """
        generate text from LLM
        """
        if 'server_url' not in credentials:
            raise CredentialsValidateFailedError('server_url is required in credentials')

        if 'stream' in credentials and not bool(credentials['stream']) and stream:
            raise ValueError(f'stream is not supported by model {model}')

        try:
            parameters = {}
            if 'temperature' in model_parameters:
                parameters['temperature'] = model_parameters['temperature']
            if 'top_p' in model_parameters:
                parameters['top_p'] = model_parameters['top_p']
            if 'top_k' in model_parameters:
                parameters['top_k'] = model_parameters['top_k']
            if 'presence_penalty' in model_parameters:
                parameters['presence_penalty'] = model_parameters['presence_penalty']
            if 'frequency_penalty' in model_parameters:
                parameters['frequency_penalty'] = model_parameters['frequency_penalty']

            response = post(str(URL(credentials['server_url']) / 'v2' / 'models' / model / 'generate'), json={
                'text_input': self._convert_prompt_message_to_text(prompt_messages),
                'max_tokens': model_parameters.get('max_tokens', 512),
                'parameters': {
                    'stream': False,
                    **parameters
                },
            }, timeout=(10, 120))
            response.raise_for_status()
            if response.status_code != 200:
                raise InvokeBadRequestError(f'Invoke failed with status code {response.status_code}, {response.text}')

            if stream:
                return self._handle_chat_stream_response(model=model, credentials=credentials, prompt_messages=prompt_messages,
                                                         tools=tools, resp=response)
            return self._handle_chat_generate_response(model=model, credentials=credentials, prompt_messages=prompt_messages,
                                                       tools=tools, resp=response)
        except Exception as ex:
            raise InvokeConnectionError(f'An error occurred during connection: {str(ex)}')

    def _handle_chat_generate_response(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                                       tools: list[PromptMessageTool],
                                       resp: Response) -> LLMResult:
        """
        handle normal chat generate response
        """
        text = resp.json()['text_output']

        usage = LLMUsage.empty_usage()
        usage.prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
        usage.completion_tokens = self._get_num_tokens_by_gpt2(text)

        return LLMResult(
            model=model,
            prompt_messages=prompt_messages,
            message=AssistantPromptMessage(
                content=text
            ),
            usage=usage
        )

    def _handle_chat_stream_response(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                                     tools: list[PromptMessageTool],
                                     resp: Response) -> Generator:
        """
        handle normal chat generate response
        """
        text = resp.json()['text_output']

        usage = LLMUsage.empty_usage()
        usage.prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
        usage.completion_tokens = self._get_num_tokens_by_gpt2(text)
|
||||
|
||||
yield LLMResultChunk(
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(
|
||||
index=0,
|
||||
message=AssistantPromptMessage(
|
||||
content=text
|
||||
),
|
||||
usage=usage
|
||||
)
|
||||
)
|
||||
|
||||
@property
|
||||
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
||||
"""
|
||||
Map model invoke error to unified error
|
||||
The key is the error type thrown to the caller
|
||||
The value is the error type thrown by the model,
|
||||
which needs to be converted into a unified error type for the caller.
|
||||
|
||||
:return: Invoke error mapping
|
||||
"""
|
||||
return {
|
||||
InvokeConnectionError: [
|
||||
],
|
||||
InvokeServerUnavailableError: [
|
||||
],
|
||||
InvokeRateLimitError: [
|
||||
],
|
||||
InvokeAuthorizationError: [
|
||||
],
|
||||
InvokeBadRequestError: [
|
||||
ValueError
|
||||
]
|
||||
}
|
||||
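Note: the request that `_generate` issues can be reproduced with a plain HTTP client. The sketch below is illustrative only; the server URL and model name are assumptions, while the endpoint path and the `text_input` / `max_tokens` / `parameters` payload fields mirror the call above.

# Minimal sketch of the HTTP call made by _generate (server URL and model name are assumed).
import requests

server_url = 'http://192.168.1.100:8000'   # assumption: address of a reachable Triton Inference Server
model = 'my-llm'                           # assumption: name of the deployed model

response = requests.post(
    f'{server_url}/v2/models/{model}/generate',
    json={
        'text_input': 'User: ping',                        # same prompt format as _convert_prompt_message_to_text
        'max_tokens': 512,
        'parameters': {'stream': False, 'temperature': 0.7},
    },
    timeout=(10, 120),
)
response.raise_for_status()
print(response.json()['text_output'])                      # the field read by _handle_chat_generate_response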
@ -0,0 +1,9 @@
import logging

from core.model_runtime.model_providers.__base.model_provider import ModelProvider

logger = logging.getLogger(__name__)

class XinferenceAIProvider(ModelProvider):
    def validate_provider_credentials(self, credentials: dict) -> None:
        pass
@ -0,0 +1,84 @@
provider: triton_inference_server
label:
  en_US: Triton Inference Server
icon_small:
  en_US: icon_s_en.svg
icon_large:
  en_US: icon_l_en.png
background: "#EFFDFD"
help:
  title:
    en_US: How to deploy Triton Inference Server
    zh_Hans: 如何部署 Triton Inference Server
  url:
    en_US: https://github.com/triton-inference-server/server
supported_model_types:
  - llm
configurate_methods:
  - customizable-model
model_credential_schema:
  model:
    label:
      en_US: Model Name
      zh_Hans: 模型名称
    placeholder:
      en_US: Enter your model name
      zh_Hans: 输入模型名称
  credential_form_schemas:
    - variable: server_url
      label:
        zh_Hans: 服务器URL
        en_US: Server URL
      type: secret-input
      required: true
      placeholder:
        zh_Hans: 在此输入 Triton Inference Server 的服务器地址,如 http://192.168.1.100:8000
        en_US: Enter the URL of your Triton Inference Server, e.g. http://192.168.1.100:8000
    - variable: context_size
      label:
        zh_Hans: 上下文大小
        en_US: Context size
      type: text-input
      required: true
      placeholder:
        zh_Hans: 在此输入您的上下文大小
        en_US: Enter the context size
      default: 2048
    - variable: completion_type
      label:
        zh_Hans: 补全类型
        en_US: Model type
      type: select
      required: true
      default: chat
      placeholder:
        zh_Hans: 在此输入您的补全类型
        en_US: Enter the completion type
      options:
        - label:
            zh_Hans: 补全模型
            en_US: Completion model
          value: completion
        - label:
            zh_Hans: 对话模型
            en_US: Chat model
          value: chat
    - variable: stream
      label:
        zh_Hans: 流式输出
        en_US: Stream output
      type: select
      required: true
      default: true
      placeholder:
        zh_Hans: 是否支持流式输出
        en_US: Whether to support stream output
      options:
        - label:
            zh_Hans: 是
            en_US: "Yes"
          value: true
        - label:
            zh_Hans: 否
            en_US: "No"
          value: false
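For orientation, the credential form above produces a flat dict that the LLM class consumes. Below is a minimal sketch with assumed values; note that this schema defines `context_size` while the Python code above reads `credentials.get('context_length', 2048)`, so both keys are shown here.

# Illustrative credentials dict assembled from the form fields above (all values are assumptions).
credentials = {
    'server_url': 'http://192.168.1.100:8000',
    'context_size': '2048',        # field defined by this schema
    'context_length': '2048',      # key read by get_customizable_model_schema for the max_tokens rule and context size
    'completion_type': 'chat',     # mapped to LLMMode.CHAT
    'stream': 'true',              # checked via bool(credentials['stream']) before streaming
}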
@ -0,0 +1,37 @@
model: ernie-3.5-8k
label:
  en_US: Ernie-3.5-8K
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 4096
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0.1
    max: 1.0
    default: 0.8
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 2
    max: 2048
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
  - name: response_format
    use_template: response_format
  - name: disable_search
    label:
      zh_Hans: 禁用搜索
      en_US: Disable Search
    type: boolean
    help:
      zh_Hans: 禁用模型自行进行外部搜索。
      en_US: Disable the model to perform external search.
    required: false
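As a quick reference, the parameter rules above constrain what a caller may pass for this model. A hypothetical `model_parameters` dict that satisfies them (values are illustrative, not taken from the source):

# Illustrative parameter values within the ranges declared above.
model_parameters = {
    'temperature': 0.8,        # allowed range 0.1 - 1.0, default 0.8
    'top_p': 0.9,
    'max_tokens': 1024,        # allowed range 2 - 2048
    'presence_penalty': 0.0,
    'frequency_penalty': 0.0,
    'disable_search': False,   # boolean switch declared above
}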
@ -0,0 +1,37 @@
model: ernie-3.5-8k-0205
label:
  en_US: Ernie-3.5-8K-0205
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0.1
    max: 1.0
    default: 0.8
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 2
    max: 2048
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
  - name: response_format
    use_template: response_format
  - name: disable_search
    label:
      zh_Hans: 禁用搜索
      en_US: Disable Search
    type: boolean
    help:
      zh_Hans: 禁用模型自行进行外部搜索。
      en_US: Disable the model to perform external search.
    required: false
@ -0,0 +1,37 @@
model: ernie-3.5-8k-1222
label:
  en_US: Ernie-3.5-8K-1222
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0.1
    max: 1.0
    default: 0.8
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 2
    max: 2048
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
  - name: response_format
    use_template: response_format
  - name: disable_search
    label:
      zh_Hans: 禁用搜索
      en_US: Disable Search
    type: boolean
    help:
      zh_Hans: 禁用模型自行进行外部搜索。
      en_US: Disable the model to perform external search.
    required: false
@ -0,0 +1,37 @@
model: ernie-3.5-8k
label:
  en_US: Ernie-3.5-8K
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0.1
    max: 1.0
    default: 0.8
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 2
    max: 2048
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
  - name: response_format
    use_template: response_format
  - name: disable_search
    label:
      zh_Hans: 禁用搜索
      en_US: Disable Search
    type: boolean
    help:
      zh_Hans: 禁用模型自行进行外部搜索。
      en_US: Disable the model to perform external search.
    required: false
@ -0,0 +1,37 @@
model: ernie-4.0-8k
label:
  en_US: Ernie-4.0-8K
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0.1
    max: 1.0
    default: 0.8
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 2
    max: 2048
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
  - name: response_format
    use_template: response_format
  - name: disable_search
    label:
      zh_Hans: 禁用搜索
      en_US: Disable Search
    type: boolean
    help:
      zh_Hans: 禁用模型自行进行外部搜索。
      en_US: Disable the model to perform external search.
    required: false
@ -36,3 +36,4 @@ parameter_rules:
      zh_Hans: 禁用模型自行进行外部搜索。
      en_US: Disable the model to perform external search.
    required: false
deprecated: true

@ -36,3 +36,4 @@ parameter_rules:
      zh_Hans: 禁用模型自行进行外部搜索。
      en_US: Disable the model to perform external search.
    required: false
deprecated: true

@ -27,3 +27,4 @@ parameter_rules:
    use_template: frequency_penalty
  - name: response_format
    use_template: response_format
deprecated: true

@ -36,3 +36,4 @@ parameter_rules:
    required: false
  - name: response_format
    use_template: response_format
deprecated: true

@ -0,0 +1,30 @@
model: ernie-lite-8k-0308
label:
  en_US: ERNIE-Lite-8K-0308
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0.1
    max: 1.0
    default: 0.95
  - name: top_p
    use_template: top_p
    min: 0
    max: 1.0
    default: 0.7
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 2
    max: 1024
  - name: presence_penalty
    use_template: presence_penalty
    default: 1.0
    min: 1.0
    max: 2.0
Some files were not shown because too many files have changed in this diff.