mirror of
https://github.com/langgenius/dify.git
synced 2026-05-24 02:47:53 +08:00
Compare commits
27 Commits
yanli/phas
...
chore/bump
| Author | SHA1 | Date | |
|---|---|---|---|
| c67c1090a2 | |||
| c9dd4a0dd4 | |||
| bcfe8c368c | |||
| c5d8c008da | |||
| d8a465a746 | |||
| 5aaf327d58 | |||
| 0ef793f935 | |||
| cc72e56cd0 | |||
| a5d6a0369d | |||
| 1eac1aa03d | |||
| 654153fcf5 | |||
| f3d4605dc7 | |||
| cd771ed909 | |||
| cb94877c18 | |||
| 9853e28230 | |||
| d666fb1b25 | |||
| 573ec3af9e | |||
| e7746cb256 | |||
| 2256e75f16 | |||
| 3184ffd39b | |||
| 57a4828dbf | |||
| e7e28baff7 | |||
| 3bd6f1a253 | |||
| d1f6edd7ab | |||
| 59639ca9b2 | |||
| 66b8c42a25 | |||
| 449d8c7768 |
1
.ee-base-commit
Normal file
1
.ee-base-commit
Normal file
@ -0,0 +1 @@
|
||||
cc72e56cd0cfd60581d98a2a5e2d4d4a41c9436d
|
||||
19
.github/workflows/anti-slop.yml
vendored
19
.github/workflows/anti-slop.yml
vendored
@ -1,19 +0,0 @@
|
||||
name: Anti-Slop PR Check
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened, edited, synchronize]
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
anti-slop:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: peakoss/anti-slop@85daca1880e9e1af197fc06ea03349daf08f4202 # v0.2.1
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
close-pr: false
|
||||
failure-add-pr-labels: "needs-revision"
|
||||
147
.github/workflows/api-tests.yml
vendored
147
.github/workflows/api-tests.yml
vendored
@ -14,18 +14,17 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
test:
|
||||
name: API Tests
|
||||
runs-on: ubuntu-latest
|
||||
api-unit:
|
||||
name: API Unit Tests
|
||||
runs-on: depot-ubuntu-24.04
|
||||
env:
|
||||
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
||||
COVERAGE_FILE: coverage-unit
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.11"
|
||||
- "3.12"
|
||||
|
||||
steps:
|
||||
@ -36,7 +35,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Setup UV and Python
|
||||
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
|
||||
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
|
||||
with:
|
||||
enable-cache: true
|
||||
python-version: ${{ matrix.python-version }}
|
||||
@ -51,16 +50,62 @@ jobs:
|
||||
- name: Run dify config tests
|
||||
run: uv run --project api dev/pytest/pytest_config_tests.py
|
||||
|
||||
- name: Run Unit Tests
|
||||
run: uv run --project api bash dev/pytest/pytest_unit_tests.sh
|
||||
|
||||
- name: Upload unit coverage data
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: api-coverage-unit
|
||||
path: coverage-unit
|
||||
retention-days: 1
|
||||
|
||||
api-integration:
|
||||
name: API Integration Tests
|
||||
runs-on: depot-ubuntu-24.04
|
||||
env:
|
||||
COVERAGE_FILE: coverage-integration
|
||||
STORAGE_TYPE: opendal
|
||||
OPENDAL_SCHEME: fs
|
||||
OPENDAL_FS_ROOT: /tmp/dify-storage
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.12"
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Setup UV and Python
|
||||
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
|
||||
with:
|
||||
enable-cache: true
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache-dependency-glob: api/uv.lock
|
||||
|
||||
- name: Check UV lockfile
|
||||
run: uv lock --project api --check
|
||||
|
||||
- name: Install dependencies
|
||||
run: uv sync --project api --dev
|
||||
|
||||
- name: Set up dotenvs
|
||||
run: |
|
||||
cp docker/.env.example docker/.env
|
||||
cp docker/middleware.env.example docker/middleware.env
|
||||
cp docker/envs/middleware.env.example docker/middleware.env
|
||||
|
||||
- name: Expose Service Ports
|
||||
run: sh .github/workflows/expose_service_ports.sh
|
||||
|
||||
- name: Set up Sandbox
|
||||
uses: hoverkraft-tech/compose-action@4894d2492015c1774ee5a13a95b1072093087ec3 # v2.5.0
|
||||
uses: hoverkraft-tech/compose-action@d2bee4f07e8ca410d6b196d00f90c12e7d48c33a # v2.6.0
|
||||
with:
|
||||
compose-file: |
|
||||
docker/docker-compose.middleware.yaml
|
||||
@ -74,23 +119,91 @@ jobs:
|
||||
run: |
|
||||
cp api/tests/integration_tests/.env.example api/tests/integration_tests/.env
|
||||
|
||||
- name: Run API Tests
|
||||
env:
|
||||
STORAGE_TYPE: opendal
|
||||
OPENDAL_SCHEME: fs
|
||||
OPENDAL_FS_ROOT: /tmp/dify-storage
|
||||
- name: Run Integration Tests
|
||||
run: |
|
||||
uv run --project api pytest \
|
||||
-n auto \
|
||||
--timeout "${PYTEST_TIMEOUT:-180}" \
|
||||
api/tests/integration_tests/workflow \
|
||||
api/tests/integration_tests/tools \
|
||||
api/tests/test_containers_integration_tests \
|
||||
api/tests/unit_tests
|
||||
api/tests/test_containers_integration_tests
|
||||
|
||||
- name: Upload integration coverage data
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: api-coverage-integration
|
||||
path: coverage-integration
|
||||
retention-days: 1
|
||||
|
||||
api-coverage:
|
||||
name: API Coverage
|
||||
runs-on: depot-ubuntu-24.04
|
||||
needs:
|
||||
- api-unit
|
||||
- api-integration
|
||||
env:
|
||||
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
||||
COVERAGE_FILE: .coverage
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Setup UV and Python
|
||||
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
|
||||
with:
|
||||
enable-cache: true
|
||||
python-version: "3.12"
|
||||
cache-dependency-glob: api/uv.lock
|
||||
|
||||
- name: Install dependencies
|
||||
run: uv sync --project api --dev
|
||||
|
||||
- name: Download coverage data
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
|
||||
with:
|
||||
path: coverage-data
|
||||
pattern: api-coverage-*
|
||||
merge-multiple: true
|
||||
|
||||
- name: Combine coverage
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
echo "### API Coverage" >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "" >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "Merged backend coverage report generated for Codecov project status." >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "" >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
unit_coverage="$(find coverage-data -type f -name coverage-unit -print -quit)"
|
||||
integration_coverage="$(find coverage-data -type f -name coverage-integration -print -quit)"
|
||||
: "${unit_coverage:?coverage-unit artifact not found}"
|
||||
: "${integration_coverage:?coverage-integration artifact not found}"
|
||||
|
||||
report_file="$(mktemp)"
|
||||
uv run --project api coverage combine "$unit_coverage" "$integration_coverage"
|
||||
uv run --project api coverage report --show-missing | tee "$report_file"
|
||||
echo "Summary: \`$(tail -n 1 "$report_file")\`" >> "$GITHUB_STEP_SUMMARY"
|
||||
{
|
||||
echo ""
|
||||
echo "<details><summary>Coverage report</summary>"
|
||||
echo ""
|
||||
echo '```'
|
||||
cat "$report_file"
|
||||
echo '```'
|
||||
echo "</details>"
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
uv run --project api coverage xml -o coverage.xml
|
||||
|
||||
- name: Report coverage
|
||||
if: ${{ env.CODECOV_TOKEN != '' && matrix.python-version == '3.12' }}
|
||||
uses: codecov/codecov-action@1af58845a975a7985b0beb0cbe6fbbb71a41dbad # v5.5.3
|
||||
if: ${{ env.CODECOV_TOKEN != '' }}
|
||||
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
|
||||
with:
|
||||
files: ./coverage.xml
|
||||
disable_search: true
|
||||
|
||||
60
.github/workflows/autofix.yml
vendored
60
.github/workflows/autofix.yml
vendored
@ -2,6 +2,9 @@ name: autofix.ci
|
||||
on:
|
||||
pull_request:
|
||||
branches: ["main"]
|
||||
merge_group:
|
||||
branches: ["main"]
|
||||
types: [checks_requested]
|
||||
push:
|
||||
branches: ["main"]
|
||||
permissions:
|
||||
@ -10,13 +13,19 @@ permissions:
|
||||
jobs:
|
||||
autofix:
|
||||
if: github.repository == 'langgenius/dify'
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- name: Complete merge group check
|
||||
if: github.event_name == 'merge_group'
|
||||
run: echo "autofix.ci updates pull request branches, not merge group refs."
|
||||
|
||||
- if: github.event_name != 'merge_group'
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Check Docker Compose inputs
|
||||
if: github.event_name != 'merge_group'
|
||||
id: docker-compose-changes
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
with:
|
||||
files: |
|
||||
docker/generate_docker_compose
|
||||
@ -24,30 +33,39 @@ jobs:
|
||||
docker/docker-compose-template.yaml
|
||||
docker/docker-compose.yaml
|
||||
- name: Check web inputs
|
||||
if: github.event_name != 'merge_group'
|
||||
id: web-changes
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
with:
|
||||
files: |
|
||||
web/**
|
||||
packages/**
|
||||
package.json
|
||||
pnpm-lock.yaml
|
||||
pnpm-workspace.yaml
|
||||
.nvmrc
|
||||
- name: Check api inputs
|
||||
if: github.event_name != 'merge_group'
|
||||
id: api-changes
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
with:
|
||||
files: |
|
||||
api/**
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
- if: github.event_name != 'merge_group'
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
|
||||
- if: github.event_name != 'merge_group'
|
||||
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
|
||||
|
||||
- name: Generate Docker Compose
|
||||
if: steps.docker-compose-changes.outputs.any_changed == 'true'
|
||||
if: github.event_name != 'merge_group' && steps.docker-compose-changes.outputs.any_changed == 'true'
|
||||
run: |
|
||||
cd docker
|
||||
./generate_docker_compose
|
||||
|
||||
- if: steps.api-changes.outputs.any_changed == 'true'
|
||||
- if: github.event_name != 'merge_group' && steps.api-changes.outputs.any_changed == 'true'
|
||||
run: |
|
||||
cd api
|
||||
uv sync --dev
|
||||
@ -59,13 +77,13 @@ jobs:
|
||||
uv run ruff format ..
|
||||
|
||||
- name: count migration progress
|
||||
if: steps.api-changes.outputs.any_changed == 'true'
|
||||
if: github.event_name != 'merge_group' && steps.api-changes.outputs.any_changed == 'true'
|
||||
run: |
|
||||
cd api
|
||||
./cnt_base.sh
|
||||
|
||||
- name: ast-grep
|
||||
if: steps.api-changes.outputs.any_changed == 'true'
|
||||
if: github.event_name != 'merge_group' && steps.api-changes.outputs.any_changed == 'true'
|
||||
run: |
|
||||
# ast-grep exits 1 if no matches are found; allow idempotent runs.
|
||||
uvx --from ast-grep-cli ast-grep --pattern 'db.session.query($WHATEVER).filter($HERE)' --rewrite 'db.session.query($WHATEVER).where($HERE)' -l py --update-all || true
|
||||
@ -95,13 +113,23 @@ jobs:
|
||||
find . -name "*.py.bak" -type f -delete
|
||||
|
||||
- name: Setup web environment
|
||||
if: steps.web-changes.outputs.any_changed == 'true'
|
||||
if: github.event_name != 'merge_group'
|
||||
uses: ./.github/actions/setup-web
|
||||
|
||||
- name: ESLint autofix
|
||||
if: steps.web-changes.outputs.any_changed == 'true'
|
||||
- name: Generate API docs
|
||||
if: github.event_name != 'merge_group' && steps.api-changes.outputs.any_changed == 'true'
|
||||
run: |
|
||||
cd api
|
||||
uv run dev/generate_swagger_markdown_docs.py --swagger-dir ../packages/contracts/openapi --markdown-dir openapi/markdown --keep-swagger-json
|
||||
|
||||
- name: Generate frontend contracts
|
||||
if: github.event_name != 'merge_group' && steps.api-changes.outputs.any_changed == 'true'
|
||||
run: pnpm --dir packages/contracts gen-api-contract-from-openapi
|
||||
|
||||
- name: ESLint autofix
|
||||
if: github.event_name != 'merge_group' && steps.web-changes.outputs.any_changed == 'true'
|
||||
run: |
|
||||
cd web
|
||||
vp exec eslint --concurrency=2 --prune-suppressions --quiet || true
|
||||
|
||||
- uses: autofix-ci/action@7a166d7532b277f34e16238930461bf77f9d7ed8 # v1.3.3
|
||||
- if: github.event_name != 'merge_group'
|
||||
uses: autofix-ci/action@c5b2d67aa2274e7b5a18224e8171550871fc7e4a # v1.3.4
|
||||
|
||||
74
.github/workflows/build-push.yml
vendored
74
.github/workflows/build-push.yml
vendored
@ -24,27 +24,42 @@ env:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ${{ matrix.platform == 'linux/arm64' && 'arm64_runner' || 'ubuntu-latest' }}
|
||||
runs-on: ${{ matrix.runs_on }}
|
||||
if: github.repository == 'langgenius/dify'
|
||||
permissions:
|
||||
contents: read
|
||||
id-token: write
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- service_name: "build-api-amd64"
|
||||
image_name_env: "DIFY_API_IMAGE_NAME"
|
||||
context: "api"
|
||||
artifact_context: "api"
|
||||
build_context: "{{defaultContext}}:api"
|
||||
file: "Dockerfile"
|
||||
platform: linux/amd64
|
||||
runs_on: depot-ubuntu-24.04-4
|
||||
- service_name: "build-api-arm64"
|
||||
image_name_env: "DIFY_API_IMAGE_NAME"
|
||||
context: "api"
|
||||
artifact_context: "api"
|
||||
build_context: "{{defaultContext}}:api"
|
||||
file: "Dockerfile"
|
||||
platform: linux/arm64
|
||||
runs_on: depot-ubuntu-24.04-4
|
||||
- service_name: "build-web-amd64"
|
||||
image_name_env: "DIFY_WEB_IMAGE_NAME"
|
||||
context: "web"
|
||||
artifact_context: "web"
|
||||
build_context: "{{defaultContext}}"
|
||||
file: "web/Dockerfile"
|
||||
platform: linux/amd64
|
||||
runs_on: depot-ubuntu-24.04-4
|
||||
- service_name: "build-web-arm64"
|
||||
image_name_env: "DIFY_WEB_IMAGE_NAME"
|
||||
context: "web"
|
||||
artifact_context: "web"
|
||||
build_context: "{{defaultContext}}"
|
||||
file: "web/Dockerfile"
|
||||
platform: linux/arm64
|
||||
runs_on: depot-ubuntu-24.04-4
|
||||
|
||||
steps:
|
||||
- name: Prepare
|
||||
@ -53,16 +68,13 @@ jobs:
|
||||
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ env.DOCKERHUB_USER }}
|
||||
password: ${{ env.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4.0.0
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
|
||||
- name: Set up Depot CLI
|
||||
uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1.7.1
|
||||
|
||||
- name: Extract metadata for Docker
|
||||
id: meta
|
||||
@ -72,15 +84,15 @@ jobs:
|
||||
|
||||
- name: Build Docker image
|
||||
id: build
|
||||
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
|
||||
uses: depot/build-push-action@5f3b3c2e5a00f0093de47f657aeaefcedff27d18 # v1.17.0
|
||||
with:
|
||||
context: "{{defaultContext}}:${{ matrix.context }}"
|
||||
project: ${{ vars.DEPOT_PROJECT_ID }}
|
||||
context: ${{ matrix.build_context }}
|
||||
file: ${{ matrix.file }}
|
||||
platforms: ${{ matrix.platform }}
|
||||
build-args: COMMIT_SHA=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.revision'] }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
outputs: type=image,name=${{ env[matrix.image_name_env] }},push-by-digest=true,name-canonical=true,push=true
|
||||
cache-from: type=gha,scope=${{ matrix.service_name }}
|
||||
cache-to: type=gha,mode=max,scope=${{ matrix.service_name }}
|
||||
|
||||
- name: Export digest
|
||||
env:
|
||||
@ -91,16 +103,40 @@ jobs:
|
||||
touch "/tmp/digests/${sanitized_digest}"
|
||||
|
||||
- name: Upload digest
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: digests-${{ matrix.context }}-${{ env.PLATFORM_PAIR }}
|
||||
name: digests-${{ matrix.artifact_context }}-${{ env.PLATFORM_PAIR }}
|
||||
path: /tmp/digests/*
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
|
||||
fork-build-validate:
|
||||
if: github.repository != 'langgenius/dify'
|
||||
runs-on: ubuntu-24.04
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- service_name: "validate-api-amd64"
|
||||
build_context: "{{defaultContext}}:api"
|
||||
file: "Dockerfile"
|
||||
- service_name: "validate-web-amd64"
|
||||
build_context: "{{defaultContext}}"
|
||||
file: "web/Dockerfile"
|
||||
steps:
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
|
||||
|
||||
- name: Validate Docker image
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
push: false
|
||||
context: ${{ matrix.build_context }}
|
||||
file: ${{ matrix.file }}
|
||||
platforms: linux/amd64
|
||||
|
||||
create-manifest:
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
if: github.repository == 'langgenius/dify'
|
||||
strategy:
|
||||
matrix:
|
||||
@ -120,7 +156,7 @@ jobs:
|
||||
merge-multiple: true
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ env.DOCKERHUB_USER }}
|
||||
password: ${{ env.DOCKERHUB_TOKEN }}
|
||||
|
||||
38
.github/workflows/db-migration-test.yml
vendored
38
.github/workflows/db-migration-test.yml
vendored
@ -9,7 +9,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
db-migration-test-postgres:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@ -19,7 +19,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Setup UV and Python
|
||||
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
|
||||
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
|
||||
with:
|
||||
enable-cache: true
|
||||
python-version: "3.12"
|
||||
@ -37,10 +37,10 @@ jobs:
|
||||
- name: Prepare middleware env
|
||||
run: |
|
||||
cd docker
|
||||
cp middleware.env.example middleware.env
|
||||
cp envs/middleware.env.example middleware.env
|
||||
|
||||
- name: Set up Middlewares
|
||||
uses: hoverkraft-tech/compose-action@4894d2492015c1774ee5a13a95b1072093087ec3 # v2.5.0
|
||||
uses: hoverkraft-tech/compose-action@d2bee4f07e8ca410d6b196d00f90c12e7d48c33a # v2.6.0
|
||||
with:
|
||||
compose-file: |
|
||||
docker/docker-compose.middleware.yaml
|
||||
@ -59,7 +59,7 @@ jobs:
|
||||
run: uv run --directory api flask upgrade-db
|
||||
|
||||
db-migration-test-mysql:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@ -69,7 +69,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Setup UV and Python
|
||||
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
|
||||
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
|
||||
with:
|
||||
enable-cache: true
|
||||
python-version: "3.12"
|
||||
@ -87,14 +87,14 @@ jobs:
|
||||
- name: Prepare middleware env for MySQL
|
||||
run: |
|
||||
cd docker
|
||||
cp middleware.env.example middleware.env
|
||||
cp envs/middleware.env.example middleware.env
|
||||
sed -i 's/DB_TYPE=postgresql/DB_TYPE=mysql/' middleware.env
|
||||
sed -i 's/DB_HOST=db_postgres/DB_HOST=db_mysql/' middleware.env
|
||||
sed -i 's/DB_PORT=5432/DB_PORT=3306/' middleware.env
|
||||
sed -i 's/DB_USERNAME=postgres/DB_USERNAME=mysql/' middleware.env
|
||||
|
||||
- name: Set up Middlewares
|
||||
uses: hoverkraft-tech/compose-action@4894d2492015c1774ee5a13a95b1072093087ec3 # v2.5.0
|
||||
uses: hoverkraft-tech/compose-action@d2bee4f07e8ca410d6b196d00f90c12e7d48c33a # v2.6.0
|
||||
with:
|
||||
compose-file: |
|
||||
docker/docker-compose.middleware.yaml
|
||||
@ -110,6 +110,28 @@ jobs:
|
||||
sed -i 's/DB_PORT=5432/DB_PORT=3306/' .env
|
||||
sed -i 's/DB_USERNAME=postgres/DB_USERNAME=root/' .env
|
||||
|
||||
# hoverkraft-tech/compose-action@v2.6.0 only waits for `docker compose up -d`
|
||||
# to return (container processes started); it does not wait on healthcheck
|
||||
# status. mysql:8.0's first-time init takes 15-30s, so without an explicit
|
||||
# wait the migration runs while InnoDB is still initialising and gets
|
||||
# killed with "Lost connection during query". Poll a real SELECT until it
|
||||
# succeeds.
|
||||
- name: Wait for MySQL to accept queries
|
||||
run: |
|
||||
set +e
|
||||
for i in $(seq 1 60); do
|
||||
if docker run --rm --network host mysql:8.0 \
|
||||
mysql -h 127.0.0.1 -P 3306 -uroot -pdifyai123456 \
|
||||
-e 'SELECT 1' >/dev/null 2>&1; then
|
||||
echo "MySQL ready after ${i}s"
|
||||
exit 0
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
echo "MySQL not ready after 60s; dumping container logs:"
|
||||
docker compose -f docker/docker-compose.middleware.yaml --profile mysql logs --tail=200 db_mysql
|
||||
exit 1
|
||||
|
||||
- name: Run DB Migration
|
||||
env:
|
||||
DEBUG: true
|
||||
|
||||
2
.github/workflows/deploy-agent-dev.yml
vendored
2
.github/workflows/deploy-agent-dev.yml
vendored
@ -13,7 +13,7 @@ on:
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
if: |
|
||||
github.event.workflow_run.conclusion == 'success' &&
|
||||
github.event.workflow_run.head_branch == 'deploy/agent-dev'
|
||||
|
||||
2
.github/workflows/deploy-dev.yml
vendored
2
.github/workflows/deploy-dev.yml
vendored
@ -10,7 +10,7 @@ on:
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
if: |
|
||||
github.event.workflow_run.conclusion == 'success' &&
|
||||
github.event.workflow_run.head_branch == 'deploy/dev'
|
||||
|
||||
2
.github/workflows/deploy-enterprise.yml
vendored
2
.github/workflows/deploy-enterprise.yml
vendored
@ -13,7 +13,7 @@ on:
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
if: |
|
||||
github.event.workflow_run.conclusion == 'success' &&
|
||||
github.event.workflow_run.head_branch == 'deploy/enterprise'
|
||||
|
||||
2
.github/workflows/deploy-hitl.yml
vendored
2
.github/workflows/deploy-hitl.yml
vendored
@ -10,7 +10,7 @@ on:
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
if: |
|
||||
github.event.workflow_run.conclusion == 'success' &&
|
||||
github.event.workflow_run.head_branch == 'build/feat/hitl'
|
||||
|
||||
60
.github/workflows/docker-build.yml
vendored
60
.github/workflows/docker-build.yml
vendored
@ -14,35 +14,69 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
build-docker:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event.pull_request.head.repo.full_name == github.repository
|
||||
runs-on: ${{ matrix.runs_on }}
|
||||
permissions:
|
||||
contents: read
|
||||
id-token: write
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- service_name: "api-amd64"
|
||||
platform: linux/amd64
|
||||
context: "api"
|
||||
runs_on: depot-ubuntu-24.04-4
|
||||
context: "{{defaultContext}}:api"
|
||||
file: "Dockerfile"
|
||||
- service_name: "api-arm64"
|
||||
platform: linux/arm64
|
||||
context: "api"
|
||||
runs_on: depot-ubuntu-24.04-4
|
||||
context: "{{defaultContext}}:api"
|
||||
file: "Dockerfile"
|
||||
- service_name: "web-amd64"
|
||||
platform: linux/amd64
|
||||
context: "web"
|
||||
runs_on: depot-ubuntu-24.04-4
|
||||
context: "{{defaultContext}}"
|
||||
file: "web/Dockerfile"
|
||||
- service_name: "web-arm64"
|
||||
platform: linux/arm64
|
||||
context: "web"
|
||||
runs_on: depot-ubuntu-24.04-4
|
||||
context: "{{defaultContext}}"
|
||||
file: "web/Dockerfile"
|
||||
steps:
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4.0.0
|
||||
- name: Set up Depot CLI
|
||||
uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1.7.1
|
||||
|
||||
- name: Build Docker Image
|
||||
uses: depot/build-push-action@5f3b3c2e5a00f0093de47f657aeaefcedff27d18 # v1.17.0
|
||||
with:
|
||||
project: ${{ vars.DEPOT_PROJECT_ID }}
|
||||
push: false
|
||||
context: ${{ matrix.context }}
|
||||
file: ${{ matrix.file }}
|
||||
platforms: ${{ matrix.platform }}
|
||||
|
||||
build-docker-fork:
|
||||
if: github.event.pull_request.head.repo.full_name != github.repository
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- service_name: "api-amd64"
|
||||
context: "{{defaultContext}}:api"
|
||||
file: "Dockerfile"
|
||||
- service_name: "web-amd64"
|
||||
context: "{{defaultContext}}"
|
||||
file: "web/Dockerfile"
|
||||
steps:
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
|
||||
|
||||
- name: Build Docker Image
|
||||
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
push: false
|
||||
context: "{{defaultContext}}:${{ matrix.context }}"
|
||||
file: "${{ matrix.file }}"
|
||||
platforms: ${{ matrix.platform }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
context: ${{ matrix.context }}
|
||||
file: ${{ matrix.file }}
|
||||
platforms: linux/amd64
|
||||
|
||||
49
.github/workflows/hotfix-cherry-pick.yml
vendored
Normal file
49
.github/workflows/hotfix-cherry-pick.yml
vendored
Normal file
@ -0,0 +1,49 @@
|
||||
name: Hotfix Cherry-Pick Provenance
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- 'hotfix/**'
|
||||
- 'lts/**'
|
||||
types:
|
||||
- opened
|
||||
- edited
|
||||
- reopened
|
||||
- ready_for_review
|
||||
- synchronize
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: hotfix-cherry-pick-${{ github.event.pull_request.number || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
check-cherry-pick-provenance:
|
||||
name: Require cherry-pick provenance
|
||||
runs-on: depot-ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Fetch PR base, PR head, and main
|
||||
env:
|
||||
BASE_REF: ${{ github.base_ref }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
run: |
|
||||
git fetch --no-tags --prune origin \
|
||||
"+refs/heads/main:refs/remotes/origin/main" \
|
||||
"+refs/heads/${BASE_REF}:refs/remotes/origin/${BASE_REF}" \
|
||||
"+refs/pull/${PR_NUMBER}/head:refs/remotes/pull/${PR_NUMBER}/head"
|
||||
|
||||
- name: Load checker from main
|
||||
run: git show origin/main:.github/scripts/check-hotfix-cherry-picks.sh > "$RUNNER_TEMP/check-hotfix-cherry-picks.sh"
|
||||
|
||||
- name: Check PR commits
|
||||
env:
|
||||
BASE_SHA: ${{ github.event.pull_request.base.sha }}
|
||||
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
|
||||
MAIN_REF: origin/main
|
||||
run: bash "$RUNNER_TEMP/check-hotfix-cherry-picks.sh"
|
||||
4
.github/workflows/labeler.yml
vendored
4
.github/workflows/labeler.yml
vendored
@ -7,8 +7,8 @@ jobs:
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/labeler@634933edcd8ababfe52f92936142cc22ac488b1b # v6.0.1
|
||||
- uses: actions/labeler@f27b608878404679385c85cfa523b85ccb86e213 # v6.1.0
|
||||
with:
|
||||
sync-labels: true
|
||||
|
||||
382
.github/workflows/main-ci.yml
vendored
382
.github/workflows/main-ci.yml
vendored
@ -3,10 +3,14 @@ name: Main CI Pipeline
|
||||
on:
|
||||
pull_request:
|
||||
branches: ["main"]
|
||||
merge_group:
|
||||
branches: ["main"]
|
||||
types: [checks_requested]
|
||||
push:
|
||||
branches: ["main"]
|
||||
|
||||
permissions:
|
||||
actions: write
|
||||
contents: write
|
||||
pull-requests: write
|
||||
checks: write
|
||||
@ -17,12 +21,28 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
pre_job:
|
||||
name: Skip Duplicate Checks
|
||||
runs-on: depot-ubuntu-24.04
|
||||
outputs:
|
||||
should_skip: ${{ steps.skip_check.outputs.should_skip || 'false' }}
|
||||
steps:
|
||||
- id: skip_check
|
||||
continue-on-error: true
|
||||
uses: fkirc/skip-duplicate-actions@f75f66ce1886f00957d99748a42c724f4330bdcf # v5.3.1
|
||||
with:
|
||||
cancel_others: 'true'
|
||||
concurrent_skipping: same_content_newer
|
||||
|
||||
# Check which paths were changed to determine which tests to run
|
||||
check-changes:
|
||||
name: Check Changed Files
|
||||
runs-on: ubuntu-latest
|
||||
needs: pre_job
|
||||
if: needs.pre_job.outputs.should_skip != 'true'
|
||||
runs-on: depot-ubuntu-24.04
|
||||
outputs:
|
||||
api-changed: ${{ steps.changes.outputs.api }}
|
||||
e2e-changed: ${{ steps.changes.outputs.e2e }}
|
||||
web-changed: ${{ steps.changes.outputs.web }}
|
||||
vdb-changed: ${{ steps.changes.outputs.vdb }}
|
||||
migration-changed: ${{ steps.changes.outputs.migration }}
|
||||
@ -34,49 +54,375 @@ jobs:
|
||||
filters: |
|
||||
api:
|
||||
- 'api/**'
|
||||
- 'docker/**'
|
||||
- '.github/workflows/api-tests.yml'
|
||||
- '.github/workflows/expose_service_ports.sh'
|
||||
- 'docker/.env.example'
|
||||
- 'docker/envs/middleware.env.example'
|
||||
- 'docker/docker-compose.middleware.yaml'
|
||||
- 'docker/docker-compose-template.yaml'
|
||||
- 'docker/generate_docker_compose'
|
||||
- 'docker/ssrf_proxy/**'
|
||||
- 'docker/volumes/sandbox/conf/**'
|
||||
web:
|
||||
- 'web/**'
|
||||
- 'packages/**'
|
||||
- 'package.json'
|
||||
- 'pnpm-lock.yaml'
|
||||
- 'pnpm-workspace.yaml'
|
||||
- '.nvmrc'
|
||||
- '.github/workflows/web-tests.yml'
|
||||
- '.github/actions/setup-web/**'
|
||||
e2e:
|
||||
- 'api/**'
|
||||
- 'api/pyproject.toml'
|
||||
- 'api/uv.lock'
|
||||
- 'e2e/**'
|
||||
- 'web/**'
|
||||
- 'packages/**'
|
||||
- 'package.json'
|
||||
- 'pnpm-lock.yaml'
|
||||
- 'pnpm-workspace.yaml'
|
||||
- '.nvmrc'
|
||||
- 'docker/docker-compose.middleware.yaml'
|
||||
- 'docker/envs/middleware.env.example'
|
||||
- '.github/workflows/web-e2e.yml'
|
||||
- '.github/actions/setup-web/**'
|
||||
vdb:
|
||||
- 'api/core/rag/datasource/**'
|
||||
- 'docker/**'
|
||||
- 'api/tests/integration_tests/vdb/**'
|
||||
- 'api/providers/vdb/*/tests/**'
|
||||
- '.github/workflows/vdb-tests.yml'
|
||||
- '.github/workflows/expose_service_ports.sh'
|
||||
- 'docker/.env.example'
|
||||
- 'docker/envs/middleware.env.example'
|
||||
- 'docker/docker-compose.yaml'
|
||||
- 'docker/docker-compose-template.yaml'
|
||||
- 'docker/generate_docker_compose'
|
||||
- 'docker/certbot/**'
|
||||
- 'docker/couchbase-server/**'
|
||||
- 'docker/elasticsearch/**'
|
||||
- 'docker/iris/**'
|
||||
- 'docker/nginx/**'
|
||||
- 'docker/pgvector/**'
|
||||
- 'docker/ssrf_proxy/**'
|
||||
- 'docker/startupscripts/**'
|
||||
- 'docker/tidb/**'
|
||||
- 'docker/volumes/**'
|
||||
- 'api/uv.lock'
|
||||
- 'api/pyproject.toml'
|
||||
migration:
|
||||
- 'api/migrations/**'
|
||||
- 'api/.env.example'
|
||||
- '.github/workflows/db-migration-test.yml'
|
||||
- '.github/workflows/expose_service_ports.sh'
|
||||
- 'docker/.env.example'
|
||||
- 'docker/envs/middleware.env.example'
|
||||
- 'docker/docker-compose.middleware.yaml'
|
||||
- 'docker/docker-compose-template.yaml'
|
||||
- 'docker/generate_docker_compose'
|
||||
- 'docker/ssrf_proxy/**'
|
||||
- 'docker/volumes/sandbox/conf/**'
|
||||
|
||||
# Run tests in parallel
|
||||
api-tests:
|
||||
name: API Tests
|
||||
needs: check-changes
|
||||
if: needs.check-changes.outputs.api-changed == 'true'
|
||||
# Run tests in parallel while always emitting stable required checks.
|
||||
api-tests-run:
|
||||
name: Run API Tests
|
||||
needs:
|
||||
- pre_job
|
||||
- check-changes
|
||||
if: needs.pre_job.outputs.should_skip != 'true' && needs.check-changes.outputs.api-changed == 'true'
|
||||
uses: ./.github/workflows/api-tests.yml
|
||||
secrets: inherit
|
||||
|
||||
web-tests:
|
||||
name: Web Tests
|
||||
needs: check-changes
|
||||
if: needs.check-changes.outputs.web-changed == 'true'
|
||||
api-tests-skip:
|
||||
name: Skip API Tests
|
||||
needs:
|
||||
- pre_job
|
||||
- check-changes
|
||||
if: needs.pre_job.outputs.should_skip != 'true' && needs.check-changes.outputs.api-changed != 'true'
|
||||
runs-on: depot-ubuntu-24.04
|
||||
steps:
|
||||
- name: Report skipped API tests
|
||||
run: echo "No API-related changes detected; skipping API tests."
|
||||
|
||||
api-tests:
|
||||
name: API Tests
|
||||
if: ${{ always() }}
|
||||
needs:
|
||||
- pre_job
|
||||
- check-changes
|
||||
- api-tests-run
|
||||
- api-tests-skip
|
||||
runs-on: depot-ubuntu-24.04
|
||||
steps:
|
||||
- name: Finalize API Tests status
|
||||
env:
|
||||
SHOULD_SKIP_WORKFLOW: ${{ needs.pre_job.outputs.should_skip }}
|
||||
TESTS_CHANGED: ${{ needs.check-changes.outputs.api-changed }}
|
||||
RUN_RESULT: ${{ needs.api-tests-run.result }}
|
||||
SKIP_RESULT: ${{ needs.api-tests-skip.result }}
|
||||
run: |
|
||||
if [[ "$SHOULD_SKIP_WORKFLOW" == 'true' ]]; then
|
||||
echo "API tests were skipped because this workflow run duplicated a successful or newer run."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [[ "$TESTS_CHANGED" == 'true' ]]; then
|
||||
if [[ "$RUN_RESULT" == 'success' ]]; then
|
||||
echo "API tests ran successfully."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "API tests were required but finished with result: $RUN_RESULT" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "$SKIP_RESULT" == 'success' ]]; then
|
||||
echo "API tests were skipped because no API-related files changed."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "API tests were not required, but the skip job finished with result: $SKIP_RESULT" >&2
|
||||
exit 1
|
||||
|
||||
web-tests-run:
|
||||
name: Run Web Tests
|
||||
needs:
|
||||
- pre_job
|
||||
- check-changes
|
||||
if: needs.pre_job.outputs.should_skip != 'true' && needs.check-changes.outputs.web-changed == 'true'
|
||||
uses: ./.github/workflows/web-tests.yml
|
||||
secrets: inherit
|
||||
|
||||
web-tests-skip:
|
||||
name: Skip Web Tests
|
||||
needs:
|
||||
- pre_job
|
||||
- check-changes
|
||||
if: needs.pre_job.outputs.should_skip != 'true' && needs.check-changes.outputs.web-changed != 'true'
|
||||
runs-on: depot-ubuntu-24.04
|
||||
steps:
|
||||
- name: Report skipped web tests
|
||||
run: echo "No web-related changes detected; skipping web tests."
|
||||
|
||||
web-tests:
|
||||
name: Web Tests
|
||||
if: ${{ always() }}
|
||||
needs:
|
||||
- pre_job
|
||||
- check-changes
|
||||
- web-tests-run
|
||||
- web-tests-skip
|
||||
runs-on: depot-ubuntu-24.04
|
||||
steps:
|
||||
- name: Finalize Web Tests status
|
||||
env:
|
||||
SHOULD_SKIP_WORKFLOW: ${{ needs.pre_job.outputs.should_skip }}
|
||||
TESTS_CHANGED: ${{ needs.check-changes.outputs.web-changed }}
|
||||
RUN_RESULT: ${{ needs.web-tests-run.result }}
|
||||
SKIP_RESULT: ${{ needs.web-tests-skip.result }}
|
||||
run: |
|
||||
if [[ "$SHOULD_SKIP_WORKFLOW" == 'true' ]]; then
|
||||
echo "Web tests were skipped because this workflow run duplicated a successful or newer run."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [[ "$TESTS_CHANGED" == 'true' ]]; then
|
||||
if [[ "$RUN_RESULT" == 'success' ]]; then
|
||||
echo "Web tests ran successfully."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Web tests were required but finished with result: $RUN_RESULT" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "$SKIP_RESULT" == 'success' ]]; then
|
||||
echo "Web tests were skipped because no web-related files changed."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Web tests were not required, but the skip job finished with result: $SKIP_RESULT" >&2
|
||||
exit 1
|
||||
|
||||
web-e2e-run:
|
||||
name: Run Web Full-Stack E2E
|
||||
needs:
|
||||
- pre_job
|
||||
- check-changes
|
||||
if: needs.pre_job.outputs.should_skip != 'true' && needs.check-changes.outputs.e2e-changed == 'true'
|
||||
uses: ./.github/workflows/web-e2e.yml
|
||||
|
||||
web-e2e-skip:
|
||||
name: Skip Web Full-Stack E2E
|
||||
needs:
|
||||
- pre_job
|
||||
- check-changes
|
||||
if: needs.pre_job.outputs.should_skip != 'true' && needs.check-changes.outputs.e2e-changed != 'true'
|
||||
runs-on: depot-ubuntu-24.04
|
||||
steps:
|
||||
- name: Report skipped web full-stack e2e
|
||||
run: echo "No E2E-related changes detected; skipping web full-stack E2E."
|
||||
|
||||
web-e2e:
|
||||
name: Web Full-Stack E2E
|
||||
if: ${{ always() }}
|
||||
needs:
|
||||
- pre_job
|
||||
- check-changes
|
||||
- web-e2e-run
|
||||
- web-e2e-skip
|
||||
runs-on: depot-ubuntu-24.04
|
||||
steps:
|
||||
- name: Finalize Web Full-Stack E2E status
|
||||
env:
|
||||
SHOULD_SKIP_WORKFLOW: ${{ needs.pre_job.outputs.should_skip }}
|
||||
TESTS_CHANGED: ${{ needs.check-changes.outputs.e2e-changed }}
|
||||
RUN_RESULT: ${{ needs.web-e2e-run.result }}
|
||||
SKIP_RESULT: ${{ needs.web-e2e-skip.result }}
|
||||
run: |
|
||||
if [[ "$SHOULD_SKIP_WORKFLOW" == 'true' ]]; then
|
||||
echo "Web full-stack E2E was skipped because this workflow run duplicated a successful or newer run."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [[ "$TESTS_CHANGED" == 'true' ]]; then
|
||||
if [[ "$RUN_RESULT" == 'success' ]]; then
|
||||
echo "Web full-stack E2E ran successfully."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Web full-stack E2E was required but finished with result: $RUN_RESULT" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "$SKIP_RESULT" == 'success' ]]; then
|
||||
echo "Web full-stack E2E was skipped because no E2E-related files changed."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Web full-stack E2E was not required, but the skip job finished with result: $SKIP_RESULT" >&2
|
||||
exit 1
|
||||
|
||||
style-check:
|
||||
name: Style Check
|
||||
needs: pre_job
|
||||
if: needs.pre_job.outputs.should_skip != 'true'
|
||||
uses: ./.github/workflows/style.yml
|
||||
|
||||
vdb-tests-run:
|
||||
name: Run VDB Tests
|
||||
needs:
|
||||
- pre_job
|
||||
- check-changes
|
||||
if: needs.pre_job.outputs.should_skip != 'true' && needs.check-changes.outputs.vdb-changed == 'true'
|
||||
uses: ./.github/workflows/vdb-tests.yml
|
||||
|
||||
vdb-tests-skip:
|
||||
name: Skip VDB Tests
|
||||
needs:
|
||||
- pre_job
|
||||
- check-changes
|
||||
if: needs.pre_job.outputs.should_skip != 'true' && needs.check-changes.outputs.vdb-changed != 'true'
|
||||
runs-on: depot-ubuntu-24.04
|
||||
steps:
|
||||
- name: Report skipped VDB tests
|
||||
run: echo "No VDB-related changes detected; skipping VDB tests."
|
||||
|
||||
vdb-tests:
|
||||
name: VDB Tests
|
||||
needs: check-changes
|
||||
if: needs.check-changes.outputs.vdb-changed == 'true'
|
||||
uses: ./.github/workflows/vdb-tests.yml
|
||||
if: ${{ always() }}
|
||||
needs:
|
||||
- pre_job
|
||||
- check-changes
|
||||
- vdb-tests-run
|
||||
- vdb-tests-skip
|
||||
runs-on: depot-ubuntu-24.04
|
||||
steps:
|
||||
- name: Finalize VDB Tests status
|
||||
env:
|
||||
SHOULD_SKIP_WORKFLOW: ${{ needs.pre_job.outputs.should_skip }}
|
||||
TESTS_CHANGED: ${{ needs.check-changes.outputs.vdb-changed }}
|
||||
RUN_RESULT: ${{ needs.vdb-tests-run.result }}
|
||||
SKIP_RESULT: ${{ needs.vdb-tests-skip.result }}
|
||||
run: |
|
||||
if [[ "$SHOULD_SKIP_WORKFLOW" == 'true' ]]; then
|
||||
echo "VDB tests were skipped because this workflow run duplicated a successful or newer run."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [[ "$TESTS_CHANGED" == 'true' ]]; then
|
||||
if [[ "$RUN_RESULT" == 'success' ]]; then
|
||||
echo "VDB tests ran successfully."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "VDB tests were required but finished with result: $RUN_RESULT" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "$SKIP_RESULT" == 'success' ]]; then
|
||||
echo "VDB tests were skipped because no VDB-related files changed."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "VDB tests were not required, but the skip job finished with result: $SKIP_RESULT" >&2
|
||||
exit 1
|
||||
|
||||
db-migration-test-run:
|
||||
name: Run DB Migration Test
|
||||
needs:
|
||||
- pre_job
|
||||
- check-changes
|
||||
if: needs.pre_job.outputs.should_skip != 'true' && needs.check-changes.outputs.migration-changed == 'true'
|
||||
uses: ./.github/workflows/db-migration-test.yml
|
||||
|
||||
db-migration-test-skip:
|
||||
name: Skip DB Migration Test
|
||||
needs:
|
||||
- pre_job
|
||||
- check-changes
|
||||
if: needs.pre_job.outputs.should_skip != 'true' && needs.check-changes.outputs.migration-changed != 'true'
|
||||
runs-on: depot-ubuntu-24.04
|
||||
steps:
|
||||
- name: Report skipped DB migration tests
|
||||
run: echo "No migration-related changes detected; skipping DB migration tests."
|
||||
|
||||
db-migration-test:
|
||||
name: DB Migration Test
|
||||
needs: check-changes
|
||||
if: needs.check-changes.outputs.migration-changed == 'true'
|
||||
uses: ./.github/workflows/db-migration-test.yml
|
||||
if: ${{ always() }}
|
||||
needs:
|
||||
- pre_job
|
||||
- check-changes
|
||||
- db-migration-test-run
|
||||
- db-migration-test-skip
|
||||
runs-on: depot-ubuntu-24.04
|
||||
steps:
|
||||
- name: Finalize DB Migration Test status
|
||||
env:
|
||||
SHOULD_SKIP_WORKFLOW: ${{ needs.pre_job.outputs.should_skip }}
|
||||
TESTS_CHANGED: ${{ needs.check-changes.outputs.migration-changed }}
|
||||
RUN_RESULT: ${{ needs.db-migration-test-run.result }}
|
||||
SKIP_RESULT: ${{ needs.db-migration-test-skip.result }}
|
||||
run: |
|
||||
if [[ "$SHOULD_SKIP_WORKFLOW" == 'true' ]]; then
|
||||
echo "DB migration tests were skipped because this workflow run duplicated a successful or newer run."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [[ "$TESTS_CHANGED" == 'true' ]]; then
|
||||
if [[ "$RUN_RESULT" == 'success' ]]; then
|
||||
echo "DB migration tests ran successfully."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "DB migration tests were required but finished with result: $RUN_RESULT" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "$SKIP_RESULT" == 'success' ]]; then
|
||||
echo "DB migration tests were skipped because no migration-related files changed."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "DB migration tests were not required, but the skip job finished with result: $SKIP_RESULT" >&2
|
||||
exit 1
|
||||
|
||||
40
.github/workflows/pyrefly-diff-comment.yml
vendored
40
.github/workflows/pyrefly-diff-comment.yml
vendored
@ -12,7 +12,7 @@ permissions: {}
|
||||
jobs:
|
||||
comment:
|
||||
name: Comment PR with pyrefly diff
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
@ -21,7 +21,7 @@ jobs:
|
||||
if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.pull_requests[0].head.repo.full_name != github.repository }}
|
||||
steps:
|
||||
- name: Download pyrefly diff artifact
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
@ -49,7 +49,7 @@ jobs:
|
||||
run: unzip -o pyrefly_diff.zip
|
||||
|
||||
- name: Post comment
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
@ -76,13 +76,29 @@ jobs:
|
||||
diff += '\\n\\n... (truncated) ...';
|
||||
}
|
||||
|
||||
const body = diff.trim()
|
||||
? '### Pyrefly Diff\n<details>\n<summary>base → PR</summary>\n\n```diff\n' + diff + '\n```\n</details>'
|
||||
: '### Pyrefly Diff\nNo changes detected.';
|
||||
if (diff.trim()) {
|
||||
const body = '### Pyrefly Diff\n<details>\n<summary>base → PR</summary>\n\n```diff\n' + diff + '\n```\n</details>';
|
||||
const marker = '### Pyrefly Diff';
|
||||
const { data: comments } = await github.rest.issues.listComments({
|
||||
issue_number: prNumber,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
});
|
||||
const existing = comments.find((comment) => comment.body.startsWith(marker));
|
||||
|
||||
await github.rest.issues.createComment({
|
||||
issue_number: prNumber,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
body,
|
||||
});
|
||||
if (existing) {
|
||||
await github.rest.issues.updateComment({
|
||||
comment_id: existing.id,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
body,
|
||||
});
|
||||
} else {
|
||||
await github.rest.issues.createComment({
|
||||
issue_number: prNumber,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
body,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
42
.github/workflows/pyrefly-diff.yml
vendored
42
.github/workflows/pyrefly-diff.yml
vendored
@ -10,7 +10,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
pyrefly-diff:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
@ -22,7 +22,7 @@ jobs:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Python & UV
|
||||
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
|
||||
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
|
||||
with:
|
||||
enable-cache: true
|
||||
|
||||
@ -50,12 +50,23 @@ jobs:
|
||||
run: |
|
||||
diff -u /tmp/pyrefly_base.txt /tmp/pyrefly_pr.txt > pyrefly_diff.txt || true
|
||||
|
||||
- name: Check if line counts match
|
||||
id: line_count_check
|
||||
run: |
|
||||
base_lines=$(wc -l < /tmp/pyrefly_base.txt)
|
||||
pr_lines=$(wc -l < /tmp/pyrefly_pr.txt)
|
||||
if [ "$base_lines" -eq "$pr_lines" ]; then
|
||||
echo "same=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "same=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Save PR number
|
||||
run: |
|
||||
echo ${{ github.event.pull_request.number }} > pr_number.txt
|
||||
|
||||
- name: Upload pyrefly diff
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: pyrefly_diff
|
||||
path: |
|
||||
@ -63,8 +74,8 @@ jobs:
|
||||
pr_number.txt
|
||||
|
||||
- name: Comment PR with pyrefly diff
|
||||
if: ${{ github.event.pull_request.head.repo.full_name == github.repository }}
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
if: ${{ github.event.pull_request.head.repo.full_name == github.repository && steps.line_count_check.outputs.same == 'false' }}
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
@ -92,9 +103,26 @@ jobs:
|
||||
].join('\n')
|
||||
: '### Pyrefly Diff\nNo changes detected.';
|
||||
|
||||
await github.rest.issues.createComment({
|
||||
const marker = '### Pyrefly Diff';
|
||||
const { data: comments } = await github.rest.issues.listComments({
|
||||
issue_number: prNumber,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
body,
|
||||
});
|
||||
const existing = comments.find((comment) => comment.body.startsWith(marker));
|
||||
|
||||
if (existing) {
|
||||
await github.rest.issues.updateComment({
|
||||
comment_id: existing.id,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
body,
|
||||
});
|
||||
} else {
|
||||
await github.rest.issues.createComment({
|
||||
issue_number: prNumber,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
body,
|
||||
});
|
||||
}
|
||||
|
||||
118
.github/workflows/pyrefly-type-coverage-comment.yml
vendored
Normal file
118
.github/workflows/pyrefly-type-coverage-comment.yml
vendored
Normal file
@ -0,0 +1,118 @@
|
||||
name: Comment with Pyrefly Type Coverage
|
||||
|
||||
on:
|
||||
workflow_run:
|
||||
workflows:
|
||||
- Pyrefly Type Coverage
|
||||
types:
|
||||
- completed
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
comment:
|
||||
name: Comment PR with type coverage
|
||||
runs-on: depot-ubuntu-24.04
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
issues: write
|
||||
pull-requests: write
|
||||
if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.pull_requests[0].head.repo.full_name != github.repository }}
|
||||
steps:
|
||||
- name: Checkout default branch (trusted code)
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Setup Python & UV
|
||||
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
|
||||
with:
|
||||
enable-cache: true
|
||||
|
||||
- name: Install dependencies
|
||||
run: uv sync --project api --dev
|
||||
|
||||
- name: Download type coverage artifact
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const fs = require('fs');
|
||||
const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
run_id: ${{ github.event.workflow_run.id }},
|
||||
});
|
||||
const match = artifacts.data.artifacts.find((artifact) =>
|
||||
artifact.name === 'pyrefly_type_coverage'
|
||||
);
|
||||
if (!match) {
|
||||
throw new Error('pyrefly_type_coverage artifact not found');
|
||||
}
|
||||
const download = await github.rest.actions.downloadArtifact({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
artifact_id: match.id,
|
||||
archive_format: 'zip',
|
||||
});
|
||||
fs.writeFileSync('pyrefly_type_coverage.zip', Buffer.from(download.data));
|
||||
|
||||
- name: Unzip artifact
|
||||
run: unzip -o pyrefly_type_coverage.zip
|
||||
|
||||
- name: Render coverage markdown from structured data
|
||||
id: render
|
||||
run: |
|
||||
comment_body="$(uv run --directory api python libs/pyrefly_type_coverage.py \
|
||||
--base base_report.json \
|
||||
< pr_report.json)"
|
||||
|
||||
{
|
||||
echo "### Pyrefly Type Coverage"
|
||||
echo ""
|
||||
echo "$comment_body"
|
||||
} > /tmp/type_coverage_comment.md
|
||||
|
||||
- name: Post comment
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const fs = require('fs');
|
||||
const body = fs.readFileSync('/tmp/type_coverage_comment.md', { encoding: 'utf8' });
|
||||
let prNumber = null;
|
||||
try {
|
||||
prNumber = parseInt(fs.readFileSync('pr_number.txt', { encoding: 'utf8' }), 10);
|
||||
} catch (err) {
|
||||
const prs = context.payload.workflow_run.pull_requests || [];
|
||||
if (prs.length > 0 && prs[0].number) {
|
||||
prNumber = prs[0].number;
|
||||
}
|
||||
}
|
||||
if (!prNumber) {
|
||||
throw new Error('PR number not found in artifact or workflow_run payload');
|
||||
}
|
||||
|
||||
// Update existing comment if one exists, otherwise create new
|
||||
const { data: comments } = await github.rest.issues.listComments({
|
||||
issue_number: prNumber,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
});
|
||||
const marker = '### Pyrefly Type Coverage';
|
||||
const existing = comments.find(c => c.body.startsWith(marker));
|
||||
|
||||
if (existing) {
|
||||
await github.rest.issues.updateComment({
|
||||
comment_id: existing.id,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
body,
|
||||
});
|
||||
} else {
|
||||
await github.rest.issues.createComment({
|
||||
issue_number: prNumber,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
body,
|
||||
});
|
||||
}
|
||||
120
.github/workflows/pyrefly-type-coverage.yml
vendored
Normal file
120
.github/workflows/pyrefly-type-coverage.yml
vendored
Normal file
@ -0,0 +1,120 @@
|
||||
name: Pyrefly Type Coverage
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- 'api/**/*.py'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
pyrefly-type-coverage:
|
||||
runs-on: depot-ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
pull-requests: write
|
||||
steps:
|
||||
- name: Checkout PR branch
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Python & UV
|
||||
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
|
||||
with:
|
||||
enable-cache: true
|
||||
|
||||
- name: Install dependencies
|
||||
run: uv sync --project api --dev
|
||||
|
||||
- name: Run pyrefly report on PR branch
|
||||
run: |
|
||||
uv run --directory api --dev pyrefly report 2>/dev/null > /tmp/pyrefly_report_pr.tmp && \
|
||||
mv /tmp/pyrefly_report_pr.tmp /tmp/pyrefly_report_pr.json || \
|
||||
echo '{}' > /tmp/pyrefly_report_pr.json
|
||||
|
||||
- name: Save helper script from base branch
|
||||
run: |
|
||||
git show ${{ github.event.pull_request.base.sha }}:api/libs/pyrefly_type_coverage.py > /tmp/pyrefly_type_coverage.py 2>/dev/null \
|
||||
|| cp api/libs/pyrefly_type_coverage.py /tmp/pyrefly_type_coverage.py
|
||||
|
||||
- name: Checkout base branch
|
||||
run: git checkout ${{ github.base_ref }}
|
||||
|
||||
- name: Run pyrefly report on base branch
|
||||
run: |
|
||||
uv run --directory api --dev pyrefly report 2>/dev/null > /tmp/pyrefly_report_base.tmp && \
|
||||
mv /tmp/pyrefly_report_base.tmp /tmp/pyrefly_report_base.json || \
|
||||
echo '{}' > /tmp/pyrefly_report_base.json
|
||||
|
||||
- name: Generate coverage comparison
|
||||
id: coverage
|
||||
run: |
|
||||
comment_body="$(uv run --directory api python /tmp/pyrefly_type_coverage.py \
|
||||
--base /tmp/pyrefly_report_base.json \
|
||||
< /tmp/pyrefly_report_pr.json)"
|
||||
|
||||
{
|
||||
echo "### Pyrefly Type Coverage"
|
||||
echo ""
|
||||
echo "$comment_body"
|
||||
} | tee -a "$GITHUB_STEP_SUMMARY" > /tmp/type_coverage_comment.md
|
||||
|
||||
# Save structured data for the fork-PR comment workflow
|
||||
cp /tmp/pyrefly_report_pr.json pr_report.json
|
||||
cp /tmp/pyrefly_report_base.json base_report.json
|
||||
|
||||
- name: Save PR number
|
||||
run: |
|
||||
echo ${{ github.event.pull_request.number }} > pr_number.txt
|
||||
|
||||
- name: Upload type coverage artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: pyrefly_type_coverage
|
||||
path: |
|
||||
pr_report.json
|
||||
base_report.json
|
||||
pr_number.txt
|
||||
|
||||
- name: Comment PR with type coverage
|
||||
if: ${{ github.event.pull_request.head.repo.full_name == github.repository }}
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const fs = require('fs');
|
||||
const marker = '### Pyrefly Type Coverage';
|
||||
let body;
|
||||
try {
|
||||
body = fs.readFileSync('/tmp/type_coverage_comment.md', { encoding: 'utf8' });
|
||||
} catch {
|
||||
body = `${marker}\n\n_Coverage report unavailable._`;
|
||||
}
|
||||
const prNumber = context.payload.pull_request.number;
|
||||
|
||||
// Update existing comment if one exists, otherwise create new
|
||||
const { data: comments } = await github.rest.issues.listComments({
|
||||
issue_number: prNumber,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
});
|
||||
const existing = comments.find(c => c.body.startsWith(marker));
|
||||
|
||||
if (existing) {
|
||||
await github.rest.issues.updateComment({
|
||||
comment_id: existing.id,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
body,
|
||||
});
|
||||
} else {
|
||||
await github.rest.issues.createComment({
|
||||
issue_number: prNumber,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
body,
|
||||
});
|
||||
}
|
||||
9
.github/workflows/semantic-pull-request.yml
vendored
9
.github/workflows/semantic-pull-request.yml
vendored
@ -7,15 +7,22 @@ on:
|
||||
- edited
|
||||
- reopened
|
||||
- synchronize
|
||||
merge_group:
|
||||
branches: ["main"]
|
||||
types: [checks_requested]
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
name: Validate PR title
|
||||
permissions:
|
||||
pull-requests: read
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
steps:
|
||||
- name: Complete merge group check
|
||||
if: github.event_name == 'merge_group'
|
||||
run: echo "Semantic PR title validation is handled on pull requests."
|
||||
- name: Check title
|
||||
if: github.event_name == 'pull_request'
|
||||
uses: amannn/action-semantic-pull-request@48f256284bd46cdaab1048c3721360e808335d50 # v6.1.1
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
8
.github/workflows/stale.yml
vendored
8
.github/workflows/stale.yml
vendored
@ -12,7 +12,7 @@ on:
|
||||
jobs:
|
||||
stale:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
permissions:
|
||||
issues: write
|
||||
pull-requests: write
|
||||
@ -23,8 +23,8 @@ jobs:
|
||||
days-before-issue-stale: 15
|
||||
days-before-issue-close: 3
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
stale-issue-message: "Close due to it's no longer active, if you have any questions, you can reopen it."
|
||||
stale-pr-message: "Close due to it's no longer active, if you have any questions, you can reopen it."
|
||||
stale-issue-message: "Closed due to inactivity. If you have any questions, you can reopen it."
|
||||
stale-pr-message: "Closed due to inactivity. If you have any questions, you can reopen it."
|
||||
stale-issue-label: 'no-issue-activity'
|
||||
stale-pr-label: 'no-pr-activity'
|
||||
any-of-labels: 'duplicate,question,invalid,wontfix,no-issue-activity,no-pr-activity,enhancement,cant-reproduce,help-wanted'
|
||||
any-of-labels: '🌚 invalid,🙋♂️ question,wont-fix,no-issue-activity,no-pr-activity,💪 enhancement,🤔 cant-reproduce,🙏 help wanted'
|
||||
|
||||
43
.github/workflows/style.yml
vendored
43
.github/workflows/style.yml
vendored
@ -15,7 +15,7 @@ permissions:
|
||||
jobs:
|
||||
python-style:
|
||||
name: Python Style
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@ -25,7 +25,7 @@ jobs:
|
||||
|
||||
- name: Check changed files
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
with:
|
||||
files: |
|
||||
api/**
|
||||
@ -33,7 +33,7 @@ jobs:
|
||||
|
||||
- name: Setup UV and Python
|
||||
if: steps.changed-files.outputs.any_changed == 'true'
|
||||
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
|
||||
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
|
||||
with:
|
||||
enable-cache: false
|
||||
python-version: "3.12"
|
||||
@ -49,7 +49,7 @@ jobs:
|
||||
|
||||
- name: Run Type Checks
|
||||
if: steps.changed-files.outputs.any_changed == 'true'
|
||||
run: make type-check
|
||||
run: make type-check-core
|
||||
|
||||
- name: Dotenv check
|
||||
if: steps.changed-files.outputs.any_changed == 'true'
|
||||
@ -57,7 +57,7 @@ jobs:
|
||||
|
||||
web-style:
|
||||
name: Web Style
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ./web
|
||||
@ -73,10 +73,17 @@ jobs:
|
||||
|
||||
- name: Check changed files
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
with:
|
||||
files: |
|
||||
web/**
|
||||
e2e/**
|
||||
sdks/nodejs-client/**
|
||||
packages/**
|
||||
package.json
|
||||
pnpm-lock.yaml
|
||||
pnpm-workspace.yaml
|
||||
.nvmrc
|
||||
.github/workflows/style.yml
|
||||
.github/actions/setup-web/**
|
||||
|
||||
@ -87,26 +94,28 @@ jobs:
|
||||
- name: Restore ESLint cache
|
||||
if: steps.changed-files.outputs.any_changed == 'true'
|
||||
id: eslint-cache-restore
|
||||
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
|
||||
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: web/.eslintcache
|
||||
key: ${{ runner.os }}-web-eslint-${{ hashFiles('web/package.json', 'web/pnpm-lock.yaml', 'web/eslint.config.mjs', 'web/eslint.constants.mjs', 'web/plugins/eslint/**') }}-${{ github.sha }}
|
||||
path: .eslintcache
|
||||
key: ${{ runner.os }}-eslint-${{ hashFiles('pnpm-lock.yaml', 'eslint.config.mjs', 'web/eslint.config.mjs', 'web/eslint.constants.mjs', 'web/plugins/eslint/**') }}-${{ github.sha }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-web-eslint-${{ hashFiles('web/package.json', 'web/pnpm-lock.yaml', 'web/eslint.config.mjs', 'web/eslint.constants.mjs', 'web/plugins/eslint/**') }}-
|
||||
${{ runner.os }}-eslint-${{ hashFiles('pnpm-lock.yaml', 'eslint.config.mjs', 'web/eslint.config.mjs', 'web/eslint.constants.mjs', 'web/plugins/eslint/**') }}-
|
||||
|
||||
- name: Web style check
|
||||
if: steps.changed-files.outputs.any_changed == 'true'
|
||||
working-directory: ./web
|
||||
working-directory: .
|
||||
run: vp run lint:ci
|
||||
|
||||
- name: Web tsslint
|
||||
if: steps.changed-files.outputs.any_changed == 'true'
|
||||
working-directory: ./web
|
||||
env:
|
||||
NODE_OPTIONS: --max-old-space-size=4096
|
||||
run: vp run lint:tss
|
||||
|
||||
- name: Web type check
|
||||
if: steps.changed-files.outputs.any_changed == 'true'
|
||||
working-directory: ./web
|
||||
working-directory: .
|
||||
run: vp run type-check
|
||||
|
||||
- name: Web dead code check
|
||||
@ -116,14 +125,14 @@ jobs:
|
||||
|
||||
- name: Save ESLint cache
|
||||
if: steps.changed-files.outputs.any_changed == 'true' && success() && steps.eslint-cache-restore.outputs.cache-hit != 'true'
|
||||
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
|
||||
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: web/.eslintcache
|
||||
path: .eslintcache
|
||||
key: ${{ steps.eslint-cache-restore.outputs.cache-primary-key }}
|
||||
|
||||
superlinter:
|
||||
name: SuperLinter
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@ -134,7 +143,7 @@ jobs:
|
||||
|
||||
- name: Check changed files
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
with:
|
||||
files: |
|
||||
**.sh
|
||||
@ -145,7 +154,7 @@ jobs:
|
||||
.editorconfig
|
||||
|
||||
- name: Super-linter
|
||||
uses: super-linter/super-linter/slim@61abc07d755095a68f4987d1c2c3d1d64408f1f9 # v8.5.0
|
||||
uses: super-linter/super-linter/slim@9e863354e3ff62e0727d37183162c4a88873df41 # v8.6.0
|
||||
if: steps.changed-files.outputs.any_changed == 'true'
|
||||
env:
|
||||
BASH_SEVERITY: warning
|
||||
|
||||
7
.github/workflows/tool-test-sdks.yaml
vendored
7
.github/workflows/tool-test-sdks.yaml
vendored
@ -6,6 +6,9 @@ on:
|
||||
- main
|
||||
paths:
|
||||
- sdks/**
|
||||
- package.json
|
||||
- pnpm-lock.yaml
|
||||
- pnpm-workspace.yaml
|
||||
|
||||
concurrency:
|
||||
group: sdk-tests-${{ github.head_ref || github.run_id }}
|
||||
@ -14,7 +17,7 @@ concurrency:
|
||||
jobs:
|
||||
build:
|
||||
name: unit test for Node.js SDK
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
|
||||
defaults:
|
||||
run:
|
||||
@ -26,7 +29,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Use Node.js
|
||||
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
|
||||
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
||||
with:
|
||||
node-version: 22
|
||||
cache: ''
|
||||
|
||||
643
.github/workflows/translate-i18n-claude.yml
vendored
643
.github/workflows/translate-i18n-claude.yml
vendored
@ -1,26 +1,24 @@
|
||||
name: Translate i18n Files with Claude Code
|
||||
|
||||
# Note: claude-code-action doesn't support push events directly.
|
||||
# Push events are handled by trigger-i18n-sync.yml which sends repository_dispatch.
|
||||
# See: https://github.com/langgenius/dify/issues/30743
|
||||
|
||||
# Push events are bridged by trigger-i18n-sync.yml via repository_dispatch.
|
||||
on:
|
||||
repository_dispatch:
|
||||
types: [i18n-sync]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
files:
|
||||
description: 'Specific files to translate (space-separated, e.g., "app common"). Leave empty for all files.'
|
||||
description: 'Specific files to translate (space-separated, e.g., "app common"). Required for full mode; leave empty in incremental mode to use en-US files changed since HEAD~1.'
|
||||
required: false
|
||||
type: string
|
||||
languages:
|
||||
description: 'Specific languages to translate (space-separated, e.g., "zh-Hans ja-JP"). Leave empty for all supported languages.'
|
||||
description: 'Specific languages to translate (space-separated, e.g., "zh-Hans ja-JP"). Leave empty for all supported target languages except en-US.'
|
||||
required: false
|
||||
type: string
|
||||
mode:
|
||||
description: 'Sync mode: incremental (only changes) or full (re-check all keys)'
|
||||
description: 'Sync mode: incremental (compare with previous en-US revision) or full (sync all keys in scope)'
|
||||
required: false
|
||||
default: 'incremental'
|
||||
default: incremental
|
||||
type: choice
|
||||
options:
|
||||
- incremental
|
||||
@ -30,11 +28,15 @@ permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: translate-i18n-${{ github.event_name }}-${{ github.ref }}
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
translate:
|
||||
if: github.repository == 'langgenius/dify'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 60
|
||||
runs-on: depot-ubuntu-24.04
|
||||
timeout-minutes: 120
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
@ -51,380 +53,293 @@ jobs:
|
||||
- name: Setup web environment
|
||||
uses: ./.github/actions/setup-web
|
||||
|
||||
- name: Detect changed files and generate diff
|
||||
id: detect_changes
|
||||
- name: Prepare sync context
|
||||
id: context
|
||||
shell: bash
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
|
||||
# Manual trigger
|
||||
if [ -n "${{ github.event.inputs.files }}" ]; then
|
||||
echo "CHANGED_FILES=${{ github.event.inputs.files }}" >> $GITHUB_OUTPUT
|
||||
else
|
||||
# Get all JSON files in en-US directory
|
||||
files=$(ls web/i18n/en-US/*.json 2>/dev/null | xargs -n1 basename | sed 's/.json$//' | tr '\n' ' ')
|
||||
echo "CHANGED_FILES=$files" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
echo "TARGET_LANGS=${{ github.event.inputs.languages }}" >> $GITHUB_OUTPUT
|
||||
echo "SYNC_MODE=${{ github.event.inputs.mode || 'incremental' }}" >> $GITHUB_OUTPUT
|
||||
DEFAULT_TARGET_LANGS=$(awk "
|
||||
/value: '/ {
|
||||
value=\$2
|
||||
gsub(/[',]/, \"\", value)
|
||||
}
|
||||
/supported: true/ && value != \"en-US\" {
|
||||
printf \"%s \", value
|
||||
}
|
||||
" web/i18n-config/languages.ts | sed 's/[[:space:]]*$//')
|
||||
|
||||
# For manual trigger with incremental mode, get diff from last commit
|
||||
# For full mode, we'll do a complete check anyway
|
||||
if [ "${{ github.event.inputs.mode }}" == "full" ]; then
|
||||
echo "Full mode: will check all keys" > /tmp/i18n-diff.txt
|
||||
echo "DIFF_AVAILABLE=false" >> $GITHUB_OUTPUT
|
||||
else
|
||||
git diff HEAD~1..HEAD -- 'web/i18n/en-US/*.json' > /tmp/i18n-diff.txt 2>/dev/null || echo "" > /tmp/i18n-diff.txt
|
||||
if [ -s /tmp/i18n-diff.txt ]; then
|
||||
echo "DIFF_AVAILABLE=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "DIFF_AVAILABLE=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
fi
|
||||
elif [ "${{ github.event_name }}" == "repository_dispatch" ]; then
|
||||
# Triggered by push via trigger-i18n-sync.yml workflow
|
||||
# Validate required payload fields
|
||||
if [ -z "${{ github.event.client_payload.changed_files }}" ]; then
|
||||
echo "Error: repository_dispatch payload missing required 'changed_files' field" >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "CHANGED_FILES=${{ github.event.client_payload.changed_files }}" >> $GITHUB_OUTPUT
|
||||
echo "TARGET_LANGS=" >> $GITHUB_OUTPUT
|
||||
echo "SYNC_MODE=${{ github.event.client_payload.sync_mode || 'incremental' }}" >> $GITHUB_OUTPUT
|
||||
generate_changes_json() {
|
||||
node .github/scripts/generate-i18n-changes.mjs
|
||||
}
|
||||
|
||||
# Decode the base64-encoded diff from the trigger workflow
|
||||
if [ -n "${{ github.event.client_payload.diff_base64 }}" ]; then
|
||||
if ! echo "${{ github.event.client_payload.diff_base64 }}" | base64 -d > /tmp/i18n-diff.txt 2>&1; then
|
||||
echo "Warning: Failed to decode base64 diff payload" >&2
|
||||
echo "" > /tmp/i18n-diff.txt
|
||||
echo "DIFF_AVAILABLE=false" >> $GITHUB_OUTPUT
|
||||
elif [ -s /tmp/i18n-diff.txt ]; then
|
||||
echo "DIFF_AVAILABLE=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "DIFF_AVAILABLE=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
if [ "${{ github.event_name }}" = "repository_dispatch" ]; then
|
||||
BASE_SHA="${{ github.event.client_payload.base_sha }}"
|
||||
HEAD_SHA="${{ github.event.client_payload.head_sha }}"
|
||||
CHANGED_FILES="${{ github.event.client_payload.changed_files }}"
|
||||
TARGET_LANGS="$DEFAULT_TARGET_LANGS"
|
||||
SYNC_MODE="${{ github.event.client_payload.sync_mode || 'incremental' }}"
|
||||
|
||||
if [ -n "${{ github.event.client_payload.changes_base64 }}" ]; then
|
||||
printf '%s' '${{ github.event.client_payload.changes_base64 }}' | base64 -d > /tmp/i18n-changes.json
|
||||
CHANGES_AVAILABLE="true"
|
||||
CHANGES_SOURCE="embedded"
|
||||
elif [ -n "$BASE_SHA" ] && [ -n "$CHANGED_FILES" ]; then
|
||||
export BASE_SHA HEAD_SHA CHANGED_FILES
|
||||
generate_changes_json
|
||||
CHANGES_AVAILABLE="true"
|
||||
CHANGES_SOURCE="recomputed"
|
||||
else
|
||||
echo "" > /tmp/i18n-diff.txt
|
||||
echo "DIFF_AVAILABLE=false" >> $GITHUB_OUTPUT
|
||||
printf '%s' '{"baseSha":"","headSha":"","files":[],"changes":{}}' > /tmp/i18n-changes.json
|
||||
CHANGES_AVAILABLE="false"
|
||||
CHANGES_SOURCE="unavailable"
|
||||
fi
|
||||
else
|
||||
echo "Unsupported event type: ${{ github.event_name }}"
|
||||
exit 1
|
||||
BASE_SHA=""
|
||||
HEAD_SHA=$(git rev-parse HEAD)
|
||||
if [ -n "${{ github.event.inputs.languages }}" ]; then
|
||||
TARGET_LANGS="${{ github.event.inputs.languages }}"
|
||||
else
|
||||
TARGET_LANGS="$DEFAULT_TARGET_LANGS"
|
||||
fi
|
||||
SYNC_MODE="${{ github.event.inputs.mode || 'incremental' }}"
|
||||
if [ -n "${{ github.event.inputs.files }}" ]; then
|
||||
CHANGED_FILES="${{ github.event.inputs.files }}"
|
||||
elif [ "$SYNC_MODE" = "incremental" ]; then
|
||||
BASE_SHA=$(git rev-parse HEAD~1 2>/dev/null || true)
|
||||
if [ -n "$BASE_SHA" ]; then
|
||||
CHANGED_FILES=$(git diff --name-only "$BASE_SHA" "$HEAD_SHA" -- 'web/i18n/en-US/*.json' 2>/dev/null | sed -n 's@^.*/@@p' | sed 's/\.json$//' | tr '\n' ' ' | sed 's/[[:space:]]*$//')
|
||||
else
|
||||
CHANGED_FILES=$(find web/i18n/en-US -maxdepth 1 -type f -name '*.json' -print | sed -n 's@^.*/@@p' | sed 's/\.json$//' | sort | tr '\n' ' ' | sed 's/[[:space:]]*$//')
|
||||
fi
|
||||
elif [ "$SYNC_MODE" = "full" ]; then
|
||||
echo "workflow_dispatch full mode requires the files input to stay within CI limits." >&2
|
||||
exit 1
|
||||
else
|
||||
CHANGED_FILES=""
|
||||
fi
|
||||
|
||||
if [ "$SYNC_MODE" = "incremental" ] && [ -n "$CHANGED_FILES" ]; then
|
||||
export BASE_SHA HEAD_SHA CHANGED_FILES
|
||||
generate_changes_json
|
||||
CHANGES_AVAILABLE="true"
|
||||
CHANGES_SOURCE="local"
|
||||
else
|
||||
printf '%s' '{"baseSha":"","headSha":"","files":[],"changes":{}}' > /tmp/i18n-changes.json
|
||||
CHANGES_AVAILABLE="false"
|
||||
CHANGES_SOURCE="unavailable"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Truncate diff if too large (keep first 50KB)
|
||||
if [ -f /tmp/i18n-diff.txt ]; then
|
||||
head -c 50000 /tmp/i18n-diff.txt > /tmp/i18n-diff-truncated.txt
|
||||
mv /tmp/i18n-diff-truncated.txt /tmp/i18n-diff.txt
|
||||
FILE_ARGS=""
|
||||
if [ -n "$CHANGED_FILES" ]; then
|
||||
FILE_ARGS="--file $CHANGED_FILES"
|
||||
fi
|
||||
|
||||
echo "Detected files: $(cat $GITHUB_OUTPUT | grep CHANGED_FILES || echo 'none')"
|
||||
LANG_ARGS=""
|
||||
if [ -n "$TARGET_LANGS" ]; then
|
||||
LANG_ARGS="--lang $TARGET_LANGS"
|
||||
fi
|
||||
|
||||
{
|
||||
echo "DEFAULT_TARGET_LANGS=$DEFAULT_TARGET_LANGS"
|
||||
echo "BASE_SHA=$BASE_SHA"
|
||||
echo "HEAD_SHA=$HEAD_SHA"
|
||||
echo "CHANGED_FILES=$CHANGED_FILES"
|
||||
echo "TARGET_LANGS=$TARGET_LANGS"
|
||||
echo "SYNC_MODE=$SYNC_MODE"
|
||||
echo "CHANGES_AVAILABLE=$CHANGES_AVAILABLE"
|
||||
echo "CHANGES_SOURCE=$CHANGES_SOURCE"
|
||||
echo "FILE_ARGS=$FILE_ARGS"
|
||||
echo "LANG_ARGS=$LANG_ARGS"
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
|
||||
echo "Files: ${CHANGED_FILES:-<none>}"
|
||||
echo "Languages: ${TARGET_LANGS:-<none>}"
|
||||
echo "Mode: $SYNC_MODE"
|
||||
|
||||
- name: Run Claude Code for Translation Sync
|
||||
if: steps.detect_changes.outputs.CHANGED_FILES != ''
|
||||
uses: anthropics/claude-code-action@ff9acae5886d41a99ed4ec14b7dc147d55834722 # v1.0.77
|
||||
if: steps.context.outputs.CHANGED_FILES != ''
|
||||
uses: anthropics/claude-code-action@476e359e6203e73dad705c8b322e333fabbd7416 # v1.0.119
|
||||
with:
|
||||
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
# Allow github-actions bot to trigger this workflow via repository_dispatch
|
||||
# See: https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
|
||||
allowed_bots: 'github-actions[bot]'
|
||||
show_full_output: ${{ github.event_name == 'workflow_dispatch' }}
|
||||
prompt: |
|
||||
You are a professional i18n synchronization engineer for the Dify project.
|
||||
Your task is to keep all language translations in sync with the English source (en-US).
|
||||
You are the i18n sync agent for the Dify repository.
|
||||
Your job is to keep translations synchronized with the English source files under `${{ github.workspace }}/web/i18n/en-US/`.
|
||||
|
||||
## CRITICAL TOOL RESTRICTIONS
|
||||
- Use **Read** tool to read files (NOT cat or bash)
|
||||
- Use **Edit** tool to modify JSON files (NOT node, jq, or bash scripts)
|
||||
- Use **Bash** ONLY for: git commands, gh commands, pnpm commands
|
||||
- Run bash commands ONE BY ONE, never combine with && or ||
|
||||
- NEVER use `$()` command substitution - it's not supported. Split into separate commands instead.
|
||||
Use absolute paths at all times:
|
||||
- Repo root: `${{ github.workspace }}`
|
||||
- Web directory: `${{ github.workspace }}/web`
|
||||
- Language config: `${{ github.workspace }}/web/i18n-config/languages.ts`
|
||||
|
||||
## WORKING DIRECTORY & ABSOLUTE PATHS
|
||||
Claude Code sandbox working directory may vary. Always use absolute paths:
|
||||
- For pnpm: `pnpm --dir ${{ github.workspace }}/web <command>`
|
||||
- For git: `git -C ${{ github.workspace }} <command>`
|
||||
- For gh: `gh --repo ${{ github.repository }} <command>`
|
||||
- For file paths: `${{ github.workspace }}/web/i18n/`
|
||||
Inputs:
|
||||
- Files in scope: `${{ steps.context.outputs.CHANGED_FILES }}`
|
||||
- Target languages: `${{ steps.context.outputs.TARGET_LANGS }}`
|
||||
- Sync mode: `${{ steps.context.outputs.SYNC_MODE }}`
|
||||
- Base SHA: `${{ steps.context.outputs.BASE_SHA }}`
|
||||
- Head SHA: `${{ steps.context.outputs.HEAD_SHA }}`
|
||||
- Scoped file args: `${{ steps.context.outputs.FILE_ARGS }}`
|
||||
- Scoped language args: `${{ steps.context.outputs.LANG_ARGS }}`
|
||||
- Structured change set available: `${{ steps.context.outputs.CHANGES_AVAILABLE }}`
|
||||
- Structured change set source: `${{ steps.context.outputs.CHANGES_SOURCE }}`
|
||||
- Structured change set file: `/tmp/i18n-changes.json`
|
||||
|
||||
## EFFICIENCY RULES
|
||||
- **ONE Edit per language file** - batch all key additions into a single Edit
|
||||
- Insert new keys at the beginning of JSON (after `{`), lint:fix will sort them
|
||||
- Translate ALL keys for a language mentally first, then do ONE Edit
|
||||
|
||||
## Context
|
||||
- Changed/target files: ${{ steps.detect_changes.outputs.CHANGED_FILES }}
|
||||
- Target languages (empty means all supported): ${{ steps.detect_changes.outputs.TARGET_LANGS }}
|
||||
- Sync mode: ${{ steps.detect_changes.outputs.SYNC_MODE }}
|
||||
- Translation files are located in: ${{ github.workspace }}/web/i18n/{locale}/{filename}.json
|
||||
- Language configuration is in: ${{ github.workspace }}/web/i18n-config/languages.ts
|
||||
- Git diff is available: ${{ steps.detect_changes.outputs.DIFF_AVAILABLE }}
|
||||
|
||||
## CRITICAL DESIGN: Verify First, Then Sync
|
||||
|
||||
You MUST follow this three-phase approach:
|
||||
|
||||
═══════════════════════════════════════════════════════════════
|
||||
║ PHASE 1: VERIFY - Analyze and Generate Change Report ║
|
||||
═══════════════════════════════════════════════════════════════
|
||||
|
||||
### Step 1.1: Analyze Git Diff (for incremental mode)
|
||||
Use the Read tool to read `/tmp/i18n-diff.txt` to see the git diff.
|
||||
|
||||
Parse the diff to categorize changes:
|
||||
- Lines with `+` (not `+++`): Added or modified values
|
||||
- Lines with `-` (not `---`): Removed or old values
|
||||
- Identify specific keys for each category:
|
||||
* ADD: Keys that appear only in `+` lines (new keys)
|
||||
* UPDATE: Keys that appear in both `-` and `+` lines (value changed)
|
||||
* DELETE: Keys that appear only in `-` lines (removed keys)
|
||||
|
||||
### Step 1.2: Read Language Configuration
|
||||
Use the Read tool to read `${{ github.workspace }}/web/i18n-config/languages.ts`.
|
||||
Extract all languages with `supported: true`.
|
||||
|
||||
### Step 1.3: Run i18n:check for Each Language
|
||||
```bash
|
||||
pnpm --dir ${{ github.workspace }}/web install --frozen-lockfile
|
||||
```
|
||||
```bash
|
||||
pnpm --dir ${{ github.workspace }}/web run i18n:check
|
||||
```
|
||||
|
||||
This will report:
|
||||
- Missing keys (need to ADD)
|
||||
- Extra keys (need to DELETE)
|
||||
|
||||
### Step 1.4: Generate Change Report
|
||||
|
||||
Create a structured report identifying:
|
||||
```
|
||||
╔══════════════════════════════════════════════════════════════╗
|
||||
║ I18N SYNC CHANGE REPORT ║
|
||||
╠══════════════════════════════════════════════════════════════╣
|
||||
║ Files to process: [list] ║
|
||||
║ Languages to sync: [list] ║
|
||||
╠══════════════════════════════════════════════════════════════╣
|
||||
║ ADD (New Keys): ║
|
||||
║ - [filename].[key]: "English value" ║
|
||||
║ ... ║
|
||||
╠══════════════════════════════════════════════════════════════╣
|
||||
║ UPDATE (Modified Keys - MUST re-translate): ║
|
||||
║ - [filename].[key]: "Old value" → "New value" ║
|
||||
║ ... ║
|
||||
╠══════════════════════════════════════════════════════════════╣
|
||||
║ DELETE (Extra Keys): ║
|
||||
║ - [language]/[filename].[key] ║
|
||||
║ ... ║
|
||||
╚══════════════════════════════════════════════════════════════╝
|
||||
```
|
||||
|
||||
**IMPORTANT**: For UPDATE detection, compare git diff to find keys where
|
||||
the English value changed. These MUST be re-translated even if target
|
||||
language already has a translation (it's now stale!).
|
||||
|
||||
═══════════════════════════════════════════════════════════════
|
||||
║ PHASE 2: SYNC - Execute Changes Based on Report ║
|
||||
═══════════════════════════════════════════════════════════════
|
||||
|
||||
### Step 2.1: Process ADD Operations (BATCH per language file)
|
||||
|
||||
**CRITICAL WORKFLOW for efficiency:**
|
||||
1. First, translate ALL new keys for ALL languages mentally
|
||||
2. Then, for EACH language file, do ONE Edit operation:
|
||||
- Read the file once
|
||||
- Insert ALL new keys at the beginning (right after the opening `{`)
|
||||
- Don't worry about alphabetical order - lint:fix will sort them later
|
||||
|
||||
Example Edit (adding 3 keys to zh-Hans/app.json):
|
||||
```
|
||||
old_string: '{\n "accessControl"'
|
||||
new_string: '{\n "newKey1": "translation1",\n "newKey2": "translation2",\n "newKey3": "translation3",\n "accessControl"'
|
||||
```
|
||||
|
||||
**IMPORTANT**:
|
||||
- ONE Edit per language file (not one Edit per key!)
|
||||
- Always use the Edit tool. NEVER use bash scripts, node, or jq.
|
||||
|
||||
### Step 2.2: Process UPDATE Operations
|
||||
|
||||
**IMPORTANT: Special handling for zh-Hans and ja-JP**
|
||||
If zh-Hans or ja-JP files were ALSO modified in the same push:
|
||||
- Run: `git -C ${{ github.workspace }} diff HEAD~1 --name-only` and check for zh-Hans or ja-JP files
|
||||
- If found, it means someone manually translated them. Apply these rules:
|
||||
|
||||
1. **Missing keys**: Still ADD them (completeness required)
|
||||
2. **Existing translations**: Compare with the NEW English value:
|
||||
- If translation is **completely wrong** or **unrelated** → Update it
|
||||
- If translation is **roughly correct** (captures the meaning) → Keep it, respect manual work
|
||||
- When in doubt, **keep the manual translation**
|
||||
|
||||
Example:
|
||||
- English changed: "Save" → "Save Changes"
|
||||
- Manual translation: "保存更改" → Keep it (correct meaning)
|
||||
- Manual translation: "删除" → Update it (completely wrong)
|
||||
|
||||
For other languages:
|
||||
Use Edit tool to replace the old value with the new translation.
|
||||
You can batch multiple updates in one Edit if they are adjacent.
|
||||
|
||||
### Step 2.3: Process DELETE Operations
|
||||
For extra keys reported by i18n:check:
|
||||
- Run: `pnpm --dir ${{ github.workspace }}/web run i18n:check --auto-remove`
|
||||
- Or manually remove from target language JSON files
|
||||
|
||||
## Translation Guidelines
|
||||
|
||||
- PRESERVE all placeholders exactly as-is:
|
||||
- `{{variable}}` - Mustache interpolation
|
||||
- `${variable}` - Template literal
|
||||
- `<tag>content</tag>` - HTML tags
|
||||
- `_one`, `_other` - Pluralization suffixes (these are KEY suffixes, not values)
|
||||
|
||||
**CRITICAL: Variable names and tag names MUST stay in English - NEVER translate them**
|
||||
|
||||
✅ CORRECT examples:
|
||||
- English: "{{count}} items" → Japanese: "{{count}} 個のアイテム"
|
||||
- English: "{{name}} updated" → Korean: "{{name}} 업데이트됨"
|
||||
- English: "<email>{{email}}</email>" → Chinese: "<email>{{email}}</email>"
|
||||
- English: "<CustomLink>Marketplace</CustomLink>" → Japanese: "<CustomLink>マーケットプレイス</CustomLink>"
|
||||
|
||||
❌ WRONG examples (NEVER do this - will break the application):
|
||||
- "{{count}}" → "{{カウント}}" ❌ (variable name translated to Japanese)
|
||||
- "{{name}}" → "{{이름}}" ❌ (variable name translated to Korean)
|
||||
- "{{email}}" → "{{邮箱}}" ❌ (variable name translated to Chinese)
|
||||
- "<email>" → "<メール>" ❌ (tag name translated)
|
||||
- "<CustomLink>" → "<自定义链接>" ❌ (component name translated)
|
||||
|
||||
- Use appropriate language register (formal/informal) based on existing translations
|
||||
- Match existing translation style in each language
|
||||
- Technical terms: check existing conventions per language
|
||||
- For CJK languages: no spaces between characters unless necessary
|
||||
- For RTL languages (ar-TN, fa-IR): ensure proper text handling
|
||||
|
||||
## Output Format Requirements
|
||||
- Alphabetical key ordering (if original file uses it)
|
||||
- 2-space indentation
|
||||
- Trailing newline at end of file
|
||||
- Valid JSON (use proper escaping for special characters)
|
||||
|
||||
═══════════════════════════════════════════════════════════════
|
||||
║ PHASE 3: RE-VERIFY - Confirm All Issues Resolved ║
|
||||
═══════════════════════════════════════════════════════════════
|
||||
|
||||
### Step 3.1: Run Lint Fix (IMPORTANT!)
|
||||
```bash
|
||||
pnpm --dir ${{ github.workspace }}/web lint:fix --quiet -- 'i18n/**/*.json'
|
||||
```
|
||||
This ensures:
|
||||
- JSON keys are sorted alphabetically (jsonc/sort-keys rule)
|
||||
- Valid i18n keys (dify-i18n/valid-i18n-keys rule)
|
||||
- No extra keys (dify-i18n/no-extra-keys rule)
|
||||
|
||||
### Step 3.2: Run Final i18n Check
|
||||
```bash
|
||||
pnpm --dir ${{ github.workspace }}/web run i18n:check
|
||||
```
|
||||
|
||||
### Step 3.3: Fix Any Remaining Issues
|
||||
If check reports issues:
|
||||
- Go back to PHASE 2 for unresolved items
|
||||
- Repeat until check passes
|
||||
|
||||
### Step 3.4: Generate Final Summary
|
||||
```
|
||||
╔══════════════════════════════════════════════════════════════╗
|
||||
║ SYNC COMPLETED SUMMARY ║
|
||||
╠══════════════════════════════════════════════════════════════╣
|
||||
║ Language │ Added │ Updated │ Deleted │ Status ║
|
||||
╠══════════════════════════════════════════════════════════════╣
|
||||
║ zh-Hans │ 5 │ 2 │ 1 │ ✓ Complete ║
|
||||
║ ja-JP │ 5 │ 2 │ 1 │ ✓ Complete ║
|
||||
║ ... │ ... │ ... │ ... │ ... ║
|
||||
╠══════════════════════════════════════════════════════════════╣
|
||||
║ i18n:check │ PASSED - All keys in sync ║
|
||||
╚══════════════════════════════════════════════════════════════╝
|
||||
```
|
||||
|
||||
## Mode-Specific Behavior
|
||||
|
||||
**SYNC_MODE = "incremental"** (default):
|
||||
- Focus on keys identified from git diff
|
||||
- Also check i18n:check output for any missing/extra keys
|
||||
- Efficient for small changes
|
||||
|
||||
**SYNC_MODE = "full"**:
|
||||
- Compare ALL keys between en-US and each language
|
||||
- Run i18n:check to identify all discrepancies
|
||||
- Use for first-time sync or fixing historical issues
|
||||
|
||||
## Important Notes
|
||||
|
||||
1. Always run i18n:check BEFORE and AFTER making changes
|
||||
2. The check script is the source of truth for missing/extra keys
|
||||
3. For UPDATE scenario: git diff is the source of truth for changed values
|
||||
4. Create a single commit with all translation changes
|
||||
5. If any translation fails, continue with others and report failures
|
||||
|
||||
═══════════════════════════════════════════════════════════════
|
||||
║ PHASE 4: COMMIT AND CREATE PR ║
|
||||
═══════════════════════════════════════════════════════════════
|
||||
|
||||
After all translations are complete and verified:
|
||||
|
||||
### Step 4.1: Check for changes
|
||||
```bash
|
||||
git -C ${{ github.workspace }} status --porcelain
|
||||
```
|
||||
|
||||
If there are changes:
|
||||
|
||||
### Step 4.2: Create a new branch and commit
|
||||
Run these git commands ONE BY ONE (not combined with &&).
|
||||
**IMPORTANT**: Do NOT use `$()` command substitution. Use two separate commands:
|
||||
|
||||
1. First, get the timestamp:
|
||||
```bash
|
||||
date +%Y%m%d-%H%M%S
|
||||
```
|
||||
(Note the output, e.g., "20260115-143052")
|
||||
|
||||
2. Then create branch using the timestamp value:
|
||||
```bash
|
||||
git -C ${{ github.workspace }} checkout -b chore/i18n-sync-20260115-143052
|
||||
```
|
||||
(Replace "20260115-143052" with the actual timestamp from step 1)
|
||||
|
||||
3. Stage changes:
|
||||
```bash
|
||||
git -C ${{ github.workspace }} add web/i18n/
|
||||
```
|
||||
|
||||
4. Commit:
|
||||
```bash
|
||||
git -C ${{ github.workspace }} commit -m "chore(i18n): sync translations with en-US - Mode: ${{ steps.detect_changes.outputs.SYNC_MODE }}"
|
||||
```
|
||||
|
||||
5. Push:
|
||||
```bash
|
||||
git -C ${{ github.workspace }} push origin HEAD
|
||||
```
|
||||
|
||||
### Step 4.3: Create Pull Request
|
||||
```bash
|
||||
gh pr create --repo ${{ github.repository }} --title "chore(i18n): sync translations with en-US" --body "## Summary
|
||||
|
||||
This PR was automatically generated to sync i18n translation files.
|
||||
|
||||
### Changes
|
||||
- Mode: ${{ steps.detect_changes.outputs.SYNC_MODE }}
|
||||
- Files processed: ${{ steps.detect_changes.outputs.CHANGED_FILES }}
|
||||
|
||||
### Verification
|
||||
- [x] \`i18n:check\` passed
|
||||
- [x] \`lint:fix\` applied
|
||||
|
||||
🤖 Generated with Claude Code GitHub Action" --base main
|
||||
```
|
||||
Tool rules:
|
||||
- Use Read for repository files.
|
||||
- Use Edit for JSON updates.
|
||||
- Use Bash only for `vp`.
|
||||
- Do not use Bash for `git`, `gh`, or branch management.
|
||||
|
||||
Required execution plan:
|
||||
1. Resolve target languages.
|
||||
- Use the provided `Target languages` value as the source of truth.
|
||||
- If it is unexpectedly empty, read `${{ github.workspace }}/web/i18n-config/languages.ts` and use every language with `supported: true` except `en-US`.
|
||||
2. Stay strictly in scope.
|
||||
- Only process the files listed in `Files in scope`.
|
||||
- Only process the resolved target languages, never `en-US`.
|
||||
- Do not touch unrelated i18n files.
|
||||
- Do not modify `${{ github.workspace }}/web/i18n/en-US/`.
|
||||
3. Resolve source changes.
|
||||
- If `Structured change set available` is `true`, read `/tmp/i18n-changes.json` and use it as the source of truth for file-level and key-level changes.
|
||||
- For each file entry:
|
||||
- `added` contains new English keys that need translations.
|
||||
- `updated` contains stale keys whose English source changed; re-translate using the `after` value.
|
||||
- `deleted` contains keys that should be removed from locale files.
|
||||
- `fileDeleted: true` means the English file no longer exists; remove the matching locale file if present.
|
||||
- Read the current English JSON file for any file that still exists so wording, placeholders, and surrounding terminology stay accurate.
|
||||
- If `Structured change set available` is `false`, treat this as a scoped full sync and use the current English files plus scoped checks as the source of truth.
|
||||
4. Run a scoped pre-check before editing:
|
||||
- `vp run dify-web#i18n:check ${{ steps.context.outputs.FILE_ARGS }} ${{ steps.context.outputs.LANG_ARGS }}`
|
||||
- Use this command as the source of truth for missing and extra keys inside the current scope.
|
||||
5. Apply translations.
|
||||
- For every target language and scoped file:
|
||||
- If `fileDeleted` is `true`, remove the locale file if it exists and skip the rest of that file.
|
||||
- If the locale file does not exist yet, create it with `Write` and then continue with `Edit` as needed.
|
||||
- ADD missing keys.
|
||||
- UPDATE stale translations when the English value changed.
|
||||
- DELETE removed keys. Prefer `vp run dify-web#i18n:check ${{ steps.context.outputs.FILE_ARGS }} ${{ steps.context.outputs.LANG_ARGS }} --auto-remove` for extra keys so deletions stay in scope.
|
||||
- Preserve placeholders exactly: `{{variable}}`, `${variable}`, HTML tags, component tags, and variable names.
|
||||
- Match the existing terminology and register used by each locale.
|
||||
- Prefer one Edit per file when stable, but prioritize correctness over batching.
|
||||
6. Verify only the edited files.
|
||||
- Run `vp run dify-web#lint:fix --quiet -- <relative edited i18n file paths under web/>`
|
||||
- Run `vp run dify-web#i18n:check ${{ steps.context.outputs.FILE_ARGS }} ${{ steps.context.outputs.LANG_ARGS }}`
|
||||
- If verification fails, fix the remaining problems before continuing.
|
||||
7. Stop after the scoped locale files are updated and verification passes.
|
||||
- Do not create branches, commits, or pull requests.
|
||||
claude_args: |
|
||||
--max-turns 150
|
||||
--allowedTools "Read,Write,Edit,Bash(git *),Bash(git:*),Bash(gh *),Bash(gh:*),Bash(pnpm *),Bash(pnpm:*),Bash(date *),Bash(date:*),Glob,Grep"
|
||||
--max-turns 120
|
||||
--allowedTools "Read,Write,Edit,Bash(vp *),Bash(vp:*),Glob,Grep"
|
||||
|
||||
- name: Prepare branch metadata
|
||||
id: pr_meta
|
||||
if: steps.context.outputs.CHANGED_FILES != ''
|
||||
shell: bash
|
||||
run: |
|
||||
if [ -z "$(git -C "${{ github.workspace }}" status --porcelain -- web/i18n/)" ]; then
|
||||
echo "has_changes=false" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
SCOPE_HASH=$(printf '%s|%s|%s' "${{ steps.context.outputs.CHANGED_FILES }}" "${{ steps.context.outputs.TARGET_LANGS }}" "${{ steps.context.outputs.SYNC_MODE }}" | sha256sum | cut -c1-8)
|
||||
HEAD_SHORT=$(printf '%s' "${{ steps.context.outputs.HEAD_SHA }}" | cut -c1-12)
|
||||
BRANCH_NAME="chore/i18n-sync-${HEAD_SHORT}-${SCOPE_HASH}"
|
||||
|
||||
{
|
||||
echo "has_changes=true"
|
||||
echo "branch_name=$BRANCH_NAME"
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Commit translation changes
|
||||
if: steps.pr_meta.outputs.has_changes == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
git -C "${{ github.workspace }}" checkout -B "${{ steps.pr_meta.outputs.branch_name }}"
|
||||
git -C "${{ github.workspace }}" add web/i18n/
|
||||
git -C "${{ github.workspace }}" commit -m "chore(i18n): sync translations with en-US"
|
||||
|
||||
- name: Push translation branch
|
||||
if: steps.pr_meta.outputs.has_changes == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
if git -C "${{ github.workspace }}" ls-remote --exit-code --heads origin "${{ steps.pr_meta.outputs.branch_name }}" >/dev/null 2>&1; then
|
||||
git -C "${{ github.workspace }}" push --force-with-lease origin "${{ steps.pr_meta.outputs.branch_name }}"
|
||||
else
|
||||
git -C "${{ github.workspace }}" push --set-upstream origin "${{ steps.pr_meta.outputs.branch_name }}"
|
||||
fi
|
||||
|
||||
- name: Create or update translation PR
|
||||
if: steps.pr_meta.outputs.has_changes == 'true'
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
BRANCH_NAME: ${{ steps.pr_meta.outputs.branch_name }}
|
||||
FILES_IN_SCOPE: ${{ steps.context.outputs.CHANGED_FILES }}
|
||||
TARGET_LANGS: ${{ steps.context.outputs.TARGET_LANGS }}
|
||||
SYNC_MODE: ${{ steps.context.outputs.SYNC_MODE }}
|
||||
CHANGES_SOURCE: ${{ steps.context.outputs.CHANGES_SOURCE }}
|
||||
BASE_SHA: ${{ steps.context.outputs.BASE_SHA }}
|
||||
HEAD_SHA: ${{ steps.context.outputs.HEAD_SHA }}
|
||||
REPO_NAME: ${{ github.repository }}
|
||||
shell: bash
|
||||
run: |
|
||||
PR_BODY_FILE=/tmp/i18n-pr-body.md
|
||||
LANG_COUNT=$(printf '%s\n' "$TARGET_LANGS" | wc -w | tr -d ' ')
|
||||
if [ "$LANG_COUNT" = "0" ]; then
|
||||
LANG_COUNT="0"
|
||||
fi
|
||||
export LANG_COUNT
|
||||
|
||||
node <<'NODE' > "$PR_BODY_FILE"
|
||||
const fs = require('node:fs')
|
||||
|
||||
const changesPath = '/tmp/i18n-changes.json'
|
||||
const changes = fs.existsSync(changesPath)
|
||||
? JSON.parse(fs.readFileSync(changesPath, 'utf8'))
|
||||
: { changes: {} }
|
||||
|
||||
const filesInScope = (process.env.FILES_IN_SCOPE || '').split(/\s+/).filter(Boolean)
|
||||
const lines = [
|
||||
'## Summary',
|
||||
'',
|
||||
`- **Files synced**: \`${process.env.FILES_IN_SCOPE || '<none>'}\``,
|
||||
`- **Languages updated**: ${process.env.TARGET_LANGS || '<none>'} (${process.env.LANG_COUNT} languages)`,
|
||||
`- **Sync mode**: ${process.env.SYNC_MODE}${process.env.BASE_SHA ? ` (base: \`${process.env.BASE_SHA.slice(0, 10)}\`, head: \`${process.env.HEAD_SHA.slice(0, 10)}\`)` : ` (head: \`${process.env.HEAD_SHA.slice(0, 10)}\`)`}`,
|
||||
'',
|
||||
'### Key changes',
|
||||
]
|
||||
|
||||
for (const fileName of filesInScope) {
|
||||
const fileChange = changes.changes?.[fileName] || { added: {}, updated: {}, deleted: [], fileDeleted: false }
|
||||
const addedKeys = Object.keys(fileChange.added || {})
|
||||
const updatedKeys = Object.keys(fileChange.updated || {})
|
||||
const deletedKeys = fileChange.deleted || []
|
||||
lines.push(`- \`${fileName}\`: +${addedKeys.length} / ~${updatedKeys.length} / -${deletedKeys.length}${fileChange.fileDeleted ? ' (file deleted in en-US)' : ''}`)
|
||||
}
|
||||
|
||||
lines.push(
|
||||
'',
|
||||
'## Verification',
|
||||
'',
|
||||
`- \`vp run dify-web#i18n:check --file ${process.env.FILES_IN_SCOPE} --lang ${process.env.TARGET_LANGS}\``,
|
||||
`- \`vp run dify-web#lint:fix --quiet -- <edited i18n files under web/>\``,
|
||||
'',
|
||||
'## Notes',
|
||||
'',
|
||||
'- This PR was generated from structured en-US key changes produced by `trigger-i18n-sync.yml`.',
|
||||
`- Structured change source: ${process.env.CHANGES_SOURCE || 'unknown'}.`,
|
||||
'- Branch name is deterministic for the head SHA and scope, so reruns update the same PR instead of opening duplicates.',
|
||||
'',
|
||||
'🤖 Generated with [Claude Code](https://claude.com/claude-code)'
|
||||
)
|
||||
|
||||
process.stdout.write(lines.join('\n'))
|
||||
NODE
|
||||
|
||||
EXISTING_PR_NUMBER=$(gh pr list --repo "$REPO_NAME" --head "$BRANCH_NAME" --state open --json number --jq '.[0].number')
|
||||
|
||||
if [ -n "$EXISTING_PR_NUMBER" ] && [ "$EXISTING_PR_NUMBER" != "null" ]; then
|
||||
gh pr edit "$EXISTING_PR_NUMBER" --repo "$REPO_NAME" --title "chore(i18n): sync translations with en-US" --body-file "$PR_BODY_FILE"
|
||||
else
|
||||
gh pr create --repo "$REPO_NAME" --head "$BRANCH_NAME" --base main --title "chore(i18n): sync translations with en-US" --body-file "$PR_BODY_FILE"
|
||||
fi
|
||||
|
||||
92
.github/workflows/trigger-i18n-sync.yml
vendored
92
.github/workflows/trigger-i18n-sync.yml
vendored
@ -1,9 +1,5 @@
|
||||
name: Trigger i18n Sync on Push
|
||||
|
||||
# This workflow bridges the push event to repository_dispatch
|
||||
# because claude-code-action doesn't support push events directly.
|
||||
# See: https://github.com/langgenius/dify/issues/30743
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
@ -13,10 +9,14 @@ on:
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
concurrency:
|
||||
group: trigger-i18n-sync-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
trigger:
|
||||
if: github.repository == 'langgenius/dify'
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04
|
||||
timeout-minutes: 5
|
||||
|
||||
steps:
|
||||
@ -25,42 +25,66 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Detect changed files and generate diff
|
||||
- name: Detect changed files and build structured change set
|
||||
id: detect
|
||||
shell: bash
|
||||
run: |
|
||||
BEFORE_SHA="${{ github.event.before }}"
|
||||
# Handle edge case: force push may have null/zero SHA
|
||||
if [ -z "$BEFORE_SHA" ] || [ "$BEFORE_SHA" = "0000000000000000000000000000000000000000" ]; then
|
||||
BEFORE_SHA="HEAD~1"
|
||||
BASE_SHA="${{ github.event.before }}"
|
||||
if [ -z "$BASE_SHA" ] || [ "$BASE_SHA" = "0000000000000000000000000000000000000000" ]; then
|
||||
BASE_SHA=$(git rev-parse HEAD~1 2>/dev/null || true)
|
||||
fi
|
||||
HEAD_SHA="${{ github.sha }}"
|
||||
|
||||
# Detect changed i18n files
|
||||
changed=$(git diff --name-only "$BEFORE_SHA" "${{ github.sha }}" -- 'web/i18n/en-US/*.json' 2>/dev/null | xargs -n1 basename 2>/dev/null | sed 's/.json$//' | tr '\n' ' ' || echo "")
|
||||
echo "changed_files=$changed" >> $GITHUB_OUTPUT
|
||||
|
||||
# Generate diff for context
|
||||
git diff "$BEFORE_SHA" "${{ github.sha }}" -- 'web/i18n/en-US/*.json' > /tmp/i18n-diff.txt 2>/dev/null || echo "" > /tmp/i18n-diff.txt
|
||||
|
||||
# Truncate if too large (keep first 50KB to match receiving workflow)
|
||||
head -c 50000 /tmp/i18n-diff.txt > /tmp/i18n-diff-truncated.txt
|
||||
mv /tmp/i18n-diff-truncated.txt /tmp/i18n-diff.txt
|
||||
|
||||
# Base64 encode the diff for safe JSON transport (portable, single-line)
|
||||
diff_base64=$(base64 < /tmp/i18n-diff.txt | tr -d '\n')
|
||||
echo "diff_base64=$diff_base64" >> $GITHUB_OUTPUT
|
||||
|
||||
if [ -n "$changed" ]; then
|
||||
echo "has_changes=true" >> $GITHUB_OUTPUT
|
||||
echo "Detected changed files: $changed"
|
||||
if [ -n "$BASE_SHA" ]; then
|
||||
CHANGED_FILES=$(git diff --name-only "$BASE_SHA" "$HEAD_SHA" -- 'web/i18n/en-US/*.json' 2>/dev/null | sed -n 's@^.*/@@p' | sed 's/\.json$//' | tr '\n' ' ' | sed 's/[[:space:]]*$//')
|
||||
else
|
||||
echo "has_changes=false" >> $GITHUB_OUTPUT
|
||||
echo "No i18n changes detected"
|
||||
CHANGED_FILES=$(find web/i18n/en-US -maxdepth 1 -type f -name '*.json' -print | sed -n 's@^.*/@@p' | sed 's/\.json$//' | sort | tr '\n' ' ' | sed 's/[[:space:]]*$//')
|
||||
fi
|
||||
|
||||
export BASE_SHA HEAD_SHA CHANGED_FILES
|
||||
node .github/scripts/generate-i18n-changes.mjs
|
||||
|
||||
if [ -n "$CHANGED_FILES" ]; then
|
||||
echo "has_changes=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "has_changes=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
echo "base_sha=$BASE_SHA" >> "$GITHUB_OUTPUT"
|
||||
echo "head_sha=$HEAD_SHA" >> "$GITHUB_OUTPUT"
|
||||
echo "changed_files=$CHANGED_FILES" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Trigger i18n sync workflow
|
||||
if: steps.detect.outputs.has_changes == 'true'
|
||||
uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
|
||||
env:
|
||||
BASE_SHA: ${{ steps.detect.outputs.base_sha }}
|
||||
HEAD_SHA: ${{ steps.detect.outputs.head_sha }}
|
||||
CHANGED_FILES: ${{ steps.detect.outputs.changed_files }}
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
event-type: i18n-sync
|
||||
client-payload: '{"changed_files": "${{ steps.detect.outputs.changed_files }}", "diff_base64": "${{ steps.detect.outputs.diff_base64 }}", "sync_mode": "incremental", "trigger_sha": "${{ github.sha }}"}'
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const fs = require('fs')
|
||||
|
||||
const changesJson = fs.readFileSync('/tmp/i18n-changes.json', 'utf8')
|
||||
const changesBase64 = Buffer.from(changesJson).toString('base64')
|
||||
const maxEmbeddedChangesChars = 48000
|
||||
const changesEmbedded = changesBase64.length <= maxEmbeddedChangesChars
|
||||
|
||||
if (!changesEmbedded) {
|
||||
console.log(`Structured change set too large to embed safely (${changesBase64.length} chars). Downstream workflow will regenerate it from git history.`)
|
||||
}
|
||||
|
||||
await github.rest.repos.createDispatchEvent({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
event_type: 'i18n-sync',
|
||||
client_payload: {
|
||||
changed_files: process.env.CHANGED_FILES,
|
||||
changes_base64: changesEmbedded ? changesBase64 : '',
|
||||
changes_embedded: changesEmbedded,
|
||||
sync_mode: 'incremental',
|
||||
base_sha: process.env.BASE_SHA,
|
||||
head_sha: process.env.HEAD_SHA,
|
||||
},
|
||||
})
|
||||
|
||||
95
.github/workflows/vdb-tests-full.yml
vendored
Normal file
95
.github/workflows/vdb-tests-full.yml
vendored
Normal file
@ -0,0 +1,95 @@
|
||||
name: Run Full VDB Tests
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 3 * * 1'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: vdb-tests-full-${{ github.ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
test:
|
||||
name: Full VDB Tests
|
||||
if: github.repository == 'langgenius/dify'
|
||||
runs-on: depot-ubuntu-24.04
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.12"
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Free Disk Space
|
||||
uses: endersonmenezes/free-disk-space@7901478139cff6e9d44df5972fd8ab8fcade4db1 # v3.2.2
|
||||
with:
|
||||
remove_dotnet: true
|
||||
remove_haskell: true
|
||||
remove_tool_cache: true
|
||||
|
||||
- name: Setup UV and Python
|
||||
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
|
||||
with:
|
||||
enable-cache: true
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache-dependency-glob: api/uv.lock
|
||||
|
||||
- name: Check UV lockfile
|
||||
run: uv lock --project api --check
|
||||
|
||||
- name: Install dependencies
|
||||
run: uv sync --project api --dev
|
||||
|
||||
- name: Set up dotenvs
|
||||
run: |
|
||||
cp docker/.env.example docker/.env
|
||||
cp docker/envs/middleware.env.example docker/middleware.env
|
||||
|
||||
- name: Expose Service Ports
|
||||
run: sh .github/workflows/expose_service_ports.sh
|
||||
|
||||
# - name: Set up Vector Store (TiDB)
|
||||
# uses: hoverkraft-tech/compose-action@v2.0.2
|
||||
# with:
|
||||
# compose-file: docker/tidb/docker-compose.yaml
|
||||
# services: |
|
||||
# tidb
|
||||
# tiflash
|
||||
|
||||
- name: Set up Full Vector Store Matrix
|
||||
uses: hoverkraft-tech/compose-action@d2bee4f07e8ca410d6b196d00f90c12e7d48c33a # v2.6.0
|
||||
with:
|
||||
compose-file: |
|
||||
docker/docker-compose.yaml
|
||||
services: |
|
||||
weaviate
|
||||
qdrant
|
||||
couchbase-server
|
||||
etcd
|
||||
minio
|
||||
milvus-standalone
|
||||
pgvecto-rs
|
||||
pgvector
|
||||
chroma
|
||||
elasticsearch
|
||||
oceanbase
|
||||
|
||||
- name: setup test config
|
||||
run: |
|
||||
echo $(pwd)
|
||||
ls -lah .
|
||||
cp api/tests/integration_tests/.env.example api/tests/integration_tests/.env
|
||||
|
||||
# - name: Check VDB Ready (TiDB)
|
||||
# run: uv run --project api python api/providers/vdb/tidb-vector/tests/integration_tests/check_tiflash_ready.py
|
||||
|
||||
- name: Test Vector Stores
|
||||
run: uv run --project api bash dev/pytest/pytest_vdb.sh
|
||||
36
.github/workflows/vdb-tests.yml
vendored
36
.github/workflows/vdb-tests.yml
vendored
@ -1,20 +1,22 @@
|
||||
name: Run VDB Tests
|
||||
name: Run VDB Smoke Tests
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: vdb-tests-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
test:
|
||||
name: VDB Tests
|
||||
runs-on: ubuntu-latest
|
||||
name: VDB Smoke Tests
|
||||
runs-on: depot-ubuntu-24.04
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.11"
|
||||
- "3.12"
|
||||
|
||||
steps:
|
||||
@ -31,7 +33,7 @@ jobs:
|
||||
remove_tool_cache: true
|
||||
|
||||
- name: Setup UV and Python
|
||||
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
|
||||
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
|
||||
with:
|
||||
enable-cache: true
|
||||
python-version: ${{ matrix.python-version }}
|
||||
@ -46,7 +48,7 @@ jobs:
|
||||
- name: Set up dotenvs
|
||||
run: |
|
||||
cp docker/.env.example docker/.env
|
||||
cp docker/middleware.env.example docker/middleware.env
|
||||
cp docker/envs/middleware.env.example docker/middleware.env
|
||||
|
||||
- name: Expose Service Ports
|
||||
run: sh .github/workflows/expose_service_ports.sh
|
||||
@ -59,23 +61,18 @@ jobs:
|
||||
# tidb
|
||||
# tiflash
|
||||
|
||||
- name: Set up Vector Stores (Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase, OceanBase)
|
||||
uses: hoverkraft-tech/compose-action@4894d2492015c1774ee5a13a95b1072093087ec3 # v2.5.0
|
||||
- name: Set up Vector Stores for Smoke Coverage
|
||||
uses: hoverkraft-tech/compose-action@d2bee4f07e8ca410d6b196d00f90c12e7d48c33a # v2.6.0
|
||||
with:
|
||||
compose-file: |
|
||||
docker/docker-compose.yaml
|
||||
services: |
|
||||
db_postgres
|
||||
redis
|
||||
weaviate
|
||||
qdrant
|
||||
couchbase-server
|
||||
etcd
|
||||
minio
|
||||
milvus-standalone
|
||||
pgvecto-rs
|
||||
pgvector
|
||||
chroma
|
||||
elasticsearch
|
||||
oceanbase
|
||||
|
||||
- name: setup test config
|
||||
run: |
|
||||
@ -84,7 +81,12 @@ jobs:
|
||||
cp api/tests/integration_tests/.env.example api/tests/integration_tests/.env
|
||||
|
||||
# - name: Check VDB Ready (TiDB)
|
||||
# run: uv run --project api python api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py
|
||||
# run: uv run --project api python api/providers/vdb/tidb-vector/tests/integration_tests/check_tiflash_ready.py
|
||||
|
||||
- name: Test Vector Stores
|
||||
run: uv run --project api bash dev/pytest/pytest_vdb.sh
|
||||
run: |
|
||||
uv run --project api pytest --timeout "${PYTEST_TIMEOUT:-180}" \
|
||||
api/providers/vdb/vdb-chroma/tests/integration_tests \
|
||||
api/providers/vdb/vdb-pgvector/tests/integration_tests \
|
||||
api/providers/vdb/vdb-qdrant/tests/integration_tests \
|
||||
api/providers/vdb/vdb-weaviate/tests/integration_tests
|
||||
|
||||
68
.github/workflows/web-e2e.yml
vendored
Normal file
68
.github/workflows/web-e2e.yml
vendored
Normal file
@ -0,0 +1,68 @@
|
||||
name: Web Full-Stack E2E
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: web-e2e-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
test:
|
||||
name: Web Full-Stack E2E
|
||||
runs-on: depot-ubuntu-24.04-4
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Setup web dependencies
|
||||
uses: ./.github/actions/setup-web
|
||||
|
||||
- name: Setup UV and Python
|
||||
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
|
||||
with:
|
||||
enable-cache: true
|
||||
python-version: "3.12"
|
||||
cache-dependency-glob: api/uv.lock
|
||||
|
||||
- name: Install API dependencies
|
||||
run: uv sync --project api --dev
|
||||
|
||||
- name: Install Playwright browser
|
||||
working-directory: ./e2e
|
||||
run: vp run e2e:install
|
||||
|
||||
- name: Run isolated source-api and built-web Cucumber E2E tests
|
||||
working-directory: ./e2e
|
||||
env:
|
||||
E2E_ADMIN_EMAIL: e2e-admin@example.com
|
||||
E2E_ADMIN_NAME: E2E Admin
|
||||
E2E_ADMIN_PASSWORD: E2eAdmin12345
|
||||
E2E_FORCE_WEB_BUILD: "1"
|
||||
E2E_INIT_PASSWORD: E2eInit12345
|
||||
run: vp run e2e:full
|
||||
|
||||
- name: Upload Cucumber report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: cucumber-report
|
||||
path: e2e/cucumber-report
|
||||
retention-days: 7
|
||||
|
||||
- name: Upload E2E logs
|
||||
if: ${{ !cancelled() }}
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: e2e-logs
|
||||
path: e2e/.logs
|
||||
retention-days: 7
|
||||
52
.github/workflows/web-tests.yml
vendored
52
.github/workflows/web-tests.yml
vendored
@ -16,14 +16,14 @@ concurrency:
|
||||
jobs:
|
||||
test:
|
||||
name: Web Tests (${{ matrix.shardIndex }}/${{ matrix.shardTotal }})
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04-4
|
||||
env:
|
||||
VITEST_COVERAGE_SCOPE: app-components
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
shardIndex: [1, 2, 3, 4, 5, 6]
|
||||
shardTotal: [6]
|
||||
shardIndex: [1, 2, 3, 4]
|
||||
shardTotal: [4]
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
@ -43,7 +43,7 @@ jobs:
|
||||
|
||||
- name: Upload blob report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: blob-report-${{ matrix.shardIndex }}
|
||||
path: web/.vitest-reports/*
|
||||
@ -54,7 +54,7 @@ jobs:
|
||||
name: Merge Test Reports
|
||||
if: ${{ !cancelled() }}
|
||||
needs: [test]
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: depot-ubuntu-24.04-4
|
||||
env:
|
||||
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
||||
defaults:
|
||||
@ -66,7 +66,6 @@ jobs:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Setup web environment
|
||||
@ -84,19 +83,22 @@ jobs:
|
||||
|
||||
- name: Report coverage
|
||||
if: ${{ env.CODECOV_TOKEN != '' }}
|
||||
uses: codecov/codecov-action@1af58845a975a7985b0beb0cbe6fbbb71a41dbad # v5.5.3
|
||||
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
|
||||
with:
|
||||
directory: web/coverage
|
||||
flags: web
|
||||
env:
|
||||
CODECOV_TOKEN: ${{ env.CODECOV_TOKEN }}
|
||||
|
||||
web-build:
|
||||
name: Web Build
|
||||
runs-on: ubuntu-latest
|
||||
dify-ui-test:
|
||||
name: dify-ui Tests
|
||||
runs-on: depot-ubuntu-24.04-4
|
||||
env:
|
||||
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ./web
|
||||
shell: bash
|
||||
working-directory: ./packages/dify-ui
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@ -104,20 +106,20 @@ jobs:
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Check changed files
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files: |
|
||||
web/**
|
||||
.github/workflows/web-tests.yml
|
||||
.github/actions/setup-web/**
|
||||
|
||||
- name: Setup web environment
|
||||
if: steps.changed-files.outputs.any_changed == 'true'
|
||||
uses: ./.github/actions/setup-web
|
||||
|
||||
- name: Web build check
|
||||
if: steps.changed-files.outputs.any_changed == 'true'
|
||||
working-directory: ./web
|
||||
run: vp run build
|
||||
- name: Install Chromium for Browser Mode
|
||||
run: vp exec playwright install --with-deps chromium
|
||||
|
||||
- name: Run dify-ui tests
|
||||
run: vp test run --coverage --silent=passed-only
|
||||
|
||||
- name: Report coverage
|
||||
if: ${{ env.CODECOV_TOKEN != '' }}
|
||||
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
|
||||
with:
|
||||
directory: packages/dify-ui/coverage
|
||||
flags: dify-ui
|
||||
env:
|
||||
CODECOV_TOKEN: ${{ env.CODECOV_TOKEN }}
|
||||
|
||||
@ -759,6 +759,7 @@ EVENT_BUS_REDIS_CHANNEL_TYPE=pubsub
|
||||
# Whether to use Redis cluster mode while use redis as event bus.
|
||||
# It's highly recommended to enable this for large deployments.
|
||||
EVENT_BUS_REDIS_USE_CLUSTERS=false
|
||||
EVENT_BUS_LISTENER_JOIN_TIMEOUT_MS=2000
|
||||
|
||||
# Whether to Enable human input timeout check task
|
||||
ENABLE_HUMAN_INPUT_TIMEOUT_TASK=true
|
||||
|
||||
@ -143,6 +143,7 @@ def initialize_extensions(app: DifyApp):
|
||||
ext_commands,
|
||||
ext_compress,
|
||||
ext_database,
|
||||
ext_enterprise_telemetry,
|
||||
ext_fastopenapi,
|
||||
ext_forward_refs,
|
||||
ext_hosting_provider,
|
||||
@ -193,6 +194,7 @@ def initialize_extensions(app: DifyApp):
|
||||
ext_commands,
|
||||
ext_fastopenapi,
|
||||
ext_otel,
|
||||
ext_enterprise_telemetry,
|
||||
ext_request_logging,
|
||||
ext_session_factory,
|
||||
]
|
||||
|
||||
@ -8,7 +8,7 @@ from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, Settings
|
||||
from libs.file_utils import search_file_upwards
|
||||
|
||||
from .deploy import DeploymentConfig
|
||||
from .enterprise import EnterpriseFeatureConfig
|
||||
from .enterprise import EnterpriseFeatureConfig, EnterpriseTelemetryConfig
|
||||
from .extra import ExtraServiceConfig
|
||||
from .feature import FeatureConfig
|
||||
from .middleware import MiddlewareConfig
|
||||
@ -73,6 +73,8 @@ class DifyConfig(
|
||||
# Enterprise feature configs
|
||||
# **Before using, please contact business@dify.ai by email to inquire about licensing matters.**
|
||||
EnterpriseFeatureConfig,
|
||||
# Enterprise telemetry configs
|
||||
EnterpriseTelemetryConfig,
|
||||
):
|
||||
model_config = SettingsConfigDict(
|
||||
# read from dotenv format config file
|
||||
|
||||
@ -22,3 +22,58 @@ class EnterpriseFeatureConfig(BaseSettings):
|
||||
ENTERPRISE_REQUEST_TIMEOUT: int = Field(
|
||||
ge=1, description="Maximum timeout in seconds for enterprise requests", default=5
|
||||
)
|
||||
|
||||
ENTERPRISE_DISABLE_RUNTIME_CREDENTIAL_CHECK: bool = Field(
|
||||
default=False,
|
||||
description="When disabled, credential policy check is disabled at workflow run time."
|
||||
"You can disable to gain performance by trading off consistency",
|
||||
)
|
||||
|
||||
|
||||
class EnterpriseTelemetryConfig(BaseSettings):
|
||||
"""
|
||||
Configuration for enterprise telemetry.
|
||||
"""
|
||||
|
||||
ENTERPRISE_TELEMETRY_ENABLED: bool = Field(
|
||||
description="Enable enterprise telemetry collection (also requires ENTERPRISE_ENABLED=true).",
|
||||
default=False,
|
||||
)
|
||||
|
||||
ENTERPRISE_OTLP_ENDPOINT: str = Field(
|
||||
description="Enterprise OTEL collector endpoint.",
|
||||
default="",
|
||||
)
|
||||
|
||||
ENTERPRISE_OTLP_HEADERS: str = Field(
|
||||
description="Auth headers for OTLP export (key=value,key2=value2).",
|
||||
default="",
|
||||
)
|
||||
|
||||
ENTERPRISE_OTLP_PROTOCOL: str = Field(
|
||||
description="OTLP protocol: 'http' or 'grpc' (default: http).",
|
||||
default="http",
|
||||
)
|
||||
|
||||
ENTERPRISE_OTLP_API_KEY: str = Field(
|
||||
description="Bearer token for enterprise OTLP export authentication.",
|
||||
default="",
|
||||
)
|
||||
|
||||
ENTERPRISE_INCLUDE_CONTENT: bool = Field(
|
||||
description="Include input/output content in traces (privacy toggle).",
|
||||
# Setting the default value to False to avoid accidentally log PII data in traces.
|
||||
default=False,
|
||||
)
|
||||
|
||||
ENTERPRISE_SERVICE_NAME: str = Field(
|
||||
description="Service name for OTEL resource.",
|
||||
default="dify",
|
||||
)
|
||||
|
||||
ENTERPRISE_OTEL_SAMPLING_RATE: float = Field(
|
||||
description="Sampling rate for enterprise traces (0.0 to 1.0, default 1.0 = 100%).",
|
||||
default=1.0,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
)
|
||||
|
||||
@ -2,6 +2,7 @@ from typing import Literal, Protocol, cast
|
||||
from urllib.parse import quote_plus, urlunparse
|
||||
|
||||
from pydantic import AliasChoices, Field
|
||||
from pydantic.types import NonNegativeInt
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
@ -70,6 +71,24 @@ class RedisPubSubConfig(BaseSettings):
|
||||
default=600,
|
||||
)
|
||||
|
||||
PUBSUB_LISTENER_JOIN_TIMEOUT_MS: NonNegativeInt = Field(
|
||||
validation_alias=AliasChoices("EVENT_BUS_LISTENER_JOIN_TIMEOUT_MS", "PUBSUB_LISTENER_JOIN_TIMEOUT_MS"),
|
||||
description=(
|
||||
"Maximum time (milliseconds) that ``Subscription.close()`` waits for its listener thread to "
|
||||
"finish before returning. Bounds the tail latency between a terminal event being delivered to "
|
||||
"an SSE client and the response stream actually closing.\n\n"
|
||||
"The listener thread blocks on a polling read (XREAD BLOCK for streams, get_message timeout "
|
||||
"for pubsub/sharded) with a fixed 1s window, so close() naturally has to wait up to ~1s for "
|
||||
"the thread to notice the subscription was closed. Setting this lower (e.g. 100) lets close() "
|
||||
"return promptly while the daemon listener thread cleans itself up on the next poll "
|
||||
"boundary - safe because the listener holds no critical state and exits within one poll "
|
||||
"window. Setting it higher (e.g. 5000) gives the listener more grace before close() gives up "
|
||||
"and logs a warning. Default 2000ms preserves the pre-change behaviour.\n\n"
|
||||
"Also accepts ENV: EVENT_BUS_LISTENER_JOIN_TIMEOUT_MS."
|
||||
),
|
||||
default=2000,
|
||||
)
|
||||
|
||||
def _build_default_pubsub_url(self) -> str:
|
||||
defaults = _redis_defaults(self)
|
||||
if not defaults.REDIS_HOST or not defaults.REDIS_PORT:
|
||||
|
||||
@ -875,10 +875,10 @@ class ToolBuiltinProviderSetDefaultApi(Resource):
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
def post(self, provider):
|
||||
current_user, current_tenant_id = current_account_with_tenant()
|
||||
_, current_tenant_id = current_account_with_tenant()
|
||||
payload = BuiltinProviderDefaultCredentialPayload.model_validate(console_ns.payload or {})
|
||||
return BuiltinToolManageService.set_default_provider(
|
||||
tenant_id=current_tenant_id, user_id=current_user.id, provider=provider, id=payload.id
|
||||
tenant_id=current_tenant_id, provider=provider, id=payload.id
|
||||
)
|
||||
|
||||
|
||||
|
||||
@ -16,12 +16,14 @@ api = ExternalApi(
|
||||
inner_api_ns = Namespace("inner_api", description="Internal API operations", path="/")
|
||||
|
||||
from . import mail as _mail
|
||||
from .app import dsl as _app_dsl
|
||||
from .plugin import plugin as _plugin
|
||||
from .workspace import workspace as _workspace
|
||||
|
||||
api.add_namespace(inner_api_ns)
|
||||
|
||||
__all__ = [
|
||||
"_app_dsl",
|
||||
"_mail",
|
||||
"_plugin",
|
||||
"_workspace",
|
||||
|
||||
1
api/controllers/inner_api/app/__init__.py
Normal file
1
api/controllers/inner_api/app/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
|
||||
110
api/controllers/inner_api/app/dsl.py
Normal file
110
api/controllers/inner_api/app/dsl.py
Normal file
@ -0,0 +1,110 @@
|
||||
"""Inner API endpoints for app DSL import/export.
|
||||
|
||||
Called by the enterprise admin-api service. Import requires ``creator_email``
|
||||
to attribute the created app; workspace/membership validation is done by the
|
||||
Go admin-api caller.
|
||||
"""
|
||||
|
||||
from flask import request
|
||||
from flask_restx import Resource
|
||||
from pydantic import BaseModel, Field
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from controllers.common.schema import register_schema_model
|
||||
from controllers.console.wraps import setup_required
|
||||
from controllers.inner_api import inner_api_ns
|
||||
from controllers.inner_api.wraps import enterprise_inner_api_only
|
||||
from extensions.ext_database import db
|
||||
from models import Account, App
|
||||
from models.account import AccountStatus
|
||||
from services.app_dsl_service import AppDslService, ImportMode, ImportStatus
|
||||
|
||||
|
||||
class InnerAppDSLImportPayload(BaseModel):
|
||||
yaml_content: str = Field(description="YAML DSL content")
|
||||
creator_email: str = Field(description="Email of the workspace member who will own the imported app")
|
||||
name: str | None = Field(default=None, description="Override app name from DSL")
|
||||
description: str | None = Field(default=None, description="Override app description from DSL")
|
||||
|
||||
|
||||
register_schema_model(inner_api_ns, InnerAppDSLImportPayload)
|
||||
|
||||
|
||||
@inner_api_ns.route("/enterprise/workspaces/<string:workspace_id>/dsl/import")
|
||||
class EnterpriseAppDSLImport(Resource):
|
||||
@setup_required
|
||||
@enterprise_inner_api_only
|
||||
@inner_api_ns.doc("enterprise_app_dsl_import")
|
||||
@inner_api_ns.expect(inner_api_ns.models[InnerAppDSLImportPayload.__name__])
|
||||
@inner_api_ns.doc(
|
||||
responses={
|
||||
200: "Import completed",
|
||||
202: "Import pending (DSL version mismatch requires confirmation)",
|
||||
400: "Import failed (business error)",
|
||||
404: "Creator account not found or inactive",
|
||||
}
|
||||
)
|
||||
def post(self, workspace_id: str):
|
||||
"""Import a DSL into a workspace on behalf of a specified creator."""
|
||||
args = InnerAppDSLImportPayload.model_validate(inner_api_ns.payload or {})
|
||||
|
||||
account = _get_active_account(args.creator_email)
|
||||
if account is None:
|
||||
return {"message": f"account '{args.creator_email}' not found or inactive"}, 404
|
||||
|
||||
account.set_tenant_id(workspace_id)
|
||||
|
||||
with Session(db.engine) as session:
|
||||
dsl_service = AppDslService(session)
|
||||
result = dsl_service.import_app(
|
||||
account=account,
|
||||
import_mode=ImportMode.YAML_CONTENT,
|
||||
yaml_content=args.yaml_content,
|
||||
name=args.name,
|
||||
description=args.description,
|
||||
)
|
||||
session.commit()
|
||||
|
||||
if result.status == ImportStatus.FAILED:
|
||||
return result.model_dump(mode="json"), 400
|
||||
if result.status == ImportStatus.PENDING:
|
||||
return result.model_dump(mode="json"), 202
|
||||
return result.model_dump(mode="json"), 200
|
||||
|
||||
|
||||
@inner_api_ns.route("/enterprise/apps/<string:app_id>/dsl")
|
||||
class EnterpriseAppDSLExport(Resource):
|
||||
@setup_required
|
||||
@enterprise_inner_api_only
|
||||
@inner_api_ns.doc(
|
||||
"enterprise_app_dsl_export",
|
||||
responses={
|
||||
200: "Export successful",
|
||||
404: "App not found",
|
||||
},
|
||||
)
|
||||
def get(self, app_id: str):
|
||||
"""Export an app's DSL as YAML."""
|
||||
include_secret = request.args.get("include_secret", "false").lower() == "true"
|
||||
|
||||
app_model = db.session.query(App).filter_by(id=app_id).first()
|
||||
if not app_model:
|
||||
return {"message": "app not found"}, 404
|
||||
|
||||
data = AppDslService.export_dsl(
|
||||
app_model=app_model,
|
||||
include_secret=include_secret,
|
||||
)
|
||||
|
||||
return {"data": data}, 200
|
||||
|
||||
|
||||
def _get_active_account(email: str) -> Account | None:
|
||||
"""Look up an active account by email.
|
||||
|
||||
Workspace membership is already validated by the Go admin-api caller.
|
||||
"""
|
||||
account = db.session.query(Account).filter_by(email=email).first()
|
||||
if account is None or account.status != AccountStatus.ACTIVE:
|
||||
return None
|
||||
return account
|
||||
@ -5,7 +5,7 @@ import logging
|
||||
import threading
|
||||
import uuid
|
||||
from collections.abc import Generator, Mapping, Sequence
|
||||
from typing import TYPE_CHECKING, Any, Literal, Union, overload
|
||||
from typing import TYPE_CHECKING, Any, Literal, TypeVar, Union, overload
|
||||
|
||||
from flask import Flask, current_app
|
||||
from pydantic import ValidationError
|
||||
@ -47,6 +47,7 @@ from extensions.ext_database import db
|
||||
from factories import file_factory
|
||||
from libs.flask_utils import preserve_flask_contexts
|
||||
from models import Account, App, Conversation, EndUser, Message, Workflow, WorkflowNodeExecutionTriggeredFrom
|
||||
from models.base import Base
|
||||
from models.enums import WorkflowRunTriggeredFrom
|
||||
from services.conversation_service import ConversationService
|
||||
from services.workflow_draft_variable_service import (
|
||||
@ -521,10 +522,8 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
|
||||
|
||||
# release database connection, because the following new thread operations may take a long time
|
||||
with Session(bind=db.engine, expire_on_commit=False) as session:
|
||||
workflow = _refresh_model(session=session, model=workflow)
|
||||
message = _refresh_model(session=session, model=message)
|
||||
if message is None:
|
||||
raise RuntimeError("Failed to refresh Message; _refresh_model returned None.")
|
||||
workflow = _refresh_model(session, workflow)
|
||||
message = _refresh_model(session, message)
|
||||
# workflow_ = session.get(Workflow, workflow.id)
|
||||
# assert workflow_ is not None
|
||||
# workflow = workflow_
|
||||
@ -691,21 +690,11 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
|
||||
raise e
|
||||
|
||||
|
||||
@overload
|
||||
def _refresh_model(*, session: Session | None = None, model: Workflow) -> Workflow: ...
|
||||
_T = TypeVar("_T", bound=Base)
|
||||
|
||||
|
||||
@overload
|
||||
def _refresh_model(*, session: Session | None = None, model: Message) -> Message: ...
|
||||
|
||||
|
||||
def _refresh_model(*, session: Session | None = None, model: Any) -> Any:
|
||||
if session is not None:
|
||||
detached_model = session.get(type(model), model.id)
|
||||
assert detached_model is not None
|
||||
return detached_model
|
||||
|
||||
with Session(bind=db.engine, expire_on_commit=False) as refresh_session:
|
||||
detached_model = refresh_session.get(type(model), model.id)
|
||||
assert detached_model is not None
|
||||
return detached_model
|
||||
def _refresh_model(session, model: _T) -> _T:
|
||||
with Session(bind=db.engine, expire_on_commit=False) as session:
|
||||
detach_model = session.get(type(model), model.id)
|
||||
assert detach_model is not None
|
||||
return detach_model
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
from collections.abc import Generator, Iterator
|
||||
from collections.abc import Generator
|
||||
from typing import Any, cast
|
||||
|
||||
from core.app.apps.base_app_generate_response_converter import AppGenerateResponseConverter
|
||||
@ -56,8 +56,8 @@ class AdvancedChatAppGenerateResponseConverter(AppGenerateResponseConverter):
|
||||
|
||||
@classmethod
|
||||
def convert_stream_full_response(
|
||||
cls, stream_response: Iterator[AppStreamResponse]
|
||||
) -> Generator[dict | str, None, None]:
|
||||
cls, stream_response: Generator[AppStreamResponse, None, None]
|
||||
) -> Generator[dict | str, Any, None]:
|
||||
"""
|
||||
Convert stream full response.
|
||||
:param stream_response: stream response
|
||||
@ -87,8 +87,8 @@ class AdvancedChatAppGenerateResponseConverter(AppGenerateResponseConverter):
|
||||
|
||||
@classmethod
|
||||
def convert_stream_simple_response(
|
||||
cls, stream_response: Iterator[AppStreamResponse]
|
||||
) -> Generator[dict | str, None, None]:
|
||||
cls, stream_response: Generator[AppStreamResponse, None, None]
|
||||
) -> Generator[dict | str, Any, None]:
|
||||
"""
|
||||
Convert stream simple response.
|
||||
:param stream_response: stream response
|
||||
|
||||
@ -7,7 +7,7 @@ from contextlib import contextmanager
|
||||
from threading import Thread
|
||||
from typing import Any, Union
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy import select, update
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME
|
||||
@ -311,11 +311,9 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
|
||||
self._workflow_run_id = run_id
|
||||
|
||||
with self._database_session() as session:
|
||||
message = self._get_message(session=session)
|
||||
if not message:
|
||||
raise ValueError(f"Message not found: {self._message_id}")
|
||||
|
||||
message.workflow_run_id = run_id
|
||||
session.execute(
|
||||
update(Message).where(Message.id == self._message_id).values(workflow_run_id=run_id)
|
||||
)
|
||||
|
||||
workflow_start_resp = self._workflow_response_converter.workflow_start_to_stream_response(
|
||||
task_id=self._application_generate_entity.task_id,
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
from collections.abc import Generator, Iterator
|
||||
from collections.abc import Generator
|
||||
from typing import cast
|
||||
|
||||
from core.app.apps.base_app_generate_response_converter import AppGenerateResponseConverter
|
||||
@ -55,7 +55,7 @@ class AgentChatAppGenerateResponseConverter(AppGenerateResponseConverter):
|
||||
|
||||
@classmethod
|
||||
def convert_stream_full_response(
|
||||
cls, stream_response: Iterator[AppStreamResponse]
|
||||
cls, stream_response: Generator[AppStreamResponse, None, None]
|
||||
) -> Generator[dict | str, None, None]:
|
||||
"""
|
||||
Convert stream full response.
|
||||
@ -86,7 +86,7 @@ class AgentChatAppGenerateResponseConverter(AppGenerateResponseConverter):
|
||||
|
||||
@classmethod
|
||||
def convert_stream_simple_response(
|
||||
cls, stream_response: Iterator[AppStreamResponse]
|
||||
cls, stream_response: Generator[AppStreamResponse, None, None]
|
||||
) -> Generator[dict | str, None, None]:
|
||||
"""
|
||||
Convert stream simple response.
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Generator, Iterator, Mapping
|
||||
from typing import Any
|
||||
from collections.abc import Generator, Mapping
|
||||
from typing import Any, Union
|
||||
|
||||
from core.app.entities.app_invoke_entities import InvokeFrom
|
||||
from core.app.entities.task_entities import AppBlockingResponse, AppStreamResponse
|
||||
@ -16,26 +16,24 @@ class AppGenerateResponseConverter(ABC):
|
||||
|
||||
@classmethod
|
||||
def convert(
|
||||
cls, response: AppBlockingResponse | Iterator[AppStreamResponse], invoke_from: InvokeFrom
|
||||
) -> Mapping[str, Any] | Generator[str | Mapping[str, Any], None, None]:
|
||||
cls, response: Union[AppBlockingResponse, Generator[AppStreamResponse, Any, None]], invoke_from: InvokeFrom
|
||||
) -> Mapping[str, Any] | Generator[str | Mapping[str, Any], Any, None]:
|
||||
if invoke_from in {InvokeFrom.DEBUGGER, InvokeFrom.SERVICE_API}:
|
||||
if isinstance(response, AppBlockingResponse):
|
||||
return cls.convert_blocking_full_response(response)
|
||||
else:
|
||||
stream_response = response
|
||||
|
||||
def _generate_full_response() -> Generator[dict[str, Any] | str, None, None]:
|
||||
yield from cls.convert_stream_full_response(stream_response)
|
||||
def _generate_full_response() -> Generator[dict | str, Any, None]:
|
||||
yield from cls.convert_stream_full_response(response)
|
||||
|
||||
return _generate_full_response()
|
||||
else:
|
||||
if isinstance(response, AppBlockingResponse):
|
||||
return cls.convert_blocking_simple_response(response)
|
||||
else:
|
||||
stream_response = response
|
||||
|
||||
def _generate_simple_response() -> Generator[dict[str, Any] | str, None, None]:
|
||||
yield from cls.convert_stream_simple_response(stream_response)
|
||||
def _generate_simple_response() -> Generator[dict | str, Any, None]:
|
||||
yield from cls.convert_stream_simple_response(response)
|
||||
|
||||
return _generate_simple_response()
|
||||
|
||||
@ -52,14 +50,14 @@ class AppGenerateResponseConverter(ABC):
|
||||
@classmethod
|
||||
@abstractmethod
|
||||
def convert_stream_full_response(
|
||||
cls, stream_response: Iterator[AppStreamResponse]
|
||||
cls, stream_response: Generator[AppStreamResponse, None, None]
|
||||
) -> Generator[dict | str, None, None]:
|
||||
raise NotImplementedError
|
||||
|
||||
@classmethod
|
||||
@abstractmethod
|
||||
def convert_stream_simple_response(
|
||||
cls, stream_response: Iterator[AppStreamResponse]
|
||||
cls, stream_response: Generator[AppStreamResponse, None, None]
|
||||
) -> Generator[dict | str, None, None]:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@ -224,7 +224,6 @@ class BaseAppGenerator:
|
||||
def _get_draft_var_saver_factory(invoke_from: InvokeFrom, account: Account | EndUser) -> DraftVariableSaverFactory:
|
||||
if invoke_from == InvokeFrom.DEBUGGER:
|
||||
assert isinstance(account, Account)
|
||||
debug_account = account
|
||||
|
||||
def draft_var_saver_factory(
|
||||
session: Session,
|
||||
@ -241,7 +240,7 @@ class BaseAppGenerator:
|
||||
node_type=node_type,
|
||||
node_execution_id=node_execution_id,
|
||||
enclosing_node_id=enclosing_node_id,
|
||||
user=debug_account,
|
||||
user=account,
|
||||
)
|
||||
else:
|
||||
|
||||
|
||||
@ -166,19 +166,15 @@ class ChatAppGenerator(MessageBasedAppGenerator):
|
||||
|
||||
# init generate records
|
||||
(conversation, message) = self._init_generate_records(application_generate_entity, conversation)
|
||||
if conversation is None or message is None:
|
||||
raise RuntimeError("_init_generate_records() returned None for conversation or message")
|
||||
generated_conversation_id = str(conversation.id)
|
||||
generated_message_id = str(message.id)
|
||||
|
||||
# init queue manager
|
||||
queue_manager = MessageBasedAppQueueManager(
|
||||
task_id=application_generate_entity.task_id,
|
||||
user_id=application_generate_entity.user_id,
|
||||
invoke_from=application_generate_entity.invoke_from,
|
||||
conversation_id=generated_conversation_id,
|
||||
conversation_id=conversation.id,
|
||||
app_mode=conversation.mode,
|
||||
message_id=generated_message_id,
|
||||
message_id=message.id,
|
||||
)
|
||||
|
||||
# new thread with request context
|
||||
@ -188,8 +184,8 @@ class ChatAppGenerator(MessageBasedAppGenerator):
|
||||
flask_app=current_app._get_current_object(), # type: ignore
|
||||
application_generate_entity=application_generate_entity,
|
||||
queue_manager=queue_manager,
|
||||
conversation_id=generated_conversation_id,
|
||||
message_id=generated_message_id,
|
||||
conversation_id=conversation.id,
|
||||
message_id=message.id,
|
||||
)
|
||||
|
||||
worker_thread = threading.Thread(target=worker_with_context)
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
from collections.abc import Generator, Iterator
|
||||
from collections.abc import Generator
|
||||
from typing import cast
|
||||
|
||||
from core.app.apps.base_app_generate_response_converter import AppGenerateResponseConverter
|
||||
@ -55,7 +55,7 @@ class ChatAppGenerateResponseConverter(AppGenerateResponseConverter):
|
||||
|
||||
@classmethod
|
||||
def convert_stream_full_response(
|
||||
cls, stream_response: Iterator[AppStreamResponse]
|
||||
cls, stream_response: Generator[AppStreamResponse, None, None]
|
||||
) -> Generator[dict | str, None, None]:
|
||||
"""
|
||||
Convert stream full response.
|
||||
@ -86,7 +86,7 @@ class ChatAppGenerateResponseConverter(AppGenerateResponseConverter):
|
||||
|
||||
@classmethod
|
||||
def convert_stream_simple_response(
|
||||
cls, stream_response: Iterator[AppStreamResponse]
|
||||
cls, stream_response: Generator[AppStreamResponse, None, None]
|
||||
) -> Generator[dict | str, None, None]:
|
||||
"""
|
||||
Convert stream simple response.
|
||||
|
||||
@ -149,11 +149,6 @@ class CompletionAppGenerator(MessageBasedAppGenerator):
|
||||
|
||||
# init generate records
|
||||
(conversation, message) = self._init_generate_records(application_generate_entity)
|
||||
if conversation is None or message is None:
|
||||
raise RuntimeError(
|
||||
"_init_generate_records() returned None for conversation or message, "
|
||||
"which is required to proceed with generation."
|
||||
)
|
||||
|
||||
# init queue manager
|
||||
queue_manager = MessageBasedAppQueueManager(
|
||||
@ -317,19 +312,15 @@ class CompletionAppGenerator(MessageBasedAppGenerator):
|
||||
|
||||
# init generate records
|
||||
(conversation, message) = self._init_generate_records(application_generate_entity)
|
||||
assert conversation is not None
|
||||
assert message is not None
|
||||
conversation_id = str(conversation.id)
|
||||
message_id = str(message.id)
|
||||
|
||||
# init queue manager
|
||||
queue_manager = MessageBasedAppQueueManager(
|
||||
task_id=application_generate_entity.task_id,
|
||||
user_id=application_generate_entity.user_id,
|
||||
invoke_from=application_generate_entity.invoke_from,
|
||||
conversation_id=conversation_id,
|
||||
conversation_id=conversation.id,
|
||||
app_mode=conversation.mode,
|
||||
message_id=message_id,
|
||||
message_id=message.id,
|
||||
)
|
||||
|
||||
# new thread with request context
|
||||
@ -339,7 +330,7 @@ class CompletionAppGenerator(MessageBasedAppGenerator):
|
||||
flask_app=current_app._get_current_object(), # type: ignore
|
||||
application_generate_entity=application_generate_entity,
|
||||
queue_manager=queue_manager,
|
||||
message_id=message_id,
|
||||
message_id=message.id,
|
||||
)
|
||||
|
||||
worker_thread = threading.Thread(target=worker_with_context)
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
from collections.abc import Generator, Iterator
|
||||
from collections.abc import Generator
|
||||
from typing import cast
|
||||
|
||||
from core.app.apps.base_app_generate_response_converter import AppGenerateResponseConverter
|
||||
@ -54,7 +54,7 @@ class CompletionAppGenerateResponseConverter(AppGenerateResponseConverter):
|
||||
|
||||
@classmethod
|
||||
def convert_stream_full_response(
|
||||
cls, stream_response: Iterator[AppStreamResponse]
|
||||
cls, stream_response: Generator[AppStreamResponse, None, None]
|
||||
) -> Generator[dict | str, None, None]:
|
||||
"""
|
||||
Convert stream full response.
|
||||
@ -84,7 +84,7 @@ class CompletionAppGenerateResponseConverter(AppGenerateResponseConverter):
|
||||
|
||||
@classmethod
|
||||
def convert_stream_simple_response(
|
||||
cls, stream_response: Iterator[AppStreamResponse]
|
||||
cls, stream_response: Generator[AppStreamResponse, None, None]
|
||||
) -> Generator[dict | str, None, None]:
|
||||
"""
|
||||
Convert stream simple response.
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
from collections.abc import Generator, Iterator
|
||||
from collections.abc import Generator
|
||||
from typing import cast
|
||||
|
||||
from core.app.apps.base_app_generate_response_converter import AppGenerateResponseConverter
|
||||
@ -36,7 +36,7 @@ class WorkflowAppGenerateResponseConverter(AppGenerateResponseConverter):
|
||||
|
||||
@classmethod
|
||||
def convert_stream_full_response(
|
||||
cls, stream_response: Iterator[AppStreamResponse]
|
||||
cls, stream_response: Generator[AppStreamResponse, None, None]
|
||||
) -> Generator[dict | str, None, None]:
|
||||
"""
|
||||
Convert stream full response.
|
||||
@ -65,7 +65,7 @@ class WorkflowAppGenerateResponseConverter(AppGenerateResponseConverter):
|
||||
|
||||
@classmethod
|
||||
def convert_stream_simple_response(
|
||||
cls, stream_response: Iterator[AppStreamResponse]
|
||||
cls, stream_response: Generator[AppStreamResponse, None, None]
|
||||
) -> Generator[dict | str, None, None]:
|
||||
"""
|
||||
Convert stream simple response.
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
from collections.abc import Generator, Iterator
|
||||
from collections.abc import Generator
|
||||
from typing import cast
|
||||
|
||||
from core.app.apps.base_app_generate_response_converter import AppGenerateResponseConverter
|
||||
@ -36,7 +36,7 @@ class WorkflowAppGenerateResponseConverter(AppGenerateResponseConverter):
|
||||
|
||||
@classmethod
|
||||
def convert_stream_full_response(
|
||||
cls, stream_response: Iterator[AppStreamResponse]
|
||||
cls, stream_response: Generator[AppStreamResponse, None, None]
|
||||
) -> Generator[dict | str, None, None]:
|
||||
"""
|
||||
Convert stream full response.
|
||||
@ -65,7 +65,7 @@ class WorkflowAppGenerateResponseConverter(AppGenerateResponseConverter):
|
||||
|
||||
@classmethod
|
||||
def convert_stream_simple_response(
|
||||
cls, stream_response: Iterator[AppStreamResponse]
|
||||
cls, stream_response: Generator[AppStreamResponse, None, None]
|
||||
) -> Generator[dict | str, None, None]:
|
||||
"""
|
||||
Convert stream simple response.
|
||||
|
||||
@ -1,17 +1,13 @@
|
||||
import logging
|
||||
import time
|
||||
from collections.abc import Mapping, Sequence
|
||||
from typing import Protocol, TypeAlias
|
||||
from typing import Any, cast
|
||||
|
||||
from pydantic import ValidationError
|
||||
|
||||
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
|
||||
from core.app.entities.agent_strategy import AgentStrategyInfo
|
||||
from core.app.entities.app_invoke_entities import (
|
||||
InvokeFrom,
|
||||
UserFrom,
|
||||
build_dify_run_context,
|
||||
)
|
||||
from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom, build_dify_run_context
|
||||
from core.app.entities.queue_entities import (
|
||||
AppQueueEvent,
|
||||
QueueAgentLogEvent,
|
||||
@ -40,7 +36,7 @@ from core.rag.entities.citation_metadata import RetrievalSourceMetadata
|
||||
from core.workflow.node_factory import DifyNodeFactory, get_default_root_node_id, resolve_workflow_node_class
|
||||
from core.workflow.workflow_entry import WorkflowEntry
|
||||
from dify_graph.entities import GraphInitParams
|
||||
from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter
|
||||
from dify_graph.entities.graph_config import NodeConfigDictAdapter
|
||||
from dify_graph.entities.pause_reason import HumanInputRequired
|
||||
from dify_graph.graph import Graph
|
||||
from dify_graph.graph_engine.layers.base import GraphEngineLayer
|
||||
@ -79,14 +75,6 @@ from tasks.mail_human_input_delivery_task import dispatch_human_input_email_task
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
GraphConfigObject: TypeAlias = dict[str, object]
|
||||
GraphConfigMapping: TypeAlias = Mapping[str, object]
|
||||
|
||||
|
||||
class SingleNodeRunEntity(Protocol):
|
||||
node_id: str
|
||||
inputs: Mapping[str, object]
|
||||
|
||||
|
||||
class WorkflowBasedAppRunner:
|
||||
def __init__(
|
||||
@ -110,7 +98,7 @@ class WorkflowBasedAppRunner:
|
||||
|
||||
def _init_graph(
|
||||
self,
|
||||
graph_config: GraphConfigMapping,
|
||||
graph_config: Mapping[str, Any],
|
||||
graph_runtime_state: GraphRuntimeState,
|
||||
user_from: UserFrom,
|
||||
invoke_from: InvokeFrom,
|
||||
@ -166,8 +154,8 @@ class WorkflowBasedAppRunner:
|
||||
def _prepare_single_node_execution(
|
||||
self,
|
||||
workflow: Workflow,
|
||||
single_iteration_run: SingleNodeRunEntity | None = None,
|
||||
single_loop_run: SingleNodeRunEntity | None = None,
|
||||
single_iteration_run: Any | None = None,
|
||||
single_loop_run: Any | None = None,
|
||||
) -> tuple[Graph, VariablePool, GraphRuntimeState]:
|
||||
"""
|
||||
Prepare graph, variable pool, and runtime state for single node execution
|
||||
@ -220,88 +208,11 @@ class WorkflowBasedAppRunner:
|
||||
# This ensures all nodes in the graph reference the same GraphRuntimeState instance
|
||||
return graph, variable_pool, graph_runtime_state
|
||||
|
||||
@staticmethod
|
||||
def _get_graph_items(graph_config: GraphConfigMapping) -> tuple[list[GraphConfigMapping], list[GraphConfigMapping]]:
|
||||
nodes = graph_config.get("nodes")
|
||||
edges = graph_config.get("edges")
|
||||
if not isinstance(nodes, list):
|
||||
raise ValueError("nodes in workflow graph must be a list")
|
||||
if not isinstance(edges, list):
|
||||
raise ValueError("edges in workflow graph must be a list")
|
||||
|
||||
validated_nodes: list[GraphConfigMapping] = []
|
||||
for node in nodes:
|
||||
if not isinstance(node, Mapping):
|
||||
raise ValueError("nodes in workflow graph must be mappings")
|
||||
validated_nodes.append(node)
|
||||
|
||||
validated_edges: list[GraphConfigMapping] = []
|
||||
for edge in edges:
|
||||
if not isinstance(edge, Mapping):
|
||||
raise ValueError("edges in workflow graph must be mappings")
|
||||
validated_edges.append(edge)
|
||||
|
||||
return validated_nodes, validated_edges
|
||||
|
||||
@staticmethod
|
||||
def _extract_start_node_id(node_config: GraphConfigMapping | None) -> str | None:
|
||||
if node_config is None:
|
||||
return None
|
||||
node_data = node_config.get("data")
|
||||
if not isinstance(node_data, Mapping):
|
||||
return None
|
||||
start_node_id = node_data.get("start_node_id")
|
||||
return start_node_id if isinstance(start_node_id, str) else None
|
||||
|
||||
@classmethod
|
||||
def _build_single_node_graph_config(
|
||||
cls,
|
||||
*,
|
||||
graph_config: GraphConfigMapping,
|
||||
node_id: str,
|
||||
node_type_filter_key: str,
|
||||
) -> tuple[GraphConfigObject, NodeConfigDict]:
|
||||
node_configs, edge_configs = cls._get_graph_items(graph_config)
|
||||
main_node_config = next((node for node in node_configs if node.get("id") == node_id), None)
|
||||
start_node_id = cls._extract_start_node_id(main_node_config)
|
||||
|
||||
filtered_node_configs = [
|
||||
dict(node)
|
||||
for node in node_configs
|
||||
if node.get("id") == node_id
|
||||
or (isinstance(node_data := node.get("data"), Mapping) and node_data.get(node_type_filter_key) == node_id)
|
||||
or (start_node_id and node.get("id") == start_node_id)
|
||||
]
|
||||
if not filtered_node_configs:
|
||||
raise ValueError(f"node id {node_id} not found in workflow graph")
|
||||
|
||||
filtered_node_ids = {
|
||||
str(node_id_value) for node in filtered_node_configs if isinstance((node_id_value := node.get("id")), str)
|
||||
}
|
||||
filtered_edge_configs = [
|
||||
dict(edge)
|
||||
for edge in edge_configs
|
||||
if (edge.get("source") is None or edge.get("source") in filtered_node_ids)
|
||||
and (edge.get("target") is None or edge.get("target") in filtered_node_ids)
|
||||
]
|
||||
|
||||
target_node_config = next((node for node in filtered_node_configs if node.get("id") == node_id), None)
|
||||
if target_node_config is None:
|
||||
raise ValueError(f"node id {node_id} not found in workflow graph")
|
||||
|
||||
return (
|
||||
{
|
||||
"nodes": filtered_node_configs,
|
||||
"edges": filtered_edge_configs,
|
||||
},
|
||||
NodeConfigDictAdapter.validate_python(target_node_config),
|
||||
)
|
||||
|
||||
def _get_graph_and_variable_pool_for_single_node_run(
|
||||
self,
|
||||
workflow: Workflow,
|
||||
node_id: str,
|
||||
user_inputs: Mapping[str, object],
|
||||
user_inputs: dict[str, Any],
|
||||
graph_runtime_state: GraphRuntimeState,
|
||||
node_type_filter_key: str, # 'iteration_id' or 'loop_id'
|
||||
node_type_label: str = "node", # 'iteration' or 'loop' for error messages
|
||||
@ -325,14 +236,41 @@ class WorkflowBasedAppRunner:
|
||||
if not graph_config:
|
||||
raise ValueError("workflow graph not found")
|
||||
|
||||
graph_config = cast(dict[str, Any], graph_config)
|
||||
|
||||
if "nodes" not in graph_config or "edges" not in graph_config:
|
||||
raise ValueError("nodes or edges not found in workflow graph")
|
||||
|
||||
graph_config, target_node_config = self._build_single_node_graph_config(
|
||||
graph_config=graph_config,
|
||||
node_id=node_id,
|
||||
node_type_filter_key=node_type_filter_key,
|
||||
)
|
||||
if not isinstance(graph_config.get("nodes"), list):
|
||||
raise ValueError("nodes in workflow graph must be a list")
|
||||
|
||||
if not isinstance(graph_config.get("edges"), list):
|
||||
raise ValueError("edges in workflow graph must be a list")
|
||||
|
||||
# filter nodes only in the specified node type (iteration or loop)
|
||||
main_node_config = next((n for n in graph_config.get("nodes", []) if n.get("id") == node_id), None)
|
||||
start_node_id = main_node_config.get("data", {}).get("start_node_id") if main_node_config else None
|
||||
node_configs = [
|
||||
node
|
||||
for node in graph_config.get("nodes", [])
|
||||
if node.get("id") == node_id
|
||||
or node.get("data", {}).get(node_type_filter_key, "") == node_id
|
||||
or (start_node_id and node.get("id") == start_node_id)
|
||||
]
|
||||
|
||||
graph_config["nodes"] = node_configs
|
||||
|
||||
node_ids = [node.get("id") for node in node_configs]
|
||||
|
||||
# filter edges only in the specified node type
|
||||
edge_configs = [
|
||||
edge
|
||||
for edge in graph_config.get("edges", [])
|
||||
if (edge.get("source") is None or edge.get("source") in node_ids)
|
||||
and (edge.get("target") is None or edge.get("target") in node_ids)
|
||||
]
|
||||
|
||||
graph_config["edges"] = edge_configs
|
||||
|
||||
# Create required parameters for Graph.init
|
||||
graph_init_params = GraphInitParams(
|
||||
@ -361,6 +299,18 @@ class WorkflowBasedAppRunner:
|
||||
if not graph:
|
||||
raise ValueError("graph not found in workflow")
|
||||
|
||||
# fetch node config from node id
|
||||
target_node_config = None
|
||||
for node in node_configs:
|
||||
if node.get("id") == node_id:
|
||||
target_node_config = node
|
||||
break
|
||||
|
||||
if not target_node_config:
|
||||
raise ValueError(f"{node_type_label} node id not found in workflow graph")
|
||||
|
||||
target_node_config = NodeConfigDictAdapter.validate_python(target_node_config)
|
||||
|
||||
# Get node class
|
||||
node_type = target_node_config["data"].type
|
||||
node_version = str(target_node_config["data"].version)
|
||||
|
||||
@ -213,7 +213,7 @@ class AdvancedChatAppGenerateEntity(ConversationAppGenerateEntity):
|
||||
"""
|
||||
|
||||
node_id: str
|
||||
inputs: Mapping[str, object]
|
||||
inputs: Mapping
|
||||
|
||||
single_iteration_run: SingleIterationRunEntity | None = None
|
||||
|
||||
@ -223,7 +223,7 @@ class AdvancedChatAppGenerateEntity(ConversationAppGenerateEntity):
|
||||
"""
|
||||
|
||||
node_id: str
|
||||
inputs: Mapping[str, object]
|
||||
inputs: Mapping
|
||||
|
||||
single_loop_run: SingleLoopRunEntity | None = None
|
||||
|
||||
@ -243,7 +243,7 @@ class WorkflowAppGenerateEntity(AppGenerateEntity):
|
||||
"""
|
||||
|
||||
node_id: str
|
||||
inputs: Mapping[str, object]
|
||||
inputs: dict
|
||||
|
||||
single_iteration_run: SingleIterationRunEntity | None = None
|
||||
|
||||
@ -253,7 +253,7 @@ class WorkflowAppGenerateEntity(AppGenerateEntity):
|
||||
"""
|
||||
|
||||
node_id: str
|
||||
inputs: Mapping[str, object]
|
||||
inputs: dict
|
||||
|
||||
single_loop_run: SingleLoopRunEntity | None = None
|
||||
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from copy import deepcopy
|
||||
from typing import Any
|
||||
|
||||
from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
|
||||
@ -15,12 +16,17 @@ from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory
|
||||
class DifyCredentialsProvider:
|
||||
tenant_id: str
|
||||
provider_manager: ProviderManager
|
||||
credentials_cache: dict[tuple[str, str], dict[str, Any]]
|
||||
|
||||
def __init__(self, tenant_id: str, provider_manager: ProviderManager | None = None) -> None:
|
||||
self.tenant_id = tenant_id
|
||||
self.provider_manager = provider_manager or ProviderManager()
|
||||
self.credentials_cache = {}
|
||||
|
||||
def fetch(self, provider_name: str, model_name: str) -> dict[str, Any]:
|
||||
if (provider_name, model_name) in self.credentials_cache:
|
||||
return deepcopy(self.credentials_cache[(provider_name, model_name)])
|
||||
|
||||
provider_configurations = self.provider_manager.get_configurations(self.tenant_id)
|
||||
provider_configuration = provider_configurations.get(provider_name)
|
||||
if not provider_configuration:
|
||||
@ -35,6 +41,7 @@ class DifyCredentialsProvider:
|
||||
if credentials is None:
|
||||
raise ProviderTokenNotInitError(f"Model {model_name} credentials is not initialized.")
|
||||
|
||||
self.credentials_cache[(provider_name, model_name)] = deepcopy(credentials)
|
||||
return credentials
|
||||
|
||||
|
||||
@ -44,7 +51,7 @@ class DifyModelFactory:
|
||||
|
||||
def __init__(self, tenant_id: str, model_manager: ModelManager | None = None) -> None:
|
||||
self.tenant_id = tenant_id
|
||||
self.model_manager = model_manager or ModelManager()
|
||||
self.model_manager = model_manager or ModelManager(enable_credentials_cache=True)
|
||||
|
||||
def init_model_instance(self, provider_name: str, model_name: str) -> ModelInstance:
|
||||
return self.model_manager.get_model_instance(
|
||||
@ -56,9 +63,13 @@ class DifyModelFactory:
|
||||
|
||||
|
||||
def build_dify_model_access(tenant_id: str) -> tuple[CredentialsProvider, ModelFactory]:
|
||||
provider_manager = ProviderManager()
|
||||
return (
|
||||
DifyCredentialsProvider(tenant_id=tenant_id),
|
||||
DifyModelFactory(tenant_id=tenant_id),
|
||||
DifyCredentialsProvider(tenant_id=tenant_id, provider_manager=provider_manager),
|
||||
DifyModelFactory(
|
||||
tenant_id=tenant_id,
|
||||
model_manager=ModelManager(enable_credentials_cache=True, provider_manager=provider_manager),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@ -9,6 +9,7 @@ from pydantic import BaseModel, ConfigDict, Field, model_validator
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from configs import dify_config
|
||||
from constants import HIDDEN_VALUE
|
||||
from core.entities.model_entities import ModelStatus, ModelWithProviderEntity, SimpleModelProviderEntity
|
||||
from core.entities.provider_entities import (
|
||||
@ -144,9 +145,9 @@ class ProviderConfiguration(BaseModel):
|
||||
current_credential_id = self.custom_configuration.provider.current_credential_id
|
||||
|
||||
if current_credential_id:
|
||||
from core.helper.credential_utils import check_credential_policy_compliance
|
||||
from core.helper.credential_utils import runtime_check_credential_policy_compliance
|
||||
|
||||
check_credential_policy_compliance(
|
||||
runtime_check_credential_policy_compliance(
|
||||
credential_id=current_credential_id,
|
||||
provider=self.provider.provider,
|
||||
credential_type=PluginCredentialType.MODEL,
|
||||
@ -155,9 +156,9 @@ class ProviderConfiguration(BaseModel):
|
||||
# no current credential id, check all available credentials
|
||||
if self.custom_configuration.provider:
|
||||
for credential_configuration in self.custom_configuration.provider.available_credentials:
|
||||
from core.helper.credential_utils import check_credential_policy_compliance
|
||||
from core.helper.credential_utils import runtime_check_credential_policy_compliance
|
||||
|
||||
check_credential_policy_compliance(
|
||||
runtime_check_credential_policy_compliance(
|
||||
credential_id=credential_configuration.credential_id,
|
||||
provider=self.provider.provider,
|
||||
credential_type=PluginCredentialType.MODEL,
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
Credential utility functions for checking credential existence and policy compliance.
|
||||
"""
|
||||
|
||||
from configs import dify_config
|
||||
from services.enterprise.plugin_manager_service import PluginCredentialType
|
||||
|
||||
|
||||
@ -39,6 +40,14 @@ def is_credential_exists(credential_id: str, credential_type: "PluginCredentialT
|
||||
return False
|
||||
|
||||
|
||||
def runtime_check_credential_policy_compliance(
|
||||
*args, **kwargs
|
||||
) -> None:
|
||||
if dify_config.ENTERPRISE_DISABLE_RUNTIME_CREDENTIAL_CHECK:
|
||||
return
|
||||
check_credential_policy_compliance(*args, **kwargs)
|
||||
|
||||
|
||||
def check_credential_policy_compliance(
|
||||
credential_id: str, provider: str, credential_type: "PluginCredentialType", check_existence: bool = True
|
||||
) -> None:
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
import logging
|
||||
from collections.abc import Callable, Generator, Iterable, Mapping, Sequence
|
||||
from copy import deepcopy
|
||||
from typing import IO, Any, Literal, Optional, Union, cast, overload
|
||||
|
||||
from configs import dify_config
|
||||
@ -33,11 +34,13 @@ class ModelInstance:
|
||||
Model instance class
|
||||
"""
|
||||
|
||||
def __init__(self, provider_model_bundle: ProviderModelBundle, model: str):
|
||||
def __init__(self, provider_model_bundle: ProviderModelBundle, model: str, credentials: dict | None = None):
|
||||
self.provider_model_bundle = provider_model_bundle
|
||||
self.model_name = model
|
||||
self.provider = provider_model_bundle.configuration.provider.provider
|
||||
self.credentials = self._fetch_credentials_from_bundle(provider_model_bundle, model)
|
||||
if credentials is None:
|
||||
credentials = self._fetch_credentials_from_bundle(provider_model_bundle, model)
|
||||
self.credentials = credentials
|
||||
# Runtime LLM invocation fields.
|
||||
self.parameters: Mapping[str, Any] = {}
|
||||
self.stop: Sequence[str] = ()
|
||||
@ -430,10 +433,10 @@ class ModelInstance:
|
||||
|
||||
# Additional policy compliance check as fallback (in case fetch_next didn't catch it)
|
||||
try:
|
||||
from core.helper.credential_utils import check_credential_policy_compliance
|
||||
from core.helper.credential_utils import runtime_check_credential_policy_compliance
|
||||
|
||||
if lb_config.credential_id:
|
||||
check_credential_policy_compliance(
|
||||
runtime_check_credential_policy_compliance(
|
||||
credential_id=lb_config.credential_id,
|
||||
provider=self.provider,
|
||||
credential_type=PluginCredentialType.MODEL,
|
||||
@ -477,8 +480,10 @@ class ModelInstance:
|
||||
|
||||
|
||||
class ModelManager:
|
||||
def __init__(self):
|
||||
self._provider_manager = ProviderManager()
|
||||
def __init__(self, enable_credentials_cache: bool = False, provider_manager: ProviderManager | None = None):
|
||||
self._provider_manager = provider_manager or ProviderManager()
|
||||
self._credentials_cache: dict[tuple[str, str, str, str], Any] = {}
|
||||
self._enable_credentials_cache = enable_credentials_cache
|
||||
|
||||
def get_model_instance(self, tenant_id: str, provider: str, model_type: ModelType, model: str) -> ModelInstance:
|
||||
"""
|
||||
@ -496,7 +501,19 @@ class ModelManager:
|
||||
tenant_id=tenant_id, provider=provider, model_type=model_type
|
||||
)
|
||||
|
||||
return ModelInstance(provider_model_bundle, model)
|
||||
cred_cache_key = (tenant_id, provider, model_type.value, model)
|
||||
|
||||
if cred_cache_key in self._credentials_cache:
|
||||
return ModelInstance(
|
||||
provider_model_bundle,
|
||||
model,
|
||||
deepcopy(self._credentials_cache[cred_cache_key]),
|
||||
)
|
||||
|
||||
ret = ModelInstance(provider_model_bundle, model)
|
||||
if self._enable_credentials_cache:
|
||||
self._credentials_cache[cred_cache_key] = deepcopy(ret.credentials)
|
||||
return ret
|
||||
|
||||
def get_default_provider_model_name(self, tenant_id: str, model_type: ModelType) -> tuple[str | None, str | None]:
|
||||
"""
|
||||
@ -626,10 +643,10 @@ class LBModelManager:
|
||||
|
||||
# Check policy compliance for the selected configuration
|
||||
try:
|
||||
from core.helper.credential_utils import check_credential_policy_compliance
|
||||
from core.helper.credential_utils import runtime_check_credential_policy_compliance
|
||||
|
||||
if config.credential_id:
|
||||
check_credential_policy_compliance(
|
||||
runtime_check_credential_policy_compliance(
|
||||
credential_id=config.credential_id,
|
||||
provider=self._provider,
|
||||
credential_type=PluginCredentialType.MODEL,
|
||||
|
||||
@ -9,8 +9,8 @@ from pydantic import BaseModel, ConfigDict, field_serializer, field_validator
|
||||
class BaseTraceInfo(BaseModel):
|
||||
message_id: str | None = None
|
||||
message_data: Any | None = None
|
||||
inputs: Union[str, dict[str, Any], list] | None = None
|
||||
outputs: Union[str, dict[str, Any], list] | None = None
|
||||
inputs: Union[str, dict[str, Any], list[Any]] | None = None
|
||||
outputs: Union[str, dict[str, Any], list[Any]] | None = None
|
||||
start_time: datetime | None = None
|
||||
end_time: datetime | None = None
|
||||
metadata: dict[str, Any]
|
||||
@ -18,7 +18,7 @@ class BaseTraceInfo(BaseModel):
|
||||
|
||||
@field_validator("inputs", "outputs")
|
||||
@classmethod
|
||||
def ensure_type(cls, v):
|
||||
def ensure_type(cls, v: str | dict[str, Any] | list[Any] | None) -> str | dict[str, Any] | list[Any] | None:
|
||||
if v is None:
|
||||
return None
|
||||
if isinstance(v, str | dict | list):
|
||||
@ -27,6 +27,48 @@ class BaseTraceInfo(BaseModel):
|
||||
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
||||
@property
|
||||
def resolved_trace_id(self) -> str | None:
|
||||
"""Get trace_id with intelligent fallback.
|
||||
|
||||
Priority:
|
||||
1. External trace_id (from X-Trace-Id header)
|
||||
2. workflow_run_id (if this trace type has it)
|
||||
3. message_id (as final fallback)
|
||||
"""
|
||||
if self.trace_id:
|
||||
return self.trace_id
|
||||
|
||||
# Try workflow_run_id (only exists on workflow-related traces)
|
||||
workflow_run_id = getattr(self, "workflow_run_id", None)
|
||||
if workflow_run_id:
|
||||
return workflow_run_id
|
||||
|
||||
# Final fallback to message_id
|
||||
return str(self.message_id) if self.message_id else None
|
||||
|
||||
@property
|
||||
def resolved_parent_context(self) -> tuple[str | None, str | None]:
|
||||
"""Resolve cross-workflow parent linking from metadata.
|
||||
|
||||
Extracts typed parent IDs from the untyped ``parent_trace_context``
|
||||
metadata dict (set by tool_node when invoking nested workflows).
|
||||
|
||||
Returns:
|
||||
(trace_correlation_override, parent_span_id_source) where
|
||||
trace_correlation_override is the outer workflow_run_id and
|
||||
parent_span_id_source is the outer node_execution_id.
|
||||
"""
|
||||
parent_ctx = self.metadata.get("parent_trace_context")
|
||||
if not isinstance(parent_ctx, dict):
|
||||
return None, None
|
||||
trace_override = parent_ctx.get("parent_workflow_run_id")
|
||||
parent_span = parent_ctx.get("parent_node_execution_id")
|
||||
return (
|
||||
trace_override if isinstance(trace_override, str) else None,
|
||||
parent_span if isinstance(parent_span, str) else None,
|
||||
)
|
||||
|
||||
@field_serializer("start_time", "end_time")
|
||||
def serialize_datetime(self, dt: datetime | None) -> str | None:
|
||||
if dt is None:
|
||||
@ -48,7 +90,10 @@ class WorkflowTraceInfo(BaseTraceInfo):
|
||||
workflow_run_version: str
|
||||
error: str | None = None
|
||||
total_tokens: int
|
||||
prompt_tokens: int | None = None
|
||||
completion_tokens: int | None = None
|
||||
file_list: list[str]
|
||||
invoked_by: str | None = None
|
||||
query: str
|
||||
metadata: dict[str, Any]
|
||||
|
||||
@ -59,7 +104,7 @@ class MessageTraceInfo(BaseTraceInfo):
|
||||
answer_tokens: int
|
||||
total_tokens: int
|
||||
error: str | None = None
|
||||
file_list: Union[str, dict[str, Any], list] | None = None
|
||||
file_list: Union[str, dict[str, Any], list[Any]] | None = None
|
||||
message_file_data: Any | None = None
|
||||
conversation_mode: str
|
||||
gen_ai_server_time_to_first_token: float | None = None
|
||||
@ -106,7 +151,7 @@ class ToolTraceInfo(BaseTraceInfo):
|
||||
tool_config: dict[str, Any]
|
||||
time_cost: Union[int, float]
|
||||
tool_parameters: dict[str, Any]
|
||||
file_url: Union[str, None, list] = None
|
||||
file_url: Union[str, None, list[str]] = None
|
||||
|
||||
|
||||
class GenerateNameTraceInfo(BaseTraceInfo):
|
||||
@ -114,6 +159,79 @@ class GenerateNameTraceInfo(BaseTraceInfo):
|
||||
tenant_id: str
|
||||
|
||||
|
||||
class PromptGenerationTraceInfo(BaseTraceInfo):
|
||||
"""Trace information for prompt generation operations (rule-generate, code-generate, etc.)."""
|
||||
|
||||
tenant_id: str
|
||||
user_id: str
|
||||
app_id: str | None = None
|
||||
|
||||
operation_type: str
|
||||
instruction: str
|
||||
|
||||
prompt_tokens: int
|
||||
completion_tokens: int
|
||||
total_tokens: int
|
||||
|
||||
model_provider: str
|
||||
model_name: str
|
||||
|
||||
latency: float
|
||||
|
||||
total_price: float | None = None
|
||||
currency: str | None = None
|
||||
|
||||
error: str | None = None
|
||||
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
||||
|
||||
class WorkflowNodeTraceInfo(BaseTraceInfo):
|
||||
workflow_id: str
|
||||
workflow_run_id: str
|
||||
tenant_id: str
|
||||
node_execution_id: str
|
||||
node_id: str
|
||||
node_type: str
|
||||
title: str
|
||||
|
||||
status: str
|
||||
error: str | None = None
|
||||
elapsed_time: float
|
||||
|
||||
index: int
|
||||
predecessor_node_id: str | None = None
|
||||
|
||||
total_tokens: int = 0
|
||||
total_price: float = 0.0
|
||||
currency: str | None = None
|
||||
|
||||
model_provider: str | None = None
|
||||
model_name: str | None = None
|
||||
prompt_tokens: int | None = None
|
||||
completion_tokens: int | None = None
|
||||
|
||||
tool_name: str | None = None
|
||||
|
||||
iteration_id: str | None = None
|
||||
iteration_index: int | None = None
|
||||
loop_id: str | None = None
|
||||
loop_index: int | None = None
|
||||
parallel_id: str | None = None
|
||||
|
||||
node_inputs: Mapping[str, Any] | None = None
|
||||
node_outputs: Mapping[str, Any] | None = None
|
||||
process_data: Mapping[str, Any] | None = None
|
||||
|
||||
invoked_by: str | None = None
|
||||
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
||||
|
||||
class DraftNodeExecutionTrace(WorkflowNodeTraceInfo):
|
||||
pass
|
||||
|
||||
|
||||
class TaskData(BaseModel):
|
||||
app_id: str
|
||||
trace_info_type: str
|
||||
@ -128,11 +246,31 @@ trace_info_info_map = {
|
||||
"DatasetRetrievalTraceInfo": DatasetRetrievalTraceInfo,
|
||||
"ToolTraceInfo": ToolTraceInfo,
|
||||
"GenerateNameTraceInfo": GenerateNameTraceInfo,
|
||||
"PromptGenerationTraceInfo": PromptGenerationTraceInfo,
|
||||
"WorkflowNodeTraceInfo": WorkflowNodeTraceInfo,
|
||||
"DraftNodeExecutionTrace": DraftNodeExecutionTrace,
|
||||
}
|
||||
|
||||
|
||||
class OperationType(StrEnum):
|
||||
"""Operation type for token metric labels.
|
||||
|
||||
Used as a metric attribute on ``dify.tokens.input`` / ``dify.tokens.output``
|
||||
counters so consumers can break down token usage by operation.
|
||||
"""
|
||||
|
||||
WORKFLOW = "workflow"
|
||||
NODE_EXECUTION = "node_execution"
|
||||
MESSAGE = "message"
|
||||
RULE_GENERATE = "rule_generate"
|
||||
CODE_GENERATE = "code_generate"
|
||||
STRUCTURED_OUTPUT = "structured_output"
|
||||
INSTRUCTION_MODIFY = "instruction_modify"
|
||||
|
||||
|
||||
class TraceTaskName(StrEnum):
|
||||
CONVERSATION_TRACE = "conversation"
|
||||
DRAFT_NODE_EXECUTION_TRACE = "draft_node_execution"
|
||||
WORKFLOW_TRACE = "workflow"
|
||||
MESSAGE_TRACE = "message"
|
||||
MODERATION_TRACE = "moderation"
|
||||
@ -140,4 +278,6 @@ class TraceTaskName(StrEnum):
|
||||
DATASET_RETRIEVAL_TRACE = "dataset_retrieval"
|
||||
TOOL_TRACE = "tool"
|
||||
GENERATE_NAME_TRACE = "generate_conversation_name"
|
||||
PROMPT_GENERATION_TRACE = "prompt_generation"
|
||||
NODE_EXECUTION_TRACE = "node_execution"
|
||||
DATASOURCE_TRACE = "datasource"
|
||||
|
||||
@ -15,22 +15,32 @@ from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
from core.helper.encrypter import batch_decrypt_token, encrypt_token, obfuscated_token
|
||||
from core.ops.entities.config_entity import OPS_FILE_PATH, TracingProviderEnum
|
||||
from core.ops.entities.config_entity import (
|
||||
OPS_FILE_PATH,
|
||||
TracingProviderEnum,
|
||||
)
|
||||
from core.ops.entities.trace_entity import (
|
||||
DatasetRetrievalTraceInfo,
|
||||
DraftNodeExecutionTrace,
|
||||
GenerateNameTraceInfo,
|
||||
MessageTraceInfo,
|
||||
ModerationTraceInfo,
|
||||
PromptGenerationTraceInfo,
|
||||
SuggestedQuestionTraceInfo,
|
||||
TaskData,
|
||||
ToolTraceInfo,
|
||||
TraceTaskName,
|
||||
WorkflowNodeTraceInfo,
|
||||
WorkflowTraceInfo,
|
||||
)
|
||||
from core.ops.utils import get_message_data
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_storage import storage
|
||||
from models.engine import db
|
||||
from models.account import Tenant
|
||||
from models.dataset import Dataset
|
||||
from models.model import App, AppModelConfig, Conversation, Message, MessageFile, TraceAppConfig
|
||||
from models.provider import Provider, ProviderCredential, ProviderModel, ProviderModelCredential, ProviderType
|
||||
from models.tools import ApiToolProvider, BuiltinToolProvider, MCPToolProvider, WorkflowToolProvider
|
||||
from models.workflow import WorkflowAppLog
|
||||
from tasks.ops_trace_task import process_trace_tasks
|
||||
|
||||
@ -40,9 +50,144 @@ if TYPE_CHECKING:
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _lookup_app_and_workspace_names(app_id: str | None, tenant_id: str | None) -> tuple[str, str]:
|
||||
"""Return (app_name, workspace_name) for the given IDs. Falls back to empty strings."""
|
||||
app_name = ""
|
||||
workspace_name = ""
|
||||
if not app_id and not tenant_id:
|
||||
return app_name, workspace_name
|
||||
with Session(db.engine) as session:
|
||||
if app_id:
|
||||
name = session.scalar(select(App.name).where(App.id == app_id))
|
||||
if name:
|
||||
app_name = name
|
||||
if tenant_id:
|
||||
name = session.scalar(select(Tenant.name).where(Tenant.id == tenant_id))
|
||||
if name:
|
||||
workspace_name = name
|
||||
return app_name, workspace_name
|
||||
|
||||
|
||||
_PROVIDER_TYPE_TO_MODEL: dict[str, type] = {
|
||||
"builtin": BuiltinToolProvider,
|
||||
"plugin": BuiltinToolProvider,
|
||||
"api": ApiToolProvider,
|
||||
"workflow": WorkflowToolProvider,
|
||||
"mcp": MCPToolProvider,
|
||||
}
|
||||
|
||||
|
||||
def _lookup_credential_name(credential_id: str | None, provider_type: str | None) -> str:
|
||||
if not credential_id:
|
||||
return ""
|
||||
model_cls = _PROVIDER_TYPE_TO_MODEL.get(provider_type or "")
|
||||
if not model_cls:
|
||||
return ""
|
||||
with Session(db.engine) as session:
|
||||
name = session.scalar(select(model_cls.name).where(model_cls.id == credential_id)) # type: ignore[attr-defined]
|
||||
return str(name) if name else ""
|
||||
|
||||
|
||||
def _lookup_llm_credential_info(
|
||||
tenant_id: str | None, provider: str | None, model: str | None, model_type: str | None = "llm"
|
||||
) -> tuple[str | None, str]:
|
||||
"""
|
||||
Lookup LLM credential ID and name for the given provider and model.
|
||||
Returns (credential_id, credential_name).
|
||||
|
||||
Handles async timing issues gracefully - if credential is deleted between lookups,
|
||||
returns the ID but empty name rather than failing.
|
||||
"""
|
||||
if not tenant_id or not provider:
|
||||
return None, ""
|
||||
|
||||
try:
|
||||
with Session(db.engine) as session:
|
||||
# Try to find provider-level or model-level configuration
|
||||
provider_record = session.scalar(
|
||||
select(Provider).where(
|
||||
Provider.tenant_id == tenant_id,
|
||||
Provider.provider_name == provider,
|
||||
Provider.provider_type == ProviderType.CUSTOM,
|
||||
)
|
||||
)
|
||||
|
||||
if not provider_record:
|
||||
return None, ""
|
||||
|
||||
# Check if there's a model-specific config
|
||||
credential_id = None
|
||||
credential_name = ""
|
||||
is_model_level = False
|
||||
|
||||
if model:
|
||||
# Try model-level first
|
||||
model_record = session.scalar(
|
||||
select(ProviderModel).where(
|
||||
ProviderModel.tenant_id == tenant_id,
|
||||
ProviderModel.provider_name == provider,
|
||||
ProviderModel.model_name == model,
|
||||
ProviderModel.model_type == model_type,
|
||||
)
|
||||
)
|
||||
|
||||
if model_record and model_record.credential_id:
|
||||
credential_id = model_record.credential_id
|
||||
is_model_level = True
|
||||
|
||||
if not credential_id and provider_record.credential_id:
|
||||
# Fall back to provider-level credential
|
||||
credential_id = provider_record.credential_id
|
||||
is_model_level = False
|
||||
|
||||
# Lookup credential_name if we have credential_id
|
||||
if credential_id:
|
||||
try:
|
||||
if is_model_level:
|
||||
# Query ProviderModelCredential
|
||||
cred_name = session.scalar(
|
||||
select(ProviderModelCredential.credential_name).where(
|
||||
ProviderModelCredential.id == credential_id
|
||||
)
|
||||
)
|
||||
else:
|
||||
# Query ProviderCredential
|
||||
cred_name = session.scalar(
|
||||
select(ProviderCredential.credential_name).where(ProviderCredential.id == credential_id)
|
||||
)
|
||||
|
||||
if cred_name:
|
||||
credential_name = str(cred_name)
|
||||
except Exception as e:
|
||||
# Credential might have been deleted between lookups (async timing)
|
||||
# Return ID but empty name rather than failing
|
||||
logger.warning(
|
||||
"Failed to lookup credential name for credential_id=%s (provider=%s, model=%s): %s",
|
||||
credential_id,
|
||||
provider,
|
||||
model,
|
||||
str(e),
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
return credential_id, credential_name
|
||||
except Exception as e:
|
||||
# Database query failed or other unexpected error
|
||||
# Return empty rather than propagating error to telemetry emission
|
||||
logger.warning(
|
||||
"Failed to lookup LLM credential info for tenant_id=%s, provider=%s, model=%s: %s",
|
||||
tenant_id,
|
||||
provider,
|
||||
model,
|
||||
str(e),
|
||||
exc_info=True,
|
||||
)
|
||||
return None, ""
|
||||
|
||||
|
||||
class OpsTraceProviderConfigMap(collections.UserDict[str, dict[str, Any]]):
|
||||
def __getitem__(self, key: str) -> dict[str, Any]:
|
||||
match key:
|
||||
def __getitem__(self, provider: str) -> dict[str, Any]:
|
||||
match provider:
|
||||
case TracingProviderEnum.LANGFUSE:
|
||||
from core.ops.entities.config_entity import LangfuseConfig
|
||||
from core.ops.langfuse_trace.langfuse_trace import LangFuseDataTrace
|
||||
@ -149,7 +294,7 @@ class OpsTraceProviderConfigMap(collections.UserDict[str, dict[str, Any]]):
|
||||
}
|
||||
|
||||
case _:
|
||||
raise KeyError(f"Unsupported tracing provider: {key}")
|
||||
raise KeyError(f"Unsupported tracing provider: {provider}")
|
||||
|
||||
|
||||
provider_config_map = OpsTraceProviderConfigMap()
|
||||
@ -314,6 +459,10 @@ class OpsTraceManager:
|
||||
if app_id is None:
|
||||
return None
|
||||
|
||||
# Handle storage_id format (tenant-{uuid}) - not a real app_id
|
||||
if isinstance(app_id, str) and app_id.startswith("tenant-"):
|
||||
return None
|
||||
|
||||
app: App | None = db.session.query(App).where(App.id == app_id).first()
|
||||
|
||||
if app is None:
|
||||
@ -466,8 +615,6 @@ class TraceTask:
|
||||
|
||||
@classmethod
|
||||
def _get_workflow_run_repo(cls):
|
||||
from repositories.factory import DifyAPIRepositoryFactory
|
||||
|
||||
if cls._workflow_run_repo is None:
|
||||
with cls._repo_lock:
|
||||
if cls._workflow_run_repo is None:
|
||||
@ -478,6 +625,77 @@ class TraceTask:
|
||||
cls._workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
|
||||
return cls._workflow_run_repo
|
||||
|
||||
@classmethod
|
||||
def _calculate_workflow_token_split(
|
||||
cls, session: "Session", workflow_run_id: str, tenant_id: str
|
||||
) -> tuple[int, int]:
|
||||
"""Sum prompt/completion tokens across all node executions for a workflow run.
|
||||
|
||||
Reads from the ``outputs`` column (where LLM nodes store ``usage.prompt_tokens``
|
||||
and ``usage.completion_tokens``) rather than ``execution_metadata``, which only
|
||||
carries ``total_tokens``. Projects only the ``outputs`` column to avoid loading
|
||||
large JSON blobs unnecessarily.
|
||||
"""
|
||||
import json
|
||||
|
||||
from models.workflow import WorkflowNodeExecutionModel
|
||||
|
||||
rows = (
|
||||
session.execute(
|
||||
select(WorkflowNodeExecutionModel.outputs).where(
|
||||
WorkflowNodeExecutionModel.tenant_id == tenant_id,
|
||||
WorkflowNodeExecutionModel.workflow_run_id == workflow_run_id,
|
||||
)
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
total_prompt = 0
|
||||
total_completion = 0
|
||||
|
||||
for raw in rows:
|
||||
if not raw:
|
||||
continue
|
||||
try:
|
||||
outputs = json.loads(raw) if isinstance(raw, str) else raw
|
||||
except (ValueError, TypeError):
|
||||
continue
|
||||
if not isinstance(outputs, dict):
|
||||
continue
|
||||
usage = outputs.get("usage")
|
||||
if not isinstance(usage, dict):
|
||||
continue
|
||||
prompt = usage.get("prompt_tokens")
|
||||
if isinstance(prompt, (int, float)):
|
||||
total_prompt += int(prompt)
|
||||
completion = usage.get("completion_tokens")
|
||||
if isinstance(completion, (int, float)):
|
||||
total_completion += int(completion)
|
||||
|
||||
return (total_prompt, total_completion)
|
||||
|
||||
@classmethod
|
||||
def _get_user_id_from_metadata(cls, metadata: dict[str, Any]) -> str:
|
||||
"""Extract user ID from metadata, prioritizing end_user over account.
|
||||
|
||||
Returns the actual user ID (end_user or account) who invoked the workflow,
|
||||
regardless of invoke_from context.
|
||||
"""
|
||||
# Priority 1: End user (external users via API/WebApp)
|
||||
if user_id := metadata.get("from_end_user_id"):
|
||||
return f"end_user:{user_id}"
|
||||
|
||||
# Priority 2: Account user (internal users via console/debugger)
|
||||
if user_id := metadata.get("from_account_id"):
|
||||
return f"account:{user_id}"
|
||||
|
||||
# Priority 3: User (internal users via console/debugger)
|
||||
if user_id := metadata.get("user_id"):
|
||||
return f"user:{user_id}"
|
||||
|
||||
return "anonymous"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
trace_type: Any,
|
||||
@ -491,6 +709,7 @@ class TraceTask:
|
||||
self.trace_type = trace_type
|
||||
self.message_id = message_id
|
||||
self.workflow_run_id = workflow_execution.id_ if workflow_execution else None
|
||||
self.workflow_total_tokens: int | None = workflow_execution.total_tokens if workflow_execution else None
|
||||
self.conversation_id = conversation_id
|
||||
self.user_id = user_id
|
||||
self.timer = timer
|
||||
@ -498,6 +717,8 @@ class TraceTask:
|
||||
self.app_id = None
|
||||
self.trace_id = None
|
||||
self.kwargs = kwargs
|
||||
if user_id is not None and "user_id" not in self.kwargs:
|
||||
self.kwargs["user_id"] = user_id
|
||||
external_trace_id = kwargs.get("external_trace_id")
|
||||
if external_trace_id:
|
||||
self.trace_id = external_trace_id
|
||||
@ -509,9 +730,12 @@ class TraceTask:
|
||||
preprocess_map = {
|
||||
TraceTaskName.CONVERSATION_TRACE: lambda: self.conversation_trace(**self.kwargs),
|
||||
TraceTaskName.WORKFLOW_TRACE: lambda: self.workflow_trace(
|
||||
workflow_run_id=self.workflow_run_id, conversation_id=self.conversation_id, user_id=self.user_id
|
||||
workflow_run_id=self.workflow_run_id,
|
||||
conversation_id=self.conversation_id,
|
||||
user_id=self.user_id,
|
||||
total_tokens_override=self.workflow_total_tokens,
|
||||
),
|
||||
TraceTaskName.MESSAGE_TRACE: lambda: self.message_trace(message_id=self.message_id),
|
||||
TraceTaskName.MESSAGE_TRACE: lambda: self.message_trace(message_id=self.message_id, **self.kwargs),
|
||||
TraceTaskName.MODERATION_TRACE: lambda: self.moderation_trace(
|
||||
message_id=self.message_id, timer=self.timer, **self.kwargs
|
||||
),
|
||||
@ -527,6 +751,9 @@ class TraceTask:
|
||||
TraceTaskName.GENERATE_NAME_TRACE: lambda: self.generate_name_trace(
|
||||
conversation_id=self.conversation_id, timer=self.timer, **self.kwargs
|
||||
),
|
||||
TraceTaskName.PROMPT_GENERATION_TRACE: lambda: self.prompt_generation_trace(**self.kwargs),
|
||||
TraceTaskName.NODE_EXECUTION_TRACE: lambda: self.node_execution_trace(**self.kwargs),
|
||||
TraceTaskName.DRAFT_NODE_EXECUTION_TRACE: lambda: self.draft_node_execution_trace(**self.kwargs),
|
||||
}
|
||||
|
||||
return preprocess_map.get(self.trace_type, lambda: None)()
|
||||
@ -541,6 +768,7 @@ class TraceTask:
|
||||
workflow_run_id: str | None,
|
||||
conversation_id: str | None,
|
||||
user_id: str | None,
|
||||
total_tokens_override: int | None = None,
|
||||
):
|
||||
if not workflow_run_id:
|
||||
return {}
|
||||
@ -560,7 +788,7 @@ class TraceTask:
|
||||
workflow_run_version = workflow_run.version
|
||||
error = workflow_run.error or ""
|
||||
|
||||
total_tokens = workflow_run.total_tokens
|
||||
total_tokens = total_tokens_override if total_tokens_override is not None else workflow_run.total_tokens
|
||||
|
||||
file_list = workflow_run_inputs.get("sys.file") or []
|
||||
query = workflow_run_inputs.get("query") or workflow_run_inputs.get("sys.query") or ""
|
||||
@ -581,8 +809,18 @@ class TraceTask:
|
||||
Message.workflow_run_id == workflow_run_id,
|
||||
)
|
||||
message_id = session.scalar(message_data_stmt)
|
||||
prompt_tokens, completion_tokens = self._calculate_workflow_token_split(
|
||||
session, workflow_run_id=workflow_run_id, tenant_id=tenant_id
|
||||
)
|
||||
|
||||
metadata = {
|
||||
from core.telemetry.gateway import is_enterprise_telemetry_enabled
|
||||
|
||||
if is_enterprise_telemetry_enabled():
|
||||
app_name, workspace_name = _lookup_app_and_workspace_names(workflow_run.app_id, tenant_id)
|
||||
else:
|
||||
app_name, workspace_name = "", ""
|
||||
|
||||
metadata: dict[str, Any] = {
|
||||
"workflow_id": workflow_id,
|
||||
"conversation_id": conversation_id,
|
||||
"workflow_run_id": workflow_run_id,
|
||||
@ -595,8 +833,14 @@ class TraceTask:
|
||||
"triggered_from": workflow_run.triggered_from,
|
||||
"user_id": user_id,
|
||||
"app_id": workflow_run.app_id,
|
||||
"app_name": app_name,
|
||||
"workspace_name": workspace_name,
|
||||
}
|
||||
|
||||
parent_trace_context = self.kwargs.get("parent_trace_context")
|
||||
if parent_trace_context:
|
||||
metadata["parent_trace_context"] = parent_trace_context
|
||||
|
||||
workflow_trace_info = WorkflowTraceInfo(
|
||||
trace_id=self.trace_id,
|
||||
workflow_data=workflow_run.to_dict(),
|
||||
@ -611,6 +855,8 @@ class TraceTask:
|
||||
workflow_run_version=workflow_run_version,
|
||||
error=error,
|
||||
total_tokens=total_tokens,
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
file_list=file_list,
|
||||
query=query,
|
||||
metadata=metadata,
|
||||
@ -618,10 +864,11 @@ class TraceTask:
|
||||
message_id=message_id,
|
||||
start_time=workflow_run.created_at,
|
||||
end_time=workflow_run.finished_at,
|
||||
invoked_by=self._get_user_id_from_metadata(metadata),
|
||||
)
|
||||
return workflow_trace_info
|
||||
|
||||
def message_trace(self, message_id: str | None):
|
||||
def message_trace(self, message_id: str | None, **kwargs):
|
||||
if not message_id:
|
||||
return {}
|
||||
message_data = get_message_data(message_id)
|
||||
@ -644,6 +891,19 @@ class TraceTask:
|
||||
|
||||
streaming_metrics = self._extract_streaming_metrics(message_data)
|
||||
|
||||
tenant_id = ""
|
||||
with Session(db.engine) as session:
|
||||
tid = session.scalar(select(App.tenant_id).where(App.id == message_data.app_id))
|
||||
if tid:
|
||||
tenant_id = str(tid)
|
||||
|
||||
from core.telemetry.gateway import is_enterprise_telemetry_enabled
|
||||
|
||||
if is_enterprise_telemetry_enabled():
|
||||
app_name, workspace_name = _lookup_app_and_workspace_names(message_data.app_id, tenant_id)
|
||||
else:
|
||||
app_name, workspace_name = "", ""
|
||||
|
||||
metadata = {
|
||||
"conversation_id": message_data.conversation_id,
|
||||
"ls_provider": message_data.model_provider,
|
||||
@ -655,7 +915,14 @@ class TraceTask:
|
||||
"workflow_run_id": message_data.workflow_run_id,
|
||||
"from_source": message_data.from_source,
|
||||
"message_id": message_id,
|
||||
"tenant_id": tenant_id,
|
||||
"app_id": message_data.app_id,
|
||||
"user_id": message_data.from_end_user_id or message_data.from_account_id,
|
||||
"app_name": app_name,
|
||||
"workspace_name": workspace_name,
|
||||
}
|
||||
if node_execution_id := kwargs.get("node_execution_id"):
|
||||
metadata["node_execution_id"] = node_execution_id
|
||||
|
||||
message_tokens = message_data.message_tokens
|
||||
|
||||
@ -672,7 +939,9 @@ class TraceTask:
|
||||
outputs=message_data.answer,
|
||||
file_list=file_list,
|
||||
start_time=created_at,
|
||||
end_time=created_at + timedelta(seconds=message_data.provider_response_latency),
|
||||
end_time=message_data.updated_at
|
||||
if message_data.updated_at and message_data.updated_at > created_at
|
||||
else created_at + timedelta(seconds=message_data.provider_response_latency),
|
||||
metadata=metadata,
|
||||
message_file_data=message_file_data,
|
||||
conversation_mode=conversation_mode,
|
||||
@ -697,6 +966,8 @@ class TraceTask:
|
||||
"preset_response": moderation_result.preset_response,
|
||||
"query": moderation_result.query,
|
||||
}
|
||||
if node_execution_id := kwargs.get("node_execution_id"):
|
||||
metadata["node_execution_id"] = node_execution_id
|
||||
|
||||
# get workflow_app_log_id
|
||||
workflow_app_log_id = None
|
||||
@ -738,6 +1009,8 @@ class TraceTask:
|
||||
"workflow_run_id": message_data.workflow_run_id,
|
||||
"from_source": message_data.from_source,
|
||||
}
|
||||
if node_execution_id := kwargs.get("node_execution_id"):
|
||||
metadata["node_execution_id"] = node_execution_id
|
||||
|
||||
# get workflow_app_log_id
|
||||
workflow_app_log_id = None
|
||||
@ -777,6 +1050,52 @@ class TraceTask:
|
||||
if not message_data:
|
||||
return {}
|
||||
|
||||
tenant_id = ""
|
||||
with Session(db.engine) as session:
|
||||
tid = session.scalar(select(App.tenant_id).where(App.id == message_data.app_id))
|
||||
if tid:
|
||||
tenant_id = str(tid)
|
||||
|
||||
from core.telemetry.gateway import is_enterprise_telemetry_enabled
|
||||
|
||||
if is_enterprise_telemetry_enabled():
|
||||
app_name, workspace_name = _lookup_app_and_workspace_names(message_data.app_id, tenant_id)
|
||||
else:
|
||||
app_name, workspace_name = "", ""
|
||||
|
||||
doc_list = [doc.model_dump() for doc in documents] if documents else []
|
||||
dataset_ids: set[str] = set()
|
||||
for doc in doc_list:
|
||||
doc_meta = doc.get("metadata") or {}
|
||||
did = doc_meta.get("dataset_id")
|
||||
if did:
|
||||
dataset_ids.add(did)
|
||||
|
||||
embedding_models: dict[str, dict[str, str]] = {}
|
||||
if dataset_ids:
|
||||
with Session(db.engine) as session:
|
||||
rows = session.execute(
|
||||
select(Dataset.id, Dataset.embedding_model, Dataset.embedding_model_provider).where(
|
||||
Dataset.id.in_(list(dataset_ids))
|
||||
)
|
||||
).all()
|
||||
for row in rows:
|
||||
embedding_models[str(row[0])] = {
|
||||
"embedding_model": row[1] or "",
|
||||
"embedding_model_provider": row[2] or "",
|
||||
}
|
||||
|
||||
# Extract rerank model info from retrieval_model kwargs
|
||||
rerank_model_provider = ""
|
||||
rerank_model_name = ""
|
||||
if "retrieval_model" in kwargs:
|
||||
retrieval_model = kwargs["retrieval_model"]
|
||||
if isinstance(retrieval_model, dict):
|
||||
reranking_model = retrieval_model.get("reranking_model")
|
||||
if isinstance(reranking_model, dict):
|
||||
rerank_model_provider = reranking_model.get("reranking_provider_name", "")
|
||||
rerank_model_name = reranking_model.get("reranking_model_name", "")
|
||||
|
||||
metadata = {
|
||||
"message_id": message_id,
|
||||
"ls_provider": message_data.model_provider,
|
||||
@ -787,13 +1106,23 @@ class TraceTask:
|
||||
"agent_based": message_data.agent_based,
|
||||
"workflow_run_id": message_data.workflow_run_id,
|
||||
"from_source": message_data.from_source,
|
||||
"tenant_id": tenant_id,
|
||||
"app_id": message_data.app_id,
|
||||
"user_id": message_data.from_end_user_id or message_data.from_account_id,
|
||||
"app_name": app_name,
|
||||
"workspace_name": workspace_name,
|
||||
"embedding_models": embedding_models,
|
||||
"rerank_model_provider": rerank_model_provider,
|
||||
"rerank_model_name": rerank_model_name,
|
||||
}
|
||||
if node_execution_id := kwargs.get("node_execution_id"):
|
||||
metadata["node_execution_id"] = node_execution_id
|
||||
|
||||
dataset_retrieval_trace_info = DatasetRetrievalTraceInfo(
|
||||
trace_id=self.trace_id,
|
||||
message_id=message_id,
|
||||
inputs=message_data.query or message_data.inputs,
|
||||
documents=[doc.model_dump() for doc in documents] if documents else [],
|
||||
documents=doc_list,
|
||||
start_time=timer.get("start"),
|
||||
end_time=timer.get("end"),
|
||||
metadata=metadata,
|
||||
@ -836,6 +1165,10 @@ class TraceTask:
|
||||
"error": error,
|
||||
"tool_parameters": tool_parameters,
|
||||
}
|
||||
if message_data.workflow_run_id:
|
||||
metadata["workflow_run_id"] = message_data.workflow_run_id
|
||||
if node_execution_id := kwargs.get("node_execution_id"):
|
||||
metadata["node_execution_id"] = node_execution_id
|
||||
|
||||
file_url = ""
|
||||
message_file_data = db.session.query(MessageFile).filter_by(message_id=message_id).first()
|
||||
@ -890,6 +1223,8 @@ class TraceTask:
|
||||
"conversation_id": conversation_id,
|
||||
"tenant_id": tenant_id,
|
||||
}
|
||||
if node_execution_id := kwargs.get("node_execution_id"):
|
||||
metadata["node_execution_id"] = node_execution_id
|
||||
|
||||
generate_name_trace_info = GenerateNameTraceInfo(
|
||||
trace_id=self.trace_id,
|
||||
@ -904,6 +1239,182 @@ class TraceTask:
|
||||
|
||||
return generate_name_trace_info
|
||||
|
||||
def prompt_generation_trace(self, **kwargs) -> PromptGenerationTraceInfo | dict:
|
||||
tenant_id = kwargs.get("tenant_id", "")
|
||||
user_id = kwargs.get("user_id", "")
|
||||
app_id = kwargs.get("app_id")
|
||||
operation_type = kwargs.get("operation_type", "")
|
||||
instruction = kwargs.get("instruction", "")
|
||||
generated_output = kwargs.get("generated_output", "")
|
||||
|
||||
prompt_tokens = kwargs.get("prompt_tokens", 0)
|
||||
completion_tokens = kwargs.get("completion_tokens", 0)
|
||||
total_tokens = kwargs.get("total_tokens", 0)
|
||||
|
||||
model_provider = kwargs.get("model_provider", "")
|
||||
model_name = kwargs.get("model_name", "")
|
||||
|
||||
latency = kwargs.get("latency", 0.0)
|
||||
|
||||
timer = kwargs.get("timer")
|
||||
start_time = timer.get("start") if timer else None
|
||||
end_time = timer.get("end") if timer else None
|
||||
|
||||
total_price = kwargs.get("total_price")
|
||||
currency = kwargs.get("currency")
|
||||
|
||||
error = kwargs.get("error")
|
||||
|
||||
app_name = None
|
||||
workspace_name = None
|
||||
if app_id:
|
||||
app_name, workspace_name = _lookup_app_and_workspace_names(app_id, tenant_id)
|
||||
|
||||
metadata = {
|
||||
"tenant_id": tenant_id,
|
||||
"user_id": user_id,
|
||||
"app_id": app_id or "",
|
||||
"app_name": app_name,
|
||||
"workspace_name": workspace_name,
|
||||
"operation_type": operation_type,
|
||||
"model_provider": model_provider,
|
||||
"model_name": model_name,
|
||||
}
|
||||
if node_execution_id := kwargs.get("node_execution_id"):
|
||||
metadata["node_execution_id"] = node_execution_id
|
||||
|
||||
return PromptGenerationTraceInfo(
|
||||
trace_id=self.trace_id,
|
||||
inputs=instruction,
|
||||
outputs=generated_output,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
metadata=metadata,
|
||||
tenant_id=tenant_id,
|
||||
user_id=user_id,
|
||||
app_id=app_id,
|
||||
operation_type=operation_type,
|
||||
instruction=instruction,
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=total_tokens,
|
||||
model_provider=model_provider,
|
||||
model_name=model_name,
|
||||
latency=latency,
|
||||
total_price=total_price,
|
||||
currency=currency,
|
||||
error=error,
|
||||
)
|
||||
|
||||
def node_execution_trace(self, **kwargs) -> WorkflowNodeTraceInfo | dict:
|
||||
node_data: dict = kwargs.get("node_execution_data", {})
|
||||
if not node_data:
|
||||
return {}
|
||||
|
||||
from core.telemetry.gateway import is_enterprise_telemetry_enabled
|
||||
|
||||
if is_enterprise_telemetry_enabled():
|
||||
app_name, workspace_name = _lookup_app_and_workspace_names(
|
||||
node_data.get("app_id"), node_data.get("tenant_id")
|
||||
)
|
||||
else:
|
||||
app_name, workspace_name = "", ""
|
||||
|
||||
# Try tool credential lookup first
|
||||
credential_id = node_data.get("credential_id")
|
||||
if is_enterprise_telemetry_enabled():
|
||||
credential_name = _lookup_credential_name(credential_id, node_data.get("credential_provider_type"))
|
||||
# If no credential_id found (e.g., LLM nodes), try LLM credential lookup
|
||||
if not credential_id:
|
||||
llm_cred_id, llm_cred_name = _lookup_llm_credential_info(
|
||||
tenant_id=node_data.get("tenant_id"),
|
||||
provider=node_data.get("model_provider"),
|
||||
model=node_data.get("model_name"),
|
||||
model_type="llm",
|
||||
)
|
||||
if llm_cred_id:
|
||||
credential_id = llm_cred_id
|
||||
credential_name = llm_cred_name
|
||||
else:
|
||||
credential_name = ""
|
||||
metadata: dict[str, Any] = {
|
||||
"tenant_id": node_data.get("tenant_id"),
|
||||
"app_id": node_data.get("app_id"),
|
||||
"app_name": app_name,
|
||||
"workspace_name": workspace_name,
|
||||
"user_id": node_data.get("user_id"),
|
||||
"invoke_from": node_data.get("invoke_from"),
|
||||
"credential_id": credential_id,
|
||||
"credential_name": credential_name,
|
||||
"dataset_ids": node_data.get("dataset_ids"),
|
||||
"dataset_names": node_data.get("dataset_names"),
|
||||
"plugin_name": node_data.get("plugin_name"),
|
||||
}
|
||||
|
||||
parent_trace_context = node_data.get("parent_trace_context")
|
||||
if parent_trace_context:
|
||||
metadata["parent_trace_context"] = parent_trace_context
|
||||
|
||||
message_id: str | None = None
|
||||
conversation_id = node_data.get("conversation_id")
|
||||
workflow_execution_id = node_data.get("workflow_execution_id")
|
||||
if conversation_id and workflow_execution_id and not parent_trace_context:
|
||||
with Session(db.engine) as session:
|
||||
msg_id = session.scalar(
|
||||
select(Message.id).where(
|
||||
Message.conversation_id == conversation_id,
|
||||
Message.workflow_run_id == workflow_execution_id,
|
||||
)
|
||||
)
|
||||
if msg_id:
|
||||
message_id = str(msg_id)
|
||||
metadata["message_id"] = message_id
|
||||
if conversation_id:
|
||||
metadata["conversation_id"] = conversation_id
|
||||
|
||||
return WorkflowNodeTraceInfo(
|
||||
trace_id=self.trace_id,
|
||||
message_id=message_id,
|
||||
start_time=node_data.get("created_at"),
|
||||
end_time=node_data.get("finished_at"),
|
||||
metadata=metadata,
|
||||
workflow_id=node_data.get("workflow_id", ""),
|
||||
workflow_run_id=node_data.get("workflow_execution_id", ""),
|
||||
tenant_id=node_data.get("tenant_id", ""),
|
||||
node_execution_id=node_data.get("node_execution_id", ""),
|
||||
node_id=node_data.get("node_id", ""),
|
||||
node_type=node_data.get("node_type", ""),
|
||||
title=node_data.get("title", ""),
|
||||
status=node_data.get("status", ""),
|
||||
error=node_data.get("error"),
|
||||
elapsed_time=node_data.get("elapsed_time", 0.0),
|
||||
index=node_data.get("index", 0),
|
||||
predecessor_node_id=node_data.get("predecessor_node_id"),
|
||||
total_tokens=node_data.get("total_tokens", 0),
|
||||
total_price=node_data.get("total_price", 0.0),
|
||||
currency=node_data.get("currency"),
|
||||
model_provider=node_data.get("model_provider"),
|
||||
model_name=node_data.get("model_name"),
|
||||
prompt_tokens=node_data.get("prompt_tokens"),
|
||||
completion_tokens=node_data.get("completion_tokens"),
|
||||
tool_name=node_data.get("tool_name"),
|
||||
iteration_id=node_data.get("iteration_id"),
|
||||
iteration_index=node_data.get("iteration_index"),
|
||||
loop_id=node_data.get("loop_id"),
|
||||
loop_index=node_data.get("loop_index"),
|
||||
parallel_id=node_data.get("parallel_id"),
|
||||
node_inputs=node_data.get("node_inputs"),
|
||||
node_outputs=node_data.get("node_outputs"),
|
||||
process_data=node_data.get("process_data"),
|
||||
invoked_by=self._get_user_id_from_metadata(metadata),
|
||||
)
|
||||
|
||||
def draft_node_execution_trace(self, **kwargs) -> DraftNodeExecutionTrace | dict:
|
||||
node_trace = self.node_execution_trace(**kwargs)
|
||||
if not isinstance(node_trace, WorkflowNodeTraceInfo):
|
||||
return node_trace
|
||||
return DraftNodeExecutionTrace(**node_trace.model_dump())
|
||||
|
||||
def _extract_streaming_metrics(self, message_data) -> dict:
|
||||
if not message_data.message_metadata:
|
||||
return {}
|
||||
@ -937,13 +1448,17 @@ class TraceQueueManager:
|
||||
self.user_id = user_id
|
||||
self.trace_instance = OpsTraceManager.get_ops_trace_instance(app_id)
|
||||
self.flask_app = current_app._get_current_object() # type: ignore
|
||||
|
||||
from core.telemetry.gateway import is_enterprise_telemetry_enabled
|
||||
|
||||
self._enterprise_telemetry_enabled = is_enterprise_telemetry_enabled()
|
||||
if trace_manager_timer is None:
|
||||
self.start_timer()
|
||||
|
||||
def add_trace_task(self, trace_task: TraceTask):
|
||||
global trace_manager_timer, trace_manager_queue
|
||||
try:
|
||||
if self.trace_instance:
|
||||
if self._enterprise_telemetry_enabled or self.trace_instance:
|
||||
trace_task.app_id = self.app_id
|
||||
trace_manager_queue.put(trace_task)
|
||||
except Exception:
|
||||
@ -979,20 +1494,27 @@ class TraceQueueManager:
|
||||
def send_to_celery(self, tasks: list[TraceTask]):
|
||||
with self.flask_app.app_context():
|
||||
for task in tasks:
|
||||
if task.app_id is None:
|
||||
continue
|
||||
storage_id = task.app_id
|
||||
if storage_id is None:
|
||||
tenant_id = task.kwargs.get("tenant_id")
|
||||
if tenant_id:
|
||||
storage_id = f"tenant-{tenant_id}"
|
||||
else:
|
||||
logger.warning("Skipping trace without app_id or tenant_id, trace_type: %s", task.trace_type)
|
||||
continue
|
||||
|
||||
file_id = uuid4().hex
|
||||
trace_info = task.execute()
|
||||
|
||||
task_data = TaskData(
|
||||
app_id=task.app_id,
|
||||
app_id=storage_id,
|
||||
trace_info_type=type(trace_info).__name__,
|
||||
trace_info=trace_info.model_dump() if trace_info else None,
|
||||
)
|
||||
file_path = f"{OPS_FILE_PATH}{task.app_id}/{file_id}.json"
|
||||
file_path = f"{OPS_FILE_PATH}{storage_id}/{file_id}.json"
|
||||
storage.save(file_path, task_data.model_dump_json().encode("utf-8"))
|
||||
file_info = {
|
||||
"file_id": file_id,
|
||||
"app_id": task.app_id,
|
||||
"app_id": storage_id,
|
||||
}
|
||||
process_trace_tasks.delay(file_info) # type: ignore
|
||||
|
||||
@ -209,7 +209,10 @@ class PluginInstaller(BasePluginClient):
|
||||
"GET",
|
||||
f"plugin/{tenant_id}/management/decode/from_identifier",
|
||||
PluginDecodeResponse,
|
||||
params={"plugin_unique_identifier": plugin_unique_identifier},
|
||||
params={
|
||||
"plugin_unique_identifier": plugin_unique_identifier,
|
||||
"PluginUniqueIdentifier": plugin_unique_identifier, # compat with daemon <= 0.5.4
|
||||
},
|
||||
)
|
||||
|
||||
def fetch_plugin_installation_by_ids(
|
||||
|
||||
@ -56,12 +56,37 @@ from services.feature_service import FeatureService
|
||||
|
||||
class ProviderManager:
|
||||
"""
|
||||
ProviderManager is a class that manages the model providers includes Hosting and Customize Model Providers.
|
||||
ProviderManager manages tenant-scoped model provider configuration.
|
||||
|
||||
The runtime adapter is injected by the composition layer so this class stays
|
||||
focused on configuration assembly instead of constructing plugin runtimes.
|
||||
Request-bound managers may carry caller identity in that runtime, and the
|
||||
resulting ``ProviderConfiguration`` objects must reuse it for downstream
|
||||
model-type and schema lookups.
|
||||
|
||||
Configuration assembly is cached per manager instance so call chains that
|
||||
share one request-scoped manager can reuse the same provider graph instead
|
||||
of rebuilding it for every lookup. Call ``clear_configurations_cache()``
|
||||
when a long-lived manager needs to observe writes performed within the same
|
||||
instance scope.
|
||||
"""
|
||||
|
||||
decoding_rsa_key: Any | None
|
||||
decoding_cipher_rsa: Any | None
|
||||
_configurations_cache: dict[str, ProviderConfigurations]
|
||||
|
||||
def __init__(self):
|
||||
self.decoding_rsa_key = None
|
||||
self.decoding_cipher_rsa = None
|
||||
self._configurations_cache = {}
|
||||
|
||||
def clear_configurations_cache(self, tenant_id: str | None = None) -> None:
|
||||
"""Drop assembled provider configurations cached on this manager instance."""
|
||||
if tenant_id is None:
|
||||
self._configurations_cache.clear()
|
||||
return
|
||||
|
||||
self._configurations_cache.pop(tenant_id, None)
|
||||
|
||||
def get_configurations(self, tenant_id: str) -> ProviderConfigurations:
|
||||
"""
|
||||
@ -100,6 +125,10 @@ class ProviderManager:
|
||||
:param tenant_id:
|
||||
:return:
|
||||
"""
|
||||
cached_configurations = self._configurations_cache.get(tenant_id)
|
||||
if cached_configurations is not None:
|
||||
return cached_configurations
|
||||
|
||||
# Get all provider records of the workspace
|
||||
provider_name_to_provider_records_dict = self._get_all_providers(tenant_id)
|
||||
|
||||
@ -258,6 +287,8 @@ class ProviderManager:
|
||||
|
||||
provider_configurations[str(provider_id_entity)] = provider_configuration
|
||||
|
||||
self._configurations_cache[tenant_id] = provider_configurations
|
||||
|
||||
# Return the encapsulated object
|
||||
return provider_configurations
|
||||
|
||||
|
||||
@ -22,7 +22,7 @@ from core.rag.index_processor.constant.query_type import QueryType
|
||||
from core.rag.models.document import Document
|
||||
from core.rag.rerank.rerank_type import RerankMode
|
||||
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
||||
from core.tools.signature import sign_upload_file
|
||||
from core.tools.signature import sign_upload_file_preview_url
|
||||
from dify_graph.model_runtime.entities.model_entities import ModelType
|
||||
from extensions.ext_database import db
|
||||
from models.dataset import (
|
||||
@ -864,7 +864,7 @@ class RetrievalService:
|
||||
"name": upload_file.name,
|
||||
"extension": "." + upload_file.extension,
|
||||
"mime_type": upload_file.mime_type,
|
||||
"source_url": sign_upload_file(upload_file.id, upload_file.extension),
|
||||
"source_url": sign_upload_file_preview_url(upload_file.id, upload_file.extension),
|
||||
"size": upload_file.size,
|
||||
}
|
||||
return {"attachment_info": attachment_info, "segment_id": attachment_binding.segment_id}
|
||||
@ -893,7 +893,7 @@ class RetrievalService:
|
||||
"name": upload_file.name,
|
||||
"extension": "." + upload_file.extension,
|
||||
"mime_type": upload_file.mime_type,
|
||||
"source_url": sign_upload_file(upload_file.id, upload_file.extension),
|
||||
"source_url": sign_upload_file_preview_url(upload_file.id, upload_file.extension),
|
||||
"size": upload_file.size,
|
||||
}
|
||||
if attachment_binding:
|
||||
|
||||
@ -115,7 +115,7 @@ class PdfExtractor(BaseExtractor):
|
||||
"""
|
||||
image_content = []
|
||||
upload_files = []
|
||||
base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL
|
||||
base_url = dify_config.FILES_URL
|
||||
|
||||
try:
|
||||
image_objects = page.get_objects(filter=(pdfium_c.FPDF_PAGEOBJ_IMAGE,))
|
||||
|
||||
@ -88,7 +88,7 @@ class WordExtractor(BaseExtractor):
|
||||
def _extract_images_from_docx(self, doc):
|
||||
image_count = 0
|
||||
image_map = {}
|
||||
base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL
|
||||
base_url = dify_config.FILES_URL
|
||||
|
||||
for r_id, rel in doc.part.rels.items():
|
||||
if "image" in rel.target_ref:
|
||||
|
||||
@ -54,7 +54,7 @@ from core.rag.retrieval.template_prompts import (
|
||||
METADATA_FILTER_USER_PROMPT_2,
|
||||
METADATA_FILTER_USER_PROMPT_3,
|
||||
)
|
||||
from core.tools.signature import sign_upload_file
|
||||
from core.tools.signature import sign_upload_file_preview_url
|
||||
from core.tools.utils.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool
|
||||
from core.workflow.nodes.knowledge_retrieval import exc
|
||||
from core.workflow.nodes.knowledge_retrieval.retrieval import (
|
||||
@ -524,7 +524,7 @@ class DatasetRetrieval:
|
||||
related_id=upload_file.id,
|
||||
size=upload_file.size,
|
||||
storage_key=upload_file.key,
|
||||
url=sign_upload_file(upload_file.id, upload_file.extension),
|
||||
url=sign_upload_file_preview_url(upload_file.id, upload_file.extension),
|
||||
)
|
||||
context_files.append(attachment_info)
|
||||
if show_retrieve_source:
|
||||
|
||||
43
api/core/telemetry/__init__.py
Normal file
43
api/core/telemetry/__init__.py
Normal file
@ -0,0 +1,43 @@
|
||||
"""Telemetry facade.
|
||||
|
||||
Thin public API for emitting telemetry events. All routing logic
|
||||
lives in ``core.telemetry.gateway`` which is shared by both CE and EE.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from core.ops.entities.trace_entity import TraceTaskName
|
||||
from core.telemetry.events import TelemetryContext, TelemetryEvent
|
||||
from core.telemetry.gateway import emit as gateway_emit
|
||||
from core.telemetry.gateway import get_trace_task_to_case
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from core.ops.ops_trace_manager import TraceQueueManager
|
||||
|
||||
|
||||
def emit(event: TelemetryEvent, trace_manager: TraceQueueManager | None = None) -> None:
|
||||
"""Emit a telemetry event.
|
||||
|
||||
Translates the ``TelemetryEvent`` (keyed by ``TraceTaskName``) into a
|
||||
``TelemetryCase`` and delegates to ``core.telemetry.gateway.emit()``.
|
||||
"""
|
||||
case = get_trace_task_to_case().get(event.name)
|
||||
if case is None:
|
||||
return
|
||||
|
||||
context: dict[str, object] = {
|
||||
"tenant_id": event.context.tenant_id,
|
||||
"user_id": event.context.user_id,
|
||||
"app_id": event.context.app_id,
|
||||
}
|
||||
gateway_emit(case, context, event.payload, trace_manager)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"TelemetryContext",
|
||||
"TelemetryEvent",
|
||||
"TraceTaskName",
|
||||
"emit",
|
||||
]
|
||||
21
api/core/telemetry/events.py
Normal file
21
api/core/telemetry/events.py
Normal file
@ -0,0 +1,21 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from core.ops.entities.trace_entity import TraceTaskName
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class TelemetryContext:
|
||||
tenant_id: str | None = None
|
||||
user_id: str | None = None
|
||||
app_id: str | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class TelemetryEvent:
|
||||
name: TraceTaskName
|
||||
context: TelemetryContext
|
||||
payload: dict[str, Any]
|
||||
239
api/core/telemetry/gateway.py
Normal file
239
api/core/telemetry/gateway.py
Normal file
@ -0,0 +1,239 @@
|
||||
"""Telemetry gateway — single routing layer for all editions.
|
||||
|
||||
Maps ``TelemetryCase`` → ``CaseRoute`` and dispatches events to either
|
||||
the CE/EE trace pipeline (``TraceQueueManager``) or the enterprise-only
|
||||
metric/log Celery queue.
|
||||
|
||||
This module lives in ``core/`` so both CE and EE share one routing table
|
||||
and one ``emit()`` entry point. No separate enterprise gateway module is
|
||||
needed — enterprise-specific dispatch (Celery task, payload offloading)
|
||||
is handled here behind lazy imports that no-op in CE.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from core.ops.entities.trace_entity import TraceTaskName
|
||||
from enterprise.telemetry.contracts import CaseRoute, SignalType
|
||||
from extensions.ext_storage import storage
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from core.ops.ops_trace_manager import TraceQueueManager
|
||||
from enterprise.telemetry.contracts import TelemetryCase
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
PAYLOAD_SIZE_THRESHOLD_BYTES = 1 * 1024 * 1024
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Routing table — authoritative mapping for all editions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_case_to_trace_task: dict[TelemetryCase, TraceTaskName] | None = None
|
||||
_case_routing: dict[TelemetryCase, CaseRoute] | None = None
|
||||
|
||||
|
||||
def _get_case_to_trace_task() -> dict[TelemetryCase, TraceTaskName]:
|
||||
global _case_to_trace_task
|
||||
if _case_to_trace_task is None:
|
||||
from enterprise.telemetry.contracts import TelemetryCase
|
||||
|
||||
_case_to_trace_task = {
|
||||
TelemetryCase.WORKFLOW_RUN: TraceTaskName.WORKFLOW_TRACE,
|
||||
TelemetryCase.MESSAGE_RUN: TraceTaskName.MESSAGE_TRACE,
|
||||
TelemetryCase.NODE_EXECUTION: TraceTaskName.NODE_EXECUTION_TRACE,
|
||||
TelemetryCase.DRAFT_NODE_EXECUTION: TraceTaskName.DRAFT_NODE_EXECUTION_TRACE,
|
||||
TelemetryCase.PROMPT_GENERATION: TraceTaskName.PROMPT_GENERATION_TRACE,
|
||||
TelemetryCase.TOOL_EXECUTION: TraceTaskName.TOOL_TRACE,
|
||||
TelemetryCase.MODERATION_CHECK: TraceTaskName.MODERATION_TRACE,
|
||||
TelemetryCase.SUGGESTED_QUESTION: TraceTaskName.SUGGESTED_QUESTION_TRACE,
|
||||
TelemetryCase.DATASET_RETRIEVAL: TraceTaskName.DATASET_RETRIEVAL_TRACE,
|
||||
TelemetryCase.GENERATE_NAME: TraceTaskName.GENERATE_NAME_TRACE,
|
||||
}
|
||||
return _case_to_trace_task
|
||||
|
||||
|
||||
def get_trace_task_to_case() -> dict[TraceTaskName, TelemetryCase]:
|
||||
"""Return TraceTaskName → TelemetryCase (inverse of _get_case_to_trace_task)."""
|
||||
return {v: k for k, v in _get_case_to_trace_task().items()}
|
||||
|
||||
|
||||
def _get_case_routing() -> dict[TelemetryCase, CaseRoute]:
|
||||
global _case_routing
|
||||
if _case_routing is None:
|
||||
from enterprise.telemetry.contracts import CaseRoute, SignalType, TelemetryCase
|
||||
|
||||
_case_routing = {
|
||||
# TRACE — CE-eligible (flow in both CE and EE)
|
||||
TelemetryCase.WORKFLOW_RUN: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=True),
|
||||
TelemetryCase.MESSAGE_RUN: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=True),
|
||||
TelemetryCase.TOOL_EXECUTION: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=True),
|
||||
TelemetryCase.MODERATION_CHECK: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=True),
|
||||
TelemetryCase.SUGGESTED_QUESTION: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=True),
|
||||
TelemetryCase.DATASET_RETRIEVAL: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=True),
|
||||
TelemetryCase.GENERATE_NAME: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=True),
|
||||
# TRACE — enterprise-only
|
||||
TelemetryCase.NODE_EXECUTION: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=False),
|
||||
TelemetryCase.DRAFT_NODE_EXECUTION: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=False),
|
||||
TelemetryCase.PROMPT_GENERATION: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=False),
|
||||
# METRIC_LOG — enterprise-only (signal-driven, not trace)
|
||||
TelemetryCase.APP_CREATED: CaseRoute(signal_type=SignalType.METRIC_LOG, ce_eligible=False),
|
||||
TelemetryCase.APP_UPDATED: CaseRoute(signal_type=SignalType.METRIC_LOG, ce_eligible=False),
|
||||
TelemetryCase.APP_DELETED: CaseRoute(signal_type=SignalType.METRIC_LOG, ce_eligible=False),
|
||||
TelemetryCase.FEEDBACK_CREATED: CaseRoute(signal_type=SignalType.METRIC_LOG, ce_eligible=False),
|
||||
}
|
||||
return _case_routing
|
||||
|
||||
|
||||
def __getattr__(name: str) -> dict:
|
||||
"""Lazy module-level access to routing tables."""
|
||||
if name == "CASE_ROUTING":
|
||||
return _get_case_routing()
|
||||
if name == "CASE_TO_TRACE_TASK":
|
||||
return _get_case_to_trace_task()
|
||||
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def is_enterprise_telemetry_enabled() -> bool:
|
||||
try:
|
||||
from enterprise.telemetry.exporter import is_enterprise_telemetry_enabled
|
||||
|
||||
return is_enterprise_telemetry_enabled()
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _handle_payload_sizing(
|
||||
payload: dict[str, Any],
|
||||
tenant_id: str,
|
||||
event_id: str,
|
||||
) -> tuple[dict[str, Any], str | None]:
|
||||
"""Inline or offload payload based on size.
|
||||
|
||||
Returns ``(payload_for_envelope, storage_key | None)``. Payloads
|
||||
exceeding ``PAYLOAD_SIZE_THRESHOLD_BYTES`` are written to object
|
||||
storage and replaced with an empty dict in the envelope.
|
||||
"""
|
||||
try:
|
||||
payload_json = json.dumps(payload)
|
||||
payload_size = len(payload_json.encode("utf-8"))
|
||||
except (TypeError, ValueError):
|
||||
logger.warning("Failed to serialize payload for sizing: event_id=%s", event_id)
|
||||
return payload, None
|
||||
|
||||
if payload_size <= PAYLOAD_SIZE_THRESHOLD_BYTES:
|
||||
return payload, None
|
||||
|
||||
storage_key = f"telemetry/{tenant_id}/{event_id}.json"
|
||||
try:
|
||||
storage.save(storage_key, payload_json.encode("utf-8"))
|
||||
logger.debug("Stored large payload to storage: key=%s, size=%d", storage_key, payload_size)
|
||||
return {}, storage_key
|
||||
except Exception:
|
||||
logger.warning("Failed to store large payload, inlining instead: event_id=%s", event_id, exc_info=True)
|
||||
return payload, None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def emit(
|
||||
case: TelemetryCase,
|
||||
context: dict[str, Any],
|
||||
payload: dict[str, Any],
|
||||
trace_manager: TraceQueueManager | None = None,
|
||||
) -> None:
|
||||
"""Route a telemetry event to the correct pipeline.
|
||||
|
||||
TRACE events are enqueued into ``TraceQueueManager`` (works in both CE
|
||||
and EE). Enterprise-only traces are silently dropped when EE is
|
||||
disabled.
|
||||
|
||||
METRIC_LOG events are dispatched to the enterprise Celery queue;
|
||||
silently dropped when enterprise telemetry is unavailable.
|
||||
"""
|
||||
route = _get_case_routing().get(case)
|
||||
if route is None:
|
||||
logger.warning("Unknown telemetry case: %s, dropping event", case)
|
||||
return
|
||||
|
||||
if not route.ce_eligible and not is_enterprise_telemetry_enabled():
|
||||
logger.debug("Dropping EE-only event: case=%s (EE disabled)", case)
|
||||
return
|
||||
|
||||
if route.signal_type == SignalType.TRACE:
|
||||
_emit_trace(case, context, payload, trace_manager)
|
||||
else:
|
||||
_emit_metric_log(case, context, payload)
|
||||
|
||||
|
||||
def _emit_trace(
|
||||
case: TelemetryCase,
|
||||
context: dict[str, Any],
|
||||
payload: dict[str, Any],
|
||||
trace_manager: TraceQueueManager | None,
|
||||
) -> None:
|
||||
from core.ops.ops_trace_manager import TraceQueueManager as LocalTraceQueueManager
|
||||
from core.ops.ops_trace_manager import TraceTask
|
||||
|
||||
trace_task_name = _get_case_to_trace_task().get(case)
|
||||
if trace_task_name is None:
|
||||
logger.warning("No TraceTaskName mapping for case: %s", case)
|
||||
return
|
||||
|
||||
queue_manager = trace_manager or LocalTraceQueueManager(
|
||||
app_id=context.get("app_id"),
|
||||
user_id=context.get("user_id"),
|
||||
)
|
||||
queue_manager.add_trace_task(TraceTask(trace_task_name, user_id=context.get("user_id"), **payload))
|
||||
logger.debug("Enqueued trace task: case=%s, app_id=%s", case, context.get("app_id"))
|
||||
|
||||
|
||||
def _emit_metric_log(
|
||||
case: TelemetryCase,
|
||||
context: dict[str, Any],
|
||||
payload: dict[str, Any],
|
||||
) -> None:
|
||||
"""Build envelope and dispatch to enterprise Celery queue.
|
||||
|
||||
No-ops when the enterprise telemetry task is not importable (CE mode).
|
||||
"""
|
||||
try:
|
||||
from tasks.enterprise_telemetry_task import process_enterprise_telemetry
|
||||
except ImportError:
|
||||
logger.debug("Enterprise metric/log dispatch unavailable, dropping: case=%s", case)
|
||||
return
|
||||
|
||||
tenant_id = context.get("tenant_id") or ""
|
||||
event_id = str(uuid.uuid4())
|
||||
|
||||
payload_for_envelope, payload_ref = _handle_payload_sizing(payload, tenant_id, event_id)
|
||||
|
||||
from enterprise.telemetry.contracts import TelemetryEnvelope
|
||||
|
||||
envelope = TelemetryEnvelope(
|
||||
case=case,
|
||||
tenant_id=tenant_id,
|
||||
event_id=event_id,
|
||||
payload=payload_for_envelope,
|
||||
metadata={"payload_ref": payload_ref} if payload_ref else None,
|
||||
)
|
||||
|
||||
process_enterprise_telemetry.delay(envelope.model_dump_json())
|
||||
logger.debug(
|
||||
"Enqueued metric/log event: case=%s, tenant_id=%s, event_id=%s",
|
||||
case,
|
||||
tenant_id,
|
||||
event_id,
|
||||
)
|
||||
@ -25,12 +25,14 @@ def sign_tool_file(tool_file_id: str, extension: str, for_external: bool = True)
|
||||
return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
|
||||
|
||||
|
||||
def sign_upload_file(upload_file_id: str, extension: str) -> str:
|
||||
def sign_upload_file_preview_url(upload_file_id: str, extension: str) -> str:
|
||||
"""
|
||||
sign file to get a temporary url for plugin access
|
||||
Sign an upload file to get a temporary image preview URL.
|
||||
|
||||
The URL generated by this function is only for external preview and download,
|
||||
not for internal communication.
|
||||
"""
|
||||
# Use internal URL for plugin/tool file access in Docker environments
|
||||
base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL
|
||||
base_url = dify_config.FILES_URL
|
||||
file_preview_url = f"{base_url}/files/{upload_file_id}/image-preview"
|
||||
|
||||
timestamp = str(int(time.time()))
|
||||
|
||||
@ -246,9 +246,9 @@ class ToolManager:
|
||||
raise ToolProviderNotFoundError(f"builtin provider {provider_id} not found")
|
||||
|
||||
# check if the credential is allowed to be used
|
||||
from core.helper.credential_utils import check_credential_policy_compliance
|
||||
from core.helper.credential_utils import runtime_check_credential_policy_compliance
|
||||
|
||||
check_credential_policy_compliance(
|
||||
runtime_check_credential_policy_compliance(
|
||||
credential_id=builtin_provider.id,
|
||||
provider=provider_id,
|
||||
credential_type=PluginCredentialType.TOOL,
|
||||
@ -1045,10 +1045,9 @@ class ToolManager:
|
||||
continue
|
||||
tool_input = ToolNodeData.ToolInput.model_validate(tool_configurations.get(parameter.name, {}))
|
||||
if tool_input.type == "variable":
|
||||
variable_selector = tool_input.require_variable_selector()
|
||||
variable = variable_pool.get(variable_selector)
|
||||
variable = variable_pool.get(tool_input.value)
|
||||
if variable is None:
|
||||
raise ToolParameterError(f"Variable {variable_selector} does not exist")
|
||||
raise ToolParameterError(f"Variable {tool_input.value} does not exist")
|
||||
parameter_value = variable.value
|
||||
elif tool_input.type == "constant":
|
||||
parameter_value = tool_input.value
|
||||
|
||||
@ -1,24 +1,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import IntEnum, StrEnum, auto
|
||||
from typing import Literal, TypeAlias
|
||||
from typing import Any, Literal, Union
|
||||
|
||||
from pydantic import BaseModel, TypeAdapter, field_validator
|
||||
from pydantic_core.core_schema import ValidationInfo
|
||||
from pydantic import BaseModel
|
||||
|
||||
from core.prompt.entities.advanced_prompt_entities import MemoryConfig
|
||||
from core.tools.entities.tool_entities import ToolSelector
|
||||
from dify_graph.entities.base_node_data import BaseNodeData
|
||||
from dify_graph.enums import BuiltinNodeTypes, NodeType
|
||||
|
||||
AgentInputConstantValue: TypeAlias = (
|
||||
list[ToolSelector] | str | int | float | bool | dict[str, object] | list[object] | None
|
||||
)
|
||||
VariableSelector: TypeAlias = list[str]
|
||||
|
||||
_AGENT_INPUT_VALUE_ADAPTER: TypeAdapter[AgentInputConstantValue] = TypeAdapter(AgentInputConstantValue)
|
||||
_AGENT_VARIABLE_SELECTOR_ADAPTER: TypeAdapter[VariableSelector] = TypeAdapter(VariableSelector)
|
||||
|
||||
|
||||
class AgentNodeData(BaseNodeData):
|
||||
type: NodeType = BuiltinNodeTypes.AGENT
|
||||
@ -32,20 +21,8 @@ class AgentNodeData(BaseNodeData):
|
||||
tool_node_version: str | None = None
|
||||
|
||||
class AgentInput(BaseModel):
|
||||
value: Union[list[str], list[ToolSelector], Any]
|
||||
type: Literal["mixed", "variable", "constant"]
|
||||
value: AgentInputConstantValue | VariableSelector
|
||||
|
||||
@field_validator("value", mode="before")
|
||||
@classmethod
|
||||
def validate_value(
|
||||
cls, value: object, validation_info: ValidationInfo
|
||||
) -> AgentInputConstantValue | VariableSelector:
|
||||
input_type = validation_info.data.get("type")
|
||||
if input_type == "variable":
|
||||
return _AGENT_VARIABLE_SELECTOR_ADAPTER.validate_python(value)
|
||||
if input_type in {"mixed", "constant"}:
|
||||
return _AGENT_INPUT_VALUE_ADAPTER.validate_python(value)
|
||||
raise ValueError(f"Unknown agent input type: {input_type}")
|
||||
|
||||
agent_parameters: dict[str, AgentInput]
|
||||
|
||||
|
||||
@ -1,17 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from collections.abc import Mapping, Sequence
|
||||
from typing import TypeAlias
|
||||
from collections.abc import Sequence
|
||||
from typing import Any, cast
|
||||
|
||||
from packaging.version import Version
|
||||
from pydantic import TypeAdapter, ValidationError
|
||||
from pydantic import ValidationError
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from core.agent.entities import AgentToolEntity
|
||||
from core.agent.plugin_entities import AgentStrategyParameter
|
||||
from core.app.entities.app_invoke_entities import InvokeFrom
|
||||
from core.memory.token_buffer_memory import TokenBufferMemory
|
||||
from core.model_manager import ModelInstance, ModelManager
|
||||
from core.plugin.entities.request import InvokeCredentials
|
||||
@ -29,14 +28,6 @@ from .entities import AgentNodeData, AgentOldVersionModelFeatures, ParamsAutoGen
|
||||
from .exceptions import AgentInputTypeError, AgentVariableNotFoundError
|
||||
from .strategy_protocols import ResolvedAgentStrategy
|
||||
|
||||
JsonObject: TypeAlias = dict[str, object]
|
||||
JsonObjectList: TypeAlias = list[JsonObject]
|
||||
VariableSelector: TypeAlias = list[str]
|
||||
|
||||
_JSON_OBJECT_ADAPTER = TypeAdapter(JsonObject)
|
||||
_JSON_OBJECT_LIST_ADAPTER = TypeAdapter(JsonObjectList)
|
||||
_VARIABLE_SELECTOR_ADAPTER = TypeAdapter(VariableSelector)
|
||||
|
||||
|
||||
class AgentRuntimeSupport:
|
||||
def build_parameters(
|
||||
@ -48,12 +39,12 @@ class AgentRuntimeSupport:
|
||||
strategy: ResolvedAgentStrategy,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
invoke_from: InvokeFrom,
|
||||
invoke_from: Any,
|
||||
for_log: bool = False,
|
||||
) -> dict[str, object]:
|
||||
) -> dict[str, Any]:
|
||||
agent_parameters_dictionary = {parameter.name: parameter for parameter in agent_parameters}
|
||||
|
||||
result: dict[str, object] = {}
|
||||
result: dict[str, Any] = {}
|
||||
for parameter_name in node_data.agent_parameters:
|
||||
parameter = agent_parameters_dictionary.get(parameter_name)
|
||||
if not parameter:
|
||||
@ -63,10 +54,9 @@ class AgentRuntimeSupport:
|
||||
agent_input = node_data.agent_parameters[parameter_name]
|
||||
match agent_input.type:
|
||||
case "variable":
|
||||
variable_selector = _VARIABLE_SELECTOR_ADAPTER.validate_python(agent_input.value)
|
||||
variable = variable_pool.get(variable_selector)
|
||||
variable = variable_pool.get(agent_input.value) # type: ignore[arg-type]
|
||||
if variable is None:
|
||||
raise AgentVariableNotFoundError(str(variable_selector))
|
||||
raise AgentVariableNotFoundError(str(agent_input.value))
|
||||
parameter_value = variable.value
|
||||
case "mixed" | "constant":
|
||||
try:
|
||||
@ -89,38 +79,60 @@ class AgentRuntimeSupport:
|
||||
|
||||
value = parameter_value
|
||||
if parameter.type == "array[tools]":
|
||||
tool_payloads = _JSON_OBJECT_LIST_ADAPTER.validate_python(value)
|
||||
value = self._normalize_tool_payloads(
|
||||
strategy=strategy,
|
||||
tools=tool_payloads,
|
||||
variable_pool=variable_pool,
|
||||
)
|
||||
value = cast(list[dict[str, Any]], value)
|
||||
value = [tool for tool in value if tool.get("enabled", False)]
|
||||
value = self._filter_mcp_type_tool(strategy, value)
|
||||
for tool in value:
|
||||
if "schemas" in tool:
|
||||
tool.pop("schemas")
|
||||
parameters = tool.get("parameters", {})
|
||||
if all(isinstance(v, dict) for _, v in parameters.items()):
|
||||
params = {}
|
||||
for key, param in parameters.items():
|
||||
if param.get("auto", ParamsAutoGenerated.OPEN) in (
|
||||
ParamsAutoGenerated.CLOSE,
|
||||
0,
|
||||
):
|
||||
value_param = param.get("value", {})
|
||||
if value_param and value_param.get("type", "") == "variable":
|
||||
variable_selector = value_param.get("value")
|
||||
if not variable_selector:
|
||||
raise ValueError("Variable selector is missing for a variable-type parameter.")
|
||||
|
||||
variable = variable_pool.get(variable_selector)
|
||||
if variable is None:
|
||||
raise AgentVariableNotFoundError(str(variable_selector))
|
||||
|
||||
params[key] = variable.value
|
||||
else:
|
||||
params[key] = value_param.get("value", "") if value_param is not None else None
|
||||
else:
|
||||
params[key] = None
|
||||
parameters = params
|
||||
tool["settings"] = {k: v.get("value", None) for k, v in tool.get("settings", {}).items()}
|
||||
tool["parameters"] = parameters
|
||||
|
||||
if not for_log:
|
||||
if parameter.type == "array[tools]":
|
||||
value = _JSON_OBJECT_LIST_ADAPTER.validate_python(value)
|
||||
value = cast(list[dict[str, Any]], value)
|
||||
tool_value = []
|
||||
for tool in value:
|
||||
provider_type = self._coerce_tool_provider_type(tool.get("type"))
|
||||
setting_params = self._coerce_json_object(tool.get("settings")) or {}
|
||||
parameters = self._coerce_json_object(tool.get("parameters")) or {}
|
||||
provider_type = ToolProviderType(tool.get("type", ToolProviderType.BUILT_IN))
|
||||
setting_params = tool.get("settings", {})
|
||||
parameters = tool.get("parameters", {})
|
||||
manual_input_params = [key for key, value in parameters.items() if value is not None]
|
||||
|
||||
parameters = {**parameters, **setting_params}
|
||||
provider_id = self._coerce_optional_string(tool.get("provider_name")) or ""
|
||||
tool_name = self._coerce_optional_string(tool.get("tool_name")) or ""
|
||||
plugin_unique_identifier = self._coerce_optional_string(tool.get("plugin_unique_identifier"))
|
||||
credential_id = self._coerce_optional_string(tool.get("credential_id"))
|
||||
entity = AgentToolEntity(
|
||||
provider_id=provider_id,
|
||||
provider_id=tool.get("provider_name", ""),
|
||||
provider_type=provider_type,
|
||||
tool_name=tool_name,
|
||||
tool_name=tool.get("tool_name", ""),
|
||||
tool_parameters=parameters,
|
||||
plugin_unique_identifier=plugin_unique_identifier,
|
||||
credential_id=credential_id,
|
||||
plugin_unique_identifier=tool.get("plugin_unique_identifier", None),
|
||||
credential_id=tool.get("credential_id", None),
|
||||
)
|
||||
|
||||
extra = self._coerce_json_object(tool.get("extra")) or {}
|
||||
extra = tool.get("extra", {})
|
||||
|
||||
runtime_variable_pool: VariablePool | None = None
|
||||
if node_data.version != "1" or node_data.tool_node_version is not None:
|
||||
@ -133,9 +145,8 @@ class AgentRuntimeSupport:
|
||||
runtime_variable_pool,
|
||||
)
|
||||
if tool_runtime.entity.description:
|
||||
description_override = self._coerce_optional_string(extra.get("description"))
|
||||
tool_runtime.entity.description.llm = (
|
||||
description_override or tool_runtime.entity.description.llm
|
||||
extra.get("description", "") or tool_runtime.entity.description.llm
|
||||
)
|
||||
for tool_runtime_params in tool_runtime.entity.parameters:
|
||||
tool_runtime_params.form = (
|
||||
@ -156,13 +167,13 @@ class AgentRuntimeSupport:
|
||||
{
|
||||
**tool_runtime.entity.model_dump(mode="json"),
|
||||
"runtime_parameters": runtime_parameters,
|
||||
"credential_id": credential_id,
|
||||
"credential_id": tool.get("credential_id", None),
|
||||
"provider_type": provider_type.value,
|
||||
}
|
||||
)
|
||||
value = tool_value
|
||||
if parameter.type == AgentStrategyParameter.AgentStrategyParameterType.MODEL_SELECTOR:
|
||||
value = _JSON_OBJECT_ADAPTER.validate_python(value)
|
||||
value = cast(dict[str, Any], value)
|
||||
model_instance, model_schema = self.fetch_model(tenant_id=tenant_id, value=value)
|
||||
history_prompt_messages = []
|
||||
if node_data.memory:
|
||||
@ -188,27 +199,17 @@ class AgentRuntimeSupport:
|
||||
|
||||
return result
|
||||
|
||||
def build_credentials(self, *, parameters: Mapping[str, object]) -> InvokeCredentials:
|
||||
def build_credentials(self, *, parameters: dict[str, Any]) -> InvokeCredentials:
|
||||
credentials = InvokeCredentials()
|
||||
credentials.tool_credentials = {}
|
||||
tools = parameters.get("tools")
|
||||
if not isinstance(tools, list):
|
||||
return credentials
|
||||
|
||||
for raw_tool in tools:
|
||||
tool = self._coerce_json_object(raw_tool)
|
||||
if tool is None:
|
||||
continue
|
||||
for tool in parameters.get("tools", []):
|
||||
if not tool.get("credential_id"):
|
||||
continue
|
||||
try:
|
||||
identity = ToolIdentity.model_validate(tool.get("identity", {}))
|
||||
except ValidationError:
|
||||
continue
|
||||
credential_id = self._coerce_optional_string(tool.get("credential_id"))
|
||||
if credential_id is None:
|
||||
continue
|
||||
credentials.tool_credentials[identity.provider] = credential_id
|
||||
credentials.tool_credentials[identity.provider] = tool.get("credential_id", None)
|
||||
return credentials
|
||||
|
||||
def fetch_memory(
|
||||
@ -231,14 +232,14 @@ class AgentRuntimeSupport:
|
||||
|
||||
return TokenBufferMemory(conversation=conversation, model_instance=model_instance)
|
||||
|
||||
def fetch_model(self, *, tenant_id: str, value: Mapping[str, object]) -> tuple[ModelInstance, AIModelEntity | None]:
|
||||
def fetch_model(self, *, tenant_id: str, value: dict[str, Any]) -> tuple[ModelInstance, AIModelEntity | None]:
|
||||
provider_manager = ProviderManager()
|
||||
provider_model_bundle = provider_manager.get_provider_model_bundle(
|
||||
tenant_id=tenant_id,
|
||||
provider=str(value.get("provider", "")),
|
||||
provider=value.get("provider", ""),
|
||||
model_type=ModelType.LLM,
|
||||
)
|
||||
model_name = str(value.get("model", ""))
|
||||
model_name = value.get("model", "")
|
||||
model_credentials = provider_model_bundle.configuration.get_current_credentials(
|
||||
model_type=ModelType.LLM,
|
||||
model=model_name,
|
||||
@ -248,7 +249,7 @@ class AgentRuntimeSupport:
|
||||
model_instance = ModelManager().get_model_instance(
|
||||
tenant_id=tenant_id,
|
||||
provider=provider_name,
|
||||
model_type=ModelType(str(value.get("model_type", ""))),
|
||||
model_type=ModelType(value.get("model_type", "")),
|
||||
model=model_name,
|
||||
)
|
||||
model_schema = model_type_instance.get_model_schema(model_name, model_credentials)
|
||||
@ -267,88 +268,9 @@ class AgentRuntimeSupport:
|
||||
@staticmethod
|
||||
def _filter_mcp_type_tool(
|
||||
strategy: ResolvedAgentStrategy,
|
||||
tools: JsonObjectList,
|
||||
) -> JsonObjectList:
|
||||
tools: list[dict[str, Any]],
|
||||
) -> list[dict[str, Any]]:
|
||||
meta_version = strategy.meta_version
|
||||
if meta_version and Version(meta_version) > Version("0.0.1"):
|
||||
return tools
|
||||
return [tool for tool in tools if tool.get("type") != ToolProviderType.MCP]
|
||||
|
||||
def _normalize_tool_payloads(
|
||||
self,
|
||||
*,
|
||||
strategy: ResolvedAgentStrategy,
|
||||
tools: JsonObjectList,
|
||||
variable_pool: VariablePool,
|
||||
) -> JsonObjectList:
|
||||
enabled_tools = [dict(tool) for tool in tools if bool(tool.get("enabled", False))]
|
||||
normalized_tools = self._filter_mcp_type_tool(strategy, enabled_tools)
|
||||
for tool in normalized_tools:
|
||||
tool.pop("schemas", None)
|
||||
tool["parameters"] = self._resolve_tool_parameters(tool=tool, variable_pool=variable_pool)
|
||||
tool["settings"] = self._resolve_tool_settings(tool)
|
||||
return normalized_tools
|
||||
|
||||
def _resolve_tool_parameters(self, *, tool: Mapping[str, object], variable_pool: VariablePool) -> JsonObject:
|
||||
parameter_configs = self._coerce_named_json_objects(tool.get("parameters"))
|
||||
if parameter_configs is None:
|
||||
raw_parameters = self._coerce_json_object(tool.get("parameters"))
|
||||
return raw_parameters or {}
|
||||
|
||||
resolved_parameters: JsonObject = {}
|
||||
for key, parameter_config in parameter_configs.items():
|
||||
if parameter_config.get("auto", ParamsAutoGenerated.OPEN) in (ParamsAutoGenerated.CLOSE, 0):
|
||||
value_param = self._coerce_json_object(parameter_config.get("value"))
|
||||
if value_param and value_param.get("type") == "variable":
|
||||
variable_selector = _VARIABLE_SELECTOR_ADAPTER.validate_python(value_param.get("value"))
|
||||
variable = variable_pool.get(variable_selector)
|
||||
if variable is None:
|
||||
raise AgentVariableNotFoundError(str(variable_selector))
|
||||
resolved_parameters[key] = variable.value
|
||||
else:
|
||||
resolved_parameters[key] = value_param.get("value", "") if value_param is not None else None
|
||||
else:
|
||||
resolved_parameters[key] = None
|
||||
|
||||
return resolved_parameters
|
||||
|
||||
@staticmethod
|
||||
def _resolve_tool_settings(tool: Mapping[str, object]) -> JsonObject:
|
||||
settings = AgentRuntimeSupport._coerce_named_json_objects(tool.get("settings"))
|
||||
if settings is None:
|
||||
return {}
|
||||
return {key: setting.get("value") for key, setting in settings.items()}
|
||||
|
||||
@staticmethod
|
||||
def _coerce_json_object(value: object) -> JsonObject | None:
|
||||
try:
|
||||
return _JSON_OBJECT_ADAPTER.validate_python(value)
|
||||
except ValidationError:
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _coerce_optional_string(value: object) -> str | None:
|
||||
return value if isinstance(value, str) else None
|
||||
|
||||
@staticmethod
|
||||
def _coerce_tool_provider_type(value: object) -> ToolProviderType:
|
||||
if isinstance(value, ToolProviderType):
|
||||
return value
|
||||
if isinstance(value, str):
|
||||
return ToolProviderType(value)
|
||||
return ToolProviderType.BUILT_IN
|
||||
|
||||
@classmethod
|
||||
def _coerce_named_json_objects(cls, value: object) -> dict[str, JsonObject] | None:
|
||||
if not isinstance(value, dict):
|
||||
return None
|
||||
|
||||
coerced: dict[str, JsonObject] = {}
|
||||
for key, item in value.items():
|
||||
if not isinstance(key, str):
|
||||
return None
|
||||
json_object = cls._coerce_json_object(item)
|
||||
if json_object is None:
|
||||
return None
|
||||
coerced[key] = json_object
|
||||
return coerced
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
import logging
|
||||
import time
|
||||
from collections.abc import Generator, Mapping, Sequence
|
||||
from typing import Any, TypeAlias, cast
|
||||
from typing import Any, cast
|
||||
|
||||
from configs import dify_config
|
||||
from core.app.apps.exc import GenerateTaskStoppedError
|
||||
@ -32,13 +32,6 @@ from models.workflow import Workflow
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SpecialValueScalar: TypeAlias = str | int | float | bool | None
|
||||
SpecialValue: TypeAlias = SpecialValueScalar | File | Mapping[str, "SpecialValue"] | list["SpecialValue"]
|
||||
SerializedSpecialValue: TypeAlias = (
|
||||
SpecialValueScalar | dict[str, "SerializedSpecialValue"] | list["SerializedSpecialValue"]
|
||||
)
|
||||
SingleNodeGraphConfig: TypeAlias = dict[str, list[dict[str, object]]]
|
||||
|
||||
|
||||
class _WorkflowChildEngineBuilder:
|
||||
@staticmethod
|
||||
@ -283,10 +276,10 @@ class WorkflowEntry:
|
||||
@staticmethod
|
||||
def _create_single_node_graph(
|
||||
node_id: str,
|
||||
node_data: Mapping[str, object],
|
||||
node_data: dict[str, Any],
|
||||
node_width: int = 114,
|
||||
node_height: int = 514,
|
||||
) -> SingleNodeGraphConfig:
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Create a minimal graph structure for testing a single node in isolation.
|
||||
|
||||
@ -296,14 +289,14 @@ class WorkflowEntry:
|
||||
:param node_height: height for UI layout (default: 100)
|
||||
:return: graph dictionary with start node and target node
|
||||
"""
|
||||
node_config: dict[str, object] = {
|
||||
node_config = {
|
||||
"id": node_id,
|
||||
"width": node_width,
|
||||
"height": node_height,
|
||||
"type": "custom",
|
||||
"data": dict(node_data),
|
||||
"data": node_data,
|
||||
}
|
||||
start_node_config: dict[str, object] = {
|
||||
start_node_config = {
|
||||
"id": "start",
|
||||
"width": node_width,
|
||||
"height": node_height,
|
||||
@ -328,12 +321,7 @@ class WorkflowEntry:
|
||||
|
||||
@classmethod
|
||||
def run_free_node(
|
||||
cls,
|
||||
node_data: Mapping[str, object],
|
||||
node_id: str,
|
||||
tenant_id: str,
|
||||
user_id: str,
|
||||
user_inputs: Mapping[str, object],
|
||||
cls, node_data: dict[str, Any], node_id: str, tenant_id: str, user_id: str, user_inputs: dict[str, Any]
|
||||
) -> tuple[Node, Generator[GraphNodeEventBase, None, None]]:
|
||||
"""
|
||||
Run free node
|
||||
@ -351,8 +339,6 @@ class WorkflowEntry:
|
||||
graph_dict = cls._create_single_node_graph(node_id, node_data)
|
||||
|
||||
node_type = node_data.get("type", "")
|
||||
if not isinstance(node_type, str):
|
||||
raise ValueError("Node type must be a string")
|
||||
if node_type not in {BuiltinNodeTypes.PARAMETER_EXTRACTOR, BuiltinNodeTypes.QUESTION_CLASSIFIER}:
|
||||
raise ValueError(f"Node type {node_type} not supported")
|
||||
|
||||
@ -383,7 +369,7 @@ class WorkflowEntry:
|
||||
graph_runtime_state = GraphRuntimeState(variable_pool=variable_pool, start_at=time.perf_counter())
|
||||
|
||||
# init workflow run state
|
||||
node_config = NodeConfigDictAdapter.validate_python({"id": node_id, "data": dict(node_data)})
|
||||
node_config = NodeConfigDictAdapter.validate_python({"id": node_id, "data": node_data})
|
||||
node_factory = DifyNodeFactory(
|
||||
graph_init_params=graph_init_params,
|
||||
graph_runtime_state=graph_runtime_state,
|
||||
@ -419,34 +405,30 @@ class WorkflowEntry:
|
||||
raise WorkflowNodeRunFailedError(node=node, err_msg=str(e))
|
||||
|
||||
@staticmethod
|
||||
def handle_special_values(value: Mapping[str, SpecialValue] | None) -> dict[str, SerializedSpecialValue] | None:
|
||||
def handle_special_values(value: Mapping[str, Any] | None) -> Mapping[str, Any] | None:
|
||||
# NOTE(QuantumGhost): Avoid using this function in new code.
|
||||
# Keep values structured as long as possible and only convert to dict
|
||||
# immediately before serialization (e.g., JSON serialization) to maintain
|
||||
# data integrity and type information.
|
||||
result = WorkflowEntry._handle_special_values(value)
|
||||
if result is None:
|
||||
return None
|
||||
if isinstance(result, dict):
|
||||
return result
|
||||
raise TypeError("handle_special_values expects a mapping input")
|
||||
return result if isinstance(result, Mapping) or result is None else dict(result)
|
||||
|
||||
@staticmethod
|
||||
def _handle_special_values(value: SpecialValue) -> SerializedSpecialValue:
|
||||
def _handle_special_values(value: Any):
|
||||
if value is None:
|
||||
return value
|
||||
if isinstance(value, Mapping):
|
||||
res: dict[str, SerializedSpecialValue] = {}
|
||||
if isinstance(value, dict):
|
||||
res = {}
|
||||
for k, v in value.items():
|
||||
res[k] = WorkflowEntry._handle_special_values(v)
|
||||
return res
|
||||
if isinstance(value, list):
|
||||
res_list: list[SerializedSpecialValue] = []
|
||||
res_list = []
|
||||
for item in value:
|
||||
res_list.append(WorkflowEntry._handle_special_values(item))
|
||||
return res_list
|
||||
if isinstance(value, File):
|
||||
return dict(value.to_dict())
|
||||
return value.to_dict()
|
||||
return value
|
||||
|
||||
@classmethod
|
||||
|
||||
@ -112,8 +112,6 @@ def _get_encoded_string(f: File, /) -> str:
|
||||
data = _download_file_content(f.storage_key)
|
||||
case FileTransferMethod.DATASOURCE_FILE:
|
||||
data = _download_file_content(f.storage_key)
|
||||
case _:
|
||||
raise ValueError(f"Unsupported transfer method: {f.transfer_method}")
|
||||
|
||||
return base64.b64encode(data).decode("utf-8")
|
||||
|
||||
|
||||
@ -10,7 +10,6 @@ from pydantic import TypeAdapter
|
||||
from dify_graph.entities.graph_config import NodeConfigDict
|
||||
from dify_graph.enums import ErrorStrategy, NodeExecutionType, NodeState
|
||||
from dify_graph.nodes.base.node import Node
|
||||
from libs.typing import is_str
|
||||
|
||||
from .edge import Edge
|
||||
from .validation import get_graph_validator
|
||||
@ -102,7 +101,7 @@ class Graph:
|
||||
source = edge_config.get("source")
|
||||
target = edge_config.get("target")
|
||||
|
||||
if not is_str(source) or not is_str(target):
|
||||
if not isinstance(source, str) or not isinstance(target, str):
|
||||
continue
|
||||
|
||||
# Create edge
|
||||
@ -110,7 +109,7 @@ class Graph:
|
||||
edge_counter += 1
|
||||
|
||||
source_handle = edge_config.get("sourceHandle", "source")
|
||||
if not is_str(source_handle):
|
||||
if not isinstance(source_handle, str):
|
||||
continue
|
||||
|
||||
edge = Edge(
|
||||
|
||||
@ -133,8 +133,6 @@ class ExecutionLimitsLayer(GraphEngineLayer):
|
||||
elif limit_type == LimitType.TIME_LIMIT:
|
||||
elapsed_time = time.time() - self.start_time if self.start_time else 0
|
||||
reason = f"Maximum execution time exceeded: {elapsed_time:.2f}s > {self.max_time}s"
|
||||
else:
|
||||
return
|
||||
|
||||
self.logger.warning("Execution limit exceeded: %s", reason)
|
||||
|
||||
|
||||
@ -336,7 +336,12 @@ class Node(Generic[NodeDataT]):
|
||||
|
||||
def _restore_execution_id_from_runtime_state(self) -> str | None:
|
||||
graph_execution = self.graph_runtime_state.graph_execution
|
||||
node_executions = graph_execution.node_executions
|
||||
try:
|
||||
node_executions = graph_execution.node_executions
|
||||
except AttributeError:
|
||||
return None
|
||||
if not isinstance(node_executions, dict):
|
||||
return None
|
||||
node_execution = node_executions.get(self._node_id)
|
||||
if node_execution is None:
|
||||
return None
|
||||
@ -390,7 +395,8 @@ class Node(Generic[NodeDataT]):
|
||||
if isinstance(event, NodeEventBase): # pyright: ignore[reportUnnecessaryIsInstance]
|
||||
yield self._dispatch(event)
|
||||
elif isinstance(event, GraphNodeEventBase) and not event.in_iteration_id and not event.in_loop_id: # pyright: ignore[reportUnnecessaryIsInstance]
|
||||
yield event.model_copy(update={"id": self.execution_id})
|
||||
event.id = self.execution_id
|
||||
yield event
|
||||
else:
|
||||
yield event
|
||||
except Exception as e:
|
||||
|
||||
@ -443,10 +443,7 @@ def _extract_text_from_docx(file_content: bytes) -> str:
|
||||
# Keep track of paragraph and table positions
|
||||
content_items: list[tuple[int, str, Table | Paragraph]] = []
|
||||
|
||||
doc_body = getattr(doc.element, "body", None)
|
||||
if doc_body is None:
|
||||
raise TextExtractionError("DOCX body not found")
|
||||
it = iter(doc_body)
|
||||
it = iter(doc.element.body)
|
||||
part = next(it, None)
|
||||
i = 0
|
||||
while part is not None:
|
||||
|
||||
@ -1,8 +1,7 @@
|
||||
from collections.abc import Mapping, Sequence
|
||||
from typing import Literal, NotRequired
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate, MemoryConfig
|
||||
from dify_graph.entities.base_node_data import BaseNodeData
|
||||
@ -11,17 +10,11 @@ from dify_graph.model_runtime.entities import ImagePromptMessageContent, LLMMode
|
||||
from dify_graph.nodes.base.entities import VariableSelector
|
||||
|
||||
|
||||
class StructuredOutputConfig(TypedDict):
|
||||
schema: Mapping[str, object]
|
||||
name: NotRequired[str]
|
||||
description: NotRequired[str]
|
||||
|
||||
|
||||
class ModelConfig(BaseModel):
|
||||
provider: str
|
||||
name: str
|
||||
mode: LLMMode
|
||||
completion_params: dict[str, object] = Field(default_factory=dict)
|
||||
completion_params: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class ContextConfig(BaseModel):
|
||||
@ -40,7 +33,7 @@ class VisionConfig(BaseModel):
|
||||
|
||||
@field_validator("configs", mode="before")
|
||||
@classmethod
|
||||
def convert_none_configs(cls, v: object):
|
||||
def convert_none_configs(cls, v: Any):
|
||||
if v is None:
|
||||
return VisionConfigOptions()
|
||||
return v
|
||||
@ -51,7 +44,7 @@ class PromptConfig(BaseModel):
|
||||
|
||||
@field_validator("jinja2_variables", mode="before")
|
||||
@classmethod
|
||||
def convert_none_jinja2_variables(cls, v: object):
|
||||
def convert_none_jinja2_variables(cls, v: Any):
|
||||
if v is None:
|
||||
return []
|
||||
return v
|
||||
@ -74,7 +67,7 @@ class LLMNodeData(BaseNodeData):
|
||||
memory: MemoryConfig | None = None
|
||||
context: ContextConfig
|
||||
vision: VisionConfig = Field(default_factory=VisionConfig)
|
||||
structured_output: StructuredOutputConfig | None = None
|
||||
structured_output: Mapping[str, Any] | None = None
|
||||
# We used 'structured_output_enabled' in the past, but it's not a good name.
|
||||
structured_output_switch_on: bool = Field(False, alias="structured_output_enabled")
|
||||
reasoning_format: Literal["separated", "tagged"] = Field(
|
||||
@ -97,30 +90,11 @@ class LLMNodeData(BaseNodeData):
|
||||
|
||||
@field_validator("prompt_config", mode="before")
|
||||
@classmethod
|
||||
def convert_none_prompt_config(cls, v: object):
|
||||
def convert_none_prompt_config(cls, v: Any):
|
||||
if v is None:
|
||||
return PromptConfig()
|
||||
return v
|
||||
|
||||
@field_validator("structured_output", mode="before")
|
||||
@classmethod
|
||||
def convert_legacy_structured_output(cls, v: object) -> StructuredOutputConfig | None | object:
|
||||
if not isinstance(v, Mapping):
|
||||
return v
|
||||
|
||||
schema = v.get("schema")
|
||||
if schema is None:
|
||||
return None
|
||||
|
||||
normalized: StructuredOutputConfig = {"schema": schema}
|
||||
name = v.get("name")
|
||||
description = v.get("description")
|
||||
if isinstance(name, str):
|
||||
normalized["name"] = name
|
||||
if isinstance(description, str):
|
||||
normalized["description"] = description
|
||||
return normalized
|
||||
|
||||
@property
|
||||
def structured_output_enabled(self) -> bool:
|
||||
return self.structured_output_switch_on and self.structured_output is not None
|
||||
|
||||
@ -9,7 +9,6 @@ import time
|
||||
from collections.abc import Generator, Mapping, Sequence
|
||||
from typing import TYPE_CHECKING, Any, Literal
|
||||
|
||||
from pydantic import TypeAdapter
|
||||
from sqlalchemy import select
|
||||
|
||||
from core.llm_generator.output_parser.errors import OutputParserError
|
||||
@ -17,7 +16,7 @@ from core.llm_generator.output_parser.structured_output import invoke_llm_with_s
|
||||
from core.model_manager import ModelInstance
|
||||
from core.prompt.entities.advanced_prompt_entities import CompletionModelPromptTemplate, MemoryConfig
|
||||
from core.prompt.utils.prompt_message_util import PromptMessageUtil
|
||||
from core.tools.signature import sign_upload_file
|
||||
from core.tools.signature import sign_upload_file_preview_url
|
||||
from dify_graph.constants import SYSTEM_VARIABLE_NODE_ID
|
||||
from dify_graph.entities import GraphInitParams
|
||||
from dify_graph.entities.graph_config import NodeConfigDict
|
||||
@ -75,7 +74,6 @@ from .entities import (
|
||||
LLMNodeChatModelMessage,
|
||||
LLMNodeCompletionModelPromptTemplate,
|
||||
LLMNodeData,
|
||||
StructuredOutputConfig,
|
||||
)
|
||||
from .exc import (
|
||||
InvalidContextStructureError,
|
||||
@ -90,7 +88,6 @@ if TYPE_CHECKING:
|
||||
from dify_graph.runtime import GraphRuntimeState
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
_JSON_OBJECT_ADAPTER = TypeAdapter(dict[str, object])
|
||||
|
||||
|
||||
class LLMNode(Node[LLMNodeData]):
|
||||
@ -361,7 +358,7 @@ class LLMNode(Node[LLMNodeData]):
|
||||
stop: Sequence[str] | None = None,
|
||||
user_id: str,
|
||||
structured_output_enabled: bool,
|
||||
structured_output: StructuredOutputConfig | None = None,
|
||||
structured_output: Mapping[str, Any] | None = None,
|
||||
file_saver: LLMFileSaver,
|
||||
file_outputs: list[File],
|
||||
node_id: str,
|
||||
@ -374,10 +371,8 @@ class LLMNode(Node[LLMNodeData]):
|
||||
model_schema = llm_utils.fetch_model_schema(model_instance=model_instance)
|
||||
|
||||
if structured_output_enabled:
|
||||
if structured_output is None:
|
||||
raise LLMNodeError("Please provide a valid structured output schema")
|
||||
output_schema = LLMNode.fetch_structured_output_schema(
|
||||
structured_output=structured_output,
|
||||
structured_output=structured_output or {},
|
||||
)
|
||||
request_start_time = time.perf_counter()
|
||||
|
||||
@ -717,7 +712,7 @@ class LLMNode(Node[LLMNodeData]):
|
||||
related_id=upload_file.id,
|
||||
size=upload_file.size,
|
||||
storage_key=upload_file.key,
|
||||
url=sign_upload_file(upload_file.id, upload_file.extension),
|
||||
url=sign_upload_file_preview_url(upload_file.id, upload_file.extension),
|
||||
)
|
||||
context_files.append(attachment_info)
|
||||
yield RunRetrieverResourceEvent(
|
||||
@ -929,12 +924,6 @@ class LLMNode(Node[LLMNodeData]):
|
||||
# Extract clean text and reasoning from <think> tags
|
||||
clean_text, reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
|
||||
|
||||
structured_output = (
|
||||
dict(invoke_result.structured_output)
|
||||
if isinstance(invoke_result, LLMResultWithStructuredOutput) and invoke_result.structured_output is not None
|
||||
else None
|
||||
)
|
||||
|
||||
event = ModelInvokeCompletedEvent(
|
||||
# Use clean_text for separated mode, full_text for tagged mode
|
||||
text=clean_text if reasoning_format == "separated" else full_text,
|
||||
@ -943,7 +932,7 @@ class LLMNode(Node[LLMNodeData]):
|
||||
# Reasoning content for workflow variables and downstream nodes
|
||||
reasoning_content=reasoning_content,
|
||||
# Pass structured output if enabled
|
||||
structured_output=structured_output,
|
||||
structured_output=getattr(invoke_result, "structured_output", None),
|
||||
)
|
||||
if request_latency is not None:
|
||||
event.usage.latency = round(request_latency, 3)
|
||||
@ -977,18 +966,27 @@ class LLMNode(Node[LLMNodeData]):
|
||||
@staticmethod
|
||||
def fetch_structured_output_schema(
|
||||
*,
|
||||
structured_output: StructuredOutputConfig,
|
||||
) -> dict[str, object]:
|
||||
structured_output: Mapping[str, Any],
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Fetch the structured output schema from the node data.
|
||||
|
||||
Returns:
|
||||
dict[str, object]: The structured output schema
|
||||
dict[str, Any]: The structured output schema
|
||||
"""
|
||||
schema = structured_output.get("schema")
|
||||
if not schema:
|
||||
if not structured_output:
|
||||
raise LLMNodeError("Please provide a valid structured output schema")
|
||||
return _JSON_OBJECT_ADAPTER.validate_python(schema)
|
||||
structured_output_schema = json.dumps(structured_output.get("schema", {}), ensure_ascii=False)
|
||||
if not structured_output_schema:
|
||||
raise LLMNodeError("Please provide a valid structured output schema")
|
||||
|
||||
try:
|
||||
schema = json.loads(structured_output_schema)
|
||||
if not isinstance(schema, dict):
|
||||
raise LLMNodeError("structured_output_schema must be a JSON object")
|
||||
return schema
|
||||
except json.JSONDecodeError:
|
||||
raise LLMNodeError("structured_output_schema is not valid JSON format")
|
||||
|
||||
@staticmethod
|
||||
def _save_multimodal_output_and_convert_result_to_markdown(
|
||||
|
||||
@ -1,10 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import StrEnum
|
||||
from typing import Annotated, Any, Literal, TypeAlias, cast
|
||||
from typing import Annotated, Any, Literal
|
||||
|
||||
from pydantic import AfterValidator, BaseModel, Field, TypeAdapter, field_validator
|
||||
from pydantic_core.core_schema import ValidationInfo
|
||||
from pydantic import AfterValidator, BaseModel, Field, field_validator
|
||||
|
||||
from dify_graph.entities.base_node_data import BaseNodeData
|
||||
from dify_graph.enums import BuiltinNodeTypes, NodeType
|
||||
@ -12,12 +9,6 @@ from dify_graph.nodes.base import BaseLoopNodeData, BaseLoopState
|
||||
from dify_graph.utils.condition.entities import Condition
|
||||
from dify_graph.variables.types import SegmentType
|
||||
|
||||
LoopValue: TypeAlias = str | int | float | bool | None | dict[str, Any] | list[Any]
|
||||
LoopValueMapping: TypeAlias = dict[str, LoopValue]
|
||||
VariableSelector: TypeAlias = list[str]
|
||||
|
||||
_VARIABLE_SELECTOR_ADAPTER: TypeAdapter[VariableSelector] = TypeAdapter(VariableSelector)
|
||||
|
||||
_VALID_VAR_TYPE = frozenset(
|
||||
[
|
||||
SegmentType.STRING,
|
||||
@ -38,36 +29,6 @@ def _is_valid_var_type(seg_type: SegmentType) -> SegmentType:
|
||||
return seg_type
|
||||
|
||||
|
||||
def _validate_loop_value(value: object) -> LoopValue:
|
||||
if value is None or isinstance(value, (str, int, float, bool)):
|
||||
return cast(LoopValue, value)
|
||||
|
||||
if isinstance(value, list):
|
||||
return [_validate_loop_value(item) for item in value]
|
||||
|
||||
if isinstance(value, dict):
|
||||
normalized: dict[str, LoopValue] = {}
|
||||
for key, item in value.items():
|
||||
if not isinstance(key, str):
|
||||
raise TypeError("Loop values only support string object keys")
|
||||
normalized[key] = _validate_loop_value(item)
|
||||
return normalized
|
||||
|
||||
raise TypeError("Loop values must be JSON-like primitives, arrays, or objects")
|
||||
|
||||
|
||||
def _validate_loop_value_mapping(value: object) -> LoopValueMapping:
|
||||
if not isinstance(value, dict):
|
||||
raise TypeError("Loop outputs must be an object")
|
||||
|
||||
normalized: LoopValueMapping = {}
|
||||
for key, item in value.items():
|
||||
if not isinstance(key, str):
|
||||
raise TypeError("Loop output keys must be strings")
|
||||
normalized[key] = _validate_loop_value(item)
|
||||
return normalized
|
||||
|
||||
|
||||
class LoopVariableData(BaseModel):
|
||||
"""
|
||||
Loop Variable Data.
|
||||
@ -76,29 +37,7 @@ class LoopVariableData(BaseModel):
|
||||
label: str
|
||||
var_type: Annotated[SegmentType, AfterValidator(_is_valid_var_type)]
|
||||
value_type: Literal["variable", "constant"]
|
||||
value: LoopValue | VariableSelector | None = None
|
||||
|
||||
@field_validator("value", mode="before")
|
||||
@classmethod
|
||||
def validate_value(cls, value: object, validation_info: ValidationInfo) -> LoopValue | VariableSelector | None:
|
||||
value_type = validation_info.data.get("value_type")
|
||||
if value_type == "variable":
|
||||
if value is None:
|
||||
raise ValueError("Variable loop inputs require a selector")
|
||||
return _VARIABLE_SELECTOR_ADAPTER.validate_python(value)
|
||||
if value_type == "constant":
|
||||
return _validate_loop_value(value)
|
||||
raise ValueError(f"Unknown loop variable value type: {value_type}")
|
||||
|
||||
def require_variable_selector(self) -> VariableSelector:
|
||||
if self.value_type != "variable":
|
||||
raise ValueError(f"Expected variable loop input, got {self.value_type}")
|
||||
return _VARIABLE_SELECTOR_ADAPTER.validate_python(self.value)
|
||||
|
||||
def require_constant_value(self) -> LoopValue:
|
||||
if self.value_type != "constant":
|
||||
raise ValueError(f"Expected constant loop input, got {self.value_type}")
|
||||
return _validate_loop_value(self.value)
|
||||
value: Any | list[str] | None = None
|
||||
|
||||
|
||||
class LoopNodeData(BaseLoopNodeData):
|
||||
@ -107,14 +46,14 @@ class LoopNodeData(BaseLoopNodeData):
|
||||
break_conditions: list[Condition] # Conditions to break the loop
|
||||
logical_operator: Literal["and", "or"]
|
||||
loop_variables: list[LoopVariableData] | None = Field(default_factory=list[LoopVariableData])
|
||||
outputs: LoopValueMapping = Field(default_factory=dict)
|
||||
outputs: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
@field_validator("outputs", mode="before")
|
||||
@classmethod
|
||||
def validate_outputs(cls, value: object) -> LoopValueMapping:
|
||||
if value is None:
|
||||
def validate_outputs(cls, v):
|
||||
if v is None:
|
||||
return {}
|
||||
return _validate_loop_value_mapping(value)
|
||||
return v
|
||||
|
||||
|
||||
class LoopStartNodeData(BaseNodeData):
|
||||
@ -138,8 +77,8 @@ class LoopState(BaseLoopState):
|
||||
Loop State.
|
||||
"""
|
||||
|
||||
outputs: list[LoopValue] = Field(default_factory=list)
|
||||
current_output: LoopValue | None = None
|
||||
outputs: list[Any] = Field(default_factory=list)
|
||||
current_output: Any = None
|
||||
|
||||
class MetaData(BaseLoopState.MetaData):
|
||||
"""
|
||||
@ -148,7 +87,7 @@ class LoopState(BaseLoopState):
|
||||
|
||||
loop_length: int
|
||||
|
||||
def get_last_output(self) -> LoopValue | None:
|
||||
def get_last_output(self) -> Any:
|
||||
"""
|
||||
Get last output.
|
||||
"""
|
||||
@ -156,7 +95,7 @@ class LoopState(BaseLoopState):
|
||||
return self.outputs[-1]
|
||||
return None
|
||||
|
||||
def get_current_output(self) -> LoopValue | None:
|
||||
def get_current_output(self) -> Any:
|
||||
"""
|
||||
Get current output.
|
||||
"""
|
||||
|
||||
@ -3,7 +3,7 @@ import json
|
||||
import logging
|
||||
from collections.abc import Callable, Generator, Mapping, Sequence
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING, Literal, cast
|
||||
from typing import TYPE_CHECKING, Any, Literal, cast
|
||||
|
||||
from dify_graph.entities.graph_config import NodeConfigDictAdapter
|
||||
from dify_graph.enums import (
|
||||
@ -29,7 +29,7 @@ from dify_graph.node_events import (
|
||||
)
|
||||
from dify_graph.nodes.base import LLMUsageTrackingMixin
|
||||
from dify_graph.nodes.base.node import Node
|
||||
from dify_graph.nodes.loop.entities import LoopCompletedReason, LoopNodeData, LoopValue, LoopVariableData
|
||||
from dify_graph.nodes.loop.entities import LoopCompletedReason, LoopNodeData, LoopVariableData
|
||||
from dify_graph.utils.condition.processor import ConditionProcessor
|
||||
from dify_graph.variables import Segment, SegmentType
|
||||
from factories.variable_factory import TypeMismatchError, build_segment_with_type, segment_to_variable
|
||||
@ -60,7 +60,7 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]):
|
||||
break_conditions = self.node_data.break_conditions
|
||||
logical_operator = self.node_data.logical_operator
|
||||
|
||||
inputs: dict[str, object] = {"loop_count": loop_count}
|
||||
inputs = {"loop_count": loop_count}
|
||||
|
||||
if not self.node_data.start_node_id:
|
||||
raise ValueError(f"field start_node_id in loop {self._node_id} not found")
|
||||
@ -68,14 +68,12 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]):
|
||||
root_node_id = self.node_data.start_node_id
|
||||
|
||||
# Initialize loop variables in the original variable pool
|
||||
loop_variable_selectors: dict[str, list[str]] = {}
|
||||
loop_variable_selectors = {}
|
||||
if self.node_data.loop_variables:
|
||||
value_processor: dict[Literal["constant", "variable"], Callable[[LoopVariableData], Segment | None]] = {
|
||||
"constant": lambda var: self._get_segment_for_constant(var.var_type, var.require_constant_value()),
|
||||
"constant": lambda var: self._get_segment_for_constant(var.var_type, var.value),
|
||||
"variable": lambda var: (
|
||||
self.graph_runtime_state.variable_pool.get(var.require_variable_selector())
|
||||
if var.value is not None
|
||||
else None
|
||||
self.graph_runtime_state.variable_pool.get(var.value) if isinstance(var.value, list) else None
|
||||
),
|
||||
}
|
||||
for loop_variable in self.node_data.loop_variables:
|
||||
@ -97,7 +95,7 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]):
|
||||
condition_processor = ConditionProcessor()
|
||||
|
||||
loop_duration_map: dict[str, float] = {}
|
||||
single_loop_variable_map: dict[str, dict[str, LoopValue]] = {} # single loop variable output
|
||||
single_loop_variable_map: dict[str, dict[str, Any]] = {} # single loop variable output
|
||||
loop_usage = LLMUsage.empty_usage()
|
||||
loop_node_ids = self._extract_loop_node_ids_from_config(self.graph_config, self._node_id)
|
||||
|
||||
@ -148,7 +146,7 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]):
|
||||
loop_usage = self._merge_usage(loop_usage, graph_engine.graph_runtime_state.llm_usage)
|
||||
|
||||
# Collect loop variable values after iteration
|
||||
single_loop_variable: dict[str, LoopValue] = {}
|
||||
single_loop_variable = {}
|
||||
for key, selector in loop_variable_selectors.items():
|
||||
segment = self.graph_runtime_state.variable_pool.get(selector)
|
||||
single_loop_variable[key] = segment.value if segment else None
|
||||
@ -299,29 +297,20 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]):
|
||||
def _extract_variable_selector_to_variable_mapping(
|
||||
cls,
|
||||
*,
|
||||
graph_config: Mapping[str, object],
|
||||
graph_config: Mapping[str, Any],
|
||||
node_id: str,
|
||||
node_data: LoopNodeData,
|
||||
) -> Mapping[str, Sequence[str]]:
|
||||
variable_mapping: dict[str, Sequence[str]] = {}
|
||||
variable_mapping = {}
|
||||
|
||||
# Extract loop node IDs statically from graph_config
|
||||
|
||||
loop_node_ids = cls._extract_loop_node_ids_from_config(graph_config, node_id)
|
||||
|
||||
# Get node configs from graph_config
|
||||
raw_nodes = graph_config.get("nodes")
|
||||
node_configs: dict[str, Mapping[str, object]] = {}
|
||||
if isinstance(raw_nodes, list):
|
||||
for raw_node in raw_nodes:
|
||||
if not isinstance(raw_node, dict):
|
||||
continue
|
||||
raw_node_id = raw_node.get("id")
|
||||
if isinstance(raw_node_id, str):
|
||||
node_configs[raw_node_id] = raw_node
|
||||
node_configs = {node["id"]: node for node in graph_config.get("nodes", []) if "id" in node}
|
||||
for sub_node_id, sub_node_config in node_configs.items():
|
||||
sub_node_data = sub_node_config.get("data")
|
||||
if not isinstance(sub_node_data, dict) or sub_node_data.get("loop_id") != node_id:
|
||||
if sub_node_config.get("data", {}).get("loop_id") != node_id:
|
||||
continue
|
||||
|
||||
# variable selector to variable mapping
|
||||
@ -352,8 +341,9 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]):
|
||||
|
||||
for loop_variable in node_data.loop_variables or []:
|
||||
if loop_variable.value_type == "variable":
|
||||
assert loop_variable.value is not None, "Loop variable value must be provided for variable type"
|
||||
# add loop variable to variable mapping
|
||||
selector = loop_variable.require_variable_selector()
|
||||
selector = loop_variable.value
|
||||
variable_mapping[f"{node_id}.{loop_variable.label}"] = selector
|
||||
|
||||
# remove variable out from loop
|
||||
@ -362,7 +352,7 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]):
|
||||
return variable_mapping
|
||||
|
||||
@classmethod
|
||||
def _extract_loop_node_ids_from_config(cls, graph_config: Mapping[str, object], loop_node_id: str) -> set[str]:
|
||||
def _extract_loop_node_ids_from_config(cls, graph_config: Mapping[str, Any], loop_node_id: str) -> set[str]:
|
||||
"""
|
||||
Extract node IDs that belong to a specific loop from graph configuration.
|
||||
|
||||
@ -373,19 +363,12 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]):
|
||||
:param loop_node_id: the ID of the loop node
|
||||
:return: set of node IDs that belong to the loop
|
||||
"""
|
||||
loop_node_ids: set[str] = set()
|
||||
loop_node_ids = set()
|
||||
|
||||
# Find all nodes that belong to this loop
|
||||
raw_nodes = graph_config.get("nodes")
|
||||
if not isinstance(raw_nodes, list):
|
||||
return loop_node_ids
|
||||
|
||||
for node in raw_nodes:
|
||||
if not isinstance(node, dict):
|
||||
continue
|
||||
node_data = node.get("data")
|
||||
if not isinstance(node_data, dict):
|
||||
continue
|
||||
nodes = graph_config.get("nodes", [])
|
||||
for node in nodes:
|
||||
node_data = node.get("data", {})
|
||||
if node_data.get("loop_id") == loop_node_id:
|
||||
node_id = node.get("id")
|
||||
if node_id:
|
||||
@ -394,7 +377,7 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]):
|
||||
return loop_node_ids
|
||||
|
||||
@staticmethod
|
||||
def _get_segment_for_constant(var_type: SegmentType, original_value: LoopValue | None) -> Segment:
|
||||
def _get_segment_for_constant(var_type: SegmentType, original_value: Any) -> Segment:
|
||||
"""Get the appropriate segment type for a constant value."""
|
||||
# TODO: Refactor for maintainability:
|
||||
# 1. Ensure type handling logic stays synchronized with _VALID_VAR_TYPE (entities.py)
|
||||
@ -406,15 +389,11 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]):
|
||||
SegmentType.ARRAY_OBJECT,
|
||||
SegmentType.ARRAY_STRING,
|
||||
]:
|
||||
# New typed payloads may already provide native lists, while legacy
|
||||
# configs still serialize array constants as JSON strings.
|
||||
if isinstance(original_value, str):
|
||||
value = json.loads(original_value) if original_value else []
|
||||
elif original_value is None:
|
||||
# Preserve legacy behavior: treat missing/empty array constants as [].
|
||||
value = []
|
||||
if original_value and isinstance(original_value, str):
|
||||
value = json.loads(original_value)
|
||||
else:
|
||||
value = original_value
|
||||
logger.warning("unexpected value for LoopNode, value_type=%s, value=%s", original_value, var_type)
|
||||
value = []
|
||||
else:
|
||||
raise AssertionError("this statement should be unreachable.")
|
||||
try:
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
from typing import Annotated, Literal
|
||||
from typing import Annotated, Any, Literal
|
||||
|
||||
from pydantic import (
|
||||
BaseModel,
|
||||
@ -6,7 +6,6 @@ from pydantic import (
|
||||
Field,
|
||||
field_validator,
|
||||
)
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
from core.prompt.entities.advanced_prompt_entities import MemoryConfig
|
||||
from dify_graph.entities.base_node_data import BaseNodeData
|
||||
@ -56,7 +55,7 @@ class ParameterConfig(BaseModel):
|
||||
|
||||
@field_validator("name", mode="before")
|
||||
@classmethod
|
||||
def validate_name(cls, value: object) -> str:
|
||||
def validate_name(cls, value) -> str:
|
||||
if not value:
|
||||
raise ValueError("Parameter name is required")
|
||||
if value in {"__reason", "__is_success"}:
|
||||
@ -80,23 +79,6 @@ class ParameterConfig(BaseModel):
|
||||
return element_type
|
||||
|
||||
|
||||
class JsonSchemaArrayItems(TypedDict):
|
||||
type: str
|
||||
|
||||
|
||||
class ParameterJsonSchemaProperty(TypedDict, total=False):
|
||||
description: str
|
||||
type: str
|
||||
items: JsonSchemaArrayItems
|
||||
enum: list[str]
|
||||
|
||||
|
||||
class ParameterJsonSchema(TypedDict):
|
||||
type: Literal["object"]
|
||||
properties: dict[str, ParameterJsonSchemaProperty]
|
||||
required: list[str]
|
||||
|
||||
|
||||
class ParameterExtractorNodeData(BaseNodeData):
|
||||
"""
|
||||
Parameter Extractor Node Data.
|
||||
@ -113,19 +95,19 @@ class ParameterExtractorNodeData(BaseNodeData):
|
||||
|
||||
@field_validator("reasoning_mode", mode="before")
|
||||
@classmethod
|
||||
def set_reasoning_mode(cls, v: object) -> str:
|
||||
return str(v) if v else "function_call"
|
||||
def set_reasoning_mode(cls, v) -> str:
|
||||
return v or "function_call"
|
||||
|
||||
def get_parameter_json_schema(self) -> ParameterJsonSchema:
|
||||
def get_parameter_json_schema(self):
|
||||
"""
|
||||
Get parameter json schema.
|
||||
|
||||
:return: parameter json schema
|
||||
"""
|
||||
parameters: ParameterJsonSchema = {"type": "object", "properties": {}, "required": []}
|
||||
parameters: dict[str, Any] = {"type": "object", "properties": {}, "required": []}
|
||||
|
||||
for parameter in self.parameters:
|
||||
parameter_schema: ParameterJsonSchemaProperty = {"description": parameter.description}
|
||||
parameter_schema: dict[str, Any] = {"description": parameter.description}
|
||||
|
||||
if parameter.type == SegmentType.STRING:
|
||||
parameter_schema["type"] = "string"
|
||||
@ -136,7 +118,7 @@ class ParameterExtractorNodeData(BaseNodeData):
|
||||
raise AssertionError("element type should not be None.")
|
||||
parameter_schema["items"] = {"type": element_type.value}
|
||||
else:
|
||||
parameter_schema["type"] = parameter.type.value
|
||||
parameter_schema["type"] = parameter.type
|
||||
|
||||
if parameter.options:
|
||||
parameter_schema["enum"] = parameter.options
|
||||
|
||||
@ -5,8 +5,6 @@ import uuid
|
||||
from collections.abc import Mapping, Sequence
|
||||
from typing import TYPE_CHECKING, Any, cast
|
||||
|
||||
from pydantic import TypeAdapter
|
||||
|
||||
from core.model_manager import ModelInstance
|
||||
from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
|
||||
from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate
|
||||
@ -65,7 +63,6 @@ from .prompts import (
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
_JSON_OBJECT_ADAPTER = TypeAdapter(dict[str, object])
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from dify_graph.entities import GraphInitParams
|
||||
@ -73,7 +70,7 @@ if TYPE_CHECKING:
|
||||
from dify_graph.runtime import GraphRuntimeState
|
||||
|
||||
|
||||
def extract_json(text: str) -> str | None:
|
||||
def extract_json(text):
|
||||
"""
|
||||
From a given JSON started from '{' or '[' extract the complete JSON object.
|
||||
"""
|
||||
@ -399,15 +396,10 @@ class ParameterExtractorNode(Node[ParameterExtractorNodeData]):
|
||||
)
|
||||
|
||||
# generate tool
|
||||
parameter_schema = node_data.get_parameter_json_schema()
|
||||
tool = PromptMessageTool(
|
||||
name=FUNCTION_CALLING_EXTRACTOR_NAME,
|
||||
description="Extract parameters from the natural language text",
|
||||
parameters={
|
||||
"type": parameter_schema["type"],
|
||||
"properties": dict(parameter_schema["properties"]),
|
||||
"required": list(parameter_schema["required"]),
|
||||
},
|
||||
parameters=node_data.get_parameter_json_schema(),
|
||||
)
|
||||
|
||||
return prompt_messages, [tool]
|
||||
@ -614,21 +606,19 @@ class ParameterExtractorNode(Node[ParameterExtractorNodeData]):
|
||||
else:
|
||||
return None
|
||||
|
||||
def _transform_result(self, data: ParameterExtractorNodeData, result: Mapping[str, object]) -> dict[str, object]:
|
||||
def _transform_result(self, data: ParameterExtractorNodeData, result: dict):
|
||||
"""
|
||||
Transform result into standard format.
|
||||
"""
|
||||
transformed_result: dict[str, object] = {}
|
||||
transformed_result: dict[str, Any] = {}
|
||||
for parameter in data.parameters:
|
||||
if parameter.name in result:
|
||||
param_value = result[parameter.name]
|
||||
# transform value
|
||||
if parameter.type == SegmentType.NUMBER:
|
||||
if isinstance(param_value, (bool, int, float, str)):
|
||||
numeric_value: bool | int | float | str = param_value
|
||||
transformed = self._transform_number(numeric_value)
|
||||
if transformed is not None:
|
||||
transformed_result[parameter.name] = transformed
|
||||
transformed = self._transform_number(param_value)
|
||||
if transformed is not None:
|
||||
transformed_result[parameter.name] = transformed
|
||||
elif parameter.type == SegmentType.BOOLEAN:
|
||||
if isinstance(result[parameter.name], (bool, int)):
|
||||
transformed_result[parameter.name] = bool(result[parameter.name])
|
||||
@ -675,7 +665,7 @@ class ParameterExtractorNode(Node[ParameterExtractorNodeData]):
|
||||
|
||||
return transformed_result
|
||||
|
||||
def _extract_complete_json_response(self, result: str) -> dict[str, object] | None:
|
||||
def _extract_complete_json_response(self, result: str) -> dict | None:
|
||||
"""
|
||||
Extract complete json response.
|
||||
"""
|
||||
@ -686,11 +676,11 @@ class ParameterExtractorNode(Node[ParameterExtractorNodeData]):
|
||||
json_str = extract_json(result[idx:])
|
||||
if json_str:
|
||||
with contextlib.suppress(Exception):
|
||||
return _JSON_OBJECT_ADAPTER.validate_python(json.loads(json_str))
|
||||
return cast(dict, json.loads(json_str))
|
||||
logger.info("extra error: %s", result)
|
||||
return None
|
||||
|
||||
def _extract_json_from_tool_call(self, tool_call: AssistantPromptMessage.ToolCall) -> dict[str, object] | None:
|
||||
def _extract_json_from_tool_call(self, tool_call: AssistantPromptMessage.ToolCall) -> dict | None:
|
||||
"""
|
||||
Extract json from tool call.
|
||||
"""
|
||||
@ -704,16 +694,16 @@ class ParameterExtractorNode(Node[ParameterExtractorNodeData]):
|
||||
json_str = extract_json(result[idx:])
|
||||
if json_str:
|
||||
with contextlib.suppress(Exception):
|
||||
return _JSON_OBJECT_ADAPTER.validate_python(json.loads(json_str))
|
||||
return cast(dict, json.loads(json_str))
|
||||
|
||||
logger.info("extra error: %s", result)
|
||||
return None
|
||||
|
||||
def _generate_default_result(self, data: ParameterExtractorNodeData) -> dict[str, object]:
|
||||
def _generate_default_result(self, data: ParameterExtractorNodeData):
|
||||
"""
|
||||
Generate default result.
|
||||
"""
|
||||
result: dict[str, object] = {}
|
||||
result: dict[str, Any] = {}
|
||||
for parameter in data.parameters:
|
||||
if parameter.type == "number":
|
||||
result[parameter.name] = 0
|
||||
|
||||
@ -1,66 +1,12 @@
|
||||
from __future__ import annotations
|
||||
from typing import Any, Literal, Union
|
||||
|
||||
from typing import Literal, TypeAlias, cast
|
||||
|
||||
from pydantic import BaseModel, TypeAdapter, field_validator
|
||||
from pydantic import BaseModel, field_validator
|
||||
from pydantic_core.core_schema import ValidationInfo
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
from core.tools.entities.tool_entities import ToolProviderType
|
||||
from dify_graph.entities.base_node_data import BaseNodeData
|
||||
from dify_graph.enums import BuiltinNodeTypes, NodeType
|
||||
|
||||
ToolConfigurationValue: TypeAlias = str | int | float | bool
|
||||
ToolInputConstantValue: TypeAlias = str | int | float | bool | dict[str, object] | list[object] | None
|
||||
VariableSelector: TypeAlias = list[str]
|
||||
|
||||
_TOOL_INPUT_MIXED_ADAPTER: TypeAdapter[str] = TypeAdapter(str)
|
||||
_TOOL_INPUT_CONSTANT_ADAPTER: TypeAdapter[ToolInputConstantValue] = TypeAdapter(ToolInputConstantValue)
|
||||
_VARIABLE_SELECTOR_ADAPTER: TypeAdapter[VariableSelector] = TypeAdapter(VariableSelector)
|
||||
|
||||
|
||||
class WorkflowToolInputValue(TypedDict):
|
||||
type: Literal["mixed", "variable", "constant"]
|
||||
value: ToolInputConstantValue | VariableSelector
|
||||
|
||||
|
||||
ToolConfigurationEntry: TypeAlias = ToolConfigurationValue | WorkflowToolInputValue
|
||||
ToolConfigurations: TypeAlias = dict[str, ToolConfigurationEntry]
|
||||
|
||||
|
||||
class ToolInputPayload(BaseModel):
|
||||
type: Literal["mixed", "variable", "constant"]
|
||||
value: ToolInputConstantValue | VariableSelector
|
||||
|
||||
@field_validator("value", mode="before")
|
||||
@classmethod
|
||||
def validate_value(
|
||||
cls, value: object, validation_info: ValidationInfo
|
||||
) -> ToolInputConstantValue | VariableSelector:
|
||||
input_type = validation_info.data.get("type")
|
||||
if input_type == "mixed":
|
||||
return _TOOL_INPUT_MIXED_ADAPTER.validate_python(value)
|
||||
if input_type == "variable":
|
||||
return _VARIABLE_SELECTOR_ADAPTER.validate_python(value)
|
||||
if input_type == "constant":
|
||||
return _TOOL_INPUT_CONSTANT_ADAPTER.validate_python(value)
|
||||
raise ValueError(f"Unknown tool input type: {input_type}")
|
||||
|
||||
def require_variable_selector(self) -> VariableSelector:
|
||||
if self.type != "variable":
|
||||
raise ValueError(f"Expected variable tool input, got {self.type}")
|
||||
return _VARIABLE_SELECTOR_ADAPTER.validate_python(self.value)
|
||||
|
||||
|
||||
def _validate_tool_configuration_entry(value: object) -> ToolConfigurationEntry:
|
||||
if isinstance(value, (str, int, float, bool)):
|
||||
return cast(ToolConfigurationEntry, value)
|
||||
|
||||
if isinstance(value, dict):
|
||||
return cast(ToolConfigurationEntry, ToolInputPayload.model_validate(value).model_dump())
|
||||
|
||||
raise TypeError("Tool configuration values must be primitives or workflow tool input objects")
|
||||
|
||||
|
||||
class ToolEntity(BaseModel):
|
||||
provider_id: str
|
||||
@ -68,29 +14,52 @@ class ToolEntity(BaseModel):
|
||||
provider_name: str # redundancy
|
||||
tool_name: str
|
||||
tool_label: str # redundancy
|
||||
tool_configurations: ToolConfigurations
|
||||
tool_configurations: dict[str, Any]
|
||||
credential_id: str | None = None
|
||||
plugin_unique_identifier: str | None = None # redundancy
|
||||
|
||||
@field_validator("tool_configurations", mode="before")
|
||||
@classmethod
|
||||
def validate_tool_configurations(cls, value: object, _validation_info: ValidationInfo) -> ToolConfigurations:
|
||||
def validate_tool_configurations(cls, value, values: ValidationInfo):
|
||||
if not isinstance(value, dict):
|
||||
raise TypeError("tool_configurations must be a dictionary")
|
||||
raise ValueError("tool_configurations must be a dictionary")
|
||||
|
||||
normalized: ToolConfigurations = {}
|
||||
for key, item in value.items():
|
||||
if not isinstance(key, str):
|
||||
raise TypeError("tool_configurations keys must be strings")
|
||||
normalized[key] = _validate_tool_configuration_entry(item)
|
||||
return normalized
|
||||
for key in values.data.get("tool_configurations", {}):
|
||||
value = values.data.get("tool_configurations", {}).get(key)
|
||||
if not isinstance(value, str | int | float | bool):
|
||||
raise ValueError(f"{key} must be a string")
|
||||
|
||||
return value
|
||||
|
||||
|
||||
class ToolNodeData(BaseNodeData, ToolEntity):
|
||||
type: NodeType = BuiltinNodeTypes.TOOL
|
||||
|
||||
class ToolInput(ToolInputPayload):
|
||||
pass
|
||||
class ToolInput(BaseModel):
|
||||
# TODO: check this type
|
||||
value: Union[Any, list[str]]
|
||||
type: Literal["mixed", "variable", "constant"]
|
||||
|
||||
@field_validator("type", mode="before")
|
||||
@classmethod
|
||||
def check_type(cls, value, validation_info: ValidationInfo):
|
||||
typ = value
|
||||
value = validation_info.data.get("value")
|
||||
|
||||
if value is None:
|
||||
return typ
|
||||
|
||||
if typ == "mixed" and not isinstance(value, str):
|
||||
raise ValueError("value must be a string")
|
||||
elif typ == "variable":
|
||||
if not isinstance(value, list):
|
||||
raise ValueError("value must be a list")
|
||||
for val in value:
|
||||
if not isinstance(val, str):
|
||||
raise ValueError("value must be a list of strings")
|
||||
elif typ == "constant" and not isinstance(value, (allowed_types := (str, int, float, bool, dict, list))):
|
||||
raise ValueError(f"value must be one of: {', '.join(t.__name__ for t in allowed_types)}")
|
||||
return typ
|
||||
|
||||
tool_parameters: dict[str, ToolInput]
|
||||
# The version of the tool parameter.
|
||||
@ -100,7 +69,7 @@ class ToolNodeData(BaseNodeData, ToolEntity):
|
||||
|
||||
@field_validator("tool_parameters", mode="before")
|
||||
@classmethod
|
||||
def filter_none_tool_inputs(cls, value: object) -> object:
|
||||
def filter_none_tool_inputs(cls, value):
|
||||
if not isinstance(value, dict):
|
||||
return value
|
||||
|
||||
@ -111,10 +80,8 @@ class ToolNodeData(BaseNodeData, ToolEntity):
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _has_valid_value(tool_input: object) -> bool:
|
||||
def _has_valid_value(tool_input):
|
||||
"""Check if the value is valid"""
|
||||
if isinstance(tool_input, dict):
|
||||
return tool_input.get("value") is not None
|
||||
if isinstance(tool_input, ToolNodeData.ToolInput):
|
||||
return tool_input.value is not None
|
||||
return False
|
||||
return getattr(tool_input, "value", None) is not None
|
||||
|
||||
@ -225,11 +225,10 @@ class ToolNode(Node[ToolNodeData]):
|
||||
continue
|
||||
tool_input = node_data.tool_parameters[parameter_name]
|
||||
if tool_input.type == "variable":
|
||||
variable_selector = tool_input.require_variable_selector()
|
||||
variable = variable_pool.get(variable_selector)
|
||||
variable = variable_pool.get(tool_input.value)
|
||||
if variable is None:
|
||||
if parameter.required:
|
||||
raise ToolParameterError(f"Variable {variable_selector} does not exist")
|
||||
raise ToolParameterError(f"Variable {tool_input.value} does not exist")
|
||||
continue
|
||||
parameter_value = variable.value
|
||||
elif tool_input.type in {"mixed", "constant"}:
|
||||
@ -511,9 +510,8 @@ class ToolNode(Node[ToolNodeData]):
|
||||
for selector in selectors:
|
||||
result[selector.variable] = selector.value_selector
|
||||
case "variable":
|
||||
variable_selector = input.require_variable_selector()
|
||||
selector_key = ".".join(variable_selector)
|
||||
result[f"#{selector_key}#"] = variable_selector
|
||||
selector_key = ".".join(input.value)
|
||||
result[f"#{selector_key}#"] = input.value
|
||||
case "constant":
|
||||
pass
|
||||
|
||||
|
||||
@ -9,7 +9,7 @@ from dify_graph.node_events import NodeRunResult
|
||||
from dify_graph.nodes.base.node import Node
|
||||
from dify_graph.nodes.variable_assigner.common import helpers as common_helpers
|
||||
from dify_graph.nodes.variable_assigner.common.exc import VariableOperatorNodeError
|
||||
from dify_graph.variables import Segment, SegmentType, VariableBase
|
||||
from dify_graph.variables import SegmentType, VariableBase
|
||||
|
||||
from .node_data import VariableAssignerData, WriteMode
|
||||
|
||||
@ -74,29 +74,23 @@ class VariableAssignerNode(Node[VariableAssignerData]):
|
||||
if not isinstance(original_variable, VariableBase):
|
||||
raise VariableOperatorNodeError("assigned variable not found")
|
||||
|
||||
income_value: Segment
|
||||
updated_variable: VariableBase
|
||||
match self.node_data.write_mode:
|
||||
case WriteMode.OVER_WRITE:
|
||||
input_value = self.graph_runtime_state.variable_pool.get(self.node_data.input_variable_selector)
|
||||
if input_value is None:
|
||||
income_value = self.graph_runtime_state.variable_pool.get(self.node_data.input_variable_selector)
|
||||
if not income_value:
|
||||
raise VariableOperatorNodeError("input value not found")
|
||||
income_value = input_value
|
||||
updated_variable = original_variable.model_copy(update={"value": income_value.value})
|
||||
|
||||
case WriteMode.APPEND:
|
||||
input_value = self.graph_runtime_state.variable_pool.get(self.node_data.input_variable_selector)
|
||||
if input_value is None:
|
||||
income_value = self.graph_runtime_state.variable_pool.get(self.node_data.input_variable_selector)
|
||||
if not income_value:
|
||||
raise VariableOperatorNodeError("input value not found")
|
||||
income_value = input_value
|
||||
updated_value = original_variable.value + [income_value.value]
|
||||
updated_variable = original_variable.model_copy(update={"value": updated_value})
|
||||
|
||||
case WriteMode.CLEAR:
|
||||
income_value = SegmentType.get_zero_value(original_variable.value_type)
|
||||
updated_variable = original_variable.model_copy(update={"value": income_value.to_object()})
|
||||
case _:
|
||||
raise VariableOperatorNodeError(f"unsupported write mode: {self.node_data.write_mode}")
|
||||
|
||||
# Over write the variable.
|
||||
self.graph_runtime_state.variable_pool.add(assigned_variable_selector, updated_variable)
|
||||
|
||||
@ -66,11 +66,6 @@ class GraphExecutionProtocol(Protocol):
|
||||
exceptions_count: int
|
||||
pause_reasons: list[PauseReason]
|
||||
|
||||
@property
|
||||
def node_executions(self) -> Mapping[str, NodeExecutionProtocol]:
|
||||
"""Return node execution state keyed by node id for resume support."""
|
||||
...
|
||||
|
||||
def start(self) -> None:
|
||||
"""Transition execution into the running state."""
|
||||
...
|
||||
@ -96,12 +91,6 @@ class GraphExecutionProtocol(Protocol):
|
||||
...
|
||||
|
||||
|
||||
class NodeExecutionProtocol(Protocol):
|
||||
"""Structural interface for per-node execution state used during resume."""
|
||||
|
||||
execution_id: str | None
|
||||
|
||||
|
||||
class ResponseStreamCoordinatorProtocol(Protocol):
|
||||
"""Structural interface for response stream coordinator."""
|
||||
|
||||
|
||||
0
api/enterprise/__init__.py
Normal file
0
api/enterprise/__init__.py
Normal file
525
api/enterprise/telemetry/DATA_DICTIONARY.md
Normal file
525
api/enterprise/telemetry/DATA_DICTIONARY.md
Normal file
@ -0,0 +1,525 @@
|
||||
# Dify Enterprise Telemetry Data Dictionary
|
||||
|
||||
Quick reference for all telemetry signals emitted by Dify Enterprise. For configuration and architecture details, see [README.md](./README.md).
|
||||
|
||||
## Resource Attributes
|
||||
|
||||
Attached to every signal (Span, Metric, Log).
|
||||
|
||||
| Attribute | Type | Example |
|
||||
|-----------|------|---------|
|
||||
| `service.name` | string | `dify` |
|
||||
| `host.name` | string | `dify-api-7f8b` |
|
||||
|
||||
## Traces (Spans)
|
||||
|
||||
### `dify.workflow.run`
|
||||
|
||||
| Attribute | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `dify.trace_id` | string | Business trace ID (Workflow Run ID) |
|
||||
| `dify.tenant_id` | string | Tenant identifier |
|
||||
| `dify.app_id` | string | Application identifier |
|
||||
| `dify.workflow.id` | string | Workflow definition ID |
|
||||
| `dify.workflow.run_id` | string | Unique ID for this run |
|
||||
| `dify.workflow.status` | string | `succeeded`, `failed`, `stopped`, etc. |
|
||||
| `dify.workflow.error` | string | Error message if failed |
|
||||
| `dify.workflow.elapsed_time` | float | Total execution time (seconds) |
|
||||
| `dify.invoke_from` | string | `api`, `webapp`, `debug` |
|
||||
| `dify.conversation.id` | string | Conversation ID (optional) |
|
||||
| `dify.message.id` | string | Message ID (optional) |
|
||||
| `dify.invoked_by` | string | User ID who triggered the run |
|
||||
| `gen_ai.usage.total_tokens` | int | Total tokens across all nodes (optional) |
|
||||
| `gen_ai.user.id` | string | End-user identifier (optional) |
|
||||
| `dify.parent.trace_id` | string | Parent workflow trace ID (optional) |
|
||||
| `dify.parent.workflow.run_id` | string | Parent workflow run ID (optional) |
|
||||
| `dify.parent.node.execution_id` | string | Parent node execution ID (optional) |
|
||||
| `dify.parent.app.id` | string | Parent app ID (optional) |
|
||||
|
||||
### `dify.node.execution`
|
||||
|
||||
| Attribute | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `dify.trace_id` | string | Business trace ID |
|
||||
| `dify.tenant_id` | string | Tenant identifier |
|
||||
| `dify.app_id` | string | Application identifier |
|
||||
| `dify.workflow.id` | string | Workflow definition ID |
|
||||
| `dify.workflow.run_id` | string | Workflow Run ID |
|
||||
| `dify.message.id` | string | Message ID (optional) |
|
||||
| `dify.conversation.id` | string | Conversation ID (optional) |
|
||||
| `dify.node.execution_id` | string | Unique node execution ID |
|
||||
| `dify.node.id` | string | Node ID in workflow graph |
|
||||
| `dify.node.type` | string | Node type (see appendix) |
|
||||
| `dify.node.title` | string | Display title |
|
||||
| `dify.node.status` | string | `succeeded`, `failed` |
|
||||
| `dify.node.error` | string | Error message if failed |
|
||||
| `dify.node.elapsed_time` | float | Execution time (seconds) |
|
||||
| `dify.node.index` | int | Execution order index |
|
||||
| `dify.node.predecessor_node_id` | string | Triggering node ID |
|
||||
| `dify.node.iteration_id` | string | Iteration ID (optional) |
|
||||
| `dify.node.loop_id` | string | Loop ID (optional) |
|
||||
| `dify.node.parallel_id` | string | Parallel branch ID (optional) |
|
||||
| `dify.node.invoked_by` | string | User ID who triggered execution |
|
||||
| `gen_ai.usage.input_tokens` | int | Prompt tokens (LLM nodes only) |
|
||||
| `gen_ai.usage.output_tokens` | int | Completion tokens (LLM nodes only) |
|
||||
| `gen_ai.usage.total_tokens` | int | Total tokens (LLM nodes only) |
|
||||
| `gen_ai.request.model` | string | LLM model name (LLM nodes only) |
|
||||
| `gen_ai.provider.name` | string | LLM provider name (LLM nodes only) |
|
||||
| `gen_ai.user.id` | string | End-user identifier (optional) |
|
||||
|
||||
### `dify.node.execution.draft`
|
||||
|
||||
Same attributes as `dify.node.execution`. Emitted during Preview/Debug runs.
|
||||
|
||||
## Counters
|
||||
|
||||
All counters are cumulative and emitted at 100% accuracy.
|
||||
|
||||
### Token Counters
|
||||
|
||||
| Metric | Unit | Description |
|
||||
|--------|------|-------------|
|
||||
| `dify.tokens.total` | `{token}` | Total tokens consumed |
|
||||
| `dify.tokens.input` | `{token}` | Input (prompt) tokens |
|
||||
| `dify.tokens.output` | `{token}` | Output (completion) tokens |
|
||||
|
||||
**Labels:**
|
||||
|
||||
- `tenant_id`, `app_id`, `operation_type`, `model_provider`, `model_name`, `node_type` (if node_execution)
|
||||
|
||||
⚠️ **Warning:** `dify.tokens.total` at workflow level includes all node tokens. Filter by `operation_type` to avoid double-counting.
|
||||
|
||||
#### Token Hierarchy & Query Patterns
|
||||
|
||||
Token metrics are emitted at multiple layers. Understanding the hierarchy prevents double-counting:
|
||||
|
||||
```
|
||||
App-level total
|
||||
├── workflow ← sum of all node_execution tokens (DO NOT add both)
|
||||
│ └── node_execution ← per-node breakdown
|
||||
├── message ← independent (non-workflow chat apps only)
|
||||
├── rule_generate ← independent helper LLM call
|
||||
├── code_generate ← independent helper LLM call
|
||||
├── structured_output ← independent helper LLM call
|
||||
└── instruction_modify← independent helper LLM call
|
||||
```
|
||||
|
||||
**Key rule:** `workflow` tokens already include all `node_execution` tokens. Never sum both.
|
||||
|
||||
**Available labels on token metrics:** `tenant_id`, `app_id`, `operation_type`, `model_provider`, `model_name`, `node_type`.
|
||||
App name is only available on span attributes (`dify.app.name`), not metric labels — use `app_id` for metric queries.
|
||||
|
||||
**Common queries** (PromQL):
|
||||
|
||||
```promql
|
||||
# ── Totals ──────────────────────────────────────────────────
|
||||
# App-level total (exclude node_execution to avoid double-counting)
|
||||
sum by (app_id) (dify_tokens_total{operation_type!="node_execution"})
|
||||
|
||||
# Single app total
|
||||
sum (dify_tokens_total{app_id="<app_id>", operation_type!="node_execution"})
|
||||
|
||||
# Per-tenant totals
|
||||
sum by (tenant_id) (dify_tokens_total{operation_type!="node_execution"})
|
||||
|
||||
# ── Drill-down ──────────────────────────────────────────────
|
||||
# Workflow-level tokens for an app
|
||||
sum (dify_tokens_total{app_id="<app_id>", operation_type="workflow"})
|
||||
|
||||
# Node-level breakdown within an app
|
||||
sum by (node_type) (dify_tokens_total{app_id="<app_id>", operation_type="node_execution"})
|
||||
|
||||
# Model breakdown for an app
|
||||
sum by (model_provider, model_name) (dify_tokens_total{app_id="<app_id>"})
|
||||
|
||||
# Input vs output per model
|
||||
sum by (model_name) (dify_tokens_input_total{app_id="<app_id>"})
|
||||
sum by (model_name) (dify_tokens_output_total{app_id="<app_id>"})
|
||||
|
||||
# ── Rates ───────────────────────────────────────────────────
|
||||
# Token consumption rate (per hour)
|
||||
sum(rate(dify_tokens_total{operation_type!="node_execution"}[1h]))
|
||||
|
||||
# Per-app consumption rate
|
||||
sum by (app_id) (rate(dify_tokens_total{operation_type!="node_execution"}[1h]))
|
||||
```
|
||||
|
||||
**Finding `app_id` from app name** (trace query — Tempo / Jaeger):
|
||||
|
||||
```
|
||||
{ resource.dify.app.name = "My Chatbot" } | select(resource.dify.app.id)
|
||||
```
|
||||
|
||||
### Request Counters
|
||||
|
||||
| Metric | Unit | Description |
|
||||
|--------|------|-------------|
|
||||
| `dify.requests.total` | `{request}` | Total operations count |
|
||||
|
||||
**Labels by type:**
|
||||
|
||||
| `type` | Additional Labels |
|
||||
|--------|-------------------|
|
||||
| `workflow` | `tenant_id`, `app_id`, `status`, `invoke_from` |
|
||||
| `node` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `model_name`, `status` |
|
||||
| `draft_node` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `model_name`, `status` |
|
||||
| `message` | `tenant_id`, `app_id`, `model_provider`, `model_name`, `status`, `invoke_from` |
|
||||
| `tool` | `tenant_id`, `app_id`, `tool_name` |
|
||||
| `moderation` | `tenant_id`, `app_id` |
|
||||
| `suggested_question` | `tenant_id`, `app_id`, `model_provider`, `model_name` |
|
||||
| `dataset_retrieval` | `tenant_id`, `app_id` |
|
||||
| `generate_name` | `tenant_id`, `app_id` |
|
||||
| `prompt_generation` | `tenant_id`, `app_id`, `operation_type`, `model_provider`, `model_name`, `status` |
|
||||
|
||||
### Error Counters
|
||||
|
||||
| Metric | Unit | Description |
|
||||
|--------|------|-------------|
|
||||
| `dify.errors.total` | `{error}` | Total failed operations |
|
||||
|
||||
**Labels by type:**
|
||||
|
||||
| `type` | Additional Labels |
|
||||
|--------|-------------------|
|
||||
| `workflow` | `tenant_id`, `app_id` |
|
||||
| `node` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `model_name` |
|
||||
| `draft_node` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `model_name` |
|
||||
| `message` | `tenant_id`, `app_id`, `model_provider`, `model_name` |
|
||||
| `tool` | `tenant_id`, `app_id`, `tool_name` |
|
||||
| `prompt_generation` | `tenant_id`, `app_id`, `operation_type`, `model_provider`, `model_name` |
|
||||
|
||||
### Other Counters
|
||||
|
||||
| Metric | Unit | Labels |
|
||||
|--------|------|--------|
|
||||
| `dify.feedback.total` | `{feedback}` | `tenant_id`, `app_id`, `rating` |
|
||||
| `dify.dataset.retrievals.total` | `{retrieval}` | `tenant_id`, `app_id`, `dataset_id`, `embedding_model_provider`, `embedding_model`, `rerank_model_provider`, `rerank_model` |
|
||||
| `dify.app.created.total` | `{app}` | `tenant_id`, `app_id`, `mode` |
|
||||
| `dify.app.updated.total` | `{app}` | `tenant_id`, `app_id` |
|
||||
| `dify.app.deleted.total` | `{app}` | `tenant_id`, `app_id` |
|
||||
|
||||
## Histograms
|
||||
|
||||
| Metric | Unit | Labels |
|
||||
|--------|------|--------|
|
||||
| `dify.workflow.duration` | `s` | `tenant_id`, `app_id`, `status` |
|
||||
| `dify.node.duration` | `s` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `model_name`, `plugin_name` |
|
||||
| `dify.message.duration` | `s` | `tenant_id`, `app_id`, `model_provider`, `model_name` |
|
||||
| `dify.message.time_to_first_token` | `s` | `tenant_id`, `app_id`, `model_provider`, `model_name` |
|
||||
| `dify.tool.duration` | `s` | `tenant_id`, `app_id`, `tool_name` |
|
||||
| `dify.prompt_generation.duration` | `s` | `tenant_id`, `app_id`, `operation_type`, `model_provider`, `model_name` |
|
||||
|
||||
## Structured Logs
|
||||
|
||||
### Span Companion Logs
|
||||
|
||||
Logs that accompany spans. Signal type: `span_detail`
|
||||
|
||||
#### `dify.workflow.run` Companion Log
|
||||
|
||||
**Common attributes:** All span attributes (see Traces section) plus:
|
||||
|
||||
| Additional Attribute | Type | Always Present | Description |
|
||||
|---------------------|------|----------------|-------------|
|
||||
| `dify.app.name` | string | No | Application display name |
|
||||
| `dify.workspace.name` | string | No | Workspace display name |
|
||||
| `dify.workflow.version` | string | Yes | Workflow definition version |
|
||||
| `dify.workflow.inputs` | string/JSON | Yes | Input parameters (content-gated) |
|
||||
| `dify.workflow.outputs` | string/JSON | Yes | Output results (content-gated) |
|
||||
| `dify.workflow.query` | string | No | User query text (content-gated) |
|
||||
|
||||
**Event attributes:**
|
||||
|
||||
- `dify.event.name`: `"dify.workflow.run"`
|
||||
- `dify.event.signal`: `"span_detail"`
|
||||
- `trace_id`, `span_id`, `tenant_id`, `user_id`
|
||||
|
||||
#### `dify.node.execution` and `dify.node.execution.draft` Companion Logs
|
||||
|
||||
**Common attributes:** All span attributes (see Traces section) plus:
|
||||
|
||||
| Additional Attribute | Type | Always Present | Description |
|
||||
|---------------------|------|----------------|-------------|
|
||||
| `dify.app.name` | string | No | Application display name |
|
||||
| `dify.workspace.name` | string | No | Workspace display name |
|
||||
| `dify.invoke_from` | string | No | Invocation source |
|
||||
| `gen_ai.tool.name` | string | No | Tool name (tool nodes only) |
|
||||
| `dify.node.total_price` | float | No | Cost (LLM nodes only) |
|
||||
| `dify.node.currency` | string | No | Currency code (LLM nodes only) |
|
||||
| `dify.node.iteration_index` | int | No | Iteration index (iteration nodes) |
|
||||
| `dify.node.loop_index` | int | No | Loop index (loop nodes) |
|
||||
| `dify.plugin.name` | string | No | Plugin name (tool/knowledge nodes) |
|
||||
| `dify.credential.name` | string | No | Credential name (plugin nodes) |
|
||||
| `dify.credential.id` | string | No | Credential ID (plugin nodes) |
|
||||
| `dify.dataset.ids` | JSON array | No | Dataset IDs (knowledge nodes) |
|
||||
| `dify.dataset.names` | JSON array | No | Dataset names (knowledge nodes) |
|
||||
| `dify.node.inputs` | string/JSON | Yes | Node inputs (content-gated) |
|
||||
| `dify.node.outputs` | string/JSON | Yes | Node outputs (content-gated) |
|
||||
| `dify.node.process_data` | string/JSON | No | Processing data (content-gated) |
|
||||
|
||||
**Event attributes:**
|
||||
|
||||
- `dify.event.name`: `"dify.node.execution"` or `"dify.node.execution.draft"`
|
||||
- `dify.event.signal`: `"span_detail"`
|
||||
- `trace_id`, `span_id`, `tenant_id`, `user_id`
|
||||
|
||||
### Standalone Logs
|
||||
|
||||
Logs without structural spans. Signal type: `metric_only`
|
||||
|
||||
#### `dify.message.run`
|
||||
|
||||
| Attribute | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `dify.event.name` | string | `"dify.message.run"` |
|
||||
| `dify.event.signal` | string | `"metric_only"` |
|
||||
| `trace_id` | string | OTEL trace ID (32-char hex) |
|
||||
| `span_id` | string | OTEL span ID (16-char hex) |
|
||||
| `tenant_id` | string | Tenant identifier |
|
||||
| `user_id` | string | User identifier (optional) |
|
||||
| `dify.app_id` | string | Application identifier |
|
||||
| `dify.message.id` | string | Message identifier |
|
||||
| `dify.conversation.id` | string | Conversation ID (optional) |
|
||||
| `dify.workflow.run_id` | string | Workflow run ID (optional) |
|
||||
| `dify.invoke_from` | string | `service-api`, `web-app`, `debugger`, `explore` |
|
||||
| `gen_ai.provider.name` | string | LLM provider |
|
||||
| `gen_ai.request.model` | string | LLM model |
|
||||
| `gen_ai.usage.input_tokens` | int | Input tokens |
|
||||
| `gen_ai.usage.output_tokens` | int | Output tokens |
|
||||
| `gen_ai.usage.total_tokens` | int | Total tokens |
|
||||
| `dify.message.status` | string | `succeeded`, `failed` |
|
||||
| `dify.message.error` | string | Error message (if failed) |
|
||||
| `dify.message.duration` | float | Duration (seconds) |
|
||||
| `dify.message.time_to_first_token` | float | TTFT (seconds) |
|
||||
| `dify.message.inputs` | string/JSON | Inputs (content-gated) |
|
||||
| `dify.message.outputs` | string/JSON | Outputs (content-gated) |
|
||||
|
||||
#### `dify.tool.execution`
|
||||
|
||||
| Attribute | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `dify.event.name` | string | `"dify.tool.execution"` |
|
||||
| `dify.event.signal` | string | `"metric_only"` |
|
||||
| `trace_id` | string | OTEL trace ID |
|
||||
| `span_id` | string | OTEL span ID |
|
||||
| `tenant_id` | string | Tenant identifier |
|
||||
| `dify.app_id` | string | Application identifier |
|
||||
| `dify.message.id` | string | Message identifier |
|
||||
| `dify.tool.name` | string | Tool name |
|
||||
| `dify.tool.duration` | float | Duration (seconds) |
|
||||
| `dify.tool.status` | string | `succeeded`, `failed` |
|
||||
| `dify.tool.error` | string | Error message (if failed) |
|
||||
| `dify.tool.inputs` | string/JSON | Inputs (content-gated) |
|
||||
| `dify.tool.outputs` | string/JSON | Outputs (content-gated) |
|
||||
| `dify.tool.parameters` | string/JSON | Parameters (content-gated) |
|
||||
| `dify.tool.config` | string/JSON | Configuration (content-gated) |
|
||||
|
||||
#### `dify.moderation.check`
|
||||
|
||||
| Attribute | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `dify.event.name` | string | `"dify.moderation.check"` |
|
||||
| `dify.event.signal` | string | `"metric_only"` |
|
||||
| `trace_id` | string | OTEL trace ID |
|
||||
| `span_id` | string | OTEL span ID |
|
||||
| `tenant_id` | string | Tenant identifier |
|
||||
| `dify.app_id` | string | Application identifier |
|
||||
| `dify.message.id` | string | Message identifier |
|
||||
| `dify.moderation.type` | string | `input`, `output` |
|
||||
| `dify.moderation.action` | string | `pass`, `block`, `flag` |
|
||||
| `dify.moderation.flagged` | boolean | Whether flagged |
|
||||
| `dify.moderation.categories` | JSON array | Flagged categories |
|
||||
| `dify.moderation.query` | string | Content (content-gated) |
|
||||
|
||||
#### `dify.suggested_question.generation`
|
||||
|
||||
| Attribute | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `dify.event.name` | string | `"dify.suggested_question.generation"` |
|
||||
| `dify.event.signal` | string | `"metric_only"` |
|
||||
| `trace_id` | string | OTEL trace ID |
|
||||
| `span_id` | string | OTEL span ID |
|
||||
| `tenant_id` | string | Tenant identifier |
|
||||
| `dify.app_id` | string | Application identifier |
|
||||
| `dify.message.id` | string | Message identifier |
|
||||
| `dify.suggested_question.count` | int | Number of questions |
|
||||
| `dify.suggested_question.duration` | float | Duration (seconds) |
|
||||
| `dify.suggested_question.status` | string | `succeeded`, `failed` |
|
||||
| `dify.suggested_question.error` | string | Error message (if failed) |
|
||||
| `dify.suggested_question.questions` | JSON array | Questions (content-gated) |
|
||||
|
||||
#### `dify.dataset.retrieval`
|
||||
|
||||
| Attribute | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `dify.event.name` | string | `"dify.dataset.retrieval"` |
|
||||
| `dify.event.signal` | string | `"metric_only"` |
|
||||
| `trace_id` | string | OTEL trace ID |
|
||||
| `span_id` | string | OTEL span ID |
|
||||
| `tenant_id` | string | Tenant identifier |
|
||||
| `dify.app_id` | string | Application identifier |
|
||||
| `dify.message.id` | string | Message identifier |
|
||||
| `dify.dataset.id` | string | Dataset identifier |
|
||||
| `dify.dataset.name` | string | Dataset name |
|
||||
| `dify.dataset.embedding_providers` | JSON array | Embedding model providers (one per dataset) |
|
||||
| `dify.dataset.embedding_models` | JSON array | Embedding models (one per dataset) |
|
||||
| `dify.retrieval.rerank_provider` | string | Rerank model provider |
|
||||
| `dify.retrieval.rerank_model` | string | Rerank model name |
|
||||
| `dify.retrieval.query` | string | Search query (content-gated) |
|
||||
| `dify.retrieval.document_count` | int | Documents retrieved |
|
||||
| `dify.retrieval.duration` | float | Duration (seconds) |
|
||||
| `dify.retrieval.status` | string | `succeeded`, `failed` |
|
||||
| `dify.retrieval.error` | string | Error message (if failed) |
|
||||
| `dify.dataset.documents` | JSON array | Documents (content-gated) |
|
||||
|
||||
#### `dify.generate_name.execution`
|
||||
|
||||
| Attribute | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `dify.event.name` | string | `"dify.generate_name.execution"` |
|
||||
| `dify.event.signal` | string | `"metric_only"` |
|
||||
| `trace_id` | string | OTEL trace ID |
|
||||
| `span_id` | string | OTEL span ID |
|
||||
| `tenant_id` | string | Tenant identifier |
|
||||
| `dify.app_id` | string | Application identifier |
|
||||
| `dify.conversation.id` | string | Conversation identifier |
|
||||
| `dify.generate_name.duration` | float | Duration (seconds) |
|
||||
| `dify.generate_name.status` | string | `succeeded`, `failed` |
|
||||
| `dify.generate_name.error` | string | Error message (if failed) |
|
||||
| `dify.generate_name.inputs` | string/JSON | Inputs (content-gated) |
|
||||
| `dify.generate_name.outputs` | string | Generated name (content-gated) |
|
||||
|
||||
#### `dify.prompt_generation.execution`
|
||||
|
||||
| Attribute | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `dify.event.name` | string | `"dify.prompt_generation.execution"` |
|
||||
| `dify.event.signal` | string | `"metric_only"` |
|
||||
| `trace_id` | string | OTEL trace ID |
|
||||
| `span_id` | string | OTEL span ID |
|
||||
| `tenant_id` | string | Tenant identifier |
|
||||
| `dify.app_id` | string | Application identifier |
|
||||
| `dify.prompt_generation.operation_type` | string | Operation type (see appendix) |
|
||||
| `gen_ai.provider.name` | string | LLM provider |
|
||||
| `gen_ai.request.model` | string | LLM model |
|
||||
| `gen_ai.usage.input_tokens` | int | Input tokens |
|
||||
| `gen_ai.usage.output_tokens` | int | Output tokens |
|
||||
| `gen_ai.usage.total_tokens` | int | Total tokens |
|
||||
| `dify.prompt_generation.duration` | float | Duration (seconds) |
|
||||
| `dify.prompt_generation.status` | string | `succeeded`, `failed` |
|
||||
| `dify.prompt_generation.error` | string | Error message (if failed) |
|
||||
| `dify.prompt_generation.instruction` | string | Instruction (content-gated) |
|
||||
| `dify.prompt_generation.output` | string/JSON | Output (content-gated) |
|
||||
|
||||
#### `dify.app.created`
|
||||
|
||||
| Attribute | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `dify.event.name` | string | `"dify.app.created"` |
|
||||
| `dify.event.signal` | string | `"metric_only"` |
|
||||
| `tenant_id` | string | Tenant identifier |
|
||||
| `dify.app_id` | string | Application identifier |
|
||||
| `dify.app.mode` | string | `chat`, `completion`, `agent-chat`, `workflow` |
|
||||
| `dify.app.created_at` | string | Timestamp (ISO 8601) |
|
||||
|
||||
#### `dify.app.updated`
|
||||
|
||||
| Attribute | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `dify.event.name` | string | `"dify.app.updated"` |
|
||||
| `dify.event.signal` | string | `"metric_only"` |
|
||||
| `tenant_id` | string | Tenant identifier |
|
||||
| `dify.app_id` | string | Application identifier |
|
||||
| `dify.app.updated_at` | string | Timestamp (ISO 8601) |
|
||||
|
||||
#### `dify.app.deleted`
|
||||
|
||||
| Attribute | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `dify.event.name` | string | `"dify.app.deleted"` |
|
||||
| `dify.event.signal` | string | `"metric_only"` |
|
||||
| `tenant_id` | string | Tenant identifier |
|
||||
| `dify.app_id` | string | Application identifier |
|
||||
| `dify.app.deleted_at` | string | Timestamp (ISO 8601) |
|
||||
|
||||
#### `dify.feedback.created`
|
||||
|
||||
| Attribute | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `dify.event.name` | string | `"dify.feedback.created"` |
|
||||
| `dify.event.signal` | string | `"metric_only"` |
|
||||
| `trace_id` | string | OTEL trace ID |
|
||||
| `span_id` | string | OTEL span ID |
|
||||
| `tenant_id` | string | Tenant identifier |
|
||||
| `dify.app_id` | string | Application identifier |
|
||||
| `dify.message.id` | string | Message identifier |
|
||||
| `dify.feedback.rating` | string | `like`, `dislike`, `null` |
|
||||
| `dify.feedback.content` | string | Feedback text (content-gated) |
|
||||
| `dify.feedback.created_at` | string | Timestamp (ISO 8601) |
|
||||
|
||||
#### `dify.telemetry.rehydration_failed`
|
||||
|
||||
Diagnostic event for telemetry system health monitoring.
|
||||
|
||||
| Attribute | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `dify.event.name` | string | `"dify.telemetry.rehydration_failed"` |
|
||||
| `dify.event.signal` | string | `"metric_only"` |
|
||||
| `tenant_id` | string | Tenant identifier |
|
||||
| `dify.telemetry.error` | string | Error message |
|
||||
| `dify.telemetry.payload_type` | string | Payload type (see appendix) |
|
||||
| `dify.telemetry.correlation_id` | string | Correlation ID |
|
||||
|
||||
## Content-Gated Attributes
|
||||
|
||||
When `ENTERPRISE_INCLUDE_CONTENT=false`, these attributes are replaced with reference strings (`ref:{id_type}={uuid}`).
|
||||
|
||||
| Attribute | Signal |
|
||||
|-----------|--------|
|
||||
| `dify.workflow.inputs` | `dify.workflow.run` |
|
||||
| `dify.workflow.outputs` | `dify.workflow.run` |
|
||||
| `dify.workflow.query` | `dify.workflow.run` |
|
||||
| `dify.node.inputs` | `dify.node.execution` |
|
||||
| `dify.node.outputs` | `dify.node.execution` |
|
||||
| `dify.node.process_data` | `dify.node.execution` |
|
||||
| `dify.message.inputs` | `dify.message.run` |
|
||||
| `dify.message.outputs` | `dify.message.run` |
|
||||
| `dify.tool.inputs` | `dify.tool.execution` |
|
||||
| `dify.tool.outputs` | `dify.tool.execution` |
|
||||
| `dify.tool.parameters` | `dify.tool.execution` |
|
||||
| `dify.tool.config` | `dify.tool.execution` |
|
||||
| `dify.moderation.query` | `dify.moderation.check` |
|
||||
| `dify.suggested_question.questions` | `dify.suggested_question.generation` |
|
||||
| `dify.retrieval.query` | `dify.dataset.retrieval` |
|
||||
| `dify.dataset.documents` | `dify.dataset.retrieval` |
|
||||
| `dify.generate_name.inputs` | `dify.generate_name.execution` |
|
||||
| `dify.generate_name.outputs` | `dify.generate_name.execution` |
|
||||
| `dify.prompt_generation.instruction` | `dify.prompt_generation.execution` |
|
||||
| `dify.prompt_generation.output` | `dify.prompt_generation.execution` |
|
||||
| `dify.feedback.content` | `dify.feedback.created` |
|
||||
|
||||
## Appendix
|
||||
|
||||
### Operation Types
|
||||
|
||||
- `workflow`, `node_execution`, `message`, `rule_generate`, `code_generate`, `structured_output`, `instruction_modify`
|
||||
|
||||
### Node Types
|
||||
|
||||
- `start`, `end`, `answer`, `llm`, `knowledge-retrieval`, `knowledge-index`, `if-else`, `code`, `template-transform`, `question-classifier`, `http-request`, `tool`, `datasource`, `variable-aggregator`, `loop`, `iteration`, `parameter-extractor`, `assigner`, `document-extractor`, `list-operator`, `agent`, `trigger-webhook`, `trigger-schedule`, `trigger-plugin`, `human-input`
|
||||
|
||||
### Workflow Statuses
|
||||
|
||||
- `running`, `succeeded`, `failed`, `stopped`, `partial-succeeded`, `paused`
|
||||
|
||||
### Payload Types
|
||||
|
||||
- `workflow`, `node`, `message`, `tool`, `moderation`, `suggested_question`, `dataset_retrieval`, `generate_name`, `prompt_generation`, `app`, `feedback`
|
||||
|
||||
### Null Value Behavior
|
||||
|
||||
**Spans:** Attributes with `null` values are omitted.
|
||||
|
||||
**Logs:** Attributes with `null` values appear as `null` in JSON.
|
||||
|
||||
**Content-Gated:** Replaced with reference strings, not set to `null`.
|
||||
121
api/enterprise/telemetry/README.md
Normal file
121
api/enterprise/telemetry/README.md
Normal file
@ -0,0 +1,121 @@
|
||||
# Dify Enterprise Telemetry
|
||||
|
||||
This document provides an overview of the Dify Enterprise OpenTelemetry (OTEL) exporter and how to configure it for integration with observability stacks like Prometheus, Grafana, Jaeger, or Honeycomb.
|
||||
|
||||
## Overview
|
||||
|
||||
Dify Enterprise uses a "slim span + rich companion log" architecture to provide high-fidelity observability without overwhelming trace storage.
|
||||
|
||||
- **Traces (Spans)**: Capture the structure, identity, and timing of high-level operations (Workflows and Nodes).
|
||||
- **Structured Logs**: Provide deep context (inputs, outputs, metadata) for every event, correlated to spans via `trace_id` and `span_id`.
|
||||
- **Metrics**: Provide 100% accurate counters and histograms for usage, performance, and error tracking.
|
||||
|
||||
### Signal Architecture
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
A[Workflow Run] -->|Span| B(dify.workflow.run)
|
||||
A -->|Log| C(dify.workflow.run detail)
|
||||
B ---|trace_id| C
|
||||
|
||||
D[Node Execution] -->|Span| E(dify.node.execution)
|
||||
D -->|Log| F(dify.node.execution detail)
|
||||
E ---|span_id| F
|
||||
|
||||
G[Message/Tool/etc] -->|Log| H(dify.* event)
|
||||
G -->|Metric| I(dify.* counter/histogram)
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
The Enterprise OTEL exporter is configured via environment variables.
|
||||
|
||||
| Variable | Description | Default |
|
||||
|----------|-------------|---------|
|
||||
| `ENTERPRISE_ENABLED` | Master switch for all enterprise features. | `false` |
|
||||
| `ENTERPRISE_TELEMETRY_ENABLED` | Master switch for enterprise telemetry. | `false` |
|
||||
| `ENTERPRISE_OTLP_ENDPOINT` | OTLP collector endpoint (e.g., `http://otel-collector:4318`). | - |
|
||||
| `ENTERPRISE_OTLP_HEADERS` | Custom headers for OTLP requests (e.g., `x-scope-orgid=tenant1`). | - |
|
||||
| `ENTERPRISE_OTLP_PROTOCOL` | OTLP transport protocol (`http` or `grpc`). | `http` |
|
||||
| `ENTERPRISE_OTLP_API_KEY` | Bearer token for authentication. | - |
|
||||
| `ENTERPRISE_INCLUDE_CONTENT` | Whether to include sensitive content (inputs/outputs) in logs. | `false` |
|
||||
| `ENTERPRISE_SERVICE_NAME` | Service name reported to OTEL. | `dify` |
|
||||
| `ENTERPRISE_OTEL_SAMPLING_RATE` | Sampling rate for traces (0.0 to 1.0). Metrics are always 100%. | `1.0` |
|
||||
|
||||
## Correlation Model
|
||||
|
||||
Dify uses deterministic ID generation to ensure signals are correlated across different services and asynchronous tasks.
|
||||
|
||||
### ID Generation Rules
|
||||
|
||||
- `trace_id`: Derived from the correlation ID (workflow_run_id or node_execution_id for drafts) using `int(UUID(correlation_id))`
|
||||
- `span_id`: Derived from the source ID using the lower 64 bits of `UUID(source_id)`
|
||||
|
||||
### Scenario A: Simple Workflow
|
||||
|
||||
A single workflow run with multiple nodes. All spans and logs share the same `trace_id` (derived from `workflow_run_id`).
|
||||
|
||||
```
|
||||
trace_id = UUID(workflow_run_id)
|
||||
├── [root span] dify.workflow.run (span_id = hash(workflow_run_id))
|
||||
│ ├── [child] dify.node.execution - "Start" (span_id = hash(node_exec_id_1))
|
||||
│ ├── [child] dify.node.execution - "LLM" (span_id = hash(node_exec_id_2))
|
||||
│ └── [child] dify.node.execution - "End" (span_id = hash(node_exec_id_3))
|
||||
```
|
||||
|
||||
### Scenario B: Nested Sub-Workflow
|
||||
|
||||
A workflow calling another workflow via a Tool or Sub-workflow node. The child workflow's spans are linked to the parent via `parent_span_id`. Both workflows share the same trace_id.
|
||||
|
||||
```
|
||||
trace_id = UUID(outer_workflow_run_id) ← shared across both workflows
|
||||
├── [root] dify.workflow.run (outer) (span_id = hash(outer_workflow_run_id))
|
||||
│ ├── dify.node.execution - "Start Node"
|
||||
│ ├── dify.node.execution - "Tool Node" (triggers sub-workflow)
|
||||
│ │ └── [child] dify.workflow.run (inner) (span_id = hash(inner_workflow_run_id))
|
||||
│ │ ├── dify.node.execution - "Inner Start"
|
||||
│ │ └── dify.node.execution - "Inner End"
|
||||
│ └── dify.node.execution - "End Node"
|
||||
```
|
||||
|
||||
**Key attributes for nested workflows:**
|
||||
|
||||
- Inner workflow's `dify.parent.trace_id` = outer `workflow_run_id`
|
||||
- Inner workflow's `dify.parent.node.execution_id` = tool node's `execution_id`
|
||||
- Inner workflow's `dify.parent.workflow.run_id` = outer `workflow_run_id`
|
||||
- Inner workflow's `dify.parent.app.id` = outer `app_id`
|
||||
|
||||
### Scenario C: Draft Node Execution
|
||||
|
||||
A single node run in isolation (debugger/preview mode). It creates its own trace where the node span is the root.
|
||||
|
||||
```
|
||||
trace_id = UUID(node_execution_id) ← own trace, NOT part of any workflow
|
||||
└── dify.node.execution.draft (span_id = hash(node_execution_id))
|
||||
```
|
||||
|
||||
**Key difference:** Draft executions use `node_execution_id` as the correlation_id, so they are NOT children of any workflow trace.
|
||||
|
||||
## Content Gating
|
||||
|
||||
When `ENTERPRISE_INCLUDE_CONTENT` is set to `false`, sensitive content attributes (inputs, outputs, queries) are replaced with reference strings (e.g., `ref:workflow_run_id=...`) to prevent data leakage to the OTEL collector.
|
||||
|
||||
**Reference String Format:**
|
||||
|
||||
```
|
||||
ref:{id_type}={uuid}
|
||||
```
|
||||
|
||||
**Examples:**
|
||||
|
||||
```
|
||||
ref:workflow_run_id=550e8400-e29b-41d4-a716-446655440000
|
||||
ref:node_execution_id=660e8400-e29b-41d4-a716-446655440001
|
||||
ref:message_id=770e8400-e29b-41d4-a716-446655440002
|
||||
```
|
||||
|
||||
To retrieve actual content when gating is enabled, query the Dify database using the provided UUID.
|
||||
|
||||
## Reference
|
||||
|
||||
For a complete list of telemetry signals, attributes, and data structures, see [DATA_DICTIONARY.md](./DATA_DICTIONARY.md).
|
||||
0
api/enterprise/telemetry/__init__.py
Normal file
0
api/enterprise/telemetry/__init__.py
Normal file
73
api/enterprise/telemetry/contracts.py
Normal file
73
api/enterprise/telemetry/contracts.py
Normal file
@ -0,0 +1,73 @@
|
||||
"""Telemetry gateway contracts and data structures.
|
||||
|
||||
This module defines the envelope format for telemetry events and the routing
|
||||
configuration that determines how each event type is processed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
|
||||
class TelemetryCase(StrEnum):
|
||||
"""Enumeration of all known telemetry event cases."""
|
||||
|
||||
WORKFLOW_RUN = "workflow_run"
|
||||
NODE_EXECUTION = "node_execution"
|
||||
DRAFT_NODE_EXECUTION = "draft_node_execution"
|
||||
MESSAGE_RUN = "message_run"
|
||||
TOOL_EXECUTION = "tool_execution"
|
||||
MODERATION_CHECK = "moderation_check"
|
||||
SUGGESTED_QUESTION = "suggested_question"
|
||||
DATASET_RETRIEVAL = "dataset_retrieval"
|
||||
GENERATE_NAME = "generate_name"
|
||||
PROMPT_GENERATION = "prompt_generation"
|
||||
APP_CREATED = "app_created"
|
||||
APP_UPDATED = "app_updated"
|
||||
APP_DELETED = "app_deleted"
|
||||
FEEDBACK_CREATED = "feedback_created"
|
||||
|
||||
|
||||
class SignalType(StrEnum):
|
||||
"""Signal routing type for telemetry cases."""
|
||||
|
||||
TRACE = "trace"
|
||||
METRIC_LOG = "metric_log"
|
||||
|
||||
|
||||
class CaseRoute(BaseModel):
|
||||
"""Routing configuration for a telemetry case.
|
||||
|
||||
Attributes:
|
||||
signal_type: The type of signal (trace or metric_log).
|
||||
ce_eligible: Whether this case is eligible for community edition tracing.
|
||||
"""
|
||||
|
||||
signal_type: SignalType
|
||||
ce_eligible: bool
|
||||
|
||||
|
||||
class TelemetryEnvelope(BaseModel):
|
||||
"""Envelope for telemetry events.
|
||||
|
||||
Attributes:
|
||||
case: The telemetry case type.
|
||||
tenant_id: The tenant identifier.
|
||||
event_id: Unique event identifier for deduplication.
|
||||
payload: The main event payload (inline for small payloads,
|
||||
empty when offloaded to storage via ``payload_ref``).
|
||||
metadata: Optional metadata dictionary. When the gateway
|
||||
offloads a large payload to object storage, this contains
|
||||
``{"payload_ref": "<storage_key>"}``.
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(extra="forbid", use_enum_values=False)
|
||||
|
||||
case: TelemetryCase
|
||||
tenant_id: str
|
||||
event_id: str
|
||||
payload: dict[str, Any]
|
||||
metadata: dict[str, Any] | None = None
|
||||
89
api/enterprise/telemetry/draft_trace.py
Normal file
89
api/enterprise/telemetry/draft_trace.py
Normal file
@ -0,0 +1,89 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping
|
||||
from typing import Any
|
||||
|
||||
from core.telemetry import TelemetryContext, TelemetryEvent, TraceTaskName
|
||||
from core.telemetry import emit as telemetry_emit
|
||||
from dify_graph.enums import WorkflowNodeExecutionMetadataKey
|
||||
from models.workflow import WorkflowNodeExecutionModel
|
||||
|
||||
|
||||
def enqueue_draft_node_execution_trace(
|
||||
*,
|
||||
execution: WorkflowNodeExecutionModel,
|
||||
outputs: Mapping[str, Any] | None,
|
||||
workflow_execution_id: str | None,
|
||||
user_id: str,
|
||||
) -> None:
|
||||
node_data = _build_node_execution_data(
|
||||
execution=execution,
|
||||
outputs=outputs,
|
||||
workflow_execution_id=workflow_execution_id,
|
||||
)
|
||||
telemetry_emit(
|
||||
TelemetryEvent(
|
||||
name=TraceTaskName.DRAFT_NODE_EXECUTION_TRACE,
|
||||
context=TelemetryContext(
|
||||
tenant_id=execution.tenant_id,
|
||||
user_id=user_id,
|
||||
app_id=execution.app_id,
|
||||
),
|
||||
payload={"node_execution_data": node_data},
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def _build_node_execution_data(
|
||||
*,
|
||||
execution: WorkflowNodeExecutionModel,
|
||||
outputs: Mapping[str, Any] | None,
|
||||
workflow_execution_id: str | None,
|
||||
) -> dict[str, Any]:
|
||||
metadata = execution.execution_metadata_dict
|
||||
node_outputs = outputs if outputs is not None else execution.outputs_dict
|
||||
execution_id = workflow_execution_id or execution.workflow_run_id or execution.id
|
||||
process_data = execution.process_data_dict or {}
|
||||
|
||||
# Extract token breakdown from outputs.usage (set by LLM node)
|
||||
usage: Mapping[str, Any] = {}
|
||||
if isinstance(node_outputs, Mapping):
|
||||
raw_usage = node_outputs.get("usage")
|
||||
if isinstance(raw_usage, Mapping):
|
||||
usage = raw_usage
|
||||
|
||||
return {
|
||||
"workflow_id": execution.workflow_id,
|
||||
"workflow_execution_id": execution_id,
|
||||
"tenant_id": execution.tenant_id,
|
||||
"app_id": execution.app_id,
|
||||
"node_execution_id": execution.id,
|
||||
"node_id": execution.node_id,
|
||||
"node_type": execution.node_type,
|
||||
"title": execution.title,
|
||||
"status": execution.status,
|
||||
"error": execution.error,
|
||||
"elapsed_time": execution.elapsed_time,
|
||||
"index": execution.index,
|
||||
"predecessor_node_id": execution.predecessor_node_id,
|
||||
"created_at": execution.created_at,
|
||||
"finished_at": execution.finished_at,
|
||||
"total_tokens": metadata.get(WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS, 0),
|
||||
"total_price": metadata.get(WorkflowNodeExecutionMetadataKey.TOTAL_PRICE, 0.0),
|
||||
"currency": metadata.get(WorkflowNodeExecutionMetadataKey.CURRENCY),
|
||||
"model_provider": process_data.get("model_provider"),
|
||||
"model_name": process_data.get("model_name"),
|
||||
"prompt_tokens": usage.get("prompt_tokens"),
|
||||
"completion_tokens": usage.get("completion_tokens"),
|
||||
"tool_name": (metadata.get(WorkflowNodeExecutionMetadataKey.TOOL_INFO) or {}).get("tool_name")
|
||||
if isinstance(metadata.get(WorkflowNodeExecutionMetadataKey.TOOL_INFO), dict)
|
||||
else None,
|
||||
"iteration_id": metadata.get(WorkflowNodeExecutionMetadataKey.ITERATION_ID),
|
||||
"iteration_index": metadata.get(WorkflowNodeExecutionMetadataKey.ITERATION_INDEX),
|
||||
"loop_id": metadata.get(WorkflowNodeExecutionMetadataKey.LOOP_ID),
|
||||
"loop_index": metadata.get(WorkflowNodeExecutionMetadataKey.LOOP_INDEX),
|
||||
"parallel_id": metadata.get(WorkflowNodeExecutionMetadataKey.PARALLEL_ID),
|
||||
"node_inputs": execution.inputs_dict,
|
||||
"node_outputs": node_outputs,
|
||||
"process_data": execution.process_data_dict,
|
||||
}
|
||||
966
api/enterprise/telemetry/enterprise_trace.py
Normal file
966
api/enterprise/telemetry/enterprise_trace.py
Normal file
@ -0,0 +1,966 @@
|
||||
"""Enterprise trace handler — duck-typed, NOT a BaseTraceInstance subclass.
|
||||
|
||||
Invoked directly in the Celery task, not through OpsTraceManager dispatch.
|
||||
Only requires a matching ``trace(trace_info)`` method signature.
|
||||
|
||||
Signal strategy:
|
||||
- **Traces (spans)**: workflow run, node execution, draft node execution only.
|
||||
- **Metrics + structured logs**: all other event types.
|
||||
|
||||
Token metric labels (unified structure):
|
||||
All token metrics (dify.tokens.input, dify.tokens.output, dify.tokens.total) use the
|
||||
same label set for consistent filtering and aggregation:
|
||||
- tenant_id: Tenant identifier
|
||||
- app_id: Application identifier
|
||||
- operation_type: Source of token usage (workflow | node_execution | message | rule_generate | etc.)
|
||||
- model_provider: LLM provider name (empty string if not applicable)
|
||||
- model_name: LLM model name (empty string if not applicable)
|
||||
- node_type: Workflow node type (empty string if not node_execution)
|
||||
|
||||
This unified structure allows filtering by operation_type to separate:
|
||||
- Workflow-level aggregates (operation_type=workflow)
|
||||
- Individual node executions (operation_type=node_execution)
|
||||
- Direct message calls (operation_type=message)
|
||||
- Prompt generation operations (operation_type=rule_generate, code_generate, etc.)
|
||||
|
||||
Without this, tokens are double-counted when querying totals (workflow totals include
|
||||
node totals, since workflow.total_tokens is the sum of all node tokens).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, cast
|
||||
|
||||
from opentelemetry.util.types import AttributeValue
|
||||
|
||||
from core.ops.entities.trace_entity import (
|
||||
BaseTraceInfo,
|
||||
DatasetRetrievalTraceInfo,
|
||||
DraftNodeExecutionTrace,
|
||||
GenerateNameTraceInfo,
|
||||
MessageTraceInfo,
|
||||
ModerationTraceInfo,
|
||||
OperationType,
|
||||
PromptGenerationTraceInfo,
|
||||
SuggestedQuestionTraceInfo,
|
||||
ToolTraceInfo,
|
||||
WorkflowNodeTraceInfo,
|
||||
WorkflowTraceInfo,
|
||||
)
|
||||
from enterprise.telemetry.entities import (
|
||||
EnterpriseTelemetryCounter,
|
||||
EnterpriseTelemetryEvent,
|
||||
EnterpriseTelemetryHistogram,
|
||||
EnterpriseTelemetrySpan,
|
||||
TokenMetricLabels,
|
||||
)
|
||||
from enterprise.telemetry.telemetry_log import emit_metric_only_event, emit_telemetry_log
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EnterpriseOtelTrace:
|
||||
"""Duck-typed enterprise trace handler.
|
||||
|
||||
``*_trace`` methods emit spans (workflow/node only) or structured logs
|
||||
(all other events), plus metrics at 100 % accuracy.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
from extensions.ext_enterprise_telemetry import get_enterprise_exporter
|
||||
|
||||
exporter = get_enterprise_exporter()
|
||||
if exporter is None:
|
||||
raise RuntimeError("EnterpriseOtelTrace instantiated but exporter is not initialized")
|
||||
self._exporter = exporter
|
||||
|
||||
def trace(self, trace_info: BaseTraceInfo) -> None:
|
||||
if isinstance(trace_info, WorkflowTraceInfo):
|
||||
self._workflow_trace(trace_info)
|
||||
elif isinstance(trace_info, MessageTraceInfo):
|
||||
self._message_trace(trace_info)
|
||||
elif isinstance(trace_info, ToolTraceInfo):
|
||||
self._tool_trace(trace_info)
|
||||
elif isinstance(trace_info, DraftNodeExecutionTrace):
|
||||
self._draft_node_execution_trace(trace_info)
|
||||
elif isinstance(trace_info, WorkflowNodeTraceInfo):
|
||||
self._node_execution_trace(trace_info)
|
||||
elif isinstance(trace_info, ModerationTraceInfo):
|
||||
self._moderation_trace(trace_info)
|
||||
elif isinstance(trace_info, SuggestedQuestionTraceInfo):
|
||||
self._suggested_question_trace(trace_info)
|
||||
elif isinstance(trace_info, DatasetRetrievalTraceInfo):
|
||||
self._dataset_retrieval_trace(trace_info)
|
||||
elif isinstance(trace_info, GenerateNameTraceInfo):
|
||||
self._generate_name_trace(trace_info)
|
||||
elif isinstance(trace_info, PromptGenerationTraceInfo):
|
||||
self._prompt_generation_trace(trace_info)
|
||||
else:
|
||||
raise AssertionError("this statment should be unreachable")
|
||||
|
||||
def _common_attrs(self, trace_info: BaseTraceInfo) -> dict[str, Any]:
|
||||
metadata = self._metadata(trace_info)
|
||||
tenant_id, app_id, user_id = self._context_ids(trace_info, metadata)
|
||||
return {
|
||||
"dify.trace_id": trace_info.resolved_trace_id,
|
||||
"dify.tenant_id": tenant_id,
|
||||
"dify.app_id": app_id,
|
||||
"dify.app.name": metadata.get("app_name"),
|
||||
"dify.workspace.name": metadata.get("workspace_name"),
|
||||
"gen_ai.user.id": user_id,
|
||||
"dify.message.id": trace_info.message_id,
|
||||
}
|
||||
|
||||
def _metadata(self, trace_info: BaseTraceInfo) -> dict[str, Any]:
|
||||
return trace_info.metadata
|
||||
|
||||
def _context_ids(
|
||||
self,
|
||||
trace_info: BaseTraceInfo,
|
||||
metadata: dict[str, Any],
|
||||
) -> tuple[str | None, str | None, str | None]:
|
||||
tenant_id = getattr(trace_info, "tenant_id", None) or metadata.get("tenant_id")
|
||||
app_id = getattr(trace_info, "app_id", None) or metadata.get("app_id")
|
||||
user_id = getattr(trace_info, "user_id", None) or metadata.get("user_id")
|
||||
return tenant_id, app_id, user_id
|
||||
|
||||
def _labels(self, **values: AttributeValue) -> dict[str, AttributeValue]:
|
||||
return dict(values)
|
||||
|
||||
def _safe_payload_value(self, value: Any) -> str | dict[str, Any] | list[object] | None:
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
if isinstance(value, dict):
|
||||
return cast(dict[str, Any], value)
|
||||
if isinstance(value, list):
|
||||
items: list[object] = []
|
||||
for item in cast(list[object], value):
|
||||
items.append(item)
|
||||
return items
|
||||
return None
|
||||
|
||||
def _content_or_ref(self, value: Any, ref: str) -> Any:
|
||||
if self._exporter.include_content:
|
||||
return self._maybe_json(value)
|
||||
return ref
|
||||
|
||||
def _maybe_json(self, value: Any) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
try:
|
||||
return json.dumps(value, default=str)
|
||||
except (TypeError, ValueError):
|
||||
return str(value)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# SPAN-emitting handlers (workflow, node execution, draft node)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _workflow_trace(self, info: WorkflowTraceInfo) -> None:
|
||||
metadata = self._metadata(info)
|
||||
tenant_id, app_id, user_id = self._context_ids(info, metadata)
|
||||
# -- Span attrs: identity + structure + status + timing + gen_ai scalars --
|
||||
span_attrs: dict[str, Any] = {
|
||||
"dify.trace_id": info.resolved_trace_id,
|
||||
"dify.tenant_id": tenant_id,
|
||||
"dify.app_id": app_id,
|
||||
"dify.workflow.id": info.workflow_id,
|
||||
"dify.workflow.run_id": info.workflow_run_id,
|
||||
"dify.workflow.status": info.workflow_run_status,
|
||||
"dify.workflow.error": info.error,
|
||||
"dify.workflow.elapsed_time": info.workflow_run_elapsed_time,
|
||||
"dify.invoke_from": metadata.get("triggered_from"),
|
||||
"dify.conversation.id": info.conversation_id,
|
||||
"dify.message.id": info.message_id,
|
||||
"dify.invoked_by": info.invoked_by,
|
||||
"gen_ai.usage.total_tokens": info.total_tokens,
|
||||
"gen_ai.user.id": user_id,
|
||||
}
|
||||
|
||||
trace_correlation_override, parent_span_id_source = info.resolved_parent_context
|
||||
|
||||
parent_ctx = metadata.get("parent_trace_context")
|
||||
if isinstance(parent_ctx, dict):
|
||||
parent_ctx_dict = cast(dict[str, Any], parent_ctx)
|
||||
span_attrs["dify.parent.trace_id"] = parent_ctx_dict.get("trace_id")
|
||||
span_attrs["dify.parent.node.execution_id"] = parent_ctx_dict.get("parent_node_execution_id")
|
||||
span_attrs["dify.parent.workflow.run_id"] = parent_ctx_dict.get("parent_workflow_run_id")
|
||||
span_attrs["dify.parent.app.id"] = parent_ctx_dict.get("parent_app_id")
|
||||
|
||||
self._exporter.export_span(
|
||||
EnterpriseTelemetrySpan.WORKFLOW_RUN,
|
||||
span_attrs,
|
||||
correlation_id=info.workflow_run_id,
|
||||
span_id_source=info.workflow_run_id,
|
||||
start_time=info.start_time,
|
||||
end_time=info.end_time,
|
||||
trace_correlation_override=trace_correlation_override,
|
||||
parent_span_id_source=parent_span_id_source,
|
||||
)
|
||||
|
||||
# -- Companion log: ALL attrs (span + detail) for full picture --
|
||||
log_attrs: dict[str, Any] = {**span_attrs}
|
||||
log_attrs.update(
|
||||
{
|
||||
"dify.app.name": metadata.get("app_name"),
|
||||
"dify.workspace.name": metadata.get("workspace_name"),
|
||||
"gen_ai.user.id": user_id,
|
||||
"gen_ai.usage.total_tokens": info.total_tokens,
|
||||
"dify.workflow.version": info.workflow_run_version,
|
||||
}
|
||||
)
|
||||
|
||||
ref = f"ref:workflow_run_id={info.workflow_run_id}"
|
||||
log_attrs["dify.workflow.inputs"] = self._content_or_ref(info.workflow_run_inputs, ref)
|
||||
log_attrs["dify.workflow.outputs"] = self._content_or_ref(info.workflow_run_outputs, ref)
|
||||
log_attrs["dify.workflow.query"] = self._content_or_ref(info.query, ref)
|
||||
|
||||
emit_telemetry_log(
|
||||
event_name=EnterpriseTelemetryEvent.WORKFLOW_RUN,
|
||||
attributes=log_attrs,
|
||||
signal="span_detail",
|
||||
trace_id_source=info.workflow_run_id,
|
||||
span_id_source=info.workflow_run_id,
|
||||
tenant_id=tenant_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
# -- Metrics --
|
||||
labels = self._labels(
|
||||
tenant_id=tenant_id or "",
|
||||
app_id=app_id or "",
|
||||
)
|
||||
token_labels = TokenMetricLabels(
|
||||
tenant_id=tenant_id or "",
|
||||
app_id=app_id or "",
|
||||
operation_type=OperationType.WORKFLOW,
|
||||
model_provider="",
|
||||
model_name="",
|
||||
node_type="",
|
||||
).to_dict()
|
||||
self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, token_labels)
|
||||
if info.prompt_tokens is not None and info.prompt_tokens > 0:
|
||||
self._exporter.increment_counter(EnterpriseTelemetryCounter.INPUT_TOKENS, info.prompt_tokens, token_labels)
|
||||
if info.completion_tokens is not None and info.completion_tokens > 0:
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.OUTPUT_TOKENS, info.completion_tokens, token_labels
|
||||
)
|
||||
invoke_from = metadata.get("triggered_from", "")
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.REQUESTS,
|
||||
1,
|
||||
self._labels(
|
||||
**labels,
|
||||
type="workflow",
|
||||
status=info.workflow_run_status,
|
||||
invoke_from=invoke_from,
|
||||
),
|
||||
)
|
||||
# Prefer wall-clock timestamps over the elapsed_time field: elapsed_time defaults
|
||||
# to 0 in the DB and can be stale if the Celery write races with the trace task.
|
||||
# start_time = workflow_run.created_at, end_time = workflow_run.finished_at.
|
||||
if info.start_time and info.end_time:
|
||||
workflow_duration = (info.end_time - info.start_time).total_seconds()
|
||||
elif info.workflow_run_elapsed_time:
|
||||
workflow_duration = float(info.workflow_run_elapsed_time)
|
||||
else:
|
||||
workflow_duration = 0.0
|
||||
self._exporter.record_histogram(
|
||||
EnterpriseTelemetryHistogram.WORKFLOW_DURATION,
|
||||
workflow_duration,
|
||||
self._labels(
|
||||
**labels,
|
||||
status=info.workflow_run_status,
|
||||
),
|
||||
)
|
||||
|
||||
if info.error:
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.ERRORS,
|
||||
1,
|
||||
self._labels(
|
||||
**labels,
|
||||
type="workflow",
|
||||
),
|
||||
)
|
||||
|
||||
def _node_execution_trace(self, info: WorkflowNodeTraceInfo) -> None:
|
||||
self._emit_node_execution_trace(info, EnterpriseTelemetrySpan.NODE_EXECUTION, "node")
|
||||
|
||||
def _draft_node_execution_trace(self, info: DraftNodeExecutionTrace) -> None:
|
||||
self._emit_node_execution_trace(
|
||||
info,
|
||||
EnterpriseTelemetrySpan.DRAFT_NODE_EXECUTION,
|
||||
"draft_node",
|
||||
correlation_id_override=info.node_execution_id,
|
||||
trace_correlation_override_param=info.workflow_run_id,
|
||||
)
|
||||
|
||||
def _emit_node_execution_trace(
|
||||
self,
|
||||
info: WorkflowNodeTraceInfo,
|
||||
span_name: EnterpriseTelemetrySpan,
|
||||
request_type: str,
|
||||
correlation_id_override: str | None = None,
|
||||
trace_correlation_override_param: str | None = None,
|
||||
) -> None:
|
||||
metadata = self._metadata(info)
|
||||
tenant_id, app_id, user_id = self._context_ids(info, metadata)
|
||||
# -- Span attrs: identity + structure + status + timing + gen_ai scalars --
|
||||
span_attrs: dict[str, Any] = {
|
||||
"dify.trace_id": info.resolved_trace_id,
|
||||
"dify.tenant_id": tenant_id,
|
||||
"dify.app_id": app_id,
|
||||
"dify.workflow.id": info.workflow_id,
|
||||
"dify.workflow.run_id": info.workflow_run_id,
|
||||
"dify.message.id": info.message_id,
|
||||
"dify.conversation.id": metadata.get("conversation_id"),
|
||||
"dify.node.execution_id": info.node_execution_id,
|
||||
"dify.node.id": info.node_id,
|
||||
"dify.node.type": info.node_type,
|
||||
"dify.node.title": info.title,
|
||||
"dify.node.status": info.status,
|
||||
"dify.node.error": info.error,
|
||||
"dify.node.elapsed_time": info.elapsed_time,
|
||||
"dify.node.index": info.index,
|
||||
"dify.node.predecessor_node_id": info.predecessor_node_id,
|
||||
"dify.node.iteration_id": info.iteration_id,
|
||||
"dify.node.loop_id": info.loop_id,
|
||||
"dify.node.parallel_id": info.parallel_id,
|
||||
"dify.node.invoked_by": info.invoked_by,
|
||||
"gen_ai.usage.input_tokens": info.prompt_tokens,
|
||||
"gen_ai.usage.output_tokens": info.completion_tokens,
|
||||
"gen_ai.usage.total_tokens": info.total_tokens,
|
||||
"gen_ai.request.model": info.model_name,
|
||||
"gen_ai.provider.name": info.model_provider,
|
||||
"gen_ai.user.id": user_id,
|
||||
}
|
||||
|
||||
resolved_override, _ = info.resolved_parent_context
|
||||
trace_correlation_override = trace_correlation_override_param or resolved_override
|
||||
|
||||
effective_correlation_id = correlation_id_override or info.workflow_run_id
|
||||
self._exporter.export_span(
|
||||
span_name,
|
||||
span_attrs,
|
||||
correlation_id=effective_correlation_id,
|
||||
span_id_source=info.node_execution_id,
|
||||
start_time=info.start_time,
|
||||
end_time=info.end_time,
|
||||
trace_correlation_override=trace_correlation_override,
|
||||
)
|
||||
|
||||
# -- Companion log: ALL attrs (span + detail) --
|
||||
log_attrs: dict[str, Any] = {**span_attrs}
|
||||
log_attrs.update(
|
||||
{
|
||||
"dify.app.name": metadata.get("app_name"),
|
||||
"dify.workspace.name": metadata.get("workspace_name"),
|
||||
"dify.invoke_from": metadata.get("invoke_from"),
|
||||
"gen_ai.user.id": user_id,
|
||||
"gen_ai.usage.total_tokens": info.total_tokens,
|
||||
"dify.node.total_price": info.total_price,
|
||||
"dify.node.currency": info.currency,
|
||||
"gen_ai.provider.name": info.model_provider,
|
||||
"gen_ai.request.model": info.model_name,
|
||||
"gen_ai.tool.name": info.tool_name,
|
||||
"dify.node.iteration_index": info.iteration_index,
|
||||
"dify.node.loop_index": info.loop_index,
|
||||
"dify.plugin.name": metadata.get("plugin_name"),
|
||||
"dify.credential.name": metadata.get("credential_name"),
|
||||
"dify.credential.id": metadata.get("credential_id"),
|
||||
"dify.dataset.ids": self._maybe_json(metadata.get("dataset_ids")),
|
||||
"dify.dataset.names": self._maybe_json(metadata.get("dataset_names")),
|
||||
}
|
||||
)
|
||||
|
||||
ref = f"ref:node_execution_id={info.node_execution_id}"
|
||||
log_attrs["dify.node.inputs"] = self._content_or_ref(info.node_inputs, ref)
|
||||
log_attrs["dify.node.outputs"] = self._content_or_ref(info.node_outputs, ref)
|
||||
log_attrs["dify.node.process_data"] = self._content_or_ref(info.process_data, ref)
|
||||
|
||||
emit_telemetry_log(
|
||||
event_name=span_name.value,
|
||||
attributes=log_attrs,
|
||||
signal="span_detail",
|
||||
trace_id_source=info.workflow_run_id,
|
||||
span_id_source=info.node_execution_id,
|
||||
tenant_id=tenant_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
# -- Metrics --
|
||||
labels = self._labels(
|
||||
tenant_id=tenant_id or "",
|
||||
app_id=app_id or "",
|
||||
node_type=info.node_type,
|
||||
model_provider=info.model_provider or "",
|
||||
)
|
||||
if info.total_tokens:
|
||||
token_labels = TokenMetricLabels(
|
||||
tenant_id=tenant_id or "",
|
||||
app_id=app_id or "",
|
||||
operation_type=OperationType.NODE_EXECUTION,
|
||||
model_provider=info.model_provider or "",
|
||||
model_name=info.model_name or "",
|
||||
node_type=info.node_type,
|
||||
).to_dict()
|
||||
self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, token_labels)
|
||||
if info.prompt_tokens is not None and info.prompt_tokens > 0:
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.INPUT_TOKENS, info.prompt_tokens, token_labels
|
||||
)
|
||||
if info.completion_tokens is not None and info.completion_tokens > 0:
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.OUTPUT_TOKENS, info.completion_tokens, token_labels
|
||||
)
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.REQUESTS,
|
||||
1,
|
||||
self._labels(
|
||||
**labels,
|
||||
type=request_type,
|
||||
status=info.status,
|
||||
model_name=info.model_name or "",
|
||||
),
|
||||
)
|
||||
duration_labels = dict(labels)
|
||||
duration_labels["model_name"] = info.model_name or ""
|
||||
plugin_name = metadata.get("plugin_name")
|
||||
if plugin_name and info.node_type in {"tool", "knowledge-retrieval"}:
|
||||
duration_labels["plugin_name"] = plugin_name
|
||||
self._exporter.record_histogram(EnterpriseTelemetryHistogram.NODE_DURATION, info.elapsed_time, duration_labels)
|
||||
|
||||
if info.error:
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.ERRORS,
|
||||
1,
|
||||
self._labels(
|
||||
**labels,
|
||||
type=request_type,
|
||||
model_name=info.model_name or "",
|
||||
),
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# METRIC-ONLY handlers (structured log + counters/histograms)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _message_trace(self, info: MessageTraceInfo) -> None:
|
||||
metadata = self._metadata(info)
|
||||
tenant_id, app_id, user_id = self._context_ids(info, metadata)
|
||||
attrs = self._common_attrs(info)
|
||||
attrs.update(
|
||||
{
|
||||
"dify.invoke_from": metadata.get("from_source"),
|
||||
"dify.conversation.id": metadata.get("conversation_id"),
|
||||
"dify.conversation.mode": info.conversation_mode,
|
||||
"gen_ai.provider.name": metadata.get("ls_provider"),
|
||||
"gen_ai.request.model": metadata.get("ls_model_name"),
|
||||
"gen_ai.usage.input_tokens": info.message_tokens,
|
||||
"gen_ai.usage.output_tokens": info.answer_tokens,
|
||||
"gen_ai.usage.total_tokens": info.total_tokens,
|
||||
"dify.message.status": metadata.get("status"),
|
||||
"dify.message.error": info.error,
|
||||
"dify.message.from_source": metadata.get("from_source"),
|
||||
"dify.message.from_end_user_id": metadata.get("from_end_user_id"),
|
||||
"dify.message.from_account_id": metadata.get("from_account_id"),
|
||||
"dify.streaming": info.is_streaming_request,
|
||||
"dify.message.time_to_first_token": info.gen_ai_server_time_to_first_token,
|
||||
"dify.message.streaming_duration": info.llm_streaming_time_to_generate,
|
||||
"dify.workflow.run_id": metadata.get("workflow_run_id"),
|
||||
}
|
||||
)
|
||||
|
||||
if info.start_time and info.end_time:
|
||||
attrs["dify.message.duration"] = (info.end_time - info.start_time).total_seconds()
|
||||
|
||||
node_execution_id = metadata.get("node_execution_id")
|
||||
if node_execution_id:
|
||||
attrs["dify.node.execution_id"] = node_execution_id
|
||||
|
||||
ref = f"ref:message_id={info.message_id}"
|
||||
inputs = self._safe_payload_value(info.inputs)
|
||||
outputs = self._safe_payload_value(info.outputs)
|
||||
attrs["dify.message.inputs"] = self._content_or_ref(inputs, ref)
|
||||
attrs["dify.message.outputs"] = self._content_or_ref(outputs, ref)
|
||||
|
||||
emit_metric_only_event(
|
||||
event_name=EnterpriseTelemetryEvent.MESSAGE_RUN,
|
||||
attributes=attrs,
|
||||
trace_id_source=metadata.get("workflow_run_id") or (str(info.message_id) if info.message_id else None),
|
||||
span_id_source=node_execution_id,
|
||||
tenant_id=tenant_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
labels = self._labels(
|
||||
tenant_id=tenant_id or "",
|
||||
app_id=app_id or "",
|
||||
model_provider=metadata.get("ls_provider") or "",
|
||||
model_name=metadata.get("ls_model_name") or "",
|
||||
)
|
||||
token_labels = TokenMetricLabels(
|
||||
tenant_id=tenant_id or "",
|
||||
app_id=app_id or "",
|
||||
operation_type=OperationType.MESSAGE,
|
||||
model_provider=metadata.get("ls_provider") or "",
|
||||
model_name=metadata.get("ls_model_name") or "",
|
||||
node_type="",
|
||||
).to_dict()
|
||||
self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, token_labels)
|
||||
if info.message_tokens > 0:
|
||||
self._exporter.increment_counter(EnterpriseTelemetryCounter.INPUT_TOKENS, info.message_tokens, token_labels)
|
||||
if info.answer_tokens > 0:
|
||||
self._exporter.increment_counter(EnterpriseTelemetryCounter.OUTPUT_TOKENS, info.answer_tokens, token_labels)
|
||||
invoke_from = metadata.get("from_source", "")
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.REQUESTS,
|
||||
1,
|
||||
self._labels(
|
||||
**labels,
|
||||
type="message",
|
||||
status=metadata.get("status", ""),
|
||||
invoke_from=invoke_from,
|
||||
),
|
||||
)
|
||||
|
||||
if info.start_time and info.end_time:
|
||||
duration = (info.end_time - info.start_time).total_seconds()
|
||||
self._exporter.record_histogram(EnterpriseTelemetryHistogram.MESSAGE_DURATION, duration, labels)
|
||||
|
||||
if info.gen_ai_server_time_to_first_token is not None:
|
||||
self._exporter.record_histogram(
|
||||
EnterpriseTelemetryHistogram.MESSAGE_TTFT, info.gen_ai_server_time_to_first_token, labels
|
||||
)
|
||||
|
||||
if info.error:
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.ERRORS,
|
||||
1,
|
||||
self._labels(
|
||||
**labels,
|
||||
type="message",
|
||||
),
|
||||
)
|
||||
|
||||
def _tool_trace(self, info: ToolTraceInfo) -> None:
|
||||
metadata = self._metadata(info)
|
||||
tenant_id, app_id, user_id = self._context_ids(info, metadata)
|
||||
attrs = self._common_attrs(info)
|
||||
attrs.update(
|
||||
{
|
||||
"dify.tool.name": info.tool_name,
|
||||
"dify.tool.duration": float(info.time_cost),
|
||||
"dify.tool.status": "failed" if info.error else "succeeded",
|
||||
"dify.tool.error": info.error,
|
||||
"dify.workflow.run_id": metadata.get("workflow_run_id"),
|
||||
}
|
||||
)
|
||||
node_execution_id = metadata.get("node_execution_id")
|
||||
if node_execution_id:
|
||||
attrs["dify.node.execution_id"] = node_execution_id
|
||||
|
||||
ref = f"ref:message_id={info.message_id}"
|
||||
attrs["dify.tool.inputs"] = self._content_or_ref(info.tool_inputs, ref)
|
||||
attrs["dify.tool.outputs"] = self._content_or_ref(info.tool_outputs, ref)
|
||||
attrs["dify.tool.parameters"] = self._content_or_ref(info.tool_parameters, ref)
|
||||
attrs["dify.tool.config"] = self._content_or_ref(info.tool_config, ref)
|
||||
|
||||
emit_metric_only_event(
|
||||
event_name=EnterpriseTelemetryEvent.TOOL_EXECUTION,
|
||||
attributes=attrs,
|
||||
trace_id_source=info.resolved_trace_id,
|
||||
span_id_source=node_execution_id,
|
||||
tenant_id=tenant_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
labels = self._labels(
|
||||
tenant_id=tenant_id or "",
|
||||
app_id=app_id or "",
|
||||
tool_name=info.tool_name,
|
||||
)
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.REQUESTS,
|
||||
1,
|
||||
self._labels(
|
||||
**labels,
|
||||
type="tool",
|
||||
),
|
||||
)
|
||||
self._exporter.record_histogram(EnterpriseTelemetryHistogram.TOOL_DURATION, float(info.time_cost), labels)
|
||||
|
||||
if info.error:
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.ERRORS,
|
||||
1,
|
||||
self._labels(
|
||||
**labels,
|
||||
type="tool",
|
||||
),
|
||||
)
|
||||
|
||||
def _moderation_trace(self, info: ModerationTraceInfo) -> None:
|
||||
metadata = self._metadata(info)
|
||||
tenant_id, app_id, user_id = self._context_ids(info, metadata)
|
||||
attrs = self._common_attrs(info)
|
||||
attrs.update(
|
||||
{
|
||||
"dify.moderation.flagged": info.flagged,
|
||||
"dify.moderation.action": info.action,
|
||||
"dify.moderation.preset_response": info.preset_response,
|
||||
"dify.moderation.type": metadata.get("moderation_type", "input"),
|
||||
"dify.moderation.categories": self._maybe_json(metadata.get("moderation_categories", [])),
|
||||
"dify.workflow.run_id": metadata.get("workflow_run_id"),
|
||||
}
|
||||
)
|
||||
node_execution_id = metadata.get("node_execution_id")
|
||||
if node_execution_id:
|
||||
attrs["dify.node.execution_id"] = node_execution_id
|
||||
|
||||
attrs["dify.moderation.query"] = self._content_or_ref(
|
||||
info.query,
|
||||
f"ref:message_id={info.message_id}",
|
||||
)
|
||||
|
||||
emit_metric_only_event(
|
||||
event_name=EnterpriseTelemetryEvent.MODERATION_CHECK,
|
||||
attributes=attrs,
|
||||
trace_id_source=info.resolved_trace_id,
|
||||
span_id_source=node_execution_id,
|
||||
tenant_id=tenant_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
labels = self._labels(
|
||||
tenant_id=tenant_id or "",
|
||||
app_id=app_id or "",
|
||||
)
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.REQUESTS,
|
||||
1,
|
||||
self._labels(
|
||||
**labels,
|
||||
type="moderation",
|
||||
),
|
||||
)
|
||||
|
||||
def _suggested_question_trace(self, info: SuggestedQuestionTraceInfo) -> None:
|
||||
metadata = self._metadata(info)
|
||||
tenant_id, app_id, user_id = self._context_ids(info, metadata)
|
||||
attrs = self._common_attrs(info)
|
||||
duration: float | None = None
|
||||
if info.start_time is not None and info.end_time is not None:
|
||||
duration = (info.end_time - info.start_time).total_seconds()
|
||||
error = info.error or (info.metadata.get("error") if info.metadata else None)
|
||||
status = "failed" if error else (info.status or "succeeded")
|
||||
attrs.update(
|
||||
{
|
||||
"gen_ai.usage.total_tokens": info.total_tokens,
|
||||
"dify.suggested_question.status": status,
|
||||
"dify.suggested_question.error": error,
|
||||
"dify.suggested_question.duration": duration,
|
||||
"gen_ai.provider.name": info.model_provider,
|
||||
"gen_ai.request.model": info.model_id,
|
||||
"dify.suggested_question.count": len(info.suggested_question),
|
||||
"dify.workflow.run_id": metadata.get("workflow_run_id"),
|
||||
}
|
||||
)
|
||||
node_execution_id = metadata.get("node_execution_id")
|
||||
if node_execution_id:
|
||||
attrs["dify.node.execution_id"] = node_execution_id
|
||||
|
||||
attrs["dify.suggested_question.questions"] = self._content_or_ref(
|
||||
info.suggested_question,
|
||||
f"ref:message_id={info.message_id}",
|
||||
)
|
||||
|
||||
emit_metric_only_event(
|
||||
event_name=EnterpriseTelemetryEvent.SUGGESTED_QUESTION_GENERATION,
|
||||
attributes=attrs,
|
||||
trace_id_source=info.resolved_trace_id,
|
||||
span_id_source=node_execution_id,
|
||||
tenant_id=tenant_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
labels = self._labels(
|
||||
tenant_id=tenant_id or "",
|
||||
app_id=app_id or "",
|
||||
)
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.REQUESTS,
|
||||
1,
|
||||
self._labels(
|
||||
**labels,
|
||||
type="suggested_question",
|
||||
model_provider=info.model_provider or "",
|
||||
model_name=info.model_id or "",
|
||||
),
|
||||
)
|
||||
|
||||
def _dataset_retrieval_trace(self, info: DatasetRetrievalTraceInfo) -> None:
|
||||
metadata = self._metadata(info)
|
||||
tenant_id, app_id, user_id = self._context_ids(info, metadata)
|
||||
attrs = self._common_attrs(info)
|
||||
attrs["dify.retrieval.error"] = info.error
|
||||
attrs["dify.retrieval.status"] = "failed" if info.error else "succeeded"
|
||||
if info.start_time and info.end_time:
|
||||
attrs["dify.retrieval.duration"] = (info.end_time - info.start_time).total_seconds()
|
||||
attrs["dify.workflow.run_id"] = metadata.get("workflow_run_id")
|
||||
node_execution_id = metadata.get("node_execution_id")
|
||||
if node_execution_id:
|
||||
attrs["dify.node.execution_id"] = node_execution_id
|
||||
|
||||
docs: list[dict[str, Any]] = []
|
||||
documents_any: Any = info.documents
|
||||
documents_list: list[Any] = cast(list[Any], documents_any) if isinstance(documents_any, list) else []
|
||||
for entry in documents_list:
|
||||
if isinstance(entry, dict):
|
||||
entry_dict: dict[str, Any] = cast(dict[str, Any], entry)
|
||||
docs.append(entry_dict)
|
||||
dataset_ids: list[str] = []
|
||||
dataset_names: list[str] = []
|
||||
structured_docs: list[dict[str, Any]] = []
|
||||
for doc in docs:
|
||||
meta_raw = doc.get("metadata")
|
||||
meta: dict[str, Any] = cast(dict[str, Any], meta_raw) if isinstance(meta_raw, dict) else {}
|
||||
did = meta.get("dataset_id")
|
||||
dname = meta.get("dataset_name")
|
||||
if did and did not in dataset_ids:
|
||||
dataset_ids.append(did)
|
||||
if dname and dname not in dataset_names:
|
||||
dataset_names.append(dname)
|
||||
structured_docs.append(
|
||||
{
|
||||
"dataset_id": did,
|
||||
"document_id": meta.get("document_id"),
|
||||
"segment_id": meta.get("segment_id"),
|
||||
"score": meta.get("score"),
|
||||
}
|
||||
)
|
||||
|
||||
attrs["dify.dataset.id"] = self._maybe_json(dataset_ids)
|
||||
attrs["dify.dataset.name"] = self._maybe_json(dataset_names)
|
||||
attrs["dify.retrieval.document_count"] = len(docs)
|
||||
|
||||
embedding_models_raw: Any = metadata.get("embedding_models")
|
||||
embedding_models: dict[str, Any] = (
|
||||
cast(dict[str, Any], embedding_models_raw) if isinstance(embedding_models_raw, dict) else {}
|
||||
)
|
||||
if embedding_models:
|
||||
providers: list[str] = []
|
||||
models: list[str] = []
|
||||
for ds_info in embedding_models.values():
|
||||
if isinstance(ds_info, dict):
|
||||
ds_info_dict: dict[str, Any] = cast(dict[str, Any], ds_info)
|
||||
p = ds_info_dict.get("embedding_model_provider", "")
|
||||
m = ds_info_dict.get("embedding_model", "")
|
||||
if p and p not in providers:
|
||||
providers.append(p)
|
||||
if m and m not in models:
|
||||
models.append(m)
|
||||
attrs["dify.dataset.embedding_providers"] = self._maybe_json(providers)
|
||||
attrs["dify.dataset.embedding_models"] = self._maybe_json(models)
|
||||
|
||||
# Add rerank model to logs
|
||||
rerank_provider = metadata.get("rerank_model_provider", "")
|
||||
rerank_model = metadata.get("rerank_model_name", "")
|
||||
if rerank_provider or rerank_model:
|
||||
attrs["dify.retrieval.rerank_provider"] = rerank_provider
|
||||
attrs["dify.retrieval.rerank_model"] = rerank_model
|
||||
|
||||
ref = f"ref:message_id={info.message_id}"
|
||||
retrieval_inputs = self._safe_payload_value(info.inputs)
|
||||
attrs["dify.retrieval.query"] = self._content_or_ref(retrieval_inputs, ref)
|
||||
attrs["dify.dataset.documents"] = self._content_or_ref(structured_docs, ref)
|
||||
|
||||
emit_metric_only_event(
|
||||
event_name=EnterpriseTelemetryEvent.DATASET_RETRIEVAL,
|
||||
attributes=attrs,
|
||||
trace_id_source=metadata.get("workflow_run_id") or (str(info.message_id) if info.message_id else None),
|
||||
span_id_source=node_execution_id or (str(info.message_id) if info.message_id else None),
|
||||
tenant_id=tenant_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
labels = self._labels(
|
||||
tenant_id=tenant_id or "",
|
||||
app_id=app_id or "",
|
||||
)
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.REQUESTS,
|
||||
1,
|
||||
self._labels(
|
||||
**labels,
|
||||
type="dataset_retrieval",
|
||||
),
|
||||
)
|
||||
|
||||
for did in dataset_ids:
|
||||
# Get embedding model for this specific dataset
|
||||
ds_embedding_info = embedding_models.get(did, {})
|
||||
embedding_provider = ds_embedding_info.get("embedding_model_provider", "")
|
||||
embedding_model = ds_embedding_info.get("embedding_model", "")
|
||||
|
||||
# Get rerank model (same for all datasets in this retrieval)
|
||||
rerank_provider = metadata.get("rerank_model_provider", "")
|
||||
rerank_model = metadata.get("rerank_model_name", "")
|
||||
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.DATASET_RETRIEVALS,
|
||||
1,
|
||||
self._labels(
|
||||
**labels,
|
||||
dataset_id=did,
|
||||
embedding_model_provider=embedding_provider,
|
||||
embedding_model=embedding_model,
|
||||
rerank_model_provider=rerank_provider,
|
||||
rerank_model=rerank_model,
|
||||
),
|
||||
)
|
||||
|
||||
def _generate_name_trace(self, info: GenerateNameTraceInfo) -> None:
|
||||
metadata = self._metadata(info)
|
||||
tenant_id, app_id, user_id = self._context_ids(info, metadata)
|
||||
attrs = self._common_attrs(info)
|
||||
attrs["dify.conversation.id"] = info.conversation_id
|
||||
node_execution_id = metadata.get("node_execution_id")
|
||||
if node_execution_id:
|
||||
attrs["dify.node.execution_id"] = node_execution_id
|
||||
|
||||
duration: float | None = None
|
||||
if info.start_time is not None and info.end_time is not None:
|
||||
duration = (info.end_time - info.start_time).total_seconds()
|
||||
error: str | None = metadata.get("error") if metadata else None
|
||||
status = "failed" if error else "succeeded"
|
||||
attrs["dify.generate_name.duration"] = duration
|
||||
attrs["dify.generate_name.status"] = status
|
||||
attrs["dify.generate_name.error"] = error
|
||||
|
||||
ref = f"ref:conversation_id={info.conversation_id}"
|
||||
inputs = self._safe_payload_value(info.inputs)
|
||||
outputs = self._safe_payload_value(info.outputs)
|
||||
attrs["dify.generate_name.inputs"] = self._content_or_ref(inputs, ref)
|
||||
attrs["dify.generate_name.outputs"] = self._content_or_ref(outputs, ref)
|
||||
|
||||
emit_metric_only_event(
|
||||
event_name=EnterpriseTelemetryEvent.GENERATE_NAME_EXECUTION,
|
||||
attributes=attrs,
|
||||
trace_id_source=info.resolved_trace_id,
|
||||
span_id_source=node_execution_id,
|
||||
tenant_id=tenant_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
labels = self._labels(
|
||||
tenant_id=tenant_id or "",
|
||||
app_id=app_id or "",
|
||||
)
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.REQUESTS,
|
||||
1,
|
||||
self._labels(
|
||||
**labels,
|
||||
type="generate_name",
|
||||
),
|
||||
)
|
||||
|
||||
def _prompt_generation_trace(self, info: PromptGenerationTraceInfo) -> None:
|
||||
metadata = self._metadata(info)
|
||||
tenant_id, app_id, user_id = self._context_ids(info, metadata)
|
||||
attrs = {
|
||||
"dify.trace_id": info.resolved_trace_id,
|
||||
"dify.tenant_id": tenant_id,
|
||||
"gen_ai.user.id": user_id,
|
||||
"dify.app_id": app_id or "",
|
||||
"dify.app.name": metadata.get("app_name"),
|
||||
"dify.workspace.name": metadata.get("workspace_name"),
|
||||
"dify.prompt_generation.operation_type": info.operation_type,
|
||||
"gen_ai.provider.name": info.model_provider,
|
||||
"gen_ai.request.model": info.model_name,
|
||||
"gen_ai.usage.input_tokens": info.prompt_tokens,
|
||||
"gen_ai.usage.output_tokens": info.completion_tokens,
|
||||
"gen_ai.usage.total_tokens": info.total_tokens,
|
||||
"dify.prompt_generation.duration": info.latency,
|
||||
"dify.prompt_generation.status": "failed" if info.error else "succeeded",
|
||||
"dify.prompt_generation.error": info.error,
|
||||
}
|
||||
node_execution_id = metadata.get("node_execution_id")
|
||||
if node_execution_id:
|
||||
attrs["dify.node.execution_id"] = node_execution_id
|
||||
|
||||
if info.total_price is not None:
|
||||
attrs["dify.prompt_generation.total_price"] = info.total_price
|
||||
attrs["dify.prompt_generation.currency"] = info.currency
|
||||
|
||||
ref = f"ref:trace_id={info.trace_id}"
|
||||
outputs = self._safe_payload_value(info.outputs)
|
||||
attrs["dify.prompt_generation.instruction"] = self._content_or_ref(info.instruction, ref)
|
||||
attrs["dify.prompt_generation.output"] = self._content_or_ref(outputs, ref)
|
||||
|
||||
emit_metric_only_event(
|
||||
event_name=EnterpriseTelemetryEvent.PROMPT_GENERATION_EXECUTION,
|
||||
attributes=attrs,
|
||||
trace_id_source=info.resolved_trace_id,
|
||||
span_id_source=node_execution_id,
|
||||
tenant_id=tenant_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
token_labels = TokenMetricLabels(
|
||||
tenant_id=tenant_id or "",
|
||||
app_id=app_id or "",
|
||||
operation_type=info.operation_type,
|
||||
model_provider=info.model_provider,
|
||||
model_name=info.model_name,
|
||||
node_type="",
|
||||
).to_dict()
|
||||
|
||||
labels = self._labels(
|
||||
tenant_id=tenant_id or "",
|
||||
app_id=app_id or "",
|
||||
operation_type=info.operation_type,
|
||||
model_provider=info.model_provider,
|
||||
model_name=info.model_name,
|
||||
)
|
||||
|
||||
self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, token_labels)
|
||||
if info.prompt_tokens > 0:
|
||||
self._exporter.increment_counter(EnterpriseTelemetryCounter.INPUT_TOKENS, info.prompt_tokens, token_labels)
|
||||
if info.completion_tokens > 0:
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.OUTPUT_TOKENS, info.completion_tokens, token_labels
|
||||
)
|
||||
|
||||
prompt_status = "failed" if info.error else "succeeded"
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.REQUESTS,
|
||||
1,
|
||||
self._labels(
|
||||
**labels,
|
||||
type="prompt_generation",
|
||||
status=prompt_status,
|
||||
),
|
||||
)
|
||||
|
||||
self._exporter.record_histogram(
|
||||
EnterpriseTelemetryHistogram.PROMPT_GENERATION_DURATION,
|
||||
info.latency,
|
||||
labels,
|
||||
)
|
||||
|
||||
if info.error:
|
||||
self._exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.ERRORS,
|
||||
1,
|
||||
self._labels(
|
||||
**labels,
|
||||
type="prompt_generation",
|
||||
),
|
||||
)
|
||||
121
api/enterprise/telemetry/entities/__init__.py
Normal file
121
api/enterprise/telemetry/entities/__init__.py
Normal file
@ -0,0 +1,121 @@
|
||||
from enum import StrEnum
|
||||
from typing import cast
|
||||
|
||||
from opentelemetry.util.types import AttributeValue
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
|
||||
class EnterpriseTelemetrySpan(StrEnum):
|
||||
WORKFLOW_RUN = "dify.workflow.run"
|
||||
NODE_EXECUTION = "dify.node.execution"
|
||||
DRAFT_NODE_EXECUTION = "dify.node.execution.draft"
|
||||
|
||||
|
||||
class EnterpriseTelemetryEvent(StrEnum):
|
||||
"""Event names for enterprise telemetry logs."""
|
||||
|
||||
APP_CREATED = "dify.app.created"
|
||||
APP_UPDATED = "dify.app.updated"
|
||||
APP_DELETED = "dify.app.deleted"
|
||||
FEEDBACK_CREATED = "dify.feedback.created"
|
||||
WORKFLOW_RUN = "dify.workflow.run"
|
||||
MESSAGE_RUN = "dify.message.run"
|
||||
TOOL_EXECUTION = "dify.tool.execution"
|
||||
MODERATION_CHECK = "dify.moderation.check"
|
||||
SUGGESTED_QUESTION_GENERATION = "dify.suggested_question.generation"
|
||||
DATASET_RETRIEVAL = "dify.dataset.retrieval"
|
||||
GENERATE_NAME_EXECUTION = "dify.generate_name.execution"
|
||||
PROMPT_GENERATION_EXECUTION = "dify.prompt_generation.execution"
|
||||
REHYDRATION_FAILED = "dify.telemetry.rehydration_failed"
|
||||
|
||||
|
||||
class EnterpriseTelemetryCounter(StrEnum):
|
||||
TOKENS = "tokens"
|
||||
INPUT_TOKENS = "input_tokens"
|
||||
OUTPUT_TOKENS = "output_tokens"
|
||||
REQUESTS = "requests"
|
||||
ERRORS = "errors"
|
||||
FEEDBACK = "feedback"
|
||||
DATASET_RETRIEVALS = "dataset_retrievals"
|
||||
APP_CREATED = "app_created"
|
||||
APP_UPDATED = "app_updated"
|
||||
APP_DELETED = "app_deleted"
|
||||
|
||||
|
||||
class EnterpriseTelemetryHistogram(StrEnum):
|
||||
WORKFLOW_DURATION = "workflow_duration"
|
||||
NODE_DURATION = "node_duration"
|
||||
MESSAGE_DURATION = "message_duration"
|
||||
MESSAGE_TTFT = "message_ttft"
|
||||
TOOL_DURATION = "tool_duration"
|
||||
PROMPT_GENERATION_DURATION = "prompt_generation_duration"
|
||||
|
||||
|
||||
class TokenMetricLabels(BaseModel):
|
||||
"""Unified label structure for all dify.token.* metrics.
|
||||
|
||||
All token counters (dify.tokens.input, dify.tokens.output, dify.tokens.total) MUST
|
||||
use this exact label set to ensure consistent filtering and aggregation across
|
||||
different operation types.
|
||||
|
||||
Attributes:
|
||||
tenant_id: Tenant identifier.
|
||||
app_id: Application identifier.
|
||||
operation_type: Source of token usage (workflow | node_execution | message |
|
||||
rule_generate | code_generate | structured_output | instruction_modify).
|
||||
model_provider: LLM provider name. Empty string if not applicable (e.g., workflow-level).
|
||||
model_name: LLM model name. Empty string if not applicable (e.g., workflow-level).
|
||||
node_type: Workflow node type. Empty string unless operation_type=node_execution.
|
||||
|
||||
Usage:
|
||||
labels = TokenMetricLabels(
|
||||
tenant_id="tenant-123",
|
||||
app_id="app-456",
|
||||
operation_type=OperationType.WORKFLOW,
|
||||
model_provider="",
|
||||
model_name="",
|
||||
node_type="",
|
||||
)
|
||||
exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.INPUT_TOKENS,
|
||||
100,
|
||||
labels.to_dict()
|
||||
)
|
||||
|
||||
Design rationale:
|
||||
Without this unified structure, tokens get double-counted when querying totals
|
||||
because workflow.total_tokens is already the sum of all node tokens. The
|
||||
operation_type label allows filtering to separate workflow-level aggregates from
|
||||
node-level detail, while keeping the same label cardinality for consistent queries.
|
||||
"""
|
||||
|
||||
tenant_id: str
|
||||
app_id: str
|
||||
operation_type: str
|
||||
model_provider: str
|
||||
model_name: str
|
||||
node_type: str
|
||||
|
||||
model_config = ConfigDict(extra="forbid", frozen=True)
|
||||
|
||||
def to_dict(self) -> dict[str, AttributeValue]:
|
||||
return cast(
|
||||
dict[str, AttributeValue],
|
||||
{
|
||||
"tenant_id": self.tenant_id,
|
||||
"app_id": self.app_id,
|
||||
"operation_type": self.operation_type,
|
||||
"model_provider": self.model_provider,
|
||||
"model_name": self.model_name,
|
||||
"node_type": self.node_type,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"EnterpriseTelemetryCounter",
|
||||
"EnterpriseTelemetryEvent",
|
||||
"EnterpriseTelemetryHistogram",
|
||||
"EnterpriseTelemetrySpan",
|
||||
"TokenMetricLabels",
|
||||
]
|
||||
72
api/enterprise/telemetry/event_handlers.py
Normal file
72
api/enterprise/telemetry/event_handlers.py
Normal file
@ -0,0 +1,72 @@
|
||||
"""Blinker signal handlers for enterprise telemetry.
|
||||
|
||||
Registered at import time via ``@signal.connect`` decorators.
|
||||
Import must happen during ``ext_enterprise_telemetry.init_app()`` to
|
||||
ensure handlers fire. Each handler delegates to ``core.telemetry.gateway``
|
||||
which handles routing, EE-gating, and dispatch.
|
||||
|
||||
All handlers are best-effort: exceptions are caught and logged so that
|
||||
telemetry failures never break user-facing operations.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from events.app_event import app_was_created, app_was_deleted, app_was_updated
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
__all__ = [
|
||||
"_handle_app_created",
|
||||
"_handle_app_deleted",
|
||||
"_handle_app_updated",
|
||||
]
|
||||
|
||||
|
||||
@app_was_created.connect
|
||||
def _handle_app_created(sender: object, **kwargs: object) -> None:
|
||||
try:
|
||||
from core.telemetry.gateway import emit as gateway_emit
|
||||
from enterprise.telemetry.contracts import TelemetryCase
|
||||
|
||||
gateway_emit(
|
||||
case=TelemetryCase.APP_CREATED,
|
||||
context={"tenant_id": str(getattr(sender, "tenant_id", "") or "")},
|
||||
payload={
|
||||
"app_id": getattr(sender, "id", None),
|
||||
"mode": getattr(sender, "mode", None),
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
logger.warning("Failed to emit app_created telemetry", exc_info=True)
|
||||
|
||||
|
||||
@app_was_updated.connect
|
||||
def _handle_app_updated(sender: object, **kwargs: object) -> None:
|
||||
try:
|
||||
from core.telemetry.gateway import emit as gateway_emit
|
||||
from enterprise.telemetry.contracts import TelemetryCase
|
||||
|
||||
gateway_emit(
|
||||
case=TelemetryCase.APP_UPDATED,
|
||||
context={"tenant_id": str(getattr(sender, "tenant_id", "") or "")},
|
||||
payload={"app_id": getattr(sender, "id", None)},
|
||||
)
|
||||
except Exception:
|
||||
logger.warning("Failed to emit app_updated telemetry", exc_info=True)
|
||||
|
||||
|
||||
@app_was_deleted.connect
|
||||
def _handle_app_deleted(sender: object, **kwargs: object) -> None:
|
||||
try:
|
||||
from core.telemetry.gateway import emit as gateway_emit
|
||||
from enterprise.telemetry.contracts import TelemetryCase
|
||||
|
||||
gateway_emit(
|
||||
case=TelemetryCase.APP_DELETED,
|
||||
context={"tenant_id": str(getattr(sender, "tenant_id", "") or "")},
|
||||
payload={"app_id": getattr(sender, "id", None)},
|
||||
)
|
||||
except Exception:
|
||||
logger.warning("Failed to emit app_deleted telemetry", exc_info=True)
|
||||
283
api/enterprise/telemetry/exporter.py
Normal file
283
api/enterprise/telemetry/exporter.py
Normal file
@ -0,0 +1,283 @@
|
||||
"""Enterprise OTEL exporter — shared by EnterpriseOtelTrace, event handlers, and direct instrumentation.
|
||||
|
||||
Uses dedicated TracerProvider and MeterProvider instances (configurable sampling,
|
||||
independent from ext_otel.py infrastructure).
|
||||
|
||||
Initialized once during Flask extension init (single-threaded via ext_enterprise_telemetry.py).
|
||||
Accessed via ``ext_enterprise_telemetry.get_enterprise_exporter()`` from any thread/process.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import socket
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any, cast
|
||||
|
||||
from opentelemetry import trace
|
||||
from opentelemetry.baggage import get_all
|
||||
from opentelemetry.baggage.propagation import W3CBaggagePropagator
|
||||
from opentelemetry.context import Context
|
||||
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as GRPCMetricExporter
|
||||
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as GRPCSpanExporter
|
||||
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as HTTPMetricExporter
|
||||
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPSpanExporter
|
||||
from opentelemetry.sdk.metrics import MeterProvider
|
||||
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
|
||||
from opentelemetry.sdk.resources import Resource
|
||||
from opentelemetry.sdk.trace import TracerProvider
|
||||
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
||||
from opentelemetry.sdk.trace.sampling import ParentBasedTraceIdRatio
|
||||
from opentelemetry.semconv.resource import ResourceAttributes
|
||||
from opentelemetry.trace import SpanContext, TraceFlags
|
||||
from opentelemetry.util.types import Attributes, AttributeValue
|
||||
|
||||
from configs import dify_config
|
||||
from enterprise.telemetry.entities import EnterpriseTelemetryCounter, EnterpriseTelemetryHistogram
|
||||
from enterprise.telemetry.id_generator import (
|
||||
CorrelationIdGenerator,
|
||||
compute_deterministic_span_id,
|
||||
set_correlation_id,
|
||||
set_span_id_source,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def is_enterprise_telemetry_enabled() -> bool:
|
||||
return bool(dify_config.ENTERPRISE_ENABLED and dify_config.ENTERPRISE_TELEMETRY_ENABLED)
|
||||
|
||||
|
||||
def _parse_otlp_headers(raw: str) -> dict[str, str]:
|
||||
ctx = W3CBaggagePropagator().extract({"baggage": raw})
|
||||
return {k: v for k, v in get_all(ctx).items() if isinstance(v, str)}
|
||||
|
||||
|
||||
def _datetime_to_ns(dt: datetime) -> int:
|
||||
"""Convert a datetime to nanoseconds since epoch (OTEL convention)."""
|
||||
# Ensure we always interpret naive datetimes as UTC instead of local time.
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=UTC)
|
||||
else:
|
||||
dt = dt.astimezone(UTC)
|
||||
return int(dt.timestamp() * 1_000_000_000)
|
||||
|
||||
|
||||
class _ExporterFactory:
|
||||
def __init__(self, protocol: str, endpoint: str, headers: dict[str, str], insecure: bool):
|
||||
self._protocol = protocol
|
||||
self._endpoint = endpoint
|
||||
self._headers = headers
|
||||
self._grpc_headers = tuple(headers.items()) if headers else None
|
||||
self._http_headers = headers or None
|
||||
self._insecure = insecure
|
||||
|
||||
def create_trace_exporter(self) -> HTTPSpanExporter | GRPCSpanExporter:
|
||||
if self._protocol == "grpc":
|
||||
return GRPCSpanExporter(
|
||||
endpoint=self._endpoint or None,
|
||||
headers=self._grpc_headers,
|
||||
insecure=self._insecure,
|
||||
)
|
||||
trace_endpoint = f"{self._endpoint}/v1/traces" if self._endpoint else ""
|
||||
return HTTPSpanExporter(endpoint=trace_endpoint or None, headers=self._http_headers)
|
||||
|
||||
def create_metric_exporter(self) -> HTTPMetricExporter | GRPCMetricExporter:
|
||||
if self._protocol == "grpc":
|
||||
return GRPCMetricExporter(
|
||||
endpoint=self._endpoint or None,
|
||||
headers=self._grpc_headers,
|
||||
insecure=self._insecure,
|
||||
)
|
||||
metric_endpoint = f"{self._endpoint}/v1/metrics" if self._endpoint else ""
|
||||
return HTTPMetricExporter(endpoint=metric_endpoint or None, headers=self._http_headers)
|
||||
|
||||
|
||||
class EnterpriseExporter:
|
||||
"""Shared OTEL exporter for all enterprise telemetry.
|
||||
|
||||
``export_span`` creates spans with optional real timestamps, deterministic
|
||||
span/trace IDs, and cross-workflow parent linking.
|
||||
``increment_counter`` / ``record_histogram`` emit OTEL metrics at 100% accuracy.
|
||||
"""
|
||||
|
||||
def __init__(self, config: object) -> None:
|
||||
endpoint: str = getattr(config, "ENTERPRISE_OTLP_ENDPOINT", "")
|
||||
headers_raw: str = getattr(config, "ENTERPRISE_OTLP_HEADERS", "")
|
||||
protocol: str = (getattr(config, "ENTERPRISE_OTLP_PROTOCOL", "http") or "http").lower()
|
||||
service_name: str = getattr(config, "ENTERPRISE_SERVICE_NAME", "dify")
|
||||
sampling_rate: float = getattr(config, "ENTERPRISE_OTEL_SAMPLING_RATE", 1.0)
|
||||
self.include_content: bool = getattr(config, "ENTERPRISE_INCLUDE_CONTENT", True)
|
||||
api_key: str = getattr(config, "ENTERPRISE_OTLP_API_KEY", "")
|
||||
|
||||
# Auto-detect TLS: https:// uses secure, everything else is insecure
|
||||
insecure = not endpoint.startswith("https://")
|
||||
|
||||
resource = Resource(
|
||||
attributes={
|
||||
ResourceAttributes.SERVICE_NAME: service_name,
|
||||
ResourceAttributes.HOST_NAME: socket.gethostname(),
|
||||
}
|
||||
)
|
||||
sampler = ParentBasedTraceIdRatio(sampling_rate)
|
||||
id_generator = CorrelationIdGenerator()
|
||||
self._tracer_provider = TracerProvider(resource=resource, sampler=sampler, id_generator=id_generator)
|
||||
|
||||
headers = _parse_otlp_headers(headers_raw)
|
||||
if api_key:
|
||||
if "authorization" in headers:
|
||||
logger.warning(
|
||||
"ENTERPRISE_OTLP_API_KEY is set but ENTERPRISE_OTLP_HEADERS also contains "
|
||||
"'authorization'; the API key will take precedence."
|
||||
)
|
||||
headers["authorization"] = f"Bearer {api_key}"
|
||||
factory = _ExporterFactory(protocol, endpoint, headers, insecure=insecure)
|
||||
|
||||
trace_exporter = factory.create_trace_exporter()
|
||||
self._tracer_provider.add_span_processor(BatchSpanProcessor(trace_exporter))
|
||||
self._tracer = self._tracer_provider.get_tracer("dify.enterprise")
|
||||
|
||||
metric_exporter = factory.create_metric_exporter()
|
||||
self._meter_provider = MeterProvider(
|
||||
resource=resource,
|
||||
metric_readers=[PeriodicExportingMetricReader(metric_exporter)],
|
||||
)
|
||||
meter = self._meter_provider.get_meter("dify.enterprise")
|
||||
self._counters = {
|
||||
EnterpriseTelemetryCounter.TOKENS: meter.create_counter("dify.tokens.total", unit="{token}"),
|
||||
EnterpriseTelemetryCounter.INPUT_TOKENS: meter.create_counter("dify.tokens.input", unit="{token}"),
|
||||
EnterpriseTelemetryCounter.OUTPUT_TOKENS: meter.create_counter("dify.tokens.output", unit="{token}"),
|
||||
EnterpriseTelemetryCounter.REQUESTS: meter.create_counter("dify.requests.total", unit="{request}"),
|
||||
EnterpriseTelemetryCounter.ERRORS: meter.create_counter("dify.errors.total", unit="{error}"),
|
||||
EnterpriseTelemetryCounter.FEEDBACK: meter.create_counter("dify.feedback.total", unit="{feedback}"),
|
||||
EnterpriseTelemetryCounter.DATASET_RETRIEVALS: meter.create_counter(
|
||||
"dify.dataset.retrievals.total", unit="{retrieval}"
|
||||
),
|
||||
EnterpriseTelemetryCounter.APP_CREATED: meter.create_counter("dify.app.created.total", unit="{app}"),
|
||||
EnterpriseTelemetryCounter.APP_UPDATED: meter.create_counter("dify.app.updated.total", unit="{app}"),
|
||||
EnterpriseTelemetryCounter.APP_DELETED: meter.create_counter("dify.app.deleted.total", unit="{app}"),
|
||||
}
|
||||
self._histograms = {
|
||||
EnterpriseTelemetryHistogram.WORKFLOW_DURATION: meter.create_histogram("dify.workflow.duration", unit="s"),
|
||||
EnterpriseTelemetryHistogram.NODE_DURATION: meter.create_histogram("dify.node.duration", unit="s"),
|
||||
EnterpriseTelemetryHistogram.MESSAGE_DURATION: meter.create_histogram("dify.message.duration", unit="s"),
|
||||
EnterpriseTelemetryHistogram.MESSAGE_TTFT: meter.create_histogram(
|
||||
"dify.message.time_to_first_token", unit="s"
|
||||
),
|
||||
EnterpriseTelemetryHistogram.TOOL_DURATION: meter.create_histogram("dify.tool.duration", unit="s"),
|
||||
EnterpriseTelemetryHistogram.PROMPT_GENERATION_DURATION: meter.create_histogram(
|
||||
"dify.prompt_generation.duration", unit="s"
|
||||
),
|
||||
}
|
||||
|
||||
def export_span(
|
||||
self,
|
||||
name: str,
|
||||
attributes: dict[str, Any],
|
||||
correlation_id: str | None = None,
|
||||
span_id_source: str | None = None,
|
||||
start_time: datetime | None = None,
|
||||
end_time: datetime | None = None,
|
||||
trace_correlation_override: str | None = None,
|
||||
parent_span_id_source: str | None = None,
|
||||
) -> None:
|
||||
"""Export an OTEL span with optional deterministic IDs and real timestamps.
|
||||
|
||||
Args:
|
||||
name: Span operation name.
|
||||
attributes: Span attributes dict.
|
||||
correlation_id: Source for trace_id derivation (groups spans in one trace).
|
||||
span_id_source: Source for deterministic span_id (e.g. workflow_run_id or node_execution_id).
|
||||
start_time: Real span start time. When None, uses current time.
|
||||
end_time: Real span end time. When None, span ends immediately.
|
||||
trace_correlation_override: Override trace_id source (for cross-workflow linking).
|
||||
When set, trace_id is derived from this instead of ``correlation_id``.
|
||||
parent_span_id_source: Override parent span_id source (for cross-workflow linking).
|
||||
When set, parent span_id is derived from this value. When None and
|
||||
``correlation_id`` is set, parent is the workflow root span.
|
||||
"""
|
||||
effective_trace_correlation = trace_correlation_override or correlation_id
|
||||
set_correlation_id(effective_trace_correlation)
|
||||
set_span_id_source(span_id_source)
|
||||
|
||||
try:
|
||||
parent_context: Context | None = None
|
||||
# A span is the "root" of its correlation group when span_id_source == correlation_id
|
||||
# (i.e. a workflow root span). All other spans are children.
|
||||
if parent_span_id_source:
|
||||
# Cross-workflow linking: parent is an explicit span (e.g. tool node in outer workflow)
|
||||
parent_span_id = compute_deterministic_span_id(parent_span_id_source)
|
||||
try:
|
||||
parent_trace_id = int(uuid.UUID(effective_trace_correlation)) if effective_trace_correlation else 0
|
||||
except (ValueError, AttributeError):
|
||||
logger.warning(
|
||||
"Invalid trace correlation UUID for cross-workflow link: %s, span=%s",
|
||||
effective_trace_correlation,
|
||||
name,
|
||||
)
|
||||
parent_trace_id = 0
|
||||
if parent_trace_id:
|
||||
parent_span_context = SpanContext(
|
||||
trace_id=parent_trace_id,
|
||||
span_id=parent_span_id,
|
||||
is_remote=True,
|
||||
trace_flags=TraceFlags(TraceFlags.SAMPLED),
|
||||
)
|
||||
parent_context = trace.set_span_in_context(trace.NonRecordingSpan(parent_span_context))
|
||||
elif correlation_id and correlation_id != span_id_source:
|
||||
# Child span: parent is the correlation-group root (workflow root span)
|
||||
parent_span_id = compute_deterministic_span_id(correlation_id)
|
||||
try:
|
||||
parent_trace_id = int(uuid.UUID(effective_trace_correlation or correlation_id))
|
||||
except (ValueError, AttributeError):
|
||||
logger.warning(
|
||||
"Invalid trace correlation UUID for child span link: %s, span=%s",
|
||||
effective_trace_correlation or correlation_id,
|
||||
name,
|
||||
)
|
||||
parent_trace_id = 0
|
||||
if parent_trace_id:
|
||||
parent_span_context = SpanContext(
|
||||
trace_id=parent_trace_id,
|
||||
span_id=parent_span_id,
|
||||
is_remote=True,
|
||||
trace_flags=TraceFlags(TraceFlags.SAMPLED),
|
||||
)
|
||||
parent_context = trace.set_span_in_context(trace.NonRecordingSpan(parent_span_context))
|
||||
|
||||
span_start_time = _datetime_to_ns(start_time) if start_time is not None else None
|
||||
span_end_on_exit = end_time is None
|
||||
|
||||
with self._tracer.start_as_current_span(
|
||||
name,
|
||||
context=parent_context,
|
||||
start_time=span_start_time,
|
||||
end_on_exit=span_end_on_exit,
|
||||
) as span:
|
||||
for key, value in attributes.items():
|
||||
if value is not None:
|
||||
span.set_attribute(key, value)
|
||||
if end_time is not None:
|
||||
span.end(end_time=_datetime_to_ns(end_time))
|
||||
except Exception:
|
||||
logger.exception("Failed to export span %s", name)
|
||||
finally:
|
||||
set_correlation_id(None)
|
||||
set_span_id_source(None)
|
||||
|
||||
def increment_counter(
|
||||
self, name: EnterpriseTelemetryCounter, value: int, labels: dict[str, AttributeValue]
|
||||
) -> None:
|
||||
counter = self._counters.get(name)
|
||||
if counter:
|
||||
counter.add(value, cast(Attributes, labels))
|
||||
|
||||
def record_histogram(
|
||||
self, name: EnterpriseTelemetryHistogram, value: float, labels: dict[str, AttributeValue]
|
||||
) -> None:
|
||||
histogram = self._histograms.get(name)
|
||||
if histogram:
|
||||
histogram.record(value, cast(Attributes, labels))
|
||||
|
||||
def shutdown(self) -> None:
|
||||
self._tracer_provider.shutdown()
|
||||
self._meter_provider.shutdown()
|
||||
75
api/enterprise/telemetry/id_generator.py
Normal file
75
api/enterprise/telemetry/id_generator.py
Normal file
@ -0,0 +1,75 @@
|
||||
"""Custom OTEL ID Generator for correlation-based trace/span ID derivation.
|
||||
|
||||
Uses contextvars for thread-safe correlation_id -> trace_id mapping.
|
||||
When a span_id_source is set, the span_id is derived deterministically
|
||||
from that value, enabling any span to reference another as parent
|
||||
without depending on span creation order.
|
||||
"""
|
||||
|
||||
import random
|
||||
import uuid
|
||||
from contextvars import ContextVar
|
||||
|
||||
from opentelemetry.sdk.trace.id_generator import IdGenerator
|
||||
|
||||
_correlation_id_context: ContextVar[str | None] = ContextVar("correlation_id", default=None)
|
||||
_span_id_source_context: ContextVar[str | None] = ContextVar("span_id_source", default=None)
|
||||
|
||||
|
||||
def set_correlation_id(correlation_id: str | None) -> None:
|
||||
_correlation_id_context.set(correlation_id)
|
||||
|
||||
|
||||
def get_correlation_id() -> str | None:
|
||||
return _correlation_id_context.get()
|
||||
|
||||
|
||||
def set_span_id_source(source_id: str | None) -> None:
|
||||
"""Set the source for deterministic span_id generation.
|
||||
|
||||
When set, ``generate_span_id()`` derives the span_id from this value
|
||||
(lower 64 bits of the UUID). Pass the ``workflow_run_id`` for workflow
|
||||
root spans or ``node_execution_id`` for node spans.
|
||||
"""
|
||||
_span_id_source_context.set(source_id)
|
||||
|
||||
|
||||
def compute_deterministic_span_id(source_id: str) -> int:
|
||||
"""Derive a deterministic span_id from any UUID string.
|
||||
|
||||
Uses the lower 64 bits of the UUID, guaranteeing non-zero output
|
||||
(OTEL requires span_id != 0).
|
||||
"""
|
||||
span_id = uuid.UUID(source_id).int & ((1 << 64) - 1)
|
||||
return span_id if span_id != 0 else 1
|
||||
|
||||
|
||||
class CorrelationIdGenerator(IdGenerator):
|
||||
"""ID generator that derives trace_id and optionally span_id from context.
|
||||
|
||||
- trace_id: always derived from correlation_id (groups all spans in one trace)
|
||||
- span_id: derived from span_id_source when set (enables deterministic
|
||||
parent-child linking), otherwise random
|
||||
"""
|
||||
|
||||
def generate_trace_id(self) -> int:
|
||||
correlation_id = _correlation_id_context.get()
|
||||
if correlation_id:
|
||||
try:
|
||||
return uuid.UUID(correlation_id).int
|
||||
except (ValueError, AttributeError):
|
||||
pass
|
||||
return random.getrandbits(128)
|
||||
|
||||
def generate_span_id(self) -> int:
|
||||
source = _span_id_source_context.get()
|
||||
if source:
|
||||
try:
|
||||
return compute_deterministic_span_id(source)
|
||||
except (ValueError, AttributeError):
|
||||
pass
|
||||
|
||||
span_id = random.getrandbits(64)
|
||||
while span_id == 0:
|
||||
span_id = random.getrandbits(64)
|
||||
return span_id
|
||||
421
api/enterprise/telemetry/metric_handler.py
Normal file
421
api/enterprise/telemetry/metric_handler.py
Normal file
@ -0,0 +1,421 @@
|
||||
"""Enterprise metric/log event handler.
|
||||
|
||||
This module processes metric and log telemetry events after they've been
|
||||
dequeued from the enterprise_telemetry Celery queue. It handles case routing,
|
||||
idempotency checking, and payload rehydration.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
from enterprise.telemetry.contracts import TelemetryCase, TelemetryEnvelope
|
||||
from extensions.ext_redis import redis_client
|
||||
from extensions.ext_storage import storage
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EnterpriseMetricHandler:
|
||||
"""Handler for enterprise metric and log telemetry events.
|
||||
|
||||
Processes envelopes from the enterprise_telemetry queue, routing each
|
||||
case to the appropriate handler method. Implements idempotency checking
|
||||
and payload rehydration with fallback.
|
||||
"""
|
||||
|
||||
def _increment_diagnostic_counter(self, counter_name: str, labels: dict[str, str] | None = None) -> None:
|
||||
"""Increment a diagnostic counter for operational monitoring.
|
||||
|
||||
Args:
|
||||
counter_name: Name of the counter (e.g., 'processed_total', 'deduped_total').
|
||||
labels: Optional labels for the counter.
|
||||
"""
|
||||
try:
|
||||
from extensions.ext_enterprise_telemetry import get_enterprise_exporter
|
||||
|
||||
exporter = get_enterprise_exporter()
|
||||
if not exporter:
|
||||
return
|
||||
|
||||
full_counter_name = f"enterprise_telemetry.handler.{counter_name}"
|
||||
logger.debug(
|
||||
"Diagnostic counter: %s, labels=%s",
|
||||
full_counter_name,
|
||||
labels or {},
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("Failed to increment diagnostic counter: %s", counter_name, exc_info=True)
|
||||
|
||||
def handle(self, envelope: TelemetryEnvelope) -> None:
|
||||
"""Main entry point for processing telemetry envelopes.
|
||||
|
||||
Args:
|
||||
envelope: The telemetry envelope to process.
|
||||
"""
|
||||
# Check for duplicate events
|
||||
if self._is_duplicate(envelope):
|
||||
logger.debug(
|
||||
"Skipping duplicate event: tenant_id=%s, event_id=%s",
|
||||
envelope.tenant_id,
|
||||
envelope.event_id,
|
||||
)
|
||||
self._increment_diagnostic_counter("deduped_total")
|
||||
return
|
||||
|
||||
# Route to appropriate handler based on case
|
||||
case = envelope.case
|
||||
if case == TelemetryCase.APP_CREATED:
|
||||
self._on_app_created(envelope)
|
||||
self._increment_diagnostic_counter("processed_total", {"case": "app_created"})
|
||||
elif case == TelemetryCase.APP_UPDATED:
|
||||
self._on_app_updated(envelope)
|
||||
self._increment_diagnostic_counter("processed_total", {"case": "app_updated"})
|
||||
elif case == TelemetryCase.APP_DELETED:
|
||||
self._on_app_deleted(envelope)
|
||||
self._increment_diagnostic_counter("processed_total", {"case": "app_deleted"})
|
||||
elif case == TelemetryCase.FEEDBACK_CREATED:
|
||||
self._on_feedback_created(envelope)
|
||||
self._increment_diagnostic_counter("processed_total", {"case": "feedback_created"})
|
||||
elif case == TelemetryCase.MESSAGE_RUN:
|
||||
self._on_message_run(envelope)
|
||||
self._increment_diagnostic_counter("processed_total", {"case": "message_run"})
|
||||
elif case == TelemetryCase.TOOL_EXECUTION:
|
||||
self._on_tool_execution(envelope)
|
||||
self._increment_diagnostic_counter("processed_total", {"case": "tool_execution"})
|
||||
elif case == TelemetryCase.MODERATION_CHECK:
|
||||
self._on_moderation_check(envelope)
|
||||
self._increment_diagnostic_counter("processed_total", {"case": "moderation_check"})
|
||||
elif case == TelemetryCase.SUGGESTED_QUESTION:
|
||||
self._on_suggested_question(envelope)
|
||||
self._increment_diagnostic_counter("processed_total", {"case": "suggested_question"})
|
||||
elif case == TelemetryCase.DATASET_RETRIEVAL:
|
||||
self._on_dataset_retrieval(envelope)
|
||||
self._increment_diagnostic_counter("processed_total", {"case": "dataset_retrieval"})
|
||||
elif case == TelemetryCase.GENERATE_NAME:
|
||||
self._on_generate_name(envelope)
|
||||
self._increment_diagnostic_counter("processed_total", {"case": "generate_name"})
|
||||
elif case == TelemetryCase.PROMPT_GENERATION:
|
||||
self._on_prompt_generation(envelope)
|
||||
self._increment_diagnostic_counter("processed_total", {"case": "prompt_generation"})
|
||||
else:
|
||||
logger.warning(
|
||||
"Unknown telemetry case: %s (tenant_id=%s, event_id=%s)",
|
||||
case,
|
||||
envelope.tenant_id,
|
||||
envelope.event_id,
|
||||
)
|
||||
|
||||
def _is_duplicate(self, envelope: TelemetryEnvelope) -> bool:
|
||||
"""Check if this event has already been processed.
|
||||
|
||||
Uses Redis with TTL for deduplication. Returns True if duplicate,
|
||||
False if first time seeing this event.
|
||||
|
||||
Args:
|
||||
envelope: The telemetry envelope to check.
|
||||
|
||||
Returns:
|
||||
True if this event_id has been seen before, False otherwise.
|
||||
"""
|
||||
dedup_key = f"telemetry:dedup:{envelope.tenant_id}:{envelope.event_id}"
|
||||
|
||||
try:
|
||||
# Atomic set-if-not-exists with 1h TTL
|
||||
# Returns True if key was set (first time), None if already exists (duplicate)
|
||||
was_set = redis_client.set(dedup_key, b"1", nx=True, ex=3600)
|
||||
return was_set is None
|
||||
except Exception:
|
||||
# Fail open: if Redis is unavailable, process the event
|
||||
# (prefer occasional duplicate over lost data)
|
||||
logger.warning(
|
||||
"Redis unavailable for deduplication check, processing event anyway: %s",
|
||||
envelope.event_id,
|
||||
exc_info=True,
|
||||
)
|
||||
return False
|
||||
|
||||
def _rehydrate(self, envelope: TelemetryEnvelope) -> dict[str, Any]:
|
||||
"""Rehydrate payload from storage reference or inline data.
|
||||
|
||||
If the envelope payload is empty and metadata contains a
|
||||
``payload_ref``, the full payload is loaded from object storage
|
||||
(where the gateway wrote it as JSON). When both the inline
|
||||
payload and storage resolution fail, a degraded-event marker
|
||||
is emitted so the gap is observable.
|
||||
|
||||
Args:
|
||||
envelope: The telemetry envelope containing payload data.
|
||||
|
||||
Returns:
|
||||
The rehydrated payload dictionary, or ``{}`` on total failure.
|
||||
"""
|
||||
payload = envelope.payload
|
||||
|
||||
# Resolve from object storage when the gateway offloaded a large payload.
|
||||
if not payload and envelope.metadata:
|
||||
payload_ref = envelope.metadata.get("payload_ref")
|
||||
if payload_ref:
|
||||
try:
|
||||
payload_bytes = storage.load(payload_ref)
|
||||
payload = json.loads(payload_bytes.decode("utf-8"))
|
||||
logger.debug("Loaded payload from storage: key=%s", payload_ref)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to load payload from storage: key=%s, event_id=%s",
|
||||
payload_ref,
|
||||
envelope.event_id,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
if not payload:
|
||||
# Storage resolution failed or no data available — emit degraded event.
|
||||
logger.error(
|
||||
"Payload rehydration failed for event_id=%s, tenant_id=%s, case=%s",
|
||||
envelope.event_id,
|
||||
envelope.tenant_id,
|
||||
envelope.case,
|
||||
)
|
||||
from enterprise.telemetry.entities import EnterpriseTelemetryEvent
|
||||
from enterprise.telemetry.telemetry_log import emit_metric_only_event
|
||||
|
||||
emit_metric_only_event(
|
||||
event_name=EnterpriseTelemetryEvent.REHYDRATION_FAILED,
|
||||
attributes={
|
||||
"tenant_id": envelope.tenant_id,
|
||||
"dify.telemetry.error": f"Payload rehydration failed for event_id={envelope.event_id}",
|
||||
"dify.telemetry.payload_type": envelope.case,
|
||||
"dify.telemetry.correlation_id": envelope.event_id,
|
||||
},
|
||||
tenant_id=envelope.tenant_id,
|
||||
)
|
||||
self._increment_diagnostic_counter("rehydration_failed_total")
|
||||
return {}
|
||||
|
||||
return payload
|
||||
|
||||
# Stub methods for each metric/log case
|
||||
# These will be implemented in later tasks with actual emission logic
|
||||
|
||||
def _on_app_created(self, envelope: TelemetryEnvelope) -> None:
|
||||
"""Handle app created event."""
|
||||
from enterprise.telemetry.entities import EnterpriseTelemetryCounter, EnterpriseTelemetryEvent
|
||||
from enterprise.telemetry.telemetry_log import emit_metric_only_event
|
||||
from extensions.ext_enterprise_telemetry import get_enterprise_exporter
|
||||
|
||||
exporter = get_enterprise_exporter()
|
||||
if not exporter:
|
||||
logger.debug("No exporter available for APP_CREATED: event_id=%s", envelope.event_id)
|
||||
return
|
||||
|
||||
payload = self._rehydrate(envelope)
|
||||
if not payload:
|
||||
return
|
||||
|
||||
attrs = {
|
||||
"dify.app_id": payload.get("app_id"),
|
||||
"dify.tenant_id": envelope.tenant_id,
|
||||
"dify.event.id": envelope.event_id,
|
||||
"dify.app.mode": payload.get("mode"),
|
||||
"dify.app.created_at": datetime.now(UTC).isoformat(),
|
||||
}
|
||||
|
||||
emit_metric_only_event(
|
||||
event_name=EnterpriseTelemetryEvent.APP_CREATED,
|
||||
attributes=attrs,
|
||||
tenant_id=envelope.tenant_id,
|
||||
)
|
||||
exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.APP_CREATED,
|
||||
1,
|
||||
{
|
||||
"tenant_id": envelope.tenant_id,
|
||||
"app_id": str(payload.get("app_id", "")),
|
||||
"mode": str(payload.get("mode", "")),
|
||||
},
|
||||
)
|
||||
|
||||
def _on_app_updated(self, envelope: TelemetryEnvelope) -> None:
|
||||
"""Handle app updated event."""
|
||||
from enterprise.telemetry.entities import EnterpriseTelemetryCounter, EnterpriseTelemetryEvent
|
||||
from enterprise.telemetry.telemetry_log import emit_metric_only_event
|
||||
from extensions.ext_enterprise_telemetry import get_enterprise_exporter
|
||||
|
||||
exporter = get_enterprise_exporter()
|
||||
if not exporter:
|
||||
logger.debug("No exporter available for APP_UPDATED: event_id=%s", envelope.event_id)
|
||||
return
|
||||
|
||||
payload = self._rehydrate(envelope)
|
||||
if not payload:
|
||||
return
|
||||
|
||||
attrs = {
|
||||
"dify.app_id": payload.get("app_id"),
|
||||
"dify.tenant_id": envelope.tenant_id,
|
||||
"dify.event.id": envelope.event_id,
|
||||
"dify.app.updated_at": datetime.now(UTC).isoformat(),
|
||||
}
|
||||
|
||||
emit_metric_only_event(
|
||||
event_name=EnterpriseTelemetryEvent.APP_UPDATED,
|
||||
attributes=attrs,
|
||||
tenant_id=envelope.tenant_id,
|
||||
)
|
||||
exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.APP_UPDATED,
|
||||
1,
|
||||
{
|
||||
"tenant_id": envelope.tenant_id,
|
||||
"app_id": str(payload.get("app_id", "")),
|
||||
},
|
||||
)
|
||||
|
||||
def _on_app_deleted(self, envelope: TelemetryEnvelope) -> None:
|
||||
"""Handle app deleted event."""
|
||||
from enterprise.telemetry.entities import EnterpriseTelemetryCounter, EnterpriseTelemetryEvent
|
||||
from enterprise.telemetry.telemetry_log import emit_metric_only_event
|
||||
from extensions.ext_enterprise_telemetry import get_enterprise_exporter
|
||||
|
||||
exporter = get_enterprise_exporter()
|
||||
if not exporter:
|
||||
logger.debug("No exporter available for APP_DELETED: event_id=%s", envelope.event_id)
|
||||
return
|
||||
|
||||
payload = self._rehydrate(envelope)
|
||||
if not payload:
|
||||
return
|
||||
|
||||
attrs = {
|
||||
"dify.app_id": payload.get("app_id"),
|
||||
"dify.tenant_id": envelope.tenant_id,
|
||||
"dify.event.id": envelope.event_id,
|
||||
"dify.app.deleted_at": datetime.now(UTC).isoformat(),
|
||||
}
|
||||
|
||||
emit_metric_only_event(
|
||||
event_name=EnterpriseTelemetryEvent.APP_DELETED,
|
||||
attributes=attrs,
|
||||
tenant_id=envelope.tenant_id,
|
||||
)
|
||||
exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.APP_DELETED,
|
||||
1,
|
||||
{
|
||||
"tenant_id": envelope.tenant_id,
|
||||
"app_id": str(payload.get("app_id", "")),
|
||||
},
|
||||
)
|
||||
|
||||
def _on_feedback_created(self, envelope: TelemetryEnvelope) -> None:
|
||||
"""Handle feedback created event."""
|
||||
from enterprise.telemetry.entities import EnterpriseTelemetryCounter, EnterpriseTelemetryEvent
|
||||
from enterprise.telemetry.telemetry_log import emit_metric_only_event
|
||||
from extensions.ext_enterprise_telemetry import get_enterprise_exporter
|
||||
|
||||
exporter = get_enterprise_exporter()
|
||||
if not exporter:
|
||||
logger.debug("No exporter available for FEEDBACK_CREATED: event_id=%s", envelope.event_id)
|
||||
return
|
||||
|
||||
payload = self._rehydrate(envelope)
|
||||
if not payload:
|
||||
return
|
||||
|
||||
include_content = exporter.include_content
|
||||
attrs: dict = {
|
||||
"dify.message.id": payload.get("message_id"),
|
||||
"dify.tenant_id": envelope.tenant_id,
|
||||
"dify.event.id": envelope.event_id,
|
||||
"dify.app_id": payload.get("app_id"),
|
||||
"dify.conversation.id": payload.get("conversation_id"),
|
||||
"gen_ai.user.id": payload.get("from_end_user_id") or payload.get("from_account_id"),
|
||||
"dify.feedback.rating": payload.get("rating"),
|
||||
"dify.feedback.from_source": payload.get("from_source"),
|
||||
"dify.feedback.created_at": datetime.now(UTC).isoformat(),
|
||||
}
|
||||
if include_content:
|
||||
attrs["dify.feedback.content"] = payload.get("content")
|
||||
|
||||
user_id = payload.get("from_end_user_id") or payload.get("from_account_id")
|
||||
emit_metric_only_event(
|
||||
event_name=EnterpriseTelemetryEvent.FEEDBACK_CREATED,
|
||||
attributes=attrs,
|
||||
tenant_id=envelope.tenant_id,
|
||||
user_id=str(user_id or ""),
|
||||
)
|
||||
exporter.increment_counter(
|
||||
EnterpriseTelemetryCounter.FEEDBACK,
|
||||
1,
|
||||
{
|
||||
"tenant_id": envelope.tenant_id,
|
||||
"app_id": str(payload.get("app_id", "")),
|
||||
"rating": str(payload.get("rating", "")),
|
||||
},
|
||||
)
|
||||
|
||||
def _on_message_run(self, envelope: TelemetryEnvelope) -> None:
|
||||
"""Handle message run event.
|
||||
|
||||
Intentionally a no-op: metrics and structured logs for message runs are
|
||||
emitted directly by EnterpriseOtelTrace._message_trace at trace time,
|
||||
not through the metric handler queue path.
|
||||
"""
|
||||
logger.debug("Processing MESSAGE_RUN: event_id=%s", envelope.event_id)
|
||||
|
||||
def _on_tool_execution(self, envelope: TelemetryEnvelope) -> None:
|
||||
"""Handle tool execution event.
|
||||
|
||||
Intentionally a no-op: metrics and structured logs for tool executions
|
||||
are emitted directly by EnterpriseOtelTrace._tool_trace at trace time,
|
||||
not through the metric handler queue path.
|
||||
"""
|
||||
logger.debug("Processing TOOL_EXECUTION: event_id=%s", envelope.event_id)
|
||||
|
||||
def _on_moderation_check(self, envelope: TelemetryEnvelope) -> None:
|
||||
"""Handle moderation check event.
|
||||
|
||||
Intentionally a no-op: metrics and structured logs for moderation checks
|
||||
are emitted directly by EnterpriseOtelTrace._moderation_trace at trace time,
|
||||
not through the metric handler queue path.
|
||||
"""
|
||||
logger.debug("Processing MODERATION_CHECK: event_id=%s", envelope.event_id)
|
||||
|
||||
def _on_suggested_question(self, envelope: TelemetryEnvelope) -> None:
|
||||
"""Handle suggested question event.
|
||||
|
||||
Intentionally a no-op: metrics and structured logs for suggested questions
|
||||
are emitted directly by EnterpriseOtelTrace._suggested_question_trace at
|
||||
trace time, not through the metric handler queue path.
|
||||
"""
|
||||
logger.debug("Processing SUGGESTED_QUESTION: event_id=%s", envelope.event_id)
|
||||
|
||||
def _on_dataset_retrieval(self, envelope: TelemetryEnvelope) -> None:
|
||||
"""Handle dataset retrieval event.
|
||||
|
||||
Intentionally a no-op: metrics and structured logs for dataset retrievals
|
||||
are emitted directly by EnterpriseOtelTrace._dataset_retrieval_trace at
|
||||
trace time, not through the metric handler queue path.
|
||||
"""
|
||||
logger.debug("Processing DATASET_RETRIEVAL: event_id=%s", envelope.event_id)
|
||||
|
||||
def _on_generate_name(self, envelope: TelemetryEnvelope) -> None:
|
||||
"""Handle generate name event.
|
||||
|
||||
Intentionally a no-op: metrics and structured logs for generate name
|
||||
operations are emitted directly by EnterpriseOtelTrace._generate_name_trace
|
||||
at trace time, not through the metric handler queue path.
|
||||
"""
|
||||
logger.debug("Processing GENERATE_NAME: event_id=%s", envelope.event_id)
|
||||
|
||||
def _on_prompt_generation(self, envelope: TelemetryEnvelope) -> None:
|
||||
"""Handle prompt generation event.
|
||||
|
||||
Intentionally a no-op: metrics and structured logs for prompt generation
|
||||
operations are emitted directly by EnterpriseOtelTrace._prompt_generation_trace
|
||||
at trace time, not through the metric handler queue path.
|
||||
"""
|
||||
logger.debug("Processing PROMPT_GENERATION: event_id=%s", envelope.event_id)
|
||||
122
api/enterprise/telemetry/telemetry_log.py
Normal file
122
api/enterprise/telemetry/telemetry_log.py
Normal file
@ -0,0 +1,122 @@
|
||||
"""Structured-log emitter for enterprise telemetry events.
|
||||
|
||||
Emits structured JSON log lines correlated with OTEL traces via trace_id.
|
||||
Picked up by ``StructuredJSONFormatter`` → stdout/Loki/Elastic.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from functools import lru_cache
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from enterprise.telemetry.entities import EnterpriseTelemetryEvent
|
||||
|
||||
logger = logging.getLogger("dify.telemetry")
|
||||
|
||||
|
||||
@lru_cache(maxsize=4096)
|
||||
def compute_trace_id_hex(uuid_str: str | None) -> str:
|
||||
"""Convert a business UUID string to a 32-hex OTEL-compatible trace_id.
|
||||
|
||||
Returns empty string when *uuid_str* is ``None`` or invalid.
|
||||
"""
|
||||
if not uuid_str:
|
||||
return ""
|
||||
normalized = uuid_str.strip().lower()
|
||||
if len(normalized) == 32 and all(ch in "0123456789abcdef" for ch in normalized):
|
||||
return normalized
|
||||
try:
|
||||
return f"{uuid.UUID(normalized).int:032x}"
|
||||
except (ValueError, AttributeError):
|
||||
return ""
|
||||
|
||||
|
||||
@lru_cache(maxsize=4096)
|
||||
def compute_span_id_hex(uuid_str: str | None) -> str:
|
||||
if not uuid_str:
|
||||
return ""
|
||||
normalized = uuid_str.strip().lower()
|
||||
if len(normalized) == 16 and all(ch in "0123456789abcdef" for ch in normalized):
|
||||
return normalized
|
||||
try:
|
||||
from enterprise.telemetry.id_generator import compute_deterministic_span_id
|
||||
|
||||
return f"{compute_deterministic_span_id(normalized):016x}"
|
||||
except (ValueError, AttributeError):
|
||||
return ""
|
||||
|
||||
|
||||
def emit_telemetry_log(
|
||||
*,
|
||||
event_name: str | EnterpriseTelemetryEvent,
|
||||
attributes: dict[str, Any],
|
||||
signal: str = "metric_only",
|
||||
trace_id_source: str | None = None,
|
||||
span_id_source: str | None = None,
|
||||
tenant_id: str | None = None,
|
||||
user_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit a structured log line for a telemetry event.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
event_name:
|
||||
Canonical event name, e.g. ``"dify.workflow.run"``.
|
||||
attributes:
|
||||
All event-specific attributes (already built by the caller).
|
||||
signal:
|
||||
``"metric_only"`` for events with no span, ``"span_detail"``
|
||||
for detail logs accompanying a slim span.
|
||||
trace_id_source:
|
||||
A UUID string (e.g. ``workflow_run_id``) used to derive a 32-hex
|
||||
trace_id for cross-signal correlation.
|
||||
tenant_id:
|
||||
Tenant identifier (for the ``IdentityContextFilter``).
|
||||
user_id:
|
||||
User identifier (for the ``IdentityContextFilter``).
|
||||
"""
|
||||
if not logger.isEnabledFor(logging.INFO):
|
||||
return
|
||||
attrs = {
|
||||
"dify.event.name": event_name,
|
||||
"dify.event.signal": signal,
|
||||
**attributes,
|
||||
}
|
||||
|
||||
extra: dict[str, Any] = {"attributes": attrs}
|
||||
|
||||
trace_id_hex = compute_trace_id_hex(trace_id_source)
|
||||
if trace_id_hex:
|
||||
extra["trace_id"] = trace_id_hex
|
||||
span_id_hex = compute_span_id_hex(span_id_source)
|
||||
if span_id_hex:
|
||||
extra["span_id"] = span_id_hex
|
||||
if tenant_id:
|
||||
extra["tenant_id"] = tenant_id
|
||||
if user_id:
|
||||
extra["user_id"] = user_id
|
||||
|
||||
logger.info("telemetry.%s", signal, extra=extra)
|
||||
|
||||
|
||||
def emit_metric_only_event(
|
||||
*,
|
||||
event_name: str | EnterpriseTelemetryEvent,
|
||||
attributes: dict[str, Any],
|
||||
trace_id_source: str | None = None,
|
||||
span_id_source: str | None = None,
|
||||
tenant_id: str | None = None,
|
||||
user_id: str | None = None,
|
||||
) -> None:
|
||||
emit_telemetry_log(
|
||||
event_name=event_name,
|
||||
attributes=attributes,
|
||||
signal="metric_only",
|
||||
trace_id_source=trace_id_source,
|
||||
span_id_source=span_id_source,
|
||||
tenant_id=tenant_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
@ -0,0 +1,25 @@
|
||||
class _EventHook:
|
||||
def __init__(self):
|
||||
self._handlers = []
|
||||
|
||||
def __iadd__(self, handler):
|
||||
self._handlers.append(handler)
|
||||
return self
|
||||
|
||||
def __isub__(self, handler):
|
||||
try:
|
||||
self._handlers.remove(handler)
|
||||
except ValueError:
|
||||
pass
|
||||
return self
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
for handler in list(self._handlers):
|
||||
handler(*args, **kwargs)
|
||||
|
||||
|
||||
class Events:
|
||||
def __getattr__(self, name):
|
||||
hook = _EventHook()
|
||||
setattr(self, name, hook)
|
||||
return hook
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user