Files
ragflow/api/db/services/canvas_service.py
plind dd76653dc1 feat: add tag management for Agents with filtering and sorting (#14774) (#14799)
## Summary
Closes #14774.

Adds free-form tags on agents (UserCanvas) with full UI + API:
- Stored as comma-separated `tags` column on `UserCanvas` with online
migration.
- New endpoints: `GET /v1/agents/tags` (aggregate counts) and `PUT
/v1/agent/<id>/tags` (write). `GET /v1/agents` accepts a `tags=` query.
- "Edit tags" item in agent dropdown opens a chip-style editor dialog;
tags render as badges on each agent card.
- New "Tags" facet in the agents filter bar, with counts.

## Implementation notes
- **Tag matching is exact-token**: the SQL filter wraps stored tags as
`,…,` and matches `,ml,` so `ml` doesn't match `ml-ops`.
- **Server-side normalization** in `UserCanvasService.update_tags`:
dedup (case-insensitive), per-tag cap of 64 chars, total length capped
at 512 chars to fit the column, commas inside tag values are replaced
with spaces.
- **Tenant authorization**: `PUT /v1/agent/<id>/tags` gates on
`UserCanvasService.accessible(canvas_id, tenant_id)`.
- **Tag listing scope**: `UserCanvasService.list_tags` follows the same
own + team-shared rule as `get_by_tenant_ids`.
- **i18n**: keys added to `en.ts` and `zh.ts` only (per project
convention; other locales fall back).
- **`HomeCard`** gets a non-breaking `extra?: ReactNode` slot for the
chip row; no `src/components/ui/` files modified.

## Test plan
- [ ] Backend boot runs `migrate_db` → confirm `user_canvas.tags` column
exists (`DESCRIBE user_canvas`).
- [ ] Agents page renders cards normally (no console error from missing
field).
- [ ] `⋯ → Edit tags` opens a dialog that stays open (regression: dialog
was unmounting with the dropdown).
- [ ] Typing a tag without pressing Enter and clicking Save persists it
(regression: last typed tag was being dropped).
- [ ] Chip input supports Enter/comma to commit, Backspace on empty to
remove, `×` to remove individual chip.
- [ ] Tag containing a comma sent via API is stored with the comma
replaced by a space.
- [ ] 20 long tags sent via API does not error (length cap silently
truncates).
- [ ] "Tags" filter in the filter bar shows counts and narrows the list.
- [ ] Filtering by `ml` does **not** return agents tagged `ml-ops`.
- [ ] UI in Chinese shows 编辑标签 / 添加标签以整理和筛选你的智能体 etc.
- [ ] `PUT /v1/agent/<other-tenant-id>/tags` returns `Agent not found or
no permission.`
2026-05-13 21:41:32 +08:00

481 lines
18 KiB
Python

#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
import logging
import time
from functools import reduce
from operator import or_
from uuid import uuid4
from agent.canvas import Canvas
from api.db import CanvasCategory, TenantPermission
from api.db.db_models import DB, CanvasTemplate, User, UserCanvas, API4Conversation, UserCanvasVersion
from api.db.services.api_service import API4ConversationService
from api.db.services.common_service import CommonService
from api.db.services.user_canvas_version import UserCanvasVersionService
from common.misc_utils import get_uuid, thread_pool_exec
from api.utils.api_utils import get_data_openai
import tiktoken
from peewee import fn
class CanvasTemplateService(CommonService):
model = CanvasTemplate
class DataFlowTemplateService(CommonService):
"""
Alias of CanvasTemplateService
"""
model = CanvasTemplate
class UserCanvasService(CommonService):
model = UserCanvas
@classmethod
@DB.connection_context()
def get_list(cls, tenant_id,
page_number, items_per_page, orderby, desc, id, title, canvas_category=CanvasCategory.Agent):
agents = cls.model.select()
if id:
agents = agents.where(cls.model.id == id)
if title:
agents = agents.where(cls.model.title == title)
agents = agents.where(cls.model.user_id == tenant_id)
agents = agents.where(cls.model.canvas_category == canvas_category)
if desc:
agents = agents.order_by(cls.model.getter_by(orderby).desc())
else:
agents = agents.order_by(cls.model.getter_by(orderby).asc())
agents = agents.paginate(page_number, items_per_page)
return list(agents.dicts())
@classmethod
@DB.connection_context()
def get_all_agents_by_tenant_ids(cls, tenant_ids, user_id):
# will get all permitted agents, be cautious
fields = [
cls.model.id,
cls.model.avatar,
cls.model.title,
cls.model.permission,
cls.model.canvas_type,
cls.model.canvas_category
]
# find team agents and owned agents
agents = cls.model.select(*fields).where(
(cls.model.user_id.in_(tenant_ids) & (cls.model.permission == TenantPermission.TEAM.value)) | (
cls.model.user_id == user_id
)
)
# sort by create_time, asc
agents.order_by(cls.model.create_time.asc())
# maybe cause slow query by deep paginate, optimize later
offset, limit = 0, 50
res = []
while True:
ag_batch = agents.offset(offset).limit(limit)
_temp = list(ag_batch.dicts())
if not _temp:
break
res.extend(_temp)
offset += limit
return res
@classmethod
@DB.connection_context()
def get_by_canvas_id(cls, pid):
try:
fields = [
cls.model.id,
cls.model.avatar,
cls.model.title,
cls.model.dsl,
cls.model.description,
cls.model.permission,
cls.model.update_time,
cls.model.user_id,
cls.model.create_time,
cls.model.create_date,
cls.model.update_date,
cls.model.canvas_category,
User.nickname,
User.avatar.alias('tenant_avatar'),
]
agents = cls.model.select(*fields) \
.join(User, on=(cls.model.user_id == User.id)) \
.where(cls.model.id == pid)
# obj = cls.model.query(id=pid)[0]
return True, agents.dicts()[0]
except Exception as e:
logging.exception(e)
return False, None
@classmethod
@DB.connection_context()
def get_basic_info_by_canvas_ids(cls, canvas_id):
fields = [
cls.model.id,
cls.model.avatar,
cls.model.user_id,
cls.model.title,
cls.model.permission,
cls.model.canvas_category
]
return cls.model.select(*fields).where(cls.model.id.in_(canvas_id)).dicts()
@classmethod
@DB.connection_context()
def get_by_tenant_ids(
cls,
joined_tenant_ids,
user_id,
page_number,
items_per_page,
orderby,
desc,
keywords,
canvas_category=None,
tags=None,
):
fields = [
cls.model.id,
cls.model.avatar,
cls.model.title,
cls.model.description,
cls.model.permission,
cls.model.user_id.alias("tenant_id"),
User.nickname,
User.avatar.alias('tenant_avatar'),
cls.model.update_time,
cls.model.canvas_category,
cls.model.tags,
]
if keywords:
agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
(((cls.model.user_id.in_(joined_tenant_ids)) & (cls.model.permission == TenantPermission.TEAM.value)) | (cls.model.user_id == user_id)),
(fn.LOWER(cls.model.title).contains(keywords.lower()))
)
else:
agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
(((cls.model.user_id.in_(joined_tenant_ids)) & (cls.model.permission == TenantPermission.TEAM.value)) | (cls.model.user_id == user_id))
)
if canvas_category:
agents = agents.where(cls.model.canvas_category == canvas_category)
if tags:
tag_list = [t.strip() for t in tags if t and t.strip()] if isinstance(tags, (list, tuple)) else [t.strip() for t in str(tags).split(",") if t.strip()]
if tag_list:
# Wrap value with commas so 'ml' doesn't match 'ml-ops'.
wrapped = fn.CONCAT(",", cls.model.tags, ",")
clauses = [wrapped.contains(f",{t},") for t in tag_list]
agents = agents.where(reduce(or_, clauses))
if desc:
agents = agents.order_by(cls.model.getter_by(orderby).desc())
else:
agents = agents.order_by(cls.model.getter_by(orderby).asc())
count = agents.count()
if page_number and items_per_page:
agents = agents.paginate(page_number, items_per_page)
agents_list = list(agents.dicts())
# Get latest release time for each canvas
if agents_list:
canvas_ids = [a['id'] for a in agents_list]
release_times = (
UserCanvasVersion.select(UserCanvasVersion.user_canvas_id, fn.MAX(UserCanvasVersion.create_time).alias("release_time"))
.where((UserCanvasVersion.user_canvas_id.in_(canvas_ids)) & (UserCanvasVersion.release))
.group_by(UserCanvasVersion.user_canvas_id)
)
release_time_map = {r.user_canvas_id: r.release_time for r in release_times}
for agent in agents_list:
agent['release_time'] = release_time_map.get(agent['id'])
return agents_list, count
@classmethod
@DB.connection_context()
def list_tags(cls, joined_tenant_ids, user_id, canvas_category=None):
"""Return {tag: agent_count} aggregated across agents visible to the user."""
query = cls.model.select(cls.model.tags).where(
((cls.model.user_id.in_(joined_tenant_ids)) & (cls.model.permission == TenantPermission.TEAM.value)) | (cls.model.user_id == user_id)
)
if canvas_category:
query = query.where(cls.model.canvas_category == canvas_category)
counts: dict[str, int] = {}
for row in query.dicts():
for t in (row.get("tags") or "").split(","):
t = t.strip()
if t:
counts[t] = counts.get(t, 0) + 1
logging.info(
"UserCanvasService.list_tags user=%s canvas_category=%s tags_count=%d",
user_id,
canvas_category,
len(counts),
)
return counts
# Tag storage is a single comma-separated CharField(max_length=512);
# commas inside a tag would corrupt the encoding, so strip them on write.
TAGS_FIELD_MAX = 512
TAG_MAX_LEN = 64
@classmethod
@DB.connection_context()
def update_tags(cls, canvas_id, tags):
"""Persist a normalized comma-separated tag string for the given canvas."""
if isinstance(tags, (list, tuple)):
cleaned = [str(t).replace(",", " ").strip() for t in tags if t and str(t).strip()]
else:
cleaned = [t.strip() for t in str(tags or "").split(",") if t.strip()]
# Dedupe (case-insensitive, preserve order), cap individual tag length,
# then truncate the joined value so it always fits the column.
seen = set()
normalized = []
used = 0
for t in cleaned:
t = t[: cls.TAG_MAX_LEN]
key = t.lower()
if key in seen:
continue
extra = len(t) + (1 if normalized else 0)
if used + extra > cls.TAGS_FIELD_MAX:
break
seen.add(key)
normalized.append(t)
used += extra
value = ",".join(normalized)
rows_affected = cls.model.update(tags=value).where(cls.model.id == canvas_id).execute()
logging.info(
"UserCanvasService.update_tags canvas_id=%s tags_count=%d rows=%d",
canvas_id,
len(normalized),
rows_affected,
)
return rows_affected
@classmethod
@DB.connection_context()
def accessible(cls, canvas_id, tenant_id):
from api.db.services.user_service import UserTenantService
e, c = UserCanvasService.get_by_canvas_id(canvas_id)
if not e:
return False
tids = [t.tenant_id for t in UserTenantService.query(user_id=tenant_id)]
if c["user_id"] == tenant_id:
return True
if c["user_id"] not in tids:
return False
if c["permission"] != TenantPermission.TEAM.value:
return False
return True
@classmethod
def get_agent_dsl_with_release(cls, agent_id, release_mode=False, tenant_id=None):
e, cvs = cls.get_by_id(agent_id)
if not e:
raise LookupError("Agent not found.")
if release_mode:
released_version = UserCanvasVersionService.get_latest_released(agent_id)
if not released_version:
raise PermissionError("No available published version")
dsl = released_version.dsl
else:
dsl = cvs.dsl
if not isinstance(dsl, str):
dsl = json.dumps(dsl, ensure_ascii=False)
return cvs, dsl
async def completion(tenant_id, agent_id, session_id=None, **kwargs):
query = kwargs.get("query", "") or kwargs.get("question", "")
files = kwargs.get("files", [])
inputs = kwargs.get("inputs", {})
user_id = kwargs.get("user_id", "")
custom_header = kwargs.get("custom_header", "")
release_mode = str(kwargs.get("release", "")).strip().lower()
if session_id:
e, conv = await thread_pool_exec(API4ConversationService.get_by_id, session_id)
if not e:
raise LookupError("Session not found!")
if not conv.message:
conv.message = []
if not isinstance(conv.dsl, str):
conv.dsl = json.dumps(conv.dsl, ensure_ascii=False)
canvas = Canvas(conv.dsl, tenant_id, agent_id, canvas_id=agent_id, custom_header=custom_header)
else:
cvs, dsl = await thread_pool_exec(UserCanvasService.get_agent_dsl_with_release, agent_id, release_mode=release_mode == "true", tenant_id=tenant_id)
session_id = get_uuid()
canvas = Canvas(dsl, tenant_id, agent_id, canvas_id=cvs.id, custom_header=custom_header)
canvas.reset()
# Get the version title based on release_mode
version_title = await thread_pool_exec(UserCanvasVersionService.get_latest_version_title, cvs.id, release_mode=release_mode == "true")
conv = {"id": session_id, "dialog_id": cvs.id, "user_id": user_id, "message": [], "source": "agent", "dsl": dsl, "reference": [], "version_title": version_title}
await thread_pool_exec(API4ConversationService.save, **conv)
conv = API4Conversation(**conv)
message_id = str(uuid4())
conv.message.append({
"role": "user",
"content": query,
"id": message_id,
"files": files
})
txt = ""
async for ans in canvas.run(query=query, files=files, user_id=user_id, inputs=inputs):
ans["session_id"] = session_id
if ans["event"] == "message":
txt += ans["data"]["content"]
if ans["data"].get("start_to_think", False):
txt += "<think>"
elif ans["data"].get("end_to_think", False):
txt += "</think>"
yield "data:" + json.dumps(ans, ensure_ascii=False) + "\n\n"
conv.message.append({"role": "assistant", "content": txt, "created_at": time.time(), "id": message_id})
conv.reference = canvas.get_reference()
conv.errors = canvas.error
conv.dsl = str(canvas)
conv = conv.to_dict()
await thread_pool_exec(API4ConversationService.append_message, conv["id"], conv)
async def completion_openai(tenant_id, agent_id, question, session_id=None, stream=True, **kwargs):
tiktoken_encoder = tiktoken.get_encoding("cl100k_base")
prompt_tokens = len(tiktoken_encoder.encode(str(question)))
user_id = kwargs.get("user_id", "")
if stream:
completion_tokens = 0
try:
async for ans in completion(
tenant_id=tenant_id,
agent_id=agent_id,
session_id=session_id,
query=question,
user_id=user_id,
**kwargs
):
if isinstance(ans, str):
try:
ans = json.loads(ans[5:]) # remove "data:"
except Exception as e:
logging.exception(f"Agent OpenAI-Compatible completion_openai parse answer failed: {e}")
continue
if ans.get("event") not in ["message", "message_end"]:
continue
content_piece = ""
if ans["event"] == "message":
content_piece = ans["data"]["content"]
completion_tokens += len(tiktoken_encoder.encode(content_piece))
openai_data = get_data_openai(
id=session_id or str(uuid4()),
model=agent_id,
content=content_piece,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
stream=True
)
if ans.get("data", {}).get("reference", None):
openai_data["choices"][0]["delta"]["reference"] = ans["data"]["reference"]
yield "data: " + json.dumps(openai_data, ensure_ascii=False) + "\n\n"
yield "data: [DONE]\n\n"
except Exception as e:
logging.exception(e)
yield "data: " + json.dumps(
get_data_openai(
id=session_id or str(uuid4()),
model=agent_id,
content=f"**ERROR**: {str(e)}",
finish_reason="stop",
prompt_tokens=prompt_tokens,
completion_tokens=len(tiktoken_encoder.encode(f"**ERROR**: {str(e)}")),
stream=True
),
ensure_ascii=False
) + "\n\n"
yield "data: [DONE]\n\n"
else:
try:
all_content = ""
reference = {}
async for ans in completion(
tenant_id=tenant_id,
agent_id=agent_id,
session_id=session_id,
query=question,
user_id=user_id,
**kwargs
):
if isinstance(ans, str):
ans = json.loads(ans[5:])
if ans.get("event") not in ["message", "message_end"]:
continue
if ans["event"] == "message":
all_content += ans["data"]["content"]
if ans.get("data", {}).get("reference", None):
reference.update(ans["data"]["reference"])
completion_tokens = len(tiktoken_encoder.encode(all_content))
openai_data = get_data_openai(
id=session_id or str(uuid4()),
model=agent_id,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
content=all_content,
finish_reason="stop",
param=None
)
if reference:
openai_data["choices"][0]["message"]["reference"] = reference
yield openai_data
except Exception as e:
logging.exception(e)
yield get_data_openai(
id=session_id or str(uuid4()),
model=agent_id,
prompt_tokens=prompt_tokens,
completion_tokens=len(tiktoken_encoder.encode(f"**ERROR**: {str(e)}")),
content=f"**ERROR**: {str(e)}",
finish_reason="stop",
param=None
)