mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-01 05:17:51 +08:00
Closes #14590 ## Self Checks - [x] I have searched for existing issues [search for existing issues](https://github.com/infiniflow/ragflow/issues), including closed ones. - [x] I confirm that I am using English to submit this report ([Language Policy](https://github.com/infiniflow/ragflow/issues/5910)). - [x] Non-English title submissions will be closed directly ( 非英文标题的提交将会被直接关闭 ) ([Language Policy](https://github.com/infiniflow/ragflow/issues/5910)). - [x] Please do not modify this template :) and fill in all the required fields. ## RAGFlow workspace code commit ID `a1b2c3d4e5f67890123456789abcdef12345678` ## RAGFlow image version `0.13.1` ## Other environment information - Hardware parameters: N/A - OS type: Linux 6.17.0-22-generic - Others: API key authentication via `Authorization: Bearer <token>` ## Actual behavior The chatbot API endpoints: - `POST /chatbots/<dialog_id>/completions` - `GET /chatbots/<dialog_id>/info` validate only that the bearer token exists in `APIToken`, but do not verify that `dialog_id` belongs to the same tenant as that token. Current flow (simplified): 1. Route extracts bearer token and checks `APIToken.query(beta=token)`. 2. If token exists, request is accepted. 3. Downstream service resolves dialog globally by ID (`DialogService.get_by_id(dialog_id)` in `conversation_service.py`). 4. No tenant ownership check is enforced for `dialog_id`. Impact: Any user with a valid API key can attempt arbitrary `dialog_id` values and access/invoke chatbots outside their own tenant boundary if IDs are known/guessed/leaked. Security classification: - Vulnerability class: Broken Access Control (IDOR, OWASP Top 10 A01) - Severity recommendation: Critical - Exploit prerequisite: any valid API key + discoverable target `dialog_id` ## Expected behavior Requests to `/chatbots/<dialog_id>/completions` and `/chatbots/<dialog_id>/info` must be authorized only when: 1. bearer token is valid, and 2. `dialog_id` belongs to the same `tenant_id` as the token. 
Otherwise, reject with authorization failure (e.g., 403 or 404-equivalent policy). ## Steps to reproduce 1. Prepare two tenants: - Tenant A with API key `TOKEN_A` - Tenant B with chatbot `dialog_id = DIALOG_B` 2. Send request from Tenant A to Tenant B chatbot completion endpoint: ```bash curl -X POST "https://<host>/chatbots/DIALOG_B/completions" \ -H "Authorization: Bearer TOKEN_A" \ -H "Content-Type: application/json" \ -d '{"question":"hello","stream":false}' ``` 3. Observe request is processed (or reaches dialog resolution) without tenant ownership rejection. 4. Repeat against info endpoint: ```bash curl -X GET "https://<host>/chatbots/DIALOG_B/info" \ -H "Authorization: Bearer TOKEN_A" ``` 5. Observe the same missing ownership enforcement. ## Additional information Affected code paths: - `api/apps/sdk/session.py` - `chatbot_completions(dialog_id)` - `chatbots_inputs(dialog_id)` - `api/db/services/conversation_service.py` - `async_iframe_completion(...)` uses global dialog lookup Suggested fix: 1. In both chatbot endpoints: - Resolve `tenant_id = objs[0].tenant_id` from validated token. - Fetch dialog with tenant-scoped query (`DialogService.query(id=dialog_id, tenant_id=tenant_id)`). - Reject if dialog is not found/owned by tenant. 2. Defense in depth: - Require and enforce `tenant_id` in service-layer dialog resolution for external flows. - Avoid global `get_by_id(dialog_id)` where user-controlled dialog IDs are reachable. 3. Add regression tests: - Positive: same-tenant token + dialog succeeds. - Negative: cross-tenant token + dialog fails for both endpoints.
296 lines
11 KiB
Python
296 lines
11 KiB
Python
#
|
|
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
import time
|
|
import logging
|
|
from uuid import uuid4
|
|
from common.constants import StatusEnum
|
|
from api.db.db_models import Conversation, DB
|
|
from api.db.services.api_service import API4ConversationService
|
|
from api.db.services.common_service import CommonService
|
|
from api.db.services.dialog_service import DialogService, async_chat
|
|
from common.misc_utils import get_uuid
|
|
import json
|
|
|
|
from rag.prompts.generator import chunks_format
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ConversationService(CommonService):
    """Query helpers for chat sessions stored in the ``Conversation`` model."""

    model = Conversation

    @classmethod
    @DB.connection_context()
    def get_list(cls, dialog_id, page_number, items_per_page, orderby, desc, id, name, user_id=None):
        """Return the sessions of one dialog as a list of dicts.

        Args:
            dialog_id: Only sessions whose ``dialog_id`` equals this value are returned.
            page_number: Page handed to ``paginate``; only used when ``items_per_page > 0``.
            items_per_page: Page size; a value ``<= 0`` disables pagination.
            orderby: Field name resolved through ``cls.model.getter_by``.
            desc: Sort descending when truthy, ascending otherwise.
            id: Optional exact-match filter on the session id (skipped when falsy).
            name: Optional exact-match filter on the session name (skipped when falsy).
            user_id: Optional exact-match filter on ``user_id`` (skipped when falsy).

        Returns:
            list[dict]: One dict per matching session row.
        """
        sessions = cls.model.select().where(cls.model.dialog_id == dialog_id)
        if id:
            sessions = sessions.where(cls.model.id == id)
        if name:
            sessions = sessions.where(cls.model.name == name)
        if user_id:
            sessions = sessions.where(cls.model.user_id == user_id)

        order_field = cls.model.getter_by(orderby)
        sessions = sessions.order_by(order_field.desc() if desc else order_field.asc())

        if items_per_page > 0:
            sessions = sessions.paginate(page_number, items_per_page)

        return list(sessions.dicts())

    @classmethod
    @DB.connection_context()
    def get_all_conversation_by_dialog_ids(cls, dialog_ids):
        """Return every session belonging to any of ``dialog_ids``, oldest first.

        Rows are fetched in batches of 100 using offset/limit.

        Args:
            dialog_ids: Collection of dialog ids to match against ``dialog_id``.

        Returns:
            list[dict]: All matching session rows as dicts, ordered by
            ``create_time`` and then ``id``.
        """
        if not dialog_ids:
            return []

        # The ordered query must be re-assigned: calling ``order_by`` and
        # discarding its result leaves the batches below unordered, so
        # offset/limit paging could skip or repeat rows. ``id`` is a tiebreaker
        # for rows that share the same ``create_time``.
        sessions = (
            cls.model.select()
            .where(cls.model.dialog_id.in_(dialog_ids))
            .order_by(cls.model.create_time.asc(), cls.model.id.asc())
        )

        offset, limit = 0, 100
        res = []
        while True:
            batch = list(sessions.offset(offset).limit(limit).dicts())
            res.extend(batch)
            # A short (or empty) batch means there is nothing left to read.
            if len(batch) < limit:
                break
            offset += limit
        return res
|
|
|
|
|
|
def structure_answer(conv, ans, message_id, session_id):
    """Tag one chat answer with its ids and fold it into the conversation.

    ``ans`` is mutated in place: its ``"reference"`` entry is normalised to a
    dict whose ``"chunks"`` key holds the result of ``chunks_format``, and
    ``"id"`` / ``"session_id"`` are set. When ``conv`` is truthy, the last
    assistant message and the last reference entry of ``conv`` are updated too.

    Args:
        conv: Conversation object exposing ``message`` and ``reference``
            attributes, or a falsy value to skip the conversation update.
        ans: Answer dict; must contain ``"answer"`` when ``conv`` is truthy.
            Optional keys read here: ``"reference"``, ``"final"`` (defaults to
            True), ``"start_to_think"``, ``"end_to_think"``.
        message_id: Id stored on the answer and on the assistant message.
        session_id: Id stored on the answer.

    Returns:
        dict: The same ``ans`` object, after mutation.
    """
    reference = ans.get("reference")
    if not isinstance(reference, dict):
        # Bind ONE dict to both names so the "chunks" entry added below is
        # also visible in the returned answer, exactly as in the dict case.
        reference = ans["reference"] = {}
    is_final = ans.get("final", True)

    reference["chunks"] = chunks_format(reference)
    ans["id"] = message_id
    ans["session_id"] = session_id

    if not conv:
        return ans

    if not conv.message:
        conv.message = []

    # Thinking markers replace the text that gets appended to the transcript.
    content = ans["answer"]
    if ans.get("start_to_think"):
        content = "<think>"
    elif ans.get("end_to_think"):
        content = "</think>"

    last = conv.message[-1] if conv.message else None
    if last is None or last.get("role", "") != "assistant":
        # No assistant turn to extend yet: start a new one.
        conv.message.append({"role": "assistant", "content": content, "created_at": time.time(), "id": message_id})
    elif is_final:
        if ans.get("answer"):
            # A final answer with text replaces whatever was accumulated.
            conv.message[-1] = {"role": "assistant", "content": ans["answer"], "created_at": time.time(), "id": message_id}
        else:
            # A final answer without text only refreshes the metadata.
            last["created_at"] = time.time()
            last["id"] = message_id
    else:
        # Streaming delta: append to the text accumulated so far.
        last["content"] = (last.get("content") or "") + content
        last["created_at"] = time.time()
        last["id"] = message_id

    if conv.reference:
        # Non-final answers only overwrite the stored reference when they
        # actually carry chunks or document aggregations.
        should_update_reference = is_final or bool(reference.get("chunks")) or bool(reference.get("doc_aggs"))
        if should_update_reference:
            conv.reference[-1] = reference
    return ans
|
|
|
|
|
|
async def async_completion(tenant_id, chat_id, question, name="New session", session_id=None, stream=True, **kwargs):
    """Run one chat turn against a tenant-owned dialog, creating a session on demand.

    Without ``session_id`` a new session is saved and only the dialog's
    prologue is yielded. With ``session_id`` the question is appended to that
    session, ``async_chat`` is invoked, and its answers are yielded.

    Args:
        tenant_id: Tenant that must own ``chat_id``.
        chat_id: Id of the dialog to chat with.
        question: Text of the user's question.
        name: Name given to a newly created session; must be truthy.
        session_id: Existing session to continue, or falsy to create one.
        stream: When truthy, yield ``"data:<json>\\n\\n"`` strings; otherwise
            yield a single answer dict.
        **kwargs: Forwarded unchanged to ``async_chat``. Keys read here:
            ``user_id``, ``files`` and ``kb_ids``.

    Yields:
        str | dict | None: Event strings when streaming, else one answer dict
        (``None`` if ``async_chat`` produced nothing).

    Raises:
        AssertionError: ``name`` is empty, or no valid dialog with ``chat_id``
            belongs to ``tenant_id``.
        LookupError: ``session_id`` does not exist for this dialog.
    """
    # Explicit raises instead of `assert`: assertions are removed under
    # `python -O`, which would silently disable the tenant ownership check.
    # AssertionError is kept so existing callers see the same exception type.
    if not name:
        raise AssertionError("`name` can not be empty.")
    dialogs = DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value)
    if not dialogs:
        raise AssertionError("You do not own the chat.")
    dia = dialogs[0]

    if not session_id:
        # New session: persist it and answer with the prologue only.
        session_id = get_uuid()
        conv = {
            "id": session_id,
            "dialog_id": chat_id,
            "name": name,
            "message": [{"role": "assistant", "content": dia.prompt_config.get("prologue"), "created_at": time.time()}],
            "user_id": kwargs.get("user_id", ""),
        }
        ConversationService.save(**conv)
        greeting = {
            "answer": conv["message"][0]["content"],
            "reference": {},
            "audio_binary": None,
            "id": None,
            "session_id": session_id,
        }
        if stream:
            yield "data:" + json.dumps({"code": 0, "message": "", "data": greeting}, ensure_ascii=False) + "\n\n"
            yield "data:" + json.dumps({"code": 0, "message": "", "data": True}, ensure_ascii=False) + "\n\n"
        else:
            yield greeting
        return

    sessions = ConversationService.query(id=session_id, dialog_id=chat_id)
    if not sessions:
        raise LookupError("Session does not exist")
    conv = sessions[0]

    # A stored session may have no message list yet.
    if not conv.message:
        conv.message = []

    user_message = {
        "content": question,
        "role": "user",
        "id": str(uuid4()),
    }
    # Propagate runtime attachments so downstream chat flow can resolve file content.
    if isinstance(kwargs.get("files"), list) and kwargs["files"]:
        user_message["files"] = kwargs["files"]
    conv.message.append(user_message)

    # Build the model input: drop system messages and any assistant messages
    # that precede the first kept message (e.g. the prologue).
    msg = []
    for m in conv.message:
        if m["role"] == "system":
            continue
        if m["role"] == "assistant" and not msg:
            continue
        msg.append(m)
    message_id = msg[-1].get("id")

    # Reuse the tenant-scoped dialog fetched above instead of re-resolving it
    # through an unscoped id lookup whose "found" flag was never checked.
    extra_kb_ids = kwargs.get("kb_ids") or []
    # dict.fromkeys de-duplicates while keeping a deterministic order.
    dia.kb_ids = list(dict.fromkeys(list(dia.kb_ids or []) + list(extra_kb_ids)))

    if not conv.reference:
        conv.reference = []
    conv.message.append({"role": "assistant", "content": "", "id": message_id})
    conv.reference.append({"chunks": [], "doc_aggs": []})

    if stream:
        try:
            async for ans in async_chat(dia, msg, True, **kwargs):
                ans = structure_answer(conv, ans, message_id, session_id)
                yield "data:" + json.dumps({"code": 0, "data": ans}, ensure_ascii=False) + "\n\n"
            # Persisted only when the stream finished without raising.
            ConversationService.update_by_id(conv.id, conv.to_dict())
        except Exception as e:
            logger.exception("Streaming completion failed: chat_id=%s session_id=%s", chat_id, session_id)
            yield "data:" + json.dumps({"code": 500, "message": str(e),
                                        "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
                                       ensure_ascii=False) + "\n\n"
        yield "data:" + json.dumps({"code": 0, "data": True}, ensure_ascii=False) + "\n\n"
    else:
        answer = None
        # Only the first answer is consumed in non-streaming mode.
        async for ans in async_chat(dia, msg, False, **kwargs):
            answer = structure_answer(conv, ans, message_id, session_id)
            ConversationService.update_by_id(conv.id, conv.to_dict())
            break
        yield answer
|
|
|
|
async def async_iframe_completion(dialog_id, question, session_id=None, stream=True, tenant_id=None, **kwargs):
    """Run one chat turn for an embedded chatbot, creating a session on demand.

    Without ``session_id`` a new ``API4ConversationService`` session is saved
    and only the dialog's prologue is yielded. With ``session_id`` the question
    is appended to that session, ``async_chat`` is invoked, and its answers are
    yielded.

    Args:
        dialog_id: Id of the dialog to chat with.
        question: Text of the user's question.
        session_id: Existing session to continue, or falsy to create one.
        stream: When truthy, yield ``"data:<json>\\n\\n"`` strings; otherwise
            yield a single answer dict for an existing session.
        tenant_id: When truthy, the dialog must belong to this tenant and have
            the valid status; when falsy only the dialog's existence is checked.
        **kwargs: Forwarded unchanged to ``async_chat``; ``user_id`` is also
            stored on a newly created session.

    Yields:
        str | dict | None: Event strings, or one answer dict in non-streaming
        mode (``None`` if ``async_chat`` produced nothing).

    Raises:
        AssertionError: The dialog is missing or not accessible, the session
            is missing, or the session belongs to a different dialog.
    """
    # Explicit raises instead of `assert`: assertions are removed under
    # `python -O`, which would silently disable these access checks.
    # AssertionError is kept so existing callers see the same exception type.
    exists, dia = DialogService.get_by_id(dialog_id)
    if tenant_id:
        if (not exists
                or getattr(dia, "tenant_id", None) != tenant_id
                or str(getattr(dia, "status", "")) != StatusEnum.VALID.value):
            logger.warning(
                "Dialog lookup failed for tenant-scoped iframe completion: "
                "tenant_id=%s dialog_id=%s required_status=%s",
                tenant_id,
                dialog_id,
                StatusEnum.VALID.value,
            )
            raise AssertionError("Dialog not found")
    elif not exists:
        # NOTE(review): without tenant_id neither ownership nor status is
        # verified here - confirm every external caller passes tenant_id.
        raise AssertionError("Dialog not found")

    if not session_id:
        # New session: persist it and answer with the prologue only.
        session_id = get_uuid()
        conv = {
            "id": session_id,
            "dialog_id": dialog_id,
            "user_id": kwargs.get("user_id", ""),
            # `.get` mirrors async_completion: a dialog without a prologue
            # yields a null answer instead of raising KeyError.
            "message": [{"role": "assistant", "content": dia.prompt_config.get("prologue"), "created_at": time.time()}],
        }
        API4ConversationService.save(**conv)
        # NOTE(review): this branch emits event strings even when `stream` is
        # falsy; left unchanged because callers may rely on that shape.
        yield "data:" + json.dumps({"code": 0, "message": "",
                                    "data": {
                                        "answer": conv["message"][0]["content"],
                                        "reference": {},
                                        "audio_binary": None,
                                        "id": None,
                                        "session_id": session_id
                                    }},
                                   ensure_ascii=False) + "\n\n"
        yield "data:" + json.dumps({"code": 0, "message": "", "data": True}, ensure_ascii=False) + "\n\n"
        return

    found, conv = API4ConversationService.get_by_id(session_id)
    if not found:
        raise AssertionError("Session not found!")
    # The session id comes from the caller, so it must be tied to this dialog.
    if conv.dialog_id != dialog_id:
        raise AssertionError("Session does not belong to this dialog")

    if not conv.message:
        conv.message = []
    messages = conv.message
    messages.append({
        "role": "user",
        "content": question,
        "id": str(uuid4()),
    })

    # Build the model input: drop system messages and any assistant messages
    # that precede the first kept message (e.g. the prologue).
    msg = []
    for m in messages:
        if m["role"] == "system":
            continue
        if m["role"] == "assistant" and not msg:
            continue
        msg.append(m)
    if not msg[-1].get("id"):
        msg[-1]["id"] = get_uuid()
    message_id = msg[-1]["id"]

    if not conv.reference:
        conv.reference = []
    conv.reference.append({"chunks": [], "doc_aggs": []})

    if stream:
        try:
            async for ans in async_chat(dia, msg, True, **kwargs):
                ans = structure_answer(conv, ans, message_id, session_id)
                yield "data:" + json.dumps({"code": 0, "message": "", "data": ans},
                                           ensure_ascii=False) + "\n\n"
            # Persisted only when the stream finished without raising.
            API4ConversationService.append_message(conv.id, conv.to_dict())
        except Exception as e:
            logger.exception("Streaming iframe completion failed: dialog_id=%s session_id=%s", dialog_id, session_id)
            yield "data:" + json.dumps({"code": 500, "message": str(e),
                                        "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
                                       ensure_ascii=False) + "\n\n"
        yield "data:" + json.dumps({"code": 0, "message": "", "data": True}, ensure_ascii=False) + "\n\n"
    else:
        answer = None
        # Only the first answer is consumed in non-streaming mode.
        async for ans in async_chat(dia, msg, False, **kwargs):
            answer = structure_answer(conv, ans, message_id, session_id)
            API4ConversationService.append_message(conv.id, conv.to_dict())
            break
        yield answer
|