mirror of https://github.com/infiniflow/ragflow.git
Refa(sdk/python/test): remove unused testcases and utilities (#12505)
### What problem does this PR solve?

Removed the following directories:

- sdk/python/test/libs/
- sdk/python/test/test_http_api/
- sdk/python/test/test_sdk_api/

### Type of change

- [x] Refactoring
@@ -1,15 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
@@ -1,25 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from requests.auth import AuthBase


class RAGFlowHttpApiAuth(AuthBase):
    def __init__(self, token):
        self._token = token

    def __call__(self, r):
        r.headers["Authorization"] = f'Bearer {self._token}'
        return r
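
For reference, the deleted RAGFlowHttpApiAuth helper is a standard requests custom-auth object: requests invokes it once per prepared request and it injects the Bearer header. A minimal usage sketch (the token value is a placeholder, not taken from this diff):

import requests

auth = RAGFlowHttpApiAuth("example-api-key")  # hypothetical placeholder token
# requests calls auth.__call__ on the prepared request, which adds the Authorization header.
resp = requests.get("http://127.0.0.1:9380/api/v1/datasets", auth=auth)
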
@@ -1,63 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import base64
import functools
import hashlib
import time
from pathlib import Path


def encode_avatar(image_path):
    with Path.open(image_path, "rb") as file:
        binary_data = file.read()
    base64_encoded = base64.b64encode(binary_data).decode("utf-8")
    return base64_encoded


def compare_by_hash(file1, file2, algorithm="sha256"):
    def _calc_hash(file_path):
        hash_func = hashlib.new(algorithm)
        with open(file_path, "rb") as f:
            while chunk := f.read(8192):
                hash_func.update(chunk)
        return hash_func.hexdigest()

    return _calc_hash(file1) == _calc_hash(file2)


def wait_for(timeout=10, interval=1, error_msg="Timeout"):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            start_time = time.time()
            while True:
                result = func(*args, **kwargs)
                if result is True:
                    return result
                elapsed = time.time() - start_time
                if elapsed > timeout:
                    assert False, error_msg
                time.sleep(interval)

        return wrapper

    return decorator


def is_sorted(data, field, descending=True):
    timestamps = [ds[field] for ds in data]
    return all(a >= b for a, b in zip(timestamps, timestamps[1:])) if descending else all(a <= b for a, b in zip(timestamps, timestamps[1:]))
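
The wait_for decorator above turns a boolean predicate into a polling loop: it re-invokes the wrapped function every interval seconds until it returns True, and fails the test with error_msg once timeout is exceeded. A sketch of the pattern the suite relied on (it mirrors the condition helper in the conftest further down):

@wait_for(30, 1, "Document parsing timeout")
def parsing_done(auth, dataset_id):
    # True only once every document in the dataset reports run == "DONE".
    res = list_documnets(auth, dataset_id)
    return all(doc["run"] == "DONE" for doc in res["data"]["docs"])
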
@@ -1,107 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import json

from docx import Document  # pip install python-docx
from openpyxl import Workbook  # pip install openpyxl
from PIL import Image, ImageDraw  # pip install Pillow
from pptx import Presentation  # pip install python-pptx
from reportlab.pdfgen import canvas  # pip install reportlab


def create_docx_file(path):
    doc = Document()
    doc.add_paragraph("这是一个测试 DOCX 文件。")
    doc.save(path)
    return path


def create_excel_file(path):
    wb = Workbook()
    ws = wb.active
    ws["A1"] = "测试 Excel 文件"
    wb.save(path)
    return path


def create_ppt_file(path):
    prs = Presentation()
    slide = prs.slides.add_slide(prs.slide_layouts[0])
    slide.shapes.title.text = "测试 PPT 文件"
    prs.save(path)
    return path


def create_image_file(path):
    img = Image.new("RGB", (100, 100), color="blue")
    draw = ImageDraw.Draw(img)
    draw.text((10, 40), "Test", fill="white")
    img.save(path)
    return path


def create_pdf_file(path):
    if not isinstance(path, str):
        path = str(path)
    c = canvas.Canvas(path)
    c.drawString(100, 750, "测试 PDF 文件")
    c.save()
    return path


def create_txt_file(path):
    with open(path, "w", encoding="utf-8") as f:
        f.write("这是测试 TXT 文件的内容。")
    return path


def create_md_file(path):
    md_content = "# 测试 MD 文件\n\n这是一份 Markdown 格式的测试文件。"
    with open(path, "w", encoding="utf-8") as f:
        f.write(md_content)
    return path


def create_json_file(path):
    data = {"message": "这是测试 JSON 文件", "value": 123}
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
    return path


def create_eml_file(path):
    eml_content = (
        "From: sender@example.com\n"
        "To: receiver@example.com\n"
        "Subject: 测试 EML 文件\n\n"
        "这是一封测试邮件的内容。\n"
    )
    with open(path, "w", encoding="utf-8") as f:
        f.write(eml_content)
    return path


def create_html_file(path):
    html_content = (
        "<html>\n"
        "<head><title>测试 HTML 文件</title></head>\n"
        "<body><h1>这是一个测试 HTML 文件</h1></body>\n"
        "</html>"
    )
    with open(path, "w", encoding="utf-8") as f:
        f.write(html_content)
    return path
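
Each create_* helper above writes a minimal fixture of one format and returns the path it was handed, so test setup stays a one-liner. A sketch with pytest's tmp_path fixture (the test name is illustrative, not from the removed suite):

def test_txt_fixture(tmp_path):
    # The helper returns the same Path it received, so it composes inline.
    fp = create_txt_file(tmp_path / "ragflow_test.txt")
    assert fp.exists()
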
@@ -1,28 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import hypothesis.strategies as st


@st.composite
def valid_names(draw):
    base_chars = "abcdefghijklmnopqrstuvwxyz_"
    first_char = draw(st.sampled_from([c for c in base_chars if c.isalpha() or c == "_"]))
    remaining = draw(st.text(alphabet=st.sampled_from(base_chars), min_size=0, max_size=128 - 2))

    name = (first_char + remaining)[:128]
    return name.encode("utf-8").decode("utf-8")
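
valid_names is a composite Hypothesis strategy, so property-based tests would draw generated names from it via @given. A hedged sketch, not taken from the removed tests:

from hypothesis import given

@given(name=valid_names())
def test_generated_name_fits_limit(name):
    # The strategy caps names at 128 characters and starts them with a letter or underscore.
    assert 1 <= len(name) <= 128
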
@@ -1,257 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
from pathlib import Path

import requests
from libs.utils.file_utils import create_txt_file
from requests_toolbelt import MultipartEncoder

HEADERS = {"Content-Type": "application/json"}
HOST_ADDRESS = os.getenv("HOST_ADDRESS", "http://127.0.0.1:9380")
DATASETS_API_URL = "/api/v1/datasets"
FILE_API_URL = "/api/v1/datasets/{dataset_id}/documents"
FILE_CHUNK_API_URL = "/api/v1/datasets/{dataset_id}/chunks"
CHUNK_API_URL = "/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks"
CHAT_ASSISTANT_API_URL = "/api/v1/chats"
SESSION_WITH_CHAT_ASSISTANT_API_URL = "/api/v1/chats/{chat_id}/sessions"
SESSION_WITH_AGENT_API_URL = "/api/v1/agents/{agent_id}/sessions"

INVALID_API_TOKEN = "invalid_key_123"
DATASET_NAME_LIMIT = 128
DOCUMENT_NAME_LIMIT = 128
CHAT_ASSISTANT_NAME_LIMIT = 255
SESSION_WITH_CHAT_NAME_LIMIT = 255


# DATASET MANAGEMENT
def create_dataset(auth, payload=None, *, headers=HEADERS, data=None):
    res = requests.post(url=f"{HOST_ADDRESS}{DATASETS_API_URL}", headers=headers, auth=auth, json=payload, data=data)
    return res.json()


def list_datasets(auth, params=None, *, headers=HEADERS):
    res = requests.get(url=f"{HOST_ADDRESS}{DATASETS_API_URL}", headers=headers, auth=auth, params=params)
    return res.json()


def update_dataset(auth, dataset_id, payload=None, *, headers=HEADERS, data=None):
    res = requests.put(url=f"{HOST_ADDRESS}{DATASETS_API_URL}/{dataset_id}", headers=headers, auth=auth, json=payload, data=data)
    return res.json()


def delete_datasets(auth, payload=None, *, headers=HEADERS, data=None):
    res = requests.delete(url=f"{HOST_ADDRESS}{DATASETS_API_URL}", headers=headers, auth=auth, json=payload, data=data)
    return res.json()


def batch_create_datasets(auth, num):
    ids = []
    for i in range(num):
        res = create_dataset(auth, {"name": f"dataset_{i}"})
        ids.append(res["data"]["id"])
    return ids


# FILE MANAGEMENT WITHIN DATASET
def upload_documnets(auth, dataset_id, files_path=None):
    url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id)

    if files_path is None:
        files_path = []

    fields = []
    file_objects = []
    try:
        for fp in files_path:
            p = Path(fp)
            f = p.open("rb")
            fields.append(("file", (p.name, f)))
            file_objects.append(f)
        m = MultipartEncoder(fields=fields)

        res = requests.post(
            url=url,
            headers={"Content-Type": m.content_type},
            auth=auth,
            data=m,
        )
        return res.json()
    finally:
        for f in file_objects:
            f.close()


def download_document(auth, dataset_id, document_id, save_path):
    url = f"{HOST_ADDRESS}{FILE_API_URL}/{document_id}".format(dataset_id=dataset_id)
    res = requests.get(url=url, auth=auth, stream=True)
    try:
        if res.status_code == 200:
            with open(save_path, "wb") as f:
                for chunk in res.iter_content(chunk_size=8192):
                    f.write(chunk)
    finally:
        res.close()

    return res


def list_documnets(auth, dataset_id, params=None):
    url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id)
    res = requests.get(url=url, headers=HEADERS, auth=auth, params=params)
    return res.json()


def update_documnet(auth, dataset_id, document_id, payload=None):
    url = f"{HOST_ADDRESS}{FILE_API_URL}/{document_id}".format(dataset_id=dataset_id)
    res = requests.put(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def delete_documnets(auth, dataset_id, payload=None):
    url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id)
    res = requests.delete(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def parse_documnets(auth, dataset_id, payload=None):
    url = f"{HOST_ADDRESS}{FILE_CHUNK_API_URL}".format(dataset_id=dataset_id)
    res = requests.post(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def stop_parse_documnets(auth, dataset_id, payload=None):
    url = f"{HOST_ADDRESS}{FILE_CHUNK_API_URL}".format(dataset_id=dataset_id)
    res = requests.delete(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def bulk_upload_documents(auth, dataset_id, num, tmp_path):
    fps = []
    for i in range(num):
        fp = create_txt_file(tmp_path / f"ragflow_test_upload_{i}.txt")
        fps.append(fp)
    res = upload_documnets(auth, dataset_id, fps)
    document_ids = []
    for document in res["data"]:
        document_ids.append(document["id"])
    return document_ids


# CHUNK MANAGEMENT WITHIN DATASET
def add_chunk(auth, dataset_id, document_id, payload=None):
    url = f"{HOST_ADDRESS}{CHUNK_API_URL}".format(dataset_id=dataset_id, document_id=document_id)
    res = requests.post(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def list_chunks(auth, dataset_id, document_id, params=None):
    url = f"{HOST_ADDRESS}{CHUNK_API_URL}".format(dataset_id=dataset_id, document_id=document_id)
    res = requests.get(url=url, headers=HEADERS, auth=auth, params=params)
    return res.json()


def update_chunk(auth, dataset_id, document_id, chunk_id, payload=None):
    url = f"{HOST_ADDRESS}{CHUNK_API_URL}/{chunk_id}".format(dataset_id=dataset_id, document_id=document_id)
    res = requests.put(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def delete_chunks(auth, dataset_id, document_id, payload=None):
    url = f"{HOST_ADDRESS}{CHUNK_API_URL}".format(dataset_id=dataset_id, document_id=document_id)
    res = requests.delete(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def retrieval_chunks(auth, payload=None):
    url = f"{HOST_ADDRESS}/api/v1/retrieval"
    res = requests.post(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def batch_add_chunks(auth, dataset_id, document_id, num):
    chunk_ids = []
    for i in range(num):
        res = add_chunk(auth, dataset_id, document_id, {"content": f"chunk test {i}"})
        chunk_ids.append(res["data"]["chunk"]["id"])
    return chunk_ids


# CHAT ASSISTANT MANAGEMENT
def create_chat_assistant(auth, payload=None):
    url = f"{HOST_ADDRESS}{CHAT_ASSISTANT_API_URL}"
    res = requests.post(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def list_chat_assistants(auth, params=None):
    url = f"{HOST_ADDRESS}{CHAT_ASSISTANT_API_URL}"
    res = requests.get(url=url, headers=HEADERS, auth=auth, params=params)
    return res.json()


def update_chat_assistant(auth, chat_assistant_id, payload=None):
    url = f"{HOST_ADDRESS}{CHAT_ASSISTANT_API_URL}/{chat_assistant_id}"
    res = requests.put(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def delete_chat_assistants(auth, payload=None):
    url = f"{HOST_ADDRESS}{CHAT_ASSISTANT_API_URL}"
    res = requests.delete(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def batch_create_chat_assistants(auth, num):
    chat_assistant_ids = []
    for i in range(num):
        res = create_chat_assistant(auth, {"name": f"test_chat_assistant_{i}", "dataset_ids": []})
        chat_assistant_ids.append(res["data"]["id"])
    return chat_assistant_ids


# SESSION MANAGEMENT
def create_session_with_chat_assistant(auth, chat_assistant_id, payload=None):
    url = f"{HOST_ADDRESS}{SESSION_WITH_CHAT_ASSISTANT_API_URL}".format(chat_id=chat_assistant_id)
    res = requests.post(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def list_session_with_chat_assistants(auth, chat_assistant_id, params=None):
    url = f"{HOST_ADDRESS}{SESSION_WITH_CHAT_ASSISTANT_API_URL}".format(chat_id=chat_assistant_id)
    res = requests.get(url=url, headers=HEADERS, auth=auth, params=params)
    return res.json()


def update_session_with_chat_assistant(auth, chat_assistant_id, session_id, payload=None):
    url = f"{HOST_ADDRESS}{SESSION_WITH_CHAT_ASSISTANT_API_URL}/{session_id}".format(chat_id=chat_assistant_id)
    res = requests.put(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def delete_session_with_chat_assistants(auth, chat_assistant_id, payload=None):
    url = f"{HOST_ADDRESS}{SESSION_WITH_CHAT_ASSISTANT_API_URL}".format(chat_id=chat_assistant_id)
    res = requests.delete(url=url, headers=HEADERS, auth=auth, json=payload)
    return res.json()


def batch_add_sessions_with_chat_assistant(auth, chat_assistant_id, num):
    session_ids = []
    for i in range(num):
        res = create_session_with_chat_assistant(auth, chat_assistant_id, {"name": f"session_with_chat_assistant_{i}"})
        session_ids.append(res["data"]["id"])
    return session_ids
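
Every helper above wraps one HTTP endpoint and returns the parsed JSON body, so tests compose them directly. A sketch of the typical flow (the token is a placeholder, not from this diff):

auth = RAGFlowHttpApiAuth("example-api-key")  # hypothetical placeholder token
dataset_ids = batch_create_datasets(auth, 2)
res = list_datasets(auth, params={"page_size": 10})
assert res["code"] == 0
# Clean up the datasets created above.
delete_datasets(auth, {"ids": dataset_ids})
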
@@ -1,202 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os

import pytest
from common import (
    add_chunk,
    batch_create_datasets,
    bulk_upload_documents,
    create_chat_assistant,
    delete_chat_assistants,
    delete_datasets,
    delete_session_with_chat_assistants,
    list_documnets,
    parse_documnets,
)
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import wait_for
from libs.utils.file_utils import (
    create_docx_file,
    create_eml_file,
    create_excel_file,
    create_html_file,
    create_image_file,
    create_json_file,
    create_md_file,
    create_pdf_file,
    create_ppt_file,
    create_txt_file,
)

MARKER_EXPRESSIONS = {
    "p1": "p1",
    "p2": "p1 or p2",
    "p3": "p1 or p2 or p3",
}
HOST_ADDRESS = os.getenv("HOST_ADDRESS", "http://127.0.0.1:9380")


def pytest_addoption(parser: pytest.Parser) -> None:
    parser.addoption(
        "--level",
        action="store",
        default="p2",
        choices=list(MARKER_EXPRESSIONS.keys()),
        help=f"Test level ({'/'.join(MARKER_EXPRESSIONS)}): p1=smoke, p2=core, p3=full",
    )


def pytest_configure(config: pytest.Config) -> None:
    level = config.getoption("--level")
    config.option.markexpr = MARKER_EXPRESSIONS[level]
    if config.option.verbose > 0:
        print(f"\n[CONFIG] Active test level: {level}")


@wait_for(30, 1, "Document parsing timeout")
def condition(_auth, _dataset_id):
    res = list_documnets(_auth, _dataset_id)
    for doc in res["data"]["docs"]:
        if doc["run"] != "DONE":
            return False
    return True


@pytest.fixture(scope="session")
def get_http_api_auth(get_api_key_fixture):
    return RAGFlowHttpApiAuth(get_api_key_fixture)


@pytest.fixture(scope="function")
def clear_datasets(request, get_http_api_auth):
    def cleanup():
        delete_datasets(get_http_api_auth, {"ids": None})

    request.addfinalizer(cleanup)


@pytest.fixture(scope="function")
def clear_chat_assistants(request, get_http_api_auth):
    def cleanup():
        delete_chat_assistants(get_http_api_auth)

    request.addfinalizer(cleanup)


@pytest.fixture(scope="function")
def clear_session_with_chat_assistants(request, get_http_api_auth, add_chat_assistants):
    _, _, chat_assistant_ids = add_chat_assistants

    def cleanup():
        for chat_assistant_id in chat_assistant_ids:
            delete_session_with_chat_assistants(get_http_api_auth, chat_assistant_id)

    request.addfinalizer(cleanup)


@pytest.fixture
def generate_test_files(request, tmp_path):
    file_creators = {
        "docx": (tmp_path / "ragflow_test.docx", create_docx_file),
        "excel": (tmp_path / "ragflow_test.xlsx", create_excel_file),
        "ppt": (tmp_path / "ragflow_test.pptx", create_ppt_file),
        "image": (tmp_path / "ragflow_test.png", create_image_file),
        "pdf": (tmp_path / "ragflow_test.pdf", create_pdf_file),
        "txt": (tmp_path / "ragflow_test.txt", create_txt_file),
        "md": (tmp_path / "ragflow_test.md", create_md_file),
        "json": (tmp_path / "ragflow_test.json", create_json_file),
        "eml": (tmp_path / "ragflow_test.eml", create_eml_file),
        "html": (tmp_path / "ragflow_test.html", create_html_file),
    }

    files = {}
    for file_type, (file_path, creator_func) in file_creators.items():
        if request.param in ["", file_type]:
            creator_func(file_path)
            files[file_type] = file_path
    return files


@pytest.fixture(scope="class")
def ragflow_tmp_dir(request, tmp_path_factory):
    class_name = request.cls.__name__
    return tmp_path_factory.mktemp(class_name)


@pytest.fixture(scope="class")
def add_dataset(request, get_http_api_auth):
    def cleanup():
        delete_datasets(get_http_api_auth, {"ids": None})

    request.addfinalizer(cleanup)

    dataset_ids = batch_create_datasets(get_http_api_auth, 1)
    return dataset_ids[0]


@pytest.fixture(scope="function")
def add_dataset_func(request, get_http_api_auth):
    def cleanup():
        delete_datasets(get_http_api_auth, {"ids": None})

    request.addfinalizer(cleanup)

    return batch_create_datasets(get_http_api_auth, 1)[0]


@pytest.fixture(scope="class")
def add_document(get_http_api_auth, add_dataset, ragflow_tmp_dir):
    dataset_id = add_dataset
    document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 1, ragflow_tmp_dir)
    return dataset_id, document_ids[0]


@pytest.fixture(scope="class")
def add_chunks(get_http_api_auth, add_document):
    dataset_id, document_id = add_document
    parse_documnets(get_http_api_auth, dataset_id, {"document_ids": [document_id]})
    condition(get_http_api_auth, dataset_id)

    chunk_ids = []
    for i in range(4):
        res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": f"chunk test {i}"})
        chunk_ids.append(res["data"]["chunk"]["id"])

    # issues/6487
    from time import sleep

    sleep(1)
    return dataset_id, document_id, chunk_ids


@pytest.fixture(scope="class")
def add_chat_assistants(request, get_http_api_auth, add_document):
    def cleanup():
        delete_chat_assistants(get_http_api_auth)

    request.addfinalizer(cleanup)

    dataset_id, document_id = add_document
    parse_documnets(get_http_api_auth, dataset_id, {"document_ids": [document_id]})
    condition(get_http_api_auth, dataset_id)

    chat_assistant_ids = []
    for i in range(5):
        res = create_chat_assistant(get_http_api_auth, {"name": f"test_chat_assistant_{i}", "dataset_ids": [dataset_id]})
        chat_assistant_ids.append(res["data"]["id"])

    return dataset_id, document_id, chat_assistant_ids
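
The --level option above selects tests by marker expression; for example, pytest --level=p1 sets markexpr to "p1" and runs only smoke tests. A sketch of a test participating in this scheme (the test name is illustrative):

@pytest.mark.p1
def test_smoke_list_datasets(get_http_api_auth):
    # Included at every level, since all three expressions contain p1.
    res = list_datasets(get_http_api_auth)
    assert res["code"] == 0
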
@@ -1,46 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pytest
from common import create_chat_assistant, delete_chat_assistants, list_documnets, parse_documnets
from libs.utils import wait_for


@wait_for(30, 1, "Document parsing timeout")
def condition(_auth, _dataset_id):
    res = list_documnets(_auth, _dataset_id)
    for doc in res["data"]["docs"]:
        if doc["run"] != "DONE":
            return False
    return True


@pytest.fixture(scope="function")
def add_chat_assistants_func(request, get_http_api_auth, add_document):
    def cleanup():
        delete_chat_assistants(get_http_api_auth)

    request.addfinalizer(cleanup)

    dataset_id, document_id = add_document
    parse_documnets(get_http_api_auth, dataset_id, {"document_ids": [document_id]})
    condition(get_http_api_auth, dataset_id)

    chat_assistant_ids = []
    for i in range(5):
        res = create_chat_assistant(get_http_api_auth, {"name": f"test_chat_assistant_{i}", "dataset_ids": [dataset_id]})
        chat_assistant_ids.append(res["data"]["id"])

    return dataset_id, document_id, chat_assistant_ids
@@ -1,241 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import pytest
from common import CHAT_ASSISTANT_NAME_LIMIT, INVALID_API_TOKEN, create_chat_assistant
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import encode_avatar
from libs.utils.file_utils import create_image_file


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = create_chat_assistant(auth)
        assert res["code"] == expected_code
        assert res["message"] == expected_message


@pytest.mark.usefixtures("clear_chat_assistants")
class TestChatAssistantCreate:
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            ({"name": "valid_name"}, 0, ""),
            pytest.param({"name": "a" * (CHAT_ASSISTANT_NAME_LIMIT + 1)}, 102, "", marks=pytest.mark.skip(reason="issues/")),
            pytest.param({"name": 1}, 100, "", marks=pytest.mark.skip(reason="issues/")),
            ({"name": ""}, 102, "`name` is required."),
            ({"name": "duplicated_name"}, 102, "Duplicated chat name in creating chat."),
            ({"name": "case insensitive"}, 102, "Duplicated chat name in creating chat."),
        ],
    )
    def test_name(self, get_http_api_auth, add_chunks, payload, expected_code, expected_message):
        payload["dataset_ids"] = []  # issues/
        if payload["name"] == "duplicated_name":
            create_chat_assistant(get_http_api_auth, payload)
        elif payload["name"] == "case insensitive":
            create_chat_assistant(get_http_api_auth, {"name": payload["name"].upper()})

        res = create_chat_assistant(get_http_api_auth, payload)
        assert res["code"] == expected_code, res
        if expected_code == 0:
            assert res["data"]["name"] == payload["name"]
        else:
            assert res["message"] == expected_message

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "dataset_ids, expected_code, expected_message",
        [
            ([], 0, ""),
            (lambda r: [r], 0, ""),
            (["invalid_dataset_id"], 102, "You don't own the dataset invalid_dataset_id"),
            ("invalid_dataset_id", 102, "You don't own the dataset i"),
        ],
    )
    def test_dataset_ids(self, get_http_api_auth, add_chunks, dataset_ids, expected_code, expected_message):
        dataset_id, _, _ = add_chunks
        payload = {"name": "ragflow test"}
        if callable(dataset_ids):
            payload["dataset_ids"] = dataset_ids(dataset_id)
        else:
            payload["dataset_ids"] = dataset_ids

        res = create_chat_assistant(get_http_api_auth, payload)
        assert res["code"] == expected_code, res
        if expected_code == 0:
            assert res["data"]["name"] == payload["name"]
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    def test_avatar(self, get_http_api_auth, tmp_path):
        fn = create_image_file(tmp_path / "ragflow_test.png")
        payload = {"name": "avatar_test", "avatar": encode_avatar(fn), "dataset_ids": []}
        res = create_chat_assistant(get_http_api_auth, payload)
        assert res["code"] == 0

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "llm, expected_code, expected_message",
        [
            ({}, 0, ""),
            ({"model_name": "glm-4"}, 0, ""),
            ({"model_name": "unknown"}, 102, "`model_name` unknown doesn't exist"),
            ({"temperature": 0}, 0, ""),
            ({"temperature": 1}, 0, ""),
            pytest.param({"temperature": -1}, 0, "", marks=pytest.mark.skip),
            pytest.param({"temperature": 10}, 0, "", marks=pytest.mark.skip),
            pytest.param({"temperature": "a"}, 0, "", marks=pytest.mark.skip),
            ({"top_p": 0}, 0, ""),
            ({"top_p": 1}, 0, ""),
            pytest.param({"top_p": -1}, 0, "", marks=pytest.mark.skip),
            pytest.param({"top_p": 10}, 0, "", marks=pytest.mark.skip),
            pytest.param({"top_p": "a"}, 0, "", marks=pytest.mark.skip),
            ({"presence_penalty": 0}, 0, ""),
            ({"presence_penalty": 1}, 0, ""),
            pytest.param({"presence_penalty": -1}, 0, "", marks=pytest.mark.skip),
            pytest.param({"presence_penalty": 10}, 0, "", marks=pytest.mark.skip),
            pytest.param({"presence_penalty": "a"}, 0, "", marks=pytest.mark.skip),
            ({"frequency_penalty": 0}, 0, ""),
            ({"frequency_penalty": 1}, 0, ""),
            pytest.param({"frequency_penalty": -1}, 0, "", marks=pytest.mark.skip),
            pytest.param({"frequency_penalty": 10}, 0, "", marks=pytest.mark.skip),
            pytest.param({"frequency_penalty": "a"}, 0, "", marks=pytest.mark.skip),
            ({"max_token": 0}, 0, ""),
            ({"max_token": 1024}, 0, ""),
            pytest.param({"max_token": -1}, 0, "", marks=pytest.mark.skip),
            pytest.param({"max_token": 10}, 0, "", marks=pytest.mark.skip),
            pytest.param({"max_token": "a"}, 0, "", marks=pytest.mark.skip),
            pytest.param({"unknown": "unknown"}, 0, "", marks=pytest.mark.skip),
        ],
    )
    def test_llm(self, get_http_api_auth, add_chunks, llm, expected_code, expected_message):
        dataset_id, _, _ = add_chunks
        payload = {"name": "llm_test", "dataset_ids": [dataset_id], "llm": llm}
        res = create_chat_assistant(get_http_api_auth, payload)
        assert res["code"] == expected_code
        if expected_code == 0:
            if llm:
                for k, v in llm.items():
                    assert res["data"]["llm"][k] == v
            else:
                assert res["data"]["llm"]["model_name"] == "glm-4-flash@ZHIPU-AI"
                assert res["data"]["llm"]["temperature"] == 0.1
                assert res["data"]["llm"]["top_p"] == 0.3
                assert res["data"]["llm"]["presence_penalty"] == 0.4
                assert res["data"]["llm"]["frequency_penalty"] == 0.7
                assert res["data"]["llm"]["max_tokens"] == 512
        else:
            assert res["message"] == expected_message

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "prompt, expected_code, expected_message",
        [
            ({}, 0, ""),
            ({"similarity_threshold": 0}, 0, ""),
            ({"similarity_threshold": 1}, 0, ""),
            pytest.param({"similarity_threshold": -1}, 0, "", marks=pytest.mark.skip),
            pytest.param({"similarity_threshold": 10}, 0, "", marks=pytest.mark.skip),
            pytest.param({"similarity_threshold": "a"}, 0, "", marks=pytest.mark.skip),
            ({"keywords_similarity_weight": 0}, 0, ""),
            ({"keywords_similarity_weight": 1}, 0, ""),
            pytest.param({"keywords_similarity_weight": -1}, 0, "", marks=pytest.mark.skip),
            pytest.param({"keywords_similarity_weight": 10}, 0, "", marks=pytest.mark.skip),
            pytest.param({"keywords_similarity_weight": "a"}, 0, "", marks=pytest.mark.skip),
            ({"variables": []}, 0, ""),
            ({"top_n": 0}, 0, ""),
            ({"top_n": 1}, 0, ""),
            pytest.param({"top_n": -1}, 0, "", marks=pytest.mark.skip),
            pytest.param({"top_n": 10}, 0, "", marks=pytest.mark.skip),
            pytest.param({"top_n": "a"}, 0, "", marks=pytest.mark.skip),
            ({"empty_response": "Hello World"}, 0, ""),
            ({"empty_response": ""}, 0, ""),
            ({"empty_response": "!@#$%^&*()"}, 0, ""),
            ({"empty_response": "中文测试"}, 0, ""),
            pytest.param({"empty_response": 123}, 0, "", marks=pytest.mark.skip),
            pytest.param({"empty_response": True}, 0, "", marks=pytest.mark.skip),
            pytest.param({"empty_response": " "}, 0, "", marks=pytest.mark.skip),
            ({"opener": "Hello World"}, 0, ""),
            ({"opener": ""}, 0, ""),
            ({"opener": "!@#$%^&*()"}, 0, ""),
            ({"opener": "中文测试"}, 0, ""),
            pytest.param({"opener": 123}, 0, "", marks=pytest.mark.skip),
            pytest.param({"opener": True}, 0, "", marks=pytest.mark.skip),
            pytest.param({"opener": " "}, 0, "", marks=pytest.mark.skip),
            ({"show_quote": True}, 0, ""),
            ({"show_quote": False}, 0, ""),
            ({"prompt": "Hello World {knowledge}"}, 0, ""),
            ({"prompt": "{knowledge}"}, 0, ""),
            ({"prompt": "!@#$%^&*() {knowledge}"}, 0, ""),
            ({"prompt": "中文测试 {knowledge}"}, 0, ""),
            ({"prompt": "Hello World"}, 102, "Parameter 'knowledge' is not used"),
            ({"prompt": "Hello World", "variables": []}, 0, ""),
            pytest.param({"prompt": 123}, 100, """AttributeError("\'int\' object has no attribute \'find\'")""", marks=pytest.mark.skip),
            pytest.param({"prompt": True}, 100, """AttributeError("\'int\' object has no attribute \'find\'")""", marks=pytest.mark.skip),
            pytest.param({"unknown": "unknown"}, 0, "", marks=pytest.mark.skip),
        ],
    )
    def test_prompt(self, get_http_api_auth, add_chunks, prompt, expected_code, expected_message):
        dataset_id, _, _ = add_chunks
        payload = {"name": "prompt_test", "dataset_ids": [dataset_id], "prompt": prompt}
        res = create_chat_assistant(get_http_api_auth, payload)
        assert res["code"] == expected_code
        if expected_code == 0:
            if prompt:
                for k, v in prompt.items():
                    if k == "keywords_similarity_weight":
                        assert res["data"]["prompt"][k] == 1 - v
                    else:
                        assert res["data"]["prompt"][k] == v
            else:
                assert res["data"]["prompt"]["similarity_threshold"] == 0.2
                assert res["data"]["prompt"]["keywords_similarity_weight"] == 0.7
                assert res["data"]["prompt"]["top_n"] == 6
                assert res["data"]["prompt"]["variables"] == [{"key": "knowledge", "optional": False}]
                assert res["data"]["prompt"]["rerank_model"] == ""
                assert res["data"]["prompt"]["empty_response"] == "Sorry! No relevant content was found in the knowledge base!"
                assert res["data"]["prompt"]["opener"] == "Hi! I'm your assistant. What can I do for you?"
                assert res["data"]["prompt"]["show_quote"] is True
                assert (
                    res["data"]["prompt"]["prompt"]
                    == 'You are an intelligent assistant. Please summarize the content of the dataset to answer the question. Please list the data in the dataset and answer in detail. When all dataset content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the dataset!" Answers need to consider chat history.\n      Here is the knowledge base:\n      {knowledge}\n      The above is the knowledge base.'
                )
        else:
            assert res["message"] == expected_message


class TestChatAssistantCreate2:
    @pytest.mark.p2
    def test_unparsed_document(self, get_http_api_auth, add_document):
        dataset_id, _ = add_document
        payload = {"name": "prompt_test", "dataset_ids": [dataset_id]}
        res = create_chat_assistant(get_http_api_auth, payload)
        assert res["code"] == 102
        assert "doesn't own parsed file" in res["message"]
@@ -1,124 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import INVALID_API_TOKEN, batch_create_chat_assistants, delete_chat_assistants, list_chat_assistants
from libs.auth import RAGFlowHttpApiAuth


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = delete_chat_assistants(auth)
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestChatAssistantsDelete:
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message, remaining",
        [
            pytest.param(None, 0, "", 0, marks=pytest.mark.p3),
            pytest.param({"ids": []}, 0, "", 0, marks=pytest.mark.p3),
            pytest.param({"ids": ["invalid_id"]}, 102, "Assistant(invalid_id) not found.", 5, marks=pytest.mark.p3),
            pytest.param({"ids": ["\n!?。;!?\"'"]}, 102, """Assistant(\n!?。;!?"\') not found.""", 5, marks=pytest.mark.p3),
            pytest.param("not json", 100, "AttributeError(\"'str' object has no attribute 'get'\")", 5, marks=pytest.mark.p3),
            pytest.param(lambda r: {"ids": r[:1]}, 0, "", 4, marks=pytest.mark.p3),
            pytest.param(lambda r: {"ids": r}, 0, "", 0, marks=pytest.mark.p1),
        ],
    )
    def test_basic_scenarios(self, get_http_api_auth, add_chat_assistants_func, payload, expected_code, expected_message, remaining):
        _, _, chat_assistant_ids = add_chat_assistants_func
        if callable(payload):
            payload = payload(chat_assistant_ids)
        res = delete_chat_assistants(get_http_api_auth, payload)
        assert res["code"] == expected_code
        if res["code"] != 0:
            assert res["message"] == expected_message

        res = list_chat_assistants(get_http_api_auth)
        assert len(res["data"]) == remaining

    @pytest.mark.parametrize(
        "payload",
        [
            pytest.param(lambda r: {"ids": ["invalid_id"] + r}, marks=pytest.mark.p3),
            pytest.param(lambda r: {"ids": r[:1] + ["invalid_id"] + r[1:5]}, marks=pytest.mark.p1),
            pytest.param(lambda r: {"ids": r + ["invalid_id"]}, marks=pytest.mark.p3),
        ],
    )
    def test_delete_partial_invalid_id(self, get_http_api_auth, add_chat_assistants_func, payload):
        _, _, chat_assistant_ids = add_chat_assistants_func
        if callable(payload):
            payload = payload(chat_assistant_ids)
        res = delete_chat_assistants(get_http_api_auth, payload)
        assert res["code"] == 0
        assert res["data"]["errors"][0] == "Assistant(invalid_id) not found."
        assert res["data"]["success_count"] == 5

        res = list_chat_assistants(get_http_api_auth)
        assert len(res["data"]) == 0

    @pytest.mark.p3
    def test_repeated_deletion(self, get_http_api_auth, add_chat_assistants_func):
        _, _, chat_assistant_ids = add_chat_assistants_func
        res = delete_chat_assistants(get_http_api_auth, {"ids": chat_assistant_ids})
        assert res["code"] == 0

        res = delete_chat_assistants(get_http_api_auth, {"ids": chat_assistant_ids})
        assert res["code"] == 102
        assert "not found" in res["message"]

    @pytest.mark.p3
    def test_duplicate_deletion(self, get_http_api_auth, add_chat_assistants_func):
        _, _, chat_assistant_ids = add_chat_assistants_func
        res = delete_chat_assistants(get_http_api_auth, {"ids": chat_assistant_ids + chat_assistant_ids})
        assert res["code"] == 0
        assert "Duplicate assistant ids" in res["data"]["errors"][0]
        assert res["data"]["success_count"] == 5

        res = list_chat_assistants(get_http_api_auth)
        assert res["code"] == 0

    @pytest.mark.p3
    def test_concurrent_deletion(self, get_http_api_auth):
        ids = batch_create_chat_assistants(get_http_api_auth, 100)

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(delete_chat_assistants, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(100)]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses)

    @pytest.mark.p3
    def test_delete_10k(self, get_http_api_auth):
        ids = batch_create_chat_assistants(get_http_api_auth, 10_000)
        res = delete_chat_assistants(get_http_api_auth, {"ids": ids})
        assert res["code"] == 0

        res = list_chat_assistants(get_http_api_auth)
        assert len(res["data"]) == 0
@@ -1,311 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import INVALID_API_TOKEN, delete_datasets, list_chat_assistants
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import is_sorted


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = list_chat_assistants(auth)
        assert res["code"] == expected_code
        assert res["message"] == expected_message


@pytest.mark.usefixtures("add_chat_assistants")
class TestChatAssistantsList:
    @pytest.mark.p1
    def test_default(self, get_http_api_auth):
        res = list_chat_assistants(get_http_api_auth)
        assert res["code"] == 0
        assert len(res["data"]) == 5

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "params, expected_code, expected_page_size, expected_message",
        [
            ({"page": None, "page_size": 2}, 0, 2, ""),
            ({"page": 0, "page_size": 2}, 0, 2, ""),
            ({"page": 2, "page_size": 2}, 0, 2, ""),
            ({"page": 3, "page_size": 2}, 0, 1, ""),
            ({"page": "3", "page_size": 2}, 0, 1, ""),
            pytest.param(
                {"page": -1, "page_size": 2},
                100,
                0,
                "1064",
                marks=pytest.mark.skip(reason="issues/5851"),
            ),
            pytest.param(
                {"page": "a", "page_size": 2},
                100,
                0,
                """ValueError("invalid literal for int() with base 10: \'a\'")""",
                marks=pytest.mark.skip(reason="issues/5851"),
            ),
        ],
    )
    def test_page(self, get_http_api_auth, params, expected_code, expected_page_size, expected_message):
        res = list_chat_assistants(get_http_api_auth, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]) == expected_page_size
        else:
            assert res["message"] == expected_message

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "params, expected_code, expected_page_size, expected_message",
        [
            ({"page_size": None}, 0, 5, ""),
            ({"page_size": 0}, 0, 0, ""),
            ({"page_size": 1}, 0, 1, ""),
            ({"page_size": 6}, 0, 5, ""),
            ({"page_size": "1"}, 0, 1, ""),
            pytest.param(
                {"page_size": -1},
                100,
                0,
                "1064",
                marks=pytest.mark.skip(reason="issues/5851"),
            ),
            pytest.param(
                {"page_size": "a"},
                100,
                0,
                """ValueError("invalid literal for int() with base 10: \'a\'")""",
                marks=pytest.mark.skip(reason="issues/5851"),
            ),
        ],
    )
    def test_page_size(
        self,
        get_http_api_auth,
        params,
        expected_code,
        expected_page_size,
        expected_message,
    ):
        res = list_chat_assistants(get_http_api_auth, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]) == expected_page_size
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "params, expected_code, assertions, expected_message",
        [
            ({"orderby": None}, 0, lambda r: (is_sorted(r["data"], "create_time", True)), ""),
            ({"orderby": "create_time"}, 0, lambda r: (is_sorted(r["data"], "create_time", True)), ""),
            ({"orderby": "update_time"}, 0, lambda r: (is_sorted(r["data"], "update_time", True)), ""),
            pytest.param(
                {"orderby": "name", "desc": "False"},
                0,
                lambda r: (is_sorted(r["data"], "name", False)),
                "",
                marks=pytest.mark.skip(reason="issues/5851"),
            ),
            pytest.param(
                {"orderby": "unknown"},
                102,
                0,
                "orderby should be create_time or update_time",
                marks=pytest.mark.skip(reason="issues/5851"),
            ),
        ],
    )
    def test_orderby(
        self,
        get_http_api_auth,
        params,
        expected_code,
        assertions,
        expected_message,
    ):
        res = list_chat_assistants(get_http_api_auth, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            if callable(assertions):
                assert assertions(res)
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "params, expected_code, assertions, expected_message",
        [
            ({"desc": None}, 0, lambda r: (is_sorted(r["data"], "create_time", True)), ""),
            ({"desc": "true"}, 0, lambda r: (is_sorted(r["data"], "create_time", True)), ""),
            ({"desc": "True"}, 0, lambda r: (is_sorted(r["data"], "create_time", True)), ""),
            ({"desc": True}, 0, lambda r: (is_sorted(r["data"], "create_time", True)), ""),
            ({"desc": "false"}, 0, lambda r: (is_sorted(r["data"], "create_time", False)), ""),
            ({"desc": "False"}, 0, lambda r: (is_sorted(r["data"], "create_time", False)), ""),
            ({"desc": False}, 0, lambda r: (is_sorted(r["data"], "create_time", False)), ""),
            ({"desc": "False", "orderby": "update_time"}, 0, lambda r: (is_sorted(r["data"], "update_time", False)), ""),
            pytest.param(
                {"desc": "unknown"},
                102,
                0,
                "desc should be true or false",
                marks=pytest.mark.skip(reason="issues/5851"),
            ),
        ],
    )
    def test_desc(
        self,
        get_http_api_auth,
        params,
        expected_code,
        assertions,
        expected_message,
    ):
        res = list_chat_assistants(get_http_api_auth, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            if callable(assertions):
                assert assertions(res)
        else:
            assert res["message"] == expected_message

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "params, expected_code, expected_num, expected_message",
        [
            ({"name": None}, 0, 5, ""),
            ({"name": ""}, 0, 5, ""),
            ({"name": "test_chat_assistant_1"}, 0, 1, ""),
            ({"name": "unknown"}, 102, 0, "The chat doesn't exist"),
        ],
    )
    def test_name(self, get_http_api_auth, params, expected_code, expected_num, expected_message):
        res = list_chat_assistants(get_http_api_auth, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            if params["name"] in [None, ""]:
                assert len(res["data"]) == expected_num
            else:
                assert res["data"][0]["name"] == params["name"]
        else:
            assert res["message"] == expected_message

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "chat_assistant_id, expected_code, expected_num, expected_message",
        [
            (None, 0, 5, ""),
            ("", 0, 5, ""),
            (lambda r: r[0], 0, 1, ""),
            ("unknown", 102, 0, "The chat doesn't exist"),
        ],
    )
    def test_id(
        self,
        get_http_api_auth,
        add_chat_assistants,
        chat_assistant_id,
        expected_code,
        expected_num,
        expected_message,
    ):
        _, _, chat_assistant_ids = add_chat_assistants
        if callable(chat_assistant_id):
            params = {"id": chat_assistant_id(chat_assistant_ids)}
        else:
            params = {"id": chat_assistant_id}

        res = list_chat_assistants(get_http_api_auth, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            if params["id"] in [None, ""]:
                assert len(res["data"]) == expected_num
            else:
                assert res["data"][0]["id"] == params["id"]
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "chat_assistant_id, name, expected_code, expected_num, expected_message",
        [
            (lambda r: r[0], "test_chat_assistant_0", 0, 1, ""),
            (lambda r: r[0], "test_chat_assistant_1", 102, 0, "The chat doesn't exist"),
            (lambda r: r[0], "unknown", 102, 0, "The chat doesn't exist"),
            ("id", "chat_assistant_0", 102, 0, "The chat doesn't exist"),
        ],
    )
    def test_name_and_id(
        self,
        get_http_api_auth,
        add_chat_assistants,
        chat_assistant_id,
        name,
        expected_code,
        expected_num,
        expected_message,
    ):
        _, _, chat_assistant_ids = add_chat_assistants
        if callable(chat_assistant_id):
            params = {"id": chat_assistant_id(chat_assistant_ids), "name": name}
        else:
            params = {"id": chat_assistant_id, "name": name}

        res = list_chat_assistants(get_http_api_auth, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]) == expected_num
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    def test_concurrent_list(self, get_http_api_auth):
        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(list_chat_assistants, get_http_api_auth) for i in range(100)]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses)

    @pytest.mark.p3
    def test_invalid_params(self, get_http_api_auth):
        params = {"a": "b"}
        res = list_chat_assistants(get_http_api_auth, params=params)
        assert res["code"] == 0
        assert len(res["data"]) == 5

    @pytest.mark.p2
    def test_list_chats_after_deleting_associated_dataset(self, get_http_api_auth, add_chat_assistants):
        dataset_id, _, _ = add_chat_assistants
        res = delete_datasets(get_http_api_auth, {"ids": [dataset_id]})
        assert res["code"] == 0

        res = list_chat_assistants(get_http_api_auth)
        assert res["code"] == 0
        assert len(res["data"]) == 5
@ -1,228 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pytest
from common import CHAT_ASSISTANT_NAME_LIMIT, INVALID_API_TOKEN, list_chat_assistants, update_chat_assistant
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import encode_avatar
from libs.utils.file_utils import create_image_file


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = update_chat_assistant(auth, "chat_assistant_id")
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestChatAssistantUpdate:
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            pytest.param({"name": "valid_name"}, 0, "", marks=pytest.mark.p1),
            pytest.param({"name": "a" * (CHAT_ASSISTANT_NAME_LIMIT + 1)}, 102, "", marks=pytest.mark.skip(reason="issues/")),
            pytest.param({"name": 1}, 100, "", marks=pytest.mark.skip(reason="issues/")),
            pytest.param({"name": ""}, 102, "`name` cannot be empty.", marks=pytest.mark.p3),
            pytest.param({"name": "test_chat_assistant_1"}, 102, "Duplicated chat name in updating chat.", marks=pytest.mark.p3),
            pytest.param({"name": "TEST_CHAT_ASSISTANT_1"}, 102, "Duplicated chat name in updating chat.", marks=pytest.mark.p3),
        ],
    )
    def test_name(self, get_http_api_auth, add_chat_assistants_func, payload, expected_code, expected_message):
        _, _, chat_assistant_ids = add_chat_assistants_func

        res = update_chat_assistant(get_http_api_auth, chat_assistant_ids[0], payload)
        assert res["code"] == expected_code, res
        if expected_code == 0:
            res = list_chat_assistants(get_http_api_auth, {"id": chat_assistant_ids[0]})
            assert res["data"][0]["name"] == payload.get("name")
        else:
            assert res["message"] == expected_message

    @pytest.mark.parametrize(
        "dataset_ids, expected_code, expected_message",
        [
            pytest.param([], 0, "", marks=pytest.mark.skip(reason="issues/")),
            pytest.param(lambda r: [r], 0, "", marks=pytest.mark.p1),
            pytest.param(["invalid_dataset_id"], 102, "You don't own the dataset invalid_dataset_id", marks=pytest.mark.p3),
            pytest.param("invalid_dataset_id", 102, "You don't own the dataset i", marks=pytest.mark.p3),
        ],
    )
    def test_dataset_ids(self, get_http_api_auth, add_chat_assistants_func, dataset_ids, expected_code, expected_message):
        dataset_id, _, chat_assistant_ids = add_chat_assistants_func
        payload = {"name": "ragflow test"}
        if callable(dataset_ids):
            payload["dataset_ids"] = dataset_ids(dataset_id)
        else:
            payload["dataset_ids"] = dataset_ids

        res = update_chat_assistant(get_http_api_auth, chat_assistant_ids[0], payload)
        assert res["code"] == expected_code, res
        if expected_code == 0:
            res = list_chat_assistants(get_http_api_auth, {"id": chat_assistant_ids[0]})
            assert res["data"][0]["name"] == payload.get("name")
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    def test_avatar(self, get_http_api_auth, add_chat_assistants_func, tmp_path):
        dataset_id, _, chat_assistant_ids = add_chat_assistants_func
        fn = create_image_file(tmp_path / "ragflow_test.png")
        payload = {"name": "avatar_test", "avatar": encode_avatar(fn), "dataset_ids": [dataset_id]}
        res = update_chat_assistant(get_http_api_auth, chat_assistant_ids[0], payload)
        assert res["code"] == 0

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "llm, expected_code, expected_message",
        [
            ({}, 100, "ValueError"),
            ({"model_name": "glm-4"}, 0, ""),
            ({"model_name": "unknown"}, 102, "`model_name` unknown doesn't exist"),
            ({"temperature": 0}, 0, ""),
            ({"temperature": 1}, 0, ""),
            pytest.param({"temperature": -1}, 0, "", marks=pytest.mark.skip),
            pytest.param({"temperature": 10}, 0, "", marks=pytest.mark.skip),
            pytest.param({"temperature": "a"}, 0, "", marks=pytest.mark.skip),
            ({"top_p": 0}, 0, ""),
            ({"top_p": 1}, 0, ""),
            pytest.param({"top_p": -1}, 0, "", marks=pytest.mark.skip),
            pytest.param({"top_p": 10}, 0, "", marks=pytest.mark.skip),
            pytest.param({"top_p": "a"}, 0, "", marks=pytest.mark.skip),
            ({"presence_penalty": 0}, 0, ""),
            ({"presence_penalty": 1}, 0, ""),
            pytest.param({"presence_penalty": -1}, 0, "", marks=pytest.mark.skip),
            pytest.param({"presence_penalty": 10}, 0, "", marks=pytest.mark.skip),
            pytest.param({"presence_penalty": "a"}, 0, "", marks=pytest.mark.skip),
            ({"frequency_penalty": 0}, 0, ""),
            ({"frequency_penalty": 1}, 0, ""),
            pytest.param({"frequency_penalty": -1}, 0, "", marks=pytest.mark.skip),
            pytest.param({"frequency_penalty": 10}, 0, "", marks=pytest.mark.skip),
            pytest.param({"frequency_penalty": "a"}, 0, "", marks=pytest.mark.skip),
            ({"max_token": 0}, 0, ""),
            ({"max_token": 1024}, 0, ""),
            pytest.param({"max_token": -1}, 0, "", marks=pytest.mark.skip),
            pytest.param({"max_token": 10}, 0, "", marks=pytest.mark.skip),
            pytest.param({"max_token": "a"}, 0, "", marks=pytest.mark.skip),
            pytest.param({"unknown": "unknown"}, 0, "", marks=pytest.mark.skip),
        ],
    )
    def test_llm(self, get_http_api_auth, add_chat_assistants_func, llm, expected_code, expected_message):
        dataset_id, _, chat_assistant_ids = add_chat_assistants_func
        payload = {"name": "llm_test", "dataset_ids": [dataset_id], "llm": llm}
        res = update_chat_assistant(get_http_api_auth, chat_assistant_ids[0], payload)
        assert res["code"] == expected_code
        if expected_code == 0:
            res = list_chat_assistants(get_http_api_auth, {"id": chat_assistant_ids[0]})
            if llm:
                for k, v in llm.items():
                    assert res["data"][0]["llm"][k] == v
            else:
                assert res["data"][0]["llm"]["model_name"] == "glm-4-flash@ZHIPU-AI"
                assert res["data"][0]["llm"]["temperature"] == 0.1
                assert res["data"][0]["llm"]["top_p"] == 0.3
                assert res["data"][0]["llm"]["presence_penalty"] == 0.4
                assert res["data"][0]["llm"]["frequency_penalty"] == 0.7
                assert res["data"][0]["llm"]["max_tokens"] == 512
        else:
            assert expected_message in res["message"]

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "prompt, expected_code, expected_message",
        [
            ({}, 100, "ValueError"),
            ({"similarity_threshold": 0}, 0, ""),
            ({"similarity_threshold": 1}, 0, ""),
            pytest.param({"similarity_threshold": -1}, 0, "", marks=pytest.mark.skip),
            pytest.param({"similarity_threshold": 10}, 0, "", marks=pytest.mark.skip),
            pytest.param({"similarity_threshold": "a"}, 0, "", marks=pytest.mark.skip),
            ({"keywords_similarity_weight": 0}, 0, ""),
            ({"keywords_similarity_weight": 1}, 0, ""),
            pytest.param({"keywords_similarity_weight": -1}, 0, "", marks=pytest.mark.skip),
            pytest.param({"keywords_similarity_weight": 10}, 0, "", marks=pytest.mark.skip),
            pytest.param({"keywords_similarity_weight": "a"}, 0, "", marks=pytest.mark.skip),
            ({"variables": []}, 0, ""),
            ({"top_n": 0}, 0, ""),
            ({"top_n": 1}, 0, ""),
            pytest.param({"top_n": -1}, 0, "", marks=pytest.mark.skip),
            pytest.param({"top_n": 10}, 0, "", marks=pytest.mark.skip),
            pytest.param({"top_n": "a"}, 0, "", marks=pytest.mark.skip),
            ({"empty_response": "Hello World"}, 0, ""),
            ({"empty_response": ""}, 0, ""),
            ({"empty_response": "!@#$%^&*()"}, 0, ""),
            ({"empty_response": "中文测试"}, 0, ""),
            pytest.param({"empty_response": 123}, 0, "", marks=pytest.mark.skip),
            pytest.param({"empty_response": True}, 0, "", marks=pytest.mark.skip),
            pytest.param({"empty_response": " "}, 0, "", marks=pytest.mark.skip),
            ({"opener": "Hello World"}, 0, ""),
            ({"opener": ""}, 0, ""),
            ({"opener": "!@#$%^&*()"}, 0, ""),
            ({"opener": "中文测试"}, 0, ""),
            pytest.param({"opener": 123}, 0, "", marks=pytest.mark.skip),
            pytest.param({"opener": True}, 0, "", marks=pytest.mark.skip),
            pytest.param({"opener": " "}, 0, "", marks=pytest.mark.skip),
            ({"show_quote": True}, 0, ""),
            ({"show_quote": False}, 0, ""),
            ({"prompt": "Hello World {knowledge}"}, 0, ""),
            ({"prompt": "{knowledge}"}, 0, ""),
            ({"prompt": "!@#$%^&*() {knowledge}"}, 0, ""),
            ({"prompt": "中文测试 {knowledge}"}, 0, ""),
            ({"prompt": "Hello World"}, 102, "Parameter 'knowledge' is not used"),
            ({"prompt": "Hello World", "variables": []}, 0, ""),
            pytest.param({"prompt": 123}, 100, """AttributeError("\'int\' object has no attribute \'find\'")""", marks=pytest.mark.skip),
            pytest.param({"prompt": True}, 100, """AttributeError("\'int\' object has no attribute \'find\'")""", marks=pytest.mark.skip),
            pytest.param({"unknown": "unknown"}, 0, "", marks=pytest.mark.skip),
        ],
    )
    def test_prompt(self, get_http_api_auth, add_chat_assistants_func, prompt, expected_code, expected_message):
        dataset_id, _, chat_assistant_ids = add_chat_assistants_func
        payload = {"name": "prompt_test", "dataset_ids": [dataset_id], "prompt": prompt}
        res = update_chat_assistant(get_http_api_auth, chat_assistant_ids[0], payload)
        assert res["code"] == expected_code
        if expected_code == 0:
            res = list_chat_assistants(get_http_api_auth, {"id": chat_assistant_ids[0]})
            if prompt:
                for k, v in prompt.items():
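                    # The server is assumed to store the complementary vector
                    # weight, so a submitted keywords_similarity_weight of v
                    # reads back as 1 - v.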
                    if k == "keywords_similarity_weight":
                        assert res["data"][0]["prompt"][k] == 1 - v
                    else:
                        assert res["data"][0]["prompt"][k] == v
            else:
                assert res["data"][0]["prompt"]["similarity_threshold"] == 0.2
                assert res["data"][0]["prompt"]["keywords_similarity_weight"] == 0.7
                assert res["data"][0]["prompt"]["top_n"] == 6
                assert res["data"][0]["prompt"]["variables"] == [{"key": "knowledge", "optional": False}]
                assert res["data"][0]["prompt"]["rerank_model"] == ""
                assert res["data"][0]["prompt"]["empty_response"] == "Sorry! No relevant content was found in the knowledge base!"
                assert res["data"][0]["prompt"]["opener"] == "Hi! I'm your assistant. What can I do for you?"
                assert res["data"][0]["prompt"]["show_quote"] is True
                assert (
                    res["data"][0]["prompt"]["prompt"]
                    == 'You are an intelligent assistant. Please summarize the content of the dataset to answer the question. Please list the data in the dataset and answer in detail. When all dataset content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the dataset!" Answers need to consider chat history.\n Here is the knowledge base:\n {knowledge}\n The above is the knowledge base.'
                )
        else:
            assert expected_message in res["message"]
@ -1,52 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import pytest
from common import add_chunk, delete_chunks, list_documnets, parse_documnets
from libs.utils import wait_for

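# `wait_for(timeout, interval, error_message)` from libs.utils is assumed to
# re-invoke the wrapped predicate every `interval` seconds until it returns
# True, raising with `error_message` once `timeout` seconds have elapsed.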
@wait_for(30, 1, "Document parsing timeout")
def condition(_auth, _dataset_id):
    res = list_documnets(_auth, _dataset_id)
    for doc in res["data"]["docs"]:
        if doc["run"] != "DONE":
            return False
    return True

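# Function-scoped fixture: parse the shared document, wait for parsing to
# finish, seed four chunks, and register a finalizer that deletes them again.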
@pytest.fixture(scope="function")
def add_chunks_func(request, get_http_api_auth, add_document):
    dataset_id, document_id = add_document
    parse_documnets(get_http_api_auth, dataset_id, {"document_ids": [document_id]})
    condition(get_http_api_auth, dataset_id)

    chunk_ids = []
    for i in range(4):
        res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": f"chunk test {i}"})
        chunk_ids.append(res["data"]["chunk"]["id"])

    # issues/6487
    from time import sleep

    sleep(1)

    def cleanup():
        delete_chunks(get_http_api_auth, dataset_id, document_id, {"chunk_ids": chunk_ids})

    request.addfinalizer(cleanup)
    return dataset_id, document_id, chunk_ids
@ -1,250 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import INVALID_API_TOKEN, add_chunk, delete_documnets, list_chunks
from libs.auth import RAGFlowHttpApiAuth

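# Shared helper: check the add_chunk response against the request payload. The
# server is assumed to normalize `questions` (strip whitespace, drop empties),
# which the comprehension below mirrors.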
def validate_chunk_details(dataset_id, document_id, payload, res):
    chunk = res["data"]["chunk"]
    assert chunk["dataset_id"] == dataset_id
    assert chunk["document_id"] == document_id
    assert chunk["content"] == payload["content"]
    if "important_keywords" in payload:
        assert chunk["important_keywords"] == payload["important_keywords"]
    if "questions" in payload:
        assert chunk["questions"] == [str(q).strip() for q in payload.get("questions", []) if str(q).strip()]


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = add_chunk(auth, "dataset_id", "document_id")
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestAddChunk:
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            ({"content": None}, 100, """TypeError("unsupported operand type(s) for +: \'NoneType\' and \'str\'")"""),
            ({"content": ""}, 102, "`content` is required"),
            pytest.param(
                {"content": 1},
                100,
                """TypeError("unsupported operand type(s) for +: \'int\' and \'str\'")""",
                marks=pytest.mark.skip,
            ),
            ({"content": "a"}, 0, ""),
            ({"content": " "}, 102, "`content` is required"),
            ({"content": "\n!?。;!?\"'"}, 0, ""),
        ],
    )
    def test_content(self, get_http_api_auth, add_document, payload, expected_code, expected_message):
        dataset_id, document_id = add_document
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        if res["code"] != 0:
            assert False, res
        chunks_count = res["data"]["doc"]["chunk_count"]
        res = add_chunk(get_http_api_auth, dataset_id, document_id, payload)
        assert res["code"] == expected_code
        if expected_code == 0:
            validate_chunk_details(dataset_id, document_id, payload, res)
            res = list_chunks(get_http_api_auth, dataset_id, document_id)
            if res["code"] != 0:
                assert False, res
            assert res["data"]["doc"]["chunk_count"] == chunks_count + 1
        else:
            assert res["message"] == expected_message

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            ({"content": "chunk test", "important_keywords": ["a", "b", "c"]}, 0, ""),
            ({"content": "chunk test", "important_keywords": [""]}, 0, ""),
            (
                {"content": "chunk test", "important_keywords": [1]},
                100,
                "TypeError('sequence item 0: expected str instance, int found')",
            ),
            ({"content": "chunk test", "important_keywords": ["a", "a"]}, 0, ""),
            ({"content": "chunk test", "important_keywords": "abc"}, 102, "`important_keywords` is required to be a list"),
            ({"content": "chunk test", "important_keywords": 123}, 102, "`important_keywords` is required to be a list"),
        ],
    )
    def test_important_keywords(self, get_http_api_auth, add_document, payload, expected_code, expected_message):
        dataset_id, document_id = add_document
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        if res["code"] != 0:
            assert False, res
        chunks_count = res["data"]["doc"]["chunk_count"]
        res = add_chunk(get_http_api_auth, dataset_id, document_id, payload)
        assert res["code"] == expected_code
        if expected_code == 0:
            validate_chunk_details(dataset_id, document_id, payload, res)
            res = list_chunks(get_http_api_auth, dataset_id, document_id)
            if res["code"] != 0:
                assert False, res
            assert res["data"]["doc"]["chunk_count"] == chunks_count + 1
        else:
            assert res["message"] == expected_message

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            ({"content": "chunk test", "questions": ["a", "b", "c"]}, 0, ""),
            ({"content": "chunk test", "questions": [""]}, 0, ""),
            ({"content": "chunk test", "questions": [1]}, 100, "TypeError('sequence item 0: expected str instance, int found')"),
            ({"content": "chunk test", "questions": ["a", "a"]}, 0, ""),
            ({"content": "chunk test", "questions": "abc"}, 102, "`questions` is required to be a list"),
            ({"content": "chunk test", "questions": 123}, 102, "`questions` is required to be a list"),
        ],
    )
    def test_questions(self, get_http_api_auth, add_document, payload, expected_code, expected_message):
        dataset_id, document_id = add_document
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        if res["code"] != 0:
            assert False, res
        chunks_count = res["data"]["doc"]["chunk_count"]
        res = add_chunk(get_http_api_auth, dataset_id, document_id, payload)
        assert res["code"] == expected_code
        if expected_code == 0:
            validate_chunk_details(dataset_id, document_id, payload, res)
            res = list_chunks(get_http_api_auth, dataset_id, document_id)
            if res["code"] != 0:
                assert False, res
            assert res["data"]["doc"]["chunk_count"] == chunks_count + 1
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "dataset_id, expected_code, expected_message",
        [
            ("", 100, "<NotFound '404: Not Found'>"),
            (
                "invalid_dataset_id",
                102,
                "You don't own the dataset invalid_dataset_id.",
            ),
        ],
    )
    def test_invalid_dataset_id(
        self,
        get_http_api_auth,
        add_document,
        dataset_id,
        expected_code,
        expected_message,
    ):
        _, document_id = add_document
        res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "a"})
        assert res["code"] == expected_code
        assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "document_id, expected_code, expected_message",
        [
            ("", 100, "<MethodNotAllowed '405: Method Not Allowed'>"),
            (
                "invalid_document_id",
                102,
                "You don't own the document invalid_document_id.",
            ),
        ],
    )
    def test_invalid_document_id(self, get_http_api_auth, add_document, document_id, expected_code, expected_message):
        dataset_id, _ = add_document
        res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "chunk test"})
        assert res["code"] == expected_code
        assert res["message"] == expected_message

    @pytest.mark.p3
    def test_repeated_add_chunk(self, get_http_api_auth, add_document):
        payload = {"content": "chunk test"}
        dataset_id, document_id = add_document
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        if res["code"] != 0:
            assert False, res
        chunks_count = res["data"]["doc"]["chunk_count"]
        res = add_chunk(get_http_api_auth, dataset_id, document_id, payload)
        assert res["code"] == 0
        validate_chunk_details(dataset_id, document_id, payload, res)
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        if res["code"] != 0:
            assert False, res
        assert res["data"]["doc"]["chunk_count"] == chunks_count + 1

        res = add_chunk(get_http_api_auth, dataset_id, document_id, payload)
        assert res["code"] == 0
        validate_chunk_details(dataset_id, document_id, payload, res)
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        if res["code"] != 0:
            assert False, res
        assert res["data"]["doc"]["chunk_count"] == chunks_count + 2

    @pytest.mark.p2
    def test_add_chunk_to_deleted_document(self, get_http_api_auth, add_document):
        dataset_id, document_id = add_document
        delete_documnets(get_http_api_auth, dataset_id, {"ids": [document_id]})
        res = add_chunk(get_http_api_auth, dataset_id, document_id, {"content": "chunk test"})
        assert res["code"] == 102
        assert res["message"] == f"You don't own the document {document_id}."

    @pytest.mark.skip(reason="issues/6411")
    def test_concurrent_add_chunk(self, get_http_api_auth, add_document):
        chunk_num = 50
        dataset_id, document_id = add_document
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        if res["code"] != 0:
            assert False, res
        chunks_count = res["data"]["doc"]["chunk_count"]

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [
                executor.submit(
                    add_chunk,
                    get_http_api_auth,
                    dataset_id,
                    document_id,
                    {"content": f"chunk test {i}"},
                )
                for i in range(chunk_num)
            ]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses)
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        if res["code"] != 0:
            assert False, res
        assert res["data"]["doc"]["chunk_count"] == chunks_count + chunk_num
@ -1,194 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import INVALID_API_TOKEN, batch_add_chunks, delete_chunks, list_chunks
from libs.auth import RAGFlowHttpApiAuth


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = delete_chunks(auth, "dataset_id", "document_id")
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestChunksDeletion:
    @pytest.mark.p3
    @pytest.mark.parametrize(
        "dataset_id, expected_code, expected_message",
        [
            ("", 100, "<NotFound '404: Not Found'>"),
            (
                "invalid_dataset_id",
                102,
                "You don't own the dataset invalid_dataset_id.",
            ),
        ],
    )
    def test_invalid_dataset_id(self, get_http_api_auth, add_chunks_func, dataset_id, expected_code, expected_message):
        _, document_id, chunk_ids = add_chunks_func
        res = delete_chunks(get_http_api_auth, dataset_id, document_id, {"chunk_ids": chunk_ids})
        assert res["code"] == expected_code
        assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "document_id, expected_code, expected_message",
        [
            ("", 100, "<MethodNotAllowed '405: Method Not Allowed'>"),
            ("invalid_document_id", 100, """LookupError("Can't find the document with ID invalid_document_id!")"""),
        ],
    )
    def test_invalid_document_id(self, get_http_api_auth, add_chunks_func, document_id, expected_code, expected_message):
        dataset_id, _, chunk_ids = add_chunks_func
        res = delete_chunks(get_http_api_auth, dataset_id, document_id, {"chunk_ids": chunk_ids})
        assert res["code"] == expected_code
        assert res["message"] == expected_message

    @pytest.mark.parametrize(
        "payload",
        [
            pytest.param(lambda r: {"chunk_ids": ["invalid_id"] + r}, marks=pytest.mark.p3),
            pytest.param(lambda r: {"chunk_ids": r[:1] + ["invalid_id"] + r[1:4]}, marks=pytest.mark.p1),
            pytest.param(lambda r: {"chunk_ids": r + ["invalid_id"]}, marks=pytest.mark.p3),
        ],
    )
    def test_delete_partial_invalid_id(self, get_http_api_auth, add_chunks_func, payload):
        dataset_id, document_id, chunk_ids = add_chunks_func
        if callable(payload):
            payload = payload(chunk_ids)
        res = delete_chunks(get_http_api_auth, dataset_id, document_id, payload)
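        # Partial failure: the four valid ids are deleted, while the invalid id
        # surfaces as a count mismatch (4 deleted, 5 requested).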
assert res["code"] == 102
|
||||
assert res["message"] == "rm_chunk deleted chunks 4, expect 5"
|
||||
|
||||
res = list_chunks(get_http_api_auth, dataset_id, document_id)
|
||||
if res["code"] != 0:
|
||||
assert False, res
|
||||
assert len(res["data"]["chunks"]) == 1
|
||||
assert res["data"]["total"] == 1
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_repeated_deletion(self, get_http_api_auth, add_chunks_func):
|
||||
dataset_id, document_id, chunk_ids = add_chunks_func
|
||||
payload = {"chunk_ids": chunk_ids}
|
||||
res = delete_chunks(get_http_api_auth, dataset_id, document_id, payload)
|
||||
assert res["code"] == 0
|
||||
|
||||
res = delete_chunks(get_http_api_auth, dataset_id, document_id, payload)
|
||||
assert res["code"] == 102
|
||||
assert res["message"] == "rm_chunk deleted chunks 0, expect 4"
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_duplicate_deletion(self, get_http_api_auth, add_chunks_func):
|
||||
dataset_id, document_id, chunk_ids = add_chunks_func
|
||||
res = delete_chunks(get_http_api_auth, dataset_id, document_id, {"chunk_ids": chunk_ids * 2})
|
||||
assert res["code"] == 0
|
||||
assert "Duplicate chunk ids" in res["data"]["errors"][0]
|
||||
assert res["data"]["success_count"] == 4
|
||||
|
||||
res = list_chunks(get_http_api_auth, dataset_id, document_id)
|
||||
if res["code"] != 0:
|
||||
assert False, res
|
||||
assert len(res["data"]["chunks"]) == 1
|
||||
assert res["data"]["total"] == 1
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_concurrent_deletion(self, get_http_api_auth, add_document):
|
||||
chunks_num = 100
|
||||
dataset_id, document_id = add_document
|
||||
chunk_ids = batch_add_chunks(get_http_api_auth, dataset_id, document_id, chunks_num)
|
||||
|
||||
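        # Each worker deletes a distinct one-element slice of chunk_ids, so the
        # 100 concurrent requests never contend over the same chunk.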
        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [
                executor.submit(
                    delete_chunks,
                    get_http_api_auth,
                    dataset_id,
                    document_id,
                    {"chunk_ids": chunk_ids[i : i + 1]},
                )
                for i in range(chunks_num)
            ]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses)

    @pytest.mark.p3
    def test_delete_1k(self, get_http_api_auth, add_document):
        chunks_num = 1_000
        dataset_id, document_id = add_document
        chunk_ids = batch_add_chunks(get_http_api_auth, dataset_id, document_id, chunks_num)

        # issues/6487
        from time import sleep

        sleep(1)

        res = delete_chunks(get_http_api_auth, dataset_id, document_id, {"chunk_ids": chunk_ids})
        assert res["code"] == 0

        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        if res["code"] != 0:
            assert False, res
        assert len(res["data"]["chunks"]) == 1
        assert res["data"]["total"] == 1

    @pytest.mark.parametrize(
        "payload, expected_code, expected_message, remaining",
        [
            pytest.param(None, 100, """TypeError("argument of type \'NoneType\' is not iterable")""", 5, marks=pytest.mark.skip),
            pytest.param({"chunk_ids": ["invalid_id"]}, 102, "rm_chunk deleted chunks 0, expect 1", 5, marks=pytest.mark.p3),
            pytest.param("not json", 100, """UnboundLocalError("local variable \'duplicate_messages\' referenced before assignment")""", 5, marks=pytest.mark.skip(reason="pull/6376")),
            pytest.param(lambda r: {"chunk_ids": r[:1]}, 0, "", 4, marks=pytest.mark.p3),
            pytest.param(lambda r: {"chunk_ids": r}, 0, "", 1, marks=pytest.mark.p1),
            pytest.param({"chunk_ids": []}, 0, "", 0, marks=pytest.mark.p3),
        ],
    )
    def test_basic_scenarios(
        self,
        get_http_api_auth,
        add_chunks_func,
        payload,
        expected_code,
        expected_message,
        remaining,
    ):
        dataset_id, document_id, chunk_ids = add_chunks_func
        if callable(payload):
            payload = payload(chunk_ids)
        res = delete_chunks(get_http_api_auth, dataset_id, document_id, payload)
        assert res["code"] == expected_code
        if res["code"] != 0:
            assert res["message"] == expected_message

        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        if res["code"] != 0:
            assert False, res
        assert len(res["data"]["chunks"]) == remaining
        assert res["data"]["total"] == remaining
@ -1,209 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import INVALID_API_TOKEN, batch_add_chunks, list_chunks
from libs.auth import RAGFlowHttpApiAuth


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = list_chunks(auth, "dataset_id", "document_id")
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestChunksList:
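    # The add_chunks fixture is assumed to leave 5 chunks on the document:
    # 1 produced by parsing plus 4 added explicitly.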
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "params, expected_code, expected_page_size, expected_message",
        [
            ({"page": None, "page_size": 2}, 0, 2, ""),
            pytest.param({"page": 0, "page_size": 2}, 100, 0, "ValueError('Search does not support negative slicing.')", marks=pytest.mark.skip),
            ({"page": 2, "page_size": 2}, 0, 2, ""),
            ({"page": 3, "page_size": 2}, 0, 1, ""),
            ({"page": "3", "page_size": 2}, 0, 1, ""),
            pytest.param({"page": -1, "page_size": 2}, 100, 0, "ValueError('Search does not support negative slicing.')", marks=pytest.mark.skip),
            pytest.param({"page": "a", "page_size": 2}, 100, 0, """ValueError("invalid literal for int() with base 10: \'a\'")""", marks=pytest.mark.skip),
        ],
    )
    def test_page(self, get_http_api_auth, add_chunks, params, expected_code, expected_page_size, expected_message):
        dataset_id, document_id, _ = add_chunks
        res = list_chunks(get_http_api_auth, dataset_id, document_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]["chunks"]) == expected_page_size
        else:
            assert res["message"] == expected_message

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "params, expected_code, expected_page_size, expected_message",
        [
            ({"page_size": None}, 0, 5, ""),
            pytest.param({"page_size": 0}, 0, 5, ""),
            pytest.param({"page_size": 0}, 100, 0, ""),
            ({"page_size": 1}, 0, 1, ""),
            ({"page_size": 6}, 0, 5, ""),
            ({"page_size": "1"}, 0, 1, ""),
            pytest.param({"page_size": -1}, 0, 5, "", marks=pytest.mark.skip),
            pytest.param({"page_size": "a"}, 100, 0, """ValueError("invalid literal for int() with base 10: \'a\'")""", marks=pytest.mark.skip),
        ],
    )
    def test_page_size(self, get_http_api_auth, add_chunks, params, expected_code, expected_page_size, expected_message):
        dataset_id, document_id, _ = add_chunks
        res = list_chunks(get_http_api_auth, dataset_id, document_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]["chunks"]) == expected_page_size
        else:
            assert res["message"] == expected_message

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "params, expected_page_size",
        [
            ({"keywords": None}, 5),
            ({"keywords": ""}, 5),
            ({"keywords": "1"}, 1),
            pytest.param({"keywords": "chunk"}, 4, marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6509")),
            ({"keywords": "ragflow"}, 1),
            ({"keywords": "unknown"}, 0),
        ],
    )
    def test_keywords(self, get_http_api_auth, add_chunks, params, expected_page_size):
        dataset_id, document_id, _ = add_chunks
        res = list_chunks(get_http_api_auth, dataset_id, document_id, params=params)
        assert res["code"] == 0
        assert len(res["data"]["chunks"]) == expected_page_size

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "chunk_id, expected_code, expected_page_size, expected_message",
        [
            (None, 0, 5, ""),
            ("", 0, 5, ""),
            pytest.param(lambda r: r[0], 0, 1, "", marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6499")),
            pytest.param("unknown", 100, 0, """AttributeError("\'NoneType\' object has no attribute \'keys\'")""", marks=pytest.mark.skip),
        ],
    )
    def test_id(
        self,
        get_http_api_auth,
        add_chunks,
        chunk_id,
        expected_code,
        expected_page_size,
        expected_message,
    ):
        dataset_id, document_id, chunk_ids = add_chunks
        if callable(chunk_id):
            params = {"id": chunk_id(chunk_ids)}
        else:
            params = {"id": chunk_id}
        res = list_chunks(get_http_api_auth, dataset_id, document_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            if params["id"] in [None, ""]:
                assert len(res["data"]["chunks"]) == expected_page_size
            else:
                assert res["data"]["chunks"][0]["id"] == params["id"]
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    def test_invalid_params(self, get_http_api_auth, add_chunks):
        dataset_id, document_id, _ = add_chunks
        params = {"a": "b"}
        res = list_chunks(get_http_api_auth, dataset_id, document_id, params=params)
        assert res["code"] == 0
        assert len(res["data"]["chunks"]) == 5

    @pytest.mark.p3
    def test_concurrent_list(self, get_http_api_auth, add_chunks):
        dataset_id, document_id, _ = add_chunks

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(list_chunks, get_http_api_auth, dataset_id, document_id) for i in range(100)]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses)
        assert all(len(r["data"]["chunks"]) == 5 for r in responses)

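    # The default page size is assumed to be 30: after adding 31 chunks, a bare
    # list call returns only 30 even though chunk_count reflects all of them.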
    @pytest.mark.p1
    def test_default(self, get_http_api_auth, add_document):
        dataset_id, document_id = add_document

        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        chunks_count = res["data"]["doc"]["chunk_count"]
        batch_add_chunks(get_http_api_auth, dataset_id, document_id, 31)
        # issues/6487
        from time import sleep

        sleep(3)
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        assert res["code"] == 0
        assert len(res["data"]["chunks"]) == 30
        assert res["data"]["doc"]["chunk_count"] == chunks_count + 31

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "dataset_id, expected_code, expected_message",
        [
            ("", 100, "<NotFound '404: Not Found'>"),
            (
                "invalid_dataset_id",
                102,
                "You don't own the dataset invalid_dataset_id.",
            ),
        ],
    )
    def test_invalid_dataset_id(self, get_http_api_auth, add_chunks, dataset_id, expected_code, expected_message):
        _, document_id, _ = add_chunks
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        assert res["code"] == expected_code
        assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "document_id, expected_code, expected_message",
        [
            ("", 102, "The dataset not own the document chunks."),
            (
                "invalid_document_id",
                102,
                "You don't own the document invalid_document_id.",
            ),
        ],
    )
    def test_invalid_document_id(self, get_http_api_auth, add_chunks, document_id, expected_code, expected_message):
        dataset_id, _, _ = add_chunks
        res = list_chunks(get_http_api_auth, dataset_id, document_id)
        assert res["code"] == expected_code
        assert res["message"] == expected_message
@ -1,313 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os

import pytest
from common import (
    INVALID_API_TOKEN,
    retrieval_chunks,
)
from libs.auth import RAGFlowHttpApiAuth


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = retrieval_chunks(auth)
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestChunksRetrieval:
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "payload, expected_code, expected_page_size, expected_message",
        [
            ({"question": "chunk", "dataset_ids": None}, 0, 4, ""),
            ({"question": "chunk", "document_ids": None}, 102, 0, "`dataset_ids` is required."),
            ({"question": "chunk", "dataset_ids": None, "document_ids": None}, 0, 4, ""),
            ({"question": "chunk"}, 102, 0, "`dataset_ids` is required."),
        ],
    )
    def test_basic_scenarios(self, get_http_api_auth, add_chunks, payload, expected_code, expected_page_size, expected_message):
        dataset_id, document_id, _ = add_chunks
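        # The None values in the params above are placeholders; they are
        # replaced with the real fixture ids before the request is sent.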
if "dataset_ids" in payload:
|
||||
payload["dataset_ids"] = [dataset_id]
|
||||
if "document_ids" in payload:
|
||||
payload["document_ids"] = [document_id]
|
||||
res = retrieval_chunks(get_http_api_auth, payload)
|
||||
assert res["code"] == expected_code
|
||||
if expected_code == 0:
|
||||
assert len(res["data"]["chunks"]) == expected_page_size
|
||||
else:
|
||||
assert res["message"] == expected_message
|
||||
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize(
|
||||
"payload, expected_code, expected_page_size, expected_message",
|
||||
[
|
||||
pytest.param(
|
||||
{"page": None, "page_size": 2},
|
||||
100,
|
||||
2,
|
||||
"""TypeError("int() argument must be a string, a bytes-like object or a real number, not \'NoneType\'")""",
|
||||
marks=pytest.mark.skip,
|
||||
),
|
||||
pytest.param(
|
||||
{"page": 0, "page_size": 2},
|
||||
100,
|
||||
0,
|
||||
"ValueError('Search does not support negative slicing.')",
|
||||
marks=pytest.mark.skip,
|
||||
),
|
||||
pytest.param({"page": 2, "page_size": 2}, 0, 2, "", marks=pytest.mark.skip(reason="issues/6646")),
|
||||
({"page": 3, "page_size": 2}, 0, 0, ""),
|
||||
({"page": "3", "page_size": 2}, 0, 0, ""),
|
||||
pytest.param(
|
||||
{"page": -1, "page_size": 2},
|
||||
100,
|
||||
0,
|
||||
"ValueError('Search does not support negative slicing.')",
|
||||
marks=pytest.mark.skip,
|
||||
),
|
||||
pytest.param(
|
||||
{"page": "a", "page_size": 2},
|
||||
100,
|
||||
0,
|
||||
"""ValueError("invalid literal for int() with base 10: \'a\'")""",
|
||||
marks=pytest.mark.skip,
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_page(self, get_http_api_auth, add_chunks, payload, expected_code, expected_page_size, expected_message):
|
||||
dataset_id, _, _ = add_chunks
|
||||
payload.update({"question": "chunk", "dataset_ids": [dataset_id]})
|
||||
res = retrieval_chunks(get_http_api_auth, payload)
|
||||
assert res["code"] == expected_code
|
||||
if expected_code == 0:
|
||||
assert len(res["data"]["chunks"]) == expected_page_size
|
||||
else:
|
||||
assert res["message"] == expected_message
|
||||
|
||||
@pytest.mark.p3
|
||||
@pytest.mark.parametrize(
|
||||
"payload, expected_code, expected_page_size, expected_message",
|
||||
[
|
||||
pytest.param(
|
||||
{"page_size": None},
|
||||
100,
|
||||
0,
|
||||
"""TypeError("int() argument must be a string, a bytes-like object or a real number, not \'NoneType\'")""",
|
||||
marks=pytest.mark.skip,
|
||||
),
|
||||
# ({"page_size": 0}, 0, 0, ""),
|
||||
({"page_size": 1}, 0, 1, ""),
|
||||
({"page_size": 5}, 0, 4, ""),
|
||||
({"page_size": "1"}, 0, 1, ""),
|
||||
# ({"page_size": -1}, 0, 0, ""),
|
||||
pytest.param(
|
||||
{"page_size": "a"},
|
||||
100,
|
||||
0,
|
||||
"""ValueError("invalid literal for int() with base 10: \'a\'")""",
|
||||
marks=pytest.mark.skip,
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_page_size(self, get_http_api_auth, add_chunks, payload, expected_code, expected_page_size, expected_message):
|
||||
dataset_id, _, _ = add_chunks
|
||||
payload.update({"question": "chunk", "dataset_ids": [dataset_id]})
|
||||
|
||||
res = retrieval_chunks(get_http_api_auth, payload)
|
||||
assert res["code"] == expected_code
|
||||
if expected_code == 0:
|
||||
assert len(res["data"]["chunks"]) == expected_page_size
|
||||
else:
|
||||
assert res["message"] == expected_message
|
||||
|
||||
@pytest.mark.p3
|
||||
@pytest.mark.parametrize(
|
||||
"payload, expected_code, expected_page_size, expected_message",
|
||||
[
|
||||
({"vector_similarity_weight": 0}, 0, 4, ""),
|
||||
({"vector_similarity_weight": 0.5}, 0, 4, ""),
|
||||
({"vector_similarity_weight": 10}, 0, 4, ""),
|
||||
pytest.param(
|
||||
{"vector_similarity_weight": "a"},
|
||||
100,
|
||||
0,
|
||||
"""ValueError("could not convert string to float: \'a\'")""",
|
||||
marks=pytest.mark.skip,
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_vector_similarity_weight(self, get_http_api_auth, add_chunks, payload, expected_code, expected_page_size, expected_message):
|
||||
dataset_id, _, _ = add_chunks
|
||||
payload.update({"question": "chunk", "dataset_ids": [dataset_id]})
|
||||
res = retrieval_chunks(get_http_api_auth, payload)
|
||||
assert res["code"] == expected_code
|
||||
if expected_code == 0:
|
||||
assert len(res["data"]["chunks"]) == expected_page_size
|
||||
else:
|
||||
assert res["message"] == expected_message
|
||||
|
||||
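    # top_k behaves differently per backend, so the expectations below are
    # gated on the DOC_ENGINE environment variable (elasticsearch vs.
    # infinity/opensearch).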
    @pytest.mark.p2
    @pytest.mark.parametrize(
        "payload, expected_code, expected_page_size, expected_message",
        [
            ({"top_k": 10}, 0, 4, ""),
            pytest.param(
                {"top_k": 1},
                0,
                4,
                "",
                marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in ["infinity", "opensearch"], reason="Infinity"),
            ),
            pytest.param(
                {"top_k": 1},
                0,
                1,
                "",
                marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in [None, "opensearch", "elasticsearch"], reason="elasticsearch"),
            ),
            pytest.param(
                {"top_k": -1},
                100,
                4,
                "must be greater than 0",
                marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in ["infinity", "opensearch"], reason="Infinity"),
            ),
            pytest.param(
                {"top_k": -1},
                100,
                4,
                "3014",
                marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in [None, "opensearch", "elasticsearch"], reason="elasticsearch"),
            ),
            pytest.param(
                {"top_k": "a"},
                100,
                0,
                """ValueError("invalid literal for int() with base 10: \'a\'")""",
                marks=pytest.mark.skip,
            ),
        ],
    )
    def test_top_k(self, get_http_api_auth, add_chunks, payload, expected_code, expected_page_size, expected_message):
        dataset_id, _, _ = add_chunks
        payload.update({"question": "chunk", "dataset_ids": [dataset_id]})
        res = retrieval_chunks(get_http_api_auth, payload)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]["chunks"]) == expected_page_size
        else:
            assert expected_message in res["message"]

    @pytest.mark.skip
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            ({"rerank_id": "BAAI/bge-reranker-v2-m3"}, 0, ""),
            pytest.param({"rerank_id": "unknown"}, 100, "LookupError('Model(unknown) not authorized')", marks=pytest.mark.skip),
        ],
    )
    def test_rerank_id(self, get_http_api_auth, add_chunks, payload, expected_code, expected_message):
        dataset_id, _, _ = add_chunks
        payload.update({"question": "chunk", "dataset_ids": [dataset_id]})
        res = retrieval_chunks(get_http_api_auth, payload)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]["chunks"]) > 0
        else:
            assert expected_message in res["message"]

    @pytest.mark.skip
    @pytest.mark.parametrize(
        "payload, expected_code, expected_page_size, expected_message",
        [
            ({"keyword": True}, 0, 5, ""),
            ({"keyword": "True"}, 0, 5, ""),
            ({"keyword": False}, 0, 5, ""),
            ({"keyword": "False"}, 0, 5, ""),
            ({"keyword": None}, 0, 5, ""),
        ],
    )
    def test_keyword(self, get_http_api_auth, add_chunks, payload, expected_code, expected_page_size, expected_message):
        dataset_id, _, _ = add_chunks
        payload.update({"question": "chunk test", "dataset_ids": [dataset_id]})
        res = retrieval_chunks(get_http_api_auth, payload)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]["chunks"]) == expected_page_size
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "payload, expected_code, expected_highlight, expected_message",
        [
            ({"highlight": True}, 0, True, ""),
            ({"highlight": "True"}, 0, True, ""),
            pytest.param({"highlight": False}, 0, False, "", marks=pytest.mark.skip(reason="issues/6648")),
            ({"highlight": "False"}, 0, False, ""),
            pytest.param({"highlight": None}, 0, False, "", marks=pytest.mark.skip(reason="issues/6648")),
        ],
    )
    def test_highlight(self, get_http_api_auth, add_chunks, payload, expected_code, expected_highlight, expected_message):
        dataset_id, _, _ = add_chunks
        payload.update({"question": "chunk", "dataset_ids": [dataset_id]})
        res = retrieval_chunks(get_http_api_auth, payload)
        assert res["code"] == expected_code
        if expected_highlight:
            for chunk in res["data"]["chunks"]:
                assert "highlight" in chunk
        else:
            for chunk in res["data"]["chunks"]:
                assert "highlight" not in chunk

        if expected_code != 0:
            assert res["message"] == expected_message

    @pytest.mark.p3
    def test_invalid_params(self, get_http_api_auth, add_chunks):
        dataset_id, _, _ = add_chunks
        payload = {"question": "chunk", "dataset_ids": [dataset_id], "a": "b"}
        res = retrieval_chunks(get_http_api_auth, payload)
        assert res["code"] == 0
        assert len(res["data"]["chunks"]) == 4

    @pytest.mark.p3
    def test_concurrent_retrieval(self, get_http_api_auth, add_chunks):
        from concurrent.futures import ThreadPoolExecutor

        dataset_id, _, _ = add_chunks
        payload = {"question": "chunk", "dataset_ids": [dataset_id]}

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(retrieval_chunks, get_http_api_auth, payload) for i in range(100)]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses)
@ -1,246 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
from concurrent.futures import ThreadPoolExecutor
from random import randint

import pytest
from common import INVALID_API_TOKEN, delete_documnets, update_chunk
from libs.auth import RAGFlowHttpApiAuth


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = update_chunk(auth, "dataset_id", "document_id", "chunk_id")
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestUpdatedChunk:
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            ({"content": None}, 100, "TypeError('expected string or bytes-like object')"),
            pytest.param(
                {"content": ""},
                100,
                """APIRequestFailedError(\'Error code: 400, with error text {"error":{"code":"1213","message":"未正常接收到prompt参数。"}}\')""",
                marks=pytest.mark.skip(reason="issues/6541"),
            ),
            pytest.param(
                {"content": 1},
                100,
                "TypeError('expected string or bytes-like object')",
                marks=pytest.mark.skip,
            ),
            ({"content": "update chunk"}, 0, ""),
            pytest.param(
                {"content": " "},
                100,
                """APIRequestFailedError(\'Error code: 400, with error text {"error":{"code":"1213","message":"未正常接收到prompt参数。"}}\')""",
                marks=pytest.mark.skip(reason="issues/6541"),
            ),
            ({"content": "\n!?。;!?\"'"}, 0, ""),
        ],
    )
    def test_content(self, get_http_api_auth, add_chunks, payload, expected_code, expected_message):
        dataset_id, document_id, chunk_ids = add_chunks
        res = update_chunk(get_http_api_auth, dataset_id, document_id, chunk_ids[0], payload)
        assert res["code"] == expected_code
        if expected_code != 0:
            assert res["message"] == expected_message

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            ({"important_keywords": ["a", "b", "c"]}, 0, ""),
            ({"important_keywords": [""]}, 0, ""),
            ({"important_keywords": [1]}, 100, "TypeError('sequence item 0: expected str instance, int found')"),
            ({"important_keywords": ["a", "a"]}, 0, ""),
            ({"important_keywords": "abc"}, 102, "`important_keywords` should be a list"),
            ({"important_keywords": 123}, 102, "`important_keywords` should be a list"),
        ],
    )
    def test_important_keywords(self, get_http_api_auth, add_chunks, payload, expected_code, expected_message):
        dataset_id, document_id, chunk_ids = add_chunks
        res = update_chunk(get_http_api_auth, dataset_id, document_id, chunk_ids[0], payload)
        assert res["code"] == expected_code
        if expected_code != 0:
            assert res["message"] == expected_message

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            ({"questions": ["a", "b", "c"]}, 0, ""),
            ({"questions": [""]}, 0, ""),
            ({"questions": [1]}, 100, "TypeError('sequence item 0: expected str instance, int found')"),
            ({"questions": ["a", "a"]}, 0, ""),
            ({"questions": "abc"}, 102, "`questions` should be a list"),
            ({"questions": 123}, 102, "`questions` should be a list"),
        ],
    )
    def test_questions(self, get_http_api_auth, add_chunks, payload, expected_code, expected_message):
        dataset_id, document_id, chunk_ids = add_chunks
        res = update_chunk(get_http_api_auth, dataset_id, document_id, chunk_ids[0], payload)
        assert res["code"] == expected_code
        if expected_code != 0:
            assert res["message"] == expected_message

@pytest.mark.p2
|
||||
@pytest.mark.parametrize(
|
||||
"payload, expected_code, expected_message",
|
||||
[
|
||||
({"available": True}, 0, ""),
|
||||
pytest.param({"available": "True"}, 100, """ValueError("invalid literal for int() with base 10: \'True\'")""", marks=pytest.mark.skip),
|
||||
({"available": 1}, 0, ""),
|
||||
({"available": False}, 0, ""),
|
||||
pytest.param({"available": "False"}, 100, """ValueError("invalid literal for int() with base 10: \'False\'")""", marks=pytest.mark.skip),
|
||||
({"available": 0}, 0, ""),
|
||||
],
|
||||
)
|
||||
def test_available(
|
||||
self,
|
||||
get_http_api_auth,
|
||||
add_chunks,
|
||||
payload,
|
||||
expected_code,
|
||||
expected_message,
|
||||
):
|
||||
dataset_id, document_id, chunk_ids = add_chunks
|
||||
res = update_chunk(get_http_api_auth, dataset_id, document_id, chunk_ids[0], payload)
|
||||
assert res["code"] == expected_code
|
||||
if expected_code != 0:
|
||||
assert res["message"] == expected_message
|
||||
|
||||
@pytest.mark.p3
|
||||
@pytest.mark.parametrize(
|
||||
"dataset_id, expected_code, expected_message",
|
||||
[
|
||||
("", 100, "<NotFound '404: Not Found'>"),
|
||||
pytest.param("invalid_dataset_id", 102, "You don't own the dataset invalid_dataset_id.", marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="infinity")),
|
||||
pytest.param("invalid_dataset_id", 102, "Can't find this chunk", marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in [None, "opensearch","elasticsearch"], reason="elasticsearch")),
|
||||
],
|
||||
)
|
||||
def test_invalid_dataset_id(self, get_http_api_auth, add_chunks, dataset_id, expected_code, expected_message):
|
||||
_, document_id, chunk_ids = add_chunks
|
||||
res = update_chunk(get_http_api_auth, dataset_id, document_id, chunk_ids[0])
|
||||
assert res["code"] == expected_code
|
||||
assert expected_message in res["message"]
|
||||
|
||||
@pytest.mark.p3
|
||||
@pytest.mark.parametrize(
|
||||
"document_id, expected_code, expected_message",
|
||||
[
|
||||
("", 100, "<NotFound '404: Not Found'>"),
|
||||
(
|
||||
"invalid_document_id",
|
||||
102,
|
||||
"You don't own the document invalid_document_id.",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_invalid_document_id(self, get_http_api_auth, add_chunks, document_id, expected_code, expected_message):
|
||||
dataset_id, _, chunk_ids = add_chunks
|
||||
res = update_chunk(get_http_api_auth, dataset_id, document_id, chunk_ids[0])
|
||||
assert res["code"] == expected_code
|
||||
assert res["message"] == expected_message
|
||||
|
||||
@pytest.mark.p3
|
||||
@pytest.mark.parametrize(
|
||||
"chunk_id, expected_code, expected_message",
|
||||
[
|
||||
("", 100, "<MethodNotAllowed '405: Method Not Allowed'>"),
|
||||
            (
                "invalid_chunk_id",
                102,
                "Can't find this chunk invalid_chunk_id",
            ),
        ],
    )
    def test_invalid_chunk_id(self, get_http_api_auth, add_chunks, chunk_id, expected_code, expected_message):
        dataset_id, document_id, _ = add_chunks
        res = update_chunk(get_http_api_auth, dataset_id, document_id, chunk_id)
        assert res["code"] == expected_code
        assert res["message"] == expected_message

    @pytest.mark.p3
    def test_repeated_update_chunk(self, get_http_api_auth, add_chunks):
        dataset_id, document_id, chunk_ids = add_chunks
        res = update_chunk(get_http_api_auth, dataset_id, document_id, chunk_ids[0], {"content": "chunk test 1"})
        assert res["code"] == 0

        res = update_chunk(get_http_api_auth, dataset_id, document_id, chunk_ids[0], {"content": "chunk test 2"})
        assert res["code"] == 0

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            ({"unknown_key": "unknown_value"}, 0, ""),
            ({}, 0, ""),
            pytest.param(None, 100, """TypeError("argument of type \'NoneType\' is not iterable")""", marks=pytest.mark.skip),
        ],
    )
    def test_invalid_params(self, get_http_api_auth, add_chunks, payload, expected_code, expected_message):
        dataset_id, document_id, chunk_ids = add_chunks
        res = update_chunk(get_http_api_auth, dataset_id, document_id, chunk_ids[0], payload)
        assert res["code"] == expected_code
        if expected_code != 0:
            assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="issues/6554")
    def test_concurrent_update_chunk(self, get_http_api_auth, add_chunks):
        chunk_num = 50
        dataset_id, document_id, chunk_ids = add_chunks

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [
                executor.submit(
                    update_chunk,
                    get_http_api_auth,
                    dataset_id,
                    document_id,
                    chunk_ids[randint(0, 3)],
                    {"content": f"update chunk test {i}"},
                )
                for i in range(chunk_num)
            ]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses)

    @pytest.mark.p3
    def test_update_chunk_to_deleted_document(self, get_http_api_auth, add_chunks):
        dataset_id, document_id, chunk_ids = add_chunks
        delete_documnets(get_http_api_auth, dataset_id, {"ids": [document_id]})
        res = update_chunk(get_http_api_auth, dataset_id, document_id, chunk_ids[0])
        assert res["code"] == 102
        assert res["message"] == f"Can't find this chunk {chunk_ids[0]}"
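# Editor's note: a minimal sketch of the HTTP call the `update_chunk` helper
# from `common` is assumed to wrap. The endpoint shape is inferred from the
# dataset, document, and chunk ids these tests pass in; it is not confirmed
# by this diff, so treat the path and verb as assumptions.
#
# import requests
#
# def update_chunk_sketch(base_url, auth, dataset_id, document_id, chunk_id, payload=None):
#     # hypothetical endpoint: PUT /api/v1/datasets/{id}/documents/{id}/chunks/{id}
#     url = f"{base_url}/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks/{chunk_id}"
#     return requests.put(url, json=payload, auth=auth).json()  # parsed {"code": ..., "message": ...}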
@ -1,39 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import pytest
from common import batch_create_datasets, delete_datasets


@pytest.fixture(scope="class")
def add_datasets(get_http_api_auth, request):
    def cleanup():
        delete_datasets(get_http_api_auth, {"ids": None})

    request.addfinalizer(cleanup)

    return batch_create_datasets(get_http_api_auth, 5)


@pytest.fixture(scope="function")
def add_datasets_func(get_http_api_auth, request):
    def cleanup():
        delete_datasets(get_http_api_auth, {"ids": None})

    request.addfinalizer(cleanup)

    return batch_create_datasets(get_http_api_auth, 3)
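# Editor's note: a usage sketch for the two fixtures above, assuming a pytest
# session where `get_http_api_auth` is supplied by a higher-level conftest.
# `add_datasets` (class scope) yields five dataset ids shared by a test class;
# `add_datasets_func` (function scope) yields three fresh ids per test. Both
# register a finalizer that deletes every dataset ({"ids": None}) when the
# scope ends.
#
# def test_fixture_usage_sketch(add_datasets_func):
#     dataset_ids = add_datasets_func  # three ids, wiped after this test
#     assert len(dataset_ids) == 3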
@ -1,735 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import DATASET_NAME_LIMIT, INVALID_API_TOKEN, create_dataset
from hypothesis import example, given, settings
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import encode_avatar
from libs.utils.file_utils import create_image_file
from libs.utils.hypothesis_utils import valid_names


@pytest.mark.usefixtures("clear_datasets")
class TestAuthorization:
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
        ids=["empty_auth", "invalid_api_token"],
    )
    def test_auth_invalid(self, auth, expected_code, expected_message):
        res = create_dataset(auth, {"name": "auth_test"})
        assert res["code"] == expected_code, res
        assert res["message"] == expected_message, res


class TestRequest:
    @pytest.mark.p3
    def test_content_type_bad(self, get_http_api_auth):
        BAD_CONTENT_TYPE = "text/xml"
        res = create_dataset(get_http_api_auth, {"name": "bad_content_type"}, headers={"Content-Type": BAD_CONTENT_TYPE})
        assert res["code"] == 101, res
        assert res["message"] == f"Unsupported content type: Expected application/json, got {BAD_CONTENT_TYPE}", res

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "payload, expected_message",
        [
            ("a", "Malformed JSON syntax: Missing commas/brackets or invalid encoding"),
            ('"a"', "Invalid request payload: expected object, got str"),
        ],
        ids=["malformed_json_syntax", "invalid_request_payload_type"],
    )
    def test_payload_bad(self, get_http_api_auth, payload, expected_message):
        res = create_dataset(get_http_api_auth, data=payload)
        assert res["code"] == 101, res
        assert res["message"] == expected_message, res


@pytest.mark.usefixtures("clear_datasets")
class TestCapability:
    @pytest.mark.p3
    def test_create_dataset_1k(self, get_http_api_auth):
        for i in range(1_000):
            payload = {"name": f"dataset_{i}"}
            res = create_dataset(get_http_api_auth, payload)
            assert res["code"] == 0, f"Failed to create dataset {i}"

    @pytest.mark.p3
    def test_create_dataset_concurrent(self, get_http_api_auth):
        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(create_dataset, get_http_api_auth, {"name": f"dataset_{i}"}) for i in range(100)]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses), responses


@pytest.mark.usefixtures("clear_datasets")
class TestDatasetCreate:
    @pytest.mark.p1
    @given(name=valid_names())
    @example("a" * 128)
    @settings(max_examples=20)
    def test_name(self, get_http_api_auth, name):
        res = create_dataset(get_http_api_auth, {"name": name})
        assert res["code"] == 0, res
        assert res["data"]["name"] == name, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "name, expected_message",
        [
            ("", "String should have at least 1 character"),
            (" ", "String should have at least 1 character"),
            ("a" * (DATASET_NAME_LIMIT + 1), "String should have at most 128 characters"),
            (0, "Input should be a valid string"),
            (None, "Input should be a valid string"),
        ],
        ids=["empty_name", "space_name", "too_long_name", "invalid_name", "None_name"],
    )
    def test_name_invalid(self, get_http_api_auth, name, expected_message):
        payload = {"name": name}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert expected_message in res["message"], res

    @pytest.mark.p3
    def test_name_duplicated(self, get_http_api_auth):
        name = "duplicated_name"
        payload = {"name": name}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res

        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 103, res
        assert res["message"] == f"Dataset name '{name}' already exists", res

    @pytest.mark.p3
    def test_name_case_insensitive(self, get_http_api_auth):
        name = "CaseInsensitive"
        payload = {"name": name.upper()}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res

        payload = {"name": name.lower()}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 103, res
        assert res["message"] == f"Dataset name '{name.lower()}' already exists", res

    @pytest.mark.p2
    def test_avatar(self, get_http_api_auth, tmp_path):
        fn = create_image_file(tmp_path / "ragflow_test.png")
        payload = {
            "name": "avatar",
            "avatar": f"data:image/png;base64,{encode_avatar(fn)}",
        }
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res

    @pytest.mark.p2
    def test_avatar_exceeds_limit_length(self, get_http_api_auth):
        payload = {"name": "avatar_exceeds_limit_length", "avatar": "a" * 65536}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert "String should have at most 65535 characters" in res["message"], res

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "name, prefix, expected_message",
        [
            ("empty_prefix", "", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"),
            ("missing_comma", "data:image/png;base64", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"),
("unsupported_mine_type", "invalid_mine_prefix:image/png;base64,", "Invalid MIME prefix format. Must start with 'data:'"),
|
||||
("invalid_mine_type", "data:unsupported_mine_type;base64,", "Unsupported MIME type. Allowed: ['image/jpeg', 'image/png']"),
|
||||
],
|
||||
ids=["empty_prefix", "missing_comma", "unsupported_mine_type", "invalid_mine_type"],
|
||||
    )
    def test_avatar_invalid_prefix(self, get_http_api_auth, tmp_path, name, prefix, expected_message):
        fn = create_image_file(tmp_path / "ragflow_test.png")
        payload = {
            "name": name,
            "avatar": f"{prefix}{encode_avatar(fn)}",
        }
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert expected_message in res["message"], res

    @pytest.mark.p3
    def test_avatar_unset(self, get_http_api_auth):
        payload = {"name": "avatar_unset"}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        assert res["data"]["avatar"] is None, res

    @pytest.mark.p3
    def test_avatar_none(self, get_http_api_auth):
        payload = {"name": "avatar_none", "avatar": None}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        assert res["data"]["avatar"] is None, res

    @pytest.mark.p2
    def test_description(self, get_http_api_auth):
        payload = {"name": "description", "description": "description"}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        assert res["data"]["description"] == "description", res

    @pytest.mark.p2
    def test_description_exceeds_limit_length(self, get_http_api_auth):
        payload = {"name": "description_exceeds_limit_length", "description": "a" * 65536}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert "String should have at most 65535 characters" in res["message"], res

    @pytest.mark.p3
    def test_description_unset(self, get_http_api_auth):
        payload = {"name": "description_unset"}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        assert res["data"]["description"] is None, res

    @pytest.mark.p3
    def test_description_none(self, get_http_api_auth):
        payload = {"name": "description_none", "description": None}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        assert res["data"]["description"] is None, res

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "name, embedding_model",
        [
            ("BAAI/bge-small-en-v1.5@Builtin", "BAAI/bge-small-en-v1.5@Builtin"),
            ("embedding-3@ZHIPU-AI", "embedding-3@ZHIPU-AI"),
        ],
        ids=["builtin_baai", "tenant_zhipu"],
    )
    def test_embedding_model(self, get_http_api_auth, name, embedding_model):
        payload = {"name": name, "embedding_model": embedding_model}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        assert res["data"]["embedding_model"] == embedding_model, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "name, embedding_model",
        [
            ("unknown_llm_name", "unknown@ZHIPU-AI"),
            ("unknown_llm_factory", "embedding-3@unknown"),
            ("tenant_no_auth_default_tenant_llm", "text-embedding-v3@Tongyi-Qianwen"),
            ("tenant_no_auth", "text-embedding-3-small@OpenAI"),
        ],
        ids=["unknown_llm_name", "unknown_llm_factory", "tenant_no_auth_default_tenant_llm", "tenant_no_auth"],
    )
    def test_embedding_model_invalid(self, get_http_api_auth, name, embedding_model):
        payload = {"name": name, "embedding_model": embedding_model}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 101, res
        if "tenant_no_auth" in name:
            assert res["message"] == f"Unauthorized model: <{embedding_model}>", res
        else:
            assert res["message"] == f"Unsupported model: <{embedding_model}>", res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "name, embedding_model",
        [
            ("missing_at", "BAAI/bge-small-en-v1.5Builtin"),
            ("missing_model_name", "@Builtin"),
            ("missing_provider", "BAAI/bge-small-en-v1.5@"),
            ("whitespace_only_model_name", " @Builtin"),
            ("whitespace_only_provider", "BAAI/bge-small-en-v1.5@ "),
        ],
        ids=["missing_at", "empty_model_name", "empty_provider", "whitespace_only_model_name", "whitespace_only_provider"],
    )
    def test_embedding_model_format(self, get_http_api_auth, name, embedding_model):
        payload = {"name": name, "embedding_model": embedding_model}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 101, res
        if name == "missing_at":
            assert "Embedding model identifier must follow <model_name>@<provider> format" in res["message"], res
        else:
            assert "Both model_name and provider must be non-empty strings" in res["message"], res

    @pytest.mark.p2
    def test_embedding_model_unset(self, get_http_api_auth):
        payload = {"name": "embedding_model_unset"}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        assert res["data"]["embedding_model"] == "BAAI/bge-small-en-v1.5@Builtin", res

    @pytest.mark.p2
    def test_embedding_model_none(self, get_http_api_auth):
        payload = {"name": "embedding_model_none", "embedding_model": None}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert "Input should be a valid string" in res["message"], res

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "name, permission",
        [
            ("me", "me"),
            ("team", "team"),
("me_upercase", "ME"),
|
||||
("team_upercase", "TEAM"),
|
||||
("whitespace", " ME "),
|
||||
],
|
||||
ids=["me", "team", "me_upercase", "team_upercase", "whitespace"],
|
||||
    )
    def test_permission(self, get_http_api_auth, name, permission):
        payload = {"name": name, "permission": permission}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        assert res["data"]["permission"] == permission.lower().strip(), res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "name, permission",
        [
            ("empty", ""),
            ("unknown", "unknown"),
            ("type_error", list()),
        ],
        ids=["empty", "unknown", "type_error"],
    )
    def test_permission_invalid(self, get_http_api_auth, name, permission):
        payload = {"name": name, "permission": permission}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 101
        assert "Input should be 'me' or 'team'" in res["message"]

    @pytest.mark.p2
    def test_permission_unset(self, get_http_api_auth):
        payload = {"name": "permission_unset"}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        assert res["data"]["permission"] == "me", res

    @pytest.mark.p3
    def test_permission_none(self, get_http_api_auth):
        payload = {"name": "permission_none", "permission": None}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert "Input should be 'me' or 'team'" in res["message"], res

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "name, chunk_method",
        [
            ("naive", "naive"),
            ("book", "book"),
            ("email", "email"),
            ("laws", "laws"),
            ("manual", "manual"),
            ("one", "one"),
            ("paper", "paper"),
            ("picture", "picture"),
            ("presentation", "presentation"),
            ("qa", "qa"),
            ("table", "table"),
            ("tag", "tag"),
        ],
        ids=["naive", "book", "email", "laws", "manual", "one", "paper", "picture", "presentation", "qa", "table", "tag"],
    )
    def test_chunk_method(self, get_http_api_auth, name, chunk_method):
        payload = {"name": name, "chunk_method": chunk_method}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        assert res["data"]["chunk_method"] == chunk_method, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "name, chunk_method",
        [
            ("empty", ""),
            ("unknown", "unknown"),
            ("type_error", list()),
        ],
        ids=["empty", "unknown", "type_error"],
    )
    def test_chunk_method_invalid(self, get_http_api_auth, name, chunk_method):
        payload = {"name": name, "chunk_method": chunk_method}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert "Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table' or 'tag'" in res["message"], res

    @pytest.mark.p2
    def test_chunk_method_unset(self, get_http_api_auth):
        payload = {"name": "chunk_method_unset"}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        assert res["data"]["chunk_method"] == "naive", res

    @pytest.mark.p3
    def test_chunk_method_none(self, get_http_api_auth):
        payload = {"name": "chunk_method_none", "chunk_method": None}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert "Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table' or 'tag'" in res["message"], res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "name, pagerank",
        [
            ("pagerank_min", 0),
            ("pagerank_mid", 50),
            ("pagerank_max", 100),
        ],
        ids=["min", "mid", "max"],
    )
    def test_pagerank(self, get_http_api_auth, name, pagerank):
        payload = {"name": name, "pagerank": pagerank}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        assert res["data"]["pagerank"] == pagerank, res

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "name, pagerank, expected_message",
        [
            ("pagerank_min_limit", -1, "Input should be greater than or equal to 0"),
            ("pagerank_max_limit", 101, "Input should be less than or equal to 100"),
        ],
        ids=["min_limit", "max_limit"],
    )
    def test_pagerank_invalid(self, get_http_api_auth, name, pagerank, expected_message):
        payload = {"name": name, "pagerank": pagerank}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert expected_message in res["message"], res

    @pytest.mark.p3
    def test_pagerank_unset(self, get_http_api_auth):
        payload = {"name": "pagerank_unset"}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        assert res["data"]["pagerank"] == 0, res

    @pytest.mark.p3
    def test_pagerank_none(self, get_http_api_auth):
payload = {"name": "pagerank_unset", "pagerank": None}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert "Input should be a valid integer" in res["message"], res

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "name, parser_config",
        [
            ("auto_keywords_min", {"auto_keywords": 0}),
            ("auto_keywords_mid", {"auto_keywords": 16}),
            ("auto_keywords_max", {"auto_keywords": 32}),
            ("auto_questions_min", {"auto_questions": 0}),
            ("auto_questions_mid", {"auto_questions": 5}),
            ("auto_questions_max", {"auto_questions": 10}),
            ("chunk_token_num_min", {"chunk_token_num": 1}),
            ("chunk_token_num_mid", {"chunk_token_num": 1024}),
            ("chunk_token_num_max", {"chunk_token_num": 2048}),
            ("delimiter", {"delimiter": "\n"}),
            ("delimiter_space", {"delimiter": " "}),
            ("html4excel_true", {"html4excel": True}),
            ("html4excel_false", {"html4excel": False}),
            ("layout_recognize_DeepDOC", {"layout_recognize": "DeepDOC"}),
("layout_recognize_navie", {"layout_recognize": "Plain Text"}),
            ("tag_kb_ids", {"tag_kb_ids": ["1", "2"]}),
            ("topn_tags_min", {"topn_tags": 1}),
            ("topn_tags_mid", {"topn_tags": 5}),
            ("topn_tags_max", {"topn_tags": 10}),
            ("filename_embd_weight_min", {"filename_embd_weight": 0.1}),
            ("filename_embd_weight_mid", {"filename_embd_weight": 0.5}),
            ("filename_embd_weight_max", {"filename_embd_weight": 1.0}),
            ("task_page_size_min", {"task_page_size": 1}),
            ("task_page_size_None", {"task_page_size": None}),
            ("pages", {"pages": [[1, 100]]}),
            ("pages_none", {"pages": None}),
            ("graphrag_true", {"graphrag": {"use_graphrag": True}}),
            ("graphrag_false", {"graphrag": {"use_graphrag": False}}),
            ("graphrag_entity_types", {"graphrag": {"entity_types": ["age", "sex", "height", "weight"]}}),
            ("graphrag_method_general", {"graphrag": {"method": "general"}}),
            ("graphrag_method_light", {"graphrag": {"method": "light"}}),
            ("graphrag_community_true", {"graphrag": {"community": True}}),
            ("graphrag_community_false", {"graphrag": {"community": False}}),
            ("graphrag_resolution_true", {"graphrag": {"resolution": True}}),
            ("graphrag_resolution_false", {"graphrag": {"resolution": False}}),
            ("raptor_true", {"raptor": {"use_raptor": True}}),
            ("raptor_false", {"raptor": {"use_raptor": False}}),
            ("raptor_prompt", {"raptor": {"prompt": "Who are you?"}}),
            ("raptor_max_token_min", {"raptor": {"max_token": 1}}),
            ("raptor_max_token_mid", {"raptor": {"max_token": 1024}}),
            ("raptor_max_token_max", {"raptor": {"max_token": 2048}}),
            ("raptor_threshold_min", {"raptor": {"threshold": 0.0}}),
            ("raptor_threshold_mid", {"raptor": {"threshold": 0.5}}),
            ("raptor_threshold_max", {"raptor": {"threshold": 1.0}}),
            ("raptor_max_cluster_min", {"raptor": {"max_cluster": 1}}),
            ("raptor_max_cluster_mid", {"raptor": {"max_cluster": 512}}),
            ("raptor_max_cluster_max", {"raptor": {"max_cluster": 1024}}),
            ("raptor_random_seed_min", {"raptor": {"random_seed": 0}}),
        ],
        ids=[
            "auto_keywords_min",
            "auto_keywords_mid",
            "auto_keywords_max",
            "auto_questions_min",
            "auto_questions_mid",
            "auto_questions_max",
            "chunk_token_num_min",
            "chunk_token_num_mid",
            "chunk_token_num_max",
            "delimiter",
            "delimiter_space",
            "html4excel_true",
            "html4excel_false",
            "layout_recognize_DeepDOC",
"layout_recognize_navie",
            "tag_kb_ids",
            "topn_tags_min",
            "topn_tags_mid",
            "topn_tags_max",
            "filename_embd_weight_min",
            "filename_embd_weight_mid",
            "filename_embd_weight_max",
            "task_page_size_min",
            "task_page_size_None",
            "pages",
            "pages_none",
            "graphrag_true",
            "graphrag_false",
            "graphrag_entity_types",
            "graphrag_method_general",
            "graphrag_method_light",
            "graphrag_community_true",
            "graphrag_community_false",
            "graphrag_resolution_true",
            "graphrag_resolution_false",
            "raptor_true",
            "raptor_false",
            "raptor_prompt",
            "raptor_max_token_min",
            "raptor_max_token_mid",
            "raptor_max_token_max",
            "raptor_threshold_min",
            "raptor_threshold_mid",
            "raptor_threshold_max",
            "raptor_max_cluster_min",
            "raptor_max_cluster_mid",
            "raptor_max_cluster_max",
            "raptor_random_seed_min",
        ],
    )
    def test_parser_config(self, get_http_api_auth, name, parser_config):
        payload = {"name": name, "parser_config": parser_config}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        for k, v in parser_config.items():
            if isinstance(v, dict):
                for kk, vv in v.items():
                    assert res["data"]["parser_config"][k][kk] == vv, res
            else:
                assert res["data"]["parser_config"][k] == v, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "name, parser_config, expected_message",
        [
            ("auto_keywords_min_limit", {"auto_keywords": -1}, "Input should be greater than or equal to 0"),
            ("auto_keywords_max_limit", {"auto_keywords": 33}, "Input should be less than or equal to 32"),
            ("auto_keywords_float_not_allowed", {"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
            ("auto_keywords_type_invalid", {"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
            ("auto_questions_min_limit", {"auto_questions": -1}, "Input should be greater than or equal to 0"),
            ("auto_questions_max_limit", {"auto_questions": 11}, "Input should be less than or equal to 10"),
            ("auto_questions_float_not_allowed", {"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
            ("auto_questions_type_invalid", {"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
            ("chunk_token_num_min_limit", {"chunk_token_num": 0}, "Input should be greater than or equal to 1"),
            ("chunk_token_num_max_limit", {"chunk_token_num": 2049}, "Input should be less than or equal to 2048"),
            ("chunk_token_num_float_not_allowed", {"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
            ("chunk_token_num_type_invalid", {"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
            ("delimiter_empty", {"delimiter": ""}, "String should have at least 1 character"),
            ("html4excel_type_invalid", {"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"),
            ("tag_kb_ids_not_list", {"tag_kb_ids": "1,2"}, "Input should be a valid list"),
            ("tag_kb_ids_int_in_list", {"tag_kb_ids": [1, 2]}, "Input should be a valid string"),
            ("topn_tags_min_limit", {"topn_tags": 0}, "Input should be greater than or equal to 1"),
            ("topn_tags_max_limit", {"topn_tags": 11}, "Input should be less than or equal to 10"),
            ("topn_tags_float_not_allowed", {"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
            ("topn_tags_type_invalid", {"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
            ("filename_embd_weight_min_limit", {"filename_embd_weight": -1}, "Input should be greater than or equal to 0"),
            ("filename_embd_weight_max_limit", {"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"),
            ("filename_embd_weight_type_invalid", {"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"),
            ("task_page_size_min_limit", {"task_page_size": 0}, "Input should be greater than or equal to 1"),
            ("task_page_size_float_not_allowed", {"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
            ("task_page_size_type_invalid", {"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
            ("pages_not_list", {"pages": "1,2"}, "Input should be a valid list"),
            ("pages_not_list_in_list", {"pages": ["1,2"]}, "Input should be a valid list"),
            ("pages_not_int_list", {"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"),
            ("graphrag_type_invalid", {"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"),
            ("graphrag_entity_types_not_list", {"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"),
("graphrag_entity_types_not_str_in_list", {"graphrag": {"entity_types": [1, 2]}}, "nput should be a valid string"),
            ("graphrag_method_unknown", {"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"),
            ("graphrag_method_none", {"graphrag": {"method": None}}, "Input should be 'light' or 'general'"),
            ("graphrag_community_type_invalid", {"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"),
            ("graphrag_resolution_type_invalid", {"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"),
            ("raptor_type_invalid", {"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"),
            ("raptor_prompt_empty", {"raptor": {"prompt": ""}}, "String should have at least 1 character"),
            ("raptor_prompt_space", {"raptor": {"prompt": " "}}, "String should have at least 1 character"),
            ("raptor_max_token_min_limit", {"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"),
            ("raptor_max_token_max_limit", {"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"),
            ("raptor_max_token_float_not_allowed", {"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
            ("raptor_max_token_type_invalid", {"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
            ("raptor_threshold_min_limit", {"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"),
            ("raptor_threshold_max_limit", {"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"),
            ("raptor_threshold_type_invalid", {"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"),
            ("raptor_max_cluster_min_limit", {"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"),
            ("raptor_max_cluster_max_limit", {"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"),
("raptor_max_cluster_float_not_allowed", {"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional par"),
            ("raptor_max_cluster_type_invalid", {"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
            ("raptor_random_seed_min_limit", {"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"),
            ("raptor_random_seed_float_not_allowed", {"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
            ("raptor_random_seed_type_invalid", {"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
            ("parser_config_type_invalid", {"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"),
        ],
        ids=[
            "auto_keywords_min_limit",
            "auto_keywords_max_limit",
            "auto_keywords_float_not_allowed",
            "auto_keywords_type_invalid",
            "auto_questions_min_limit",
            "auto_questions_max_limit",
            "auto_questions_float_not_allowed",
            "auto_questions_type_invalid",
            "chunk_token_num_min_limit",
            "chunk_token_num_max_limit",
            "chunk_token_num_float_not_allowed",
            "chunk_token_num_type_invalid",
            "delimiter_empty",
            "html4excel_type_invalid",
            "tag_kb_ids_not_list",
            "tag_kb_ids_int_in_list",
            "topn_tags_min_limit",
            "topn_tags_max_limit",
            "topn_tags_float_not_allowed",
            "topn_tags_type_invalid",
            "filename_embd_weight_min_limit",
            "filename_embd_weight_max_limit",
            "filename_embd_weight_type_invalid",
            "task_page_size_min_limit",
            "task_page_size_float_not_allowed",
            "task_page_size_type_invalid",
            "pages_not_list",
            "pages_not_list_in_list",
            "pages_not_int_list",
            "graphrag_type_invalid",
            "graphrag_entity_types_not_list",
            "graphrag_entity_types_not_str_in_list",
            "graphrag_method_unknown",
            "graphrag_method_none",
            "graphrag_community_type_invalid",
            "graphrag_resolution_type_invalid",
            "raptor_type_invalid",
            "raptor_prompt_empty",
            "raptor_prompt_space",
            "raptor_max_token_min_limit",
            "raptor_max_token_max_limit",
            "raptor_max_token_float_not_allowed",
            "raptor_max_token_type_invalid",
            "raptor_threshold_min_limit",
            "raptor_threshold_max_limit",
            "raptor_threshold_type_invalid",
            "raptor_max_cluster_min_limit",
            "raptor_max_cluster_max_limit",
            "raptor_max_cluster_float_not_allowed",
            "raptor_max_cluster_type_invalid",
            "raptor_random_seed_min_limit",
            "raptor_random_seed_float_not_allowed",
            "raptor_random_seed_type_invalid",
            "parser_config_type_invalid",
        ],
    )
    def test_parser_config_invalid(self, get_http_api_auth, name, parser_config, expected_message):
        payload = {"name": name, "parser_config": parser_config}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert expected_message in res["message"], res

    @pytest.mark.p2
    def test_parser_config_empty(self, get_http_api_auth):
        payload = {"name": "parser_config_empty", "parser_config": {}}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        assert res["data"]["parser_config"] == {
            "chunk_token_num": 128,
            "delimiter": r"\n",
            "html4excel": False,
            "layout_recognize": "DeepDOC",
            "raptor": {"use_raptor": False},
        }, res

    @pytest.mark.p2
    def test_parser_config_unset(self, get_http_api_auth):
        payload = {"name": "parser_config_unset"}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        assert res["data"]["parser_config"] == {
            "chunk_token_num": 128,
            "delimiter": r"\n",
            "html4excel": False,
            "layout_recognize": "DeepDOC",
            "raptor": {"use_raptor": False},
        }, res

    @pytest.mark.p3
    def test_parser_config_none(self, get_http_api_auth):
        payload = {"name": "parser_config_none", "parser_config": None}
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 0, res
        assert res["data"]["parser_config"] == {
            "chunk_token_num": 128,
            "delimiter": r"\n",
            "html4excel": False,
            "layout_recognize": "DeepDOC",
            "raptor": {"use_raptor": False},
        }, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "payload",
        [
            {"name": "id", "id": "id"},
            {"name": "tenant_id", "tenant_id": "e57c1966f99211efb41e9e45646e0111"},
            {"name": "created_by", "created_by": "created_by"},
            {"name": "create_date", "create_date": "Tue, 11 Mar 2025 13:37:23 GMT"},
            {"name": "create_time", "create_time": 1741671443322},
            {"name": "update_date", "update_date": "Tue, 11 Mar 2025 13:37:23 GMT"},
            {"name": "update_time", "update_time": 1741671443339},
            {"name": "document_count", "document_count": 1},
            {"name": "chunk_count", "chunk_count": 1},
            {"name": "token_num", "token_num": 1},
            {"name": "status", "status": "1"},
            {"name": "unknown_field", "unknown_field": "unknown_field"},
        ],
    )
    def test_unsupported_field(self, get_http_api_auth, payload):
        res = create_dataset(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert "Extra inputs are not permitted" in res["message"], res
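# Editor's note: for quick reference, a payload that satisfies every constraint
# exercised above. Field names and defaults are taken from these tests; treat
# this as a sketch, not as API documentation.
#
# payload = {
#     "name": "example",  # 1-128 characters, unique per tenant (case-insensitive)
#     "avatar": None,  # or "data:image/png;base64,<data>", at most 65535 characters
#     "description": None,  # at most 65535 characters
#     "embedding_model": "BAAI/bge-small-en-v1.5@Builtin",  # "<model_name>@<provider>"
#     "permission": "me",  # "me" or "team", case-insensitive, surrounding whitespace stripped
#     "chunk_method": "naive",  # one of the twelve methods listed above
#     "pagerank": 0,  # integer in [0, 100]
#     "parser_config": {"chunk_token_num": 128, "delimiter": r"\n"},  # server-default shape
# }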
@ -1,219 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import uuid
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import (
    INVALID_API_TOKEN,
    batch_create_datasets,
    delete_datasets,
    list_datasets,
)
from libs.auth import RAGFlowHttpApiAuth


class TestAuthorization:
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_auth_invalid(self, auth, expected_code, expected_message):
        res = delete_datasets(auth)
        assert res["code"] == expected_code, res
        assert res["message"] == expected_message, res


class TestRequest:
    @pytest.mark.p3
    def test_content_type_bad(self, get_http_api_auth):
        BAD_CONTENT_TYPE = "text/xml"
        res = delete_datasets(get_http_api_auth, headers={"Content-Type": BAD_CONTENT_TYPE})
        assert res["code"] == 101, res
        assert res["message"] == f"Unsupported content type: Expected application/json, got {BAD_CONTENT_TYPE}", res

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "payload, expected_message",
        [
            ("a", "Malformed JSON syntax: Missing commas/brackets or invalid encoding"),
            ('"a"', "Invalid request payload: expected object, got str"),
        ],
        ids=["malformed_json_syntax", "invalid_request_payload_type"],
    )
    def test_payload_bad(self, get_http_api_auth, payload, expected_message):
        res = delete_datasets(get_http_api_auth, data=payload)
        assert res["code"] == 101, res
        assert res["message"] == expected_message, res

    @pytest.mark.p3
    def test_payload_unset(self, get_http_api_auth):
        res = delete_datasets(get_http_api_auth, None)
        assert res["code"] == 101, res
        assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res


class TestCapability:
    @pytest.mark.p3
    def test_delete_dataset_1k(self, get_http_api_auth):
        ids = batch_create_datasets(get_http_api_auth, 1_000)
        res = delete_datasets(get_http_api_auth, {"ids": ids})
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 0, res

    @pytest.mark.p3
    def test_concurrent_deletion(self, get_http_api_auth):
        dataset_num = 1_000
        ids = batch_create_datasets(get_http_api_auth, dataset_num)

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(delete_datasets, get_http_api_auth, {"ids": ids[i : i + 1]}) for i in range(dataset_num)]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses), responses


class TestDatasetsDelete:
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "func, expected_code, expected_message, remaining",
        [
            (lambda r: {"ids": r[:1]}, 0, "", 2),
            (lambda r: {"ids": r}, 0, "", 0),
        ],
        ids=["single_dataset", "multiple_datasets"],
    )
    def test_ids(self, get_http_api_auth, add_datasets_func, func, expected_code, expected_message, remaining):
        dataset_ids = add_datasets_func
        if callable(func):
            payload = func(dataset_ids)
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == expected_code, res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == remaining, res

    @pytest.mark.p1
    @pytest.mark.usefixtures("add_dataset_func")
    def test_ids_empty(self, get_http_api_auth):
        payload = {"ids": []}
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 1, res

    @pytest.mark.p1
    @pytest.mark.usefixtures("add_datasets_func")
    def test_ids_none(self, get_http_api_auth):
        payload = {"ids": None}
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 0, res

    @pytest.mark.p2
    @pytest.mark.usefixtures("add_dataset_func")
    def test_id_not_uuid(self, get_http_api_auth):
        payload = {"ids": ["not_uuid"]}
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert "Invalid UUID1 format" in res["message"], res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 1, res

    @pytest.mark.p3
    @pytest.mark.usefixtures("add_dataset_func")
    def test_id_not_uuid1(self, get_http_api_auth):
        payload = {"ids": [uuid.uuid4().hex]}
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert "Invalid UUID1 format" in res["message"], res

    @pytest.mark.p2
    @pytest.mark.usefixtures("add_dataset_func")
    def test_id_wrong_uuid(self, get_http_api_auth):
        payload = {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"]}
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 108, res
        assert "lacks permission for dataset" in res["message"], res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 1, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "func",
        [
            lambda r: {"ids": ["d94a8dc02c9711f0930f7fbc369eab6d"] + r},
            lambda r: {"ids": r[:1] + ["d94a8dc02c9711f0930f7fbc369eab6d"] + r[1:3]},
            lambda r: {"ids": r + ["d94a8dc02c9711f0930f7fbc369eab6d"]},
        ],
    )
    def test_ids_partial_invalid(self, get_http_api_auth, add_datasets_func, func):
        dataset_ids = add_datasets_func
        if callable(func):
            payload = func(dataset_ids)
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 108, res
        assert "lacks permission for dataset" in res["message"], res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 3, res

    @pytest.mark.p2
    def test_ids_duplicate(self, get_http_api_auth, add_datasets_func):
        dataset_ids = add_datasets_func
        payload = {"ids": dataset_ids + dataset_ids}
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert "Duplicate ids:" in res["message"], res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 3, res

    @pytest.mark.p2
    def test_repeated_delete(self, get_http_api_auth, add_datasets_func):
        dataset_ids = add_datasets_func
        payload = {"ids": dataset_ids}
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 0, res

        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 108, res
        assert "lacks permission for dataset" in res["message"], res

    @pytest.mark.p2
    @pytest.mark.usefixtures("add_dataset_func")
    def test_field_unsupported(self, get_http_api_auth):
        payload = {"unknown_field": "unknown_field"}
        res = delete_datasets(get_http_api_auth, payload)
        assert res["code"] == 101, res
        assert "Extra inputs are not permitted" in res["message"], res

        res = list_datasets(get_http_api_auth)
        assert len(res["data"]) == 1, res
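# Editor's note: read together, test_ids_partial_invalid and test_ids_duplicate
# pin down an all-or-nothing contract for batch deletion: if any id in the batch
# is unknown (code 108) or duplicated (code 101), no dataset is removed and the
# original three datasets remain listable afterwards.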
@ -1,339 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import uuid
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import INVALID_API_TOKEN, list_datasets
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import is_sorted


class TestAuthorization:
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_auth_invalid(self, auth, expected_code, expected_message):
        res = list_datasets(auth)
        assert res["code"] == expected_code, res
        assert res["message"] == expected_message, res


class TestCapability:
    @pytest.mark.p3
    def test_concurrent_list(self, get_http_api_auth):
        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(list_datasets, get_http_api_auth) for _ in range(100)]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses), responses


@pytest.mark.usefixtures("add_datasets")
class TestDatasetsList:
    @pytest.mark.p1
    def test_params_unset(self, get_http_api_auth):
        res = list_datasets(get_http_api_auth, None)
        assert res["code"] == 0, res
        assert len(res["data"]) == 5, res

    @pytest.mark.p2
    def test_params_empty(self, get_http_api_auth):
        res = list_datasets(get_http_api_auth, {})
        assert res["code"] == 0, res
        assert len(res["data"]) == 5, res

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "params, expected_page_size",
        [
            ({"page": 2, "page_size": 2}, 2),
            ({"page": 3, "page_size": 2}, 1),
            ({"page": 4, "page_size": 2}, 0),
            ({"page": "2", "page_size": 2}, 2),
            ({"page": 1, "page_size": 10}, 5),
        ],
        ids=["normal_middle_page", "normal_last_partial_page", "beyond_max_page", "string_page_number", "full_data_single_page"],
    )
    def test_page(self, get_http_api_auth, params, expected_page_size):
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 0, res
        assert len(res["data"]) == expected_page_size, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "params, expected_code, expected_message",
        [
            ({"page": 0}, 101, "Input should be greater than or equal to 1"),
            ({"page": "a"}, 101, "Input should be a valid integer, unable to parse string as an integer"),
        ],
        ids=["page_0", "page_a"],
    )
    def test_page_invalid(self, get_http_api_auth, params, expected_code, expected_message):
        res = list_datasets(get_http_api_auth, params=params)
        assert res["code"] == expected_code, res
        assert expected_message in res["message"], res

    @pytest.mark.p2
    def test_page_none(self, get_http_api_auth):
        params = {"page": None}
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 0, res
        assert len(res["data"]) == 5, res

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "params, expected_page_size",
        [
            ({"page_size": 1}, 1),
            ({"page_size": 3}, 3),
            ({"page_size": 5}, 5),
            ({"page_size": 6}, 5),
            ({"page_size": "1"}, 1),
        ],
        ids=["min_valid_page_size", "medium_page_size", "page_size_equals_total", "page_size_exceeds_total", "string_type_page_size"],
    )
    def test_page_size(self, get_http_api_auth, params, expected_page_size):
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 0, res
        assert len(res["data"]) == expected_page_size, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "params, expected_code, expected_message",
        [
            ({"page_size": 0}, 101, "Input should be greater than or equal to 1"),
            ({"page_size": "a"}, 101, "Input should be a valid integer, unable to parse string as an integer"),
        ],
    )
    def test_page_size_invalid(self, get_http_api_auth, params, expected_code, expected_message):
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == expected_code, res
        assert expected_message in res["message"], res

    @pytest.mark.p2
    def test_page_size_none(self, get_http_api_auth):
        params = {"page_size": None}
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 0, res
        assert len(res["data"]) == 5, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "params, assertions",
        [
            ({"orderby": "create_time"}, lambda r: (is_sorted(r["data"], "create_time", True))),
            ({"orderby": "update_time"}, lambda r: (is_sorted(r["data"], "update_time", True))),
            ({"orderby": "CREATE_TIME"}, lambda r: (is_sorted(r["data"], "create_time", True))),
            ({"orderby": "UPDATE_TIME"}, lambda r: (is_sorted(r["data"], "update_time", True))),
({"orderby": " create_time "}, lambda r: (is_sorted(r["data"], "update_time", True))),
|
||||
],
|
||||
ids=["orderby_create_time", "orderby_update_time", "orderby_create_time_upper", "orderby_update_time_upper", "whitespace"],
|
||||
)
|
||||
def test_orderby(self, get_http_api_auth, params, assertions):
|
||||
res = list_datasets(get_http_api_auth, params)
|
||||
assert res["code"] == 0, res
|
||||
if callable(assertions):
|
||||
assert assertions(res), res
|
||||
|
||||
@pytest.mark.p3
|
||||
@pytest.mark.parametrize(
|
||||
"params",
|
||||
[
|
||||
{"orderby": ""},
|
||||
{"orderby": "unknown"},
|
||||
],
|
||||
ids=["empty", "unknown"],
|
||||
)
|
||||
def test_orderby_invalid(self, get_http_api_auth, params):
|
||||
res = list_datasets(get_http_api_auth, params)
|
||||
assert res["code"] == 101, res
|
||||
assert "Input should be 'create_time' or 'update_time'" in res["message"], res
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_orderby_none(self, get_http_api_auth):
|
||||
params = {"order_by": None}
|
||||
res = list_datasets(get_http_api_auth, params)
|
||||
assert res["code"] == 0, res
|
||||
assert is_sorted(res["data"], "create_time", True), res
|
||||
|
||||
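    # A minimal sketch of what the is_sorted helper used above is assumed to
    # check (hypothetical; the real helper lived in the removed libs/utils
    # package and may differ):
    #
    # def is_sorted(items, field, descending=True):
    #     values = [item[field] for item in items]
    #     return values == sorted(values, reverse=descending)
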
    @pytest.mark.p2
    @pytest.mark.parametrize(
        "params, assertions",
        [
            ({"desc": True}, lambda r: (is_sorted(r["data"], "create_time", True))),
            ({"desc": False}, lambda r: (is_sorted(r["data"], "create_time", False))),
            ({"desc": "true"}, lambda r: (is_sorted(r["data"], "create_time", True))),
            ({"desc": "false"}, lambda r: (is_sorted(r["data"], "create_time", False))),
            ({"desc": 1}, lambda r: (is_sorted(r["data"], "create_time", True))),
            ({"desc": 0}, lambda r: (is_sorted(r["data"], "create_time", False))),
            ({"desc": "yes"}, lambda r: (is_sorted(r["data"], "create_time", True))),
            ({"desc": "no"}, lambda r: (is_sorted(r["data"], "create_time", False))),
            ({"desc": "y"}, lambda r: (is_sorted(r["data"], "create_time", True))),
            ({"desc": "n"}, lambda r: (is_sorted(r["data"], "create_time", False))),
        ],
        ids=["desc=True", "desc=False", "desc=true", "desc=false", "desc=1", "desc=0", "desc=yes", "desc=no", "desc=y", "desc=n"],
    )
    def test_desc(self, get_http_api_auth, params, assertions):
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 0, res
        if callable(assertions):
            assert assertions(res), res

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "params",
        [
            {"desc": 3.14},
            {"desc": "unknown"},
        ],
        ids=["float", "unknown"],
    )
    def test_desc_invalid(self, get_http_api_auth, params):
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 101, res
        assert "Input should be a valid boolean, unable to interpret input" in res["message"], res

    @pytest.mark.p3
    def test_desc_none(self, get_http_api_auth):
        params = {"desc": None}
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 0, res
        assert is_sorted(res["data"], "create_time", True), res

    @pytest.mark.p1
    def test_name(self, get_http_api_auth):
        params = {"name": "dataset_1"}
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 0, res
        assert len(res["data"]) == 1, res
        assert res["data"][0]["name"] == "dataset_1", res

    @pytest.mark.p2
    def test_name_wrong(self, get_http_api_auth):
        params = {"name": "wrong name"}
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 108, res
        assert "lacks permission for dataset" in res["message"], res

    @pytest.mark.p2
    def test_name_empty(self, get_http_api_auth):
        params = {"name": ""}
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 0, res
        assert len(res["data"]) == 5, res

    @pytest.mark.p2
    def test_name_none(self, get_http_api_auth):
        params = {"name": None}
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 0, res
        assert len(res["data"]) == 5, res

    @pytest.mark.p1
    def test_id(self, get_http_api_auth, add_datasets):
        dataset_ids = add_datasets
        params = {"id": dataset_ids[0]}
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 0
        assert len(res["data"]) == 1
        assert res["data"][0]["id"] == dataset_ids[0]

    @pytest.mark.p2
    def test_id_not_uuid(self, get_http_api_auth):
        params = {"id": "not_uuid"}
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 101, res
        assert "Invalid UUID1 format" in res["message"], res

    @pytest.mark.p2
    def test_id_not_uuid1(self, get_http_api_auth):
        params = {"id": uuid.uuid4().hex}
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 101, res
        assert "Invalid UUID1 format" in res["message"], res

    @pytest.mark.p2
    def test_id_wrong_uuid(self, get_http_api_auth):
        params = {"id": "d94a8dc02c9711f0930f7fbc369eab6d"}
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 108, res
        assert "lacks permission for dataset" in res["message"], res

    @pytest.mark.p2
    def test_id_empty(self, get_http_api_auth):
        params = {"id": ""}
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 101, res
        assert "Invalid UUID1 format" in res["message"], res

    @pytest.mark.p2
    def test_id_none(self, get_http_api_auth):
        params = {"id": None}
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 0, res
        assert len(res["data"]) == 5, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "func, name, expected_num",
        [
            (lambda r: r[0], "dataset_0", 1),
            (lambda r: r[0], "dataset_1", 0),
        ],
        ids=["name_and_id_match", "name_and_id_mismatch"],
    )
    def test_name_and_id(self, get_http_api_auth, add_datasets, func, name, expected_num):
        dataset_ids = add_datasets
        if callable(func):
            params = {"id": func(dataset_ids), "name": name}
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 0, res
        assert len(res["data"]) == expected_num, res

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "dataset_id, name",
        [
            (lambda r: r[0], "wrong_name"),
            (uuid.uuid1().hex, "dataset_0"),
        ],
        ids=["name", "id"],
    )
    def test_name_and_id_wrong(self, get_http_api_auth, add_datasets, dataset_id, name):
        dataset_ids = add_datasets
        if callable(dataset_id):
            params = {"id": dataset_id(dataset_ids), "name": name}
        else:
            params = {"id": dataset_id, "name": name}
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 108, res
        assert "lacks permission for dataset" in res["message"], res

    @pytest.mark.p2
    def test_field_unsupported(self, get_http_api_auth):
        params = {"unknown_field": "unknown_field"}
        res = list_datasets(get_http_api_auth, params)
        assert res["code"] == 101, res
        assert "Extra inputs are not permitted" in res["message"], res
@ -1,819 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import uuid
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import DATASET_NAME_LIMIT, INVALID_API_TOKEN, list_datasets, update_dataset
from hypothesis import HealthCheck, example, given, settings
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import encode_avatar
from libs.utils.file_utils import create_image_file
from libs.utils.hypothesis_utils import valid_names

# TODO: Missing scenario for updating embedding_model with chunk_count != 0
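# A minimal sketch of that missing scenario (hypothetical: the setup step and
# the expected error code are assumptions, not confirmed project behavior):
#
# def test_embedding_model_with_chunks(get_http_api_auth, add_dataset_func):
#     dataset_id = add_dataset_func
#     # ...upload and parse a document here so the dataset's chunk_count > 0...
#     res = update_dataset(get_http_api_auth, dataset_id, {"embedding_model": "embedding-3@ZHIPU-AI"})
#     assert res["code"] == 101, res  # assumed: switching models once chunks exist is rejected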


class TestAuthorization:
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
        ids=["empty_auth", "invalid_api_token"],
    )
    def test_auth_invalid(self, auth, expected_code, expected_message):
        res = update_dataset(auth, "dataset_id")
        assert res["code"] == expected_code, res
        assert res["message"] == expected_message, res


class TestRequest:
    @pytest.mark.p3
    def test_bad_content_type(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        BAD_CONTENT_TYPE = "text/xml"
        res = update_dataset(get_http_api_auth, dataset_id, {"name": "bad_content_type"}, headers={"Content-Type": BAD_CONTENT_TYPE})
        assert res["code"] == 101, res
        assert res["message"] == f"Unsupported content type: Expected application/json, got {BAD_CONTENT_TYPE}", res

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "payload, expected_message",
        [
            ("a", "Malformed JSON syntax: Missing commas/brackets or invalid encoding"),
            ('"a"', "Invalid request payload: expected object, got str"),
        ],
        ids=["malformed_json_syntax", "invalid_request_payload_type"],
    )
    def test_payload_bad(self, get_http_api_auth, add_dataset_func, payload, expected_message):
        dataset_id = add_dataset_func
        res = update_dataset(get_http_api_auth, dataset_id, data=payload)
        assert res["code"] == 101, res
        assert res["message"] == expected_message, res

    @pytest.mark.p2
    def test_payload_empty(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        res = update_dataset(get_http_api_auth, dataset_id, {})
        assert res["code"] == 101, res
        assert res["message"] == "No properties were modified", res

    @pytest.mark.p3
    def test_payload_unset(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        res = update_dataset(get_http_api_auth, dataset_id, None)
        assert res["code"] == 101, res
        assert res["message"] == "Malformed JSON syntax: Missing commas/brackets or invalid encoding", res


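# For orientation, the update_dataset helper imported from common is assumed to
# look roughly like this (hypothetical sketch; the helper is among the removed
# utilities, and HOST_ADDRESS is an assumed constant):
#
# def update_dataset(auth, dataset_id, payload=None, *, data=None, headers=None):
#     url = f"{HOST_ADDRESS}/api/v1/datasets/{dataset_id}"
#     res = requests.put(url, json=payload, data=data, headers=headers, auth=auth)
#     return res.json()

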
class TestCapability:
    @pytest.mark.p3
    def test_update_dataset_concurrent(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(update_dataset, get_http_api_auth, dataset_id, {"name": f"dataset_{i}"}) for i in range(100)]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses), responses


class TestDatasetUpdate:
    @pytest.mark.p3
    def test_dataset_id_not_uuid(self, get_http_api_auth):
        payload = {"name": "not uuid"}
        res = update_dataset(get_http_api_auth, "not_uuid", payload)
        assert res["code"] == 101, res
        assert "Invalid UUID1 format" in res["message"], res

    @pytest.mark.p3
    def test_dataset_id_not_uuid1(self, get_http_api_auth):
        payload = {"name": "not uuid1"}
        res = update_dataset(get_http_api_auth, uuid.uuid4().hex, payload)
        assert res["code"] == 101, res
        assert "Invalid UUID1 format" in res["message"], res

    @pytest.mark.p3
    def test_dataset_id_wrong_uuid(self, get_http_api_auth):
        payload = {"name": "wrong uuid"}
        res = update_dataset(get_http_api_auth, "d94a8dc02c9711f0930f7fbc369eab6d", payload)
        assert res["code"] == 108, res
        assert "lacks permission for dataset" in res["message"], res

    @pytest.mark.p1
    @given(name=valid_names())
    @example("a" * 128)
    @settings(max_examples=20, suppress_health_check=[HealthCheck.function_scoped_fixture])
    def test_name(self, get_http_api_auth, add_dataset_func, name):
        dataset_id = add_dataset_func
        payload = {"name": name}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert res["code"] == 0, res
        assert res["data"][0]["name"] == name, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "name, expected_message",
        [
            ("", "String should have at least 1 character"),
            (" ", "String should have at least 1 character"),
            ("a" * (DATASET_NAME_LIMIT + 1), "String should have at most 128 characters"),
            (0, "Input should be a valid string"),
            (None, "Input should be a valid string"),
        ],
        ids=["empty_name", "space_name", "too_long_name", "invalid_name", "None_name"],
    )
    def test_name_invalid(self, get_http_api_auth, add_dataset_func, name, expected_message):
        dataset_id = add_dataset_func
        payload = {"name": name}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 101, res
        assert expected_message in res["message"], res

    @pytest.mark.p3
    def test_name_duplicated(self, get_http_api_auth, add_datasets_func):
        dataset_id = add_datasets_func[0]
        name = "dataset_1"
        payload = {"name": name}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 102, res
        assert res["message"] == f"Dataset name '{name}' already exists", res

    @pytest.mark.p3
    def test_name_case_insensitive(self, get_http_api_auth, add_datasets_func):
        dataset_id = add_datasets_func[0]
        name = "DATASET_1"
        payload = {"name": name}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 102, res
        assert res["message"] == f"Dataset name '{name}' already exists", res

    @pytest.mark.p2
    def test_avatar(self, get_http_api_auth, add_dataset_func, tmp_path):
        dataset_id = add_dataset_func
        fn = create_image_file(tmp_path / "ragflow_test.png")
        payload = {
            "avatar": f"data:image/png;base64,{encode_avatar(fn)}",
        }
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert res["code"] == 0, res
        assert res["data"][0]["avatar"] == f"data:image/png;base64,{encode_avatar(fn)}", res

    @pytest.mark.p2
    def test_avatar_exceeds_limit_length(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        payload = {"avatar": "a" * 65536}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 101, res
        assert "String should have at most 65535 characters" in res["message"], res

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "avatar_prefix, expected_message",
        [
            ("", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"),
            ("data:image/png;base64", "Missing MIME prefix. Expected format: data:<mime>;base64,<data>"),
            ("invalid_mine_prefix:image/png;base64,", "Invalid MIME prefix format. Must start with 'data:'"),
            ("data:unsupported_mine_type;base64,", "Unsupported MIME type. Allowed: ['image/jpeg', 'image/png']"),
        ],
        ids=["empty_prefix", "missing_comma", "invalid_mime_prefix", "unsupported_mime_type"],
    )
    def test_avatar_invalid_prefix(self, get_http_api_auth, add_dataset_func, tmp_path, avatar_prefix, expected_message):
        dataset_id = add_dataset_func
        fn = create_image_file(tmp_path / "ragflow_test.png")
        payload = {"avatar": f"{avatar_prefix}{encode_avatar(fn)}"}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 101, res
        assert expected_message in res["message"], res

    @pytest.mark.p3
    def test_avatar_none(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        payload = {"avatar": None}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert res["code"] == 0, res
        assert res["data"][0]["avatar"] is None, res

    @pytest.mark.p2
    def test_description(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        payload = {"description": "description"}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 0

        res = list_datasets(get_http_api_auth, {"id": dataset_id})
        assert res["code"] == 0, res
        assert res["data"][0]["description"] == "description"

    @pytest.mark.p2
    def test_description_exceeds_limit_length(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        payload = {"description": "a" * 65536}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 101, res
        assert "String should have at most 65535 characters" in res["message"], res

    @pytest.mark.p3
    def test_description_none(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        payload = {"description": None}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth, {"id": dataset_id})
        assert res["code"] == 0, res
        assert res["data"][0]["description"] is None

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "embedding_model",
        [
            "BAAI/bge-small-en-v1.5@Builtin",
            "embedding-3@ZHIPU-AI",
        ],
        ids=["builtin_baai", "tenant_zhipu"],
    )
    def test_embedding_model(self, get_http_api_auth, add_dataset_func, embedding_model):
        dataset_id = add_dataset_func
        payload = {"embedding_model": embedding_model}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert res["code"] == 0, res
        assert res["data"][0]["embedding_model"] == embedding_model, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "name, embedding_model",
        [
            ("unknown_llm_name", "unknown@ZHIPU-AI"),
            ("unknown_llm_factory", "embedding-3@unknown"),
            ("tenant_no_auth_default_tenant_llm", "text-embedding-v3@Tongyi-Qianwen"),
            ("tenant_no_auth", "text-embedding-3-small@OpenAI"),
        ],
        ids=["unknown_llm_name", "unknown_llm_factory", "tenant_no_auth_default_tenant_llm", "tenant_no_auth"],
    )
    def test_embedding_model_invalid(self, get_http_api_auth, add_dataset_func, name, embedding_model):
        dataset_id = add_dataset_func
        payload = {"name": name, "embedding_model": embedding_model}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 101, res
        if "tenant_no_auth" in name:
            assert res["message"] == f"Unauthorized model: <{embedding_model}>", res
        else:
            assert res["message"] == f"Unsupported model: <{embedding_model}>", res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "name, embedding_model",
        [
            ("missing_at", "BAAI/bge-small-en-v1.5Builtin"),
            ("missing_model_name", "@Builtin"),
            ("missing_provider", "BAAI/bge-small-en-v1.5@"),
            ("whitespace_only_model_name", " @Builtin"),
            ("whitespace_only_provider", "BAAI/bge-small-en-v1.5@ "),
        ],
        ids=["missing_at", "empty_model_name", "empty_provider", "whitespace_only_model_name", "whitespace_only_provider"],
    )
    def test_embedding_model_format(self, get_http_api_auth, add_dataset_func, name, embedding_model):
        dataset_id = add_dataset_func
        payload = {"name": name, "embedding_model": embedding_model}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 101, res
        if name == "missing_at":
            assert "Embedding model identifier must follow <model_name>@<provider> format" in res["message"], res
        else:
            assert "Both model_name and provider must be non-empty strings" in res["message"], res

    @pytest.mark.p2
    def test_embedding_model_none(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        payload = {"embedding_model": None}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 101, res
        assert "Input should be a valid string" in res["message"], res

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "permission",
        [
            "me",
            "team",
            "ME",
            "TEAM",
            " ME ",
        ],
        ids=["me", "team", "me_uppercase", "team_uppercase", "whitespace"],
    )
    def test_permission(self, get_http_api_auth, add_dataset_func, permission):
        dataset_id = add_dataset_func
        payload = {"permission": permission}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert res["code"] == 0, res
        assert res["data"][0]["permission"] == permission.lower().strip(), res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "permission",
        [
            "",
            "unknown",
            list(),
        ],
        ids=["empty", "unknown", "type_error"],
    )
    def test_permission_invalid(self, get_http_api_auth, add_dataset_func, permission):
        dataset_id = add_dataset_func
        payload = {"permission": permission}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 101
        assert "Input should be 'me' or 'team'" in res["message"]

    @pytest.mark.p3
    def test_permission_none(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        payload = {"permission": None}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 101, res
        assert "Input should be 'me' or 'team'" in res["message"], res

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "chunk_method",
        [
            "naive",
            "book",
            "email",
            "laws",
            "manual",
            "one",
            "paper",
            "picture",
            "presentation",
            "qa",
            "table",
            "tag",
        ],
        ids=["naive", "book", "email", "laws", "manual", "one", "paper", "picture", "presentation", "qa", "table", "tag"],
    )
    def test_chunk_method(self, get_http_api_auth, add_dataset_func, chunk_method):
        dataset_id = add_dataset_func
        payload = {"chunk_method": chunk_method}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert res["code"] == 0, res
        assert res["data"][0]["chunk_method"] == chunk_method, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "chunk_method",
        [
            "",
            "unknown",
            list(),
        ],
        ids=["empty", "unknown", "type_error"],
    )
    def test_chunk_method_invalid(self, get_http_api_auth, add_dataset_func, chunk_method):
        dataset_id = add_dataset_func
        payload = {"chunk_method": chunk_method}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 101, res
        assert "Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table' or 'tag'" in res["message"], res

    @pytest.mark.p3
    def test_chunk_method_none(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        payload = {"chunk_method": None}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 101, res
        assert "Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table' or 'tag'" in res["message"], res

    @pytest.mark.p2
    @pytest.mark.parametrize("pagerank", [0, 50, 100], ids=["min", "mid", "max"])
    def test_pagerank(self, get_http_api_auth, add_dataset_func, pagerank):
        dataset_id = add_dataset_func
        payload = {"pagerank": pagerank}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 0

        res = list_datasets(get_http_api_auth, {"id": dataset_id})
        assert res["code"] == 0, res
        assert res["data"][0]["pagerank"] == pagerank

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "pagerank, expected_message",
        [
            (-1, "Input should be greater than or equal to 0"),
            (101, "Input should be less than or equal to 100"),
        ],
        ids=["min_limit", "max_limit"],
    )
    def test_pagerank_invalid(self, get_http_api_auth, add_dataset_func, pagerank, expected_message):
        dataset_id = add_dataset_func
        payload = {"pagerank": pagerank}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 101, res
        assert expected_message in res["message"], res

    @pytest.mark.p3
    def test_pagerank_none(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        payload = {"pagerank": None}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 101, res
        assert "Input should be a valid integer" in res["message"], res

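    # The messages asserted throughout this class ("Input should be greater than
    # or equal to 0", "Extra inputs are not permitted", ...) are Pydantic v2
    # validation errors. A hypothetical sketch of the kind of server-side model
    # these tests exercise (bounds taken from the assertions above; everything
    # else is assumed):
    #
    # from pydantic import BaseModel, Field
    #
    # class UpdateDatasetRequest(BaseModel, extra="forbid"):
    #     pagerank: int = Field(ge=0, le=100)
    #     ...
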
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "parser_config",
        [
            {"auto_keywords": 0},
            {"auto_keywords": 16},
            {"auto_keywords": 32},
            {"auto_questions": 0},
            {"auto_questions": 5},
            {"auto_questions": 10},
            {"chunk_token_num": 1},
            {"chunk_token_num": 1024},
            {"chunk_token_num": 2048},
            {"delimiter": "\n"},
            {"delimiter": " "},
            {"html4excel": True},
            {"html4excel": False},
            {"layout_recognize": "DeepDOC"},
            {"layout_recognize": "Plain Text"},
            {"tag_kb_ids": ["1", "2"]},
            {"topn_tags": 1},
            {"topn_tags": 5},
            {"topn_tags": 10},
            {"filename_embd_weight": 0.1},
            {"filename_embd_weight": 0.5},
            {"filename_embd_weight": 1.0},
            {"task_page_size": 1},
            {"task_page_size": None},
            {"pages": [[1, 100]]},
            {"pages": None},
            {"graphrag": {"use_graphrag": True}},
            {"graphrag": {"use_graphrag": False}},
            {"graphrag": {"entity_types": ["age", "sex", "height", "weight"]}},
            {"graphrag": {"method": "general"}},
            {"graphrag": {"method": "light"}},
            {"graphrag": {"community": True}},
            {"graphrag": {"community": False}},
            {"graphrag": {"resolution": True}},
            {"graphrag": {"resolution": False}},
            {"raptor": {"use_raptor": True}},
            {"raptor": {"use_raptor": False}},
            {"raptor": {"prompt": "Who are you?"}},
            {"raptor": {"max_token": 1}},
            {"raptor": {"max_token": 1024}},
            {"raptor": {"max_token": 2048}},
            {"raptor": {"threshold": 0.0}},
            {"raptor": {"threshold": 0.5}},
            {"raptor": {"threshold": 1.0}},
            {"raptor": {"max_cluster": 1}},
            {"raptor": {"max_cluster": 512}},
            {"raptor": {"max_cluster": 1024}},
            {"raptor": {"random_seed": 0}},
        ],
        ids=[
            "auto_keywords_min",
            "auto_keywords_mid",
            "auto_keywords_max",
            "auto_questions_min",
            "auto_questions_mid",
            "auto_questions_max",
            "chunk_token_num_min",
            "chunk_token_num_mid",
            "chunk_token_num_max",
            "delimiter",
            "delimiter_space",
            "html4excel_true",
            "html4excel_false",
            "layout_recognize_DeepDOC",
            "layout_recognize_naive",
            "tag_kb_ids",
            "topn_tags_min",
            "topn_tags_mid",
            "topn_tags_max",
            "filename_embd_weight_min",
            "filename_embd_weight_mid",
            "filename_embd_weight_max",
            "task_page_size_min",
            "task_page_size_None",
            "pages",
            "pages_none",
            "graphrag_true",
            "graphrag_false",
            "graphrag_entity_types",
            "graphrag_method_general",
            "graphrag_method_light",
            "graphrag_community_true",
            "graphrag_community_false",
            "graphrag_resolution_true",
            "graphrag_resolution_false",
            "raptor_true",
            "raptor_false",
            "raptor_prompt",
            "raptor_max_token_min",
            "raptor_max_token_mid",
            "raptor_max_token_max",
            "raptor_threshold_min",
            "raptor_threshold_mid",
            "raptor_threshold_max",
            "raptor_max_cluster_min",
            "raptor_max_cluster_mid",
            "raptor_max_cluster_max",
            "raptor_random_seed_min",
        ],
    )
    def test_parser_config(self, get_http_api_auth, add_dataset_func, parser_config):
        dataset_id = add_dataset_func
        payload = {"parser_config": parser_config}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert res["code"] == 0, res
        for k, v in parser_config.items():
            if isinstance(v, dict):
                for kk, vv in v.items():
                    assert res["data"][0]["parser_config"][k][kk] == vv, res
            else:
                assert res["data"][0]["parser_config"][k] == v, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "parser_config, expected_message",
        [
            ({"auto_keywords": -1}, "Input should be greater than or equal to 0"),
            ({"auto_keywords": 33}, "Input should be less than or equal to 32"),
            ({"auto_keywords": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
            ({"auto_keywords": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
            ({"auto_questions": -1}, "Input should be greater than or equal to 0"),
            ({"auto_questions": 11}, "Input should be less than or equal to 10"),
            ({"auto_questions": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
            ({"auto_questions": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
            ({"chunk_token_num": 0}, "Input should be greater than or equal to 1"),
            ({"chunk_token_num": 2049}, "Input should be less than or equal to 2048"),
            ({"chunk_token_num": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
            ({"chunk_token_num": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
            ({"delimiter": ""}, "String should have at least 1 character"),
            ({"html4excel": "string"}, "Input should be a valid boolean, unable to interpret input"),
            ({"tag_kb_ids": "1,2"}, "Input should be a valid list"),
            ({"tag_kb_ids": [1, 2]}, "Input should be a valid string"),
            ({"topn_tags": 0}, "Input should be greater than or equal to 1"),
            ({"topn_tags": 11}, "Input should be less than or equal to 10"),
            ({"topn_tags": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
            ({"topn_tags": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
            ({"filename_embd_weight": -1}, "Input should be greater than or equal to 0"),
            ({"filename_embd_weight": 1.1}, "Input should be less than or equal to 1"),
            ({"filename_embd_weight": "string"}, "Input should be a valid number, unable to parse string as a number"),
            ({"task_page_size": 0}, "Input should be greater than or equal to 1"),
            ({"task_page_size": 3.14}, "Input should be a valid integer, got a number with a fractional part"),
            ({"task_page_size": "string"}, "Input should be a valid integer, unable to parse string as an integer"),
            ({"pages": "1,2"}, "Input should be a valid list"),
            ({"pages": ["1,2"]}, "Input should be a valid list"),
            ({"pages": [["string1", "string2"]]}, "Input should be a valid integer, unable to parse string as an integer"),
            ({"graphrag": {"use_graphrag": "string"}}, "Input should be a valid boolean, unable to interpret input"),
            ({"graphrag": {"entity_types": "1,2"}}, "Input should be a valid list"),
            ({"graphrag": {"entity_types": [1, 2]}}, "Input should be a valid string"),
            ({"graphrag": {"method": "unknown"}}, "Input should be 'light' or 'general'"),
            ({"graphrag": {"method": None}}, "Input should be 'light' or 'general'"),
            ({"graphrag": {"community": "string"}}, "Input should be a valid boolean, unable to interpret input"),
            ({"graphrag": {"resolution": "string"}}, "Input should be a valid boolean, unable to interpret input"),
            ({"raptor": {"use_raptor": "string"}}, "Input should be a valid boolean, unable to interpret input"),
            ({"raptor": {"prompt": ""}}, "String should have at least 1 character"),
            ({"raptor": {"prompt": " "}}, "String should have at least 1 character"),
            ({"raptor": {"max_token": 0}}, "Input should be greater than or equal to 1"),
            ({"raptor": {"max_token": 2049}}, "Input should be less than or equal to 2048"),
            ({"raptor": {"max_token": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
            ({"raptor": {"max_token": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
            ({"raptor": {"threshold": -0.1}}, "Input should be greater than or equal to 0"),
            ({"raptor": {"threshold": 1.1}}, "Input should be less than or equal to 1"),
            ({"raptor": {"threshold": "string"}}, "Input should be a valid number, unable to parse string as a number"),
            ({"raptor": {"max_cluster": 0}}, "Input should be greater than or equal to 1"),
            ({"raptor": {"max_cluster": 1025}}, "Input should be less than or equal to 1024"),
            ({"raptor": {"max_cluster": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
            ({"raptor": {"max_cluster": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
            ({"raptor": {"random_seed": -1}}, "Input should be greater than or equal to 0"),
            ({"raptor": {"random_seed": 3.14}}, "Input should be a valid integer, got a number with a fractional part"),
            ({"raptor": {"random_seed": "string"}}, "Input should be a valid integer, unable to parse string as an integer"),
            ({"delimiter": "a" * 65536}, "Parser config exceeds size limit (max 65,535 characters)"),
        ],
        ids=[
            "auto_keywords_min_limit",
            "auto_keywords_max_limit",
            "auto_keywords_float_not_allowed",
            "auto_keywords_type_invalid",
            "auto_questions_min_limit",
            "auto_questions_max_limit",
            "auto_questions_float_not_allowed",
            "auto_questions_type_invalid",
            "chunk_token_num_min_limit",
            "chunk_token_num_max_limit",
            "chunk_token_num_float_not_allowed",
            "chunk_token_num_type_invalid",
            "delimiter_empty",
            "html4excel_type_invalid",
            "tag_kb_ids_not_list",
            "tag_kb_ids_int_in_list",
            "topn_tags_min_limit",
            "topn_tags_max_limit",
            "topn_tags_float_not_allowed",
            "topn_tags_type_invalid",
            "filename_embd_weight_min_limit",
            "filename_embd_weight_max_limit",
            "filename_embd_weight_type_invalid",
            "task_page_size_min_limit",
            "task_page_size_float_not_allowed",
            "task_page_size_type_invalid",
            "pages_not_list",
            "pages_not_list_in_list",
            "pages_not_int_list",
            "graphrag_type_invalid",
            "graphrag_entity_types_not_list",
            "graphrag_entity_types_not_str_in_list",
            "graphrag_method_unknown",
            "graphrag_method_none",
            "graphrag_community_type_invalid",
            "graphrag_resolution_type_invalid",
            "raptor_type_invalid",
            "raptor_prompt_empty",
            "raptor_prompt_space",
            "raptor_max_token_min_limit",
            "raptor_max_token_max_limit",
            "raptor_max_token_float_not_allowed",
            "raptor_max_token_type_invalid",
            "raptor_threshold_min_limit",
            "raptor_threshold_max_limit",
            "raptor_threshold_type_invalid",
            "raptor_max_cluster_min_limit",
            "raptor_max_cluster_max_limit",
            "raptor_max_cluster_float_not_allowed",
            "raptor_max_cluster_type_invalid",
            "raptor_random_seed_min_limit",
            "raptor_random_seed_float_not_allowed",
            "raptor_random_seed_type_invalid",
            "parser_config_size_limit",
        ],
    )
    def test_parser_config_invalid(self, get_http_api_auth, add_dataset_func, parser_config, expected_message):
        dataset_id = add_dataset_func
        payload = {"parser_config": parser_config}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 101, res
        assert expected_message in res["message"], res

    @pytest.mark.p2
    def test_parser_config_empty(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        payload = {"parser_config": {}}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert res["code"] == 0, res
        assert res["data"][0]["parser_config"] == {
            "chunk_token_num": 128,
            "delimiter": r"\n",
            "html4excel": False,
            "layout_recognize": "DeepDOC",
            "raptor": {"use_raptor": False},
        }, res

    @pytest.mark.p3
    def test_parser_config_none(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        payload = {"parser_config": None}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth, {"id": dataset_id})
        assert res["code"] == 0, res
        assert res["data"][0]["parser_config"] == {
            "chunk_token_num": 128,
            "delimiter": r"\n",
            "html4excel": False,
            "layout_recognize": "DeepDOC",
            "raptor": {"use_raptor": False},
        }, res

    @pytest.mark.p3
    def test_parser_config_empty_with_chunk_method_change(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        payload = {"chunk_method": "qa", "parser_config": {}}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert res["code"] == 0, res
        assert res["data"][0]["parser_config"] == {"raptor": {"use_raptor": False}}, res

    @pytest.mark.p3
    def test_parser_config_unset_with_chunk_method_change(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        payload = {"chunk_method": "qa"}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert res["code"] == 0, res
        assert res["data"][0]["parser_config"] == {"raptor": {"use_raptor": False}}, res

    @pytest.mark.p3
    def test_parser_config_none_with_chunk_method_change(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        payload = {"chunk_method": "qa", "parser_config": None}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth, {"id": dataset_id})
        assert res["code"] == 0, res
        assert res["data"][0]["parser_config"] == {"raptor": {"use_raptor": False}}, res

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "payload",
        [
            {"id": "id"},
            {"tenant_id": "e57c1966f99211efb41e9e45646e0111"},
            {"created_by": "created_by"},
            {"create_date": "Tue, 11 Mar 2025 13:37:23 GMT"},
            {"create_time": 1741671443322},
            {"update_date": "Tue, 11 Mar 2025 13:37:23 GMT"},
            {"update_time": 1741671443339},
            {"document_count": 1},
            {"chunk_count": 1},
            {"token_num": 1},
            {"status": "1"},
            {"unknown_field": "unknown_field"},
        ],
    )
    def test_field_unsupported(self, get_http_api_auth, add_dataset_func, payload):
        dataset_id = add_dataset_func
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 101, res
        assert "Extra inputs are not permitted" in res["message"], res

    @pytest.mark.p2
    def test_field_unset(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        res = list_datasets(get_http_api_auth)
        assert res["code"] == 0, res
        original_data = res["data"][0]

        payload = {"name": "default_unset"}
        res = update_dataset(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 0, res

        res = list_datasets(get_http_api_auth)
        assert res["code"] == 0, res
        assert res["data"][0]["avatar"] == original_data["avatar"], res
        assert res["data"][0]["description"] == original_data["description"], res
        assert res["data"][0]["embedding_model"] == original_data["embedding_model"], res
        assert res["data"][0]["permission"] == original_data["permission"], res
        assert res["data"][0]["chunk_method"] == original_data["chunk_method"], res
        assert res["data"][0]["pagerank"] == original_data["pagerank"], res
        assert res["data"][0]["parser_config"] == {
            "chunk_token_num": 128,
            "delimiter": r"\n",
            "html4excel": False,
            "layout_recognize": "DeepDOC",
            "raptor": {"use_raptor": False},
        }, res
@ -1,51 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import pytest
from common import bulk_upload_documents, delete_documnets


@pytest.fixture(scope="function")
def add_document_func(request, get_http_api_auth, add_dataset, ragflow_tmp_dir):
    dataset_id = add_dataset
    document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 1, ragflow_tmp_dir)

    def cleanup():
        delete_documnets(get_http_api_auth, dataset_id, {"ids": document_ids})

    request.addfinalizer(cleanup)
    return dataset_id, document_ids[0]


@pytest.fixture(scope="class")
def add_documents(request, get_http_api_auth, add_dataset, ragflow_tmp_dir):
    dataset_id = add_dataset
    document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 5, ragflow_tmp_dir)

    def cleanup():
        delete_documnets(get_http_api_auth, dataset_id, {"ids": document_ids})

    request.addfinalizer(cleanup)
    return dataset_id, document_ids


@pytest.fixture(scope="function")
def add_documents_func(get_http_api_auth, add_dataset_func, ragflow_tmp_dir):
    dataset_id = add_dataset_func
    document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, 3, ragflow_tmp_dir)

    return dataset_id, document_ids
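

# A hypothetical usage sketch of the fixtures above (orientation only; names
# and scopes are taken from the definitions in this file):
#
# def test_example(get_http_api_auth, add_documents):
#     dataset_id, document_ids = add_documents  # class scope: 5 docs, cleaned up by finalizer
#     ...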
@ -1,181 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import INVALID_API_TOKEN, bulk_upload_documents, delete_documnets, list_documnets
from libs.auth import RAGFlowHttpApiAuth


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = delete_documnets(auth, "dataset_id")
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestDocumentsDeletion:
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message, remaining",
        [
            (None, 0, "", 0),
            ({"ids": []}, 0, "", 0),
            ({"ids": ["invalid_id"]}, 102, "Documents not found: ['invalid_id']", 3),
            (
                {"ids": ["\n!?。;!?\"'"]},
                102,
                """Documents not found: [\'\\n!?。;!?"\\\'\']""",
                3,
            ),
            (
                "not json",
                100,
                "AttributeError(\"'str' object has no attribute 'get'\")",
                3,
            ),
            (lambda r: {"ids": r[:1]}, 0, "", 2),
            (lambda r: {"ids": r}, 0, "", 0),
        ],
    )
    def test_basic_scenarios(
        self,
        get_http_api_auth,
        add_documents_func,
        payload,
        expected_code,
        expected_message,
        remaining,
    ):
        dataset_id, document_ids = add_documents_func
        if callable(payload):
            payload = payload(document_ids)
        res = delete_documnets(get_http_api_auth, dataset_id, payload)
        assert res["code"] == expected_code
        if res["code"] != 0:
            assert res["message"] == expected_message

        res = list_documnets(get_http_api_auth, dataset_id)
        assert len(res["data"]["docs"]) == remaining
        assert res["data"]["total"] == remaining

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "dataset_id, expected_code, expected_message",
        [
            ("", 100, "<MethodNotAllowed '405: Method Not Allowed'>"),
            (
                "invalid_dataset_id",
                102,
                "You don't own the dataset invalid_dataset_id. ",
            ),
        ],
    )
    def test_invalid_dataset_id(self, get_http_api_auth, add_documents_func, dataset_id, expected_code, expected_message):
        _, document_ids = add_documents_func
        res = delete_documnets(get_http_api_auth, dataset_id, {"ids": document_ids[:1]})
        assert res["code"] == expected_code
        assert res["message"] == expected_message

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "payload",
        [
            lambda r: {"ids": ["invalid_id"] + r},
            lambda r: {"ids": r[:1] + ["invalid_id"] + r[1:3]},
            lambda r: {"ids": r + ["invalid_id"]},
        ],
    )
    def test_delete_partial_invalid_id(self, get_http_api_auth, add_documents_func, payload):
        dataset_id, document_ids = add_documents_func
        if callable(payload):
            payload = payload(document_ids)
        res = delete_documnets(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 102
        assert res["message"] == "Documents not found: ['invalid_id']"

        res = list_documnets(get_http_api_auth, dataset_id)
        assert len(res["data"]["docs"]) == 0
        assert res["data"]["total"] == 0

    @pytest.mark.p2
    def test_repeated_deletion(self, get_http_api_auth, add_documents_func):
        dataset_id, document_ids = add_documents_func
        res = delete_documnets(get_http_api_auth, dataset_id, {"ids": document_ids})
        assert res["code"] == 0

        res = delete_documnets(get_http_api_auth, dataset_id, {"ids": document_ids})
        assert res["code"] == 102
        assert "Documents not found" in res["message"]

    @pytest.mark.p2
    def test_duplicate_deletion(self, get_http_api_auth, add_documents_func):
        dataset_id, document_ids = add_documents_func
        res = delete_documnets(get_http_api_auth, dataset_id, {"ids": document_ids + document_ids})
        assert res["code"] == 0
        assert "Duplicate document ids" in res["data"]["errors"][0]
        assert res["data"]["success_count"] == 3

        res = list_documnets(get_http_api_auth, dataset_id)
        assert len(res["data"]["docs"]) == 0
        assert res["data"]["total"] == 0


@pytest.mark.p3
def test_concurrent_deletion(get_http_api_auth, add_dataset, tmp_path):
    documents_num = 100
    dataset_id = add_dataset
    document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, documents_num, tmp_path)

    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [
            executor.submit(
                delete_documnets,
                get_http_api_auth,
                dataset_id,
                {"ids": document_ids[i : i + 1]},
            )
            for i in range(documents_num)
        ]
    responses = [f.result() for f in futures]
    assert all(r["code"] == 0 for r in responses)


@pytest.mark.p3
def test_delete_1k(get_http_api_auth, add_dataset, tmp_path):
    documents_num = 1_000
    dataset_id = add_dataset
    document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, documents_num, tmp_path)
    res = list_documnets(get_http_api_auth, dataset_id)
    assert res["data"]["total"] == documents_num

    res = delete_documnets(get_http_api_auth, dataset_id, {"ids": document_ids})
    assert res["code"] == 0

    res = list_documnets(get_http_api_auth, dataset_id)
    assert res["data"]["total"] == 0
@ -1,178 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import json
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import INVALID_API_TOKEN, bulk_upload_documents, download_document, upload_documnets
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import compare_by_hash
from requests import codes


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, tmp_path, auth, expected_code, expected_message):
        res = download_document(auth, "dataset_id", "document_id", tmp_path / "ragflow_test.txt")
        assert res.status_code == codes.ok
        with (tmp_path / "ragflow_test.txt").open("r") as f:
            response_json = json.load(f)
        assert response_json["code"] == expected_code
        assert response_json["message"] == expected_message


@pytest.mark.p1
@pytest.mark.parametrize(
    "generate_test_files",
    [
        "docx",
        "excel",
        "ppt",
        "image",
        "pdf",
        "txt",
        "md",
        "json",
        "eml",
        "html",
    ],
    indirect=True,
)
def test_file_type_validation(get_http_api_auth, add_dataset, generate_test_files, request):
    dataset_id = add_dataset
    fp = generate_test_files[request.node.callspec.params["generate_test_files"]]
    res = upload_documnets(get_http_api_auth, dataset_id, [fp])
    document_id = res["data"][0]["id"]

    res = download_document(
        get_http_api_auth,
        dataset_id,
        document_id,
        fp.with_stem("ragflow_test_download"),
    )
    assert res.status_code == codes.ok
    assert compare_by_hash(
        fp,
        fp.with_stem("ragflow_test_download"),
    )


class TestDocumentDownload:
|
||||
@pytest.mark.p3
|
||||
@pytest.mark.parametrize(
|
||||
"document_id, expected_code, expected_message",
|
||||
[
|
||||
(
|
||||
"invalid_document_id",
|
||||
102,
|
||||
"The dataset not own the document invalid_document_id.",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_invalid_document_id(self, get_http_api_auth, add_documents, tmp_path, document_id, expected_code, expected_message):
|
||||
dataset_id, _ = add_documents
|
||||
res = download_document(
|
||||
get_http_api_auth,
|
||||
dataset_id,
|
||||
document_id,
|
||||
tmp_path / "ragflow_test_download_1.txt",
|
||||
)
|
||||
assert res.status_code == codes.ok
|
||||
with (tmp_path / "ragflow_test_download_1.txt").open("r") as f:
|
||||
response_json = json.load(f)
|
||||
assert response_json["code"] == expected_code
|
||||
assert response_json["message"] == expected_message
|
||||
|
||||
@pytest.mark.p3
|
||||
@pytest.mark.parametrize(
|
||||
"dataset_id, expected_code, expected_message",
|
||||
[
|
||||
("", 100, "<NotFound '404: Not Found'>"),
|
||||
(
|
||||
"invalid_dataset_id",
|
||||
102,
|
||||
"You do not own the dataset invalid_dataset_id.",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_invalid_dataset_id(self, get_http_api_auth, add_documents, tmp_path, dataset_id, expected_code, expected_message):
|
||||
_, document_ids = add_documents
|
||||
res = download_document(
|
||||
get_http_api_auth,
|
||||
dataset_id,
|
||||
document_ids[0],
|
||||
tmp_path / "ragflow_test_download_1.txt",
|
||||
)
|
||||
assert res.status_code == codes.ok
|
||||
with (tmp_path / "ragflow_test_download_1.txt").open("r") as f:
|
||||
response_json = json.load(f)
|
||||
assert response_json["code"] == expected_code
|
||||
assert response_json["message"] == expected_message
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_same_file_repeat(self, get_http_api_auth, add_documents, tmp_path, ragflow_tmp_dir):
|
||||
num = 5
|
||||
dataset_id, document_ids = add_documents
|
||||
for i in range(num):
|
||||
res = download_document(
|
||||
get_http_api_auth,
|
||||
dataset_id,
|
||||
document_ids[0],
|
||||
tmp_path / f"ragflow_test_download_{i}.txt",
|
||||
)
|
||||
assert res.status_code == codes.ok
|
||||
assert compare_by_hash(
|
||||
ragflow_tmp_dir / "ragflow_test_upload_0.txt",
|
||||
tmp_path / f"ragflow_test_download_{i}.txt",
|
||||
)
|
||||
|
||||
|
||||
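# Concurrency check: 20 uploaded documents are downloaded by a 5-worker pool and each copy is verified against its source by hash.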
@pytest.mark.p3
def test_concurrent_download(get_http_api_auth, add_dataset, tmp_path):
    document_count = 20
    dataset_id = add_dataset
    document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, document_count, tmp_path)

    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [
            executor.submit(
                download_document,
                get_http_api_auth,
                dataset_id,
                document_ids[i],
                tmp_path / f"ragflow_test_download_{i}.txt",
            )
            for i in range(document_count)
        ]
    responses = [f.result() for f in futures]
    assert all(r.status_code == codes.ok for r in responses)
    for i in range(document_count):
        assert compare_by_hash(
            tmp_path / f"ragflow_test_upload_{i}.txt",
            tmp_path / f"ragflow_test_download_{i}.txt",
        )
@ -1,357 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import INVALID_API_TOKEN, list_documnets
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import is_sorted


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = list_documnets(auth, "dataset_id")
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestDocumentsList:
    @pytest.mark.p1
    def test_default(self, get_http_api_auth, add_documents):
        dataset_id, _ = add_documents
        res = list_documnets(get_http_api_auth, dataset_id)
        assert res["code"] == 0
        assert len(res["data"]["docs"]) == 5
        assert res["data"]["total"] == 5

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "dataset_id, expected_code, expected_message",
        [
            ("", 100, "<MethodNotAllowed '405: Method Not Allowed'>"),
            (
                "invalid_dataset_id",
                102,
                "You don't own the dataset invalid_dataset_id. ",
            ),
        ],
    )
    def test_invalid_dataset_id(self, get_http_api_auth, dataset_id, expected_code, expected_message):
        res = list_documnets(get_http_api_auth, dataset_id)
        assert res["code"] == expected_code
        assert res["message"] == expected_message

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "params, expected_code, expected_page_size, expected_message",
        [
            ({"page": None, "page_size": 2}, 0, 2, ""),
            ({"page": 0, "page_size": 2}, 0, 2, ""),
            ({"page": 2, "page_size": 2}, 0, 2, ""),
            ({"page": 3, "page_size": 2}, 0, 1, ""),
            ({"page": "3", "page_size": 2}, 0, 1, ""),
            pytest.param(
                {"page": -1, "page_size": 2},
                100,
                0,
                "1064",
                marks=pytest.mark.skip(reason="issues/5851"),
            ),
            pytest.param(
                {"page": "a", "page_size": 2},
                100,
                0,
                """ValueError("invalid literal for int() with base 10: \'a\'")""",
                marks=pytest.mark.skip(reason="issues/5851"),
            ),
        ],
    )
    def test_page(
        self,
        get_http_api_auth,
        add_documents,
        params,
        expected_code,
        expected_page_size,
        expected_message,
    ):
        dataset_id, _ = add_documents
        res = list_documnets(get_http_api_auth, dataset_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]["docs"]) == expected_page_size
            assert res["data"]["total"] == 5
        else:
            assert res["message"] == expected_message

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "params, expected_code, expected_page_size, expected_message",
        [
            ({"page_size": None}, 0, 5, ""),
            ({"page_size": 0}, 0, 0, ""),
            ({"page_size": 1}, 0, 1, ""),
            ({"page_size": 6}, 0, 5, ""),
            ({"page_size": "1"}, 0, 1, ""),
            pytest.param(
                {"page_size": -1},
                100,
                0,
                "1064",
                marks=pytest.mark.skip(reason="issues/5851"),
            ),
            pytest.param(
                {"page_size": "a"},
                100,
                0,
                """ValueError("invalid literal for int() with base 10: \'a\'")""",
                marks=pytest.mark.skip(reason="issues/5851"),
            ),
        ],
    )
    def test_page_size(
        self,
        get_http_api_auth,
        add_documents,
        params,
        expected_code,
        expected_page_size,
        expected_message,
    ):
        dataset_id, _ = add_documents
        res = list_documnets(get_http_api_auth, dataset_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]["docs"]) == expected_page_size
        else:
            assert res["message"] == expected_message

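    # The "assertions" column carries a lambda; is_sorted(docs, field, descending) checks the returned ordering.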
    @pytest.mark.p3
    @pytest.mark.parametrize(
        "params, expected_code, assertions, expected_message",
        [
            ({"orderby": None}, 0, lambda r: (is_sorted(r["data"]["docs"], "create_time", True)), ""),
            ({"orderby": "create_time"}, 0, lambda r: (is_sorted(r["data"]["docs"], "create_time", True)), ""),
            ({"orderby": "update_time"}, 0, lambda r: (is_sorted(r["data"]["docs"], "update_time", True)), ""),
            pytest.param({"orderby": "name", "desc": "False"}, 0, lambda r: (is_sorted(r["data"]["docs"], "name", False)), "", marks=pytest.mark.skip(reason="issues/5851")),
            pytest.param({"orderby": "unknown"}, 102, 0, "orderby should be create_time or update_time", marks=pytest.mark.skip(reason="issues/5851")),
        ],
    )
    def test_orderby(
        self,
        get_http_api_auth,
        add_documents,
        params,
        expected_code,
        assertions,
        expected_message,
    ):
        dataset_id, _ = add_documents
        res = list_documnets(get_http_api_auth, dataset_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            if callable(assertions):
                assert assertions(res)
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "params, expected_code, assertions, expected_message",
        [
            ({"desc": None}, 0, lambda r: (is_sorted(r["data"]["docs"], "create_time", True)), ""),
            ({"desc": "true"}, 0, lambda r: (is_sorted(r["data"]["docs"], "create_time", True)), ""),
            ({"desc": "True"}, 0, lambda r: (is_sorted(r["data"]["docs"], "create_time", True)), ""),
            ({"desc": True}, 0, lambda r: (is_sorted(r["data"]["docs"], "create_time", True)), ""),
            pytest.param({"desc": "false"}, 0, lambda r: (is_sorted(r["data"]["docs"], "create_time", False)), "", marks=pytest.mark.skip(reason="issues/5851")),
            ({"desc": "False"}, 0, lambda r: (is_sorted(r["data"]["docs"], "create_time", False)), ""),
            ({"desc": False}, 0, lambda r: (is_sorted(r["data"]["docs"], "create_time", False)), ""),
            ({"desc": "False", "orderby": "update_time"}, 0, lambda r: (is_sorted(r["data"]["docs"], "update_time", False)), ""),
            pytest.param({"desc": "unknown"}, 102, 0, "desc should be true or false", marks=pytest.mark.skip(reason="issues/5851")),
        ],
    )
    def test_desc(
        self,
        get_http_api_auth,
        add_documents,
        params,
        expected_code,
        assertions,
        expected_message,
    ):
        dataset_id, _ = add_documents
        res = list_documnets(get_http_api_auth, dataset_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            if callable(assertions):
                assert assertions(res)
        else:
            assert res["message"] == expected_message

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "params, expected_num",
        [
            ({"keywords": None}, 5),
            ({"keywords": ""}, 5),
            ({"keywords": "0"}, 1),
            ({"keywords": "ragflow_test_upload"}, 5),
            ({"keywords": "unknown"}, 0),
        ],
    )
    def test_keywords(self, get_http_api_auth, add_documents, params, expected_num):
        dataset_id, _ = add_documents
        res = list_documnets(get_http_api_auth, dataset_id, params=params)
        assert res["code"] == 0
        assert len(res["data"]["docs"]) == expected_num
        assert res["data"]["total"] == expected_num

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "params, expected_code, expected_num, expected_message",
        [
            ({"name": None}, 0, 5, ""),
            ({"name": ""}, 0, 5, ""),
            ({"name": "ragflow_test_upload_0.txt"}, 0, 1, ""),
            (
                {"name": "unknown.txt"},
                102,
                0,
                "You don't own the document unknown.txt.",
            ),
        ],
    )
    def test_name(
        self,
        get_http_api_auth,
        add_documents,
        params,
        expected_code,
        expected_num,
        expected_message,
    ):
        dataset_id, _ = add_documents
        res = list_documnets(get_http_api_auth, dataset_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            if params["name"] in [None, ""]:
                assert len(res["data"]["docs"]) == expected_num
            else:
                assert res["data"]["docs"][0]["name"] == params["name"]
        else:
            assert res["message"] == expected_message

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "document_id, expected_code, expected_num, expected_message",
        [
            (None, 0, 5, ""),
            ("", 0, 5, ""),
            (lambda r: r[0], 0, 1, ""),
            ("unknown.txt", 102, 0, "You don't own the document unknown.txt."),
        ],
    )
    def test_id(
        self,
        get_http_api_auth,
        add_documents,
        document_id,
        expected_code,
        expected_num,
        expected_message,
    ):
        dataset_id, document_ids = add_documents
        if callable(document_id):
            params = {"id": document_id(document_ids)}
        else:
            params = {"id": document_id}
        res = list_documnets(get_http_api_auth, dataset_id, params=params)

        assert res["code"] == expected_code
        if expected_code == 0:
            if params["id"] in [None, ""]:
                assert len(res["data"]["docs"]) == expected_num
            else:
                assert res["data"]["docs"][0]["id"] == params["id"]
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "document_id, name, expected_code, expected_num, expected_message",
        [
            (lambda r: r[0], "ragflow_test_upload_0.txt", 0, 1, ""),
            (lambda r: r[0], "ragflow_test_upload_1.txt", 0, 0, ""),
            (lambda r: r[0], "unknown", 102, 0, "You don't own the document unknown."),
            (
                "id",
                "ragflow_test_upload_0.txt",
                102,
                0,
                "You don't own the document id.",
            ),
        ],
    )
    def test_name_and_id(
        self,
        get_http_api_auth,
        add_documents,
        document_id,
        name,
        expected_code,
        expected_num,
        expected_message,
    ):
        dataset_id, document_ids = add_documents
        if callable(document_id):
            params = {"id": document_id(document_ids), "name": name}
        else:
            params = {"id": document_id, "name": name}

        res = list_documnets(get_http_api_auth, dataset_id, params=params)
        if expected_code == 0:
            assert len(res["data"]["docs"]) == expected_num
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    def test_concurrent_list(self, get_http_api_auth, add_documents):
        dataset_id, _ = add_documents

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(list_documnets, get_http_api_auth, dataset_id) for i in range(100)]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses)

    @pytest.mark.p3
    def test_invalid_params(self, get_http_api_auth, add_documents):
        dataset_id, _ = add_documents
        params = {"a": "b"}
        res = list_documnets(get_http_api_auth, dataset_id, params=params)
        assert res["code"] == 0
        assert len(res["data"]["docs"]) == 5
@ -1,217 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import INVALID_API_TOKEN, bulk_upload_documents, list_documnets, parse_documnets
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import wait_for


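# Polling helper: wait_for retries for up to 30s (1s interval) until every targeted document reports run == "DONE".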
@wait_for(30, 1, "Document parsing timeout")
def condition(_auth, _dataset_id, _document_ids=None):
    res = list_documnets(_auth, _dataset_id)
    target_docs = res["data"]["docs"]

    if _document_ids is None:
        for doc in target_docs:
            if doc["run"] != "DONE":
                return False
        return True

    target_ids = set(_document_ids)
    for doc in target_docs:
        if doc["id"] in target_ids:
            if doc.get("run") != "DONE":
                return False
    return True


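# Field-level assertions for a document whose parse has completed.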
def validate_document_details(auth, dataset_id, document_ids):
    for document_id in document_ids:
        res = list_documnets(auth, dataset_id, params={"id": document_id})
        doc = res["data"]["docs"][0]
        assert doc["run"] == "DONE"
        assert len(doc["process_begin_at"]) > 0
        assert doc["process_duration"] > 0
        assert doc["progress"] > 0
        assert "Task done" in doc["progress_msg"]


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = parse_documnets(auth, "dataset_id")
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestDocumentsParse:
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            pytest.param(None, 102, """AttributeError("\'NoneType\' object has no attribute \'get\'")""", marks=pytest.mark.skip),
            pytest.param({"document_ids": []}, 102, "`document_ids` is required", marks=pytest.mark.p1),
            pytest.param({"document_ids": ["invalid_id"]}, 102, "Documents not found: ['invalid_id']", marks=pytest.mark.p3),
            pytest.param({"document_ids": ["\n!?。;!?\"'"]}, 102, """Documents not found: [\'\\n!?。;!?"\\\'\']""", marks=pytest.mark.p3),
            pytest.param("not json", 102, "AttributeError(\"'str' object has no attribute 'get'\")", marks=pytest.mark.skip),
            pytest.param(lambda r: {"document_ids": r[:1]}, 0, "", marks=pytest.mark.p1),
            pytest.param(lambda r: {"document_ids": r}, 0, "", marks=pytest.mark.p1),
        ],
    )
    def test_basic_scenarios(self, get_http_api_auth, add_documents_func, payload, expected_code, expected_message):
        dataset_id, document_ids = add_documents_func
        if callable(payload):
            payload = payload(document_ids)
        res = parse_documnets(get_http_api_auth, dataset_id, payload)
        assert res["code"] == expected_code
        if expected_code != 0:
            assert res["message"] == expected_message
        if expected_code == 0:
            condition(get_http_api_auth, dataset_id, payload["document_ids"])
            validate_document_details(get_http_api_auth, dataset_id, payload["document_ids"])

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "dataset_id, expected_code, expected_message",
        [
            ("", 100, "<MethodNotAllowed '405: Method Not Allowed'>"),
            (
                "invalid_dataset_id",
                102,
                "You don't own the dataset invalid_dataset_id.",
            ),
        ],
    )
    def test_invalid_dataset_id(
        self,
        get_http_api_auth,
        add_documents_func,
        dataset_id,
        expected_code,
        expected_message,
    ):
        _, document_ids = add_documents_func
        res = parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids})
        assert res["code"] == expected_code
        assert res["message"] == expected_message

    @pytest.mark.parametrize(
        "payload",
        [
            pytest.param(lambda r: {"document_ids": ["invalid_id"] + r}, marks=pytest.mark.p3),
            pytest.param(lambda r: {"document_ids": r[:1] + ["invalid_id"] + r[1:3]}, marks=pytest.mark.p1),
            pytest.param(lambda r: {"document_ids": r + ["invalid_id"]}, marks=pytest.mark.p3),
        ],
    )
    def test_parse_partial_invalid_document_id(self, get_http_api_auth, add_documents_func, payload):
        dataset_id, document_ids = add_documents_func
        if callable(payload):
            payload = payload(document_ids)
        res = parse_documnets(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 102
        assert res["message"] == "Documents not found: ['invalid_id']"

        condition(get_http_api_auth, dataset_id)

        validate_document_details(get_http_api_auth, dataset_id, document_ids)

    @pytest.mark.p3
    def test_repeated_parse(self, get_http_api_auth, add_documents_func):
        dataset_id, document_ids = add_documents_func
        res = parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids})
        assert res["code"] == 0

        condition(get_http_api_auth, dataset_id)

        res = parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids})
        assert res["code"] == 0

    @pytest.mark.p3
    def test_duplicate_parse(self, get_http_api_auth, add_documents_func):
        dataset_id, document_ids = add_documents_func
        res = parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids + document_ids})
        assert res["code"] == 0
        assert "Duplicate document ids" in res["data"]["errors"][0]
        assert res["data"]["success_count"] == 3

        condition(get_http_api_auth, dataset_id)

        validate_document_details(get_http_api_auth, dataset_id, document_ids)


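# Bulk scenario: upload and parse 100 files; the nested condition widens the polling timeout to 100s.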
@pytest.mark.p3
def test_parse_100_files(get_http_api_auth, add_dataset_func, tmp_path):
    @wait_for(100, 1, "Document parsing timeout")
    def condition(_auth, _dataset_id, _document_num):
        res = list_documnets(_auth, _dataset_id, {"page_size": _document_num})
        for doc in res["data"]["docs"]:
            if doc["run"] != "DONE":
                return False
        return True

    document_num = 100
    dataset_id = add_dataset_func
    document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path)
    res = parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids})
    assert res["code"] == 0

    condition(get_http_api_auth, dataset_id, document_num)

    validate_document_details(get_http_api_auth, dataset_id, document_ids)


@pytest.mark.p3
def test_concurrent_parse(get_http_api_auth, add_dataset_func, tmp_path):
    @wait_for(120, 1, "Document parsing timeout")
    def condition(_auth, _dataset_id, _document_num):
        res = list_documnets(_auth, _dataset_id, {"page_size": _document_num})
        for doc in res["data"]["docs"]:
            if doc["run"] != "DONE":
                return False
        return True

    document_num = 100
    dataset_id = add_dataset_func
    document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path)

    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [
            executor.submit(
                parse_documnets,
                get_http_api_auth,
                dataset_id,
                {"document_ids": document_ids[i : i + 1]},
            )
            for i in range(document_num)
        ]
    responses = [f.result() for f in futures]
    assert all(r["code"] == 0 for r in responses)

    condition(get_http_api_auth, dataset_id, document_num)

    validate_document_details(get_http_api_auth, dataset_id, document_ids)
@ -1,202 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from concurrent.futures import ThreadPoolExecutor
from time import sleep

import pytest
from common import INVALID_API_TOKEN, bulk_upload_documents, list_documnets, parse_documnets, stop_parse_documnets
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import wait_for


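# Terminal-state validators: "DONE" for parses that ran to completion, "CANCEL" for parses that were stopped.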
def validate_document_parse_done(auth, dataset_id, document_ids):
    for document_id in document_ids:
        res = list_documnets(auth, dataset_id, params={"id": document_id})
        doc = res["data"]["docs"][0]
        assert doc["run"] == "DONE"
        assert len(doc["process_begin_at"]) > 0
        assert doc["process_duration"] > 0
        assert doc["progress"] > 0
        assert "Task done" in doc["progress_msg"]


def validate_document_parse_cancel(auth, dataset_id, document_ids):
    for document_id in document_ids:
        res = list_documnets(auth, dataset_id, params={"id": document_id})
        doc = res["data"]["docs"][0]
        assert doc["run"] == "CANCEL"
        assert len(doc["process_begin_at"]) > 0
        assert doc["progress"] == 0.0


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = stop_parse_documnets(auth, "dataset_id")
        assert res["code"] == expected_code
        assert res["message"] == expected_message


@pytest.mark.skip
class TestDocumentsParseStop:
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            pytest.param(None, 102, """AttributeError("\'NoneType\' object has no attribute \'get\'")""", marks=pytest.mark.skip),
            pytest.param({"document_ids": []}, 102, "`document_ids` is required", marks=pytest.mark.p1),
            pytest.param({"document_ids": ["invalid_id"]}, 102, "You don't own the document invalid_id.", marks=pytest.mark.p3),
            pytest.param({"document_ids": ["\n!?。;!?\"'"]}, 102, """You don\'t own the document \n!?。;!?"\'.""", marks=pytest.mark.p3),
            pytest.param("not json", 102, "AttributeError(\"'str' object has no attribute 'get'\")", marks=pytest.mark.skip),
            pytest.param(lambda r: {"document_ids": r[:1]}, 0, "", marks=pytest.mark.p1),
            pytest.param(lambda r: {"document_ids": r}, 0, "", marks=pytest.mark.p1),
        ],
    )
    def test_basic_scenarios(self, get_http_api_auth, add_documents_func, payload, expected_code, expected_message):
        @wait_for(10, 1, "Document parsing timeout")
        def condition(_auth, _dataset_id, _document_ids):
            for _document_id in _document_ids:
                res = list_documnets(_auth, _dataset_id, {"id": _document_id})
                if res["data"]["docs"][0]["run"] != "DONE":
                    return False
            return True

        dataset_id, document_ids = add_documents_func
        parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids})

        if callable(payload):
            payload = payload(document_ids)

        res = stop_parse_documnets(get_http_api_auth, dataset_id, payload)
        assert res["code"] == expected_code
        if expected_code == 0:
            completed_document_ids = list(set(document_ids) - set(payload["document_ids"]))
            condition(get_http_api_auth, dataset_id, completed_document_ids)
            validate_document_parse_cancel(get_http_api_auth, dataset_id, payload["document_ids"])
            validate_document_parse_done(get_http_api_auth, dataset_id, completed_document_ids)
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "invalid_dataset_id, expected_code, expected_message",
        [
            ("", 100, "<MethodNotAllowed '405: Method Not Allowed'>"),
            (
                "invalid_dataset_id",
                102,
                "You don't own the dataset invalid_dataset_id.",
            ),
        ],
    )
    def test_invalid_dataset_id(
        self,
        get_http_api_auth,
        add_documents_func,
        invalid_dataset_id,
        expected_code,
        expected_message,
    ):
        dataset_id, document_ids = add_documents_func
        parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids})
        res = stop_parse_documnets(get_http_api_auth, invalid_dataset_id, {"document_ids": document_ids})
        assert res["code"] == expected_code
        assert res["message"] == expected_message

    @pytest.mark.skip
    @pytest.mark.parametrize(
        "payload",
        [
            lambda r: {"document_ids": ["invalid_id"] + r},
            lambda r: {"document_ids": r[:1] + ["invalid_id"] + r[1:3]},
            lambda r: {"document_ids": r + ["invalid_id"]},
        ],
    )
    def test_stop_parse_partial_invalid_document_id(self, get_http_api_auth, add_documents_func, payload):
        dataset_id, document_ids = add_documents_func
        parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids})

        if callable(payload):
            payload = payload(document_ids)
        res = stop_parse_documnets(get_http_api_auth, dataset_id, payload)
        assert res["code"] == 102
        assert res["message"] == "You don't own the document invalid_id."

        validate_document_parse_cancel(get_http_api_auth, dataset_id, document_ids)

    @pytest.mark.p3
    def test_repeated_stop_parse(self, get_http_api_auth, add_documents_func):
        dataset_id, document_ids = add_documents_func
        parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids})
        res = stop_parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids})
        assert res["code"] == 0

        res = stop_parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids})
        assert res["code"] == 102
        assert res["message"] == "Can't stop parsing document with progress at 0 or 1"

    @pytest.mark.p3
    def test_duplicate_stop_parse(self, get_http_api_auth, add_documents_func):
        dataset_id, document_ids = add_documents_func
        parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids})
        res = stop_parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids + document_ids})
        assert res["code"] == 0
        assert res["data"]["success_count"] == 3
        assert f"Duplicate document ids: {document_ids[0]}" in res["data"]["errors"]


@pytest.mark.skip(reason="unstable")
def test_stop_parse_100_files(get_http_api_auth, add_dataset_func, tmp_path):
    document_num = 100
    dataset_id = add_dataset_func
    document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path)
    parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids})
    sleep(1)
    res = stop_parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids})
    assert res["code"] == 0
    validate_document_parse_cancel(get_http_api_auth, dataset_id, document_ids)


@pytest.mark.skip(reason="unstable")
def test_concurrent_parse(get_http_api_auth, add_dataset_func, tmp_path):
    document_num = 50
    dataset_id = add_dataset_func
    document_ids = bulk_upload_documents(get_http_api_auth, dataset_id, document_num, tmp_path)
    parse_documnets(get_http_api_auth, dataset_id, {"document_ids": document_ids})

    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [
            executor.submit(
                stop_parse_documnets,
                get_http_api_auth,
                dataset_id,
                {"document_ids": document_ids[i : i + 1]},
            )
            for i in range(document_num)
        ]
    responses = [f.result() for f in futures]
    assert all(r["code"] == 0 for r in responses)
    validate_document_parse_cancel(get_http_api_auth, dataset_id, document_ids)
@ -1,547 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import pytest
from common import DOCUMENT_NAME_LIMIT, INVALID_API_TOKEN, list_documnets, update_documnet
from libs.auth import RAGFlowHttpApiAuth


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = update_documnet(auth, "dataset_id", "document_id")
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestDocumentsUpdated:
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "name, expected_code, expected_message",
        [
            ("new_name.txt", 0, ""),
            (
                f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt",
                101,
                "The name should be less than 128 bytes.",
            ),
            (
                0,
                100,
                """AttributeError("\'int\' object has no attribute \'encode\'")""",
            ),
            (
                None,
                100,
                """AttributeError("\'NoneType\' object has no attribute \'encode\'")""",
            ),
            (
                "",
                101,
                "The extension of file can't be changed",
            ),
            (
                "ragflow_test_upload_0",
                101,
                "The extension of file can't be changed",
            ),
            (
                "ragflow_test_upload_1.txt",
                102,
                "Duplicated document name in the same dataset.",
            ),
            (
                "RAGFLOW_TEST_UPLOAD_1.TXT",
                0,
                "",
            ),
        ],
    )
    def test_name(self, get_http_api_auth, add_documents, name, expected_code, expected_message):
        dataset_id, document_ids = add_documents
        res = update_documnet(get_http_api_auth, dataset_id, document_ids[0], {"name": name})
        assert res["code"] == expected_code
        if expected_code == 0:
            res = list_documnets(get_http_api_auth, dataset_id, {"id": document_ids[0]})
            assert res["data"]["docs"][0]["name"] == name
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "document_id, expected_code, expected_message",
        [
            ("", 100, "<MethodNotAllowed '405: Method Not Allowed'>"),
            (
                "invalid_document_id",
                102,
                "The dataset doesn't own the document.",
            ),
        ],
    )
    def test_invalid_document_id(self, get_http_api_auth, add_documents, document_id, expected_code, expected_message):
        dataset_id, _ = add_documents
        res = update_documnet(get_http_api_auth, dataset_id, document_id, {"name": "new_name.txt"})
        assert res["code"] == expected_code
        assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "dataset_id, expected_code, expected_message",
        [
            ("", 100, "<NotFound '404: Not Found'>"),
            (
                "invalid_dataset_id",
                102,
                "You don't own the dataset.",
            ),
        ],
    )
    def test_invalid_dataset_id(self, get_http_api_auth, add_documents, dataset_id, expected_code, expected_message):
        _, document_ids = add_documents
        res = update_documnet(get_http_api_auth, dataset_id, document_ids[0], {"name": "new_name.txt"})
        assert res["code"] == expected_code
        assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "meta_fields, expected_code, expected_message",
        [({"test": "test"}, 0, ""), ("test", 102, "meta_fields must be a dictionary")],
    )
    def test_meta_fields(self, get_http_api_auth, add_documents, meta_fields, expected_code, expected_message):
        dataset_id, document_ids = add_documents
        res = update_documnet(get_http_api_auth, dataset_id, document_ids[0], {"meta_fields": meta_fields})
        if expected_code == 0:
            res = list_documnets(get_http_api_auth, dataset_id, {"id": document_ids[0]})
            assert res["data"]["docs"][0]["meta_fields"] == meta_fields
        else:
            assert res["message"] == expected_message

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "chunk_method, expected_code, expected_message",
        [
            ("naive", 0, ""),
            ("manual", 0, ""),
            ("qa", 0, ""),
            ("table", 0, ""),
            ("paper", 0, ""),
            ("book", 0, ""),
            ("laws", 0, ""),
            ("presentation", 0, ""),
            ("picture", 0, ""),
            ("one", 0, ""),
            ("knowledge_graph", 0, ""),
            ("email", 0, ""),
            ("tag", 0, ""),
            ("", 102, "`chunk_method` doesn't exist"),
            (
                "other_chunk_method",
                102,
                "`chunk_method` other_chunk_method doesn't exist",
            ),
        ],
    )
    def test_chunk_method(self, get_http_api_auth, add_documents, chunk_method, expected_code, expected_message):
        dataset_id, document_ids = add_documents
        res = update_documnet(get_http_api_auth, dataset_id, document_ids[0], {"chunk_method": chunk_method})
        assert res["code"] == expected_code
        if expected_code == 0:
            res = list_documnets(get_http_api_auth, dataset_id, {"id": document_ids[0]})
            if chunk_method == "":
                assert res["data"]["docs"][0]["chunk_method"] == "naive"
            else:
                assert res["data"]["docs"][0]["chunk_method"] == chunk_method
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            ({"chunk_count": 1}, 102, "Can't change `chunk_count`."),
            pytest.param(
                {"create_date": "Fri, 14 Mar 2025 16:53:42 GMT"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"create_time": 1},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"created_by": "ragflow_test"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"dataset_id": "ragflow_test"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"id": "ragflow_test"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"location": "ragflow_test.txt"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"process_begin_at": 1},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"process_duration": 1.0},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param({"progress": 1.0}, 102, "Can't change `progress`."),
            pytest.param(
                {"progress_msg": "ragflow_test"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"run": "ragflow_test"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"size": 1},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"source_type": "ragflow_test"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"thumbnail": "ragflow_test"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            ({"token_count": 1}, 102, "Can't change `token_count`."),
            pytest.param(
                {"type": "ragflow_test"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"update_date": "Fri, 14 Mar 2025 16:33:17 GMT"},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
            pytest.param(
                {"update_time": 1},
                102,
                "The input parameters are invalid.",
                marks=pytest.mark.skip(reason="issues/6104"),
            ),
        ],
    )
    def test_invalid_field(
        self,
        get_http_api_auth,
        add_documents,
        payload,
        expected_code,
        expected_message,
    ):
        dataset_id, document_ids = add_documents
        res = update_documnet(get_http_api_auth, dataset_id, document_ids[0], payload)
        assert res["code"] == expected_code
        assert res["message"] == expected_message


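# parser_config permutations per chunk_method; out-of-range and wrong-type values sit behind skip marks (issues/6098).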
class TestUpdateDocumentParserConfig:
    @pytest.mark.p2
    @pytest.mark.parametrize(
        "chunk_method, parser_config, expected_code, expected_message",
        [
            ("naive", {}, 0, ""),
            (
                "naive",
                {
                    "chunk_token_num": 128,
                    "layout_recognize": "DeepDOC",
                    "html4excel": False,
                    "delimiter": r"\n",
                    "task_page_size": 12,
                    "raptor": {"use_raptor": False},
                },
                0,
                "",
            ),
            pytest.param(
                "naive",
                {"chunk_token_num": -1},
                100,
                "AssertionError('chunk_token_num should be in range from 1 to 100000000')",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"chunk_token_num": 0},
                100,
                "AssertionError('chunk_token_num should be in range from 1 to 100000000')",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"chunk_token_num": 100000000},
                100,
                "AssertionError('chunk_token_num should be in range from 1 to 100000000')",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"chunk_token_num": 3.14},
                102,
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"chunk_token_num": "1024"},
                100,
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            (
                "naive",
                {"layout_recognize": "DeepDOC"},
                0,
                "",
            ),
            (
                "naive",
                {"layout_recognize": "Naive"},
                0,
                "",
            ),
            ("naive", {"html4excel": True}, 0, ""),
            ("naive", {"html4excel": False}, 0, ""),
            pytest.param(
                "naive",
                {"html4excel": 1},
                100,
                "AssertionError('html4excel should be True or False')",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            ("naive", {"delimiter": ""}, 0, ""),
            ("naive", {"delimiter": "`##`"}, 0, ""),
            pytest.param(
                "naive",
                {"delimiter": 1},
                100,
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"task_page_size": -1},
                100,
                "AssertionError('task_page_size should be in range from 1 to 100000000')",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"task_page_size": 0},
                100,
                "AssertionError('task_page_size should be in range from 1 to 100000000')",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"task_page_size": 100000000},
                100,
                "AssertionError('task_page_size should be in range from 1 to 100000000')",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"task_page_size": 3.14},
                100,
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"task_page_size": "1024"},
                100,
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            ("naive", {"raptor": {"use_raptor": True}}, 0, ""),
            ("naive", {"raptor": {"use_raptor": False}}, 0, ""),
            pytest.param(
                "naive",
                {"invalid_key": "invalid_value"},
                100,
                """AssertionError("Abnormal \'parser_config\'. Invalid key: invalid_key")""",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_keywords": -1},
                100,
                "AssertionError('auto_keywords should be in range from 0 to 32')",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_keywords": 32},
                100,
                "AssertionError('auto_keywords should be in range from 0 to 32')",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_questions": 3.14},
                100,
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_keywords": "1024"},
                100,
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_questions": -1},
                100,
                "AssertionError('auto_questions should be in range from 0 to 10')",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_questions": 10},
                100,
                "AssertionError('auto_questions should be in range from 0 to 10')",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_questions": 3.14},
                100,
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"auto_questions": "1024"},
                100,
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"topn_tags": -1},
                100,
                "AssertionError('topn_tags should be in range from 0 to 10')",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"topn_tags": 10},
                100,
                "AssertionError('topn_tags should be in range from 0 to 10')",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"topn_tags": 3.14},
                100,
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
            pytest.param(
                "naive",
                {"topn_tags": "1024"},
                100,
                "",
                marks=pytest.mark.skip(reason="issues/6098"),
            ),
        ],
    )
    def test_parser_config(
        self,
        get_http_api_auth,
        add_documents,
        chunk_method,
        parser_config,
        expected_code,
        expected_message,
    ):
        dataset_id, document_ids = add_documents
        res = update_documnet(
            get_http_api_auth,
            dataset_id,
            document_ids[0],
            {"chunk_method": chunk_method, "parser_config": parser_config},
        )
        assert res["code"] == expected_code
        if expected_code == 0:
            res = list_documnets(get_http_api_auth, dataset_id, {"id": document_ids[0]})
            if parser_config == {}:
                assert res["data"]["docs"][0]["parser_config"] == {
                    "chunk_token_num": 128,
                    "delimiter": r"\n",
                    "html4excel": False,
                    "layout_recognize": "DeepDOC",
                    "raptor": {"use_raptor": False},
                }
            else:
                for k, v in parser_config.items():
                    assert res["data"]["docs"][0]["parser_config"][k] == v
        if expected_code != 0 or expected_message:
            assert res["message"] == expected_message
@ -1,218 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import string
from concurrent.futures import ThreadPoolExecutor

import pytest
import requests
from common import DOCUMENT_NAME_LIMIT, FILE_API_URL, HOST_ADDRESS, INVALID_API_TOKEN, list_datasets, upload_documnets
from libs.auth import RAGFlowHttpApiAuth
from libs.utils.file_utils import create_txt_file
from requests_toolbelt import MultipartEncoder


@pytest.mark.p1
@pytest.mark.usefixtures("clear_datasets")
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = upload_documnets(auth, "dataset_id")
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestDocumentsUpload:
    @pytest.mark.p1
    def test_valid_single_upload(self, get_http_api_auth, add_dataset_func, tmp_path):
        dataset_id = add_dataset_func
        fp = create_txt_file(tmp_path / "ragflow_test.txt")
        res = upload_documnets(get_http_api_auth, dataset_id, [fp])
        assert res["code"] == 0
        assert res["data"][0]["dataset_id"] == dataset_id
        assert res["data"][0]["name"] == fp.name

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "generate_test_files",
        [
            "docx",
            "excel",
            "ppt",
            "image",
            "pdf",
            "txt",
            "md",
            "json",
            "eml",
            "html",
        ],
        indirect=True,
    )
    def test_file_type_validation(self, get_http_api_auth, add_dataset_func, generate_test_files, request):
        dataset_id = add_dataset_func
        fp = generate_test_files[request.node.callspec.params["generate_test_files"]]
        res = upload_documnets(get_http_api_auth, dataset_id, [fp])
        assert res["code"] == 0
        assert res["data"][0]["dataset_id"] == dataset_id
        assert res["data"][0]["name"] == fp.name

    @pytest.mark.p2
    @pytest.mark.parametrize(
        "file_type",
        ["exe", "unknown"],
    )
    def test_unsupported_file_type(self, get_http_api_auth, add_dataset_func, tmp_path, file_type):
        dataset_id = add_dataset_func
        fp = tmp_path / f"ragflow_test.{file_type}"
        fp.touch()
        res = upload_documnets(get_http_api_auth, dataset_id, [fp])
        assert res["code"] == 500
        assert res["message"] == f"ragflow_test.{file_type}: This type of file has not been supported yet!"

    @pytest.mark.p2
    def test_missing_file(self, get_http_api_auth, add_dataset_func):
        dataset_id = add_dataset_func
        res = upload_documnets(get_http_api_auth, dataset_id)
        assert res["code"] == 101
        assert res["message"] == "No file part!"

    @pytest.mark.p3
    def test_empty_file(self, get_http_api_auth, add_dataset_func, tmp_path):
        dataset_id = add_dataset_func
        fp = tmp_path / "empty.txt"
        fp.touch()

        res = upload_documnets(get_http_api_auth, dataset_id, [fp])
        assert res["code"] == 0
        assert res["data"][0]["size"] == 0

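    # An empty filename cannot be sent through the upload helper, so the multipart request is assembled by hand.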
    @pytest.mark.p3
    def test_filename_empty(self, get_http_api_auth, add_dataset_func, tmp_path):
        dataset_id = add_dataset_func
        fp = create_txt_file(tmp_path / "ragflow_test.txt")
        url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id)
        fields = (("file", ("", fp.open("rb"))),)
        m = MultipartEncoder(fields=fields)
        res = requests.post(
            url=url,
            headers={"Content-Type": m.content_type},
            auth=get_http_api_auth,
            data=m,
        )
        assert res.json()["code"] == 101
        assert res.json()["message"] == "No file selected!"

    @pytest.mark.p2
    def test_filename_exceeds_max_length(self, get_http_api_auth, add_dataset_func, tmp_path):
        dataset_id = add_dataset_func
        # filename_length = 129
        fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt")
        res = upload_documnets(get_http_api_auth, dataset_id, [fp])
        assert res["code"] == 101
        assert res["message"] == "File name should be less than 128 bytes."

    @pytest.mark.p2
    def test_invalid_dataset_id(self, get_http_api_auth, tmp_path):
        fp = create_txt_file(tmp_path / "ragflow_test.txt")
        res = upload_documnets(get_http_api_auth, "invalid_dataset_id", [fp])
        assert res["code"] == 100
        assert res["message"] == """LookupError("Can\'t find the dataset with ID invalid_dataset_id!")"""

    @pytest.mark.p2
    def test_duplicate_files(self, get_http_api_auth, add_dataset_func, tmp_path):
        dataset_id = add_dataset_func
        fp = create_txt_file(tmp_path / "ragflow_test.txt")
        res = upload_documnets(get_http_api_auth, dataset_id, [fp, fp])
        assert res["code"] == 0
        assert len(res["data"]) == 2
        for i in range(len(res["data"])):
            assert res["data"][i]["dataset_id"] == dataset_id
            expected_name = fp.name
            if i != 0:
                expected_name = f"{fp.stem}({i}){fp.suffix}"
            assert res["data"][i]["name"] == expected_name

    @pytest.mark.p2
    def test_same_file_repeat(self, get_http_api_auth, add_dataset_func, tmp_path):
        dataset_id = add_dataset_func
        fp = create_txt_file(tmp_path / "ragflow_test.txt")
        for i in range(10):
            res = upload_documnets(get_http_api_auth, dataset_id, [fp])
            assert res["code"] == 0
            assert len(res["data"]) == 1
            assert res["data"][0]["dataset_id"] == dataset_id
            expected_name = fp.name
            if i != 0:
                expected_name = f"{fp.stem}({i}){fp.suffix}"
            assert res["data"][0]["name"] == expected_name

    @pytest.mark.p3
    def test_filename_special_characters(self, get_http_api_auth, add_dataset_func, tmp_path):
        dataset_id = add_dataset_func
        illegal_chars = '<>:"/\\|?*'
        translation_table = str.maketrans({char: "_" for char in illegal_chars})
        safe_filename = string.punctuation.translate(translation_table)
        fp = tmp_path / f"{safe_filename}.txt"
        fp.write_text("Sample text content")

        res = upload_documnets(get_http_api_auth, dataset_id, [fp])
        assert res["code"] == 0
        assert len(res["data"]) == 1
        assert res["data"][0]["dataset_id"] == dataset_id
        assert res["data"][0]["name"] == fp.name

    @pytest.mark.p1
    def test_multiple_files(self, get_http_api_auth, add_dataset_func, tmp_path):
        dataset_id = add_dataset_func
        expected_document_count = 20
        fps = []
        for i in range(expected_document_count):
            fp = create_txt_file(tmp_path / f"ragflow_test_{i}.txt")
            fps.append(fp)
        res = upload_documnets(get_http_api_auth, dataset_id, fps)
        assert res["code"] == 0

        res = list_datasets(get_http_api_auth, {"id": dataset_id})
        assert res["data"][0]["document_count"] == expected_document_count

    @pytest.mark.p3
    def test_concurrent_upload(self, get_http_api_auth, add_dataset_func, tmp_path):
        dataset_id = add_dataset_func

        expected_document_count = 20
        fps = []
        for i in range(expected_document_count):
            fp = create_txt_file(tmp_path / f"ragflow_test_{i}.txt")
            fps.append(fp)

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(upload_documnets, get_http_api_auth, dataset_id, fps[i : i + 1]) for i in range(expected_document_count)]
        responses = [f.result() for f in futures]
        assert all(r["code"] == 0 for r in responses)

        res = list_datasets(get_http_api_auth, {"id": dataset_id})
        assert res["data"][0]["document_count"] == expected_document_count
@ -1,53 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pytest
from common import create_session_with_chat_assistant, delete_session_with_chat_assistants


@pytest.fixture(scope="class")
|
||||
def add_sessions_with_chat_assistant(request, get_http_api_auth, add_chat_assistants):
|
||||
_, _, chat_assistant_ids = add_chat_assistants
|
||||
|
||||
def cleanup():
|
||||
for chat_assistant_id in chat_assistant_ids:
|
||||
delete_session_with_chat_assistants(get_http_api_auth, chat_assistant_id)
|
||||
|
||||
request.addfinalizer(cleanup)
|
||||
|
||||
session_ids = []
|
||||
for i in range(5):
|
||||
res = create_session_with_chat_assistant(get_http_api_auth, chat_assistant_ids[0], {"name": f"session_with_chat_assistant_{i}"})
|
||||
session_ids.append(res["data"]["id"])
|
||||
|
||||
return chat_assistant_ids[0], session_ids
|
||||
|
||||
|
||||
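# Function-scoped twin of the fixture above: each test gets five freshly created sessions.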
@pytest.fixture(scope="function")
def add_sessions_with_chat_assistant_func(request, get_http_api_auth, add_chat_assistants):
    _, _, chat_assistant_ids = add_chat_assistants

    def cleanup():
        for chat_assistant_id in chat_assistant_ids:
            delete_session_with_chat_assistants(get_http_api_auth, chat_assistant_id)

    request.addfinalizer(cleanup)

    session_ids = []
    for i in range(5):
        res = create_session_with_chat_assistant(get_http_api_auth, chat_assistant_ids[0], {"name": f"session_with_chat_assistant_{i}"})
        session_ids.append(res["data"]["id"])

    return chat_assistant_ids[0], session_ids
@ -1,117 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import INVALID_API_TOKEN, SESSION_WITH_CHAT_NAME_LIMIT, create_session_with_chat_assistant, delete_chat_assistants, list_session_with_chat_assistants
from libs.auth import RAGFlowHttpApiAuth


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = create_session_with_chat_assistant(auth, "chat_assistant_id")
        assert res["code"] == expected_code
        assert res["message"] == expected_message


@pytest.mark.usefixtures("clear_session_with_chat_assistants")
class TestSessionWithChatAssistantCreate:
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            ({"name": "valid_name"}, 0, ""),
            pytest.param({"name": "a" * (SESSION_WITH_CHAT_NAME_LIMIT + 1)}, 102, "", marks=pytest.mark.skip(reason="issues/")),
            pytest.param({"name": 1}, 100, "", marks=pytest.mark.skip(reason="issues/")),
            ({"name": ""}, 102, "`name` can not be empty."),
            ({"name": "duplicated_name"}, 0, ""),
            ({"name": "case insensitive"}, 0, ""),
        ],
    )
    def test_name(self, get_http_api_auth, add_chat_assistants, payload, expected_code, expected_message):
        _, _, chat_assistant_ids = add_chat_assistants
        if payload["name"] == "duplicated_name":
            create_session_with_chat_assistant(get_http_api_auth, chat_assistant_ids[0], payload)
        elif payload["name"] == "case insensitive":
            create_session_with_chat_assistant(get_http_api_auth, chat_assistant_ids[0], {"name": payload["name"].upper()})

        res = create_session_with_chat_assistant(get_http_api_auth, chat_assistant_ids[0], payload)
        assert res["code"] == expected_code, res
        if expected_code == 0:
            assert res["data"]["name"] == payload["name"]
            assert res["data"]["chat_id"] == chat_assistant_ids[0]
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "chat_assistant_id, expected_code, expected_message",
        [
            ("", 100, "<MethodNotAllowed '405: Method Not Allowed'>"),
            ("invalid_chat_assistant_id", 102, "You do not own the assistant."),
        ],
    )
    def test_invalid_chat_assistant_id(self, get_http_api_auth, chat_assistant_id, expected_code, expected_message):
        res = create_session_with_chat_assistant(get_http_api_auth, chat_assistant_id, {"name": "valid_name"})
        assert res["code"] == expected_code
        assert res["message"] == expected_message

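    # Create 1000 sessions from 5 worker threads and verify the listing grows by
    # exactly chunk_num relative to the count observed before the burst.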
    @pytest.mark.p3
    def test_concurrent_create_session(self, get_http_api_auth, add_chat_assistants):
        chunk_num = 1000
        _, _, chat_assistant_ids = add_chat_assistants
        res = list_session_with_chat_assistants(get_http_api_auth, chat_assistant_ids[0])
        if res["code"] != 0:
            assert False, res
        chunks_count = len(res["data"])

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [
                executor.submit(
                    create_session_with_chat_assistant,
                    get_http_api_auth,
                    chat_assistant_ids[0],
                    {"name": f"session with chat assistant test {i}"},
                )
                for i in range(chunk_num)
            ]
            responses = [f.result() for f in futures]
            assert all(r["code"] == 0 for r in responses)
        res = list_session_with_chat_assistants(get_http_api_auth, chat_assistant_ids[0], {"page_size": chunk_num})
        if res["code"] != 0:
            assert False, res
        assert len(res["data"]) == chunks_count + chunk_num

    @pytest.mark.p3
    def test_add_session_to_deleted_chat_assistant(self, get_http_api_auth, add_chat_assistants):
        _, _, chat_assistant_ids = add_chat_assistants
        res = delete_chat_assistants(get_http_api_auth, {"ids": [chat_assistant_ids[0]]})
        assert res["code"] == 0
        res = create_session_with_chat_assistant(get_http_api_auth, chat_assistant_ids[0], {"name": "valid_name"})
        assert res["code"] == 102
        assert res["message"] == "You do not own the assistant."
@ -1,170 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import INVALID_API_TOKEN, batch_add_sessions_with_chat_assistant, delete_session_with_chat_assistants, list_session_with_chat_assistants
from libs.auth import RAGFlowHttpApiAuth


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = delete_session_with_chat_assistants(auth, "chat_assistant_id")
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestSessionWithChatAssistantDelete:
    @pytest.mark.p3
    @pytest.mark.parametrize(
        "chat_assistant_id, expected_code, expected_message",
        [
            ("", 100, "<MethodNotAllowed '405: Method Not Allowed'>"),
            (
                "invalid_chat_assistant_id",
                102,
                "You don't own the chat",
            ),
        ],
    )
    def test_invalid_chat_assistant_id(self, get_http_api_auth, add_sessions_with_chat_assistant_func, chat_assistant_id, expected_code, expected_message):
        _, session_ids = add_sessions_with_chat_assistant_func
        res = delete_session_with_chat_assistants(get_http_api_auth, chat_assistant_id, {"ids": session_ids})
        assert res["code"] == expected_code
        assert res["message"] == expected_message

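    # A batch mixing one invalid id with valid ones still returns code 0: the valid
    # sessions are deleted and the bad id is reported in data["errors"].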
    @pytest.mark.parametrize(
        "payload",
        [
            pytest.param(lambda r: {"ids": ["invalid_id"] + r}, marks=pytest.mark.p3),
            pytest.param(lambda r: {"ids": r[:1] + ["invalid_id"] + r[1:5]}, marks=pytest.mark.p1),
            pytest.param(lambda r: {"ids": r + ["invalid_id"]}, marks=pytest.mark.p3),
        ],
    )
    def test_delete_partial_invalid_id(self, get_http_api_auth, add_sessions_with_chat_assistant_func, payload):
        chat_assistant_id, session_ids = add_sessions_with_chat_assistant_func
        if callable(payload):
            payload = payload(session_ids)
        res = delete_session_with_chat_assistants(get_http_api_auth, chat_assistant_id, payload)
        assert res["code"] == 0
        assert res["data"]["errors"][0] == "The chat doesn't own the session invalid_id"

        res = list_session_with_chat_assistants(get_http_api_auth, chat_assistant_id)
        if res["code"] != 0:
            assert False, res
        assert len(res["data"]) == 0

    @pytest.mark.p3
    def test_repeated_deletion(self, get_http_api_auth, add_sessions_with_chat_assistant_func):
        chat_assistant_id, session_ids = add_sessions_with_chat_assistant_func
        payload = {"ids": session_ids}
        res = delete_session_with_chat_assistants(get_http_api_auth, chat_assistant_id, payload)
        assert res["code"] == 0

        res = delete_session_with_chat_assistants(get_http_api_auth, chat_assistant_id, payload)
        assert res["code"] == 102
        assert "The chat doesn't own the session" in res["message"]

    @pytest.mark.p3
    def test_duplicate_deletion(self, get_http_api_auth, add_sessions_with_chat_assistant_func):
        chat_assistant_id, session_ids = add_sessions_with_chat_assistant_func
        res = delete_session_with_chat_assistants(get_http_api_auth, chat_assistant_id, {"ids": session_ids * 2})
        assert res["code"] == 0
        assert "Duplicate session ids" in res["data"]["errors"][0]
        assert res["data"]["success_count"] == 5

        res = list_session_with_chat_assistants(get_http_api_auth, chat_assistant_id)
        if res["code"] != 0:
            assert False, res
        assert len(res["data"]) == 0

    @pytest.mark.p3
    def test_concurrent_deletion(self, get_http_api_auth, add_chat_assistants):
        sessions_num = 100
        _, _, chat_assistant_ids = add_chat_assistants
        session_ids = batch_add_sessions_with_chat_assistant(get_http_api_auth, chat_assistant_ids[0], sessions_num)

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [
                executor.submit(
                    delete_session_with_chat_assistants,
                    get_http_api_auth,
                    chat_assistant_ids[0],
                    {"ids": session_ids[i : i + 1]},
                )
                for i in range(sessions_num)
            ]
            responses = [f.result() for f in futures]
            assert all(r["code"] == 0 for r in responses)

    @pytest.mark.p3
    def test_delete_1k(self, get_http_api_auth, add_chat_assistants):
        sessions_num = 1_000
        _, _, chat_assistant_ids = add_chat_assistants
        session_ids = batch_add_sessions_with_chat_assistant(get_http_api_auth, chat_assistant_ids[0], sessions_num)

        res = delete_session_with_chat_assistants(get_http_api_auth, chat_assistant_ids[0], {"ids": session_ids})
        assert res["code"] == 0

        res = list_session_with_chat_assistants(get_http_api_auth, chat_assistant_ids[0])
        if res["code"] != 0:
            assert False, res
        assert len(res["data"]) == 0

    @pytest.mark.parametrize(
        "payload, expected_code, expected_message, remaining",
        [
            pytest.param(None, 0, """TypeError("argument of type \'NoneType\' is not iterable")""", 0, marks=pytest.mark.skip),
            pytest.param({"ids": ["invalid_id"]}, 102, "The chat doesn't own the session invalid_id", 5, marks=pytest.mark.p3),
            pytest.param("not json", 100, """AttributeError("\'str\' object has no attribute \'get\'")""", 5, marks=pytest.mark.skip),
            pytest.param(lambda r: {"ids": r[:1]}, 0, "", 4, marks=pytest.mark.p3),
            pytest.param(lambda r: {"ids": r}, 0, "", 0, marks=pytest.mark.p1),
            pytest.param({"ids": []}, 0, "", 0, marks=pytest.mark.p3),
        ],
    )
    def test_basic_scenarios(
        self,
        get_http_api_auth,
        add_sessions_with_chat_assistant_func,
        payload,
        expected_code,
        expected_message,
        remaining,
    ):
        chat_assistant_id, session_ids = add_sessions_with_chat_assistant_func
        if callable(payload):
            payload = payload(session_ids)
        res = delete_session_with_chat_assistants(get_http_api_auth, chat_assistant_id, payload)
        assert res["code"] == expected_code
        if res["code"] != 0:
            assert res["message"] == expected_message

        res = list_session_with_chat_assistants(get_http_api_auth, chat_assistant_id)
        if res["code"] != 0:
            assert False, res
        assert len(res["data"]) == remaining
@ -1,247 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from concurrent.futures import ThreadPoolExecutor

import pytest
from common import INVALID_API_TOKEN, delete_chat_assistants, list_session_with_chat_assistants
from libs.auth import RAGFlowHttpApiAuth
from libs.utils import is_sorted


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = list_session_with_chat_assistants(auth, "chat_assistant_id")
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestSessionsWithChatAssistantList:
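    # The class-scoped fixture creates exactly 5 sessions, which the pagination,
    # ordering, and filtering cases below rely on. Pages are 1-based, and numeric
    # strings are accepted for page/page_size.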
    @pytest.mark.p1
    @pytest.mark.parametrize(
        "params, expected_code, expected_page_size, expected_message",
        [
            ({"page": None, "page_size": 2}, 0, 2, ""),
            pytest.param({"page": 0, "page_size": 2}, 100, 0, "ValueError('Search does not support negative slicing.')", marks=pytest.mark.skip),
            ({"page": 2, "page_size": 2}, 0, 2, ""),
            ({"page": 3, "page_size": 2}, 0, 1, ""),
            ({"page": "3", "page_size": 2}, 0, 1, ""),
            pytest.param({"page": -1, "page_size": 2}, 100, 0, "ValueError('Search does not support negative slicing.')", marks=pytest.mark.skip),
            pytest.param({"page": "a", "page_size": 2}, 100, 0, """ValueError("invalid literal for int() with base 10: \'a\'")""", marks=pytest.mark.skip),
        ],
    )
    def test_page(self, get_http_api_auth, add_sessions_with_chat_assistant, params, expected_code, expected_page_size, expected_message):
        chat_assistant_id, _ = add_sessions_with_chat_assistant
        res = list_session_with_chat_assistants(get_http_api_auth, chat_assistant_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]) == expected_page_size
        else:
            assert res["message"] == expected_message

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "params, expected_code, expected_page_size, expected_message",
        [
            ({"page_size": None}, 0, 5, ""),
            ({"page_size": 0}, 0, 0, ""),
            ({"page_size": 1}, 0, 1, ""),
            ({"page_size": 6}, 0, 5, ""),
            ({"page_size": "1"}, 0, 1, ""),
            pytest.param({"page_size": -1}, 0, 5, "", marks=pytest.mark.skip),
            pytest.param({"page_size": "a"}, 100, 0, """ValueError("invalid literal for int() with base 10: \'a\'")""", marks=pytest.mark.skip),
        ],
    )
    def test_page_size(self, get_http_api_auth, add_sessions_with_chat_assistant, params, expected_code, expected_page_size, expected_message):
        chat_assistant_id, _ = add_sessions_with_chat_assistant
        res = list_session_with_chat_assistants(get_http_api_auth, chat_assistant_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]) == expected_page_size
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "params, expected_code, assertions, expected_message",
        [
            ({"orderby": None}, 0, lambda r: (is_sorted(r["data"], "create_time", True)), ""),
            ({"orderby": "create_time"}, 0, lambda r: (is_sorted(r["data"], "create_time", True)), ""),
            ({"orderby": "update_time"}, 0, lambda r: (is_sorted(r["data"], "update_time", True)), ""),
            ({"orderby": "name", "desc": "False"}, 0, lambda r: (is_sorted(r["data"], "name", False)), ""),
            pytest.param({"orderby": "unknown"}, 102, 0, "orderby should be create_time or update_time", marks=pytest.mark.skip(reason="issues/")),
        ],
    )
    def test_orderby(
        self,
        get_http_api_auth,
        add_sessions_with_chat_assistant,
        params,
        expected_code,
        assertions,
        expected_message,
    ):
        chat_assistant_id, _ = add_sessions_with_chat_assistant
        res = list_session_with_chat_assistants(get_http_api_auth, chat_assistant_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            if callable(assertions):
                assert assertions(res)
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "params, expected_code, assertions, expected_message",
        [
            ({"desc": None}, 0, lambda r: (is_sorted(r["data"], "create_time", True)), ""),
            ({"desc": "true"}, 0, lambda r: (is_sorted(r["data"], "create_time", True)), ""),
            ({"desc": "True"}, 0, lambda r: (is_sorted(r["data"], "create_time", True)), ""),
            ({"desc": True}, 0, lambda r: (is_sorted(r["data"], "create_time", True)), ""),
            ({"desc": "false"}, 0, lambda r: (is_sorted(r["data"], "create_time", False)), ""),
            ({"desc": "False"}, 0, lambda r: (is_sorted(r["data"], "create_time", False)), ""),
            ({"desc": False}, 0, lambda r: (is_sorted(r["data"], "create_time", False)), ""),
            ({"desc": "False", "orderby": "update_time"}, 0, lambda r: (is_sorted(r["data"], "update_time", False)), ""),
            pytest.param({"desc": "unknown"}, 102, 0, "desc should be true or false", marks=pytest.mark.skip(reason="issues/")),
        ],
    )
    def test_desc(
        self,
        get_http_api_auth,
        add_sessions_with_chat_assistant,
        params,
        expected_code,
        assertions,
        expected_message,
    ):
        chat_assistant_id, _ = add_sessions_with_chat_assistant
        res = list_session_with_chat_assistants(get_http_api_auth, chat_assistant_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            if callable(assertions):
                assert assertions(res)
        else:
            assert res["message"] == expected_message

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "params, expected_code, expected_num, expected_message",
        [
            ({"name": None}, 0, 5, ""),
            ({"name": ""}, 0, 5, ""),
            ({"name": "session_with_chat_assistant_1"}, 0, 1, ""),
            ({"name": "unknown"}, 0, 0, ""),
        ],
    )
    def test_name(self, get_http_api_auth, add_sessions_with_chat_assistant, params, expected_code, expected_num, expected_message):
        chat_assistant_id, _ = add_sessions_with_chat_assistant
        res = list_session_with_chat_assistants(get_http_api_auth, chat_assistant_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            if params["name"] == "session_with_chat_assistant_1":
                assert res["data"][0]["name"] == params["name"]
            else:
                assert len(res["data"]) == expected_num
        else:
            assert res["message"] == expected_message

    @pytest.mark.p1
    @pytest.mark.parametrize(
        "session_id, expected_code, expected_num, expected_message",
        [
            (None, 0, 5, ""),
            ("", 0, 5, ""),
            (lambda r: r[0], 0, 1, ""),
            ("unknown", 0, 0, "The chat doesn't exist"),
        ],
    )
    def test_id(self, get_http_api_auth, add_sessions_with_chat_assistant, session_id, expected_code, expected_num, expected_message):
        chat_assistant_id, session_ids = add_sessions_with_chat_assistant
        if callable(session_id):
            params = {"id": session_id(session_ids)}
        else:
            params = {"id": session_id}

        res = list_session_with_chat_assistants(get_http_api_auth, chat_assistant_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            if params["id"] == session_ids[0]:
                assert res["data"][0]["id"] == params["id"]
            else:
                assert len(res["data"]) == expected_num
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "session_id, name, expected_code, expected_num, expected_message",
        [
            (lambda r: r[0], "session_with_chat_assistant_0", 0, 1, ""),
            (lambda r: r[0], "session_with_chat_assistant_100", 0, 0, ""),
            (lambda r: r[0], "unknown", 0, 0, ""),
            ("id", "session_with_chat_assistant_0", 0, 0, ""),
        ],
    )
    def test_name_and_id(self, get_http_api_auth, add_sessions_with_chat_assistant, session_id, name, expected_code, expected_num, expected_message):
        chat_assistant_id, session_ids = add_sessions_with_chat_assistant
        if callable(session_id):
            params = {"id": session_id(session_ids), "name": name}
        else:
            params = {"id": session_id, "name": name}

        res = list_session_with_chat_assistants(get_http_api_auth, chat_assistant_id, params=params)
        assert res["code"] == expected_code
        if expected_code == 0:
            assert len(res["data"]) == expected_num
        else:
            assert res["message"] == expected_message

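    # 100 concurrent list requests against the same assistant should all succeed.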
    @pytest.mark.p3
    def test_concurrent_list(self, get_http_api_auth, add_sessions_with_chat_assistant):
        chat_assistant_id, _ = add_sessions_with_chat_assistant
        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(list_session_with_chat_assistants, get_http_api_auth, chat_assistant_id) for i in range(100)]
            responses = [f.result() for f in futures]
            assert all(r["code"] == 0 for r in responses)

    @pytest.mark.p3
    def test_invalid_params(self, get_http_api_auth, add_sessions_with_chat_assistant):
        chat_assistant_id, _ = add_sessions_with_chat_assistant
        params = {"a": "b"}
        res = list_session_with_chat_assistants(get_http_api_auth, chat_assistant_id, params=params)
        assert res["code"] == 0
        assert len(res["data"]) == 5

    @pytest.mark.p3
    def test_list_chats_after_deleting_associated_chat_assistant(self, get_http_api_auth, add_sessions_with_chat_assistant):
        chat_assistant_id, _ = add_sessions_with_chat_assistant
        res = delete_chat_assistants(get_http_api_auth, {"ids": [chat_assistant_id]})
        assert res["code"] == 0

        res = list_session_with_chat_assistants(get_http_api_auth, chat_assistant_id)
        assert res["code"] == 102
        assert "You don't own the assistant" in res["message"]
@ -1,148 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from concurrent.futures import ThreadPoolExecutor
from random import randint

import pytest
from common import INVALID_API_TOKEN, SESSION_WITH_CHAT_NAME_LIMIT, delete_chat_assistants, list_session_with_chat_assistants, update_session_with_chat_assistant
from libs.auth import RAGFlowHttpApiAuth


@pytest.mark.p1
class TestAuthorization:
    @pytest.mark.parametrize(
        "auth, expected_code, expected_message",
        [
            (None, 0, "`Authorization` can't be empty"),
            (
                RAGFlowHttpApiAuth(INVALID_API_TOKEN),
                109,
                "Authentication error: API key is invalid!",
            ),
        ],
    )
    def test_invalid_auth(self, auth, expected_code, expected_message):
        res = update_session_with_chat_assistant(auth, "chat_assistant_id", "session_id")
        assert res["code"] == expected_code
        assert res["message"] == expected_message


class TestSessionWithChatAssistantUpdate:
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            pytest.param({"name": "valid_name"}, 0, "", marks=pytest.mark.p1),
            pytest.param({"name": "a" * (SESSION_WITH_CHAT_NAME_LIMIT + 1)}, 102, "", marks=pytest.mark.skip(reason="issues/")),
            pytest.param({"name": 1}, 100, "", marks=pytest.mark.skip(reason="issues/")),
            pytest.param({"name": ""}, 102, "`name` can not be empty.", marks=pytest.mark.p3),
            pytest.param({"name": "duplicated_name"}, 0, "", marks=pytest.mark.p3),
            pytest.param({"name": "case insensitive"}, 0, "", marks=pytest.mark.p3),
        ],
    )
    def test_name(self, get_http_api_auth, add_sessions_with_chat_assistant_func, payload, expected_code, expected_message):
        chat_assistant_id, session_ids = add_sessions_with_chat_assistant_func
        if payload["name"] == "duplicated_name":
            update_session_with_chat_assistant(get_http_api_auth, chat_assistant_id, session_ids[0], payload)
        elif payload["name"] == "case insensitive":
            update_session_with_chat_assistant(get_http_api_auth, chat_assistant_id, session_ids[0], {"name": payload["name"].upper()})

        res = update_session_with_chat_assistant(get_http_api_auth, chat_assistant_id, session_ids[0], payload)
        assert res["code"] == expected_code, res
        if expected_code == 0:
            res = list_session_with_chat_assistants(get_http_api_auth, chat_assistant_id, {"id": session_ids[0]})
            assert res["data"][0]["name"] == payload["name"]
        else:
            assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "chat_assistant_id, expected_code, expected_message",
        [
            ("", 100, "<NotFound '404: Not Found'>"),
            pytest.param("invalid_chat_assistant_id", 102, "Session does not exist", marks=pytest.mark.skip(reason="issues/")),
        ],
    )
    def test_invalid_chat_assistant_id(self, get_http_api_auth, add_sessions_with_chat_assistant_func, chat_assistant_id, expected_code, expected_message):
        _, session_ids = add_sessions_with_chat_assistant_func
        res = update_session_with_chat_assistant(get_http_api_auth, chat_assistant_id, session_ids[0], {"name": "valid_name"})
        assert res["code"] == expected_code
        assert res["message"] == expected_message

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "session_id, expected_code, expected_message",
        [
            ("", 100, "<MethodNotAllowed '405: Method Not Allowed'>"),
            ("invalid_session_id", 102, "Session does not exist"),
        ],
    )
    def test_invalid_session_id(self, get_http_api_auth, add_sessions_with_chat_assistant_func, session_id, expected_code, expected_message):
        chat_assistant_id, _ = add_sessions_with_chat_assistant_func
        res = update_session_with_chat_assistant(get_http_api_auth, chat_assistant_id, session_id, {"name": "valid_name"})
        assert res["code"] == expected_code
        assert res["message"] == expected_message

    @pytest.mark.p3
    def test_repeated_update_session(self, get_http_api_auth, add_sessions_with_chat_assistant_func):
        chat_assistant_id, session_ids = add_sessions_with_chat_assistant_func
        res = update_session_with_chat_assistant(get_http_api_auth, chat_assistant_id, session_ids[0], {"name": "valid_name_1"})
        assert res["code"] == 0

        res = update_session_with_chat_assistant(get_http_api_auth, chat_assistant_id, session_ids[0], {"name": "valid_name_2"})
        assert res["code"] == 0

    @pytest.mark.p3
    @pytest.mark.parametrize(
        "payload, expected_code, expected_message",
        [
            pytest.param({"unknown_key": "unknown_value"}, 100, "ValueError", marks=pytest.mark.skip),
            ({}, 0, ""),
            pytest.param(None, 100, "TypeError", marks=pytest.mark.skip),
        ],
    )
    def test_invalid_params(self, get_http_api_auth, add_sessions_with_chat_assistant_func, payload, expected_code, expected_message):
        chat_assistant_id, session_ids = add_sessions_with_chat_assistant_func
        res = update_session_with_chat_assistant(get_http_api_auth, chat_assistant_id, session_ids[0], payload)
        assert res["code"] == expected_code
        if expected_code != 0:
            assert expected_message in res["message"]

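    # 50 concurrent renames each target a random one of the five fixture sessions;
    # every request should succeed even when updates race on the same session.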
    @pytest.mark.p3
    def test_concurrent_update_session(self, get_http_api_auth, add_sessions_with_chat_assistant_func):
        chunk_num = 50
        chat_assistant_id, session_ids = add_sessions_with_chat_assistant_func

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [
                executor.submit(
                    update_session_with_chat_assistant,
                    get_http_api_auth,
                    chat_assistant_id,
                    session_ids[randint(0, 4)],
                    {"name": f"update session test {i}"},
                )
                for i in range(chunk_num)
            ]
            responses = [f.result() for f in futures]
            assert all(r["code"] == 0 for r in responses)

    @pytest.mark.p3
    def test_update_session_to_deleted_chat_assistant(self, get_http_api_auth, add_sessions_with_chat_assistant_func):
        chat_assistant_id, session_ids = add_sessions_with_chat_assistant_func
        delete_chat_assistants(get_http_api_auth, {"ids": [chat_assistant_id]})
        res = update_session_with_chat_assistant(get_http_api_auth, chat_assistant_id, session_ids[0], {"name": "valid_name"})
        assert res["code"] == 102
        assert res["message"] == "You do not own the session"
@ -1,19 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os

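# Base address of the RAGFlow server under test; override via the HOST_ADDRESS env var.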
HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')
@ -1,19 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

def test_get_email(get_email):
    print("\nEmail account:", flush=True)
    print(f"{get_email}\n", flush=True)
@ -1,36 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from ragflow_sdk import RAGFlow, Agent
from common import HOST_ADDRESS
import pytest


@pytest.mark.skip(reason="")
def test_list_agents_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    rag.list_agents()


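# Skipped by default: the hard-coded API key and agent_id below are
# environment-specific placeholders that only resolve against the deployment
# they were taken from.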
@pytest.mark.skip(reason="")
def test_converse_with_agent_with_success(get_api_key_fixture):
    API_KEY = "ragflow-BkOGNhYjIyN2JiODExZWY5MzVhMDI0Mm"
    agent_id = "ebfada2eb2bc11ef968a0242ac120006"
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    lang = "Chinese"
    file = "How is the weather tomorrow?"
    Agent.ask(agent_id=agent_id, rag=rag, lang=lang, file=file)
@ -1,131 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from common import HOST_ADDRESS
from ragflow_sdk import RAGFlow
from ragflow_sdk.modules.chat import Chat


def test_create_chat_with_name(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    kb = rag.create_dataset(name="test_create_chat")
    display_name = "ragflow.txt"
    with open("test_data/ragflow.txt", "rb") as file:
        blob = file.read()
    document = {"display_name": display_name, "blob": blob}
    documents = []
    documents.append(document)
    docs = kb.upload_documents(documents)
    for doc in docs:
        doc.add_chunk("This is a test to add chunk")
    llm = Chat.LLM(
        rag,
        {
            "model_name": "glm-4-flash@ZHIPU-AI",
            "temperature": 0.1,
            "top_p": 0.3,
            "presence_penalty": 0.4,
            "frequency_penalty": 0.7,
            "max_tokens": 512,
        },
    )
    rag.create_chat("test_create_chat", dataset_ids=[kb.id], llm=llm)


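# Each test below repeats the same setup (create a dataset, upload ragflow.txt,
# seed a chunk, build a Chat.LLM config) and then exercises one chat operation.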
def test_update_chat_with_name(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    kb = rag.create_dataset(name="test_update_chat")
    display_name = "ragflow.txt"
    with open("test_data/ragflow.txt", "rb") as file:
        blob = file.read()
    document = {"display_name": display_name, "blob": blob}
    documents = []
    documents.append(document)
    docs = kb.upload_documents(documents)
    for doc in docs:
        doc.add_chunk("This is a test to add chunk")
    llm = Chat.LLM(
        rag,
        {
            "model_name": "glm-4-flash@ZHIPU-AI",
            "temperature": 0.1,
            "top_p": 0.3,
            "presence_penalty": 0.4,
            "frequency_penalty": 0.7,
            "max_tokens": 512,
        },
    )
    chat = rag.create_chat("test_update_chat", dataset_ids=[kb.id], llm=llm)
    chat.update({"name": "new_chat"})


def test_delete_chats_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    kb = rag.create_dataset(name="test_delete_chat")
    display_name = "ragflow.txt"
    with open("test_data/ragflow.txt", "rb") as file:
        blob = file.read()
    document = {"display_name": display_name, "blob": blob}
    documents = []
    documents.append(document)
    docs = kb.upload_documents(documents)
    for doc in docs:
        doc.add_chunk("This is a test to add chunk")
    llm = Chat.LLM(
        rag,
        {
            "model_name": "glm-4-flash@ZHIPU-AI",
            "temperature": 0.1,
            "top_p": 0.3,
            "presence_penalty": 0.4,
            "frequency_penalty": 0.7,
            "max_tokens": 512,
        },
    )
    chat = rag.create_chat("test_delete_chat", dataset_ids=[kb.id], llm=llm)
    rag.delete_chats(ids=[chat.id])


def test_list_chats_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    kb = rag.create_dataset(name="test_list_chats")
    display_name = "ragflow.txt"
    with open("test_data/ragflow.txt", "rb") as file:
        blob = file.read()
    document = {"display_name": display_name, "blob": blob}
    documents = []
    documents.append(document)
    docs = kb.upload_documents(documents)
    for doc in docs:
        doc.add_chunk("This is a test to add chunk")
    llm = Chat.LLM(
        rag,
        {
            "model_name": "glm-4-flash@ZHIPU-AI",
            "temperature": 0.1,
            "top_p": 0.3,
            "presence_penalty": 0.4,
            "frequency_penalty": 0.7,
            "max_tokens": 512,
        },
    )
    rag.create_chat("test_list_1", dataset_ids=[kb.id], llm=llm)
    rag.create_chat("test_list_2", dataset_ids=[kb.id], llm=llm)
    rag.list_chats()
@ -1,216 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from ragflow_sdk import RAGFlow
from common import HOST_ADDRESS
from time import sleep


def test_parse_document_with_txt(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_parse_document")
    name = 'ragflow_test.txt'
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    docs = ds.upload_documents([{"display_name": name, "blob": blob}])
    doc = docs[0]
    ds.async_parse_documents(document_ids=[doc.id])
    '''
    for n in range(100):
        if doc.progress == 1:
            break
        sleep(1)
    else:
        raise Exception("Run time ERROR: Document parsing did not complete in time.")
    '''


def test_parse_and_cancel_document(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_parse_and_cancel_document")
    name = 'ragflow_test.txt'
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    docs = ds.upload_documents([{"display_name": name, "blob": blob}])
    doc = docs[0]
    ds.async_parse_documents(document_ids=[doc.id])
    sleep(1)
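    # Cancel only makes sense while parsing is still in flight: progress stays
    # strictly between 0 and 1 until the document is fully parsed.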
    if 0 < doc.progress < 1:
        ds.async_cancel_parse_documents(document_ids=[doc.id])


def test_bulk_parse_documents(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_bulk_parse_and_cancel_documents")
    with open("test_data/ragflow.txt", "rb") as file:
        blob = file.read()
    documents = [
        {'display_name': 'test1.txt', 'blob': blob},
        {'display_name': 'test2.txt', 'blob': blob},
        {'display_name': 'test3.txt', 'blob': blob}
    ]
    docs = ds.upload_documents(documents)
    ids = [doc.id for doc in docs]
    ds.async_parse_documents(ids)
    '''
    for n in range(100):
        all_completed = all(doc.progress == 1 for doc in docs)
        if all_completed:
            break
        sleep(1)
    else:
        raise Exception("Run time ERROR: Bulk document parsing did not complete in time.")
    '''


def test_list_chunks_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_list_chunks_with_success")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"display_name": "test_list_chunks_with_success.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    ids = [doc.id for doc in docs]
    ds.async_parse_documents(ids)
    '''
    for n in range(100):
        all_completed = all(doc.progress == 1 for doc in docs)
        if all_completed:
            break
        sleep(1)
    else:
        raise Exception("Run time ERROR: Chunk document parsing did not complete in time.")
    '''
    doc = docs[0]
    doc.list_chunks()


def test_add_chunk_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_add_chunk_with_success")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"display_name": "test_list_chunks_with_success.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    doc = docs[0]
    doc.add_chunk(content="This is a chunk addition test")


def test_delete_chunk_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_delete_chunk_with_success")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"display_name": "test_delete_chunk_with_success.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    doc = docs[0]
    chunk = doc.add_chunk(content="This is a chunk addition test")
    sleep(5)
    doc.delete_chunks([chunk.id])


def test_update_chunk_content(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_update_chunk_content_with_success")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"display_name": "test_update_chunk_content_with_success.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    doc = docs[0]
    chunk = doc.add_chunk(content="This is a chunk addition test")
    # For Elasticsearch, a freshly added chunk is not searchable for a short time (~2s).
    sleep(3)
    chunk.update({"content": "This is a updated content"})


def test_update_chunk_available(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_update_chunk_available_with_success")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"display_name": "test_update_chunk_available_with_success.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    doc = docs[0]
    chunk = doc.add_chunk(content="This is a chunk addition test")
    # For Elasticsearch, a freshly added chunk is not searchable for a short time (~2s).
    sleep(3)
    chunk.update({"available": 0})


def test_retrieve_chunks(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="retrieval")
    with open("test_data/ragflow_test.txt", "rb") as file:
        blob = file.read()
    '''
    # chunk_size = 1024 * 1024
    # chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
    documents = [
        {'display_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
    ]
    '''
    documents = [{"display_name": "test_retrieve_chunks.txt", "blob": blob}]
    docs = ds.upload_documents(documents)
    doc = docs[0]
    doc.add_chunk(content="This is a chunk addition test")
    rag.retrieve(dataset_ids=[ds.id], document_ids=[doc.id])
    rag.delete_datasets(ids=[ds.id])


# TODO: test retrieval with different parameters.
@ -1,77 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import random

import pytest
from common import HOST_ADDRESS
from ragflow_sdk import RAGFlow


def test_create_dataset_with_name(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    rag.create_dataset("test_create_dataset_with_name")


def test_create_dataset_with_duplicated_name(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    rag.create_dataset("test_create_dataset_with_duplicated_name")
    with pytest.raises(Exception) as exc_info:
        rag.create_dataset("test_create_dataset_with_duplicated_name")
    assert str(exc_info.value) == "Dataset name 'test_create_dataset_with_duplicated_name' already exists"


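# Any chunk method the server accepts should work at dataset creation time;
# one is picked at random per run.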
def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email"]
    random_chunk_method = random.choice(valid_chunk_methods)
    rag.create_dataset("test_create_dataset_with_random_chunk_method", chunk_method=random_chunk_method)


def test_create_dataset_with_invalid_parameter(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    chunk_method = "invalid_chunk_method"
    with pytest.raises(Exception) as exc_info:
        rag.create_dataset("test_create_dataset_with_invalid_chunk_method", chunk_method=chunk_method)
    assert (
        str(exc_info.value)
        == f"Field: <chunk_method> - Message: <Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table' or 'tag'> - Value: <{chunk_method}>"
    )


def test_update_dataset_with_name(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset("test_update_dataset")
    ds.update({"name": "updated_dataset"})


def test_delete_datasets_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset("test_delete_dataset")
    rag.delete_datasets(ids=[ds.id])


def test_list_datasets_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    rag.create_dataset("test_list_datasets")
    rag.list_datasets()
@ -1,198 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from ragflow_sdk import RAGFlow
from common import HOST_ADDRESS
import pytest


def test_upload_document_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_upload_document")
    blob = b"Sample document content for test."
    with open("test_data/ragflow.txt", "rb") as file:
        blob_2 = file.read()
    document_infos = []
    document_infos.append({"display_name": "test_1.txt", "blob": blob})
    document_infos.append({"display_name": "test_2.txt", "blob": blob_2})
    ds.upload_documents(document_infos)


def test_update_document_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_update_document")
    blob = b"Sample document content for test."
    document_infos = [{"display_name": "test.txt", "blob": blob}]
    docs = ds.upload_documents(document_infos)
    doc = docs[0]
    doc.update({"chunk_method": "manual", "name": "manual.txt"})


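# Download should round-trip the uploaded blob; the bytes are written to a local file.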
def test_download_document_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_download_document")
    blob = b"Sample document content for test."
    document_infos = [{"display_name": "test_1.txt", "blob": blob}]
    docs = ds.upload_documents(document_infos)
    doc = docs[0]
    with open("test_download.txt", "wb+") as file:
        file.write(doc.download())


def test_list_documents_in_dataset_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_list_documents")
    blob = b"Sample document content for test."
    document_infos = [{"display_name": "test.txt", "blob": blob}]
    ds.upload_documents(document_infos)
    ds.list_documents(keywords="test", page=1, page_size=12)


def test_delete_documents_in_dataset_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_delete_documents")
    name = "test_delete_documents.txt"
    blob = b"Sample document content for test."
    document_infos = [{"display_name": name, "blob": blob}]
    docs = ds.upload_documents(document_infos)
    ds.delete_documents([docs[0].id])


# Upload and parse documents of different file types with the general parse method.
def test_upload_and_parse_pdf_documents_with_general_parse_method(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_pdf_document")
    with open("test_data/test.pdf", "rb") as file:
        blob = file.read()
    document_infos = [{"display_name": "test.pdf", "blob": blob}]
    docs = ds.upload_documents(document_infos)
    doc = docs[0]
    ds.async_parse_documents([doc.id])


def test_upload_and_parse_docx_documents_with_general_parse_method(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_docx_document")
    with open("test_data/test.docx", "rb") as file:
        blob = file.read()
    document_infos = [{"display_name": "test.docx", "blob": blob}]
    docs = ds.upload_documents(document_infos)
    doc = docs[0]
    ds.async_parse_documents([doc.id])


def test_upload_and_parse_excel_documents_with_general_parse_method(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_excel_document")
    with open("test_data/test.xlsx", "rb") as file:
        blob = file.read()
    document_infos = [{"display_name": "test.xlsx", "blob": blob}]
    docs = ds.upload_documents(document_infos)
    doc = docs[0]
    ds.async_parse_documents([doc.id])


def test_upload_and_parse_ppt_documents_with_general_parse_method(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_ppt_document")
    with open("test_data/test.ppt", "rb") as file:
        blob = file.read()
    document_infos = [{"display_name": "test.ppt", "blob": blob}]
    docs = ds.upload_documents(document_infos)
    doc = docs[0]
    ds.async_parse_documents([doc.id])


def test_upload_and_parse_image_documents_with_general_parse_method(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_image_document")
    with open("test_data/test.jpg", "rb") as file:
        blob = file.read()
    document_infos = [{"display_name": "test.jpg", "blob": blob}]
    docs = ds.upload_documents(document_infos)
    doc = docs[0]
    ds.async_parse_documents([doc.id])


def test_upload_and_parse_txt_documents_with_general_parse_method(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_txt_document")
    with open("test_data/test.txt", "rb") as file:
        blob = file.read()
    document_infos = [{"display_name": "test.txt", "blob": blob}]
    docs = ds.upload_documents(document_infos)
    doc = docs[0]
    ds.async_parse_documents([doc.id])


def test_upload_and_parse_md_documents_with_general_parse_method(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    ds = rag.create_dataset(name="test_md_document")
    with open("test_data/test.md", "rb") as file:
        blob = file.read()
    document_infos = [{"display_name": "test.md", "blob": blob}]
    docs = ds.upload_documents(document_infos)
    doc = docs[0]
    ds.async_parse_documents([doc.id])


def test_upload_and_parse_json_documents_with_general_parse_method(get_api_key_fixture):
|
||||
API_KEY = get_api_key_fixture
|
||||
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
||||
ds = rag.create_dataset(name="test_json_document")
|
||||
with open("test_data/test.json", "rb") as file:
|
||||
blob = file.read()
|
||||
document_infos = [{"display_name": "test.json", "blob": blob}]
|
||||
docs = ds.upload_documents(document_infos)
|
||||
doc = docs[0]
|
||||
ds.async_parse_documents([doc.id])
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="")
|
||||
def test_upload_and_parse_eml_documents_with_general_parse_method(get_api_key_fixture):
|
||||
API_KEY = get_api_key_fixture
|
||||
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
||||
ds = rag.create_dataset(name="test_eml_document")
|
||||
with open("test_data/test.eml", "rb") as file:
|
||||
blob = file.read()
|
||||
document_infos = [{"display_name": "test.eml", "blob": blob}]
|
||||
docs = ds.upload_documents(document_infos)
|
||||
doc = docs[0]
|
||||
ds.async_parse_documents([doc.id])
|
||||
|
||||
|
||||
def test_upload_and_parse_html_documents_with_general_parse_method(get_api_key_fixture):
|
||||
API_KEY = get_api_key_fixture
|
||||
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
||||
ds = rag.create_dataset(name="test_html_document")
|
||||
with open("test_data/test.html", "rb") as file:
|
||||
blob = file.read()
|
||||
document_infos = [{"display_name": "test.html", "blob": blob}]
|
||||
docs = ds.upload_documents(document_infos)
|
||||
doc = docs[0]
|
||||
ds.async_parse_documents([doc.id])
|
||||
@ -1,145 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from ragflow_sdk import RAGFlow
from common import HOST_ADDRESS
import pytest


def test_create_session_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    kb = rag.create_dataset(name="test_create_session")
    display_name = "ragflow.txt"
    with open("test_data/ragflow.txt", "rb") as file:
        blob = file.read()
    document = {"display_name": display_name, "blob": blob}
    documents = []
    documents.append(document)
    docs = kb.upload_documents(documents)
    for doc in docs:
        doc.add_chunk("This is a test to add chunk")
    assistant = rag.create_chat("test_create_session", dataset_ids=[kb.id])
    assistant.create_session()


def test_create_conversation_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    kb = rag.create_dataset(name="test_create_conversation")
    display_name = "ragflow.txt"
    with open("test_data/ragflow.txt", "rb") as file:
        blob = file.read()
    document = {"display_name": display_name, "blob": blob}
    documents = []
    documents.append(document)
    docs = kb.upload_documents(documents)
    for doc in docs:
        doc.add_chunk("This is a test to add chunk")
    assistant = rag.create_chat("test_create_conversation", dataset_ids=[kb.id])
    session = assistant.create_session()
    question = "What is AI"
    for ans in session.ask(question):
        pass
    # assert not ans.content.startswith("**ERROR**"), "Please check this error."


def test_delete_sessions_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    kb = rag.create_dataset(name="test_delete_session")
    display_name = "ragflow.txt"
    with open("test_data/ragflow.txt", "rb") as file:
        blob = file.read()
    document = {"display_name": display_name, "blob": blob}
    documents = []
    documents.append(document)
    docs = kb.upload_documents(documents)
    for doc in docs:
        doc.add_chunk("This is a test to add chunk")
    assistant = rag.create_chat("test_delete_session", dataset_ids=[kb.id])
    session = assistant.create_session()
    assistant.delete_sessions(ids=[session.id])


def test_update_session_with_name(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    kb = rag.create_dataset(name="test_update_session")
    display_name = "ragflow.txt"
    with open("test_data/ragflow.txt", "rb") as file:
        blob = file.read()
    document = {"display_name": display_name, "blob": blob}
    documents = []
    documents.append(document)
    docs = kb.upload_documents(documents)
    for doc in docs:
        doc.add_chunk("This is a test to add chunk")
    assistant = rag.create_chat("test_update_session", dataset_ids=[kb.id])
    session = assistant.create_session(name="old session")
    session.update({"name": "new session"})


def test_list_sessions_with_success(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    kb = rag.create_dataset(name="test_list_session")
    display_name = "ragflow.txt"
    with open("test_data/ragflow.txt", "rb") as file:
        blob = file.read()
    document = {"display_name": display_name, "blob": blob}
    documents = []
    documents.append(document)
    docs = kb.upload_documents(documents)
    for doc in docs:
        doc.add_chunk("This is a test to add chunk")
    assistant = rag.create_chat("test_list_session", dataset_ids=[kb.id])
    assistant.create_session("test_1")
    assistant.create_session("test_2")
    assistant.list_sessions()


@pytest.mark.skip(reason="")
def test_create_agent_session_with_success(get_api_key_fixture):
    API_KEY = "ragflow-BkOGNhYjIyN2JiODExZWY5MzVhMDI0Mm"
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    agent = rag.list_agents(id="2e45b5209c1011efa3e90242ac120006")[0]
    agent.create_session()


@pytest.mark.skip(reason="")
def test_create_agent_conversation_with_success(get_api_key_fixture):
    API_KEY = "ragflow-BkOGNhYjIyN2JiODExZWY5MzVhMDI0Mm"
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    agent = rag.list_agents(id="2e45b5209c1011efa3e90242ac120006")[0]
    session = agent.create_session()
    session.ask("What is this job")


@pytest.mark.skip(reason="")
def test_list_agent_sessions_with_success(get_api_key_fixture):
    API_KEY = "ragflow-BkOGNhYjIyN2JiODExZWY5MzVhMDI0Mm"
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    agent = rag.list_agents(id="2e45b5209c1011efa3e90242ac120006")[0]
    agent.list_sessions()


@pytest.mark.skip(reason="")
def test_delete_session_of_agent_with_success(get_api_key_fixture):
    API_KEY = "ragflow-BkOGNhYjIyN2JiODExZWY5MzVhMDI0Mm"
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    agent = rag.list_agents(id="2e45b5209c1011efa3e90242ac120006")[0]
    agent.delete_sessions(ids=["test_1"])
@ -1 +0,0 @@
{"data":null,"code":100,"message":"TypeError(\"download_document() got an unexpected keyword argument 'tenant_id'\")"}
@ -1,29 +0,0 @@

Introducing RagFlow: Revolutionizing Natural Language Processing with Retrieval-Augmented Generation

In the ever-evolving landscape of Natural Language Processing (NLP), new techniques and frameworks continue to push the boundaries of what machines can understand and generate from human language. Among these innovative advancements, RagFlow stands out as a pioneering approach that combines the power of retrieval and generation to revolutionize the way we interact with text-based data.

What is RagFlow?

RagFlow, short for Retrieval-Augmented Generation Flow, is a framework designed to enhance the capabilities of NLP models by integrating a retrieval component into the generation process. This approach leverages large-scale knowledge bases and text corpora to retrieve relevant information that can inform and enrich the output generated by the model. By doing so, RagFlow enables models to produce more accurate, informative, and contextually relevant responses, surpassing the limitations of traditional generation-only or retrieval-only systems.

The Core Concept

At its core, RagFlow operates on two fundamental principles:

Retrieval: The first step involves identifying and retrieving relevant information from a vast collection of text sources. This can include web pages, academic articles, books, or any other form of unstructured text data. RagFlow employs advanced retrieval algorithms, often based on neural networks and vector similarity, to quickly and accurately locate the most pertinent information for a given query or task.
Generation: Once relevant information has been retrieved, RagFlow leverages generative NLP models to produce the final output. These models, such as transformers or GPT-like architectures, are trained to understand the context provided by the retrieved information and generate coherent, fluent text that incorporates this knowledge. The integration of retrieval and generation allows RagFlow to generate responses that are not only grammatically correct but also semantically rich and contextually appropriate. A minimal sketch of this retrieve-then-generate loop follows.
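
To make the two principles concrete, here is a minimal, self-contained Python sketch of the retrieve-then-generate loop described above. The toy corpus, the word-overlap scorer in retrieve(), and the template-based generate() are illustrative stand-ins for a real vector index and a real language model, not RagFlow's actual implementation.

from collections import Counter

CORPUS = [
    "Retrieval locates relevant passages using vector similarity search.",
    "Generation produces fluent text conditioned on the retrieved context.",
    "RagFlow combines retrieval and generation for knowledge-intensive tasks.",
]


def retrieve(query, corpus, k=2):
    # Toy retrieval: rank passages by word overlap with the query.
    q = Counter(query.lower().split())
    return sorted(corpus, key=lambda p: -sum((q & Counter(p.lower().split())).values()))[:k]


def generate(query, context):
    # Stand-in for a generative model call: an answer grounded in the retrieved passages.
    return f"Q: {query}\nContext: {' | '.join(context)}\nA: <answer composed from the context above>"


print(generate("How does retrieval work?", retrieve("How does retrieval work?", CORPUS)))

The same two-stage shape holds in production systems; only the retriever (a vector index over embeddings) and the generator (a large language model) are swapped in.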
Advantages of RagFlow

Increased Accuracy and Relevance: By incorporating retrieved information, RagFlow can generate responses that are more accurate and relevant to the user's query or task. This is particularly useful in domains where factual accuracy and contextual relevance are crucial, such as question answering, summarization, and knowledge-intensive dialogue systems.
Scalability and Flexibility: RagFlow's reliance on large-scale text corpora and retrieval algorithms makes it highly scalable to new domains and datasets. As more data becomes available, the retrieval component can be easily updated to incorporate new information, while the generative model can be fine-tuned to adapt to specific tasks or user preferences.
Improved Efficiency: By leveraging pre-existing knowledge bases and retrieval algorithms, RagFlow can reduce the computational burden on the generative model. This allows the model to focus on generating high-quality output rather than searching for relevant information from scratch, resulting in improved efficiency and faster response times.

Applications and Future Directions

RagFlow has the potential to transform a wide range of NLP applications, including but not limited to:

Question Answering Systems: By retrieving relevant passages and generating precise answers, RagFlow can enhance the accuracy and comprehensiveness of question answering systems.
Document Summarization: By identifying key information and generating concise summaries, RagFlow can help users quickly grasp the main points of lengthy documents.
Creative Writing and Storytelling: By incorporating retrieved elements into the generation process, RagFlow can inspire and augment creative writing, enabling machines to produce more engaging and original stories.

As the field of NLP continues to evolve, RagFlow represents a promising direction for leveraging the power of both retrieval and generation. With further research and development, we can expect to see even more sophisticated and versatile RagFlow-based systems that push the boundaries of what machines can achieve with human language.
Binary file not shown.
File diff suppressed because one or more lines are too long
Binary file not shown.
@ -1,107 +0,0 @@
{
    "单车": [
        "自行车"
    ],
    "青禾服装": [
        "青禾服饰"
    ],
    "救济灾民": [
        "救助",
        "灾民救济",
        "赈济"
    ],
    "左移": [],
    "低速": [],
    "雨果网": [],
    "钢小二": [
        "成立于2013年,位于江苏省无锡市,是一家以从事研究和试验发展为主的企业"
    ],
    "第五项": [
        "5项"
    ],
    "铸排机": [
        "机排",
        "排铸机",
        "排铸"
    ],
    "金淳高分子": [],
    "麦门冬汤": [],
    "错位": [],
    "佰特吉姆": [],
    "楼体": [],
    "展美科技": [
        "美展"
    ],
    "中寮": [],
    "贪官汙吏": [
        "...",
        "贪吏",
        "贪官污吏"
    ],
    "掩蔽部": [
        "掩 蔽 部"
    ],
    "海宏智能": [],
    "中寰": [],
    "万次": [],
    "领星资本": [
        "星领"
    ],
    "肯讯": [],
    "坎肩": [],
    "爱农人": [],
    "易美餐": [],
    "寸丝半粟": [],
    "罗丹萍": [],
    "转导物": [],
    "泊寓": [],
    "万欧": [
        "欧万"
    ],
    "友聚惠": [
        "友惠",
        "惠友"
    ],
    "舞牙弄爪": [
        ":形容凶猛的样子,比喻威胁、恐吓",
        "原形容猛兽的凶相,后常用来比喻猖狂凶恶的样子",
        "成语解释:原形容猛兽的凶相,后常用来比喻猖狂凶恶的样子",
        "原形容猛兽的凶相,后常用来比喻猖狂(好工具hao86.com",
        "牙舞爪",
        "形容猛兽凶恶可怕。也比喻猖狂凶恶",
        "舞爪"
    ],
    "上海致上": [
        "上海上",
        "上海市"
    ],
    "迪因加": [],
    "李正茂": [],
    "君来投": [],
    "双掌空": [
        "双掌 空",
        "空掌",
        "两手空空"
    ],
    "浩石": [
        "石浩",
        "皓石"
    ],
    "云阅文学": [],
    "阿斯帕": [],
    "中导": [],
    "以诚相待": [],
    "中融金服": [],
    "尚股网": [],
    "叶立钦": [
        "叶利钦"
    ],
    "新信钱包": [
        "信信"
    ],
    "赛苏投资": [
        "投资者"
    ],
    "售价": [],
    "帮医网": []
}
@ -1,21 +0,0 @@
Quod equidem non reprehendo;
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quibus natura iure responderit non esse verum aliunde finem beate vivendi, a se principia rei gerendae peti; Quae enim adhuc protulisti, popularia sunt, ego autem a te elegantiora desidero. Duo Reges: constructio interrete. Tum Lucius: Mihi vero ista valde probata sunt, quod item fratri puto. Bestiarum vero nullum iudicium puto. Nihil enim iam habes, quod ad corpus referas; Deinde prima illa, quae in congressu solemus: Quid tu, inquit, huc? Et homini, qui ceteris animantibus plurimum praestat, praecipue a natura nihil datum esse dicemus?

Iam id ipsum absurdum, maximum malum neglegi. Quod ea non occurrentia fingunt, vincunt Aristonem; Atqui perspicuum est hominem e corpore animoque constare, cum primae sint animi partes, secundae corporis. Fieri, inquam, Triari, nullo pacto potest, ut non dicas, quid non probes eius, a quo dissentias. Equidem e Cn. An dubium est, quin virtus ita maximam partem optineat in rebus humanis, ut reliquas obruat?

Quis istum dolorem timet?
Summus dolor plures dies manere non potest? Dicet pro me ipsa virtus nec dubitabit isti vestro beato M. Tubulum fuisse, qua illum, cuius is condemnatus est rogatione, P. Quod si ita sit, cur opera philosophiae sit danda nescio.

Ex eorum enim scriptis et institutis cum omnis doctrina liberalis, omnis historia.
Quod si ita est, sequitur id ipsum, quod te velle video, omnes semper beatos esse sapientes. Cum enim fertur quasi torrens oratio, quamvis multa cuiusque modi rapiat, nihil tamen teneas, nihil apprehendas, nusquam orationem rapidam coerceas. Ita redarguitur ipse a sese, convincunturque scripta eius probitate ipsius ac moribus. At quanta conantur! Mundum hunc omnem oppidum esse nostrum! Incendi igitur eos, qui audiunt, vides. Vide, ne magis, inquam, tuum fuerit, cum re idem tibi, quod mihi, videretur, non nova te rebus nomina inponere. Qui-vere falsone, quaerere mittimus-dicitur oculis se privasse; Si ista mala sunt, in quae potest incidere sapiens, sapientem esse non esse ad beate vivendum satis. At vero si ad vitem sensus accesserit, ut appetitum quendam habeat et per se ipsa moveatur, quid facturam putas?

Quem si tenueris, non modo meum Ciceronem, sed etiam me ipsum abducas licebit.
Stulti autem malorum memoria torquentur, sapientes bona praeterita grata recordatione renovata delectant.
Esse enim quam vellet iniquus iustus poterat inpune.
Quae autem natura suae primae institutionis oblita est?
Verum tamen cum de rebus grandioribus dicas, ipsae res verba rapiunt;
Hoc est non modo cor non habere, sed ne palatum quidem.
Voluptatem cum summum bonum diceret, primum in eo ipso parum vidit, deinde hoc quoque alienum; Sed tu istuc dixti bene Latine, parum plane. Nam haec ipsa mihi erunt in promptu, quae modo audivi, nec ante aggrediar, quam te ab istis, quos dicis, instructum videro. Fatebuntur Stoici haec omnia dicta esse praeclare, neque eam causam Zenoni desciscendi fuisse. Non autem hoc: igitur ne illud quidem. Ratio quidem vestra sic cogit. Cum audissem Antiochum, Brute, ut solebam, cum M. An quod ita callida est, ut optime possit architectari voluptates?

Idemne, quod iucunde?
Haec mihi videtur delicatior, ut ita dicam, molliorque ratio, quam virtutis vis gravitasque postulat. Sed quoniam et advesperascit et mihi ad villam revertendum est, nunc quidem hactenus; Cuius ad naturam apta ratio vera illa et summa lex a philosophis dicitur. Neque solum ea communia, verum etiam paria esse dixerunt. Sed nunc, quod agimus; A mene tu?
Binary file not shown.
Binary file not shown.
@ -1,21 +0,0 @@
Quod equidem non reprehendo;
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quibus natura iure responderit non esse verum aliunde finem beate vivendi, a se principia rei gerendae peti; Quae enim adhuc protulisti, popularia sunt, ego autem a te elegantiora desidero. Duo Reges: constructio interrete. Tum Lucius: Mihi vero ista valde probata sunt, quod item fratri puto. Bestiarum vero nullum iudicium puto. Nihil enim iam habes, quod ad corpus referas; Deinde prima illa, quae in congressu solemus: Quid tu, inquit, huc? Et homini, qui ceteris animantibus plurimum praestat, praecipue a natura nihil datum esse dicemus?

Iam id ipsum absurdum, maximum malum neglegi. Quod ea non occurrentia fingunt, vincunt Aristonem; Atqui perspicuum est hominem e corpore animoque constare, cum primae sint animi partes, secundae corporis. Fieri, inquam, Triari, nullo pacto potest, ut non dicas, quid non probes eius, a quo dissentias. Equidem e Cn. An dubium est, quin virtus ita maximam partem optineat in rebus humanis, ut reliquas obruat?

Quis istum dolorem timet?
Summus dolor plures dies manere non potest? Dicet pro me ipsa virtus nec dubitabit isti vestro beato M. Tubulum fuisse, qua illum, cuius is condemnatus est rogatione, P. Quod si ita sit, cur opera philosophiae sit danda nescio.

Ex eorum enim scriptis et institutis cum omnis doctrina liberalis, omnis historia.
Quod si ita est, sequitur id ipsum, quod te velle video, omnes semper beatos esse sapientes. Cum enim fertur quasi torrens oratio, quamvis multa cuiusque modi rapiat, nihil tamen teneas, nihil apprehendas, nusquam orationem rapidam coerceas. Ita redarguitur ipse a sese, convincunturque scripta eius probitate ipsius ac moribus. At quanta conantur! Mundum hunc omnem oppidum esse nostrum! Incendi igitur eos, qui audiunt, vides. Vide, ne magis, inquam, tuum fuerit, cum re idem tibi, quod mihi, videretur, non nova te rebus nomina inponere. Qui-vere falsone, quaerere mittimus-dicitur oculis se privasse; Si ista mala sunt, in quae potest incidere sapiens, sapientem esse non esse ad beate vivendum satis. At vero si ad vitem sensus accesserit, ut appetitum quendam habeat et per se ipsa moveatur, quid facturam putas?

Quem si tenueris, non modo meum Ciceronem, sed etiam me ipsum abducas licebit.
Stulti autem malorum memoria torquentur, sapientes bona praeterita grata recordatione renovata delectant.
Esse enim quam vellet iniquus iustus poterat inpune.
Quae autem natura suae primae institutionis oblita est?
Verum tamen cum de rebus grandioribus dicas, ipsae res verba rapiunt;
Hoc est non modo cor non habere, sed ne palatum quidem.
Voluptatem cum summum bonum diceret, primum in eo ipso parum vidit, deinde hoc quoque alienum; Sed tu istuc dixti bene Latine, parum plane. Nam haec ipsa mihi erunt in promptu, quae modo audivi, nec ante aggrediar, quam te ab istis, quos dicis, instructum videro. Fatebuntur Stoici haec omnia dicta esse praeclare, neque eam causam Zenoni desciscendi fuisse. Non autem hoc: igitur ne illud quidem. Ratio quidem vestra sic cogit. Cum audissem Antiochum, Brute, ut solebam, cum M. An quod ita callida est, ut optime possit architectari voluptates?

Idemne, quod iucunde?
Haec mihi videtur delicatior, ut ita dicam, molliorque ratio, quam virtutis vis gravitasque postulat. Sed quoniam et advesperascit et mihi ad villam revertendum est, nunc quidem hactenus; Cuius ad naturam apta ratio vera illa et summa lex a philosophis dicitur. Neque solum ea communia, verum etiam paria esse dixerunt. Sed nunc, quod agimus; A mene tu?
Binary file not shown.