fix: support dense_vector from ES fields response (ES 9.x compatibility) (#13972)

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Configuration Chore (non-breaking change which updates
configuration)


## Summary by CodeRabbit

* **Bug Fixes**
* More accurate handling and unwrapping of dense-vector fields so
returned values have correct shapes.
* Field selection reliably limits returned data and falls back to
alternate result locations when needed.
* Consistent use of result IDs and tolerant handling of missing score
values.

* **Chores / Configuration**
* Increased build memory and adjusted build-time flags for the frontend
build.
* Simplified runtime model/GPU checks and removed an automated runtime
GPU-install attempt.

* **Build Fixes**
* `web/vite.config.ts`: make `build.minify` and `build.sourcemap`
respect `VITE_MINIFY` and `VITE_BUILD_SOURCEMAP` env vars from
Dockerfile instead of hardcoding `terser` and `true`.

* **Environment**
* Allow stack version override and default the runtime image tag to
"latest".

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

* **Bug Fixes**
* Correct unwrapping of dense-vector fields and reliable field selection
with fallback locations.
* Consistent use of hit-level IDs and tolerant handling when score
values are missing.

* **Chores / Configuration**
* Increased frontend build memory and added build-time minify/sourcemap
flags; build minification and sourcemap now configurable.
* Removed runtime GPU detection for model initialization; force CPU
initialization.

* **Environment**
* Allow stack version override and default runtime image tag to
"latest".

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Zhichang Yu
2026-04-09 17:44:13 +08:00
committed by GitHub
parent 107fe6cf90
commit b7744e053e
50 changed files with 142 additions and 124 deletions

View File

@ -35,26 +35,14 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
apt update && \
apt --no-install-recommends install -y ca-certificates; \
if [ "$NEED_MIRROR" == "1" ]; then \
sed -i 's|http://archive.ubuntu.com/ubuntu|https://mirrors.tuna.tsinghua.edu.cn/ubuntu|g' /etc/apt/sources.list.d/ubuntu.sources; \
sed -i 's|http://security.ubuntu.com/ubuntu|https://mirrors.tuna.tsinghua.edu.cn/ubuntu|g' /etc/apt/sources.list.d/ubuntu.sources; \
sed -i 's|http://archive.ubuntu.com/ubuntu|https://mirrors.aliyun.com/ubuntu|g' /etc/apt/sources.list.d/ubuntu.sources; \
sed -i 's|http://security.ubuntu.com/ubuntu|https://mirrors.aliyun.com/ubuntu|g' /etc/apt/sources.list.d/ubuntu.sources; \
fi; \
rm -f /etc/apt/apt.conf.d/docker-clean && \
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
chmod 1777 /tmp && \
apt update && \
apt install -y build-essential && \
apt install -y libglib2.0-0 libglx-mesa0 libgl1 && \
apt install -y pkg-config libicu-dev libgdiplus && \
apt install -y default-jdk && \
apt install -y libatk-bridge2.0-0 && \
apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \
apt install -y libjemalloc-dev && \
apt install -y gnupg unzip curl wget git vim less && \
apt install -y ghostscript && \
apt install -y pandoc && \
apt install -y texlive && \
apt install -y fonts-freefont-ttf fonts-noto-cjk && \
apt install -y postgresql-client
apt install -y build-essential libglib2.0-0 libglx-mesa0 libgl1 pkg-config libicu-dev libgdiplus default-jdk libatk-bridge2.0-0 libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev libjemalloc-dev gnupg unzip curl wget git vim less ghostscript pandoc texlive fonts-freefont-ttf fonts-noto-cjk postgresql-client
# Download resource from GitHub to /usr/share/infinity
RUN mkdir -p /usr/share/infinity/resource && \
@ -165,8 +153,8 @@ RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
COPY web web
COPY docs docs
RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
export NODE_OPTIONS="--max-old-space-size=4096" && \
cd web && npm install && npm run build
cd web && NODE_OPTIONS="--max-old-space-size=8192" npm install && \
NODE_OPTIONS="--max-old-space-size=8192" VITE_BUILD_SOURCEMAP=false VITE_MINIFY=esbuild npm run build
COPY .git /ragflow/.git

View File

@ -38,7 +38,6 @@ from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from common.file_utils import get_project_base_directory
from common.misc_utils import pip_install_torch
from deepdoc.vision import OCR, AscendLayoutRecognizer, LayoutRecognizer, Recognizer, TableStructureRecognizer
from rag.nlp import rag_tokenizer
from rag.prompts.generator import vision_llm_describe_prompt
@ -91,14 +90,9 @@ class RAGFlowPdfParser:
self.tbl_det = TableStructureRecognizer()
self.updown_cnt_mdl = xgb.Booster()
try:
pip_install_torch()
import torch.cuda
if torch.cuda.is_available():
self.updown_cnt_mdl.set_param({"device": "cuda"})
except Exception:
logging.info("No torch found.")
# xgboost model is very small; using CPU explicitly
self.updown_cnt_mdl.set_param({"device": "cpu"})
logging.info("updown_cnt_mdl initialized on CPU")
try:
model_dir = os.path.join(get_project_base_directory(), "rag/res/deepdoc")
self.updown_cnt_mdl.load_model(os.path.join(model_dir, "updown_concat_xgb.model"))

View File

@ -28,7 +28,7 @@ DEVICE=${DEVICE:-cpu}
COMPOSE_PROFILES=${DOC_ENGINE},${DEVICE}
# The version of Elasticsearch.
STACK_VERSION=8.11.3
STACK_VERSION=${STACK_VERSION:-8.11.3}
# The hostname where the Elasticsearch service is exposed
ES_HOST=es01
@ -159,7 +159,7 @@ GO_ADMIN_PORT=9383
API_PROXY_SCHEME=python # use pure python server deployment
# The RAGFlow Docker image to download. v0.22+ doesn't include embedding models.
RAGFLOW_IMAGE=infiniflow/ragflow:v0.24.0
RAGFLOW_IMAGE=infiniflow/ragflow:latest
# If you cannot download the RAGFlow Docker image:
# RAGFLOW_IMAGE=swr.cn-north-4.myhuaweicloud.com/infiniflow/ragflow:v0.24.0

View File

@ -762,7 +762,7 @@ class Parser(ProcessBase):
sections = [line.strip() for line in content.splitlines() if line and line.strip()]
if conf.get("remove_toc"):
sections = remove_toc_word(sections, outlines)
sections = remove_toc_word(sections, [])
if conf.get("output_format") == "json":
self.set_output(

View File

@ -91,7 +91,7 @@ class KGSearch(Dealer):
es_res = self.dataStore.get_fields(es_res, ["content_with_weight", "_score", "from_entity_kwd", "to_entity_kwd",
"weight_int"])
for _, ent in es_res.items():
if get_float(ent["_score"]) < sim_thr:
if get_float(ent.get("_score", 0)) < sim_thr:
continue
f, t = sorted([ent["from_entity_kwd"], ent["to_entity_kwd"]])
if isinstance(f, list):
@ -99,7 +99,7 @@ class KGSearch(Dealer):
if isinstance(t, list):
t = t[0]
res[(f, t)] = {
"sim": get_float(ent["_score"]),
"sim": get_float(ent.get("_score", 0)),
"pagerank": get_float(ent.get("weight_int", 0)),
"description": ent["content_with_weight"]
}

View File

@ -278,7 +278,7 @@ class _BlobLikeBase(SyncBase):
)
)
begin_info = (
_begin_info = (
"totally"
if task["reindex"] == "1" or not task["poll_range_start"]
else "from {}".format(task["poll_range_start"])
@ -289,7 +289,7 @@ class _BlobLikeBase(SyncBase):
bucket_type,
self.conf["bucket_name"],
self.conf.get("prefix", ""),
begin_info,
_begin_info,
)
)
return document_batch_generator
@ -377,10 +377,10 @@ class Confluence(SyncBase):
# Determine the time range for synchronization based on reindex or poll_range_start
if task["reindex"] == "1" or not task["poll_range_start"]:
start_time = 0.0
begin_info = "totally"
_begin_info = "totally"
else:
start_time = task["poll_range_start"].timestamp()
begin_info = f"from {task['poll_range_start']}"
_begin_info = f"from {task['poll_range_start']}"
end_time = datetime.now(timezone.utc).timestamp()
@ -442,7 +442,7 @@ class Notion(SyncBase):
datetime.now(timezone.utc).timestamp())
)
begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(
_begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(
task["poll_range_start"])
self.log_connection("Notion", f"root({self.conf['root_page_id']})", task)
return document_generator
@ -470,7 +470,7 @@ class Discord(SyncBase):
datetime.now(timezone.utc).timestamp())
)
begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(
_begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(
task["poll_range_start"])
self.log_connection("Discord", f"servers({server_ids}), channel({channel_names})", task)
return document_generator
@ -516,7 +516,7 @@ class Gmail(SyncBase):
if task["reindex"] == "1" or not task.get("poll_range_start"):
start_time = None
end_time = None
begin_info = "totally"
_begin_info = "totally"
document_generator = self.connector.load_from_state()
else:
poll_start = task["poll_range_start"]
@ -524,12 +524,12 @@ class Gmail(SyncBase):
if poll_start is None:
start_time = None
end_time = None
begin_info = "totally"
_begin_info = "totally"
document_generator = self.connector.load_from_state()
else:
start_time = poll_start.timestamp()
end_time = datetime.now(timezone.utc).timestamp()
begin_info = f"from {poll_start}"
_begin_info = f"from {poll_start}"
document_generator = self.connector.poll_source(start_time, end_time)
try:
@ -549,13 +549,13 @@ class Dropbox(SyncBase):
if task["reindex"] == "1" or not task["poll_range_start"]:
document_generator = self.connector.load_from_state()
begin_info = "totally"
_begin_info = "totally"
else:
poll_start = task["poll_range_start"]
document_generator = self.connector.poll_source(
poll_start.timestamp(), datetime.now(timezone.utc).timestamp()
)
begin_info = f"from {poll_start}"
_begin_info = f"from {poll_start}"
self.log_connection("Dropbox", "workspace", task)
return document_generator
@ -588,10 +588,10 @@ class GoogleDrive(SyncBase):
if task["reindex"] == "1" or not task["poll_range_start"]:
start_time = 0.0
begin_info = "totally"
_begin_info = "totally"
else:
start_time = task["poll_range_start"].timestamp()
begin_info = f"from {task['poll_range_start']}"
_begin_info = f"from {task['poll_range_start']}"
end_time = datetime.now(timezone.utc).timestamp()
raw_batch_size = self.conf.get("sync_batch_size") or self.conf.get("batch_size") or INDEX_BATCH_SIZE
@ -682,10 +682,10 @@ class Jira(SyncBase):
if task["reindex"] == "1" or not task["poll_range_start"]:
start_time = 0.0
begin_info = "totally"
_begin_info = "totally"
else:
start_time = task["poll_range_start"].timestamp()
begin_info = f"from {task['poll_range_start']}"
_begin_info = f"from {task['poll_range_start']}"
end_time = datetime.now(timezone.utc).timestamp()
@ -788,12 +788,12 @@ class WebDAV(SyncBase):
if task["reindex"] == "1" or not task["poll_range_start"]:
document_batch_generator = self.connector.load_from_state()
begin_info = "totally"
_begin_info = "totally"
else:
start_ts = task["poll_range_start"].timestamp()
end_ts = datetime.now(timezone.utc).timestamp()
document_batch_generator = self.connector.poll_source(start_ts, end_ts)
begin_info = "from {}".format(task["poll_range_start"])
_begin_info = "from {}".format(task["poll_range_start"])
self.log_connection("WebDAV", f"{self.conf['base_url']}(path: {self.conf.get('remote_path', '/')})", task)
@ -820,13 +820,13 @@ class Moodle(SyncBase):
if task["reindex"] == "1" or poll_start is None:
document_generator = self.connector.load_from_state()
begin_info = "totally"
_begin_info = "totally"
else:
document_generator = self.connector.poll_source(
poll_start.timestamp(),
datetime.now(timezone.utc).timestamp(),
)
begin_info = f"from {poll_start}"
_begin_info = f"from {poll_start}"
self.log_connection("Moodle", self.conf["moodle_url"], task)
return document_generator
@ -860,13 +860,13 @@ class BOX(SyncBase):
if task["reindex"] == "1" or poll_start is None:
document_generator = self.connector.load_from_state()
begin_info = "totally"
_begin_info = "totally"
else:
document_generator = self.connector.poll_source(
poll_start.timestamp(),
datetime.now(timezone.utc).timestamp(),
)
begin_info = f"from {poll_start}"
_begin_info = f"from {poll_start}"
self.log_connection("Box", f"folder_id({self.conf['folder_id']})", task)
return document_generator
@ -896,13 +896,13 @@ class Airtable(SyncBase):
if task.get("reindex") == "1" or poll_start is None:
document_generator = self.connector.load_from_state()
begin_info = "totally"
_begin_info = "totally"
else:
document_generator = self.connector.poll_source(
poll_start.timestamp(),
datetime.now(timezone.utc).timestamp(),
)
begin_info = f"from {poll_start}"
_begin_info = f"from {poll_start}"
self.log_connection(
"Airtable",
@ -931,18 +931,18 @@ class Asana(SyncBase):
if task.get("reindex") == "1" or not task.get("poll_range_start"):
document_generator = self.connector.load_from_state()
begin_info = "totally"
_begin_info = "totally"
else:
poll_start = task.get("poll_range_start")
if poll_start is None:
document_generator = self.connector.load_from_state()
begin_info = "totally"
_begin_info = "totally"
else:
document_generator = self.connector.poll_source(
poll_start.timestamp(),
datetime.now(timezone.utc).timestamp(),
)
begin_info = f"from {poll_start}"
_begin_info = f"from {poll_start}"
self.log_connection(
"Asana",
@ -979,10 +979,10 @@ class Github(SyncBase):
file_list = None
if task.get("reindex") == "1" or not task.get("poll_range_start"):
start_time = datetime.fromtimestamp(0, tz=timezone.utc)
begin_info = "totally"
_begin_info = "totally"
else:
start_time = task.get("poll_range_start")
begin_info = f"from {start_time}"
_begin_info = f"from {start_time}"
if self.conf.get("sync_deleted_files"):
file_list = []
for slim_batch in self.connector.retrieve_all_slim_docs_perm_sync():
@ -1041,10 +1041,10 @@ class IMAP(SyncBase):
end_time = datetime.now(timezone.utc).timestamp()
if task["reindex"] == "1" or not task["poll_range_start"]:
start_time = end_time - self.conf.get("poll_range",30) * 24 * 60 * 60
begin_info = "totally"
_begin_info = "totally"
else:
start_time = task["poll_range_start"].timestamp()
begin_info = f"from {task['poll_range_start']}"
_begin_info = f"from {task['poll_range_start']}"
raw_batch_size = self.conf.get("sync_batch_size") or self.conf.get("batch_size") or INDEX_BATCH_SIZE
try:
batch_size = int(raw_batch_size)
@ -1101,10 +1101,10 @@ class Zendesk(SyncBase):
end_time = datetime.now(timezone.utc).timestamp()
if task["reindex"] == "1" or not task.get("poll_range_start"):
start_time = 0
begin_info = "totally"
_begin_info = "totally"
else:
start_time = task["poll_range_start"].timestamp()
begin_info = f"from {task['poll_range_start']}"
_begin_info = f"from {task['poll_range_start']}"
raw_batch_size = (
self.conf.get("sync_batch_size")
@ -1193,18 +1193,18 @@ class Gitlab(SyncBase):
if task["reindex"] == "1" or not task["poll_range_start"]:
document_generator = self.connector.load_from_state()
begin_info = "totally"
_begin_info = "totally"
else:
poll_start = task["poll_range_start"]
if poll_start is None:
document_generator = self.connector.load_from_state()
begin_info = "totally"
_begin_info = "totally"
else:
document_generator = self.connector.poll_source(
poll_start.timestamp(),
datetime.now(timezone.utc).timestamp()
)
begin_info = "from {}".format(poll_start)
_begin_info = "from {}".format(poll_start)
self.log_connection("Gitlab", f"({self.conf['project_name']})", task)
return document_generator
@ -1228,10 +1228,10 @@ class Bitbucket(SyncBase):
if task["reindex"] == "1" or not task["poll_range_start"]:
start_time = datetime.fromtimestamp(0, tz=timezone.utc)
begin_info = "totally"
_begin_info = "totally"
else:
start_time = task.get("poll_range_start")
begin_info = f"from {start_time}"
_begin_info = f"from {start_time}"
end_time = datetime.now(timezone.utc)
@ -1284,13 +1284,13 @@ class SeaFile(SyncBase):
poll_start = task.get("poll_range_start")
if task["reindex"] == "1" or poll_start is None:
document_generator = self.connector.load_from_state()
begin_info = "totally"
_begin_info = "totally"
else:
document_generator = self.connector.poll_source(
poll_start.timestamp(),
datetime.now(timezone.utc).timestamp(),
)
begin_info = f"from {poll_start}"
_begin_info = f"from {poll_start}"
scope = conf.get("sync_scope", "account")
extra = ""
@ -1328,13 +1328,13 @@ class DingTalkAITable(SyncBase):
if task.get("reindex") == "1" or poll_start is None:
document_generator = self.connector.load_from_state()
begin_info = "totally"
_begin_info = "totally"
else:
document_generator = self.connector.poll_source(
poll_start.timestamp(),
datetime.now(timezone.utc).timestamp(),
)
begin_info = f"from {poll_start}"
_begin_info = f"from {poll_start}"
self.log_connection(
"DingTalk AI Table",
@ -1371,14 +1371,14 @@ class MySQL(SyncBase):
if task["reindex"] == "1" or not task["poll_range_start"]:
document_generator = self.connector.load_from_state()
begin_info = "totally"
_begin_info = "totally"
else:
poll_start = task["poll_range_start"]
document_generator = self.connector.poll_source(
poll_start.timestamp(),
datetime.now(timezone.utc).timestamp()
)
begin_info = f"from {poll_start}"
_begin_info = f"from {poll_start}"
self.log_connection("MySQL", f"{self.conf.get('host')}:{self.conf.get('database')}", task)
return document_generator
@ -1410,14 +1410,14 @@ class PostgreSQL(SyncBase):
if task["reindex"] == "1" or not task["poll_range_start"]:
document_generator = self.connector.load_from_state()
begin_info = "totally"
_begin_info = "totally"
else:
poll_start = task["poll_range_start"]
document_generator = self.connector.poll_source(
poll_start.timestamp(),
datetime.now(timezone.utc).timestamp()
)
begin_info = f"from {poll_start}"
_begin_info = f"from {poll_start}"
self.log_connection("PostgreSQL", f"{self.conf.get('host')}:{self.conf.get('database')}", task)
return document_generator

View File

@ -253,7 +253,18 @@ class ESConnection(ESConnectionBase):
if limit > 0 and not use_search_after:
s = s[offset:offset + limit]
# Filter _source to only requested fields for efficiency, and add vector
# fields to "fields" param so they appear in hit.fields when ES 9.x
# exclude_source_vectors is enabled (dense_vector not in _source).
if select_fields:
s = s.source(select_fields)
q = s.to_dict()
# ES 9.x: dense_vector fields excluded from _source; request them via fields.
# Note: knn does NOT have a "fields" parameter - adding it inside the knn
# object causes BadRequestError on ES 9.x. We add "fields" at top level.
vector_fields = [f for f in (select_fields or []) if f.endswith("_vec")]
if vector_fields:
q["fields"] = vector_fields
self.logger.debug(f"ESConnection.search {str(index_names)} query: " + json.dumps(q))
for i in range(ATTEMPT_TIME):
@ -565,8 +576,24 @@ class ESConnection(ESConnectionBase):
res_fields = {}
if not fields:
return {}
for d in self._get_source(res):
m = {n: d.get(n) for n in fields if d.get(n) is not None}
hits = res.get("hits", {}).get("hits", [])
for hit in hits:
doc_id = hit.get("_id")
d = hit.get("_source", {})
# Also extract fields from ES "fields" response (used by dense_vector in ES 9.x)
hit_fields = hit.get("fields", {})
m = {}
for n in fields:
# First check _source
if d.get(n) is not None:
m[n] = d.get(n)
# Then check fields (ES 9.x stores dense_vector here, not in _source)
elif n in hit_fields:
vals = hit_fields[n]
# ES fields response wraps dense_vector in 2 levels: [[v1,v2,...]] -> [v1,v2,...]
if isinstance(vals, list) and len(vals) == 1:
vals = vals[0]
m[n] = vals
for n, v in m.items():
if isinstance(v, list):
m[n] = v
@ -580,5 +607,5 @@ class ESConnection(ESConnectionBase):
# m[n] = remove_redundant_spaces(m[n])
if m:
res_fields[d["id"]] = m
res_fields[doc_id] = m
return res_fields

View File

@ -18,7 +18,7 @@ from time import sleep
from ragflow_sdk import RAGFlow
from configs import HOST_ADDRESS, VERSION
import pytest
from common import (
from test_common import (
batch_add_chunks,
batch_create_datasets,
bulk_upload_documents,

View File

@ -14,7 +14,7 @@
# limitations under the License.
#
import pytest
from common import api_new_token, api_rm_token, api_stats, api_token_list, batch_create_chats
from test_common import api_new_token, api_rm_token, api_stats, api_token_list, batch_create_chats
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -18,7 +18,7 @@
from time import sleep
import pytest
from common import batch_add_chunks, delete_chunks, list_chunks, list_documents, parse_documents
from test_common import batch_add_chunks, delete_chunks, list_chunks, list_documents, parse_documents
from utils import wait_for

View File

@ -16,7 +16,7 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from common import add_chunk, delete_document, get_chunk, list_chunks
from test_common import add_chunk, delete_document, get_chunk, list_chunks
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -17,7 +17,7 @@ import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from common import batch_add_chunks, list_chunks, update_chunk
from test_common import batch_add_chunks, list_chunks, update_chunk
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -17,7 +17,7 @@ import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from common import retrieval_chunks
from test_common import retrieval_chunks
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -16,7 +16,7 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from common import batch_add_chunks, delete_chunks, list_chunks
from test_common import batch_add_chunks, delete_chunks, list_chunks
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -20,7 +20,7 @@ from random import randint
from time import sleep
import pytest
from common import delete_document, list_chunks, update_chunk
from test_common import delete_document, list_chunks, update_chunk
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -13,6 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from rag.flow.chunker.token_chunker import TokenChunker, TokenChunkerParam
__all__ = ["TokenChunker", "TokenChunkerParam"]
__all__ = []

View File

@ -21,7 +21,7 @@ from pathlib import Path
from types import ModuleType, SimpleNamespace
import pytest
from common import bulk_upload_documents, delete_document, list_documents
from test_common import bulk_upload_documents, delete_document, list_documents
class _DummyManager:

View File

@ -19,7 +19,7 @@ from types import SimpleNamespace
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from test_web_api.common import create_document, list_datasets
from test_common import create_document, list_datasets
from configs import DOCUMENT_NAME_LIMIT, INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth
from utils.file_utils import create_txt_file

View File

@ -17,7 +17,7 @@ import asyncio
from types import SimpleNamespace
import pytest
from common import (
from test_common import (
document_change_status,
document_filter,
document_infos,

View File

@ -18,7 +18,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
from types import SimpleNamespace
import pytest
from common import list_documents
from test_common import list_documents
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth
from utils import is_sorted

View File

@ -18,7 +18,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
from types import SimpleNamespace
import pytest
from common import bulk_upload_documents, list_documents, parse_documents
from test_common import bulk_upload_documents, list_documents, parse_documents
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth
from utils import wait_for

View File

@ -17,7 +17,7 @@ import asyncio
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from common import bulk_upload_documents, delete_document, list_documents
from test_common import bulk_upload_documents, delete_document, list_documents
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -20,7 +20,7 @@ from types import ModuleType, SimpleNamespace
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from common import list_datasets, upload_documents
from test_common import list_datasets, upload_documents
from configs import DOCUMENT_NAME_LIMIT, INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth
from utils.file_utils import create_txt_file

View File

@ -14,7 +14,7 @@
# limitations under the License.
#
import pytest
from common import batch_create_datasets, list_datasets, delete_datasets
from test_common import batch_create_datasets, list_datasets, delete_datasets
from libs.auth import RAGFlowWebApiAuth
from pytest import FixtureRequest
from ragflow_sdk import RAGFlow

View File

@ -16,7 +16,7 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from common import create_dataset
from test_common import create_dataset
from configs import DATASET_NAME_LIMIT, INVALID_API_TOKEN
from hypothesis import example, given, settings
from libs.auth import RAGFlowWebApiAuth

View File

@ -14,7 +14,7 @@
# limitations under the License.
#
import pytest
from common import (
from test_common import (
detail_kb,
)
from configs import INVALID_API_TOKEN

View File

@ -14,7 +14,7 @@
# limitations under the License.
#
import pytest
from test_web_api.common import (
from test_common import (
kb_delete_pipeline_logs,
kb_list_pipeline_dataset_logs,
kb_list_pipeline_logs,

View File

@ -16,7 +16,7 @@
import uuid
import pytest
from test_web_api.common import (
from test_common import (
delete_knowledge_graph,
kb_basic_info,
kb_get_meta,

View File

@ -17,7 +17,7 @@ import json
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from common import list_datasets
from test_common import list_datasets
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth
from utils import is_sorted

View File

@ -15,7 +15,7 @@
#
import pytest
from common import (
from test_common import (
list_datasets,
delete_datasets,
)

View File

@ -17,7 +17,7 @@ import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from test_web_api.common import update_dataset
from test_common import update_dataset
from configs import DATASET_NAME_LIMIT, INVALID_API_TOKEN
from hypothesis import HealthCheck, example, given, settings
from libs.auth import RAGFlowWebApiAuth

View File

@ -14,7 +14,7 @@
# limitations under the License.
#
import pytest
from common import llm_factories, llm_list
from test_common import llm_factories, llm_list
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -15,7 +15,7 @@
#
import pytest
import random
from test_web_api.common import create_memory, list_memory, delete_memory
from test_common import create_memory, list_memory, delete_memory
@pytest.fixture(scope="function")
def add_memory_func(request, WebApiAuth):

View File

@ -17,7 +17,7 @@ import random
import re
import pytest
from test_web_api.common import create_memory
from test_common import create_memory
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -16,7 +16,7 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from test_web_api.common import list_memory, get_memory_config
from test_common import list_memory, get_memory_config
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -14,7 +14,7 @@
# limitations under the License.
#
import pytest
from test_web_api.common import (list_memory, delete_memory)
from test_common import (list_memory, delete_memory)
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -16,7 +16,7 @@
import re
import pytest
from test_web_api.common import update_memory
from test_common import update_memory
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth
from utils import encode_avatar

View File

@ -18,7 +18,7 @@ import uuid
import pytest
import random
from test_web_api.common import create_memory, list_memory, add_message, delete_memory
from test_common import create_memory, list_memory, add_message, delete_memory
@pytest.fixture(scope="class")

View File

@ -17,7 +17,7 @@ import time
import uuid
import pytest
from test_web_api.common import list_memory_message, add_message
from test_common import list_memory_message, add_message
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -16,7 +16,7 @@
import random
import pytest
import requests
from test_web_api.common import forget_message, list_memory_message, get_message_content
from test_common import forget_message, list_memory_message, get_message_content
from configs import HOST_ADDRESS, INVALID_API_TOKEN, VERSION
from libs.auth import RAGFlowWebApiAuth

View File

@ -16,7 +16,7 @@
import random
import pytest
from test_web_api.common import get_message_content, get_recent_message
from test_common import get_message_content, get_recent_message
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -16,7 +16,7 @@
import random
import pytest
from test_web_api.common import get_recent_message
from test_common import get_recent_message
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -17,7 +17,7 @@ import os
import random
import pytest
from test_web_api.common import list_memory_message
from test_common import list_memory_message
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -14,7 +14,7 @@
# limitations under the License.
#
import pytest
from test_web_api.common import search_message, list_memory_message
from test_common import search_message, list_memory_message
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -17,7 +17,7 @@ import random
import pytest
import requests
from test_web_api.common import update_message_status, list_memory_message, get_message_content
from test_common import update_message_status, list_memory_message, get_message_content
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth
from configs import HOST_ADDRESS, VERSION

View File

@ -19,7 +19,7 @@ from pathlib import Path
from types import ModuleType
import pytest
from common import plugin_llm_tools
from test_common import plugin_llm_tools
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -16,7 +16,7 @@
import uuid
import pytest
from common import search_create, search_detail, search_list, search_rm, search_update
from test_common import search_create, search_detail, search_list, search_rm, search_update
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth

View File

@ -14,7 +14,7 @@
# limitations under the License.
#
import pytest
from common import (
from test_common import (
system_config,
system_delete_token,
system_new_token,

View File

@ -26,6 +26,17 @@ const inspectorBabelPlugin = (): import('vite').Plugin => ({
},
});
type MinifyValue = boolean | 'esbuild' | 'terser';

/**
 * Map the VITE_MINIFY env string to a Vite `build.minify` value.
 *
 * Returns `false` to disable minification, `'esbuild'` when explicitly
 * requested, and `'terser'` in every other case (including when the
 * variable is unset or holds an unrecognized value).
 */
function resolveMinify(value: string | undefined): MinifyValue {
  switch (value?.toLowerCase()) {
    case 'false':
      return false;
    case 'esbuild':
      return 'esbuild';
    default:
      // 'terser' and any unrecognized/unset value fall through to terser,
      // matching the previous hardcoded default.
      return 'terser';
  }
}
// https://vitejs.dev/config/
export default defineConfig(({ mode }) => {
const env = loadEnv(mode, process.cwd(), '');
@ -229,7 +240,7 @@ export default defineConfig(({ mode }) => {
plugins: [],
treeshake: true,
},
minify: 'terser',
minify: resolveMinify(env.VITE_MINIFY),
terserOptions: {
compress: {
drop_console: true, // delete console
@ -246,7 +257,7 @@ export default defineConfig(({ mode }) => {
comments: false, // Delete comments
},
},
sourcemap: true,
sourcemap: env.VITE_BUILD_SOURCEMAP !== 'false',
cssCodeSplit: true,
target: 'es2015',
},