mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-04-19 18:27:33 +08:00
fix: support dense_vector from ES fields response (ES 9.x compatibility) (#13972)
fix: support dense_vector from ES fields response (ES 9.x compatibility)

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Configuration Chore (non-breaking change which updates configuration)

## Summary by CodeRabbit

* **Bug Fixes**
  * More accurate handling and unwrapping of dense-vector fields so returned values have correct shapes.
  * Field selection reliably limits returned data and falls back to alternate result locations when needed.
  * Use of consistent result IDs and tolerant handling when score values are missing.
* **Chores / Configuration**
  * Increased build memory and adjusted build-time flags for the frontend build.
  * Simplified runtime model/GPU checks and removed an automated runtime GPU-install attempt.
* **Build Fixes**
  * `web/vite.config.ts`: make `build.minify` and `build.sourcemap` respect `VITE_MINIFY` and `VITE_BUILD_SOURCEMAP` env vars from Dockerfile instead of hardcoding `terser` and `true`.
* **Environment**
  * Allow stack version override and default the runtime image tag to "latest".

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **Bug Fixes**
  * Correct unwrapping of dense-vector fields and reliable field selection with fallback locations.
  * Consistent use of hit-level IDs and tolerant handling when score values are missing.
* **Chores / Configuration**
  * Increased frontend build memory and added build-time minify/sourcemap flags; build minification and sourcemap now configurable.
  * Removed runtime GPU detection for model initialization; force CPU initialization.
* **Environment**
  * Allow stack version override and default runtime image tag to "latest".

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
22
Dockerfile
22
Dockerfile
@ -35,26 +35,14 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
|
||||
apt update && \
|
||||
apt --no-install-recommends install -y ca-certificates; \
|
||||
if [ "$NEED_MIRROR" == "1" ]; then \
|
||||
sed -i 's|http://archive.ubuntu.com/ubuntu|https://mirrors.tuna.tsinghua.edu.cn/ubuntu|g' /etc/apt/sources.list.d/ubuntu.sources; \
|
||||
sed -i 's|http://security.ubuntu.com/ubuntu|https://mirrors.tuna.tsinghua.edu.cn/ubuntu|g' /etc/apt/sources.list.d/ubuntu.sources; \
|
||||
sed -i 's|http://archive.ubuntu.com/ubuntu|https://mirrors.aliyun.com/ubuntu|g' /etc/apt/sources.list.d/ubuntu.sources; \
|
||||
sed -i 's|http://security.ubuntu.com/ubuntu|https://mirrors.aliyun.com/ubuntu|g' /etc/apt/sources.list.d/ubuntu.sources; \
|
||||
fi; \
|
||||
rm -f /etc/apt/apt.conf.d/docker-clean && \
|
||||
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
|
||||
chmod 1777 /tmp && \
|
||||
apt update && \
|
||||
apt install -y build-essential && \
|
||||
apt install -y libglib2.0-0 libglx-mesa0 libgl1 && \
|
||||
apt install -y pkg-config libicu-dev libgdiplus && \
|
||||
apt install -y default-jdk && \
|
||||
apt install -y libatk-bridge2.0-0 && \
|
||||
apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \
|
||||
apt install -y libjemalloc-dev && \
|
||||
apt install -y gnupg unzip curl wget git vim less && \
|
||||
apt install -y ghostscript && \
|
||||
apt install -y pandoc && \
|
||||
apt install -y texlive && \
|
||||
apt install -y fonts-freefont-ttf fonts-noto-cjk && \
|
||||
apt install -y postgresql-client
|
||||
apt install -y build-essential libglib2.0-0 libglx-mesa0 libgl1 pkg-config libicu-dev libgdiplus default-jdk libatk-bridge2.0-0 libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev libjemalloc-dev gnupg unzip curl wget git vim less ghostscript pandoc texlive fonts-freefont-ttf fonts-noto-cjk postgresql-client
|
||||
|
||||
# Download resource from GitHub to /usr/share/infinity
|
||||
RUN mkdir -p /usr/share/infinity/resource && \
|
||||
@ -165,8 +153,8 @@ RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
|
||||
COPY web web
|
||||
COPY docs docs
|
||||
RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
|
||||
export NODE_OPTIONS="--max-old-space-size=4096" && \
|
||||
cd web && npm install && npm run build
|
||||
cd web && NODE_OPTIONS="--max-old-space-size=8192" npm install && \
|
||||
NODE_OPTIONS="--max-old-space-size=8192" VITE_BUILD_SOURCEMAP=false VITE_MINIFY=esbuild npm run build
|
||||
|
||||
COPY .git /ragflow/.git
|
||||
|
||||
|
||||
@ -38,7 +38,6 @@ from sklearn.cluster import KMeans
|
||||
from sklearn.metrics import silhouette_score
|
||||
|
||||
from common.file_utils import get_project_base_directory
|
||||
from common.misc_utils import pip_install_torch
|
||||
from deepdoc.vision import OCR, AscendLayoutRecognizer, LayoutRecognizer, Recognizer, TableStructureRecognizer
|
||||
from rag.nlp import rag_tokenizer
|
||||
from rag.prompts.generator import vision_llm_describe_prompt
|
||||
@ -91,14 +90,9 @@ class RAGFlowPdfParser:
|
||||
self.tbl_det = TableStructureRecognizer()
|
||||
|
||||
self.updown_cnt_mdl = xgb.Booster()
|
||||
try:
|
||||
pip_install_torch()
|
||||
import torch.cuda
|
||||
|
||||
if torch.cuda.is_available():
|
||||
self.updown_cnt_mdl.set_param({"device": "cuda"})
|
||||
except Exception:
|
||||
logging.info("No torch found.")
|
||||
# xgboost model is very small; using CPU explicitly
|
||||
self.updown_cnt_mdl.set_param({"device": "cpu"})
|
||||
logging.info("updown_cnt_mdl initialized on CPU")
|
||||
try:
|
||||
model_dir = os.path.join(get_project_base_directory(), "rag/res/deepdoc")
|
||||
self.updown_cnt_mdl.load_model(os.path.join(model_dir, "updown_concat_xgb.model"))
|
||||
|
||||
@ -28,7 +28,7 @@ DEVICE=${DEVICE:-cpu}
|
||||
COMPOSE_PROFILES=${DOC_ENGINE},${DEVICE}
|
||||
|
||||
# The version of Elasticsearch.
|
||||
STACK_VERSION=8.11.3
|
||||
STACK_VERSION=${STACK_VERSION:-8.11.3}
|
||||
|
||||
# The hostname where the Elasticsearch service is exposed
|
||||
ES_HOST=es01
|
||||
@ -159,7 +159,7 @@ GO_ADMIN_PORT=9383
|
||||
API_PROXY_SCHEME=python # use pure python server deployment
|
||||
|
||||
# The RAGFlow Docker image to download. v0.22+ doesn't include embedding models.
|
||||
RAGFLOW_IMAGE=infiniflow/ragflow:v0.24.0
|
||||
RAGFLOW_IMAGE=infiniflow/ragflow:latest
|
||||
|
||||
# If you cannot download the RAGFlow Docker image:
|
||||
# RAGFLOW_IMAGE=swr.cn-north-4.myhuaweicloud.com/infiniflow/ragflow:v0.24.0
|
||||
|
||||
@ -762,7 +762,7 @@ class Parser(ProcessBase):
|
||||
|
||||
sections = [line.strip() for line in content.splitlines() if line and line.strip()]
|
||||
if conf.get("remove_toc"):
|
||||
sections = remove_toc_word(sections, outlines)
|
||||
sections = remove_toc_word(sections, [])
|
||||
|
||||
if conf.get("output_format") == "json":
|
||||
self.set_output(
|
||||
|
||||
@ -91,7 +91,7 @@ class KGSearch(Dealer):
|
||||
es_res = self.dataStore.get_fields(es_res, ["content_with_weight", "_score", "from_entity_kwd", "to_entity_kwd",
|
||||
"weight_int"])
|
||||
for _, ent in es_res.items():
|
||||
if get_float(ent["_score"]) < sim_thr:
|
||||
if get_float(ent.get("_score", 0)) < sim_thr:
|
||||
continue
|
||||
f, t = sorted([ent["from_entity_kwd"], ent["to_entity_kwd"]])
|
||||
if isinstance(f, list):
|
||||
@ -99,7 +99,7 @@ class KGSearch(Dealer):
|
||||
if isinstance(t, list):
|
||||
t = t[0]
|
||||
res[(f, t)] = {
|
||||
"sim": get_float(ent["_score"]),
|
||||
"sim": get_float(ent.get("_score", 0)),
|
||||
"pagerank": get_float(ent.get("weight_int", 0)),
|
||||
"description": ent["content_with_weight"]
|
||||
}
|
||||
|
||||
@ -278,7 +278,7 @@ class _BlobLikeBase(SyncBase):
|
||||
)
|
||||
)
|
||||
|
||||
begin_info = (
|
||||
_begin_info = (
|
||||
"totally"
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]
|
||||
else "from {}".format(task["poll_range_start"])
|
||||
@ -289,7 +289,7 @@ class _BlobLikeBase(SyncBase):
|
||||
bucket_type,
|
||||
self.conf["bucket_name"],
|
||||
self.conf.get("prefix", ""),
|
||||
begin_info,
|
||||
_begin_info,
|
||||
)
|
||||
)
|
||||
return document_batch_generator
|
||||
@ -377,10 +377,10 @@ class Confluence(SyncBase):
|
||||
# Determine the time range for synchronization based on reindex or poll_range_start
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]:
|
||||
start_time = 0.0
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
start_time = task["poll_range_start"].timestamp()
|
||||
begin_info = f"from {task['poll_range_start']}"
|
||||
_begin_info = f"from {task['poll_range_start']}"
|
||||
|
||||
end_time = datetime.now(timezone.utc).timestamp()
|
||||
|
||||
@ -442,7 +442,7 @@ class Notion(SyncBase):
|
||||
datetime.now(timezone.utc).timestamp())
|
||||
)
|
||||
|
||||
begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(
|
||||
_begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(
|
||||
task["poll_range_start"])
|
||||
self.log_connection("Notion", f"root({self.conf['root_page_id']})", task)
|
||||
return document_generator
|
||||
@ -470,7 +470,7 @@ class Discord(SyncBase):
|
||||
datetime.now(timezone.utc).timestamp())
|
||||
)
|
||||
|
||||
begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(
|
||||
_begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(
|
||||
task["poll_range_start"])
|
||||
self.log_connection("Discord", f"servers({server_ids}), channel({channel_names})", task)
|
||||
return document_generator
|
||||
@ -516,7 +516,7 @@ class Gmail(SyncBase):
|
||||
if task["reindex"] == "1" or not task.get("poll_range_start"):
|
||||
start_time = None
|
||||
end_time = None
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
document_generator = self.connector.load_from_state()
|
||||
else:
|
||||
poll_start = task["poll_range_start"]
|
||||
@ -524,12 +524,12 @@ class Gmail(SyncBase):
|
||||
if poll_start is None:
|
||||
start_time = None
|
||||
end_time = None
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
document_generator = self.connector.load_from_state()
|
||||
else:
|
||||
start_time = poll_start.timestamp()
|
||||
end_time = datetime.now(timezone.utc).timestamp()
|
||||
begin_info = f"from {poll_start}"
|
||||
_begin_info = f"from {poll_start}"
|
||||
document_generator = self.connector.poll_source(start_time, end_time)
|
||||
|
||||
try:
|
||||
@ -549,13 +549,13 @@ class Dropbox(SyncBase):
|
||||
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]:
|
||||
document_generator = self.connector.load_from_state()
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
poll_start = task["poll_range_start"]
|
||||
document_generator = self.connector.poll_source(
|
||||
poll_start.timestamp(), datetime.now(timezone.utc).timestamp()
|
||||
)
|
||||
begin_info = f"from {poll_start}"
|
||||
_begin_info = f"from {poll_start}"
|
||||
|
||||
self.log_connection("Dropbox", "workspace", task)
|
||||
return document_generator
|
||||
@ -588,10 +588,10 @@ class GoogleDrive(SyncBase):
|
||||
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]:
|
||||
start_time = 0.0
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
start_time = task["poll_range_start"].timestamp()
|
||||
begin_info = f"from {task['poll_range_start']}"
|
||||
_begin_info = f"from {task['poll_range_start']}"
|
||||
|
||||
end_time = datetime.now(timezone.utc).timestamp()
|
||||
raw_batch_size = self.conf.get("sync_batch_size") or self.conf.get("batch_size") or INDEX_BATCH_SIZE
|
||||
@ -682,10 +682,10 @@ class Jira(SyncBase):
|
||||
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]:
|
||||
start_time = 0.0
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
start_time = task["poll_range_start"].timestamp()
|
||||
begin_info = f"from {task['poll_range_start']}"
|
||||
_begin_info = f"from {task['poll_range_start']}"
|
||||
|
||||
end_time = datetime.now(timezone.utc).timestamp()
|
||||
|
||||
@ -788,12 +788,12 @@ class WebDAV(SyncBase):
|
||||
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]:
|
||||
document_batch_generator = self.connector.load_from_state()
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
start_ts = task["poll_range_start"].timestamp()
|
||||
end_ts = datetime.now(timezone.utc).timestamp()
|
||||
document_batch_generator = self.connector.poll_source(start_ts, end_ts)
|
||||
begin_info = "from {}".format(task["poll_range_start"])
|
||||
_begin_info = "from {}".format(task["poll_range_start"])
|
||||
|
||||
self.log_connection("WebDAV", f"{self.conf['base_url']}(path: {self.conf.get('remote_path', '/')})", task)
|
||||
|
||||
@ -820,13 +820,13 @@ class Moodle(SyncBase):
|
||||
|
||||
if task["reindex"] == "1" or poll_start is None:
|
||||
document_generator = self.connector.load_from_state()
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
document_generator = self.connector.poll_source(
|
||||
poll_start.timestamp(),
|
||||
datetime.now(timezone.utc).timestamp(),
|
||||
)
|
||||
begin_info = f"from {poll_start}"
|
||||
_begin_info = f"from {poll_start}"
|
||||
|
||||
self.log_connection("Moodle", self.conf["moodle_url"], task)
|
||||
return document_generator
|
||||
@ -860,13 +860,13 @@ class BOX(SyncBase):
|
||||
|
||||
if task["reindex"] == "1" or poll_start is None:
|
||||
document_generator = self.connector.load_from_state()
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
document_generator = self.connector.poll_source(
|
||||
poll_start.timestamp(),
|
||||
datetime.now(timezone.utc).timestamp(),
|
||||
)
|
||||
begin_info = f"from {poll_start}"
|
||||
_begin_info = f"from {poll_start}"
|
||||
self.log_connection("Box", f"folder_id({self.conf['folder_id']})", task)
|
||||
return document_generator
|
||||
|
||||
@ -896,13 +896,13 @@ class Airtable(SyncBase):
|
||||
|
||||
if task.get("reindex") == "1" or poll_start is None:
|
||||
document_generator = self.connector.load_from_state()
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
document_generator = self.connector.poll_source(
|
||||
poll_start.timestamp(),
|
||||
datetime.now(timezone.utc).timestamp(),
|
||||
)
|
||||
begin_info = f"from {poll_start}"
|
||||
_begin_info = f"from {poll_start}"
|
||||
|
||||
self.log_connection(
|
||||
"Airtable",
|
||||
@ -931,18 +931,18 @@ class Asana(SyncBase):
|
||||
|
||||
if task.get("reindex") == "1" or not task.get("poll_range_start"):
|
||||
document_generator = self.connector.load_from_state()
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
poll_start = task.get("poll_range_start")
|
||||
if poll_start is None:
|
||||
document_generator = self.connector.load_from_state()
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
document_generator = self.connector.poll_source(
|
||||
poll_start.timestamp(),
|
||||
datetime.now(timezone.utc).timestamp(),
|
||||
)
|
||||
begin_info = f"from {poll_start}"
|
||||
_begin_info = f"from {poll_start}"
|
||||
|
||||
self.log_connection(
|
||||
"Asana",
|
||||
@ -979,10 +979,10 @@ class Github(SyncBase):
|
||||
file_list = None
|
||||
if task.get("reindex") == "1" or not task.get("poll_range_start"):
|
||||
start_time = datetime.fromtimestamp(0, tz=timezone.utc)
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
start_time = task.get("poll_range_start")
|
||||
begin_info = f"from {start_time}"
|
||||
_begin_info = f"from {start_time}"
|
||||
if self.conf.get("sync_deleted_files"):
|
||||
file_list = []
|
||||
for slim_batch in self.connector.retrieve_all_slim_docs_perm_sync():
|
||||
@ -1041,10 +1041,10 @@ class IMAP(SyncBase):
|
||||
end_time = datetime.now(timezone.utc).timestamp()
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]:
|
||||
start_time = end_time - self.conf.get("poll_range",30) * 24 * 60 * 60
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
start_time = task["poll_range_start"].timestamp()
|
||||
begin_info = f"from {task['poll_range_start']}"
|
||||
_begin_info = f"from {task['poll_range_start']}"
|
||||
raw_batch_size = self.conf.get("sync_batch_size") or self.conf.get("batch_size") or INDEX_BATCH_SIZE
|
||||
try:
|
||||
batch_size = int(raw_batch_size)
|
||||
@ -1101,10 +1101,10 @@ class Zendesk(SyncBase):
|
||||
end_time = datetime.now(timezone.utc).timestamp()
|
||||
if task["reindex"] == "1" or not task.get("poll_range_start"):
|
||||
start_time = 0
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
start_time = task["poll_range_start"].timestamp()
|
||||
begin_info = f"from {task['poll_range_start']}"
|
||||
_begin_info = f"from {task['poll_range_start']}"
|
||||
|
||||
raw_batch_size = (
|
||||
self.conf.get("sync_batch_size")
|
||||
@ -1193,18 +1193,18 @@ class Gitlab(SyncBase):
|
||||
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]:
|
||||
document_generator = self.connector.load_from_state()
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
poll_start = task["poll_range_start"]
|
||||
if poll_start is None:
|
||||
document_generator = self.connector.load_from_state()
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
document_generator = self.connector.poll_source(
|
||||
poll_start.timestamp(),
|
||||
datetime.now(timezone.utc).timestamp()
|
||||
)
|
||||
begin_info = "from {}".format(poll_start)
|
||||
_begin_info = "from {}".format(poll_start)
|
||||
self.log_connection("Gitlab", f"({self.conf['project_name']})", task)
|
||||
return document_generator
|
||||
|
||||
@ -1228,10 +1228,10 @@ class Bitbucket(SyncBase):
|
||||
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]:
|
||||
start_time = datetime.fromtimestamp(0, tz=timezone.utc)
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
start_time = task.get("poll_range_start")
|
||||
begin_info = f"from {start_time}"
|
||||
_begin_info = f"from {start_time}"
|
||||
|
||||
end_time = datetime.now(timezone.utc)
|
||||
|
||||
@ -1284,13 +1284,13 @@ class SeaFile(SyncBase):
|
||||
poll_start = task.get("poll_range_start")
|
||||
if task["reindex"] == "1" or poll_start is None:
|
||||
document_generator = self.connector.load_from_state()
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
document_generator = self.connector.poll_source(
|
||||
poll_start.timestamp(),
|
||||
datetime.now(timezone.utc).timestamp(),
|
||||
)
|
||||
begin_info = f"from {poll_start}"
|
||||
_begin_info = f"from {poll_start}"
|
||||
|
||||
scope = conf.get("sync_scope", "account")
|
||||
extra = ""
|
||||
@ -1328,13 +1328,13 @@ class DingTalkAITable(SyncBase):
|
||||
|
||||
if task.get("reindex") == "1" or poll_start is None:
|
||||
document_generator = self.connector.load_from_state()
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
document_generator = self.connector.poll_source(
|
||||
poll_start.timestamp(),
|
||||
datetime.now(timezone.utc).timestamp(),
|
||||
)
|
||||
begin_info = f"from {poll_start}"
|
||||
_begin_info = f"from {poll_start}"
|
||||
|
||||
self.log_connection(
|
||||
"DingTalk AI Table",
|
||||
@ -1371,14 +1371,14 @@ class MySQL(SyncBase):
|
||||
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]:
|
||||
document_generator = self.connector.load_from_state()
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
poll_start = task["poll_range_start"]
|
||||
document_generator = self.connector.poll_source(
|
||||
poll_start.timestamp(),
|
||||
datetime.now(timezone.utc).timestamp()
|
||||
)
|
||||
begin_info = f"from {poll_start}"
|
||||
_begin_info = f"from {poll_start}"
|
||||
|
||||
self.log_connection("MySQL", f"{self.conf.get('host')}:{self.conf.get('database')}", task)
|
||||
return document_generator
|
||||
@ -1410,14 +1410,14 @@ class PostgreSQL(SyncBase):
|
||||
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]:
|
||||
document_generator = self.connector.load_from_state()
|
||||
begin_info = "totally"
|
||||
_begin_info = "totally"
|
||||
else:
|
||||
poll_start = task["poll_range_start"]
|
||||
document_generator = self.connector.poll_source(
|
||||
poll_start.timestamp(),
|
||||
datetime.now(timezone.utc).timestamp()
|
||||
)
|
||||
begin_info = f"from {poll_start}"
|
||||
_begin_info = f"from {poll_start}"
|
||||
|
||||
self.log_connection("PostgreSQL", f"{self.conf.get('host')}:{self.conf.get('database')}", task)
|
||||
return document_generator
|
||||
|
||||
@ -253,7 +253,18 @@ class ESConnection(ESConnectionBase):
|
||||
|
||||
if limit > 0 and not use_search_after:
|
||||
s = s[offset:offset + limit]
|
||||
# Filter _source to only requested fields for efficiency, and add vector
|
||||
# fields to "fields" param so they appear in hit.fields when ES 9.x
|
||||
# exclude_source_vectors is enabled (dense_vector not in _source).
|
||||
if select_fields:
|
||||
s = s.source(select_fields)
|
||||
q = s.to_dict()
|
||||
# ES 9.x: dense_vector fields excluded from _source; request them via fields.
|
||||
# Note: knn does NOT have a "fields" parameter - adding it inside the knn
|
||||
# object causes BadRequestError on ES 9.x. We add "fields" at top level.
|
||||
vector_fields = [f for f in (select_fields or []) if f.endswith("_vec")]
|
||||
if vector_fields:
|
||||
q["fields"] = vector_fields
|
||||
self.logger.debug(f"ESConnection.search {str(index_names)} query: " + json.dumps(q))
|
||||
|
||||
for i in range(ATTEMPT_TIME):
|
||||
@ -565,8 +576,24 @@ class ESConnection(ESConnectionBase):
|
||||
res_fields = {}
|
||||
if not fields:
|
||||
return {}
|
||||
for d in self._get_source(res):
|
||||
m = {n: d.get(n) for n in fields if d.get(n) is not None}
|
||||
hits = res.get("hits", {}).get("hits", [])
|
||||
for hit in hits:
|
||||
doc_id = hit.get("_id")
|
||||
d = hit.get("_source", {})
|
||||
# Also extract fields from ES "fields" response (used by dense_vector in ES 9.x)
|
||||
hit_fields = hit.get("fields", {})
|
||||
m = {}
|
||||
for n in fields:
|
||||
# First check _source
|
||||
if d.get(n) is not None:
|
||||
m[n] = d.get(n)
|
||||
# Then check fields (ES 9.x stores dense_vector here, not in _source)
|
||||
elif n in hit_fields:
|
||||
vals = hit_fields[n]
|
||||
# ES fields response wraps dense_vector in 2 levels: [[v1,v2,...]] -> [v1,v2,...]
|
||||
if isinstance(vals, list) and len(vals) == 1:
|
||||
vals = vals[0]
|
||||
m[n] = vals
|
||||
for n, v in m.items():
|
||||
if isinstance(v, list):
|
||||
m[n] = v
|
||||
@ -580,5 +607,5 @@ class ESConnection(ESConnectionBase):
|
||||
# m[n] = remove_redundant_spaces(m[n])
|
||||
|
||||
if m:
|
||||
res_fields[d["id"]] = m
|
||||
res_fields[doc_id] = m
|
||||
return res_fields
|
||||
|
||||
@ -18,7 +18,7 @@ from time import sleep
|
||||
from ragflow_sdk import RAGFlow
|
||||
from configs import HOST_ADDRESS, VERSION
|
||||
import pytest
|
||||
from common import (
|
||||
from test_common import (
|
||||
batch_add_chunks,
|
||||
batch_create_datasets,
|
||||
bulk_upload_documents,
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
import pytest
|
||||
from common import api_new_token, api_rm_token, api_stats, api_token_list, batch_create_chats
|
||||
from test_common import api_new_token, api_rm_token, api_stats, api_token_list, batch_create_chats
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -18,7 +18,7 @@
|
||||
from time import sleep
|
||||
|
||||
import pytest
|
||||
from common import batch_add_chunks, delete_chunks, list_chunks, list_documents, parse_documents
|
||||
from test_common import batch_add_chunks, delete_chunks, list_chunks, list_documents, parse_documents
|
||||
from utils import wait_for
|
||||
|
||||
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from common import add_chunk, delete_document, get_chunk, list_chunks
|
||||
from test_common import add_chunk, delete_document, get_chunk, list_chunks
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -17,7 +17,7 @@ import os
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from common import batch_add_chunks, list_chunks, update_chunk
|
||||
from test_common import batch_add_chunks, list_chunks, update_chunk
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -17,7 +17,7 @@ import os
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from common import retrieval_chunks
|
||||
from test_common import retrieval_chunks
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from common import batch_add_chunks, delete_chunks, list_chunks
|
||||
from test_common import batch_add_chunks, delete_chunks, list_chunks
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -20,7 +20,7 @@ from random import randint
|
||||
from time import sleep
|
||||
|
||||
import pytest
|
||||
from common import delete_document, list_chunks, update_chunk
|
||||
from test_common import delete_document, list_chunks, update_chunk
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -13,6 +13,4 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from rag.flow.chunker.token_chunker import TokenChunker, TokenChunkerParam
|
||||
|
||||
__all__ = ["TokenChunker", "TokenChunkerParam"]
|
||||
__all__ = []
|
||||
|
||||
@ -21,7 +21,7 @@ from pathlib import Path
|
||||
from types import ModuleType, SimpleNamespace
|
||||
|
||||
import pytest
|
||||
from common import bulk_upload_documents, delete_document, list_documents
|
||||
from test_common import bulk_upload_documents, delete_document, list_documents
|
||||
|
||||
|
||||
class _DummyManager:
|
||||
|
||||
@ -19,7 +19,7 @@ from types import SimpleNamespace
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from test_web_api.common import create_document, list_datasets
|
||||
from test_common import create_document, list_datasets
|
||||
from configs import DOCUMENT_NAME_LIMIT, INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
from utils.file_utils import create_txt_file
|
||||
|
||||
@ -17,7 +17,7 @@ import asyncio
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
from common import (
|
||||
from test_common import (
|
||||
document_change_status,
|
||||
document_filter,
|
||||
document_infos,
|
||||
|
||||
@ -18,7 +18,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
from common import list_documents
|
||||
from test_common import list_documents
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
from utils import is_sorted
|
||||
|
||||
@ -18,7 +18,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
from common import bulk_upload_documents, list_documents, parse_documents
|
||||
from test_common import bulk_upload_documents, list_documents, parse_documents
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
from utils import wait_for
|
||||
|
||||
@ -17,7 +17,7 @@ import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from common import bulk_upload_documents, delete_document, list_documents
|
||||
from test_common import bulk_upload_documents, delete_document, list_documents
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -20,7 +20,7 @@ from types import ModuleType, SimpleNamespace
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from common import list_datasets, upload_documents
|
||||
from test_common import list_datasets, upload_documents
|
||||
from configs import DOCUMENT_NAME_LIMIT, INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
from utils.file_utils import create_txt_file
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
import pytest
|
||||
from common import batch_create_datasets, list_datasets, delete_datasets
|
||||
from test_common import batch_create_datasets, list_datasets, delete_datasets
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
from pytest import FixtureRequest
|
||||
from ragflow_sdk import RAGFlow
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from common import create_dataset
|
||||
from test_common import create_dataset
|
||||
from configs import DATASET_NAME_LIMIT, INVALID_API_TOKEN
|
||||
from hypothesis import example, given, settings
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
import pytest
|
||||
from common import (
|
||||
from test_common import (
|
||||
detail_kb,
|
||||
)
|
||||
from configs import INVALID_API_TOKEN
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
import pytest
|
||||
from test_web_api.common import (
|
||||
from test_common import (
|
||||
kb_delete_pipeline_logs,
|
||||
kb_list_pipeline_dataset_logs,
|
||||
kb_list_pipeline_logs,
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
from test_web_api.common import (
|
||||
from test_common import (
|
||||
delete_knowledge_graph,
|
||||
kb_basic_info,
|
||||
kb_get_meta,
|
||||
|
||||
@ -17,7 +17,7 @@ import json
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from common import list_datasets
|
||||
from test_common import list_datasets
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
from utils import is_sorted
|
||||
|
||||
@ -15,7 +15,7 @@
|
||||
#
|
||||
|
||||
import pytest
|
||||
from common import (
|
||||
from test_common import (
|
||||
list_datasets,
|
||||
delete_datasets,
|
||||
)
|
||||
|
||||
@ -17,7 +17,7 @@ import os
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from test_web_api.common import update_dataset
|
||||
from test_common import update_dataset
|
||||
from configs import DATASET_NAME_LIMIT, INVALID_API_TOKEN
|
||||
from hypothesis import HealthCheck, example, given, settings
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
import pytest
|
||||
from common import llm_factories, llm_list
|
||||
from test_common import llm_factories, llm_list
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -15,7 +15,7 @@
|
||||
#
|
||||
import pytest
|
||||
import random
|
||||
from test_web_api.common import create_memory, list_memory, delete_memory
|
||||
from test_common import create_memory, list_memory, delete_memory
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def add_memory_func(request, WebApiAuth):
|
||||
|
||||
@ -17,7 +17,7 @@ import random
|
||||
import re
|
||||
|
||||
import pytest
|
||||
from test_web_api.common import create_memory
|
||||
from test_common import create_memory
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import pytest
|
||||
from test_web_api.common import list_memory, get_memory_config
|
||||
from test_common import list_memory, get_memory_config
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
import pytest
|
||||
from test_web_api.common import (list_memory, delete_memory)
|
||||
from test_common import (list_memory, delete_memory)
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
import re
|
||||
|
||||
import pytest
|
||||
from test_web_api.common import update_memory
|
||||
from test_common import update_memory
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
from utils import encode_avatar
|
||||
|
||||
@ -18,7 +18,7 @@ import uuid
|
||||
|
||||
import pytest
|
||||
import random
|
||||
from test_web_api.common import create_memory, list_memory, add_message, delete_memory
|
||||
from test_common import create_memory, list_memory, add_message, delete_memory
|
||||
|
||||
|
||||
@pytest.fixture(scope="class")
|
||||
|
||||
@ -17,7 +17,7 @@ import time
|
||||
import uuid
|
||||
import pytest
|
||||
|
||||
from test_web_api.common import list_memory_message, add_message
|
||||
from test_common import list_memory_message, add_message
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
import random
|
||||
import pytest
|
||||
import requests
|
||||
from test_web_api.common import forget_message, list_memory_message, get_message_content
|
||||
from test_common import forget_message, list_memory_message, get_message_content
|
||||
from configs import HOST_ADDRESS, INVALID_API_TOKEN, VERSION
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
import random
|
||||
|
||||
import pytest
|
||||
from test_web_api.common import get_message_content, get_recent_message
|
||||
from test_common import get_message_content, get_recent_message
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
import random
|
||||
|
||||
import pytest
|
||||
from test_web_api.common import get_recent_message
|
||||
from test_common import get_recent_message
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -17,7 +17,7 @@ import os
|
||||
import random
|
||||
|
||||
import pytest
|
||||
from test_web_api.common import list_memory_message
|
||||
from test_common import list_memory_message
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
import pytest
|
||||
from test_web_api.common import search_message, list_memory_message
|
||||
from test_common import search_message, list_memory_message
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -17,7 +17,7 @@ import random
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from test_web_api.common import update_message_status, list_memory_message, get_message_content
|
||||
from test_common import update_message_status, list_memory_message, get_message_content
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
from configs import HOST_ADDRESS, VERSION
|
||||
|
||||
@ -19,7 +19,7 @@ from pathlib import Path
|
||||
from types import ModuleType
|
||||
|
||||
import pytest
|
||||
from common import plugin_llm_tools
|
||||
from test_common import plugin_llm_tools
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
from common import search_create, search_detail, search_list, search_rm, search_update
|
||||
from test_common import search_create, search_detail, search_list, search_rm, search_update
|
||||
from configs import INVALID_API_TOKEN
|
||||
from libs.auth import RAGFlowWebApiAuth
|
||||
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
import pytest
|
||||
from common import (
|
||||
from test_common import (
|
||||
system_config,
|
||||
system_delete_token,
|
||||
system_new_token,
|
||||
|
||||
@ -26,6 +26,17 @@ const inspectorBabelPlugin = (): import('vite').Plugin => ({
|
||||
},
|
||||
});
|
||||
|
||||
type MinifyValue = boolean | 'esbuild' | 'terser';

/**
 * Translate a raw minify setting (read from `VITE_MINIFY` by the config)
 * into a value for the build's `minify` option.
 *
 * Matching is case-insensitive:
 *   - `'false'`   -> `false`
 *   - `'esbuild'` -> `'esbuild'`
 *   - anything else, including `undefined` and `'terser'` -> `'terser'`
 *
 * @param value Raw setting string, or `undefined` when it is not set.
 * @returns The minify option to use.
 */
function resolveMinify(value: string | undefined): MinifyValue {
  // An unset variable shares the fallback path with unrecognised values.
  const normalized = value === undefined ? 'terser' : value.toLowerCase();

  switch (normalized) {
    case 'false':
      return false;
    case 'esbuild':
      return 'esbuild';
    default:
      return 'terser';
  }
}
|
||||
|
||||
// https://vitejs.dev/config/
|
||||
export default defineConfig(({ mode }) => {
|
||||
const env = loadEnv(mode, process.cwd(), '');
|
||||
@ -229,7 +240,7 @@ export default defineConfig(({ mode }) => {
|
||||
plugins: [],
|
||||
treeshake: true,
|
||||
},
|
||||
minify: 'terser',
|
||||
minify: resolveMinify(env.VITE_MINIFY),
|
||||
terserOptions: {
|
||||
compress: {
|
||||
drop_console: true, // delete console
|
||||
@ -246,7 +257,7 @@ export default defineConfig(({ mode }) => {
|
||||
comments: false, // Delete comments
|
||||
},
|
||||
},
|
||||
sourcemap: true,
|
||||
sourcemap: env.VITE_BUILD_SOURCEMAP !== 'false',
|
||||
cssCodeSplit: true,
|
||||
target: 'es2015',
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user