fix: support dense_vector from ES fields response (ES 9.x compatibility) (#13972)

fix: support dense_vector from ES fields response (ES 9.x compatibility)

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Configuration Chore (non-breaking change which updates configuration)

## Summary by CodeRabbit

* **Bug Fixes**
  * More accurate handling and unwrapping of dense-vector fields so returned values have correct shapes.
  * Field selection reliably limits returned data and falls back to alternate result locations when needed.
  * Use of consistent result IDs and tolerant handling when score values are missing.
* **Chores / Configuration**
  * Increased build memory and adjusted build-time flags for the frontend build.
  * Simplified runtime model/GPU checks and removed an automated runtime GPU-install attempt.
* **Build Fixes**
  * `web/vite.config.ts`: make `build.minify` and `build.sourcemap` respect `VITE_MINIFY` and `VITE_BUILD_SOURCEMAP` env vars from Dockerfile instead of hardcoding `terser` and `true`.
* **Environment**
  * Allow stack version override and default the runtime image tag to "latest".

## Summary by CodeRabbit

* **Bug Fixes**
  * Correct unwrapping of dense-vector fields and reliable field selection with fallback locations.
  * Consistent use of hit-level IDs and tolerant handling when score values are missing.
* **Chores / Configuration**
  * Increased frontend build memory and added build-time minify/sourcemap flags; build minification and sourcemap now configurable.
  * Removed runtime GPU detection for model initialization; force CPU initialization.
* **Environment**
  * Allow stack version override and default runtime image tag to "latest".

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-19 18:27:33 +08:00 · 2026-04-09 17:44:13 +08:00
parent 107fe6cf90
commit b7744e053e
50 changed files with 142 additions and 124 deletions
--- a/22
+++ b/22
@ -35,26 +35,14 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    apt update && \
    apt --no-install-recommends install -y ca-certificates; \
    if [ "$NEED_MIRROR" == "1" ]; then \
-        sed -i 's|http://archive.ubuntu.com/ubuntu|https://mirrors.tuna.tsinghua.edu.cn/ubuntu|g' /etc/apt/sources.list.d/ubuntu.sources; \
-        sed -i 's|http://security.ubuntu.com/ubuntu|https://mirrors.tuna.tsinghua.edu.cn/ubuntu|g' /etc/apt/sources.list.d/ubuntu.sources; \
+        sed -i 's|http://archive.ubuntu.com/ubuntu|https://mirrors.aliyun.com/ubuntu|g' /etc/apt/sources.list.d/ubuntu.sources; \
+        sed -i 's|http://security.ubuntu.com/ubuntu|https://mirrors.aliyun.com/ubuntu|g' /etc/apt/sources.list.d/ubuntu.sources; \
    fi; \
    rm -f /etc/apt/apt.conf.d/docker-clean && \
    echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
    chmod 1777 /tmp && \
    apt update && \
-    apt install -y build-essential && \
-    apt install -y libglib2.0-0 libglx-mesa0 libgl1 && \
-    apt install -y pkg-config libicu-dev libgdiplus && \
-    apt install -y default-jdk && \
-    apt install -y libatk-bridge2.0-0 && \
-    apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \
-    apt install -y libjemalloc-dev && \
-    apt install -y gnupg unzip curl wget git vim less && \
-    apt install -y ghostscript && \
-    apt install -y pandoc && \
-    apt install -y texlive && \
-    apt install -y fonts-freefont-ttf fonts-noto-cjk && \
-    apt install -y postgresql-client
+    apt install -y build-essential libglib2.0-0 libglx-mesa0 libgl1 pkg-config libicu-dev libgdiplus default-jdk libatk-bridge2.0-0 libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev libjemalloc-dev gnupg unzip curl wget git vim less ghostscript pandoc texlive fonts-freefont-ttf fonts-noto-cjk postgresql-client

 # Download resource from GitHub to /usr/share/infinity
 RUN mkdir -p /usr/share/infinity/resource && \
@ -165,8 +153,8 @@ RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
 COPY web web
 COPY docs docs
 RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
-    export NODE_OPTIONS="--max-old-space-size=4096" && \
-    cd web && npm install && npm run build
+    cd web && NODE_OPTIONS="--max-old-space-size=8192" npm install && \
+    NODE_OPTIONS="--max-old-space-size=8192" VITE_BUILD_SOURCEMAP=false VITE_MINIFY=esbuild npm run build

 COPY .git /ragflow/.git

--- a/deepdoc/parser/pdf_parser.py
+++ b/deepdoc/parser/pdf_parser.py
@ -38,7 +38,6 @@ from sklearn.cluster import KMeans
 from sklearn.metrics import silhouette_score

 from common.file_utils import get_project_base_directory
-from common.misc_utils import pip_install_torch
 from deepdoc.vision import OCR, AscendLayoutRecognizer, LayoutRecognizer, Recognizer, TableStructureRecognizer
 from rag.nlp import rag_tokenizer
 from rag.prompts.generator import vision_llm_describe_prompt
@ -91,14 +90,9 @@ class RAGFlowPdfParser:
        self.tbl_det = TableStructureRecognizer()

        self.updown_cnt_mdl = xgb.Booster()
-        try:
-            pip_install_torch()
-            import torch.cuda
-
-            if torch.cuda.is_available():
-                self.updown_cnt_mdl.set_param({"device": "cuda"})
-        except Exception:
-            logging.info("No torch found.")
+        # xgboost model is very small; using CPU explicitly
+        self.updown_cnt_mdl.set_param({"device": "cpu"})
+        logging.info("updown_cnt_mdl initialized on CPU")
        try:
            model_dir = os.path.join(get_project_base_directory(), "rag/res/deepdoc")
            self.updown_cnt_mdl.load_model(os.path.join(model_dir, "updown_concat_xgb.model"))
--- a/docker/.env
+++ b/docker/.env
@ -28,7 +28,7 @@ DEVICE=${DEVICE:-cpu}
 COMPOSE_PROFILES=${DOC_ENGINE},${DEVICE}

 # The version of Elasticsearch.
-STACK_VERSION=8.11.3
+STACK_VERSION=${STACK_VERSION:-8.11.3}

 # The hostname where the Elasticsearch service is exposed
 ES_HOST=es01
@ -159,7 +159,7 @@ GO_ADMIN_PORT=9383
 API_PROXY_SCHEME=python # use pure python server deployment

 # The RAGFlow Docker image to download. v0.22+ doesn't include embedding models.
-RAGFLOW_IMAGE=infiniflow/ragflow:v0.24.0
+RAGFLOW_IMAGE=infiniflow/ragflow:latest

 # If you cannot download the RAGFlow Docker image:
 # RAGFLOW_IMAGE=swr.cn-north-4.myhuaweicloud.com/infiniflow/ragflow:v0.24.0
--- a/rag/flow/parser/parser.py
+++ b/rag/flow/parser/parser.py
@ -762,7 +762,7 @@ class Parser(ProcessBase):

            sections = [line.strip() for line in content.splitlines() if line and line.strip()]
            if conf.get("remove_toc"):
-                sections = remove_toc_word(sections, outlines)
+                sections = remove_toc_word(sections, [])

            if conf.get("output_format") == "json":
                self.set_output(
--- a/rag/graphrag/search.py
+++ b/rag/graphrag/search.py
@ -91,7 +91,7 @@ class KGSearch(Dealer):
        es_res = self.dataStore.get_fields(es_res, ["content_with_weight", "_score", "from_entity_kwd", "to_entity_kwd",
                                                   "weight_int"])
        for _, ent in es_res.items():
-            if get_float(ent["_score"]) < sim_thr:
+            if get_float(ent.get("_score", 0)) < sim_thr:
                continue
            f, t = sorted([ent["from_entity_kwd"], ent["to_entity_kwd"]])
            if isinstance(f, list):
@ -99,7 +99,7 @@ class KGSearch(Dealer):
            if isinstance(t, list):
                t = t[0]
            res[(f, t)] = {
-                "sim": get_float(ent["_score"]),
+                "sim": get_float(ent.get("_score", 0)),
                "pagerank": get_float(ent.get("weight_int", 0)),
                "description": ent["content_with_weight"]
            }
--- a/rag/svr/sync_data_source.py
+++ b/rag/svr/sync_data_source.py
@ -278,7 +278,7 @@ class _BlobLikeBase(SyncBase):
            )
        )

-        begin_info = (
+        _begin_info = (
            "totally"
            if task["reindex"] == "1" or not task["poll_range_start"]
            else "from {}".format(task["poll_range_start"])
@ -289,7 +289,7 @@ class _BlobLikeBase(SyncBase):
                bucket_type,
                self.conf["bucket_name"],
                self.conf.get("prefix", ""),
-                begin_info,
+                _begin_info,
            )
        )
        return document_batch_generator
@ -377,10 +377,10 @@ class Confluence(SyncBase):
        # Determine the time range for synchronization based on reindex or poll_range_start
        if task["reindex"] == "1" or not task["poll_range_start"]:
            start_time = 0.0
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            start_time = task["poll_range_start"].timestamp()
-            begin_info = f"from {task['poll_range_start']}"
+            _begin_info = f"from {task['poll_range_start']}"

        end_time = datetime.now(timezone.utc).timestamp()

@ -442,7 +442,7 @@ class Notion(SyncBase):
                                            datetime.now(timezone.utc).timestamp())
        )

-        begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(
+        _begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(
            task["poll_range_start"])
        self.log_connection("Notion", f"root({self.conf['root_page_id']})", task)
        return document_generator
@ -470,7 +470,7 @@ class Discord(SyncBase):
                                            datetime.now(timezone.utc).timestamp())
        )

-        begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(
+        _begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(
            task["poll_range_start"])
        self.log_connection("Discord", f"servers({server_ids}), channel({channel_names})", task)
        return document_generator
@ -516,7 +516,7 @@ class Gmail(SyncBase):
        if task["reindex"] == "1" or not task.get("poll_range_start"):
            start_time = None
            end_time = None
-            begin_info = "totally"
+            _begin_info = "totally"
            document_generator = self.connector.load_from_state()
        else:
            poll_start = task["poll_range_start"]
@ -524,12 +524,12 @@ class Gmail(SyncBase):
            if poll_start is None:
                start_time = None
                end_time = None
-                begin_info = "totally"
+                _begin_info = "totally"
                document_generator = self.connector.load_from_state()
            else:
                start_time = poll_start.timestamp()
                end_time = datetime.now(timezone.utc).timestamp()
-                begin_info = f"from {poll_start}"
+                _begin_info = f"from {poll_start}"
                document_generator = self.connector.poll_source(start_time, end_time)

        try:
@ -549,13 +549,13 @@ class Dropbox(SyncBase):

        if task["reindex"] == "1" or not task["poll_range_start"]:
            document_generator = self.connector.load_from_state()
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            poll_start = task["poll_range_start"]
            document_generator = self.connector.poll_source(
                poll_start.timestamp(), datetime.now(timezone.utc).timestamp()
            )
-            begin_info = f"from {poll_start}"
+            _begin_info = f"from {poll_start}"

        self.log_connection("Dropbox", "workspace", task)
        return document_generator
@ -588,10 +588,10 @@ class GoogleDrive(SyncBase):

        if task["reindex"] == "1" or not task["poll_range_start"]:
            start_time = 0.0
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            start_time = task["poll_range_start"].timestamp()
-            begin_info = f"from {task['poll_range_start']}"
+            _begin_info = f"from {task['poll_range_start']}"

        end_time = datetime.now(timezone.utc).timestamp()
        raw_batch_size = self.conf.get("sync_batch_size") or self.conf.get("batch_size") or INDEX_BATCH_SIZE
@ -682,10 +682,10 @@ class Jira(SyncBase):

        if task["reindex"] == "1" or not task["poll_range_start"]:
            start_time = 0.0
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            start_time = task["poll_range_start"].timestamp()
-            begin_info = f"from {task['poll_range_start']}"
+            _begin_info = f"from {task['poll_range_start']}"

        end_time = datetime.now(timezone.utc).timestamp()

@ -788,12 +788,12 @@ class WebDAV(SyncBase):

        if task["reindex"] == "1" or not task["poll_range_start"]:
            document_batch_generator = self.connector.load_from_state()
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            start_ts = task["poll_range_start"].timestamp()
            end_ts = datetime.now(timezone.utc).timestamp()
            document_batch_generator = self.connector.poll_source(start_ts, end_ts)
-            begin_info = "from {}".format(task["poll_range_start"])
+            _begin_info = "from {}".format(task["poll_range_start"])

        self.log_connection("WebDAV", f"{self.conf['base_url']}(path: {self.conf.get('remote_path', '/')})", task)

@ -820,13 +820,13 @@ class Moodle(SyncBase):

        if task["reindex"] == "1" or poll_start is None:
            document_generator = self.connector.load_from_state()
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            document_generator = self.connector.poll_source(
                poll_start.timestamp(),
                datetime.now(timezone.utc).timestamp(),
            )
-            begin_info = f"from {poll_start}"
+            _begin_info = f"from {poll_start}"

        self.log_connection("Moodle", self.conf["moodle_url"], task)
        return document_generator
@ -860,13 +860,13 @@ class BOX(SyncBase):

        if task["reindex"] == "1" or poll_start is None:
            document_generator = self.connector.load_from_state()
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            document_generator = self.connector.poll_source(
                poll_start.timestamp(),
                datetime.now(timezone.utc).timestamp(),
            )
-            begin_info = f"from {poll_start}"
+            _begin_info = f"from {poll_start}"
        self.log_connection("Box", f"folder_id({self.conf['folder_id']})", task)
        return document_generator

@ -896,13 +896,13 @@ class Airtable(SyncBase):

        if task.get("reindex") == "1" or poll_start is None:
            document_generator = self.connector.load_from_state()
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            document_generator = self.connector.poll_source(
                poll_start.timestamp(),
                datetime.now(timezone.utc).timestamp(),
            )
-            begin_info = f"from {poll_start}"
+            _begin_info = f"from {poll_start}"

        self.log_connection(
            "Airtable",
@ -931,18 +931,18 @@ class Asana(SyncBase):

        if task.get("reindex") == "1" or not task.get("poll_range_start"):
            document_generator = self.connector.load_from_state()
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            poll_start = task.get("poll_range_start")
            if poll_start is None:
                document_generator = self.connector.load_from_state()
-                begin_info = "totally"
+                _begin_info = "totally"
            else:
                document_generator = self.connector.poll_source(
                    poll_start.timestamp(),
                    datetime.now(timezone.utc).timestamp(),
                )
-                begin_info = f"from {poll_start}"
+                _begin_info = f"from {poll_start}"

        self.log_connection(
            "Asana",
@ -979,10 +979,10 @@ class Github(SyncBase):
        file_list = None
        if task.get("reindex") == "1" or not task.get("poll_range_start"):
            start_time = datetime.fromtimestamp(0, tz=timezone.utc)
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            start_time = task.get("poll_range_start")
-            begin_info = f"from {start_time}"
+            _begin_info = f"from {start_time}"
            if self.conf.get("sync_deleted_files"):
                file_list = []
                for slim_batch in self.connector.retrieve_all_slim_docs_perm_sync():
@ -1041,10 +1041,10 @@ class IMAP(SyncBase):
        end_time = datetime.now(timezone.utc).timestamp()
        if task["reindex"] == "1" or not task["poll_range_start"]:
            start_time = end_time - self.conf.get("poll_range",30) * 24 * 60 * 60
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            start_time = task["poll_range_start"].timestamp()
-            begin_info = f"from {task['poll_range_start']}"
+            _begin_info = f"from {task['poll_range_start']}"
        raw_batch_size = self.conf.get("sync_batch_size") or self.conf.get("batch_size") or INDEX_BATCH_SIZE
        try:
            batch_size = int(raw_batch_size)
@ -1101,10 +1101,10 @@ class Zendesk(SyncBase):
        end_time = datetime.now(timezone.utc).timestamp()
        if task["reindex"] == "1" or not task.get("poll_range_start"):
            start_time = 0
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            start_time = task["poll_range_start"].timestamp()
-            begin_info = f"from {task['poll_range_start']}"
+            _begin_info = f"from {task['poll_range_start']}"

        raw_batch_size = (
            self.conf.get("sync_batch_size")
@ -1193,18 +1193,18 @@ class Gitlab(SyncBase):

        if task["reindex"] == "1" or not task["poll_range_start"]:
            document_generator = self.connector.load_from_state()
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            poll_start = task["poll_range_start"]
            if poll_start is None:
                document_generator = self.connector.load_from_state()
-                begin_info = "totally"
+                _begin_info = "totally"
            else:
                document_generator = self.connector.poll_source(
                    poll_start.timestamp(),
                    datetime.now(timezone.utc).timestamp()
                )
-                begin_info = "from {}".format(poll_start)
+                _begin_info = "from {}".format(poll_start)
        self.log_connection("Gitlab", f"({self.conf['project_name']})", task)
        return document_generator

@ -1228,10 +1228,10 @@ class Bitbucket(SyncBase):

        if task["reindex"] == "1" or not task["poll_range_start"]:
            start_time = datetime.fromtimestamp(0, tz=timezone.utc)
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            start_time = task.get("poll_range_start")
-            begin_info = f"from {start_time}"
+            _begin_info = f"from {start_time}"
        
        end_time = datetime.now(timezone.utc)

@ -1284,13 +1284,13 @@ class SeaFile(SyncBase):
        poll_start = task.get("poll_range_start")
        if task["reindex"] == "1" or poll_start is None:
            document_generator = self.connector.load_from_state()
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            document_generator = self.connector.poll_source(
                poll_start.timestamp(),
                datetime.now(timezone.utc).timestamp(),
            )
-            begin_info = f"from {poll_start}"
+            _begin_info = f"from {poll_start}"

        scope = conf.get("sync_scope", "account")
        extra = ""
@ -1328,13 +1328,13 @@ class DingTalkAITable(SyncBase):

        if task.get("reindex") == "1" or poll_start is None:
            document_generator = self.connector.load_from_state()
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            document_generator = self.connector.poll_source(
                poll_start.timestamp(),
                datetime.now(timezone.utc).timestamp(),
            )
-            begin_info = f"from {poll_start}"
+            _begin_info = f"from {poll_start}"

        self.log_connection(
            "DingTalk AI Table",
@ -1371,14 +1371,14 @@ class MySQL(SyncBase):

        if task["reindex"] == "1" or not task["poll_range_start"]:
            document_generator = self.connector.load_from_state()
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            poll_start = task["poll_range_start"]
            document_generator = self.connector.poll_source(
                poll_start.timestamp(),
                datetime.now(timezone.utc).timestamp()
            )
-            begin_info = f"from {poll_start}"
+            _begin_info = f"from {poll_start}"

        self.log_connection("MySQL", f"{self.conf.get('host')}:{self.conf.get('database')}", task)
        return document_generator
@ -1410,14 +1410,14 @@ class PostgreSQL(SyncBase):

        if task["reindex"] == "1" or not task["poll_range_start"]:
            document_generator = self.connector.load_from_state()
-            begin_info = "totally"
+            _begin_info = "totally"
        else:
            poll_start = task["poll_range_start"]
            document_generator = self.connector.poll_source(
                poll_start.timestamp(),
                datetime.now(timezone.utc).timestamp()
            )
-            begin_info = f"from {poll_start}"
+            _begin_info = f"from {poll_start}"

        self.log_connection("PostgreSQL", f"{self.conf.get('host')}:{self.conf.get('database')}", task)
        return document_generator
--- a/rag/utils/es_conn.py
+++ b/rag/utils/es_conn.py
@ -253,7 +253,18 @@ class ESConnection(ESConnectionBase):

        if limit > 0 and not use_search_after:
            s = s[offset:offset + limit]
+        # Restrict _source to the requested fields for efficiency. Vector fields
+        # are requested separately through the top-level "fields" parameter
+        # (set on the query dict below) so they appear in hit.fields when ES 9.x
+        # exclude_source_vectors is enabled (dense_vector not in _source).
+        if select_fields:
+            s = s.source(select_fields)
        q = s.to_dict()
+        # ES 9.x: dense_vector fields excluded from _source; request them via fields.
+        # Note: knn does NOT have a "fields" parameter - adding it inside the knn
+        # object causes BadRequestError on ES 9.x. We add "fields" at top level.
+        vector_fields = [f for f in (select_fields or []) if f.endswith("_vec")]
+        if vector_fields:
+            q["fields"] = vector_fields
        self.logger.debug(f"ESConnection.search {str(index_names)} query: " + json.dumps(q))

        for i in range(ATTEMPT_TIME):
@ -565,8 +576,24 @@ class ESConnection(ESConnectionBase):
        res_fields = {}
        if not fields:
            return {}
-        for d in self._get_source(res):
-            m = {n: d.get(n) for n in fields if d.get(n) is not None}
+        hits = res.get("hits", {}).get("hits", [])
+        for hit in hits:
+            doc_id = hit.get("_id")
+            d = hit.get("_source", {})
+            # Also extract fields from ES "fields" response (used by dense_vector in ES 9.x)
+            hit_fields = hit.get("fields", {})
+            m = {}
+            for n in fields:
+                # First check _source
+                if d.get(n) is not None:
+                    m[n] = d.get(n)
+                # Then check fields (ES 9.x stores dense_vector here, not in _source)
+                elif n in hit_fields:
+                    vals = hit_fields[n]
+                    # ES fields response wraps dense_vector in 2 levels: [[v1,v2,...]] -> [v1,v2,...]
+                    if isinstance(vals, list) and len(vals) == 1:
+                        vals = vals[0]
+                    m[n] = vals
            for n, v in m.items():
                if isinstance(v, list):
                    m[n] = v
@ -580,5 +607,5 @@ class ESConnection(ESConnectionBase):
                #     m[n] = remove_redundant_spaces(m[n])

            if m:
-                res_fields[d["id"]] = m
+                res_fields[doc_id] = m
        return res_fields
--- a/test/testcases/test_web_api/conftest.py
+++ b/test/testcases/test_web_api/conftest.py
@ -18,7 +18,7 @@ from time import sleep
 from ragflow_sdk import RAGFlow
 from configs import HOST_ADDRESS, VERSION
 import pytest
-from common import (
+from test_common import (
    batch_add_chunks,
    batch_create_datasets,
    bulk_upload_documents,
--- a/test/testcases/test_web_api/test_api_app/test_api_tokens.py
+++ b/test/testcases/test_web_api/test_api_app/test_api_tokens.py
@ -14,7 +14,7 @@
 #  limitations under the License.
 #
 import pytest
-from common import api_new_token, api_rm_token, api_stats, api_token_list, batch_create_chats
+from test_common import api_new_token, api_rm_token, api_stats, api_token_list, batch_create_chats
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_chunk_app/conftest.py
+++ b/test/testcases/test_web_api/test_chunk_app/conftest.py
@ -18,7 +18,7 @@
 from time import sleep

 import pytest
-from common import batch_add_chunks, delete_chunks, list_chunks, list_documents, parse_documents
+from test_common import batch_add_chunks, delete_chunks, list_chunks, list_documents, parse_documents
 from utils import wait_for


--- a/test/testcases/test_web_api/test_chunk_app/test_create_chunk.py
+++ b/test/testcases/test_web_api/test_chunk_app/test_create_chunk.py
@ -16,7 +16,7 @@
 from concurrent.futures import ThreadPoolExecutor, as_completed

 import pytest
-from common import add_chunk, delete_document, get_chunk, list_chunks
+from test_common import add_chunk, delete_document, get_chunk, list_chunks
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_chunk_app/test_list_chunks.py
+++ b/test/testcases/test_web_api/test_chunk_app/test_list_chunks.py
@ -17,7 +17,7 @@ import os
 from concurrent.futures import ThreadPoolExecutor, as_completed

 import pytest
-from common import batch_add_chunks, list_chunks, update_chunk
+from test_common import batch_add_chunks, list_chunks, update_chunk
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_chunk_app/test_retrieval_chunks.py
+++ b/test/testcases/test_web_api/test_chunk_app/test_retrieval_chunks.py
@ -17,7 +17,7 @@ import os
 from concurrent.futures import ThreadPoolExecutor, as_completed

 import pytest
-from common import retrieval_chunks
+from test_common import retrieval_chunks
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_chunk_app/test_rm_chunks.py
+++ b/test/testcases/test_web_api/test_chunk_app/test_rm_chunks.py
@ -16,7 +16,7 @@
 from concurrent.futures import ThreadPoolExecutor, as_completed

 import pytest
-from common import batch_add_chunks, delete_chunks, list_chunks
+from test_common import batch_add_chunks, delete_chunks, list_chunks
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_chunk_app/test_update_chunk.py
+++ b/test/testcases/test_web_api/test_chunk_app/test_update_chunk.py
@ -20,7 +20,7 @@ from random import randint
 from time import sleep

 import pytest
-from common import delete_document, list_chunks, update_chunk
+from test_common import delete_document, list_chunks, update_chunk
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_chunk_feedback/init.py
+++ b/test/testcases/test_web_api/test_chunk_feedback/init.py
@ -13,6 +13,4 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.

-from rag.flow.chunker.token_chunker import TokenChunker, TokenChunkerParam
-
-__all__ = ["TokenChunker", "TokenChunkerParam"]
+__all__ = []
--- a/test/testcases/test_web_api/test_common.py
+++ b/test/testcases/test_web_api/test_common.py
--- a/test/testcases/test_web_api/test_document_app/conftest.py
+++ b/test/testcases/test_web_api/test_document_app/conftest.py
@ -21,7 +21,7 @@ from pathlib import Path
 from types import ModuleType, SimpleNamespace

 import pytest
-from common import bulk_upload_documents, delete_document, list_documents
+from test_common import bulk_upload_documents, delete_document, list_documents


 class _DummyManager:
--- a/test/testcases/test_web_api/test_document_app/test_create_document.py
+++ b/test/testcases/test_web_api/test_document_app/test_create_document.py
@ -19,7 +19,7 @@ from types import SimpleNamespace
 from concurrent.futures import ThreadPoolExecutor, as_completed

 import pytest
-from test_web_api.common import create_document, list_datasets
+from test_common import create_document, list_datasets
 from configs import DOCUMENT_NAME_LIMIT, INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth
 from utils.file_utils import create_txt_file
--- a/test/testcases/test_web_api/test_document_app/test_document_metadata.py
+++ b/test/testcases/test_web_api/test_document_app/test_document_metadata.py
@ -17,7 +17,7 @@ import asyncio
 from types import SimpleNamespace

 import pytest
-from common import (
+from test_common import (
    document_change_status,
    document_filter,
    document_infos,
--- a/test/testcases/test_web_api/test_document_app/test_list_documents.py
+++ b/test/testcases/test_web_api/test_document_app/test_list_documents.py
@ -18,7 +18,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 from types import SimpleNamespace

 import pytest
-from common import list_documents
+from test_common import list_documents
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth
 from utils import is_sorted
--- a/test/testcases/test_web_api/test_document_app/test_paser_documents.py
+++ b/test/testcases/test_web_api/test_document_app/test_paser_documents.py
@ -18,7 +18,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 from types import SimpleNamespace

 import pytest
-from common import bulk_upload_documents, list_documents, parse_documents
+from test_common import bulk_upload_documents, list_documents, parse_documents
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth
 from utils import wait_for
--- a/test/testcases/test_web_api/test_document_app/test_rm_documents.py
+++ b/test/testcases/test_web_api/test_document_app/test_rm_documents.py
@ -17,7 +17,7 @@ import asyncio
 from concurrent.futures import ThreadPoolExecutor, as_completed

 import pytest
-from common import bulk_upload_documents, delete_document, list_documents
+from test_common import bulk_upload_documents, delete_document, list_documents
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_document_app/test_upload_documents.py
+++ b/test/testcases/test_web_api/test_document_app/test_upload_documents.py
@ -20,7 +20,7 @@ from types import ModuleType, SimpleNamespace
 from concurrent.futures import ThreadPoolExecutor, as_completed

 import pytest
-from common import list_datasets, upload_documents
+from test_common import list_datasets, upload_documents
 from configs import DOCUMENT_NAME_LIMIT, INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth
 from utils.file_utils import create_txt_file
--- a/test/testcases/test_web_api/test_kb_app/conftest.py
+++ b/test/testcases/test_web_api/test_kb_app/conftest.py
@ -14,7 +14,7 @@
 #  limitations under the License.
 #
 import pytest
-from common import batch_create_datasets, list_datasets, delete_datasets
+from test_common import batch_create_datasets, list_datasets, delete_datasets
 from libs.auth import RAGFlowWebApiAuth
 from pytest import FixtureRequest
 from ragflow_sdk import RAGFlow
--- a/test/testcases/test_web_api/test_kb_app/test_create_kb.py
+++ b/test/testcases/test_web_api/test_kb_app/test_create_kb.py
@ -16,7 +16,7 @@
 from concurrent.futures import ThreadPoolExecutor, as_completed

 import pytest
-from common import create_dataset
+from test_common import create_dataset
 from configs import DATASET_NAME_LIMIT, INVALID_API_TOKEN
 from hypothesis import example, given, settings
 from libs.auth import RAGFlowWebApiAuth
--- a/test/testcases/test_web_api/test_kb_app/test_detail_kb.py
+++ b/test/testcases/test_web_api/test_kb_app/test_detail_kb.py
@ -14,7 +14,7 @@
 #  limitations under the License.
 #
 import pytest
-from common import (
+from test_common import (
    detail_kb,
 )
 from configs import INVALID_API_TOKEN
--- a/test/testcases/test_web_api/test_kb_app/test_kb_pipeline_tasks.py
+++ b/test/testcases/test_web_api/test_kb_app/test_kb_pipeline_tasks.py
@ -14,7 +14,7 @@
 #  limitations under the License.
 #
 import pytest
-from test_web_api.common import (
+from test_common import (
    kb_delete_pipeline_logs,
    kb_list_pipeline_dataset_logs,
    kb_list_pipeline_logs,
--- a/test/testcases/test_web_api/test_kb_app/test_kb_tags_meta.py
+++ b/test/testcases/test_web_api/test_kb_app/test_kb_tags_meta.py
@ -16,7 +16,7 @@
 import uuid

 import pytest
-from test_web_api.common import (
+from test_common import (
    delete_knowledge_graph,
    kb_basic_info,
    kb_get_meta,
--- a/test/testcases/test_web_api/test_kb_app/test_list_kbs.py
+++ b/test/testcases/test_web_api/test_kb_app/test_list_kbs.py
@ -17,7 +17,7 @@ import json
 from concurrent.futures import ThreadPoolExecutor, as_completed

 import pytest
-from common import list_datasets
+from test_common import list_datasets
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth
 from utils import is_sorted
--- a/test/testcases/test_web_api/test_kb_app/test_rm_kb.py
+++ b/test/testcases/test_web_api/test_kb_app/test_rm_kb.py
@ -15,7 +15,7 @@
 #

 import pytest
-from common import (
+from test_common import (
    list_datasets,
    delete_datasets,
 )
--- a/test/testcases/test_web_api/test_kb_app/test_update_kb.py
+++ b/test/testcases/test_web_api/test_kb_app/test_update_kb.py
@ -17,7 +17,7 @@ import os
 from concurrent.futures import ThreadPoolExecutor, as_completed

 import pytest
-from test_web_api.common import update_dataset
+from test_common import update_dataset
 from configs import DATASET_NAME_LIMIT, INVALID_API_TOKEN
 from hypothesis import HealthCheck, example, given, settings
 from libs.auth import RAGFlowWebApiAuth
--- a/test/testcases/test_web_api/test_llm_app/test_llm_list.py
+++ b/test/testcases/test_web_api/test_llm_app/test_llm_list.py
@ -14,7 +14,7 @@
 #  limitations under the License.
 #
 import pytest
-from common import llm_factories, llm_list
+from test_common import llm_factories, llm_list
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_memory_app/conftest.py
+++ b/test/testcases/test_web_api/test_memory_app/conftest.py
@ -15,7 +15,7 @@
 #
 import pytest
 import random
-from test_web_api.common import create_memory, list_memory, delete_memory
+from test_common import create_memory, list_memory, delete_memory

@pytest.fixture(scope="function")
 def add_memory_func(request, WebApiAuth):
--- a/test/testcases/test_web_api/test_memory_app/test_create_memory.py
+++ b/test/testcases/test_web_api/test_memory_app/test_create_memory.py
@ -17,7 +17,7 @@ import random
 import re

 import pytest
-from test_web_api.common import create_memory
+from test_common import create_memory
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_memory_app/test_list_memory.py
+++ b/test/testcases/test_web_api/test_memory_app/test_list_memory.py
@ -16,7 +16,7 @@
 from concurrent.futures import ThreadPoolExecutor, as_completed

 import pytest
-from test_web_api.common import list_memory, get_memory_config
+from test_common import list_memory, get_memory_config
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_memory_app/test_rm_memory.py
+++ b/test/testcases/test_web_api/test_memory_app/test_rm_memory.py
@ -14,7 +14,7 @@
 #  limitations under the License.
 #
 import pytest
-from test_web_api.common import (list_memory, delete_memory)
+from test_common import (list_memory, delete_memory)
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_memory_app/test_update_memory.py
+++ b/test/testcases/test_web_api/test_memory_app/test_update_memory.py
@ -16,7 +16,7 @@
 import re

 import pytest
-from test_web_api.common import update_memory
+from test_common import update_memory
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth
 from utils import encode_avatar
--- a/test/testcases/test_web_api/test_message_app/conftest.py
+++ b/test/testcases/test_web_api/test_message_app/conftest.py
@ -18,7 +18,7 @@ import uuid

 import pytest
 import random
-from test_web_api.common import create_memory, list_memory, add_message, delete_memory
+from test_common import create_memory, list_memory, add_message, delete_memory


@pytest.fixture(scope="class")
--- a/test/testcases/test_web_api/test_message_app/test_add_message.py
+++ b/test/testcases/test_web_api/test_message_app/test_add_message.py
@ -17,7 +17,7 @@ import time
 import uuid
 import pytest

-from test_web_api.common import list_memory_message, add_message
+from test_common import list_memory_message, add_message
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_message_app/test_forget_message.py
+++ b/test/testcases/test_web_api/test_message_app/test_forget_message.py
@ -16,7 +16,7 @@
 import random
 import pytest
 import requests
-from test_web_api.common import forget_message, list_memory_message, get_message_content
+from test_common import forget_message, list_memory_message, get_message_content
 from configs import HOST_ADDRESS, INVALID_API_TOKEN, VERSION
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_message_app/test_get_message_content.py
+++ b/test/testcases/test_web_api/test_message_app/test_get_message_content.py
@ -16,7 +16,7 @@
 import random

 import pytest
-from test_web_api.common import get_message_content, get_recent_message
+from test_common import get_message_content, get_recent_message
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_message_app/test_get_recent_message.py
+++ b/test/testcases/test_web_api/test_message_app/test_get_recent_message.py
@ -16,7 +16,7 @@
 import random

 import pytest
-from test_web_api.common import get_recent_message
+from test_common import get_recent_message
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_message_app/test_list_message.py
+++ b/test/testcases/test_web_api/test_message_app/test_list_message.py
@ -17,7 +17,7 @@ import os
 import random

 import pytest
-from test_web_api.common import list_memory_message
+from test_common import list_memory_message
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_message_app/test_search_message.py
+++ b/test/testcases/test_web_api/test_message_app/test_search_message.py
@ -14,7 +14,7 @@
 #  limitations under the License.
 #
 import pytest
-from test_web_api.common import search_message, list_memory_message
+from test_common import search_message, list_memory_message
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_message_app/test_update_message_status.py
+++ b/test/testcases/test_web_api/test_message_app/test_update_message_status.py
@ -17,7 +17,7 @@ import random

 import pytest
 import requests
-from test_web_api.common import update_message_status, list_memory_message, get_message_content
+from test_common import update_message_status, list_memory_message, get_message_content
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth
 from configs import HOST_ADDRESS, VERSION
--- a/test/testcases/test_web_api/test_plugin_app/test_llm_tools.py
+++ b/test/testcases/test_web_api/test_plugin_app/test_llm_tools.py
@ -19,7 +19,7 @@ from pathlib import Path
 from types import ModuleType

 import pytest
-from common import plugin_llm_tools
+from test_common import plugin_llm_tools
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_search_app/test_search_crud.py
+++ b/test/testcases/test_web_api/test_search_app/test_search_crud.py
@ -16,7 +16,7 @@
 import uuid

 import pytest
-from common import search_create, search_detail, search_list, search_rm, search_update
+from test_common import search_create, search_detail, search_list, search_rm, search_update
 from configs import INVALID_API_TOKEN
 from libs.auth import RAGFlowWebApiAuth

--- a/test/testcases/test_web_api/test_system_app/test_system_basic.py
+++ b/test/testcases/test_web_api/test_system_app/test_system_basic.py
@ -14,7 +14,7 @@
 #  limitations under the License.
 #
 import pytest
-from common import (
+from test_common import (
    system_config,
    system_delete_token,
    system_new_token,
--- a/web/vite.config.ts
+++ b/web/vite.config.ts
@ -26,6 +26,17 @@ const inspectorBabelPlugin = (): import('vite').Plugin => ({
  },
 });

+type MinifyValue = boolean | 'esbuild' | 'terser';
+
+/**
+ * Translate the raw `VITE_MINIFY` environment value into a `build.minify` setting.
+ *
+ * Matching ignores letter case and surrounding whitespace. `'false'` turns
+ * minification off and `'esbuild'` selects esbuild. An unset variable, the
+ * literal `'terser'`, and any unrecognized value all resolve to `'terser'`.
+ *
+ * @param value - Raw environment string, or `undefined` when the variable is unset.
+ * @returns The value to use for `build.minify`.
+ */
+function resolveMinify(value: string | undefined): MinifyValue {
+  if (value === undefined) return 'terser';
+  // Tolerate stray whitespace and mixed case in the configured value.
+  const normalized = value.trim().toLowerCase();
+  if (normalized === 'false') return false;
+  if (normalized === 'esbuild') return 'esbuild';
+  // 'terser' and anything unrecognized keep the terser default.
+  return 'terser';
+}
+
 // https://vitejs.dev/config/
 export default defineConfig(({ mode }) => {
  const env = loadEnv(mode, process.cwd(), '');
@ -229,7 +240,7 @@ export default defineConfig(({ mode }) => {
        plugins: [],
        treeshake: true,
      },
-      minify: 'terser',
+      minify: resolveMinify(env.VITE_MINIFY),
      terserOptions: {
        compress: {
          drop_console: true, // delete console
@ -246,7 +257,7 @@ export default defineConfig(({ mode }) => {
          comments: false, // Delete comments
        },
      },
-      sourcemap: true,
+      sourcemap: env.VITE_BUILD_SOURCEMAP !== 'false',
      cssCodeSplit: true,
      target: 'es2015',
    },