From 839b60376839fc49bb4eef9c53678de670f6d156 Mon Sep 17 00:00:00 2001 From: statxc <181730535+statxc@users.noreply.github.com> Date: Wed, 4 Mar 2026 05:09:33 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20Add=20PDF=20parser=20selection=20to=20A?= =?UTF-8?q?gent=20Begin=20and=20Await=20Response=20comp=E2=80=A6=20(#13325?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Issue: #12756 ### What problem does this PR solve? When users upload files through Agent's Begin or Await Response components, the parsing is hardcoded to "Plain Text", ignoring all other available parsers (DeepDOC, TCADP, Docling, MinerU, PaddleOCR). This PR adds a PDF parser dropdown to these components so users can select the appropriate parser for their file inputs. ### Changes **Backend** - `agent/component/fillup.py` - Added `layout_recognize` param to `UserFillUpParam`, forwarded to `FileService.get_files()` - `agent/component/begin.py` - Same forwarding in `Begin._invoke()` - `agent/canvas.py` - Extract Begin's `layout_recognize` for `sys.files` parsing, added param to `get_files_async()` / `get_files()` - `api/db/services/file_service.py` - Added `layout_recognize` param to `parse()` and `get_files()`, replacing hardcoded `"Plain Text"` - `rag/app/naive.py` - Added `"plain text"` and `"tcadp parser"` aliases to PARSERS dict to match dropdown values after `.lower()` **Frontend** - `web/src/pages/agent/form/begin-form/index.tsx` - Show `LayoutRecognizeFormField` dropdown when file inputs exist - `web/src/pages/agent/form/begin-form/schema.ts` - Added `layout_recognize` to Zod schema - `web/src/pages/agent/form/user-fill-up-form/index.tsx` - Same dropdown for Await Response component ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- agent/canvas.py | 18 ++++++++++++------ agent/component/begin.py | 3 ++- agent/component/fillup.py | 4 +++- api/db/services/file_service.py | 8 ++++---- web/src/pages/agent/form/begin-form/index.tsx | 18 ++++++++++++++++-- web/src/pages/agent/form/begin-form/schema.ts | 1 + .../agent/form/user-fill-up-form/index.tsx | 18 +++++++++++++++++- 7 files changed, 55 insertions(+), 15 deletions(-) diff --git a/agent/canvas.py b/agent/canvas.py index c9d672e6c..2b219ef16 100644 --- a/agent/canvas.py +++ b/agent/canvas.py @@ -386,10 +386,16 @@ class Canvas(Graph): continue self.components[k]["obj"].set_output(kk, vv) + layout_recognize = None + for cpn in self.components.values(): + if cpn["obj"].component_name.lower() == "begin": + layout_recognize = getattr(cpn["obj"]._param, "layout_recognize", None) + break + for k in kwargs.keys(): if k in ["query", "user_id", "files"] and kwargs[k]: if k == "files": - self.globals[f"sys.{k}"] = await self.get_files_async(kwargs[k]) + self.globals[f"sys.{k}"] = await self.get_files_async(kwargs[k], layout_recognize) else: self.globals[f"sys.{k}"] = kwargs[k] if not self.globals["sys.conversation_turns"] : @@ -740,7 +746,7 @@ class Canvas(Graph): def get_component_input_elements(self, cpnnm): return self.components[cpnnm]["obj"].get_input_elements() - async def get_files_async(self, files: Union[None, list[dict]]) -> list[str]: + async def get_files_async(self, files: Union[None, list[dict]], layout_recognize: str = None) -> list[str]: if not files: return [] def image_to_base64(file): @@ -748,7 +754,7 @@ class Canvas(Graph): base64.b64encode(FileService.get_blob(file["created_by"], file["id"])).decode("utf-8")) def parse_file(file): blob = FileService.get_blob(file["created_by"], file["id"]) - return FileService.parse(file["name"], blob, True, file["created_by"]) + return FileService.parse(file["name"], blob, True, file["created_by"], layout_recognize) loop = asyncio.get_running_loop() tasks = [] for file in files: @@ -758,15 +764,15 @@ class Canvas(Graph): tasks.append(loop.run_in_executor(self._thread_pool, parse_file, file)) return await asyncio.gather(*tasks) - def get_files(self, files: Union[None, list[dict]]) -> list[str]: + def get_files(self, files: Union[None, list[dict]], layout_recognize: str = None) -> list[str]: """ Synchronous wrapper for get_files_async, used by sync component invoke paths. """ loop = getattr(self, "_loop", None) if loop and loop.is_running(): - return asyncio.run_coroutine_threadsafe(self.get_files_async(files), loop).result() + return asyncio.run_coroutine_threadsafe(self.get_files_async(files, layout_recognize), loop).result() - return asyncio.run(self.get_files_async(files)) + return asyncio.run(self.get_files_async(files, layout_recognize)) def tool_use_callback(self, agent_id: str, func_name: str, params: dict, result: Any, elapsed_time=None): agent_ids = agent_id.split("-->") diff --git a/agent/component/begin.py b/agent/component/begin.py index 819e46c25..c4da78cab 100644 --- a/agent/component/begin.py +++ b/agent/component/begin.py @@ -41,6 +41,7 @@ class Begin(UserFillUp): if self.check_if_canceled("Begin processing"): return + layout_recognize = self._param.layout_recognize or None for k, v in kwargs.get("inputs", {}).items(): if self.check_if_canceled("Begin processing"): return @@ -52,7 +53,7 @@ class Begin(UserFillUp): file_value = v["value"] # Support both single file (backward compatibility) and multiple files files = file_value if isinstance(file_value, list) else [file_value] - v = FileService.get_files(files) + v = FileService.get_files(files, layout_recognize=layout_recognize) else: v = v.get("value") self.set_output(k, v) diff --git a/agent/component/fillup.py b/agent/component/fillup.py index b97e6ca52..90ccde10f 100644 --- a/agent/component/fillup.py +++ b/agent/component/fillup.py @@ -27,6 +27,7 @@ class UserFillUpParam(ComponentParamBase): super().__init__() self.enable_tips = True self.tips = "Please fill up the form" + self.layout_recognize = "" def check(self) -> bool: return True @@ -61,6 +62,7 @@ class UserFillUp(ComponentBase): content = re.sub(r"\{%s\}"%k, ans, content) self.set_output("tips", content) + layout_recognize = self._param.layout_recognize or None for k, v in kwargs.get("inputs", {}).items(): if self.check_if_canceled("UserFillUp processing"): return @@ -71,7 +73,7 @@ class UserFillUp(ComponentBase): file_value = v["value"] # Support both single file (backward compatibility) and multiple files files = file_value if isinstance(file_value, list) else [file_value] - v = FileService.get_files(files) + v = FileService.get_files(files, layout_recognize=layout_recognize) else: v = v.get("value") self.set_output(k, v) diff --git a/api/db/services/file_service.py b/api/db/services/file_service.py index 498199393..d31004c93 100644 --- a/api/db/services/file_service.py +++ b/api/db/services/file_service.py @@ -519,7 +519,7 @@ class FileService(CommonService): return "\n\n".join(res) @staticmethod - def parse(filename, blob, img_base64=True, tenant_id=None): + def parse(filename, blob, img_base64=True, tenant_id=None, layout_recognize=None): from rag.app import audio, email, naive, picture, presentation from api.apps import current_user @@ -527,7 +527,7 @@ class FileService(CommonService): pass FACTORY = {ParserType.PRESENTATION.value: presentation, ParserType.PICTURE.value: picture, ParserType.AUDIO.value: audio, ParserType.EMAIL.value: email} - parser_config = {"chunk_token_num": 16096, "delimiter": "\n!?;。;!?", "layout_recognize": "Plain Text"} + parser_config = {"chunk_token_num": 16096, "delimiter": "\n!?;。;!?", "layout_recognize": layout_recognize or "Plain Text"} kwargs = {"lang": "English", "callback": dummy, "parser_config": parser_config, "from_page": 0, "to_page": 100000, "tenant_id": current_user.id if current_user else tenant_id} file_type = filename_type(filename) if img_base64 and file_type == FileType.VISUAL.value: @@ -663,7 +663,7 @@ class FileService(CommonService): return structured(file.filename, filename_type(file.filename), file.read(), file.content_type) @staticmethod - def get_files(files: Union[None, list[dict]], raw: bool = False) -> Union[list[str], tuple[list[str], list[dict]]]: + def get_files(files: Union[None, list[dict]], raw: bool = False, layout_recognize: str = None) -> Union[list[str], tuple[list[str], list[dict]]]: if not files: return [] def image_to_base64(file): @@ -679,7 +679,7 @@ class FileService(CommonService): else: threads.append(exe.submit(image_to_base64, file)) continue - threads.append(exe.submit(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"])) + threads.append(exe.submit(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"], layout_recognize)) if raw: return [th.result() for th in threads], imgs diff --git a/web/src/pages/agent/form/begin-form/index.tsx b/web/src/pages/agent/form/begin-form/index.tsx index 6c8664fce..4351cc161 100644 --- a/web/src/pages/agent/form/begin-form/index.tsx +++ b/web/src/pages/agent/form/begin-form/index.tsx @@ -1,4 +1,5 @@ import { Collapse } from '@/components/collapse'; +import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field'; import { Button } from '@/components/ui/button'; import { Form, @@ -15,10 +16,10 @@ import { FormTooltip } from '@/components/ui/tooltip'; import { zodResolver } from '@hookform/resolvers/zod'; import { t } from 'i18next'; import { Plus } from 'lucide-react'; -import { memo, useEffect, useRef } from 'react'; +import { memo, useEffect, useMemo, useRef } from 'react'; import { useForm, useWatch } from 'react-hook-form'; import { useTranslation } from 'react-i18next'; -import { AgentDialogueMode } from '../../constant'; +import { AgentDialogueMode, BeginQueryType } from '../../constant'; import { INextOperatorForm } from '../../interface'; import { ParameterDialog } from './parameter-dialog'; import { QueryTable } from './query-table'; @@ -51,6 +52,11 @@ function BeginForm({ node }: INextOperatorForm) { const inputs = useWatch({ control: form.control, name: 'inputs' }); const mode = useWatch({ control: form.control, name: 'mode' }); + const hasFileInput = useMemo( + () => inputs?.some((x) => x.type === BeginQueryType.File), + [inputs], + ); + const enablePrologue = useWatch({ control: form.control, name: 'enablePrologue', @@ -193,6 +199,14 @@ function BeginForm({ node }: INextOperatorForm) { submit={ok} > )} + {hasFileInput && ( + + )} )} diff --git a/web/src/pages/agent/form/begin-form/schema.ts b/web/src/pages/agent/form/begin-form/schema.ts index 33fb586e7..bcb58f00c 100644 --- a/web/src/pages/agent/form/begin-form/schema.ts +++ b/web/src/pages/agent/form/begin-form/schema.ts @@ -5,6 +5,7 @@ export const BeginFormSchema = z.object({ enablePrologue: z.boolean().optional(), prologue: z.string().trim().optional(), mode: z.string(), + layout_recognize: z.string().optional(), inputs: z .array( z.object({ diff --git a/web/src/pages/agent/form/user-fill-up-form/index.tsx b/web/src/pages/agent/form/user-fill-up-form/index.tsx index 96087de7a..22ce1c742 100644 --- a/web/src/pages/agent/form/user-fill-up-form/index.tsx +++ b/web/src/pages/agent/form/user-fill-up-form/index.tsx @@ -1,4 +1,5 @@ import { Collapse } from '@/components/collapse'; +import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field'; import { Button } from '@/components/ui/button'; import { Form, @@ -12,10 +13,11 @@ import { Switch } from '@/components/ui/switch'; import { FormTooltip } from '@/components/ui/tooltip'; import { zodResolver } from '@hookform/resolvers/zod'; import { Plus } from 'lucide-react'; -import { memo } from 'react'; +import { memo, useMemo } from 'react'; import { useForm, useWatch } from 'react-hook-form'; import { useTranslation } from 'react-i18next'; import { z } from 'zod'; +import { BeginQueryType } from '../../constant'; import { BeginQuery, INextOperatorForm } from '../../interface'; import { ParameterDialog } from '../begin-form/parameter-dialog'; import { QueryTable } from '../begin-form/query-table'; @@ -33,6 +35,7 @@ function UserFillUpForm({ node }: INextOperatorForm) { const FormSchema = z.object({ enable_tips: z.boolean().optional(), tips: z.string().trim().optional(), + layout_recognize: z.string().optional(), inputs: z .array( z.object({ @@ -59,6 +62,11 @@ function UserFillUpForm({ node }: INextOperatorForm) { name: 'inputs', }); + const hasFileInput = useMemo( + () => inputs?.some((x) => x.type === BeginQueryType.File), + [inputs], + ); + const outputList = inputs?.map((item) => ({ title: item.name, type: item.type, @@ -155,6 +163,14 @@ function UserFillUpForm({ node }: INextOperatorForm) { submit={ok} > )} + {hasFileInput && ( + + )}