feat: Add PDF parser selection to Agent Begin and Await Response components (#13325)

### Issue: #12756

### What problem does this PR solve?

When users upload files through the Agent's Begin or Await Response
components, parsing is hardcoded to "Plain Text", so none of the other
available parsers (DeepDOC, TCADP, Docling, MinerU, PaddleOCR) can be
used. This PR adds a PDF parser dropdown to these components so users can
select the appropriate parser for their file inputs.


### Changes

**Backend**
- `agent/component/fillup.py` - Added `layout_recognize` param to
`UserFillUpParam`, forwarded to `FileService.get_files()`
- `agent/component/begin.py` - Same forwarding in `Begin._invoke()`
- `agent/canvas.py` - Extracted Begin's `layout_recognize` for `sys.files`
parsing and added the param to `get_files_async()` / `get_files()`
- `api/db/services/file_service.py` - Added `layout_recognize` param to
`parse()` and `get_files()`, replacing hardcoded `"Plain Text"`
- `rag/app/naive.py` - Added `"plain text"` and `"tcadp parser"` aliases
to the `PARSERS` dict so they match the dropdown values after `.lower()`

**Frontend**
- `web/src/pages/agent/form/begin-form/index.tsx` - Added a
`LayoutRecognizeFormField` dropdown, shown when file inputs exist
- `web/src/pages/agent/form/begin-form/schema.ts` - Added
`layout_recognize` to Zod schema
- `web/src/pages/agent/form/user-fill-up-form/index.tsx` - Same dropdown
for Await Response component


### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
statxc
2026-03-04 05:09:33 +02:00
committed by GitHub
parent 7715bad04e
commit 839b603768
7 changed files with 55 additions and 15 deletions

View File

@ -386,10 +386,16 @@ class Canvas(Graph):
continue
self.components[k]["obj"].set_output(kk, vv)
layout_recognize = None
for cpn in self.components.values():
if cpn["obj"].component_name.lower() == "begin":
layout_recognize = getattr(cpn["obj"]._param, "layout_recognize", None)
break
for k in kwargs.keys():
if k in ["query", "user_id", "files"] and kwargs[k]:
if k == "files":
self.globals[f"sys.{k}"] = await self.get_files_async(kwargs[k])
self.globals[f"sys.{k}"] = await self.get_files_async(kwargs[k], layout_recognize)
else:
self.globals[f"sys.{k}"] = kwargs[k]
if not self.globals["sys.conversation_turns"] :
@ -740,7 +746,7 @@ class Canvas(Graph):
def get_component_input_elements(self, cpnnm):
return self.components[cpnnm]["obj"].get_input_elements()
async def get_files_async(self, files: Union[None, list[dict]]) -> list[str]:
async def get_files_async(self, files: Union[None, list[dict]], layout_recognize: str = None) -> list[str]:
if not files:
return []
def image_to_base64(file):
@ -748,7 +754,7 @@ class Canvas(Graph):
base64.b64encode(FileService.get_blob(file["created_by"], file["id"])).decode("utf-8"))
def parse_file(file):
blob = FileService.get_blob(file["created_by"], file["id"])
return FileService.parse(file["name"], blob, True, file["created_by"])
return FileService.parse(file["name"], blob, True, file["created_by"], layout_recognize)
loop = asyncio.get_running_loop()
tasks = []
for file in files:
@ -758,15 +764,15 @@ class Canvas(Graph):
tasks.append(loop.run_in_executor(self._thread_pool, parse_file, file))
return await asyncio.gather(*tasks)
def get_files(self, files: Union[None, list[dict]]) -> list[str]:
def get_files(self, files: Union[None, list[dict]], layout_recognize: str = None) -> list[str]:
"""
Synchronous wrapper for get_files_async, used by sync component invoke paths.
"""
loop = getattr(self, "_loop", None)
if loop and loop.is_running():
return asyncio.run_coroutine_threadsafe(self.get_files_async(files), loop).result()
return asyncio.run_coroutine_threadsafe(self.get_files_async(files, layout_recognize), loop).result()
return asyncio.run(self.get_files_async(files))
return asyncio.run(self.get_files_async(files, layout_recognize))
def tool_use_callback(self, agent_id: str, func_name: str, params: dict, result: Any, elapsed_time=None):
agent_ids = agent_id.split("-->")

View File

@ -41,6 +41,7 @@ class Begin(UserFillUp):
if self.check_if_canceled("Begin processing"):
return
layout_recognize = self._param.layout_recognize or None
for k, v in kwargs.get("inputs", {}).items():
if self.check_if_canceled("Begin processing"):
return
@ -52,7 +53,7 @@ class Begin(UserFillUp):
file_value = v["value"]
# Support both single file (backward compatibility) and multiple files
files = file_value if isinstance(file_value, list) else [file_value]
v = FileService.get_files(files)
v = FileService.get_files(files, layout_recognize=layout_recognize)
else:
v = v.get("value")
self.set_output(k, v)

View File

@ -27,6 +27,7 @@ class UserFillUpParam(ComponentParamBase):
super().__init__()
self.enable_tips = True
self.tips = "Please fill up the form"
self.layout_recognize = ""
def check(self) -> bool:
return True
@ -61,6 +62,7 @@ class UserFillUp(ComponentBase):
content = re.sub(r"\{%s\}"%k, ans, content)
self.set_output("tips", content)
layout_recognize = self._param.layout_recognize or None
for k, v in kwargs.get("inputs", {}).items():
if self.check_if_canceled("UserFillUp processing"):
return
@ -71,7 +73,7 @@ class UserFillUp(ComponentBase):
file_value = v["value"]
# Support both single file (backward compatibility) and multiple files
files = file_value if isinstance(file_value, list) else [file_value]
v = FileService.get_files(files)
v = FileService.get_files(files, layout_recognize=layout_recognize)
else:
v = v.get("value")
self.set_output(k, v)

View File

@ -519,7 +519,7 @@ class FileService(CommonService):
return "\n\n".join(res)
@staticmethod
def parse(filename, blob, img_base64=True, tenant_id=None):
def parse(filename, blob, img_base64=True, tenant_id=None, layout_recognize=None):
from rag.app import audio, email, naive, picture, presentation
from api.apps import current_user
@ -527,7 +527,7 @@ class FileService(CommonService):
pass
FACTORY = {ParserType.PRESENTATION.value: presentation, ParserType.PICTURE.value: picture, ParserType.AUDIO.value: audio, ParserType.EMAIL.value: email}
parser_config = {"chunk_token_num": 16096, "delimiter": "\n!?;。;!?", "layout_recognize": "Plain Text"}
parser_config = {"chunk_token_num": 16096, "delimiter": "\n!?;。;!?", "layout_recognize": layout_recognize or "Plain Text"}
kwargs = {"lang": "English", "callback": dummy, "parser_config": parser_config, "from_page": 0, "to_page": 100000, "tenant_id": current_user.id if current_user else tenant_id}
file_type = filename_type(filename)
if img_base64 and file_type == FileType.VISUAL.value:
@ -663,7 +663,7 @@ class FileService(CommonService):
return structured(file.filename, filename_type(file.filename), file.read(), file.content_type)
@staticmethod
def get_files(files: Union[None, list[dict]], raw: bool = False) -> Union[list[str], tuple[list[str], list[dict]]]:
def get_files(files: Union[None, list[dict]], raw: bool = False, layout_recognize: str = None) -> Union[list[str], tuple[list[str], list[dict]]]:
if not files:
return []
def image_to_base64(file):
@ -679,7 +679,7 @@ class FileService(CommonService):
else:
threads.append(exe.submit(image_to_base64, file))
continue
threads.append(exe.submit(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"]))
threads.append(exe.submit(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"], layout_recognize))
if raw:
return [th.result() for th in threads], imgs

View File

@ -1,4 +1,5 @@
import { Collapse } from '@/components/collapse';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { Button } from '@/components/ui/button';
import {
Form,
@ -15,10 +16,10 @@ import { FormTooltip } from '@/components/ui/tooltip';
import { zodResolver } from '@hookform/resolvers/zod';
import { t } from 'i18next';
import { Plus } from 'lucide-react';
import { memo, useEffect, useRef } from 'react';
import { memo, useEffect, useMemo, useRef } from 'react';
import { useForm, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { AgentDialogueMode } from '../../constant';
import { AgentDialogueMode, BeginQueryType } from '../../constant';
import { INextOperatorForm } from '../../interface';
import { ParameterDialog } from './parameter-dialog';
import { QueryTable } from './query-table';
@ -51,6 +52,11 @@ function BeginForm({ node }: INextOperatorForm) {
const inputs = useWatch({ control: form.control, name: 'inputs' });
const mode = useWatch({ control: form.control, name: 'mode' });
const hasFileInput = useMemo(
() => inputs?.some((x) => x.type === BeginQueryType.File),
[inputs],
);
const enablePrologue = useWatch({
control: form.control,
name: 'enablePrologue',
@ -193,6 +199,14 @@ function BeginForm({ node }: INextOperatorForm) {
submit={ok}
></ParameterDialog>
)}
{hasFileInput && (
<LayoutRecognizeFormField
name="layout_recognize"
horizontal={false}
showMineruOptions={false}
showPaddleocrOptions={false}
></LayoutRecognizeFormField>
)}
</>
)}
</Form>

View File

@ -5,6 +5,7 @@ export const BeginFormSchema = z.object({
enablePrologue: z.boolean().optional(),
prologue: z.string().trim().optional(),
mode: z.string(),
layout_recognize: z.string().optional(),
inputs: z
.array(
z.object({

View File

@ -1,4 +1,5 @@
import { Collapse } from '@/components/collapse';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { Button } from '@/components/ui/button';
import {
Form,
@ -12,10 +13,11 @@ import { Switch } from '@/components/ui/switch';
import { FormTooltip } from '@/components/ui/tooltip';
import { zodResolver } from '@hookform/resolvers/zod';
import { Plus } from 'lucide-react';
import { memo } from 'react';
import { memo, useMemo } from 'react';
import { useForm, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { z } from 'zod';
import { BeginQueryType } from '../../constant';
import { BeginQuery, INextOperatorForm } from '../../interface';
import { ParameterDialog } from '../begin-form/parameter-dialog';
import { QueryTable } from '../begin-form/query-table';
@ -33,6 +35,7 @@ function UserFillUpForm({ node }: INextOperatorForm) {
const FormSchema = z.object({
enable_tips: z.boolean().optional(),
tips: z.string().trim().optional(),
layout_recognize: z.string().optional(),
inputs: z
.array(
z.object({
@ -59,6 +62,11 @@ function UserFillUpForm({ node }: INextOperatorForm) {
name: 'inputs',
});
const hasFileInput = useMemo(
() => inputs?.some((x) => x.type === BeginQueryType.File),
[inputs],
);
const outputList = inputs?.map((item) => ({
title: item.name,
type: item.type,
@ -155,6 +163,14 @@ function UserFillUpForm({ node }: INextOperatorForm) {
submit={ok}
></ParameterDialog>
)}
{hasFileInput && (
<LayoutRecognizeFormField
name="layout_recognize"
horizontal={false}
showMineruOptions={false}
showPaddleocrOptions={false}
></LayoutRecognizeFormField>
)}
</Form>
<Output list={outputList}></Output>
</section>