diff --git a/agent/canvas.py b/agent/canvas.py
index c9d672e6c..2b219ef16 100644
--- a/agent/canvas.py
+++ b/agent/canvas.py
@@ -386,10 +386,16 @@ class Canvas(Graph):
continue
self.components[k]["obj"].set_output(kk, vv)
+ layout_recognize = None
+ for cpn in self.components.values():
+ if cpn["obj"].component_name.lower() == "begin":
+ layout_recognize = getattr(cpn["obj"]._param, "layout_recognize", None)
+ break
+
for k in kwargs.keys():
if k in ["query", "user_id", "files"] and kwargs[k]:
if k == "files":
- self.globals[f"sys.{k}"] = await self.get_files_async(kwargs[k])
+ self.globals[f"sys.{k}"] = await self.get_files_async(kwargs[k], layout_recognize)
else:
self.globals[f"sys.{k}"] = kwargs[k]
if not self.globals["sys.conversation_turns"] :
@@ -740,7 +746,7 @@ class Canvas(Graph):
def get_component_input_elements(self, cpnnm):
return self.components[cpnnm]["obj"].get_input_elements()
- async def get_files_async(self, files: Union[None, list[dict]]) -> list[str]:
+ async def get_files_async(self, files: Union[None, list[dict]], layout_recognize: str = None) -> list[str]:
if not files:
return []
def image_to_base64(file):
@@ -748,7 +754,7 @@ class Canvas(Graph):
base64.b64encode(FileService.get_blob(file["created_by"], file["id"])).decode("utf-8"))
def parse_file(file):
blob = FileService.get_blob(file["created_by"], file["id"])
- return FileService.parse(file["name"], blob, True, file["created_by"])
+ return FileService.parse(file["name"], blob, True, file["created_by"], layout_recognize)
loop = asyncio.get_running_loop()
tasks = []
for file in files:
@@ -758,15 +764,15 @@ class Canvas(Graph):
tasks.append(loop.run_in_executor(self._thread_pool, parse_file, file))
return await asyncio.gather(*tasks)
- def get_files(self, files: Union[None, list[dict]]) -> list[str]:
+ def get_files(self, files: Union[None, list[dict]], layout_recognize: str = None) -> list[str]:
"""
Synchronous wrapper for get_files_async, used by sync component invoke paths.
"""
loop = getattr(self, "_loop", None)
if loop and loop.is_running():
- return asyncio.run_coroutine_threadsafe(self.get_files_async(files), loop).result()
+ return asyncio.run_coroutine_threadsafe(self.get_files_async(files, layout_recognize), loop).result()
- return asyncio.run(self.get_files_async(files))
+ return asyncio.run(self.get_files_async(files, layout_recognize))
def tool_use_callback(self, agent_id: str, func_name: str, params: dict, result: Any, elapsed_time=None):
agent_ids = agent_id.split("-->")
diff --git a/agent/component/begin.py b/agent/component/begin.py
index 819e46c25..c4da78cab 100644
--- a/agent/component/begin.py
+++ b/agent/component/begin.py
@@ -41,6 +41,7 @@ class Begin(UserFillUp):
if self.check_if_canceled("Begin processing"):
return
+ layout_recognize = self._param.layout_recognize or None
for k, v in kwargs.get("inputs", {}).items():
if self.check_if_canceled("Begin processing"):
return
@@ -52,7 +53,7 @@ class Begin(UserFillUp):
file_value = v["value"]
# Support both single file (backward compatibility) and multiple files
files = file_value if isinstance(file_value, list) else [file_value]
- v = FileService.get_files(files)
+ v = FileService.get_files(files, layout_recognize=layout_recognize)
else:
v = v.get("value")
self.set_output(k, v)
diff --git a/agent/component/fillup.py b/agent/component/fillup.py
index b97e6ca52..90ccde10f 100644
--- a/agent/component/fillup.py
+++ b/agent/component/fillup.py
@@ -27,6 +27,7 @@ class UserFillUpParam(ComponentParamBase):
super().__init__()
self.enable_tips = True
self.tips = "Please fill up the form"
+ self.layout_recognize = ""
def check(self) -> bool:
return True
@@ -61,6 +62,7 @@ class UserFillUp(ComponentBase):
content = re.sub(r"\{%s\}"%k, ans, content)
self.set_output("tips", content)
+ layout_recognize = self._param.layout_recognize or None
for k, v in kwargs.get("inputs", {}).items():
if self.check_if_canceled("UserFillUp processing"):
return
@@ -71,7 +73,7 @@ class UserFillUp(ComponentBase):
file_value = v["value"]
# Support both single file (backward compatibility) and multiple files
files = file_value if isinstance(file_value, list) else [file_value]
- v = FileService.get_files(files)
+ v = FileService.get_files(files, layout_recognize=layout_recognize)
else:
v = v.get("value")
self.set_output(k, v)
diff --git a/api/db/services/file_service.py b/api/db/services/file_service.py
index 498199393..d31004c93 100644
--- a/api/db/services/file_service.py
+++ b/api/db/services/file_service.py
@@ -519,7 +519,7 @@ class FileService(CommonService):
return "\n\n".join(res)
@staticmethod
- def parse(filename, blob, img_base64=True, tenant_id=None):
+ def parse(filename, blob, img_base64=True, tenant_id=None, layout_recognize=None):
from rag.app import audio, email, naive, picture, presentation
from api.apps import current_user
@@ -527,7 +527,7 @@ class FileService(CommonService):
pass
FACTORY = {ParserType.PRESENTATION.value: presentation, ParserType.PICTURE.value: picture, ParserType.AUDIO.value: audio, ParserType.EMAIL.value: email}
- parser_config = {"chunk_token_num": 16096, "delimiter": "\n!?;。;!?", "layout_recognize": "Plain Text"}
+ parser_config = {"chunk_token_num": 16096, "delimiter": "\n!?;。;!?", "layout_recognize": layout_recognize or "Plain Text"}
kwargs = {"lang": "English", "callback": dummy, "parser_config": parser_config, "from_page": 0, "to_page": 100000, "tenant_id": current_user.id if current_user else tenant_id}
file_type = filename_type(filename)
if img_base64 and file_type == FileType.VISUAL.value:
@@ -663,7 +663,7 @@ class FileService(CommonService):
return structured(file.filename, filename_type(file.filename), file.read(), file.content_type)
@staticmethod
- def get_files(files: Union[None, list[dict]], raw: bool = False) -> Union[list[str], tuple[list[str], list[dict]]]:
+ def get_files(files: Union[None, list[dict]], raw: bool = False, layout_recognize: str = None) -> Union[list[str], tuple[list[str], list[dict]]]:
if not files:
return []
def image_to_base64(file):
@@ -679,7 +679,7 @@ class FileService(CommonService):
else:
threads.append(exe.submit(image_to_base64, file))
continue
- threads.append(exe.submit(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"]))
+ threads.append(exe.submit(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"], layout_recognize))
if raw:
return [th.result() for th in threads], imgs
diff --git a/web/src/pages/agent/form/begin-form/index.tsx b/web/src/pages/agent/form/begin-form/index.tsx
index 6c8664fce..4351cc161 100644
--- a/web/src/pages/agent/form/begin-form/index.tsx
+++ b/web/src/pages/agent/form/begin-form/index.tsx
@@ -1,4 +1,5 @@
import { Collapse } from '@/components/collapse';
+import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { Button } from '@/components/ui/button';
import {
Form,
@@ -15,10 +16,10 @@ import { FormTooltip } from '@/components/ui/tooltip';
import { zodResolver } from '@hookform/resolvers/zod';
import { t } from 'i18next';
import { Plus } from 'lucide-react';
-import { memo, useEffect, useRef } from 'react';
+import { memo, useEffect, useMemo, useRef } from 'react';
import { useForm, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
-import { AgentDialogueMode } from '../../constant';
+import { AgentDialogueMode, BeginQueryType } from '../../constant';
import { INextOperatorForm } from '../../interface';
import { ParameterDialog } from './parameter-dialog';
import { QueryTable } from './query-table';
@@ -51,6 +52,11 @@ function BeginForm({ node }: INextOperatorForm) {
const inputs = useWatch({ control: form.control, name: 'inputs' });
const mode = useWatch({ control: form.control, name: 'mode' });
+ const hasFileInput = useMemo(
+ () => inputs?.some((x) => x.type === BeginQueryType.File),
+ [inputs],
+ );
+
const enablePrologue = useWatch({
control: form.control,
name: 'enablePrologue',
@@ -193,6 +199,14 @@ function BeginForm({ node }: INextOperatorForm) {
submit={ok}
>
)}
+ {hasFileInput && (
+
+ )}
>
)}
diff --git a/web/src/pages/agent/form/begin-form/schema.ts b/web/src/pages/agent/form/begin-form/schema.ts
index 33fb586e7..bcb58f00c 100644
--- a/web/src/pages/agent/form/begin-form/schema.ts
+++ b/web/src/pages/agent/form/begin-form/schema.ts
@@ -5,6 +5,7 @@ export const BeginFormSchema = z.object({
enablePrologue: z.boolean().optional(),
prologue: z.string().trim().optional(),
mode: z.string(),
+ layout_recognize: z.string().optional(),
inputs: z
.array(
z.object({
diff --git a/web/src/pages/agent/form/user-fill-up-form/index.tsx b/web/src/pages/agent/form/user-fill-up-form/index.tsx
index 96087de7a..22ce1c742 100644
--- a/web/src/pages/agent/form/user-fill-up-form/index.tsx
+++ b/web/src/pages/agent/form/user-fill-up-form/index.tsx
@@ -1,4 +1,5 @@
import { Collapse } from '@/components/collapse';
+import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { Button } from '@/components/ui/button';
import {
Form,
@@ -12,10 +13,11 @@ import { Switch } from '@/components/ui/switch';
import { FormTooltip } from '@/components/ui/tooltip';
import { zodResolver } from '@hookform/resolvers/zod';
import { Plus } from 'lucide-react';
-import { memo } from 'react';
+import { memo, useMemo } from 'react';
import { useForm, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { z } from 'zod';
+import { BeginQueryType } from '../../constant';
import { BeginQuery, INextOperatorForm } from '../../interface';
import { ParameterDialog } from '../begin-form/parameter-dialog';
import { QueryTable } from '../begin-form/query-table';
@@ -33,6 +35,7 @@ function UserFillUpForm({ node }: INextOperatorForm) {
const FormSchema = z.object({
enable_tips: z.boolean().optional(),
tips: z.string().trim().optional(),
+ layout_recognize: z.string().optional(),
inputs: z
.array(
z.object({
@@ -59,6 +62,11 @@ function UserFillUpForm({ node }: INextOperatorForm) {
name: 'inputs',
});
+ const hasFileInput = useMemo(
+ () => inputs?.some((x) => x.type === BeginQueryType.File),
+ [inputs],
+ );
+
const outputList = inputs?.map((item) => ({
title: item.name,
type: item.type,
@@ -155,6 +163,14 @@ function UserFillUpForm({ node }: INextOperatorForm) {
submit={ok}
>
)}
+ {hasFileInput && (
+
+ )}