diff --git a/agent/canvas.py b/agent/canvas.py index c9d672e6c..2b219ef16 100644 --- a/agent/canvas.py +++ b/agent/canvas.py @@ -386,10 +386,16 @@ class Canvas(Graph): continue self.components[k]["obj"].set_output(kk, vv) + layout_recognize = None + for cpn in self.components.values(): + if cpn["obj"].component_name.lower() == "begin": + layout_recognize = getattr(cpn["obj"]._param, "layout_recognize", None) + break + for k in kwargs.keys(): if k in ["query", "user_id", "files"] and kwargs[k]: if k == "files": - self.globals[f"sys.{k}"] = await self.get_files_async(kwargs[k]) + self.globals[f"sys.{k}"] = await self.get_files_async(kwargs[k], layout_recognize) else: self.globals[f"sys.{k}"] = kwargs[k] if not self.globals["sys.conversation_turns"] : @@ -740,7 +746,7 @@ class Canvas(Graph): def get_component_input_elements(self, cpnnm): return self.components[cpnnm]["obj"].get_input_elements() - async def get_files_async(self, files: Union[None, list[dict]]) -> list[str]: + async def get_files_async(self, files: Union[None, list[dict]], layout_recognize: str = None) -> list[str]: if not files: return [] def image_to_base64(file): @@ -748,7 +754,7 @@ class Canvas(Graph): base64.b64encode(FileService.get_blob(file["created_by"], file["id"])).decode("utf-8")) def parse_file(file): blob = FileService.get_blob(file["created_by"], file["id"]) - return FileService.parse(file["name"], blob, True, file["created_by"]) + return FileService.parse(file["name"], blob, True, file["created_by"], layout_recognize) loop = asyncio.get_running_loop() tasks = [] for file in files: @@ -758,15 +764,15 @@ class Canvas(Graph): tasks.append(loop.run_in_executor(self._thread_pool, parse_file, file)) return await asyncio.gather(*tasks) - def get_files(self, files: Union[None, list[dict]]) -> list[str]: + def get_files(self, files: Union[None, list[dict]], layout_recognize: str = None) -> list[str]: """ Synchronous wrapper for get_files_async, used by sync component invoke paths. """ loop = getattr(self, "_loop", None) if loop and loop.is_running(): - return asyncio.run_coroutine_threadsafe(self.get_files_async(files), loop).result() + return asyncio.run_coroutine_threadsafe(self.get_files_async(files, layout_recognize), loop).result() - return asyncio.run(self.get_files_async(files)) + return asyncio.run(self.get_files_async(files, layout_recognize)) def tool_use_callback(self, agent_id: str, func_name: str, params: dict, result: Any, elapsed_time=None): agent_ids = agent_id.split("-->") diff --git a/agent/component/begin.py b/agent/component/begin.py index 819e46c25..c4da78cab 100644 --- a/agent/component/begin.py +++ b/agent/component/begin.py @@ -41,6 +41,7 @@ class Begin(UserFillUp): if self.check_if_canceled("Begin processing"): return + layout_recognize = self._param.layout_recognize or None for k, v in kwargs.get("inputs", {}).items(): if self.check_if_canceled("Begin processing"): return @@ -52,7 +53,7 @@ class Begin(UserFillUp): file_value = v["value"] # Support both single file (backward compatibility) and multiple files files = file_value if isinstance(file_value, list) else [file_value] - v = FileService.get_files(files) + v = FileService.get_files(files, layout_recognize=layout_recognize) else: v = v.get("value") self.set_output(k, v) diff --git a/agent/component/fillup.py b/agent/component/fillup.py index b97e6ca52..90ccde10f 100644 --- a/agent/component/fillup.py +++ b/agent/component/fillup.py @@ -27,6 +27,7 @@ class UserFillUpParam(ComponentParamBase): super().__init__() self.enable_tips = True self.tips = "Please fill up the form" + self.layout_recognize = "" def check(self) -> bool: return True @@ -61,6 +62,7 @@ class UserFillUp(ComponentBase): content = re.sub(r"\{%s\}"%k, ans, content) self.set_output("tips", content) + layout_recognize = self._param.layout_recognize or None for k, v in kwargs.get("inputs", {}).items(): if self.check_if_canceled("UserFillUp processing"): return @@ -71,7 +73,7 @@ class UserFillUp(ComponentBase): file_value = v["value"] # Support both single file (backward compatibility) and multiple files files = file_value if isinstance(file_value, list) else [file_value] - v = FileService.get_files(files) + v = FileService.get_files(files, layout_recognize=layout_recognize) else: v = v.get("value") self.set_output(k, v) diff --git a/api/db/services/file_service.py b/api/db/services/file_service.py index 498199393..d31004c93 100644 --- a/api/db/services/file_service.py +++ b/api/db/services/file_service.py @@ -519,7 +519,7 @@ class FileService(CommonService): return "\n\n".join(res) @staticmethod - def parse(filename, blob, img_base64=True, tenant_id=None): + def parse(filename, blob, img_base64=True, tenant_id=None, layout_recognize=None): from rag.app import audio, email, naive, picture, presentation from api.apps import current_user @@ -527,7 +527,7 @@ class FileService(CommonService): pass FACTORY = {ParserType.PRESENTATION.value: presentation, ParserType.PICTURE.value: picture, ParserType.AUDIO.value: audio, ParserType.EMAIL.value: email} - parser_config = {"chunk_token_num": 16096, "delimiter": "\n!?;。;!?", "layout_recognize": "Plain Text"} + parser_config = {"chunk_token_num": 16096, "delimiter": "\n!?;。;!?", "layout_recognize": layout_recognize or "Plain Text"} kwargs = {"lang": "English", "callback": dummy, "parser_config": parser_config, "from_page": 0, "to_page": 100000, "tenant_id": current_user.id if current_user else tenant_id} file_type = filename_type(filename) if img_base64 and file_type == FileType.VISUAL.value: @@ -663,7 +663,7 @@ class FileService(CommonService): return structured(file.filename, filename_type(file.filename), file.read(), file.content_type) @staticmethod - def get_files(files: Union[None, list[dict]], raw: bool = False) -> Union[list[str], tuple[list[str], list[dict]]]: + def get_files(files: Union[None, list[dict]], raw: bool = False, layout_recognize: str = None) -> Union[list[str], tuple[list[str], list[dict]]]: if not files: return [] def image_to_base64(file): @@ -679,7 +679,7 @@ class FileService(CommonService): else: threads.append(exe.submit(image_to_base64, file)) continue - threads.append(exe.submit(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"])) + threads.append(exe.submit(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"], layout_recognize)) if raw: return [th.result() for th in threads], imgs diff --git a/web/src/pages/agent/form/begin-form/index.tsx b/web/src/pages/agent/form/begin-form/index.tsx index 6c8664fce..4351cc161 100644 --- a/web/src/pages/agent/form/begin-form/index.tsx +++ b/web/src/pages/agent/form/begin-form/index.tsx @@ -1,4 +1,5 @@ import { Collapse } from '@/components/collapse'; +import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field'; import { Button } from '@/components/ui/button'; import { Form, @@ -15,10 +16,10 @@ import { FormTooltip } from '@/components/ui/tooltip'; import { zodResolver } from '@hookform/resolvers/zod'; import { t } from 'i18next'; import { Plus } from 'lucide-react'; -import { memo, useEffect, useRef } from 'react'; +import { memo, useEffect, useMemo, useRef } from 'react'; import { useForm, useWatch } from 'react-hook-form'; import { useTranslation } from 'react-i18next'; -import { AgentDialogueMode } from '../../constant'; +import { AgentDialogueMode, BeginQueryType } from '../../constant'; import { INextOperatorForm } from '../../interface'; import { ParameterDialog } from './parameter-dialog'; import { QueryTable } from './query-table'; @@ -51,6 +52,11 @@ function BeginForm({ node }: INextOperatorForm) { const inputs = useWatch({ control: form.control, name: 'inputs' }); const mode = useWatch({ control: form.control, name: 'mode' }); + const hasFileInput = useMemo( + () => inputs?.some((x) => x.type === BeginQueryType.File), + [inputs], + ); + const enablePrologue = useWatch({ control: form.control, name: 'enablePrologue', @@ -193,6 +199,14 @@ function BeginForm({ node }: INextOperatorForm) { submit={ok} > )} + {hasFileInput && ( + + )} )} diff --git a/web/src/pages/agent/form/begin-form/schema.ts b/web/src/pages/agent/form/begin-form/schema.ts index 33fb586e7..bcb58f00c 100644 --- a/web/src/pages/agent/form/begin-form/schema.ts +++ b/web/src/pages/agent/form/begin-form/schema.ts @@ -5,6 +5,7 @@ export const BeginFormSchema = z.object({ enablePrologue: z.boolean().optional(), prologue: z.string().trim().optional(), mode: z.string(), + layout_recognize: z.string().optional(), inputs: z .array( z.object({ diff --git a/web/src/pages/agent/form/user-fill-up-form/index.tsx b/web/src/pages/agent/form/user-fill-up-form/index.tsx index 96087de7a..22ce1c742 100644 --- a/web/src/pages/agent/form/user-fill-up-form/index.tsx +++ b/web/src/pages/agent/form/user-fill-up-form/index.tsx @@ -1,4 +1,5 @@ import { Collapse } from '@/components/collapse'; +import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field'; import { Button } from '@/components/ui/button'; import { Form, @@ -12,10 +13,11 @@ import { Switch } from '@/components/ui/switch'; import { FormTooltip } from '@/components/ui/tooltip'; import { zodResolver } from '@hookform/resolvers/zod'; import { Plus } from 'lucide-react'; -import { memo } from 'react'; +import { memo, useMemo } from 'react'; import { useForm, useWatch } from 'react-hook-form'; import { useTranslation } from 'react-i18next'; import { z } from 'zod'; +import { BeginQueryType } from '../../constant'; import { BeginQuery, INextOperatorForm } from '../../interface'; import { ParameterDialog } from '../begin-form/parameter-dialog'; import { QueryTable } from '../begin-form/query-table'; @@ -33,6 +35,7 @@ function UserFillUpForm({ node }: INextOperatorForm) { const FormSchema = z.object({ enable_tips: z.boolean().optional(), tips: z.string().trim().optional(), + layout_recognize: z.string().optional(), inputs: z .array( z.object({ @@ -59,6 +62,11 @@ function UserFillUpForm({ node }: INextOperatorForm) { name: 'inputs', }); + const hasFileInput = useMemo( + () => inputs?.some((x) => x.type === BeginQueryType.File), + [inputs], + ); + const outputList = inputs?.map((item) => ({ title: item.name, type: item.type, @@ -155,6 +163,14 @@ function UserFillUpForm({ node }: INextOperatorForm) { submit={ok} > )} + {hasFileInput && ( + + )}