mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-05-03 00:37:48 +08:00
feat: Add PDF parser selection to Agent Begin and Await Response comp… (#13325)
### Issue: #12756 ### What problem does this PR solve? When users upload files through Agent's Begin or Await Response components, the parsing is hardcoded to "Plain Text", ignoring all other available parsers (DeepDOC, TCADP, Docling, MinerU, PaddleOCR). This PR adds a PDF parser dropdown to these components so users can select the appropriate parser for their file inputs. ### Changes **Backend** - `agent/component/fillup.py` - Added `layout_recognize` param to `UserFillUpParam`, forwarded to `FileService.get_files()` - `agent/component/begin.py` - Same forwarding in `Begin._invoke()` - `agent/canvas.py` - Extract Begin's `layout_recognize` for `sys.files` parsing, added param to `get_files_async()` / `get_files()` - `api/db/services/file_service.py` - Added `layout_recognize` param to `parse()` and `get_files()`, replacing hardcoded `"Plain Text"` - `rag/app/naive.py` - Added `"plain text"` and `"tcadp parser"` aliases to PARSERS dict to match dropdown values after `.lower()` **Frontend** - `web/src/pages/agent/form/begin-form/index.tsx` - Show `LayoutRecognizeFormField` dropdown when file inputs exist - `web/src/pages/agent/form/begin-form/schema.ts` - Added `layout_recognize` to Zod schema - `web/src/pages/agent/form/user-fill-up-form/index.tsx` - Same dropdown for Await Response component ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -519,7 +519,7 @@ class FileService(CommonService):
|
||||
return "\n\n".join(res)
|
||||
|
||||
@staticmethod
|
||||
def parse(filename, blob, img_base64=True, tenant_id=None):
|
||||
def parse(filename, blob, img_base64=True, tenant_id=None, layout_recognize=None):
|
||||
from rag.app import audio, email, naive, picture, presentation
|
||||
from api.apps import current_user
|
||||
|
||||
@ -527,7 +527,7 @@ class FileService(CommonService):
|
||||
pass
|
||||
|
||||
FACTORY = {ParserType.PRESENTATION.value: presentation, ParserType.PICTURE.value: picture, ParserType.AUDIO.value: audio, ParserType.EMAIL.value: email}
|
||||
parser_config = {"chunk_token_num": 16096, "delimiter": "\n!?;。;!?", "layout_recognize": "Plain Text"}
|
||||
parser_config = {"chunk_token_num": 16096, "delimiter": "\n!?;。;!?", "layout_recognize": layout_recognize or "Plain Text"}
|
||||
kwargs = {"lang": "English", "callback": dummy, "parser_config": parser_config, "from_page": 0, "to_page": 100000, "tenant_id": current_user.id if current_user else tenant_id}
|
||||
file_type = filename_type(filename)
|
||||
if img_base64 and file_type == FileType.VISUAL.value:
|
||||
@ -663,7 +663,7 @@ class FileService(CommonService):
|
||||
return structured(file.filename, filename_type(file.filename), file.read(), file.content_type)
|
||||
|
||||
@staticmethod
|
||||
def get_files(files: Union[None, list[dict]], raw: bool = False) -> Union[list[str], tuple[list[str], list[dict]]]:
|
||||
def get_files(files: Union[None, list[dict]], raw: bool = False, layout_recognize: str = None) -> Union[list[str], tuple[list[str], list[dict]]]:
|
||||
if not files:
|
||||
return []
|
||||
def image_to_base64(file):
|
||||
@ -679,7 +679,7 @@ class FileService(CommonService):
|
||||
else:
|
||||
threads.append(exe.submit(image_to_base64, file))
|
||||
continue
|
||||
threads.append(exe.submit(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"]))
|
||||
threads.append(exe.submit(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"], layout_recognize))
|
||||
|
||||
if raw:
|
||||
return [th.result() for th in threads], imgs
|
||||
|
||||
Reference in New Issue
Block a user