mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-01 05:17:51 +08:00
### What problem does this PR solve?

Implement MinerU Provider

**The following functionalities are now supported:**

**MinerU**

----

- [x] Parse file
- [x] Show task
- [ ] ~~List tasks~~

**Verified examples from the CLI:**

```plaintext
RAGFlow(user)> parse with 'vlm@test@mineru' file 'https://arxiv.org/pdf/2505.09358'
+--------------------------------------+
| task_id                              |
+--------------------------------------+
| 142ac8ea-d9d0-4a68-a2d1-d3af67635dc9 |
+--------------------------------------+
RAGFlow(user)> show 'test@mineru' task '142ac8ea-d9d0-4a68-a2d1-d3af67635dc9'
+--------------------------------------------+-------+
| content                                    | index |
+--------------------------------------------+-------+
| Task is running... Progress: 17 / 18 pages | 0     |
+--------------------------------------------+-------+
RAGFlow(user)> show 'test@mineru' task '142ac8ea-d9d0-4a68-a2d1-d3af67635dc9'
+--------------------------------------------------------------------------------------------+-------+
| content                                                                                    | index |
+--------------------------------------------------------------------------------------------+-------+
| https://cdn-mineru.openxlab.org.cn/pdf/2026-05-18/142ac8ea-d9d0-4a68-a2d1-d3af67635dc9.zip | 0     |
+--------------------------------------------------------------------------------------------+-------+
```

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [x] Refactoring
87 lines
1.5 KiB
JSON
87 lines
1.5 KiB
JSON
{
  "name": "Baidu",
  "url": {
    "default": "https://qianfan.baidubce.com/v2"
  },
  "url_suffix": {
    "chat": "chat/completions",
    "models": "models",
    "embedding": "embeddings",
    "rerank": "rerank",
    "ocr": "ocr/paddleocr"
  },
  "class": "baidu",
  "models": [
    {
      "name": "deepseek-v3.2",
      "max_tokens": 98304,
      "model_types": [
        "chat"
      ]
    },
    {
      "name": "deepseek-v4-flash",
      "max_tokens": 1048576,
      "model_types": [
        "chat"
      ],
      "thinking": {
        "default_value": true,
        "clear_thinking": true
      }
    },
    {
      "name": "deepseek-v4-pro",
      "max_tokens": 1048576,
      "model_types": [
        "chat"
      ],
      "thinking": {
        "default_value": true,
        "clear_thinking": true
      }
    },
    {
      "name": "qwen3-32b",
      "max_tokens": 30720,
      "model_types": [
        "chat"
      ]
    },
    {
      "name": "qwen3-4b",
      "max_tokens": 30720,
      "model_types": [
        "chat"
      ]
    },
    {
      "name": "ernie-5.0",
      "max_tokens": 121856,
      "model_types": [
        "vision"
      ]
    },
    {
      "name": "embedding-v1",
      "max_tokens": 384,
      "model_types": [
        "embedding"
      ]
    },
    {
      "name": "qwen3-reranker-4b",
      "max_tokens": 32768,
      "model_types": [
        "rerank"
      ]
    },
    {
      "name": "paddleocr-vl-0.9b",
      "max_tokens": 8192,
      "model_types": [
        "ocr"
      ]
    }
  ]
}