mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-05-28 11:43:06 +08:00
fix(go-models): route hosted OCR providers through drivers (#15233)
## Summary

- route hosted MinerU.Net and PaddleOCR.Net provider names to their existing Go drivers
- add regression coverage for loading the hosted OCR provider configs through ProviderManager

## What changed

- Added canonical provider-name aliases for the hosted OCR provider display names.
- Covered both bundled configs with a focused provider-manager test.

## Why

The hosted provider configs use display names with `.Net`, while model factory dispatch lowercases the provider name. Without aliases, those configs fall through to `DummyModel` instead of using the existing MinerU and PaddleOCR drivers.

---------

Co-authored-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
@ -7,7 +7,7 @@
|
||||
"doc_parse": "v4/extract/task",
|
||||
"tasks": ""
|
||||
},
|
||||
"class": "mineru",
|
||||
"class": "mineru.net",
|
||||
"models": [
|
||||
{
|
||||
"name": "vlm",
|
||||
@ -22,4 +22,4 @@
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@ -6,7 +6,7 @@
|
||||
"url_suffix": {
|
||||
"ocr": "v2/ocr/jobs"
|
||||
},
|
||||
"class": "paddleocr",
|
||||
"class": "paddleocr.net",
|
||||
"models": [
|
||||
{
|
||||
"name": "PaddleOCR-VL-1.5",
|
||||
@ -30,4 +30,4 @@
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@ -23,12 +23,12 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func readPPIOProviderConfig(t *testing.T) []byte {
|
||||
func readProviderConfig(t *testing.T, fileName string) []byte {
|
||||
t.Helper()
|
||||
|
||||
for _, candidate := range []string{
|
||||
filepath.Join("..", "..", "conf", "models", "ppio.json"),
|
||||
filepath.Join("conf", "models", "ppio.json"),
|
||||
filepath.Join("..", "..", "conf", "models", fileName),
|
||||
filepath.Join("conf", "models", fileName),
|
||||
} {
|
||||
data, err := os.ReadFile(candidate)
|
||||
if err == nil {
|
||||
@ -36,10 +36,93 @@ func readPPIOProviderConfig(t *testing.T) []byte {
|
||||
}
|
||||
}
|
||||
|
||||
t.Fatal("could not locate conf/models/ppio.json")
|
||||
t.Fatalf("could not locate conf/models/%s", fileName)
|
||||
return nil
|
||||
}
|
||||
|
||||
func readPPIOProviderConfig(t *testing.T) []byte {
|
||||
t.Helper()
|
||||
return readProviderConfig(t, "ppio.json")
|
||||
}
|
||||
|
||||
func TestHostedProviderConfigsLoadSharedDrivers(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
for _, fileName := range []string{"mineru.json", "paddleocr.json"} {
|
||||
if err := os.WriteFile(filepath.Join(dir, fileName), readProviderConfig(t, fileName), 0o600); err != nil {
|
||||
t.Fatalf("write %s config: %v", fileName, err)
|
||||
}
|
||||
}
|
||||
|
||||
pm, err := NewProviderManager(dir)
|
||||
if err != nil {
|
||||
t.Fatalf("NewProviderManager: %v", err)
|
||||
}
|
||||
|
||||
minerU := pm.FindProvider("MinerU.Net")
|
||||
if minerU == nil {
|
||||
t.Fatal("MinerU.Net provider not found")
|
||||
}
|
||||
if _, ok := minerU.ModelDriver.(*modeldrivers.MinerUModel); !ok {
|
||||
t.Fatalf("MinerU.Net ModelDriver=%T, want *models.MinerUModel", minerU.ModelDriver)
|
||||
}
|
||||
if minerU.Class != "mineru.net" {
|
||||
t.Errorf("MinerU.Net class=%q", minerU.Class)
|
||||
}
|
||||
if minerU.URLSuffix.DocumentParse != "v4/extract/task" {
|
||||
t.Errorf("MinerU.Net doc_parse suffix=%q", minerU.URLSuffix.DocumentParse)
|
||||
}
|
||||
|
||||
paddleOCR := pm.FindProvider("PaddleOCR.Net")
|
||||
if paddleOCR == nil {
|
||||
t.Fatal("PaddleOCR.Net provider not found")
|
||||
}
|
||||
if _, ok := paddleOCR.ModelDriver.(*modeldrivers.PaddleOCRModel); !ok {
|
||||
t.Fatalf("PaddleOCR.Net ModelDriver=%T, want *models.PaddleOCRModel", paddleOCR.ModelDriver)
|
||||
}
|
||||
if paddleOCR.Class != "paddleocr.net" {
|
||||
t.Errorf("PaddleOCR.Net class=%q", paddleOCR.Class)
|
||||
}
|
||||
if paddleOCR.URLSuffix.OCR != "v2/ocr/jobs" {
|
||||
t.Errorf("PaddleOCR.Net OCR suffix=%q", paddleOCR.URLSuffix.OCR)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLocalOCRProviderConfigsLoadLocalDrivers(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
for _, fileName := range []string{"mineru_local.json", "paddleocr_local.json"} {
|
||||
if err := os.WriteFile(filepath.Join(dir, fileName), readProviderConfig(t, fileName), 0o600); err != nil {
|
||||
t.Fatalf("write %s config: %v", fileName, err)
|
||||
}
|
||||
}
|
||||
|
||||
pm, err := NewProviderManager(dir)
|
||||
if err != nil {
|
||||
t.Fatalf("NewProviderManager: %v", err)
|
||||
}
|
||||
|
||||
minerU := pm.FindProvider("MinerU")
|
||||
if minerU == nil {
|
||||
t.Fatal("MinerU provider not found")
|
||||
}
|
||||
if _, ok := minerU.ModelDriver.(*modeldrivers.MinerULocalModel); !ok {
|
||||
t.Fatalf("MinerU ModelDriver=%T, want *models.MinerULocalModel", minerU.ModelDriver)
|
||||
}
|
||||
if minerU.URLSuffix.DocumentParse != "file_parse" {
|
||||
t.Errorf("MinerU doc_parse suffix=%q", minerU.URLSuffix.DocumentParse)
|
||||
}
|
||||
|
||||
paddleOCR := pm.FindProvider("PaddleOCR")
|
||||
if paddleOCR == nil {
|
||||
t.Fatal("PaddleOCR provider not found")
|
||||
}
|
||||
if _, ok := paddleOCR.ModelDriver.(*modeldrivers.PaddleOCRLocalModel); !ok {
|
||||
t.Fatalf("PaddleOCR ModelDriver=%T, want *models.PaddleOCRLocalModel", paddleOCR.ModelDriver)
|
||||
}
|
||||
if paddleOCR.URLSuffix.OCR != "layout-parsing" {
|
||||
t.Errorf("PaddleOCR OCR suffix=%q", paddleOCR.URLSuffix.OCR)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPPIOProviderConfigLoadsIntoProviderManager(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(dir, "ppio.json"), readPPIOProviderConfig(t), 0o600); err != nil {
|
||||
|
||||
@ -119,19 +119,19 @@ func (f *ModelFactory) CreateModelDriver(providerName string, baseURL map[string
|
||||
return NewPPIOModel(baseURL, urlSuffix), nil
|
||||
case "voyage":
|
||||
return NewVoyageModel(baseURL, urlSuffix), nil
|
||||
case "paddleocr":
|
||||
case "paddleocr.net":
|
||||
return NewPaddleOCRModel(baseURL, urlSuffix), nil
|
||||
case "xunfei":
|
||||
return NewXunFeiModel(baseURL, urlSuffix), nil
|
||||
case "deepinfra":
|
||||
return NewDeepInfraModel(baseURL, urlSuffix), nil
|
||||
case "mineru":
|
||||
case "mineru.net":
|
||||
return NewMinerUModel(baseURL, urlSuffix), nil
|
||||
case "jiekouai":
|
||||
return NewJieKouAIModel(baseURL, urlSuffix), nil
|
||||
case "302.ai":
|
||||
return NewAI302Model(baseURL, urlSuffix), nil
|
||||
case "mineru_local":
|
||||
case "mineru":
|
||||
return NewMinerLocalUModel(baseURL, urlSuffix), nil
|
||||
case "futurmix":
|
||||
return NewFuturMixModel(baseURL, urlSuffix), nil
|
||||
@ -143,7 +143,7 @@ func (f *ModelFactory) CreateModelDriver(providerName string, baseURL map[string
|
||||
return NewN1NModel(baseURL, urlSuffix), nil
|
||||
case "bedrock":
|
||||
return NewBedrockModel(baseURL, urlSuffix), nil
|
||||
case "paddleocr_local":
|
||||
case "paddleocr":
|
||||
return NewPaddleOCRLocalModel(baseURL, urlSuffix), nil
|
||||
case "orcarouter":
|
||||
return NewOrcaRouterModel(baseURL, urlSuffix), nil
|
||||
|
||||
Reference in New Issue
Block a user