From 5ae41dc1ebebcaad8daf40982d626dbdf73282fe Mon Sep 17 00:00:00 2001 From: oktofeesh <287075021+oktofeesh1@users.noreply.github.com> Date: Tue, 26 May 2026 05:40:40 -0700 Subject: [PATCH] fix(go-models): route hosted OCR providers through drivers (#15233) ## Summary - route hosted MinerU.Net and PaddleOCR.Net provider names to their existing Go drivers - add regression coverage for loading the hosted OCR provider configs through ProviderManager ## What changed - Re-keyed the model factory dispatch so that `mineru.net` and `paddleocr.net` select the hosted MinerU and PaddleOCR drivers, and updated `class` in the bundled hosted configs to match. - The bare `mineru` and `paddleocr` keys, which previously selected the hosted drivers, now select the local drivers, replacing the former `mineru_local` and `paddleocr_local` keys. - Covered both bundled hosted configs and both local configs with focused provider-manager tests. ## Why The hosted provider configs use display names with `.Net`, while model factory dispatch lowercases the provider name. Without matching dispatch keys, those configs fall through to `DummyModel` instead of using the existing MinerU and PaddleOCR drivers. --------- Co-authored-by: Jin Hai --- conf/models/mineru.json | 4 +- conf/models/paddleocr.json | 4 +- internal/entity/model_test.go | 91 +++++++++++++++++++++++++++++-- internal/entity/models/factory.go | 8 +-- 4 files changed, 95 insertions(+), 12 deletions(-) diff --git a/conf/models/mineru.json b/conf/models/mineru.json index 3c79719bd..89c56bc22 100644 --- a/conf/models/mineru.json +++ b/conf/models/mineru.json @@ -7,7 +7,7 @@ "doc_parse": "v4/extract/task", "tasks": "" }, - "class": "mineru", + "class": "mineru.net", "models": [ { "name": "vlm", @@ -22,4 +22,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/conf/models/paddleocr.json b/conf/models/paddleocr.json index 04418358e..fd4f3a90d 100644 --- a/conf/models/paddleocr.json +++ b/conf/models/paddleocr.json @@ -6,7 +6,7 @@ "url_suffix": { "ocr": "v2/ocr/jobs" }, - "class": "paddleocr", + "class": "paddleocr.net", "models": [ { "name": "PaddleOCR-VL-1.5", @@ -30,4 +30,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/internal/entity/model_test.go b/internal/entity/model_test.go index ec2f5b97a..0076128db 100644 ---
a/internal/entity/model_test.go +++ b/internal/entity/model_test.go @@ -23,12 +23,12 @@ import ( "testing" ) -func readPPIOProviderConfig(t *testing.T) []byte { +func readProviderConfig(t *testing.T, fileName string) []byte { t.Helper() for _, candidate := range []string{ - filepath.Join("..", "..", "conf", "models", "ppio.json"), - filepath.Join("conf", "models", "ppio.json"), + filepath.Join("..", "..", "conf", "models", fileName), + filepath.Join("conf", "models", fileName), } { data, err := os.ReadFile(candidate) if err == nil { @@ -36,10 +36,93 @@ func readPPIOProviderConfig(t *testing.T) []byte { } } - t.Fatal("could not locate conf/models/ppio.json") + t.Fatalf("could not locate conf/models/%s", fileName) return nil } +func readPPIOProviderConfig(t *testing.T) []byte { + t.Helper() + return readProviderConfig(t, "ppio.json") +} + +func TestHostedProviderConfigsLoadSharedDrivers(t *testing.T) { + dir := t.TempDir() + for _, fileName := range []string{"mineru.json", "paddleocr.json"} { + if err := os.WriteFile(filepath.Join(dir, fileName), readProviderConfig(t, fileName), 0o600); err != nil { + t.Fatalf("write %s config: %v", fileName, err) + } + } + + pm, err := NewProviderManager(dir) + if err != nil { + t.Fatalf("NewProviderManager: %v", err) + } + + minerU := pm.FindProvider("MinerU.Net") + if minerU == nil { + t.Fatal("MinerU.Net provider not found") + } + if _, ok := minerU.ModelDriver.(*modeldrivers.MinerUModel); !ok { + t.Fatalf("MinerU.Net ModelDriver=%T, want *models.MinerUModel", minerU.ModelDriver) + } + if minerU.Class != "mineru.net" { + t.Errorf("MinerU.Net class=%q", minerU.Class) + } + if minerU.URLSuffix.DocumentParse != "v4/extract/task" { + t.Errorf("MinerU.Net doc_parse suffix=%q", minerU.URLSuffix.DocumentParse) + } + + paddleOCR := pm.FindProvider("PaddleOCR.Net") + if paddleOCR == nil { + t.Fatal("PaddleOCR.Net provider not found") + } + if _, ok := paddleOCR.ModelDriver.(*modeldrivers.PaddleOCRModel); !ok { + t.Fatalf("PaddleOCR.Net 
ModelDriver=%T, want *models.PaddleOCRModel", paddleOCR.ModelDriver) + } + if paddleOCR.Class != "paddleocr.net" { + t.Errorf("PaddleOCR.Net class=%q", paddleOCR.Class) + } + if paddleOCR.URLSuffix.OCR != "v2/ocr/jobs" { + t.Errorf("PaddleOCR.Net OCR suffix=%q", paddleOCR.URLSuffix.OCR) + } +} + +func TestLocalOCRProviderConfigsLoadLocalDrivers(t *testing.T) { + dir := t.TempDir() + for _, fileName := range []string{"mineru_local.json", "paddleocr_local.json"} { + if err := os.WriteFile(filepath.Join(dir, fileName), readProviderConfig(t, fileName), 0o600); err != nil { + t.Fatalf("write %s config: %v", fileName, err) + } + } + + pm, err := NewProviderManager(dir) + if err != nil { + t.Fatalf("NewProviderManager: %v", err) + } + + minerU := pm.FindProvider("MinerU") + if minerU == nil { + t.Fatal("MinerU provider not found") + } + if _, ok := minerU.ModelDriver.(*modeldrivers.MinerULocalModel); !ok { + t.Fatalf("MinerU ModelDriver=%T, want *models.MinerULocalModel", minerU.ModelDriver) + } + if minerU.URLSuffix.DocumentParse != "file_parse" { + t.Errorf("MinerU doc_parse suffix=%q", minerU.URLSuffix.DocumentParse) + } + + paddleOCR := pm.FindProvider("PaddleOCR") + if paddleOCR == nil { + t.Fatal("PaddleOCR provider not found") + } + if _, ok := paddleOCR.ModelDriver.(*modeldrivers.PaddleOCRLocalModel); !ok { + t.Fatalf("PaddleOCR ModelDriver=%T, want *models.PaddleOCRLocalModel", paddleOCR.ModelDriver) + } + if paddleOCR.URLSuffix.OCR != "layout-parsing" { + t.Errorf("PaddleOCR OCR suffix=%q", paddleOCR.URLSuffix.OCR) + } +} + func TestPPIOProviderConfigLoadsIntoProviderManager(t *testing.T) { dir := t.TempDir() if err := os.WriteFile(filepath.Join(dir, "ppio.json"), readPPIOProviderConfig(t), 0o600); err != nil { diff --git a/internal/entity/models/factory.go b/internal/entity/models/factory.go index e890f8d28..6b520cd92 100644 --- a/internal/entity/models/factory.go +++ b/internal/entity/models/factory.go @@ -119,19 +119,19 @@ func (f *ModelFactory) 
CreateModelDriver(providerName string, baseURL map[string return NewPPIOModel(baseURL, urlSuffix), nil case "voyage": return NewVoyageModel(baseURL, urlSuffix), nil - case "paddleocr": + case "paddleocr.net": return NewPaddleOCRModel(baseURL, urlSuffix), nil case "xunfei": return NewXunFeiModel(baseURL, urlSuffix), nil case "deepinfra": return NewDeepInfraModel(baseURL, urlSuffix), nil - case "mineru": + case "mineru.net": return NewMinerUModel(baseURL, urlSuffix), nil case "jiekouai": return NewJieKouAIModel(baseURL, urlSuffix), nil case "302.ai": return NewAI302Model(baseURL, urlSuffix), nil - case "mineru_local": + case "mineru": return NewMinerLocalUModel(baseURL, urlSuffix), nil case "futurmix": return NewFuturMixModel(baseURL, urlSuffix), nil @@ -143,7 +143,7 @@ func (f *ModelFactory) CreateModelDriver(providerName string, baseURL map[string return NewN1NModel(baseURL, urlSuffix), nil case "bedrock": return NewBedrockModel(baseURL, urlSuffix), nil - case "paddleocr_local": + case "paddleocr": return NewPaddleOCRLocalModel(baseURL, urlSuffix), nil case "orcarouter": return NewOrcaRouterModel(baseURL, urlSuffix), nil