Files
ragflow/internal/entity/model_test.go
oktofeesh 5ae41dc1eb fix(go-models): route hosted OCR providers through drivers (#15233)
## Summary
- route hosted MinerU.Net and PaddleOCR.Net provider names to their
existing Go drivers
- add regression coverage for loading the hosted OCR provider configs
through ProviderManager

## What changed
- Added canonical provider-name aliases for the hosted OCR provider
display names.
- Covered both bundled configs with a focused provider-manager test.

## Why
The hosted provider configs use display names with `.Net`, while model
factory dispatch lowercases the provider name. Without aliases, those
configs fall through to `DummyModel` instead of using the existing
MinerU and PaddleOCR drivers.

---------

Co-authored-by: Jin Hai <haijin.chn@gmail.com>
2026-05-26 20:40:40 +08:00

212 lines
6.4 KiB
Go

//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package entity
import (
"os"
"path/filepath"
modeldrivers "ragflow/internal/entity/models"
"testing"
)
// readProviderConfig returns the raw contents of the provider config file
// conf/models/<fileName>.
//
// Two locations are probed in order, both relative to the current working
// directory: ../../conf/models/<fileName> and conf/models/<fileName>. The
// first one that can be read wins.
// NOTE(review): the two candidates presumably cover running the tests from
// the package directory and from the repository root — confirm.
//
// If no candidate can be read the test is aborted with t.Fatalf. The failure
// message carries every underlying read error (each one names the path that
// was tried), so a permission problem is not misreported as a missing file.
func readProviderConfig(t *testing.T, fileName string) []byte {
	t.Helper()

	candidates := []string{
		filepath.Join("..", "..", "conf", "models", fileName),
		filepath.Join("conf", "models", fileName),
	}

	// Keep the error from each failed attempt instead of discarding it.
	var readErrs []error
	for _, candidate := range candidates {
		data, err := os.ReadFile(candidate)
		if err == nil {
			return data
		}
		readErrs = append(readErrs, err)
	}

	t.Fatalf("could not locate conf/models/%s: %v", fileName, readErrs)
	return nil
}
// readPPIOProviderConfig returns the raw bytes of the ppio.json provider
// config by delegating to readProviderConfig. It is marked as a test helper
// so failures are attributed to the calling test.
func readPPIOProviderConfig(t *testing.T) []byte {
	t.Helper()

	const ppioConfigFile = "ppio.json"
	return readProviderConfig(t, ppioConfigFile)
}
// TestHostedProviderConfigsLoadSharedDrivers copies mineru.json and
// paddleocr.json into a temporary directory, loads that directory through
// NewProviderManager, and verifies that:
//   - "MinerU.Net" resolves to a *modeldrivers.MinerUModel driver with class
//     "mineru.net" and document-parse suffix "v4/extract/task";
//   - "PaddleOCR.Net" resolves to a *modeldrivers.PaddleOCRModel driver with
//     class "paddleocr.net" and OCR suffix "v2/ocr/jobs".
func TestHostedProviderConfigsLoadSharedDrivers(t *testing.T) {
	configDir := t.TempDir()

	// Stage only the two configs under test so the manager sees nothing else.
	hostedConfigs := []string{"mineru.json", "paddleocr.json"}
	for _, name := range hostedConfigs {
		target := filepath.Join(configDir, name)
		content := readProviderConfig(t, name)
		if writeErr := os.WriteFile(target, content, 0o600); writeErr != nil {
			t.Fatalf("write %s config: %v", name, writeErr)
		}
	}

	manager, err := NewProviderManager(configDir)
	if err != nil {
		t.Fatalf("NewProviderManager: %v", err)
	}

	// MinerU.Net provider.
	mineruProvider := manager.FindProvider("MinerU.Net")
	if mineruProvider == nil {
		t.Fatal("MinerU.Net provider not found")
	}
	_, isMinerUDriver := mineruProvider.ModelDriver.(*modeldrivers.MinerUModel)
	if !isMinerUDriver {
		t.Fatalf("MinerU.Net ModelDriver=%T, want *models.MinerUModel", mineruProvider.ModelDriver)
	}
	if got := mineruProvider.Class; got != "mineru.net" {
		t.Errorf("MinerU.Net class=%q", got)
	}
	if got := mineruProvider.URLSuffix.DocumentParse; got != "v4/extract/task" {
		t.Errorf("MinerU.Net doc_parse suffix=%q", got)
	}

	// PaddleOCR.Net provider.
	paddleProvider := manager.FindProvider("PaddleOCR.Net")
	if paddleProvider == nil {
		t.Fatal("PaddleOCR.Net provider not found")
	}
	_, isPaddleDriver := paddleProvider.ModelDriver.(*modeldrivers.PaddleOCRModel)
	if !isPaddleDriver {
		t.Fatalf("PaddleOCR.Net ModelDriver=%T, want *models.PaddleOCRModel", paddleProvider.ModelDriver)
	}
	if got := paddleProvider.Class; got != "paddleocr.net" {
		t.Errorf("PaddleOCR.Net class=%q", got)
	}
	if got := paddleProvider.URLSuffix.OCR; got != "v2/ocr/jobs" {
		t.Errorf("PaddleOCR.Net OCR suffix=%q", got)
	}
}
// TestLocalOCRProviderConfigsLoadLocalDrivers copies mineru_local.json and
// paddleocr_local.json into a temporary directory, loads that directory
// through NewProviderManager, and verifies that:
//   - "MinerU" resolves to a *modeldrivers.MinerULocalModel driver with
//     document-parse suffix "file_parse";
//   - "PaddleOCR" resolves to a *modeldrivers.PaddleOCRLocalModel driver with
//     OCR suffix "layout-parsing".
func TestLocalOCRProviderConfigsLoadLocalDrivers(t *testing.T) {
	configDir := t.TempDir()

	// Stage only the two local configs so the manager sees nothing else.
	localConfigs := []string{"mineru_local.json", "paddleocr_local.json"}
	for _, name := range localConfigs {
		target := filepath.Join(configDir, name)
		content := readProviderConfig(t, name)
		if writeErr := os.WriteFile(target, content, 0o600); writeErr != nil {
			t.Fatalf("write %s config: %v", name, writeErr)
		}
	}

	manager, err := NewProviderManager(configDir)
	if err != nil {
		t.Fatalf("NewProviderManager: %v", err)
	}

	// MinerU provider.
	mineruProvider := manager.FindProvider("MinerU")
	if mineruProvider == nil {
		t.Fatal("MinerU provider not found")
	}
	_, isMinerULocal := mineruProvider.ModelDriver.(*modeldrivers.MinerULocalModel)
	if !isMinerULocal {
		t.Fatalf("MinerU ModelDriver=%T, want *models.MinerULocalModel", mineruProvider.ModelDriver)
	}
	if got := mineruProvider.URLSuffix.DocumentParse; got != "file_parse" {
		t.Errorf("MinerU doc_parse suffix=%q", got)
	}

	// PaddleOCR provider.
	paddleProvider := manager.FindProvider("PaddleOCR")
	if paddleProvider == nil {
		t.Fatal("PaddleOCR provider not found")
	}
	_, isPaddleLocal := paddleProvider.ModelDriver.(*modeldrivers.PaddleOCRLocalModel)
	if !isPaddleLocal {
		t.Fatalf("PaddleOCR ModelDriver=%T, want *models.PaddleOCRLocalModel", paddleProvider.ModelDriver)
	}
	if got := paddleProvider.URLSuffix.OCR; got != "layout-parsing" {
		t.Errorf("PaddleOCR OCR suffix=%q", got)
	}
}
// TestPPIOProviderConfigLoadsIntoProviderManager copies ppio.json into a
// temporary directory, loads it through NewProviderManager, and checks the
// resulting provider end to end:
//   - provider lookup by "ppio", its Name, its "default" and "us" URLs, and
//     its chat/models URL suffixes;
//   - the driver is a *modeldrivers.PPIOModel whose Name() is "ppio";
//   - the provider carries the expected number of models, each flagged as a
//     chat model with class "PPIO";
//   - ListModels, GetModelByName and SearchByType agree with that data.
func TestPPIOProviderConfigLoadsIntoProviderManager(t *testing.T) {
	// Model count this test expects from ppio.json; shared by the three count
	// assertions below so they cannot drift apart.
	const wantModelCount = 21

	dir := t.TempDir()
	if err := os.WriteFile(filepath.Join(dir, "ppio.json"), readPPIOProviderConfig(t), 0o600); err != nil {
		t.Fatalf("write ppio config: %v", err)
	}

	pm, err := NewProviderManager(dir)
	if err != nil {
		t.Fatalf("NewProviderManager: %v", err)
	}

	provider := pm.FindProvider("ppio")
	if provider == nil {
		t.Fatal("PPIO provider not found")
	}
	if provider.Name != "PPIO" {
		t.Errorf("provider.Name=%q", provider.Name)
	}
	if provider.URL["default"] != "https://api.ppio.com/openai/v1" {
		t.Errorf("default URL=%q", provider.URL["default"])
	}
	if provider.URL["us"] != "https://api.ppinfra.com/v3/openai" {
		t.Errorf("us URL=%q", provider.URL["us"])
	}
	if provider.URLSuffix.Chat != "chat/completions" {
		t.Errorf("chat suffix=%q", provider.URLSuffix.Chat)
	}
	if provider.URLSuffix.Models != "models" {
		t.Errorf("models suffix=%q", provider.URLSuffix.Models)
	}

	if _, ok := provider.ModelDriver.(*modeldrivers.PPIOModel); !ok {
		t.Fatalf("ModelDriver=%T, want *models.PPIOModel", provider.ModelDriver)
	}
	if provider.ModelDriver.Name() != "ppio" {
		t.Errorf("ModelDriver.Name()=%q", provider.ModelDriver.Name())
	}

	if len(provider.Models) != wantModelCount {
		t.Fatalf("PPIO model count=%d, want %d", len(provider.Models), wantModelCount)
	}
	for _, model := range provider.Models {
		if !model.ModelTypeMap["chat"] {
			t.Errorf("model %q missing chat type map", model.Name)
		}
		// Class is a pointer: report nil and mismatch separately and print
		// the dereferenced value, since %v on the pointer would only show
		// an address.
		switch {
		case model.Class == nil:
			t.Errorf("model %q class=<nil>", model.Name)
		case *model.Class != "PPIO":
			t.Errorf("model %q class=%q", model.Name, *model.Class)
		}
	}

	models, err := pm.ListModels("PPIO")
	if err != nil {
		t.Fatalf("ListModels: %v", err)
	}
	if len(models) != wantModelCount {
		t.Errorf("ListModels count=%d, want %d", len(models), wantModelCount)
	}

	// Spot-check max_tokens for individual models looked up by name.
	model, err := pm.GetModelByName("ppio", "deepseek/deepseek-r1")
	if err != nil {
		t.Fatalf("GetModelByName: %v", err)
	}
	if model.MaxTokens != 64000 {
		t.Errorf("deepseek/deepseek-r1 max_tokens=%d", model.MaxTokens)
	}

	model, err = pm.GetModelByName("ppio", "deepseek/deepseek-v4-pro")
	if err != nil {
		t.Fatalf("GetModelByName v4 pro: %v", err)
	}
	if model.MaxTokens != 1048576 {
		t.Errorf("deepseek/deepseek-v4-pro max_tokens=%d", model.MaxTokens)
	}

	model, err = pm.GetModelByName("ppio", "deepseek/deepseek-v4-flash")
	if err != nil {
		t.Fatalf("GetModelByName v4 flash: %v", err)
	}
	if model.MaxTokens != 1048576 {
		t.Errorf("deepseek/deepseek-v4-flash max_tokens=%d", model.MaxTokens)
	}

	// Only ppio.json was staged, so the chat search should return exactly
	// the PPIO models.
	resp := pm.SearchByType("chat")
	if resp.Code != 0 {
		t.Fatalf("SearchByType code=%d message=%q", resp.Code, resp.Message)
	}
	if len(resp.Data) != wantModelCount {
		t.Errorf("SearchByType data count=%d, want %d", len(resp.Data), wantModelCount)
	}
}