mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-05-27 11:15:59 +08:00
## Summary

- Replaces the `"no such method"` stub on `NvidiaModel.Rerank` (`internal/entity/models/nvidia.go`) with a real implementation against NVIDIA NIM's `/ranking` endpoint.
- Mirrors the existing Python `NvidiaRerank` class at `rag/llm/rerank_model.py:149-190` for behavior parity: same `passages`/`query.text`/`logit` payload shape; `top_n` set to `len(documents)` so every input gets a score returned in original order (the issue body's spec omitted `top_n`, which would cause silent data loss).
- Adds the `"rerank": "ranking"` URL suffix and two NIM rerank model entries (`nvidia/nv-rerankqa-mistral-4b-v3`, `nvidia/llama-3.2-nv-rerankqa-1b-v2`) to `conf/models/nvidia.json` so the picker exposes them.
- Follows the same shape as the recently merged Aliyun (#14676), Gitee (#14656), and ZhipuAI (#14608) Rerank implementations: lowercase per-driver request/response types, conversion to the project-wide `RerankResponse{Data: []RerankResult}`, per-call `context.WithTimeout` of 30s.

Closes #14720

## Test plan

- [x] `gofmt -l internal/entity/models/nvidia.go` — clean
- [x] `go vet ./internal/entity/models/...` — no new errors introduced (the two pre-existing vet errors in `baidu.go:642` and `openrouter.go:566` are unrelated to this PR)
- [x] `go build ./internal/entity/models/...` — succeeds
- [x] `python3 -c "import json; json.load(open('conf/models/nvidia.json'))"` — JSON valid
- [ ] Live smoke test against NVIDIA NIM with a real API key (requires reviewer with NIM credentials)

## Notes for reviewers

- The issue body suggested omitting `top_n`. The Python reference includes it (`top_n: len(texts)`), and without it NVIDIA returns only the default top-K rankings rather than scores for every input. This PR follows the Python implementation.
- The URL host is `integrate.api.nvidia.com` (kept consistent with the existing chat/embeddings BaseURL in `nvidia.go`), not the legacy `ai.api.nvidia.com` host the Python uses. NIM's unified endpoint accepts the model names as-is, so no per-model URL transform is needed.
519 lines
9.9 KiB
JSON
519 lines
9.9 KiB
JSON
{
|
|
"name": "Nvidia",
|
|
"url": {
|
|
"default": "https://integrate.api.nvidia.com/v1"
|
|
},
|
|
"url_suffix": {
|
|
"chat": "chat/completions",
|
|
"models": "models",
|
|
"embedding": "embeddings",
|
|
"rerank": "ranking"
|
|
},
|
|
"class": "nvidia",
|
|
"models": [
|
|
{
|
|
"name": "abacusai/dracarys-llama-3.1-70b-instruct",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "baai/bge-m3",
|
|
"max_tokens": 8192,
|
|
"model_types": [
|
|
"embedding"
|
|
]
|
|
},
|
|
{
|
|
"name": "bytedance/seed-oss-36b-instruct",
|
|
"max_tokens": 32768,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "deepseek-ai/deepseek-v4-flash",
|
|
"max_tokens": 1048576,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "deepseek-ai/deepseek-v4-pro",
|
|
"max_tokens": 1048576,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "deepseek-ai/deepseek-v3.2",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
],
|
|
"thinking": {
|
|
"default_value": true,
|
|
"clear_thinking": true
|
|
}
|
|
},
|
|
{
|
|
"name": "deepseek-ai/deepseek-v3.1",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
],
|
|
"thinking": {
|
|
"default_value": true,
|
|
"clear_thinking": true
|
|
}
|
|
},
|
|
{
|
|
"name": "google/codegemma-7b",
|
|
"max_tokens": 8192,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "google/gemma-2-2b-it",
|
|
"max_tokens": 8192,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "google/gemma-4-31b-it",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "google/gemma-7b",
|
|
"max_tokens": 8192,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "ibm/granite-3.3-8b-instruct",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "meta/llama-3.1-405b-instruct",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "meta/llama-3.2-90b-vision-instruct",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat",
|
|
"vision"
|
|
]
|
|
},
|
|
{
|
|
"name": "meta/llama-4-maverick-17b-128e-instruct",
|
|
"max_tokens": 1048576,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "microsoft/phi-4-mini-flash-reasoning",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
],
|
|
"thinking": {
|
|
"default_value": true,
|
|
"clear_thinking": true
|
|
}
|
|
},
|
|
{
|
|
"name": "minimaxai/minimax-m2.1",
|
|
"max_tokens": 204800,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "minimaxai/minimax-m2.5",
|
|
"max_tokens": 204800,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "minimaxai/minimax-m2.7",
|
|
"max_tokens": 204800,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "mistralai/devstral-2-123b-instruct-2512",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "mistralai/magistral-small-2506",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "mistralai/mistral-7b-instruct-v0.3",
|
|
"max_tokens": 32768,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "mistralai/mistral-large-3-675b-instruct-2512",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "mistralai/mistral-medium-3-5-128b",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat",
|
|
"vision"
|
|
]
|
|
},
|
|
{
|
|
"name": "mistralai/mistral-nemotron",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "mistralai/mixtral-8x22b-instruct",
|
|
"max_tokens": 65536,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "moonshotai/kimi-k2.5",
|
|
"max_tokens": 262144,
|
|
"model_types": [
|
|
"chat"
|
|
],
|
|
"thinking": {
|
|
"default_value": true,
|
|
"clear_thinking": true
|
|
}
|
|
},
|
|
{
|
|
"name": "moonshotai/kimi-k2.6",
|
|
"max_tokens": 262144,
|
|
"model_types": [
|
|
"chat",
|
|
"vision"
|
|
]
|
|
},
|
|
{
|
|
"name": "moonshotai/kimi-k2-instruct",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "moonshotai/kimi-k2-instruct-0905",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "moonshotai/kimi-k2-thinking",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
],
|
|
"thinking": {
|
|
"default_value": true,
|
|
"clear_thinking": true
|
|
}
|
|
},
|
|
{
|
|
"name": "nvidia/gliner-pii",
|
|
"max_tokens": 4096,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/llama-3.1-nemoguard-8b-content-safety",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/llama-3.1-nemoguard-8b-topic-control",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/llama-3.1-nemotron-nano-8b-v1",
|
|
"max_tokens": 8192,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/llama-3.1-nemotron-safety-guard-8b-v3",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/llama-3.1-nemotron-ultra-253b-v1",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
],
|
|
"thinking": {
|
|
"default_value": true,
|
|
"clear_thinking": true
|
|
}
|
|
},
|
|
{
|
|
"name": "nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1",
|
|
"max_tokens": 8192,
|
|
"model_types": [
|
|
"embedding"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/llama-3.2-nv-embedqa-1b-v2",
|
|
"max_tokens": 8192,
|
|
"model_types": [
|
|
"embedding"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/llama-3.3-nemotron-super-49b-v1",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/llama-3.3-nemotron-super-49b-v1.5",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
],
|
|
"thinking": {
|
|
"default_value": true,
|
|
"clear_thinking": true
|
|
}
|
|
},
|
|
{
|
|
"name": "nvidia/nemoguard-jailbreak-detect",
|
|
"max_tokens": 4096,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/nemotron-3-nano-30b-a3b",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat",
|
|
"vision"
|
|
],
|
|
"thinking": {
|
|
"default_value": true,
|
|
"clear_thinking": true
|
|
}
|
|
},
|
|
{
|
|
"name": "nvidia/nemotron-3-super-120b-a12b",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/nemotron-content-safety-reasoning-4b",
|
|
"max_tokens": 8192,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/nemotron-mini-4b-instruct",
|
|
"max_tokens": 4096,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/nv-embed-v1",
|
|
"max_tokens": 32768,
|
|
"model_types": [
|
|
"embedding"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/nv-embedqa-e5-v5",
|
|
"max_tokens": 512,
|
|
"model_types": [
|
|
"embedding"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/nv-embedqa-mistral-7b-v2",
|
|
"max_tokens": 512,
|
|
"model_types": [
|
|
"embedding"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/nv-rerankqa-mistral-4b-v3",
|
|
"max_tokens": 4096,
|
|
"model_types": [
|
|
"rerank"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/llama-3.2-nv-rerankqa-1b-v2",
|
|
"max_tokens": 4096,
|
|
"model_types": [
|
|
"rerank"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/nvidia-nemotron-nano-9b-v2",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/riva-translate-4b-instruct-v1_1",
|
|
"max_tokens": 4096,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "nvidia/usdcode",
|
|
"max_tokens": 8192,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "openai/gpt-oss-120b",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "qwen/qwen2.5-coder-7b-instruct",
|
|
"max_tokens": 32768,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "qwen/qwen3-5-122b-a10b",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
]
|
|
},
|
|
{
|
|
"name": "qwen/qwen3-235b-a22b",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
],
|
|
"thinking": {
|
|
"default_value": true,
|
|
"clear_thinking": true
|
|
}
|
|
},
|
|
{
|
|
"name": "qwen/qwen3-coder-480b-a35b-instruct",
|
|
"max_tokens": 262144,
|
|
"model_types": [
|
|
"chat"
|
|
],
|
|
"thinking": {
|
|
"default_value": true,
|
|
"clear_thinking": true
|
|
}
|
|
},
|
|
{
|
|
"name": "snowflake/arctic-embed-l",
|
|
"max_tokens": 512,
|
|
"model_types": [
|
|
"embedding"
|
|
]
|
|
},
|
|
{
|
|
"name": "z-ai/glm-5",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
],
|
|
"thinking": {
|
|
"default_value": true,
|
|
"clear_thinking": true
|
|
}
|
|
},
|
|
{
|
|
"name": "z-ai/glm-5.1",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
],
|
|
"thinking": {
|
|
"default_value": true,
|
|
"clear_thinking": true
|
|
}
|
|
},
|
|
{
|
|
"name": "z-ai/glm-4.7",
|
|
"max_tokens": 131072,
|
|
"model_types": [
|
|
"chat"
|
|
],
|
|
"thinking": {
|
|
"default_value": true,
|
|
"clear_thinking": true
|
|
}
|
|
}
|
|
]
|
|
} |