package models import ( "bufio" "bytes" "encoding/json" "fmt" "io" "net/http" "strings" "time" ) // NvidiaModel implements ModelDriver for Nvidia type NvidiaModel struct { BaseURL map[string]string URLSuffix URLSuffix httpClient *http.Client } // NewNvidiaModel creates a new Nvidia model instance func NewNvidiaModel(baseURL map[string]string, urlSuffix URLSuffix) *NvidiaModel { return &NvidiaModel{ BaseURL: baseURL, URLSuffix: urlSuffix, httpClient: &http.Client{ Timeout: 120 * time.Second, Transport: &http.Transport{ MaxIdleConns: 100, MaxIdleConnsPerHost: 10, IdleConnTimeout: 90 * time.Second, DisableCompression: false, }, }, } } func (n NvidiaModel) NewInstance(baseURL map[string]string) ModelDriver { return &NvidiaModel{ BaseURL: baseURL, URLSuffix: n.URLSuffix, httpClient: &http.Client{ Timeout: 120 * time.Second, Transport: &http.Transport{ MaxIdleConns: 100, MaxIdleConnsPerHost: 10, IdleConnTimeout: 90 * time.Second, DisableCompression: false, }, }, } } func (n NvidiaModel) Name() string { return "nvidia" } func (n *NvidiaModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) { if len(messages) == 0 { return nil, fmt.Errorf("messages is empty") } var region = "default" if apiConfig != nil && apiConfig.Region != nil { region = *apiConfig.Region } baseURL := n.BaseURL[region] if baseURL == "" { baseURL = n.BaseURL["default"] } url := fmt.Sprintf("%s/%s", baseURL, n.URLSuffix.Chat) apiMessages := make([]map[string]interface{}, len(messages)) for i, msg := range messages { apiMessages[i] = map[string]interface{}{ "role": msg.Role, "content": msg.Content, } } reqBody := map[string]interface{}{ "model": modelName, "messages": apiMessages, "stream": false, } if chatModelConfig != nil { if chatModelConfig.Stream != nil { reqBody["stream"] = *chatModelConfig.Stream } if chatModelConfig.MaxTokens != nil { reqBody["max_tokens"] = *chatModelConfig.MaxTokens } if chatModelConfig.Temperature != nil { reqBody["temperature"] = *chatModelConfig.Temperature } if chatModelConfig.TopP != nil { reqBody["top_p"] = *chatModelConfig.TopP } if chatModelConfig.Stop != nil { reqBody["stop"] = *chatModelConfig.Stop } if chatModelConfig.Thinking != nil { if *chatModelConfig.Thinking { reqBody["thinking"] = map[string]interface{}{"type": "enabled"} } else { reqBody["thinking"] = map[string]interface{}{"type": "disabled"} } } } jsonData, err := json.Marshal(reqBody) if err != nil { return nil, fmt.Errorf("failed to marshal request: %w", err) } req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData)) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } req.Header.Set("Content-Type", "application/json") if apiConfig != nil && apiConfig.ApiKey != nil { req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) } resp, err := n.httpClient.Do(req) if err != nil { return nil, fmt.Errorf("failed to send request: %w", err) } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { return nil, fmt.Errorf("failed to read response: %w", err) } if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) } var result map[string]interface{} if err = json.Unmarshal(body, &result); err != nil { return nil, fmt.Errorf("failed to parse response: %w", err) } choices, ok := result["choices"].([]interface{}) if !ok || len(choices) == 0 { return nil, fmt.Errorf("no choices in response") } firstChoice, ok := choices[0].(map[string]interface{}) if !ok { return nil, fmt.Errorf("invalid choice format") } messageMap, ok := firstChoice["message"].(map[string]interface{}) if !ok { return nil, fmt.Errorf("invalid message format") } content, ok := messageMap["content"].(string) if !ok { return nil, fmt.Errorf("invalid content format") } var modelClass *string if chatModelConfig != nil { modelClass = chatModelConfig.ModelClass } thinking, answer := GetThinkingAndAnswer(modelClass, &content) chatResponse := &ChatResponse{ Answer: answer, ReasonContent: thinking, } return chatResponse, nil } func (n *NvidiaModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, modelConfig *ChatConfig, sender func(*string, *string) error) error { if len(messages) == 0 { return fmt.Errorf("messages is empty") } var region = "default" if apiConfig != nil && apiConfig.Region != nil { region = *apiConfig.Region } baseURL := n.BaseURL[region] if baseURL == "" { baseURL = n.BaseURL["default"] } url := fmt.Sprintf("%s/%s", baseURL, n.URLSuffix.Chat) apiMessages := make([]map[string]interface{}, len(messages)) for i, msg := range messages { apiMessages[i] = map[string]interface{}{ "role": msg.Role, "content": msg.Content, } } reqBody := map[string]interface{}{ "model": modelName, "messages": apiMessages, "stream": true, } if modelConfig != nil { if modelConfig.Stream != nil { reqBody["stream"] = *modelConfig.Stream } if modelConfig.MaxTokens != nil { reqBody["max_tokens"] = *modelConfig.MaxTokens } if modelConfig.Temperature != nil { reqBody["temperature"] = *modelConfig.Temperature } if modelConfig.DoSample != nil { reqBody["do_sample"] = *modelConfig.DoSample } if modelConfig.TopP != nil { reqBody["top_p"] = *modelConfig.TopP } if modelConfig.Stop != nil { reqBody["stop"] = *modelConfig.Stop } if modelConfig.Thinking != nil { if *modelConfig.Thinking { reqBody["thinking"] = map[string]interface{}{"type": "enabled"} } else { reqBody["thinking"] = map[string]interface{}{"type": "disabled"} } } } jsonData, err := json.Marshal(reqBody) if err != nil { return fmt.Errorf("failed to marshal request: %w", err) } req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData)) if err != nil { return fmt.Errorf("failed to create request: %w", err) } req.Header.Set("Content-Type", "application/json") if apiConfig != nil && apiConfig.ApiKey != nil { req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) } resp, err := n.httpClient.Do(req) if err != nil { return fmt.Errorf("failed to send request: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) } scanner := bufio.NewScanner(resp.Body) for scanner.Scan() { line := scanner.Text() if !strings.HasPrefix(line, "data:") { continue } data := strings.TrimSpace(line[5:]) if data == "[DONE]" { break } var event map[string]interface{} if err = json.Unmarshal([]byte(data), &event); err != nil { continue } choices, ok := event["choices"].([]interface{}) if !ok || len(choices) == 0 { continue } firstChoice, ok := choices[0].(map[string]interface{}) if !ok { continue } delta, ok := firstChoice["delta"].(map[string]interface{}) if !ok { continue } reasoningContent, ok := delta["reasoning_content"].(string) if ok && reasoningContent != "" { if err := sender(nil, &reasoningContent); err != nil { return err } } content, ok := delta["content"].(string) if ok && content != "" { if err := sender(&content, nil); err != nil { return err } } finishReason, ok := firstChoice["finish_reason"].(string) if ok && finishReason != "" { break } } endOfStream := "[DONE]" if err = sender(&endOfStream, nil); err != nil { return err } return scanner.Err() } func (n NvidiaModel) Encode(modelName *string, texts []string, apiConfig *APIConfig, embeddingConfig *EmbeddingConfig) ([][]float64, error) { return nil, fmt.Errorf("no such method") } func (n NvidiaModel) Rerank(modelName *string, query string, texts []string, apiConfig *APIConfig) ([]float64, error) { return nil, fmt.Errorf("no such method") } func (n NvidiaModel) ListModels(apiConfig *APIConfig) ([]string, error) { return nil, fmt.Errorf("no such method") } func (n NvidiaModel) Balance(apiConfig *APIConfig) (map[string]interface{}, error) { return nil, fmt.Errorf("no such method") } func (n NvidiaModel) CheckConnection(apiConfig *APIConfig) error { return fmt.Errorf("no such method") }