mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-03-13 11:07:46 +08:00
Refact: switch from google-generativeai to google-genai (#13140)
### What problem does this PR solve? Refact: switch from google-generativeai to google-genai #13132 Refact: comment out unused pywencai. ### Type of change - [x] Refactoring
This commit is contained in:
@ -76,8 +76,6 @@ class AgentParam(LLMParam, ToolParamBase):
|
||||
self.mcp = []
|
||||
self.max_rounds = 5
|
||||
self.description = ""
|
||||
self.custom_header = {}
|
||||
|
||||
|
||||
|
||||
class Agent(LLM, ToolBase):
|
||||
@ -107,8 +105,7 @@ class Agent(LLM, ToolBase):
|
||||
|
||||
for mcp in self._param.mcp:
|
||||
_, mcp_server = MCPServerService.get_by_id(mcp["mcp_id"])
|
||||
custom_header = self._param.custom_header
|
||||
tool_call_session = MCPToolCallSession(mcp_server, mcp_server.variables, custom_header)
|
||||
tool_call_session = MCPToolCallSession(mcp_server, mcp_server.variables)
|
||||
for tnm, meta in mcp["tools"].items():
|
||||
self.tool_meta.append(mcp_tool_metadata_to_openai_tool(meta))
|
||||
self.tools[tnm] = tool_call_session
|
||||
|
||||
@ -18,7 +18,7 @@ import os
|
||||
import time
|
||||
from abc import ABC
|
||||
import pandas as pd
|
||||
import pywencai
|
||||
# import pywencai
|
||||
|
||||
from agent.tools.base import ToolParamBase, ToolMeta, ToolBase
|
||||
from common.connection_utils import timeout
|
||||
@ -84,7 +84,8 @@ class WenCai(ToolBase, ABC):
|
||||
|
||||
try:
|
||||
wencai_res = []
|
||||
res = pywencai.get(query=kwargs["query"], query_type=self._param.query_type, perpage=self._param.top_n)
|
||||
# res = pywencai.get(query=kwargs["query"], query_type=self._param.query_type, perpage=self._param.top_n)
|
||||
res = []
|
||||
if self.check_if_canceled("WenCai processing"):
|
||||
return
|
||||
|
||||
|
||||
@ -37,9 +37,10 @@ dependencies = [
|
||||
"flask-login==0.6.3",
|
||||
"flask-mail>=0.10.0",
|
||||
"flask-session==0.8.0",
|
||||
"google-api-python-client>=2.190.0,<3.0.0",
|
||||
"google-auth-oauthlib>=1.2.0,<2.0.0",
|
||||
"google-cloud-storage>=2.19.0,<3.0.0",
|
||||
"google-genai>=1.41.0,<2.0.0",
|
||||
"google-generativeai>=0.8.1,<0.9.0", # Needed for cv_model and embedding_model
|
||||
"google-search-results==2.4.2",
|
||||
"graspologic @ git+https://github.com/yuzhichang/graspologic.git@38e680cab72bc9fb68a7992c3bcc2d53b24e42fd",
|
||||
"groq==0.9.0",
|
||||
@ -107,7 +108,6 @@ dependencies = [
|
||||
"tencentcloud-sdk-python==3.0.1478",
|
||||
"tika==2.6.0",
|
||||
"valkey==6.0.2",
|
||||
"vertexai==1.70.0",
|
||||
"volcengine==1.0.194",
|
||||
"voyageai==0.2.3",
|
||||
"webdav4>=0.10.0,<0.11.0",
|
||||
@ -279,4 +279,4 @@ exclude_lines = [
|
||||
# HTML report configuration
|
||||
directory = "htmlcov"
|
||||
title = "Test Coverage Report"
|
||||
# extra_css = "custom.css" # Optional custom CSS
|
||||
# extra_css = "custom.css" # Optional custom CSS
|
||||
|
||||
@ -1210,15 +1210,12 @@ class GoogleCV(AnthropicCV, GeminiCV):
|
||||
else:
|
||||
self.client = AnthropicVertex(region=region, project_id=project_id)
|
||||
else:
|
||||
import vertexai.generative_models as glm
|
||||
from google.cloud import aiplatform
|
||||
|
||||
from google import genai
|
||||
if access_token:
|
||||
credits = service_account.Credentials.from_service_account_info(access_token)
|
||||
aiplatform.init(credentials=credits, project=project_id, location=region)
|
||||
credits = service_account.Credentials.from_service_account_info(access_token, scopes=scopes)
|
||||
self.client = genai.Client(vertexai=True, project=project_id, location=region, credentials=credits)
|
||||
else:
|
||||
aiplatform.init(project=project_id, location=region)
|
||||
self.client = glm.GenerativeModel(model_name=self.model_name)
|
||||
self.client = genai.Client(vertexai=True, project=project_id, location=region)
|
||||
Base.__init__(self, **kwargs)
|
||||
|
||||
def describe(self, image):
|
||||
|
||||
@ -20,7 +20,6 @@ from abc import ABC
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import dashscope
|
||||
import google.generativeai as genai
|
||||
import numpy as np
|
||||
import requests
|
||||
from ollama import Client
|
||||
@ -543,31 +542,87 @@ class BedrockEmbed(Base):
|
||||
class GeminiEmbed(Base):
|
||||
_FACTORY_NAME = "Gemini"
|
||||
|
||||
def __init__(self, key, model_name="models/text-embedding-004", **kwargs):
|
||||
def __init__(self, key, model_name="gemini-embedding-001", **kwargs):
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
|
||||
self.key = key
|
||||
self.model_name = "models/" + model_name
|
||||
self.model_name = model_name[7:] if model_name.startswith("models/") else model_name
|
||||
self.client = genai.Client(api_key=self.key)
|
||||
self.types = types
|
||||
|
||||
@staticmethod
|
||||
def _parse_embedding_vector(embedding):
|
||||
if isinstance(embedding, dict):
|
||||
values = embedding.get("values")
|
||||
if values is None:
|
||||
values = embedding.get("embedding")
|
||||
if values is not None:
|
||||
return values
|
||||
|
||||
values = getattr(embedding, "values", None)
|
||||
if values is None:
|
||||
values = getattr(embedding, "embedding", None)
|
||||
if values is not None:
|
||||
return values
|
||||
|
||||
raise TypeError(f"Unsupported embedding payload: {type(embedding)}")
|
||||
|
||||
@classmethod
|
||||
def _parse_embedding_response(cls, response):
|
||||
if response is None:
|
||||
raise ValueError("Embedding response is empty")
|
||||
|
||||
embeddings = getattr(response, "embeddings", None)
|
||||
if embeddings is None and isinstance(response, dict):
|
||||
embeddings = response.get("embeddings")
|
||||
|
||||
if embeddings is None:
|
||||
return [cls._parse_embedding_vector(response)]
|
||||
|
||||
return [cls._parse_embedding_vector(item) for item in embeddings]
|
||||
|
||||
def _build_embedding_config(self):
|
||||
task_type = "RETRIEVAL_DOCUMENT"
|
||||
if hasattr(self.types, "TaskType"):
|
||||
task_type = getattr(self.types.TaskType, "RETRIEVAL_DOCUMENT", task_type)
|
||||
try:
|
||||
return self.types.EmbedContentConfig(task_type=task_type, title="Embedding of single string")
|
||||
except TypeError:
|
||||
# Compatible with SDK versions that do not accept title in embed config.
|
||||
return self.types.EmbedContentConfig(task_type=task_type)
|
||||
|
||||
def encode(self, texts: list):
|
||||
texts = [truncate(t, 2048) for t in texts]
|
||||
token_count = sum(num_tokens_from_string(text) for text in texts)
|
||||
genai.configure(api_key=self.key)
|
||||
config = self._build_embedding_config()
|
||||
batch_size = 16
|
||||
ress = []
|
||||
for i in range(0, len(texts), batch_size):
|
||||
result = genai.embed_content(model=self.model_name, content=texts[i : i + batch_size], task_type="retrieval_document", title="Embedding of single string")
|
||||
result = None
|
||||
try:
|
||||
ress.extend(result["embedding"])
|
||||
result = self.client.models.embed_content(
|
||||
model=self.model_name,
|
||||
contents=texts[i : i + batch_size],
|
||||
config=config,
|
||||
)
|
||||
ress.extend(self._parse_embedding_response(result))
|
||||
except Exception as _e:
|
||||
log_exception(_e, result)
|
||||
raise Exception(f"Error: {result}")
|
||||
return np.array(ress), token_count
|
||||
|
||||
def encode_queries(self, text):
|
||||
genai.configure(api_key=self.key)
|
||||
result = genai.embed_content(model=self.model_name, content=truncate(text, 2048), task_type="retrieval_document", title="Embedding of single string")
|
||||
config = self._build_embedding_config()
|
||||
result = None
|
||||
token_count = num_tokens_from_string(text)
|
||||
try:
|
||||
return np.array(result["embedding"]), token_count
|
||||
result = self.client.models.embed_content(
|
||||
model=self.model_name,
|
||||
contents=[truncate(text, 2048)],
|
||||
config=config,
|
||||
)
|
||||
return np.array(self._parse_embedding_response(result)[0]), token_count
|
||||
except Exception as _e:
|
||||
log_exception(_e, result)
|
||||
raise Exception(f"Error: {result}")
|
||||
|
||||
Reference in New Issue
Block a user