mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-03-13 11:07:46 +08:00
Refact: switch from google-generativeai to google-genai (#13140)
### What problem does this PR solve? Refact: switch from google-generativeai to google-genai #13132 Refact: comment out unused pywencai. ### Type of change - [x] Refactoring
This commit is contained in:
@ -76,8 +76,6 @@ class AgentParam(LLMParam, ToolParamBase):
|
||||
self.mcp = []
|
||||
self.max_rounds = 5
|
||||
self.description = ""
|
||||
self.custom_header = {}
|
||||
|
||||
|
||||
|
||||
class Agent(LLM, ToolBase):
|
||||
@ -107,8 +105,7 @@ class Agent(LLM, ToolBase):
|
||||
|
||||
for mcp in self._param.mcp:
|
||||
_, mcp_server = MCPServerService.get_by_id(mcp["mcp_id"])
|
||||
custom_header = self._param.custom_header
|
||||
tool_call_session = MCPToolCallSession(mcp_server, mcp_server.variables, custom_header)
|
||||
tool_call_session = MCPToolCallSession(mcp_server, mcp_server.variables)
|
||||
for tnm, meta in mcp["tools"].items():
|
||||
self.tool_meta.append(mcp_tool_metadata_to_openai_tool(meta))
|
||||
self.tools[tnm] = tool_call_session
|
||||
|
||||
@ -18,7 +18,7 @@ import os
|
||||
import time
|
||||
from abc import ABC
|
||||
import pandas as pd
|
||||
import pywencai
|
||||
# import pywencai
|
||||
|
||||
from agent.tools.base import ToolParamBase, ToolMeta, ToolBase
|
||||
from common.connection_utils import timeout
|
||||
@ -84,7 +84,8 @@ class WenCai(ToolBase, ABC):
|
||||
|
||||
try:
|
||||
wencai_res = []
|
||||
res = pywencai.get(query=kwargs["query"], query_type=self._param.query_type, perpage=self._param.top_n)
|
||||
# res = pywencai.get(query=kwargs["query"], query_type=self._param.query_type, perpage=self._param.top_n)
|
||||
res = []
|
||||
if self.check_if_canceled("WenCai processing"):
|
||||
return
|
||||
|
||||
|
||||
@ -37,9 +37,10 @@ dependencies = [
|
||||
"flask-login==0.6.3",
|
||||
"flask-mail>=0.10.0",
|
||||
"flask-session==0.8.0",
|
||||
"google-api-python-client>=2.190.0,<3.0.0",
|
||||
"google-auth-oauthlib>=1.2.0,<2.0.0",
|
||||
"google-cloud-storage>=2.19.0,<3.0.0",
|
||||
"google-genai>=1.41.0,<2.0.0",
|
||||
"google-generativeai>=0.8.1,<0.9.0", # Needed for cv_model and embedding_model
|
||||
"google-search-results==2.4.2",
|
||||
"graspologic @ git+https://github.com/yuzhichang/graspologic.git@38e680cab72bc9fb68a7992c3bcc2d53b24e42fd",
|
||||
"groq==0.9.0",
|
||||
@ -107,7 +108,6 @@ dependencies = [
|
||||
"tencentcloud-sdk-python==3.0.1478",
|
||||
"tika==2.6.0",
|
||||
"valkey==6.0.2",
|
||||
"vertexai==1.70.0",
|
||||
"volcengine==1.0.194",
|
||||
"voyageai==0.2.3",
|
||||
"webdav4>=0.10.0,<0.11.0",
|
||||
@ -279,4 +279,4 @@ exclude_lines = [
|
||||
# HTML report configuration
|
||||
directory = "htmlcov"
|
||||
title = "Test Coverage Report"
|
||||
# extra_css = "custom.css" # Optional custom CSS
|
||||
# extra_css = "custom.css" # Optional custom CSS
|
||||
|
||||
@ -1210,15 +1210,12 @@ class GoogleCV(AnthropicCV, GeminiCV):
|
||||
else:
|
||||
self.client = AnthropicVertex(region=region, project_id=project_id)
|
||||
else:
|
||||
import vertexai.generative_models as glm
|
||||
from google.cloud import aiplatform
|
||||
|
||||
from google import genai
|
||||
if access_token:
|
||||
credits = service_account.Credentials.from_service_account_info(access_token)
|
||||
aiplatform.init(credentials=credits, project=project_id, location=region)
|
||||
credits = service_account.Credentials.from_service_account_info(access_token, scopes=scopes)
|
||||
self.client = genai.Client(vertexai=True, project=project_id, location=region, credentials=credits)
|
||||
else:
|
||||
aiplatform.init(project=project_id, location=region)
|
||||
self.client = glm.GenerativeModel(model_name=self.model_name)
|
||||
self.client = genai.Client(vertexai=True, project=project_id, location=region)
|
||||
Base.__init__(self, **kwargs)
|
||||
|
||||
def describe(self, image):
|
||||
|
||||
@ -20,7 +20,6 @@ from abc import ABC
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import dashscope
|
||||
import google.generativeai as genai
|
||||
import numpy as np
|
||||
import requests
|
||||
from ollama import Client
|
||||
@ -543,31 +542,87 @@ class BedrockEmbed(Base):
|
||||
class GeminiEmbed(Base):
|
||||
_FACTORY_NAME = "Gemini"
|
||||
|
||||
def __init__(self, key, model_name="models/text-embedding-004", **kwargs):
|
||||
def __init__(self, key, model_name="gemini-embedding-001", **kwargs):
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
|
||||
self.key = key
|
||||
self.model_name = "models/" + model_name
|
||||
self.model_name = model_name[7:] if model_name.startswith("models/") else model_name
|
||||
self.client = genai.Client(api_key=self.key)
|
||||
self.types = types
|
||||
|
||||
@staticmethod
|
||||
def _parse_embedding_vector(embedding):
|
||||
if isinstance(embedding, dict):
|
||||
values = embedding.get("values")
|
||||
if values is None:
|
||||
values = embedding.get("embedding")
|
||||
if values is not None:
|
||||
return values
|
||||
|
||||
values = getattr(embedding, "values", None)
|
||||
if values is None:
|
||||
values = getattr(embedding, "embedding", None)
|
||||
if values is not None:
|
||||
return values
|
||||
|
||||
raise TypeError(f"Unsupported embedding payload: {type(embedding)}")
|
||||
|
||||
@classmethod
|
||||
def _parse_embedding_response(cls, response):
|
||||
if response is None:
|
||||
raise ValueError("Embedding response is empty")
|
||||
|
||||
embeddings = getattr(response, "embeddings", None)
|
||||
if embeddings is None and isinstance(response, dict):
|
||||
embeddings = response.get("embeddings")
|
||||
|
||||
if embeddings is None:
|
||||
return [cls._parse_embedding_vector(response)]
|
||||
|
||||
return [cls._parse_embedding_vector(item) for item in embeddings]
|
||||
|
||||
def _build_embedding_config(self):
|
||||
task_type = "RETRIEVAL_DOCUMENT"
|
||||
if hasattr(self.types, "TaskType"):
|
||||
task_type = getattr(self.types.TaskType, "RETRIEVAL_DOCUMENT", task_type)
|
||||
try:
|
||||
return self.types.EmbedContentConfig(task_type=task_type, title="Embedding of single string")
|
||||
except TypeError:
|
||||
# Compatible with SDK versions that do not accept title in embed config.
|
||||
return self.types.EmbedContentConfig(task_type=task_type)
|
||||
|
||||
def encode(self, texts: list):
|
||||
texts = [truncate(t, 2048) for t in texts]
|
||||
token_count = sum(num_tokens_from_string(text) for text in texts)
|
||||
genai.configure(api_key=self.key)
|
||||
config = self._build_embedding_config()
|
||||
batch_size = 16
|
||||
ress = []
|
||||
for i in range(0, len(texts), batch_size):
|
||||
result = genai.embed_content(model=self.model_name, content=texts[i : i + batch_size], task_type="retrieval_document", title="Embedding of single string")
|
||||
result = None
|
||||
try:
|
||||
ress.extend(result["embedding"])
|
||||
result = self.client.models.embed_content(
|
||||
model=self.model_name,
|
||||
contents=texts[i : i + batch_size],
|
||||
config=config,
|
||||
)
|
||||
ress.extend(self._parse_embedding_response(result))
|
||||
except Exception as _e:
|
||||
log_exception(_e, result)
|
||||
raise Exception(f"Error: {result}")
|
||||
return np.array(ress), token_count
|
||||
|
||||
def encode_queries(self, text):
|
||||
genai.configure(api_key=self.key)
|
||||
result = genai.embed_content(model=self.model_name, content=truncate(text, 2048), task_type="retrieval_document", title="Embedding of single string")
|
||||
config = self._build_embedding_config()
|
||||
result = None
|
||||
token_count = num_tokens_from_string(text)
|
||||
try:
|
||||
return np.array(result["embedding"]), token_count
|
||||
result = self.client.models.embed_content(
|
||||
model=self.model_name,
|
||||
contents=[truncate(text, 2048)],
|
||||
config=config,
|
||||
)
|
||||
return np.array(self._parse_embedding_response(result)[0]), token_count
|
||||
except Exception as _e:
|
||||
log_exception(_e, result)
|
||||
raise Exception(f"Error: {result}")
|
||||
|
||||
Reference in New Issue
Block a user