Compare commits
7 Commits
feat/conve
...
feat/brows
| Author | SHA1 | Date | |
|---|---|---|---|
| 6b466dc865 | |||
| 86c593f120 | |||
| 702413c2af | |||
| d92dacffb5 | |||
| 106c6e4d65 | |||
| 8c848401a6 | |||
| ee46dd8417 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -60,3 +60,4 @@ values-dev.yaml
|
||||
|
||||
*.tsbuildinfo
|
||||
|
||||
*venv/
|
||||
1
py_package/browser_agent/.python-version
Normal file
1
py_package/browser_agent/.python-version
Normal file
@ -0,0 +1 @@
|
||||
3.12
|
||||
0
py_package/browser_agent/__init__.py
Normal file
0
py_package/browser_agent/__init__.py
Normal file
30
py_package/browser_agent/agent.py
Normal file
30
py_package/browser_agent/agent.py
Normal file
@ -0,0 +1,30 @@
|
||||
from pydantic import BaseModel
|
||||
from browser_agent.index import RunBrowserUseAgentCtx,LLMConfig
|
||||
from typing import AsyncGenerator,Dict,Optional
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import datetime
|
||||
from stream_helper.schema import SSEData
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
class BrowserAgentBase(BaseModel,ABC):
|
||||
query: str
|
||||
conversation_id: str = ''
|
||||
llm: BaseChatModel
|
||||
browser_session_endpoint: str
|
||||
endpoint_header: Dict[str, str] = {}
|
||||
max_steps: int = 20
|
||||
system_prompt: str = None
|
||||
extend_prompt: str = None
|
||||
@abstractmethod
|
||||
async def save_cookies(self):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_llm(self)->BaseChatModel:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_system_prompt(self)->str:
|
||||
pass
|
||||
|
||||
async def run(self)->AsyncGenerator[SSEData,None]:
|
||||
pass
|
||||
158
py_package/browser_agent/browser.py
Normal file
158
py_package/browser_agent/browser.py
Normal file
@ -0,0 +1,158 @@
|
||||
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
|
||||
# Licensed under the 【火山方舟】原型应用软件自用许可协议
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://www.volcengine.com/docs/82379/1433703
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import aiohttp
|
||||
from playwright.async_api import async_playwright
|
||||
from playwright.async_api import Browser
|
||||
from playwright.async_api._generated import Playwright as AsyncPlaywright
|
||||
|
||||
browser_ready_event = asyncio.Event()
|
||||
|
||||
class BrowserWrapper:
|
||||
def __init__(self, port, browser: Browser, playwright: AsyncPlaywright, remote_browser_id: str = None, endpoint: str = None):
|
||||
self.port = port
|
||||
self.browser = browser
|
||||
self.playwright = playwright
|
||||
self.remote_browser_id = remote_browser_id
|
||||
self.endpoint = endpoint
|
||||
|
||||
async def stop(self):
|
||||
if self.browser:
|
||||
logging.info(f"Closing browser on port {self.port}...")
|
||||
# try:
|
||||
# for context in self.browser.contexts:
|
||||
# await context.close()
|
||||
# except Exception as e:
|
||||
# logging.error(f"Error closing contexts: {e}")
|
||||
await self.browser.close()
|
||||
logging.info(f"Browser on port {self.port} closed successfully")
|
||||
if self.playwright:
|
||||
logging.info(f"Closing playwright session {self.port}...")
|
||||
await self.playwright.stop()
|
||||
logging.info(
|
||||
f"Paywright session on port {self.port} closed successfully")
|
||||
if self.remote_browser_id and self.endpoint:
|
||||
logging.info(f"Closing remote browser {self.remote_browser_id}...")
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
delete_url = f"{self.endpoint}/{self.remote_browser_id}"
|
||||
async with session.delete(delete_url, timeout=30) as response:
|
||||
if response.status == 200:
|
||||
logging.info(f"Remote browser {self.remote_browser_id} closed successfully")
|
||||
else:
|
||||
error_text = await response.text()
|
||||
logging.error(f"Failed to close remote browser. Status: {response.status}, Error: {error_text}")
|
||||
except Exception as e:
|
||||
logging.error(f"Error closing remote browser {self.remote_browser_id}: {e}")
|
||||
|
||||
|
||||
async def start_local_browser(port):
|
||||
logging.info(f"Attempting to start browser on port {port}")
|
||||
p = None
|
||||
browser = None
|
||||
|
||||
try:
|
||||
p = await async_playwright().start()
|
||||
|
||||
w = BrowserWrapper(port, None, p)
|
||||
|
||||
try:
|
||||
# Configure proxy if environment variables are set
|
||||
proxy_config = None
|
||||
proxy_server = os.getenv('PROXY_SERVER')
|
||||
if proxy_server:
|
||||
proxy_config = {
|
||||
'server': proxy_server,
|
||||
'bypass': '127.0.0.1,localhost'
|
||||
}
|
||||
|
||||
browser = await p.chromium.launch(
|
||||
# executable_path="/opt/chromium.org/browser_use/chromium/chromium-browser-use",
|
||||
headless=False,
|
||||
args=[
|
||||
f'--remote-debugging-port={port}',
|
||||
'--remote-allow-origins=*',
|
||||
'--remote-debugging-address=0.0.0.0',
|
||||
'--no-sandbox'
|
||||
],
|
||||
proxy=proxy_config
|
||||
)
|
||||
|
||||
w = BrowserWrapper(port, browser, p)
|
||||
|
||||
logging.info(f"Browser launched successfully on port {port}")
|
||||
|
||||
# Verify CDP is actually running
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(f"http://127.0.0.1:{port}/json/version", timeout=5) as response:
|
||||
if response.status == 200:
|
||||
version_info = await response.json()
|
||||
logging.info(
|
||||
f"successfully connected to cdp: {version_info}")
|
||||
else:
|
||||
logging.error(
|
||||
f"Failed to get CDP version. Status: {response.status}")
|
||||
except Exception as cdp_e:
|
||||
logging.error(f"Error checking CDP availability: {cdp_e}")
|
||||
|
||||
logging.info('closing playwright driver and browser')
|
||||
await w.stop()
|
||||
raise
|
||||
return BrowserWrapper(port, browser, p)
|
||||
|
||||
except Exception as launch_e:
|
||||
logging.error(f"Failed to launch browser: {launch_e}")
|
||||
browser_ready_event.clear()
|
||||
|
||||
logging.info('closing playwright driver and browser')
|
||||
await w.stop()
|
||||
raise
|
||||
except Exception as p_e:
|
||||
logging.error(f"Playwright initialization error: {p_e}")
|
||||
browser_ready_event.clear()
|
||||
|
||||
logging.info('closing playwright driver and browser')
|
||||
await w.stop()
|
||||
raise
|
||||
|
||||
async def start_remote_browser(endpoint):
|
||||
logging.info(f"Attempting to create browser via remote endpoint: {endpoint}")
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
data = {"timeout": 30}
|
||||
|
||||
async with session.post(
|
||||
endpoint,
|
||||
json=data,
|
||||
headers={'Content-Type': 'application/json'},
|
||||
timeout=30
|
||||
) as response:
|
||||
if response.status == 200:
|
||||
result = await response.json()
|
||||
logging.info(f"Browser created successfully: {result}")
|
||||
id_value = result.get('id', None)
|
||||
if id_value is None:
|
||||
result['id'] = 'id'
|
||||
return BrowserWrapper(None, None, None, result['id'], endpoint)
|
||||
else:
|
||||
error_text = await response.text()
|
||||
logging.error(f"Failed to create browser. Status: {response.status}, Error: {error_text}")
|
||||
raise Exception(f"Failed to create browser: {response.status} - {error_text}")
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error creating remote browser: {e}")
|
||||
raise
|
||||
|
||||
@ -0,0 +1,6 @@
|
||||
from browser_use import Agent
|
||||
|
||||
class MyAgent(Agent):
|
||||
# browser use 0.2.5版本 移除模型检测
|
||||
def _test_tool_calling_method(self, method: str) -> bool:
|
||||
return True
|
||||
185
py_package/browser_agent/browser_use_custom/controller/screen.py
Normal file
185
py_package/browser_agent/browser_use_custom/controller/screen.py
Normal file
@ -0,0 +1,185 @@
|
||||
import asyncio
|
||||
import io,base64
|
||||
import logging,time
|
||||
from typing import Optional, Tuple
|
||||
from PIL import Image, ImageChops
|
||||
import numpy as np
|
||||
from pydantic import BaseModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class VisualChangeDetector:
|
||||
"""视觉变化检测器(基于屏幕截图比对)"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
similarity_threshold: float = 0.95,
|
||||
pixel_change_threshold: int = 1000,
|
||||
resize_width: Optional[int] = 800
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
similarity_threshold: 相似度阈值(0-1)
|
||||
pixel_change_threshold: 变化像素数量阈值
|
||||
resize_width: 缩放宽度(提升性能)
|
||||
"""
|
||||
self.similarity_threshold = similarity_threshold
|
||||
self.pixel_threshold = pixel_change_threshold
|
||||
self.resize_width = resize_width
|
||||
|
||||
async def capture_screenshot(self, page, upload_service=None,logger=None) -> Image.Image:
|
||||
"""捕获页面截图(自动优化)"""
|
||||
screenshot_bytes = await page.screenshot(timeout=10000,type='jpeg',quality=50,full_page=False,animations='disabled')
|
||||
img = Image.open(io.BytesIO(screenshot_bytes))
|
||||
|
||||
# 统一缩放尺寸(提升比对性能)
|
||||
if self.resize_width:
|
||||
w, h = img.size
|
||||
new_h = int(h * (self.resize_width / w))
|
||||
img = img.resize((self.resize_width, new_h))
|
||||
# if upload_service:
|
||||
# img_byte_arr = io.BytesIO()
|
||||
# img.save(img_byte_arr, format='JPEG')
|
||||
# binary_data = img_byte_arr.getvalue()
|
||||
# if not binary_data:
|
||||
# return
|
||||
# file_name = f"screenshot_{int(time.time())}.jpg"
|
||||
# base64_str = base64.b64encode(binary_data).decode('ascii')
|
||||
# if logger:
|
||||
# logger.info(f'upload screenshot to {file_name}')
|
||||
# else:
|
||||
# logging.info(f'upload screenshot to {file_name}')
|
||||
# try:
|
||||
# await upload_service.upload_file('',file_name,base64_content=base64_str)
|
||||
# except Exception as e:
|
||||
# if logger:
|
||||
# logger.error(f"Failed to upload screenshot to {file_name}: {e}")
|
||||
# else:
|
||||
# logging.error(f"Failed to upload screenshot to {file_name}: {e}")
|
||||
# return img.convert('RGB')
|
||||
return img.convert('RGB') # 确保RGB模式
|
||||
|
||||
def calculate_change(
|
||||
self,
|
||||
img1: Image.Image,
|
||||
img2: Image.Image
|
||||
) -> Tuple[float, Image.Image]:
|
||||
"""
|
||||
计算两图差异
|
||||
|
||||
Returns:
|
||||
tuple: (相似度百分比, 差异图)
|
||||
"""
|
||||
# 确保尺寸一致
|
||||
if img1.size != img2.size:
|
||||
img2 = img2.resize(img1.size)
|
||||
|
||||
arr1 = np.array(img1) # shape: (height, width, 3)
|
||||
arr2 = np.array(img2) # shape: (height, width, 3)
|
||||
|
||||
# 计算绝对差异(每个通道单独计算)
|
||||
diff = np.abs(arr1.astype(int) - arr2.astype(int))
|
||||
|
||||
# 变化像素计算(任一通道有变化即视为该像素变化)
|
||||
# 先检查每个像素的3个通道是否有变化
|
||||
pixel_changed = np.any(diff > 0, axis=2) # shape: (height, width)
|
||||
changed_pixels = np.sum(pixel_changed) # 变化像素总数
|
||||
|
||||
# 总像素数(不需要除以3,因为pixel_changed已经是像素级别的判断)
|
||||
total_pixels = arr1.shape[0] * arr1.shape[1]
|
||||
|
||||
# 生成差异图(可视化用)
|
||||
diff_img = ImageChops.difference(img1, img2)
|
||||
|
||||
# 计算相似度
|
||||
similarity = 1 - (changed_pixels / total_pixels)
|
||||
|
||||
return similarity, diff_img
|
||||
|
||||
async def detect_change(
|
||||
self,
|
||||
browser_session,
|
||||
reference_img: Optional[Image.Image] = None,
|
||||
max_attempts: int = 5,
|
||||
attempt_interval: float = 1.5,
|
||||
upload_service = None,
|
||||
logger = None
|
||||
) -> Tuple[bool, Optional[Image.Image], Optional[Image.Image]]:
|
||||
"""
|
||||
检测视觉变化
|
||||
|
||||
Args:
|
||||
page: 浏览器页面对象
|
||||
reference_img: 基准截图(None则自动捕获)
|
||||
max_attempts: 最大检测次数
|
||||
attempt_interval: 检测间隔(秒)
|
||||
|
||||
Returns:
|
||||
tuple: (是否变化, 基准截图, 差异图)
|
||||
"""
|
||||
# 首次捕获基准图
|
||||
if reference_img is None:
|
||||
page = await browser_session.get_current_page()
|
||||
reference_img = await self.capture_screenshot(page,upload_service=upload_service,logger=logger)
|
||||
logger.info("start detect change")
|
||||
for attempt in range(max_attempts):
|
||||
await asyncio.sleep(attempt_interval)
|
||||
|
||||
# 捕获当前截图
|
||||
page = await browser_session.get_current_page()
|
||||
current_img = await self.capture_screenshot(page,upload_service=upload_service,logger=logger)
|
||||
if not current_img:
|
||||
logger.error(f"Failed to capture screenshot on attempt {attempt + 1}")
|
||||
continue
|
||||
# 计算变化
|
||||
similarity, diff_img = self.calculate_change(reference_img, current_img)
|
||||
logger.info(f"Attempt {attempt + 1}: 相似度 {similarity:.2f}, 变化像素 {np.sum(np.array(diff_img) > 0)}")
|
||||
# 判断是否显著变化
|
||||
if similarity < self.similarity_threshold:
|
||||
diff_pixels = np.sum(np.array(diff_img) > 0)
|
||||
if diff_pixels > self.pixel_threshold:
|
||||
logger.info(f"视觉变化 detected (相似度: {similarity:.2f}, 变化像素: {diff_pixels})")
|
||||
return True, reference_img, diff_img
|
||||
|
||||
return False, reference_img, None
|
||||
|
||||
class WaitForVisualChangeAction(BaseModel):
|
||||
"""等待视觉变化的参数模型"""
|
||||
timeout: int = 30
|
||||
check_interval: float = 2
|
||||
similarity_threshold: float = 0.95
|
||||
pixel_threshold: int = 5000
|
||||
|
||||
async def wait_for_visual_change(
|
||||
params: WaitForVisualChangeAction,
|
||||
browser_session,
|
||||
initial_screenshot: Optional[Image.Image] = None,
|
||||
upload_service = None,
|
||||
logger = None
|
||||
) -> Tuple[bool, Optional[Image.Image], Optional[Image.Image]]:
|
||||
"""
|
||||
等待页面视觉变化(完整工作流)
|
||||
|
||||
Args:
|
||||
params: 配置参数
|
||||
browser_session: 浏览器会话
|
||||
initial_screenshot: 初始截图(可选)
|
||||
|
||||
Returns:
|
||||
tuple: (是否变化, 初始截图, 差异图)
|
||||
"""
|
||||
detector = VisualChangeDetector(
|
||||
similarity_threshold=params.similarity_threshold,
|
||||
pixel_change_threshold=params.pixel_threshold
|
||||
)
|
||||
|
||||
max_attempts = int(params.timeout / params.check_interval)
|
||||
|
||||
return await detector.detect_change(
|
||||
browser_session=browser_session,
|
||||
reference_img=initial_screenshot,
|
||||
max_attempts=max_attempts,
|
||||
attempt_interval=params.check_interval,
|
||||
upload_service=upload_service,
|
||||
logger=logger,
|
||||
)
|
||||
@ -0,0 +1,204 @@
|
||||
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
|
||||
# Licensed under the 【火山方舟】原型应用软件自用许可协议
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://www.volcengine.com/docs/82379/1433703
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import io
|
||||
import logging
|
||||
from typing import Optional, Tuple
|
||||
from PIL import Image, ImageChops
|
||||
import numpy as np
|
||||
from pydantic import BaseModel
|
||||
import logging
|
||||
import asyncio
|
||||
import re
|
||||
import markdownify
|
||||
from bs4 import BeautifulSoup
|
||||
from typing import Optional
|
||||
from browser_use.browser import BrowserSession
|
||||
from browser_use.controller.views import SearchGoogleAction
|
||||
from browser_use.agent.views import ActionResult
|
||||
from browser_use.controller.service import Controller
|
||||
from playwright.async_api import Page
|
||||
from pydantic import BaseModel,Field
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
from browser_agent.browser_use_custom.i18n import _
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
from browser_agent.browser_use_custom.controller.screen import VisualChangeDetector,wait_for_visual_change,WaitForVisualChangeAction
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class PauseAction(BaseModel):
|
||||
reason: str
|
||||
|
||||
class WaitForLoginAction(BaseModel):
|
||||
"""Action parameters for waiting for login completion."""
|
||||
timeout: int = Field(
|
||||
default=300,
|
||||
description="Maximum time to wait for login completion in seconds"
|
||||
)
|
||||
check_interval: int = Field(
|
||||
default=5,
|
||||
description="Interval between checks for URL changes in seconds"
|
||||
)
|
||||
|
||||
class MyController(Controller):
|
||||
"""Custom controller extending base Controller with additional actions.
|
||||
|
||||
Features:
|
||||
- Inherits core controller functionality
|
||||
- Adds custom pause action handler
|
||||
- Maintains action registry with exclusion support
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
exclude_actions: list[str] = [],
|
||||
output_model: type[BaseModel] | None = None,
|
||||
):
|
||||
super().__init__(exclude_actions, output_model)
|
||||
# Basic Navigation Actions
|
||||
@self.registry.action(
|
||||
_('Search the query in Baidu in the current tab, the query should be a search query like humans search in Baidu, concrete and not vague or super long. More the single most important items.'),
|
||||
param_model=SearchGoogleAction,
|
||||
)
|
||||
async def search_google(params: SearchGoogleAction, browser_session: BrowserSession):
|
||||
search_url = f'https://www.baidu.com/s?wd={params.query}'
|
||||
|
||||
page = await browser_session.get_current_page()
|
||||
await page.goto(search_url)
|
||||
await page.wait_for_load_state()
|
||||
msg = _('🔍 Searched for "{query}" in Baidu').format(query=params.query)
|
||||
logger.info(msg)
|
||||
return ActionResult(extracted_content=msg, include_in_memory=True)
|
||||
# Content Actions
|
||||
@self.registry.action(
|
||||
_('Extract page content to retrieve specific information from the page, e.g. all company names, a specific description, all information about xyc, 4 links with companies in structured format. Use include_links true if the goal requires links'),
|
||||
)
|
||||
async def extract_content(
|
||||
goal: str,
|
||||
page: Page,
|
||||
page_extraction_llm: BaseChatModel,
|
||||
include_links: bool = False,
|
||||
):
|
||||
raw_content = await page.content()
|
||||
soup = BeautifulSoup(
|
||||
raw_content, 'html.parser')
|
||||
# remove all unnecessary http metadata
|
||||
for s in soup.select('script'):
|
||||
s.decompose()
|
||||
for s in soup.select('style'):
|
||||
s.decompose()
|
||||
for s in soup.select('textarea'):
|
||||
s.decompose()
|
||||
for s in soup.select('img'):
|
||||
s.decompose()
|
||||
for s in soup.find_all(style=re.compile("background-image.*")):
|
||||
s.decompose()
|
||||
content = markdownify.markdownify(str(soup))
|
||||
|
||||
# manually append iframe text into the content so it's readable by the LLM (includes cross-origin iframes)
|
||||
for iframe in page.frames:
|
||||
if iframe.url != page.url and not iframe.url.startswith('data:'):
|
||||
content += f'\n\nIFRAME {iframe.url}:\n'
|
||||
content += markdownify.markdownify(await iframe.content())
|
||||
|
||||
prompt = _('Your task is to extract the content of the page. You will be given a page and a goal and you should extract all relevant information around this goal from the page. If the goal is vague, summarize the page. Respond in json format. Extraction goal: {goal}, Page: {page}')
|
||||
template = PromptTemplate(input_variables=['goal', 'page'], template=prompt)
|
||||
try:
|
||||
output = await page_extraction_llm.ainvoke(template.format(goal=goal, page=content))
|
||||
msg = _('📄 Extracted from page\n: {content}\n').format(content=output.content)
|
||||
logger.info(msg)
|
||||
return ActionResult(extracted_content=msg, include_in_memory=True)
|
||||
except Exception as e:
|
||||
logger.debug(_('Error extracting content: {error}').format(error=e))
|
||||
msg = _('📄 Extracted from page\n: {content}\n').format(content=content)
|
||||
logger.info(msg)
|
||||
return ActionResult(extracted_content=msg)
|
||||
|
||||
@self.registry.action(
|
||||
_('Pause agent'),
|
||||
param_model=PauseAction,
|
||||
)
|
||||
async def pause(params: PauseAction):
|
||||
msg = _('👩 Pause agent, reason: {reason}').format(reason=params.reason)
|
||||
logger.info(msg)
|
||||
return ActionResult(extracted_content=msg, include_in_memory=True)
|
||||
# Login detection and waiting action
|
||||
# @self.registry.action(
|
||||
# _('Detects if current page requires login and waits for authentication to complete.Wait for login completion by monitoring URL changes.'),
|
||||
# param_model=WaitForLoginAction,
|
||||
# )
|
||||
# async def wait_for_login(params: WaitForLoginAction, browser_session: BrowserSession):
|
||||
# page = await browser_session.get_current_page()
|
||||
|
||||
# # Get initial URL for comparison
|
||||
# initial_url = page.url
|
||||
# logger.info(_('🔐 Starting login detection. Initial URL: {url}').format(url=initial_url))
|
||||
|
||||
# # Wait for URL change indicating login completion
|
||||
# msg = _('🔐 Login page detected. Waiting for authentication completion (max {timeout}s)...').format(
|
||||
# timeout=params.timeout
|
||||
# )
|
||||
# logger.info(msg)
|
||||
|
||||
# final_url = await self._wait_for_url_change(
|
||||
# page,
|
||||
# initial_url,
|
||||
# params.timeout,
|
||||
# params.check_interval
|
||||
# )
|
||||
|
||||
# if final_url and final_url != initial_url:
|
||||
# success_msg = _('✅ Login completed successfully! URL changed from {initial} to {final}').format(
|
||||
# initial=initial_url,
|
||||
# final=final_url
|
||||
# )
|
||||
# logger.info(success_msg)
|
||||
# return ActionResult(
|
||||
# extracted_content=success_msg,
|
||||
# include_in_memory=True,
|
||||
# success=True
|
||||
# )
|
||||
# else:
|
||||
# timeout_msg = _('⏰ Login timeout or no URL change detected after {timeout} seconds').format(
|
||||
# timeout=params.timeout
|
||||
# )
|
||||
# logger.warning(timeout_msg)
|
||||
# return ActionResult(
|
||||
# extracted_content=timeout_msg,
|
||||
# include_in_memory=True,
|
||||
# success=False
|
||||
# )
|
||||
|
||||
# async def _wait_for_url_change(
|
||||
# self,
|
||||
# page: Page,
|
||||
# initial_url: str,
|
||||
# timeout: int,
|
||||
# check_interval: int
|
||||
# ) -> Optional[str]:
|
||||
# """Wait for URL change indicating login completion."""
|
||||
# start_time = asyncio.get_event_loop().time()
|
||||
|
||||
# while (asyncio.get_event_loop().time() - start_time) < timeout:
|
||||
# try:
|
||||
# current_url = page.url
|
||||
|
||||
# # If URL has changed, return the new URL
|
||||
# if current_url != initial_url:
|
||||
# return current_url
|
||||
|
||||
# # Wait before checking again
|
||||
# await asyncio.sleep(check_interval)
|
||||
|
||||
# except Exception as e:
|
||||
# logger.warning(_('Error checking URL change: {error}').format(error=str(e)))
|
||||
# await asyncio.sleep(check_interval)
|
||||
|
||||
# return None # Timeout reached without URL change
|
||||
127
py_package/browser_agent/browser_use_custom/i18n/__init__.py
Normal file
127
py_package/browser_agent/browser_use_custom/i18n/__init__.py
Normal file
@ -0,0 +1,127 @@
|
||||
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
|
||||
# Licensed under the 【火山方舟】原型应用软件自用许可协议
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://www.volcengine.com/docs/82379/1433703
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import gettext
|
||||
import os
|
||||
import copy
|
||||
from typing import Optional
|
||||
|
||||
# Default language
|
||||
_current_language = 'zh-CN'
|
||||
_translator = None
|
||||
|
||||
def get_locales_dir():
|
||||
"""Get the locales directory path"""
|
||||
return os.path.join(os.path.dirname(__file__), 'locales')
|
||||
|
||||
def set_language(language: str = 'zh-CN'):
|
||||
"""Set the current language for translations"""
|
||||
global _current_language, _translator
|
||||
_current_language = language
|
||||
|
||||
locales_dir = get_locales_dir()
|
||||
|
||||
try:
|
||||
# Try to load the translation
|
||||
translation = gettext.translation(
|
||||
'vefaas_browser_use',
|
||||
localedir=locales_dir,
|
||||
languages=[language],
|
||||
fallback=True
|
||||
)
|
||||
_translator = translation
|
||||
except Exception:
|
||||
# Fallback to NullTranslations if translation not found
|
||||
_translator = gettext.NullTranslations()
|
||||
|
||||
def get_language():
|
||||
"""Get the current language"""
|
||||
return _current_language
|
||||
|
||||
def _(message: str) -> str:
|
||||
"""Translate a message"""
|
||||
if _translator is None:
|
||||
set_language(_current_language)
|
||||
return _translator.gettext(message)
|
||||
|
||||
def get_available_languages():
|
||||
"""Get list of available languages"""
|
||||
locales_dir = get_locales_dir()
|
||||
if not os.path.exists(locales_dir):
|
||||
return ['zh-CN']
|
||||
|
||||
languages = []
|
||||
for item in os.listdir(locales_dir):
|
||||
lang_dir = os.path.join(locales_dir, item)
|
||||
if os.path.isdir(lang_dir):
|
||||
mo_file = os.path.join(lang_dir, 'LC_MESSAGES', 'vefaas_browser_use.mo')
|
||||
if os.path.exists(mo_file):
|
||||
languages.append(item)
|
||||
|
||||
return languages if languages else ['zh-CN']
|
||||
|
||||
def translate_planning_step_data(conversation_update: dict) -> dict:
|
||||
"""Translate planning step data fields using pattern matching"""
|
||||
import re
|
||||
translated_update = copy.deepcopy(conversation_update)
|
||||
|
||||
# Translate evaluation field
|
||||
if 'evaluation' in translated_update:
|
||||
evaluation = translated_update['evaluation']
|
||||
# Basic status translations
|
||||
if evaluation == "Unknown":
|
||||
translated_update['evaluation'] = _("Unknown")
|
||||
elif evaluation == "Failed":
|
||||
translated_update['evaluation'] = _("Failed")
|
||||
elif evaluation == "Success":
|
||||
translated_update['evaluation'] = _("Success")
|
||||
# Specific pattern translations
|
||||
elif evaluation == "Unknown - The task has just started, and no actions have been taken yet.":
|
||||
translated_update['evaluation'] = _("Unknown - The task has just started, and no actions have been taken yet.")
|
||||
elif evaluation == "Unknown - The task has not yet begun as the current page is blank.":
|
||||
translated_update['evaluation'] = _("Unknown - The task has not yet begun as the current page is blank.")
|
||||
elif evaluation == "Success - Successfully navigated to Google's homepage.":
|
||||
translated_update['evaluation'] = _("Success - Successfully navigated to Google's homepage.")
|
||||
elif evaluation == "Failed - The search was blocked by Google's CAPTCHA verification.":
|
||||
translated_update['evaluation'] = _("Failed - The search was blocked by Google's CAPTCHA verification.")
|
||||
# General pattern matching for CAPTCHA-related failures
|
||||
elif "CAPTCHA" in evaluation and evaluation.startswith("Failed"):
|
||||
translated_update['evaluation'] = _("Failed - The search was blocked by a CAPTCHA verification page.")
|
||||
# Partial translation for status prefixes
|
||||
elif evaluation.startswith("Success - "):
|
||||
translated_update['evaluation'] = evaluation.replace("Success", _("Success"), 1)
|
||||
elif evaluation.startswith("Failed - "):
|
||||
translated_update['evaluation'] = evaluation.replace("Failed", _("Failed"), 1)
|
||||
elif evaluation.startswith("Unknown - "):
|
||||
translated_update['evaluation'] = evaluation.replace("Unknown", _("Unknown"), 1)
|
||||
|
||||
# Translate goal field with pattern matching
|
||||
if 'goal' in translated_update:
|
||||
goal = translated_update['goal']
|
||||
if goal == "Pause execution due to CAPTCHA verification.":
|
||||
translated_update['goal'] = _("Pause execution due to CAPTCHA verification.")
|
||||
# Add more general goal patterns as needed
|
||||
|
||||
# Translate actions if they contain pause reasons
|
||||
if 'actions' in translated_update:
|
||||
for action in translated_update['actions']:
|
||||
if 'pause' in action and 'reason' in action['pause']:
|
||||
reason = action['pause']['reason']
|
||||
if reason == "CAPTCHA verification required. Human intervention is needed to proceed.":
|
||||
action['pause']['reason'] = _("CAPTCHA verification required. Human intervention is needed to proceed.")
|
||||
elif "CAPTCHA" in reason and "verification required" in reason:
|
||||
# General CAPTCHA verification pattern
|
||||
action['pause']['reason'] = _("CAPTCHA verification required. Human intervention is needed to proceed.")
|
||||
|
||||
return translated_update
|
||||
|
||||
# Initialize with default language
|
||||
set_language('zh-CN')
|
||||
Binary file not shown.
@ -0,0 +1,178 @@
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: vefaas-browser-use\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2025-01-04 12:00+0000\n"
|
||||
"PO-Revision-Date: 2025-01-04 12:00+0000\n"
|
||||
"Last-Translator: Auto Generated\n"
|
||||
"Language-Team: Chinese (Simplified)\n"
|
||||
"Language: zh-CN\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Plural-Forms: nplurals=1; plural=0;\n"
|
||||
|
||||
# Task management messages
|
||||
msgid "Task auto-stopped after 1 minute in paused state"
|
||||
msgstr "任务在暂停状态1分钟后自动停止"
|
||||
|
||||
# Controller action descriptions
|
||||
msgid "Search the query in Baidu in the current tab, the query should be a search query like humans search in Baidu, concrete and not vague or super long. More the single most important items."
|
||||
msgstr "在当前标签页的百度中搜索查询,查询应该是像人类在百度中搜索的搜索查询,具体而不模糊或过长。更多的单个最重要的项目。"
|
||||
|
||||
msgid "🔍 Searched for \"{query}\" in Baidu"
|
||||
msgstr "🔍 在百度搜索了\"{query}\""
|
||||
|
||||
msgid "Pause agent"
|
||||
msgstr "暂停代理"
|
||||
|
||||
msgid "👩 Pause agent, reason: {reason}"
|
||||
msgstr "👩 暂停代理,原因:{reason}"
|
||||
|
||||
msgid "Extract page content to retrieve specific information from the page, e.g. all company names, a specific description, all information about xyc, 4 links with companies in structured format. Use include_links true if the goal requires links"
|
||||
msgstr "提取页面内容以从页面中检索特定信息,例如所有公司名称、特定描述、关于xyc的所有信息、4个具有结构化格式公司的链接。如果目标需要链接,请使用include_links true"
|
||||
|
||||
msgid "Your task is to extract the content of the page. You will be given a page and a goal and you should extract all relevant information around this goal from the page. If the goal is vague, summarize the page. Respond in json format. Extraction goal: {goal}, Page: {page}"
|
||||
msgstr "您的任务是提取页面的内容。您将获得一个页面和一个目标,您应该从页面中提取围绕此目标的所有相关信息。如果目标模糊,请总结页面。以json格式响应。提取目标:{goal},页面:{page}"
|
||||
|
||||
msgid "📄 Extracted from page\n: {content}\n"
|
||||
msgstr "📄 从页面提取\n:{content}\n"
|
||||
|
||||
msgid "Error extracting content: {error}"
|
||||
msgstr "提取内容错误:{error}"
|
||||
|
||||
# SSE messages
|
||||
msgid "Starting new step..."
|
||||
msgstr "开始新步骤..."
|
||||
|
||||
msgid "Taken action #{number}: {action}"
|
||||
msgstr "执行动作 #{number}:{action}"
|
||||
|
||||
msgid "Agent creation failed: {error}"
|
||||
msgstr "代理创建失败:{error}"
|
||||
|
||||
msgid "Agent execution failed: {error}"
|
||||
msgstr "代理执行失败:{error}"
|
||||
|
||||
# Planning step status messages
|
||||
msgid "Unknown"
|
||||
msgstr "未知"
|
||||
|
||||
msgid "Failed"
|
||||
msgstr "失败"
|
||||
|
||||
msgid "Success"
|
||||
msgstr "成功"
|
||||
|
||||
# Common action result messages
|
||||
msgid "Unknown - The task has just started, and no actions have been taken yet."
|
||||
msgstr "未知 - 任务刚刚开始,尚未执行任何操作。"
|
||||
|
||||
msgid "Failed - The attempt to search for 'MCP news' was blocked by a CAPTCHA verification page. This requires human intervention to proceed."
|
||||
msgstr "失败 - 搜索'MCP news'的尝试被验证码页面阻止。需要人工干预才能继续。"
|
||||
|
||||
msgid "CAPTCHA verification required to proceed with the search for MCP news. Please solve the CAPTCHA to continue."
|
||||
msgstr "需要验证码验证才能继续搜索MCP新闻。请解决验证码以继续。"
|
||||
|
||||
# Dynamic evaluation patterns
|
||||
msgid "Unknown - The task has not yet begun as the current page is blank."
|
||||
msgstr "未知 - 任务尚未开始,因为当前页面为空白。"
|
||||
|
||||
msgid "Success - Successfully navigated to Google's homepage."
|
||||
msgstr "成功 - 成功导航到Google主页。"
|
||||
|
||||
# General pause patterns
|
||||
msgid "Pause execution due to CAPTCHA verification."
|
||||
msgstr "由于验证码验证而暂停执行。"
|
||||
|
||||
# General evaluation patterns
|
||||
msgid "Failed - The search was blocked by Google's CAPTCHA verification."
|
||||
msgstr "失败 - 搜索被Google的验证码验证阻止。"
|
||||
|
||||
msgid "Failed - The search was blocked by a CAPTCHA verification page."
|
||||
msgstr "失败 - 搜索被验证码页面阻止。"
|
||||
|
||||
# General pause reasons
|
||||
msgid "CAPTCHA verification required. Human intervention is needed to proceed."
|
||||
msgstr "需要验证码验证。需要人工干预才能继续。"
|
||||
|
||||
msgid "pause"
|
||||
msgstr "暂停"
|
||||
|
||||
msgid "reason"
|
||||
msgstr "原因"
|
||||
|
||||
msgid "Human intervention required to solve it before proceeding with the task."
|
||||
msgstr "需要人工干预解决后才能继续执行任务。"
|
||||
|
||||
# CAPTCHA detection messages
|
||||
msgid "CAPTCHA detected on Amazon.com. Human intervention required to solve it before proceeding with the task."
|
||||
msgstr "在Amazon.com检测到验证码。需要人工干预解决后才能继续执行任务。"
|
||||
|
||||
msgid "Pause execution and request human intervention to solve the CAPTCHA"
|
||||
msgstr "暂停执行并请求人工干预解决验证码"
|
||||
|
||||
# Task status messages
|
||||
msgid "Task started."
|
||||
msgstr "任务已开始。"
|
||||
|
||||
msgid "Task started:"
|
||||
msgstr "任务已开始:"
|
||||
|
||||
msgid "Pause the task and wait for human intervention to complete the CAPTCHA verification."
|
||||
msgstr "暂停任务并等待人工干预完成验证码验证。"
|
||||
|
||||
# Extraction instruction
|
||||
msgid "Extract the titles, sources, and summaries of"
|
||||
msgstr "提取标题、来源和摘要"
|
||||
|
||||
# Initial task status messages
|
||||
msgid "No previous actions have been taken yet."
|
||||
msgstr "尚未执行任何操作。"
|
||||
|
||||
msgid "Submit the search query for"
|
||||
msgstr "提交搜索查询"
|
||||
|
||||
# Key instruction messages
|
||||
msgid "'keys': 'Enter'"
|
||||
msgstr "'输入键': '回车'"
|
||||
|
||||
msgid "error"
|
||||
msgstr "失败"
|
||||
|
||||
# Status/State translations
|
||||
msgid "queued"
|
||||
msgstr "排队中"
|
||||
|
||||
msgid "starting"
|
||||
msgstr "开始执行"
|
||||
|
||||
msgid "browser_initialized"
|
||||
msgstr "浏览器初始化"
|
||||
|
||||
msgid "running"
|
||||
msgstr "执行中"
|
||||
|
||||
msgid "failed"
|
||||
msgstr "失败"
|
||||
|
||||
msgid "agent_initialized"
|
||||
msgstr "agent 初始化"
|
||||
|
||||
msgid "completed"
|
||||
msgstr "完成"
|
||||
|
||||
msgid "go_to_url"
|
||||
msgstr "访问网址"
|
||||
|
||||
msgid "click_element_by_index"
|
||||
msgstr "按索引点击元素"
|
||||
|
||||
msgid "index"
|
||||
msgstr "索引"
|
||||
|
||||
msgid "include_links"
|
||||
msgstr "包含链接"
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,116 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
|
||||
# Licensed under the 【火山方舟】原型应用软件自用许可协议
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://www.volcengine.com/docs/82379/1433703
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Translation management script for vefaas-browser-use
|
||||
|
||||
Usage:
|
||||
python scripts/update_translations.py compile # Compile .po files to .mo files
|
||||
python scripts/update_translations.py test # Test translations
|
||||
"""
|
||||
|
||||
import sys
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
def get_project_root():
|
||||
"""Get the project root directory"""
|
||||
return Path(__file__).parent.parent
|
||||
|
||||
def compile_translations():
|
||||
"""Compile all .po files to .mo files"""
|
||||
project_root = get_project_root()
|
||||
locales_dir = project_root / "my_browser_use" / "locales"
|
||||
|
||||
if not locales_dir.exists():
|
||||
print(f"Locales directory not found: {locales_dir}")
|
||||
return False
|
||||
|
||||
success = True
|
||||
for lang_dir in locales_dir.iterdir():
|
||||
if lang_dir.is_dir() and lang_dir.name != "__pycache__":
|
||||
po_file = lang_dir / "LC_MESSAGES" / "vefaas_browser_use.po"
|
||||
mo_file = lang_dir / "LC_MESSAGES" / "vefaas_browser_use.mo"
|
||||
|
||||
if po_file.exists():
|
||||
print(f"Compiling {lang_dir.name}...")
|
||||
try:
|
||||
subprocess.run([
|
||||
"msgfmt", "-o", str(mo_file), str(po_file)
|
||||
], check=True)
|
||||
print(f" ✓ Compiled {lang_dir.name}")
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f" ✗ Failed to compile {lang_dir.name}: {e}")
|
||||
success = False
|
||||
else:
|
||||
print(f" ⚠ No .po file found for {lang_dir.name}")
|
||||
|
||||
return success
|
||||
|
||||
def test_translations():
|
||||
"""Test the translation system"""
|
||||
sys.path.insert(0, str(get_project_root()))
|
||||
|
||||
try:
|
||||
from browser_agent.browser_use_custom.i18n import _, set_language, get_available_languages
|
||||
|
||||
print("Testing translation system...")
|
||||
print(f"Available languages: {get_available_languages()}")
|
||||
|
||||
# Test a few key messages
|
||||
test_messages = [
|
||||
"Task auto-stopped after 1 minute in paused state",
|
||||
"Pause agent",
|
||||
"🔍 Searched for \"{query}\" in Baidu",
|
||||
"Starting new step...",
|
||||
"Taken action #{number}: {action}",
|
||||
"Agent creation failed: {error}",
|
||||
"Pause execution due to CAPTCHA verification.",
|
||||
"Unknown - The task has not yet begun as the current page is blank.",
|
||||
"Success - Successfully navigated to Google's homepage.",
|
||||
"CAPTCHA verification required. Human intervention is needed to proceed."
|
||||
]
|
||||
|
||||
for lang in get_available_languages():
|
||||
print(f"\n--- Testing {lang} ---")
|
||||
set_language(lang)
|
||||
for msg in test_messages:
|
||||
translated = _(msg)
|
||||
print(f" {msg[:50]}... -> {translated[:50]}...")
|
||||
|
||||
print("\n✓ Translation test completed")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
print(f"✗ Translation test failed: {e}")
|
||||
return False
|
||||
|
||||
def main():
|
||||
if len(sys.argv) != 2:
|
||||
print(__doc__)
|
||||
sys.exit(1)
|
||||
|
||||
command = sys.argv[1]
|
||||
|
||||
if command == "compile":
|
||||
success = compile_translations()
|
||||
sys.exit(0 if success else 1)
|
||||
elif command == "test":
|
||||
success = test_translations()
|
||||
sys.exit(0 if success else 1)
|
||||
else:
|
||||
print(f"Unknown command: {command}")
|
||||
print(__doc__)
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
241
py_package/browser_agent/cdp.py
Normal file
241
py_package/browser_agent/cdp.py
Normal file
@ -0,0 +1,241 @@
|
||||
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
|
||||
# Licensed under the 【火山方舟】原型应用软件自用许可协议
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://www.volcengine.com/docs/82379/1433703
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
import aiohttp
|
||||
import websockets
|
||||
from fastapi import HTTPException, WebSocket, WebSocketDisconnect
|
||||
|
||||
|
||||
async def websocket_endpoint(websocket: WebSocket, page_id: str, port: str):
|
||||
await websocket.accept()
|
||||
|
||||
try:
|
||||
# Construct the WebSocket URL for the specific page
|
||||
ws_url = f"ws://127.0.0.1:{port}/devtools/page/{page_id}"
|
||||
|
||||
# Establish a connection to the CDP WebSocket
|
||||
async with websockets.connect(ws_url) as cdp_socket:
|
||||
# Create tasks for bidirectional communication
|
||||
receive_task = asyncio.create_task(
|
||||
receive_from_cdp(cdp_socket, websocket))
|
||||
send_task = asyncio.create_task(send_to_cdp(cdp_socket, websocket))
|
||||
|
||||
# Wait for either task to complete
|
||||
done, pending = await asyncio.wait(
|
||||
[receive_task, send_task],
|
||||
return_when=asyncio.FIRST_COMPLETED
|
||||
)
|
||||
|
||||
# Cancel any remaining tasks
|
||||
for task in pending:
|
||||
task.cancel()
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"WebSocket error: {e}")
|
||||
await websocket.close()
|
||||
|
||||
|
||||
async def websocket_browser_endpoint(websocket: WebSocket, browser_id: str, port: str):
|
||||
await websocket.accept()
|
||||
|
||||
try:
|
||||
# Construct the WebSocket URL for the specific page
|
||||
ws_url = f"ws://0.0.0.0:{port}/devtools/browser/{browser_id}"
|
||||
|
||||
# Establish a connection to the CDP WebSocket
|
||||
async with websockets.connect(ws_url) as cdp_socket:
|
||||
# Create tasks for bidirectional communication
|
||||
receive_task = asyncio.create_task(
|
||||
receive_from_cdp(cdp_socket, websocket))
|
||||
send_task = asyncio.create_task(send_to_cdp(cdp_socket, websocket))
|
||||
|
||||
# Wait for either task to complete
|
||||
done, pending = await asyncio.wait(
|
||||
[receive_task, send_task],
|
||||
return_when=asyncio.FIRST_COMPLETED
|
||||
)
|
||||
|
||||
# Cancel any remaining tasks
|
||||
for task in pending:
|
||||
task.cancel()
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"WebSocket error: {e}")
|
||||
await websocket.close()
|
||||
|
||||
|
||||
async def receive_from_cdp(cdp_socket, websocket):
|
||||
try:
|
||||
while True:
|
||||
message = await cdp_socket.recv()
|
||||
await websocket.send_text(message)
|
||||
except websockets.exceptions.ConnectionClosed:
|
||||
logging.info("CDP WebSocket connection closed")
|
||||
except Exception as e:
|
||||
logging.error(f"Error receiving from CDP: {e}")
|
||||
|
||||
|
||||
async def send_to_cdp(cdp_socket, websocket):
|
||||
try:
|
||||
while True:
|
||||
message = await websocket.receive_text()
|
||||
await cdp_socket.send(message)
|
||||
except WebSocketDisconnect:
|
||||
logging.info("Client WebSocket disconnected")
|
||||
except Exception as e:
|
||||
logging.error(f"Error sending to CDP: {e}")
|
||||
|
||||
|
||||
async def get_websocket_targets(port: str):
|
||||
endpoint = os.getenv("CDP_ENDPOINT")
|
||||
logging.info(f"Getting websocket targets for endpoint: {endpoint}")
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# Use the cdp_websocket parameter to dynamically construct the URL
|
||||
base_url = f"http://127.0.0.1:{port}/json/list"
|
||||
async with session.get(base_url) as response:
|
||||
if response.status == 200:
|
||||
result = await response.json()
|
||||
|
||||
# If result is an empty list, return it immediately
|
||||
if not result:
|
||||
return result
|
||||
|
||||
# Modify the URLs in the result to use the provided cdp_websocket
|
||||
for target in result:
|
||||
# Replace devtoolsFrontendUrl
|
||||
if 'devtoolsFrontendUrl' in target:
|
||||
target['devtoolsFrontendUrl'] = target['devtoolsFrontendUrl'].replace(
|
||||
'127.0.0.1:' + str(port), str(endpoint)
|
||||
)
|
||||
|
||||
# Replace webSocketDebuggerUrl
|
||||
if 'webSocketDebuggerUrl' in target:
|
||||
target['webSocketDebuggerUrl'] = target['webSocketDebuggerUrl'].replace(
|
||||
'127.0.0.1:' + str(port), str(endpoint)
|
||||
)
|
||||
|
||||
return result
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=response.status, detail="Failed to fetch JSON list")
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Error fetching JSON list: {str(e)}")
|
||||
|
||||
|
||||
async def get_websocket_version(port: str):
|
||||
endpoint = os.getenv("CDP_ENDPOINT")
|
||||
logging.info(f"Getting websocket version for endpoint: {endpoint}")
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# Use the cdp_websocket parameter to dynamically construct the URL
|
||||
base_url = f"http://127.0.0.1:{port}/json/version"
|
||||
async with session.get(base_url) as response:
|
||||
if response.status == 200:
|
||||
result = await response.json()
|
||||
|
||||
if not result:
|
||||
return result
|
||||
|
||||
if 'webSocketDebuggerUrl' in result:
|
||||
result['webSocketDebuggerUrl'] = result['webSocketDebuggerUrl'].replace(
|
||||
'127.0.0.1:' + str(port), str(endpoint)
|
||||
)
|
||||
|
||||
return result
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=response.status, detail="Failed to fetch JSON list")
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Error fetching JSON list: {str(e)}")
|
||||
|
||||
async def get_remote_websocket_version(browser_session_endpoint: str, browser_id: str):
|
||||
endpoint = os.getenv("CDP_ENDPOINT")
|
||||
logging.info(f"Getting websocket version for endpoint: {endpoint}")
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# Use the cdp_websocket parameter to dynamically construct the URL
|
||||
base_url = f"{browser_session_endpoint}/{browser_id}/json/version"
|
||||
async with session.get(base_url) as response:
|
||||
if response.status == 200:
|
||||
result = await response.json()
|
||||
return result
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=response.status, detail="Failed to fetch JSON version")
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Error fetching JSON list: {str(e)}")
|
||||
|
||||
|
||||
async def get_inspector(url):
|
||||
endpoint = os.getenv("CDP_ENDPOINT")
|
||||
logging.info(f"Getting websocket targets for endpoint: {endpoint}")
|
||||
|
||||
# Convert Starlette URL to string
|
||||
url_str = str(url)
|
||||
|
||||
# like this: http://127.0.0.1:8000/devtools/inspector.html?ws=scqevor9btoi2t6dnkcpg.apigateway-cn-beijing.volceapi.com/devtools/page/7D574C7E6237CB326E385DD2C3A5C845
|
||||
logging.info(f"Getting original inspector for URL: {url_str}")
|
||||
|
||||
# Parse the URL
|
||||
parsed_url = urlparse(url_str)
|
||||
|
||||
# Check if the current domain matches the endpoint
|
||||
if parsed_url.netloc == endpoint:
|
||||
# Replace only the netloc
|
||||
modified_url = parsed_url._replace(netloc="127.0.0.1:9222")
|
||||
url_str = urlunparse(modified_url)
|
||||
|
||||
logging.info(f"Converted inspector for URL: {url_str}")
|
||||
|
||||
# if parsed_url contains "127.0.0.1:9222", run a 301 redirect
|
||||
if "127.0.0.1" in parsed_url.netloc:
|
||||
url_str = url_str.replace("127.0.0.1:9222", endpoint)
|
||||
logging.info(f"Redirecting to: {url_str}")
|
||||
return await get_inspector(url_str)
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(url_str) as response:
|
||||
if response.status == 200:
|
||||
# Check content type
|
||||
content_type = response.headers.get(
|
||||
'Content-Type', '').lower()
|
||||
|
||||
if 'application/json' not in content_type:
|
||||
# If not JSON, read the content
|
||||
content = await response.text()
|
||||
logging.error(
|
||||
f"Unexpected content type: {content_type}")
|
||||
logging.error(f"Response content: {content[:500]}")
|
||||
|
||||
return {
|
||||
"status": "error",
|
||||
"content_type": content_type,
|
||||
"content": content
|
||||
}
|
||||
|
||||
result = await response.json()
|
||||
return result
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=response.status, detail="Failed to fetch inspector")
|
||||
except Exception as e:
|
||||
logging.error(f"Error fetching inspector: {e}")
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Error fetching inspector: {str(e)}")
|
||||
541
py_package/browser_agent/index.py
Normal file
541
py_package/browser_agent/index.py
Normal file
@ -0,0 +1,541 @@
|
||||
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
|
||||
# Licensed under the 【火山方舟】原型应用软件自用许可协议
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://www.volcengine.com/docs/82379/1433703
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from typing import Dict,List,Union
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import AsyncGenerator,Optional
|
||||
import aiohttp
|
||||
import aiohttp
|
||||
import uvicorn
|
||||
from dotenv import load_dotenv
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, StreamingResponse
|
||||
from pydantic import BaseModel, Field,ConfigDict
|
||||
from browser_use.browser.views import BrowserStateSummary
|
||||
from browser_agent.browser import start_local_browser,BrowserWrapper
|
||||
from browser_agent.browser_use_custom.controller.service import MyController
|
||||
from browser_agent.utils import enforce_log_format
|
||||
from browser_use import Agent
|
||||
from browser_agent.browser_use_custom.agent.service import MyAgent
|
||||
from browser_agent.browser_use_custom.i18n import _, set_language
|
||||
from browser_use.agent.views import (
|
||||
AgentOutput,
|
||||
)
|
||||
from browser_use import Agent, BrowserProfile, BrowserSession
|
||||
from stream_helper.schema import SSEData,ContentTypeEnum,ReturnTypeEnum,OutputModeEnum,ContextModeEnum,MessageActionInfo,MessageActionItem,ReplyContentType,ContentTypeInReplyEnum,ReplyTypeInReplyEnum
|
||||
from browser_agent.upload import UploadService
|
||||
from browser_agent.upload import filter_file_by_time
|
||||
from stream_helper.schema import FileChangeInfo,FileChangeType,FileChangeData
|
||||
import base64
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
from datetime import datetime
|
||||
from langchain_openai import ChatOpenAI
|
||||
from logging import Logger
|
||||
from browser_agent.browser_use_custom.controller.screen import wait_for_visual_change,WaitForVisualChangeAction,VisualChangeDetector
|
||||
from browser_use.utils import match_url_with_domain_pattern, time_execution_async, time_execution_sync
|
||||
app = FastAPI()
|
||||
load_dotenv()
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
expose_headers=["x-faas-instance-name"],
|
||||
)
|
||||
|
||||
llm_openai = "openai"
|
||||
llm_deepseek = "deepseek"
|
||||
llm_ark = "ark"
|
||||
llm_name = llm_ark
|
||||
|
||||
|
||||
# Global variable to track the port
|
||||
CURRENT_CDP_PORT = 9222
|
||||
|
||||
browser_session_endpoint = None
|
||||
set_language(os.getenv("LANGUAGE", "en"))
|
||||
|
||||
class Message(BaseModel):
|
||||
role: str
|
||||
content: str
|
||||
|
||||
|
||||
class Messages(BaseModel):
|
||||
messages: list[Message]
|
||||
|
||||
|
||||
class TaskRequest(BaseModel):
|
||||
task: str
|
||||
|
||||
import logging
|
||||
|
||||
class LLMConfig(BaseModel):
|
||||
llm_type: str
|
||||
model_id: str
|
||||
api_key: str
|
||||
extract_model_id: str
|
||||
|
||||
class RunBrowserUseAgentCtx(BaseModel):
|
||||
query: str
|
||||
conversation_id: str
|
||||
llm: BaseChatModel
|
||||
browser_session_endpoint: str
|
||||
max_steps: int = 20
|
||||
system_prompt: str | None = None
|
||||
extend_prompt: str | None = None
|
||||
upload_service:Optional[UploadService] = None
|
||||
start_time:int = int(datetime.now().timestamp() * 1000)
|
||||
logger: Logger = Field(default_factory=lambda: logging.getLogger(__name__))
|
||||
model_config = ConfigDict(
|
||||
arbitrary_types_allowed=True,
|
||||
)
|
||||
|
||||
def genSSEData(stream_id:str,
|
||||
content:str,
|
||||
content_type:ContentTypeEnum = ContentTypeEnum.TEXT,
|
||||
return_type:ReturnTypeEnum = ReturnTypeEnum.MODEL,
|
||||
output_mode:OutputModeEnum = OutputModeEnum.STREAM,
|
||||
is_last_msg:bool = False,
|
||||
is_finish:bool = False,
|
||||
is_last_packet_in_msg:bool = False,
|
||||
message_title:str = None,
|
||||
context_mode:ContextModeEnum = ContextModeEnum.NOT_IGNORE,
|
||||
response_for_model:str = None,
|
||||
ext:Dict[str,str] = None,
|
||||
card_body:str = None,
|
||||
reply_content_type:Optional[ReplyContentType] = None)->SSEData:
|
||||
return SSEData(
|
||||
stream_id = stream_id,
|
||||
content = content,
|
||||
content_type = content_type,
|
||||
return_type = return_type,
|
||||
output_mode = output_mode,
|
||||
is_last_msg = is_last_msg,
|
||||
is_finish = is_finish,
|
||||
is_last_packet_in_msg = is_last_packet_in_msg,
|
||||
message_title = message_title,
|
||||
context_mode = context_mode,
|
||||
response_for_model = response_for_model,
|
||||
ext = ext,
|
||||
card_body = card_body,
|
||||
reply_content_type = reply_content_type
|
||||
)
|
||||
def convert_ws_url(original_url: str) -> str:
|
||||
"""
|
||||
将本地开发环境的 WebSocket URL 转换为生产环境的 URL 格式。
|
||||
|
||||
示例输入:
|
||||
'ws://127.0.0.1:8001/v1/browsers/devtools/browser/77a025af-5483-46e2-ac57-9360812e4608?faasInstanceName=vefaas-duxz5kfb-jjecq2yuvf-d340et34rnmvf4b2ugd0-sandbox'
|
||||
|
||||
示例输出:
|
||||
'ws://bots-sandbox.bytedance.net/api/sandbox/coze_studio/proxy/v1/browsers/devtools/browser/77a025af-5483-46e2-ac57-9360812e4608'
|
||||
"""
|
||||
# 提取 UUID 部分(从路径中截取)
|
||||
uuid_part = original_url.split("/v1/browsers/devtools/browser/")[1].split("?")[0]
|
||||
|
||||
# 构建新 URL
|
||||
new_url = (
|
||||
"ws://bots-sandbox.bytedance.net"
|
||||
"/ws/sandbox/coze_studio/proxy"
|
||||
f"/v1/browsers/devtools/browser/{uuid_part}"
|
||||
)
|
||||
return new_url
|
||||
|
||||
async def get_data_files(directory: str | Path) -> List[Dict[str, str]]:
|
||||
path = Path(directory)
|
||||
if not path.is_dir():
|
||||
raise ValueError(f"'{directory}' is not a valid directory")
|
||||
result = []
|
||||
for file in path.iterdir():
|
||||
if file.is_file():
|
||||
try:
|
||||
with open(file, 'rb') as f:
|
||||
content = f.read()
|
||||
base64_encoded = base64.b64encode(content).decode('ascii')
|
||||
result.append({
|
||||
"name": file.name,
|
||||
"content": base64_encoded
|
||||
})
|
||||
except Exception as e:
|
||||
result.append({
|
||||
"name": file.name,
|
||||
"content": f"[ERROR READING FILE: {str(e)}]".encode()
|
||||
})
|
||||
|
||||
return result
|
||||
|
||||
async def RunBrowserUseAgent(ctx: RunBrowserUseAgentCtx) -> AsyncGenerator[SSEData, None]:
|
||||
task_id = str(uuid.uuid4())
|
||||
event_queue = asyncio.Queue(maxsize=100)
|
||||
# 初始化日志
|
||||
|
||||
ctx.logger.info(f"RunBrowserUseAgent with query: {ctx.query},task_id:{task_id}")
|
||||
|
||||
# 浏览器初始化
|
||||
try:
|
||||
if ctx.browser_session_endpoint == "" or ctx.browser_session_endpoint is None:
|
||||
global CURRENT_CDP_PORT
|
||||
CURRENT_CDP_PORT += 1
|
||||
current_port = CURRENT_CDP_PORT
|
||||
browser_wrapper = await start_local_browser(current_port)
|
||||
else:
|
||||
browser_wrapper = BrowserWrapper(None, None, None, 'id', ctx.browser_session_endpoint)
|
||||
except Exception as e:
|
||||
ctx.logger.error(f"[Failed to initialize browser: {e}")
|
||||
yield genSSEData(
|
||||
stream_id=ctx.conversation_id,
|
||||
content=f'init browser_wrapper err:{e}',
|
||||
is_finish=True,
|
||||
is_last_msg=True,
|
||||
is_last_packet_in_msg=True,
|
||||
output_mode=OutputModeEnum.NOT_STREAM,
|
||||
return_type=ReturnTypeEnum.MODEL,
|
||||
response_for_model=f'init browser_wrapper err:{e}',
|
||||
reply_content_type=ReplyContentType(content_type=ContentTypeInReplyEnum.TXT,reply_type=ReplyTypeInReplyEnum.ANSWER)
|
||||
)
|
||||
return
|
||||
|
||||

    # Retrieve the CDP URL
    cdp_url = None
    try:
        if browser_wrapper.remote_browser_id:
            async with aiohttp.ClientSession() as session:
                get_url = f"{browser_wrapper.endpoint}/v1/browsers/"
                async with session.get(url=get_url,
                                       timeout=aiohttp.ClientTimeout(total=30),
                                       headers={
                                           'x-sandbox-taskid': ctx.conversation_id,
                                       }) as response:
                    if response.status == 200:
                        reader = response.content
                        browser_info = json.loads(await reader.read())
                        cdp_url = convert_ws_url(browser_info['ws_url'])
                        ctx.logger.info(f"[{task_id}] Retrieved remote CDP URL: {cdp_url}")
                    else:
                        error_text = await response.text()
                        raise Exception(f"Failed to get browser info. Status: {response.status}, Error: {error_text}")
        else:
            current_port = CURRENT_CDP_PORT
            ctx.logger.info(f"Starting task with local browser on port: {current_port}")
            cdp_url = f"http://127.0.0.1:{current_port}"
    except Exception as e:
        ctx.logger.error(f"Error getting browser URL: {e}")
        yield genSSEData(
            stream_id=ctx.conversation_id,
            content=f'Failed to get browser cdp_url: {e}',
            is_finish=True,
            is_last_msg=True,
            is_last_packet_in_msg=True,
            output_mode=OutputModeEnum.NOT_STREAM,
            return_type=ReturnTypeEnum.MODEL,
            response_for_model=f'Failed to get browser cdp_url: {e}',
            reply_content_type=ReplyContentType(content_type=ContentTypeInReplyEnum.TXT, reply_type=ReplyTypeInReplyEnum.ANSWER)
        )
        return

    # Create working directories
    base_dir = os.path.join("videos", task_id)
    snapshot_dir = os.path.join(base_dir, "snapshots")
    Path(snapshot_dir).mkdir(parents=True, exist_ok=True)
    browser_session = None
    agent = None
    agent_task = None

    try:
        # Browser session configuration
        headless = False if ctx.browser_session_endpoint != "" else True
        browser_profile = BrowserProfile(
            headless=headless,
            disable_security=True,
            highlight_elements=False,
            wait_between_actions=1,
            keep_alive=False,
            headers={
                'x-sandbox-taskid': ctx.conversation_id,
            },
        )
        browser_session = BrowserSession(
            browser_profile=browser_profile,
            cdp_url=cdp_url,
        )
        ctx.logger.info(f"[{task_id}] Browser initialized with CDP URL: {cdp_url}")

        async def on_step_end(agent):
            try:
                ctx.logger.info("Agent step end")
                page = await agent.browser_session.get_current_page()
                detector = VisualChangeDetector()
                current_screenshot = await detector.capture_screenshot(page, upload_service=ctx.upload_service, logger=ctx.logger)
                agent.context = {'current_screenshot': current_screenshot}
                ctx.logger.info('captured screenshot successfully')
            except Exception as e:
                ctx.logger.error(f"Error in on_step_end: {e}")

        # Callback definitions
        async def new_step_callback_wrapper(browser_state_summary: BrowserStateSummary,
                                            model_output: AgentOutput,
                                            step_number: int):
            try:
                islogin = False
                for ac in model_output.action:
                    try:
                        action_data = ac.model_dump(exclude_unset=True)
                        action_name = next(iter(action_data.keys()))
                        if action_name == 'pause':
                            islogin = True
                            ctx.logger.info("pause action detected, browser task paused")
                            agent.pause()
                    except Exception as e:
                        ctx.logger.error(f"Error in new_step_callback_wrapper: {e}")
                        agent.resume()
                data = ''
                content_type = ContentTypeInReplyEnum.TXT
                if islogin:
                    data = data + MessageActionInfo(actions=[MessageActionItem()]).model_dump_json()
                    content_type = ContentTypeInReplyEnum.ACTION_INFO
                else:
                    data = data + (model_output.current_state.next_goal or '')
                await event_queue.put(genSSEData(
                    stream_id=ctx.conversation_id,
                    content=data,
                    is_last_packet_in_msg=True,
                    reply_content_type=ReplyContentType(content_type=content_type)
                ))
                if islogin:
                    current_screenshot = None
                    if agent.context:
                        current_screenshot = agent.context.get('current_screenshot', None)
                    if current_screenshot:
                        ctx.logger.info('current screenshot exists')
                        has_change, _, _ = await wait_for_visual_change(
                            params=WaitForVisualChangeAction(
                                timeout=300,
                                similarity_threshold=0.85,
                            ),
                            browser_session=agent.browser_session,
                            initial_screenshot=current_screenshot,
                            upload_service=ctx.upload_service,
                            logger=ctx.logger,
                        )
                        if has_change:
                            ctx.logger.info('detected visual change, browser task resumed')
                            agent.resume()
                        else:
                            data = 'timeout: no visual change detected during login'
                            await event_queue.put(genSSEData(
                                stream_id=ctx.conversation_id,
                                content=data,
                                output_mode=OutputModeEnum.NOT_STREAM,
                                is_finish=True,
                                is_last_msg=True,
                                is_last_packet_in_msg=True,
                                response_for_model=data,
                                reply_content_type=ReplyContentType(content_type=ContentTypeInReplyEnum.TXT, reply_type=ReplyTypeInReplyEnum.ANSWER)
                            ))
            except Exception as e:
                ctx.logger.error(f"Error in new_step_callback_wrapper: {e}")
                agent.resume()

        # Create the agent
        agent = MyAgent(
            task=ctx.query,
            browser_session=browser_session,
            register_new_step_callback=new_step_callback_wrapper,
            llm=ctx.llm,
            page_extraction_llm=ctx.llm,
            planner_llm=ctx.llm,
            controller=MyController(),
            planner_interval=int(os.getenv("ARK_PLANNER_INTERVAL", "1")),
            override_system_message=ctx.system_prompt,
            extend_planner_system_message=ctx.extend_prompt,
            language='zh',
            enable_memory=False,
        )

        ctx.logger.info(f"[{task_id}] Agent initialized and ready to run")
        # Start the agent task
        agent_task = asyncio.create_task(agent.run(20, on_step_end=on_step_end))
        ctx.logger.info(f"[{task_id}] Agent started running")

        # Stream events
        while True:
            try:
                # Wait for an event or check whether the task has finished
                try:
                    event = await asyncio.wait_for(event_queue.get(), timeout=0.5)
                    yield event

                    # Check whether we should stop
                    if event == "error" or (isinstance(event, dict) and event.get("status") == "completed"):
                        break

                except asyncio.TimeoutError:
                    # Check whether the agent task has finished
                    if agent_task.done():
                        break
                    continue
            except asyncio.CancelledError:
                ctx.logger.info(f"[{task_id}] Task was cancelled")
                agent_task.cancel()
                if agent:
                    agent.pause()
                    await agent.close()
                raise
            except Exception as e:
                ctx.logger.error(f"[{task_id}] Error in event streaming: {e}")
                break

        # Wait for the agent task to complete
        if agent_task and not agent_task.done():
            await agent_task
        # Collect the final result
        result = await agent_task if agent_task else None
        if result:
            final_result = None
            for history_item in reversed(result.history):
                for result_item in history_item.result:
                    if hasattr(result_item, "is_done") and result_item.is_done:
                        final_result = result_item.extracted_content
                        break
                if final_result:
                    break

            if not final_result:
                extracted = [
                    [item.extracted_content for item in history_item.result
                     if hasattr(item, "extracted_content")]
                    for history_item in result.history
                ]
                final_result = "\n".join(
                    item
                    for sublist in extracted
                    for item in sublist
                    if isinstance(item, str)
                )
            if final_result:
                ctx.logger.info(f"[{task_id}] final_result: {final_result}")
                completion_event = genSSEData(
                    stream_id=ctx.conversation_id,
                    content=final_result,
                    return_type=ReturnTypeEnum.MODEL,
                    response_for_model=final_result,
                    content_type=ContentTypeEnum.TEXT,
                    output_mode=OutputModeEnum.STREAM,
                    is_finish=True,
                    is_last_msg=True,
                    is_last_packet_in_msg=True,
                    reply_content_type=ReplyContentType(content_type=ContentTypeInReplyEnum.TXT, reply_type=ReplyTypeInReplyEnum.ANSWER)
                )
                yield completion_event

        ctx.logger.info(f"[{task_id}] Task completed successfully")

    except Exception as e:
        ctx.logger.error(f"[{task_id}] Agent execution failed: {e}")
        yield genSSEData(
            stream_id=ctx.conversation_id,
            content=f'err: {e}',
            is_finish=True,
            is_last_msg=True,
            is_last_packet_in_msg=True,
            return_type=ReturnTypeEnum.MODEL,
            response_for_model=f'err: {e}',
            reply_content_type=ReplyContentType(content_type=ContentTypeInReplyEnum.TXT, reply_type=ReplyTypeInReplyEnum.ANSWER)
        )
    finally:
        if agent:
            if agent.browser_session:
                agent.browser_session.cdp_url = None
                if agent.browser_session.browser_context:
                    await agent.browser_session.browser_context.close()
                if agent.browser_session.browser:
                    await agent.browser_session.browser.close()
            agent.pause()
            ctx.logger.info('agent closed successfully')
@app.get("/")
|
||||
async def root():
|
||||
return {"message": "Hello World"}
|
||||
|
||||
|
||||
@app.post("/tasks")
|
||||
async def sse_task(request: Messages):
|
||||
task_id = str(uuid.uuid4())
|
||||
|
||||
# 提取用户消息
|
||||
prompt = ""
|
||||
for message in request.messages:
|
||||
if message.role == "user":
|
||||
prompt = message.content
|
||||
logging.debug(f"Found user message: {prompt}")
|
||||
break
|
||||
|
||||
logging.info(f"[{task_id}] Starting SSE task with prompt: {prompt}")
|
||||
|
||||
async def generate_sse_events():
|
||||
try:
|
||||
# 创建SSE格式的生成器
|
||||
async for event in RunBrowserUseAgent(ctx=RunBrowserUseAgentCtx(
|
||||
query=prompt,
|
||||
conversation_id="",
|
||||
cookie="",
|
||||
llm_config=LLMConfig(
|
||||
),
|
||||
browser_session_endpoint="http://127.0.0.1:8001/v1/browsers",
|
||||
max_steps=20
|
||||
)):
|
||||
# 确保事件是SSE格式
|
||||
if isinstance(event, str):
|
||||
# 如果是字符串,直接作为数据发送
|
||||
yield f"data: {event}\n\n"
|
||||
elif isinstance(event, dict):
|
||||
# 如果是字典,转换为JSON格式
|
||||
event_json = json.dumps(event, ensure_ascii=False)
|
||||
yield f"data: {event_json}\n\n"
|
||||
else:
|
||||
# 其他类型转换为字符串
|
||||
yield f"data: {str(event)}\n\n"
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"[{task_id}] Error in SSE generation: {e}")
|
||||
# 发送错误事件
|
||||
error_event = json.dumps({
|
||||
"task_id": task_id,
|
||||
"status": "error",
|
||||
"error": str(e)
|
||||
}, ensure_ascii=False)
|
||||
yield f"data: {error_event}\n\n"
|
||||
finally:
|
||||
# 可选:发送结束标记
|
||||
yield f"data: {json.dumps({'task_id': task_id, 'status': 'stream_completed'}, ensure_ascii=False)}\n\n"
|
||||
|
||||
try:
|
||||
return StreamingResponse(
|
||||
generate_sse_events(),
|
||||
media_type="text/event-stream",
|
||||
)
|
||||
except Exception as e:
|
||||
logging.error(f"[{task_id}] Error creating StreamingResponse: {e}")
|
||||
# 返回错误响应
|
||||
return JSONResponse(
|
||||
status_code=500,
|
||||
content={"error": "Failed to create SSE stream", "task_id": task_id}
|
||||
)
|
||||
|
||||

if __name__ == "__main__":
    enforce_log_format()
    uvicorn.run(app, host="0.0.0.0", port=8000)
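As a quick way to exercise the /tasks endpoint end to end, the sketch below (an editorial example, not part of this patch) consumes the SSE stream with aiohttp. It assumes the service above is running locally on port 8000 and that the Messages schema accepts a list of role/content objects.

# Editor's sketch: consuming the /tasks SSE stream with aiohttp.
# Assumes the FastAPI app above is running on http://127.0.0.1:8000
# and that the request body matches the Messages schema.
import asyncio
import aiohttp

async def consume_tasks_stream():
    payload = {"messages": [{"role": "user", "content": "open example.com and summarize it"}]}
    async with aiohttp.ClientSession() as session:
        async with session.post("http://127.0.0.1:8000/tasks", json=payload) as resp:
            async for raw_line in resp.content:
                line = raw_line.decode("utf-8").strip()
                if line.startswith("data: "):
                    print(line[len("data: "):])

if __name__ == "__main__":
    asyncio.run(consume_tasks_stream())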
23
py_package/browser_agent/upload.py
Normal file
@ -0,0 +1,23 @@
from datetime import datetime
from typing import List
from abc import ABC, abstractmethod
from pydantic import BaseModel


class FileItem(BaseModel):
    file_name: str
    file_type: str
    file_size: int
    file_uri: str
    file_url: str
    upload_type: str
    create_time: int
    update_time: int


class UploadService(BaseModel, ABC):
    headers: dict[str, str] = {}

    async def upload_file(self, file_content: str, file_name: str, base64_content: str = ''):
        pass

    async def list_file(self) -> List[FileItem]:
        pass


async def filter_file_by_time(file_list: List[FileItem], start_time: int) -> List[FileItem]:
    return [file for file in file_list if (start_time <= file.create_time or start_time <= file.update_time)]
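For orientation, filter_file_by_time keeps any file created or updated at or after the given timestamp. The snippet below is an editorial example, not part of the patch; the file names and timestamps are made up.

# Editor's sketch: keeping files touched since a given Unix timestamp.
import asyncio

files = [
    FileItem(file_name="a.png", file_type="image", file_size=1, file_uri="u1",
             file_url="http://example.com/a.png", upload_type="auto",
             create_time=100, update_time=100),
    FileItem(file_name="b.png", file_type="image", file_size=1, file_uri="u2",
             file_url="http://example.com/b.png", upload_type="auto",
             create_time=300, update_time=300),
]
recent = asyncio.run(filter_file_by_time(files, start_time=200))
assert [f.file_name for f in recent] == ["b.png"]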
86
py_package/browser_agent/utils.py
Normal file
@ -0,0 +1,86 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# Licensed under the 【火山方舟】原型应用软件自用许可协议
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://www.volcengine.com/docs/82379/1433703
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from typing import Any, Dict, List

from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.messages import BaseMessage
from langchain_core.outputs import LLMResult


root_logger = logging.getLogger()
for handler in root_logger.handlers:
    formatter = logging.Formatter(
        '%(asctime)s - %(levelname)s - %(message)s',
        '%Y-%m-%d %H:%M:%S'
    )
    handler.setFormatter(formatter)


logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)


def enforce_log_format():
    root_logger = logging.getLogger()
    for handler in root_logger.handlers:
        formatter = logging.Formatter(
            '%(asctime)s - %(levelname)s - %(message)s',
            '%Y-%m-%d %H:%M:%S'
        )
        handler.setFormatter(formatter)


class ModelLoggingCallback(BaseCallbackHandler):
    def on_chat_model_start(
        self, serialized: Dict[str, Any], messages: List[List[BaseMessage]], **kwargs
    ) -> None:
        logging.info("[Model] Chat model started")

    def on_llm_end(self, response: LLMResult, **kwargs) -> None:
        logging.info(f"[Model] Chat model ended, response: {response}")

    def on_llm_error(self, error: BaseException, **kwargs) -> Any:
        logging.info(f"[Model] Chat model error, error: {error}")

    def on_chain_start(
        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs
    ) -> None:
        logging.info(f"[Model] Chain {serialized.get('name')} started")

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs) -> None:
        logging.info(f"[Model] Chain ended, outputs: {outputs}")


# Class that wraps another class and logs all function calls being executed
class Wrapper:
    def __init__(self, wrapped_class):
        self.wrapped_class = wrapped_class

    def __getattr__(self, attr):
        original_func = getattr(self.wrapped_class, attr)

        def wrapper(*args, **kwargs):
            print(f"Calling function: {attr}")
            print(f"Arguments: {args}, {kwargs}")
            result = original_func(*args, **kwargs)
            print(f"Response: {result}")
            return result

        return wrapper
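A quick illustration of the Wrapper helper (editorial example, not part of the patch); the Calculator class below is purely hypothetical, not something in the package.

# Editor's sketch: logging every call made through Wrapper.
# Calculator is a hypothetical example class, not part of the package.
class Calculator:
    def add(self, a, b):
        return a + b

calc = Wrapper(Calculator())
calc.add(2, 3)
# Prints:
# Calling function: add
# Arguments: (2, 3), {}
# Response: 5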
29
py_package/pyproject.toml
Normal file
@ -0,0 +1,29 @@
[build-system]
requires = ["setuptools>=64.0.0", "wheel"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
include-package-data = true
package-dir = {"browser_agent" = "browser_agent", "stream_helper" = "stream_helper"}

[tool.setuptools.package-data]
"browser_agent" = ["**/*.mo", "**/*.po"]

[project]
name = "coze-studio-py-package"  # Package name, unique on PyPI; hyphens are conventional
version = "0.1.0"
description = "browser agent"

# Declare runtime dependencies here
dependencies = [
    "fastapi==0.109.2",
    "uvicorn==0.27.0.post1",
    "python-multipart==0.0.9",
    "MainContentExtractor==0.0.4",
    "aiohttp==3.11.16",
    "websockets==15.0.1",
    "faiss-cpu==1.10.0",
    "browser_use @ git+https://github.com/liuyunchao-1998/browser-use-copy.git@i18n_system_prompt",
    "playwright==1.52.0",
    "pillow==11.3.0"
]
0
py_package/stream_helper/__init__.py
Normal file
125
py_package/stream_helper/schema.py
Normal file
@ -0,0 +1,125 @@
from enum import Enum, IntEnum
from typing import Optional, Dict, Union
from pydantic import BaseModel


class StepInfo(BaseModel):
    step_number: int
    goal: str


class WebSocketItem(BaseModel):
    ws_url: str


class WebSocketInfo(BaseModel):
    items: list[WebSocketItem]


class MessageActionTypeEnum(int, Enum):
    """Enum values for MessageActionType."""
    WebPageAuthorization = 1  # Web page authorization


class MessageActionItem(BaseModel):
    type: MessageActionTypeEnum = MessageActionTypeEnum.WebPageAuthorization

    class Config:
        use_enum_values = True


class MessageActionInfo(BaseModel):
    actions: list[MessageActionItem]


# Enum definitions
class FileType(str, Enum):
    DIR = "dir"
    FILE = "file"


class FileChangeType(str, Enum):
    CREATE = "create"
    DELETE = "delete"
    UPDATE = "update"


class FileChangeData(BaseModel):
    file_type: FileType = FileType.FILE
    file_path: str = ''
    file_name: str
    change_type: FileChangeType
    uri: str
    url: str

    class Config:
        use_enum_values = True


class ErrData(BaseModel):
    data: Dict[str, str]


class FileChangeInfo(BaseModel):
    file_change_list: Optional[list[FileChangeData]] = None
    err_list: Optional[list[ErrData]] = None


class OutputModeEnum(int, Enum):
    """Enum values for SSEData.output_mode."""
    NOT_STREAM = 0  # Non-streaming
    STREAM = 1  # Streaming


class ReturnTypeEnum(int, Enum):
    """Enum values for SSEData.return_type."""
    MODEL = 0  # Returned to the model
    USER_TERMINAL = 1  # Returned to the user terminal


class ContentTypeEnum(int, Enum):
    """Enum values for SSEData.content_type."""
    TEXT = 0  # Text
    APPLET_WIDGET = 1  # Applet widget
    LOADING_TIPS = 2  # Loading tips
    CARD = 3  # Card
    VERBOSE = 4  # Verbose information
    USAGE = 10  # Usage information


class ContextModeEnum(int, Enum):
    """Enum values for SSEData.context_mode."""
    NOT_IGNORE = 0  # Do not ignore context
    IGNORE = 1  # Ignore context


class ReplyTypeInReplyEnum(IntEnum):
    """Type of reply (purpose/context)."""
    ANSWER = 1  # Standard answer
    SUGGEST = 2  # Suggestion (e.g., follow-up questions)
    LLM_OUTPUT = 3  # Raw LLM output (before processing)
    TOOL_OUTPUT = 4  # Output from an external tool/API
    VERBOSE = 100  # Debug/verbose logging
    PLACEHOLDER = 101  # Placeholder (e.g., loading state)
    TOOL_VERBOSE = 102  # Verbose logs from tools


class ContentTypeInReplyEnum(IntEnum):
    """Format of the content."""
    TXT = 1  # Plain text
    IMAGE = 2  # Image
    VIDEO = 4  # Video
    MUSIC = 7  # Audio
    CARD = 50  # Structured card (e.g., rich UI element)
    WIDGET = 52  # Interactive widget
    APP = 100  # Embedded application
    WEBSOCKET_INFO = 101  # Websocket metadata
    FILE_CHANGE_INFO = 102  # File system event
    ACTION_INFO = 103  # Action/command metadata


class ReplyContentType(BaseModel):
    """Combines reply type and content type for structured responses."""
    reply_type: ReplyTypeInReplyEnum = ReplyTypeInReplyEnum.TOOL_VERBOSE
    content_type: ContentTypeInReplyEnum = ContentTypeInReplyEnum.TXT


class SSEData(BaseModel):
    stream_id: str
    message_title: Optional[str] = None
    context_mode: Union[ContextModeEnum, int] = ContextModeEnum.NOT_IGNORE
    output_mode: OutputModeEnum = OutputModeEnum.STREAM  # 0 = non-streaming, 1 = streaming
    return_type: Union[ReturnTypeEnum, int] = ReturnTypeEnum.USER_TERMINAL
    content_type: Union[ContentTypeEnum, int] = ContentTypeEnum.TEXT
    is_last_msg: bool = False
    is_finish: bool = False
    is_last_packet_in_msg: bool = False
    content: Optional[str] = None
    response_for_model: Optional[str] = None
    ext: Optional[Dict[str, str]] = None
    card_body: Optional[str] = None
    reply_content_type: Optional[ReplyContentType] = None

    class Config:
        use_enum_values = True  # Serialize using the raw enum values
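To make the serialization behaviour concrete, the snippet below (an editorial example, not part of the patch) constructs an SSEData event and dumps it; with use_enum_values, enum fields come out as their integer values.

# Editor's sketch: constructing and serializing an SSEData event.
event = SSEData(
    stream_id="conv-123",
    content="hello",
    is_last_packet_in_msg=True,
    reply_content_type=ReplyContentType(content_type=ContentTypeInReplyEnum.TXT),
)
print(event.model_dump_json())
# Enum fields serialize as their integer values, e.g. "output_mode": 1 and
# "reply_content_type": {"reply_type": 102, "content_type": 1}.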