name change name change name change name change name change name change 暂存 暂存 暂存 版本更新 版本更新 版本更新 和网关协议对齐 和网关协议对齐 和网关协议对齐 再升级下 再升 再完善下 升级 final resp 修复 修复 修复 再测试下 再测试下 包顺序 包顺序 包顺序 包顺序 修改为answer 更新下 更新版本 使用logger 使用logger 使用 滚滚滚 更新版本 screen opmot test use context 有问题 gogogo agent browser agent browser screen resume gogo gogo file upload to debug file upload base64 screen screen 修复 修复
204 lines
9.0 KiB
Python
204 lines
9.0 KiB
Python
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
|
|
# Licensed under the 【火山方舟】原型应用软件自用许可协议
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
# https://www.volcengine.com/docs/82379/1433703
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
import asyncio
import io
import logging
import re
from typing import Optional, Tuple
from urllib.parse import quote_plus

import markdownify
import numpy as np
from browser_use.agent.views import ActionResult
from browser_use.browser import BrowserSession
from browser_use.controller.service import Controller
from browser_use.controller.views import SearchGoogleAction
from bs4 import BeautifulSoup
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.prompts import PromptTemplate
from PIL import Image, ImageChops
from playwright.async_api import Page
from pydantic import BaseModel, Field

from browser_agent.browser_use_custom.controller.screen import (
    VisualChangeDetector,
    WaitForVisualChangeAction,
    wait_for_visual_change,
)
from browser_agent.browser_use_custom.i18n import _
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class PauseAction(BaseModel):
    # Parameter model for the `pause` action registered in MyController.__init__.

    # Free-text explanation of why the agent is pausing; it is interpolated
    # into the message that the pause action logs and returns.
    reason: str
|
|
|
|
class WaitForLoginAction(BaseModel):
    """Action parameters for waiting for login completion."""

    # Only referenced by the commented-out `wait_for_login` action later in
    # this file; both values are expressed in seconds.

    # Upper bound on the total time spent waiting.
    timeout: int = Field(description="Maximum time to wait for login completion in seconds", default=300)

    # Pause between two consecutive URL checks.
    check_interval: int = Field(description="Interval between checks for URL changes in seconds", default=5)
|
|
|
|
class MyController(Controller):
    """Controller that registers Baidu search, LLM page extraction and pause actions.

    The base ``Controller`` is initialised first, then three actions are
    registered on ``self.registry``:

    - ``search_google``: opens a Baidu results page for the query.
    - ``extract_content``: converts the current page (plus its iframes) to
      markdown and asks an LLM to extract what is relevant to a goal.
    - ``pause``: records a pause request together with its reason.
    """

    def __init__(
        self,
        exclude_actions: list[str] | None = None,
        output_model: type[BaseModel] | None = None,
    ):
        """Initialise the base controller and register the custom actions.

        Args:
            exclude_actions: Action names forwarded to the base controller as
                its exclusion list. ``None`` (the default) means no exclusions.
            output_model: Optional model forwarded unchanged to the base
                controller.
        """
        # Use a fresh list per instance instead of a shared mutable default.
        super().__init__([] if exclude_actions is None else exclude_actions, output_model)

        # Basic Navigation Actions
        # NOTE(review): the function keeps the name `search_google` although it
        # targets Baidu; presumably the registry keys actions by function name,
        # so renaming it would change the action exposed to the agent - confirm.
        @self.registry.action(
            _('Search the query in Baidu in the current tab, the query should be a search query like humans search in Baidu, concrete and not vague or super long. More the single most important items.'),
            param_model=SearchGoogleAction,
        )
        async def search_google(params: SearchGoogleAction, browser_session: BrowserSession):
            """Navigate the current tab to the Baidu results page for ``params.query``."""
            # Percent-encode the query so characters such as '&', '#', '%' or
            # '+' cannot truncate or corrupt the `wd` parameter.
            search_url = f'https://www.baidu.com/s?wd={quote_plus(params.query)}'

            page = await browser_session.get_current_page()
            await page.goto(search_url)
            await page.wait_for_load_state()
            msg = _('🔍 Searched for "{query}" in Baidu').format(query=params.query)
            logger.info(msg)
            return ActionResult(extracted_content=msg, include_in_memory=True)

        # Content Actions
        @self.registry.action(
            _('Extract page content to retrieve specific information from the page, e.g. all company names, a specific description, all information about xyc, 4 links with companies in structured format. Use include_links true if the goal requires links'),
        )
        async def extract_content(
            goal: str,
            page: Page,
            page_extraction_llm: BaseChatModel,
            include_links: bool = False,
        ):
            """Extract information relevant to ``goal`` from the current page.

            The page HTML is cleaned, converted to markdown, extended with the
            markdown of every iframe, and handed to ``page_extraction_llm``.
            If the LLM call fails, the markdown itself is returned instead.

            Args:
                goal: What to extract from the page.
                page: Page whose content is extracted.
                page_extraction_llm: Chat model that performs the extraction.
                include_links: Keep hyperlink markup in the markdown when true;
                    otherwise only the link text is kept.
            """
            soup = BeautifulSoup(await page.content(), 'html.parser')

            # Drop elements that carry no readable text for the LLM.
            for selector in ('script', 'style', 'textarea', 'img'):
                for node in soup.select(selector):
                    node.decompose()
            for node in soup.find_all(style=re.compile("background-image.*")):
                node.decompose()

            # Honour `include_links`: without it, <a> tags are reduced to their text.
            strip_tags = [] if include_links else ['a']
            sections = [markdownify.markdownify(str(soup), strip=strip_tags)]

            # manually append iframe text into the content so it's readable by the LLM (includes cross-origin iframes)
            for frame in page.frames:
                if frame.url == page.url or frame.url.startswith('data:'):
                    continue
                try:
                    frame_html = await frame.content()
                except Exception as exc:
                    # Best effort: one unreadable frame must not abort the
                    # extraction of the rest of the page.
                    logger.debug('Skipping iframe %s: %s', frame.url, exc)
                    continue
                sections.append(f'\n\nIFRAME {frame.url}:\n')
                sections.append(markdownify.markdownify(frame_html, strip=strip_tags))

            content = ''.join(sections)

            prompt = _('Your task is to extract the content of the page. You will be given a page and a goal and you should extract all relevant information around this goal from the page. If the goal is vague, summarize the page. Respond in json format. Extraction goal: {goal}, Page: {page}')
            template = PromptTemplate(input_variables=['goal', 'page'], template=prompt)
            try:
                output = await page_extraction_llm.ainvoke(template.format(goal=goal, page=content))
                msg = _('📄 Extracted from page\n: {content}\n').format(content=output.content)
                logger.info(msg)
                return ActionResult(extracted_content=msg, include_in_memory=True)
            except Exception as e:
                # Deliberate fallback: return the unprocessed markdown, but make
                # the failure visible instead of hiding it at debug level.
                logger.warning(_('Error extracting content: {error}').format(error=e))
                msg = _('📄 Extracted from page\n: {content}\n').format(content=content)
                logger.info(msg)
                return ActionResult(extracted_content=msg)

        @self.registry.action(
            _('Pause agent'),
            param_model=PauseAction,
        )
        async def pause(params: PauseAction):
            """Log the pause request and return it as the action result."""
            msg = _('👩 Pause agent, reason: {reason}').format(reason=params.reason)
            logger.info(msg)
            return ActionResult(extracted_content=msg, include_in_memory=True)
|
|
# Login detection and waiting action
|
|
# @self.registry.action(
|
|
# _('Detects if current page requires login and waits for authentication to complete.Wait for login completion by monitoring URL changes.'),
|
|
# param_model=WaitForLoginAction,
|
|
# )
|
|
# async def wait_for_login(params: WaitForLoginAction, browser_session: BrowserSession):
|
|
# page = await browser_session.get_current_page()
|
|
|
|
# # Get initial URL for comparison
|
|
# initial_url = page.url
|
|
# logger.info(_('🔐 Starting login detection. Initial URL: {url}').format(url=initial_url))
|
|
|
|
# # Wait for URL change indicating login completion
|
|
# msg = _('🔐 Login page detected. Waiting for authentication completion (max {timeout}s)...').format(
|
|
# timeout=params.timeout
|
|
# )
|
|
# logger.info(msg)
|
|
|
|
# final_url = await self._wait_for_url_change(
|
|
# page,
|
|
# initial_url,
|
|
# params.timeout,
|
|
# params.check_interval
|
|
# )
|
|
|
|
# if final_url and final_url != initial_url:
|
|
# success_msg = _('✅ Login completed successfully! URL changed from {initial} to {final}').format(
|
|
# initial=initial_url,
|
|
# final=final_url
|
|
# )
|
|
# logger.info(success_msg)
|
|
# return ActionResult(
|
|
# extracted_content=success_msg,
|
|
# include_in_memory=True,
|
|
# success=True
|
|
# )
|
|
# else:
|
|
# timeout_msg = _('⏰ Login timeout or no URL change detected after {timeout} seconds').format(
|
|
# timeout=params.timeout
|
|
# )
|
|
# logger.warning(timeout_msg)
|
|
# return ActionResult(
|
|
# extracted_content=timeout_msg,
|
|
# include_in_memory=True,
|
|
# success=False
|
|
# )
|
|
|
|
# async def _wait_for_url_change(
|
|
# self,
|
|
# page: Page,
|
|
# initial_url: str,
|
|
# timeout: int,
|
|
# check_interval: int
|
|
# ) -> Optional[str]:
|
|
# """Wait for URL change indicating login completion."""
|
|
# start_time = asyncio.get_event_loop().time()
|
|
|
|
# while (asyncio.get_event_loop().time() - start_time) < timeout:
|
|
# try:
|
|
# current_url = page.url
|
|
|
|
# # If URL has changed, return the new URL
|
|
# if current_url != initial_url:
|
|
# return current_url
|
|
|
|
# # Wait before checking again
|
|
# await asyncio.sleep(check_interval)
|
|
|
|
# except Exception as e:
|
|
# logger.warning(_('Error checking URL change: {error}').format(error=str(e)))
|
|
# await asyncio.sleep(check_interval)
|
|
|
|
# return None # Timeout reached without URL change |