diff --git a/libs/python/agent/agent/computers/base.py b/libs/python/agent/agent/computers/base.py index 7fbcb0f76..358fbbf42 100644 --- a/libs/python/agent/agent/computers/base.py +++ b/libs/python/agent/agent/computers/base.py @@ -19,8 +19,12 @@ async def get_dimensions(self) -> tuple[int, int]: """Get screen dimensions as (width, height).""" ... - async def screenshot(self) -> str: - """Take a screenshot and return as base64 string.""" + async def screenshot(self, text: Optional[str] = None) -> str: + """Take a screenshot and return as base64 string. + + Args: + text: Optional descriptive text (for compatibility with GPT-4o models, ignored) + """ ... async def click(self, x: int, y: int, button: str = "left") -> None: diff --git a/libs/python/agent/agent/computers/cua.py b/libs/python/agent/agent/computers/cua.py index f935be5b4..403379501 100644 --- a/libs/python/agent/agent/computers/cua.py +++ b/libs/python/agent/agent/computers/cua.py @@ -33,8 +33,12 @@ async def get_dimensions(self) -> tuple[int, int]: screen_size = await self.interface.get_screen_size() return screen_size["width"], screen_size["height"] - async def screenshot(self) -> str: - """Take a screenshot and return as base64 string.""" + async def screenshot(self, text: Optional[str] = None) -> str: + """Take a screenshot and return as base64 string. + + Args: + text: Optional descriptive text (for compatibility with GPT-4o models, ignored) + """ assert self.interface is not None screenshot_bytes = await self.interface.screenshot() return base64.b64encode(screenshot_bytes).decode('utf-8') diff --git a/libs/python/agent/agent/computers/custom.py b/libs/python/agent/agent/computers/custom.py index b5f801b69..5ab7d535f 100644 --- a/libs/python/agent/agent/computers/custom.py +++ b/libs/python/agent/agent/computers/custom.py @@ -120,8 +120,12 @@ async def get_dimensions(self) -> tuple[int, int]: return self._last_screenshot_size - async def screenshot(self) -> str: - """Take a screenshot and return as base64 string.""" + async def screenshot(self, text: Optional[str] = None) -> str: + """Take a screenshot and return as base64 string. + + Args: + text: Optional descriptive text (for compatibility with GPT-4o models, ignored) + """ result = await self._call_function(self.functions['screenshot']) b64_str = self._to_b64_str(result) # type: ignore