Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions src/askui/tools/playwright/agent_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@

import io
import subprocess
from pathlib import Path
from typing import Literal

from PIL import Image
from playwright.sync_api import (
Browser,
BrowserContext,
BrowserType,
Download,
Page,
Playwright,
ViewportSize,
Expand All @@ -22,6 +24,29 @@
from ..agent_os import AgentOs, Display, DisplaySize, InputEvent, ModifierKey, PcKey


def _to_unique_path(path: Path) -> Path:
    """Pick a target path that does not clash with an existing file.

    The requested ``path`` is used as-is when nothing exists there. Otherwise
    a numeric suffix is inserted between the stem and the final extension,
    counting up from 1 until a free name is found, e.g. ``report.pdf`` ->
    ``report (1).pdf`` -> ``report (2).pdf``. Existing files are therefore
    never overwritten by the caller.

    Args:
        path (Path): The desired target path.

    Returns:
        Path: ``path`` itself or a counter-suffixed sibling that did not exist
            on disk at the moment it was checked.
    """
    candidate = path
    attempt = 0
    # Keep probing until the candidate name is free; the first probe is the
    # unmodified path, later probes carry " (1)", " (2)", ...
    while candidate.exists():
        attempt += 1
        candidate = path.with_name(f"{path.stem} ({attempt}){path.suffix}")
    return candidate


class PlaywrightAgentOs(AgentOs):
"""Playwright-based implementation of `AgentOs`.

Expand All @@ -45,6 +70,11 @@ class PlaywrightAgentOs(AgentOs):
Defaults to `True`.
install_dependencies (bool, optional): Whether to install system dependencies
(requires root permissions). Defaults to `False`.
download_dir (str | Path | None, optional): Directory into which files
downloaded by the browser are automatically copied once they finish.
When ``None``, downloads are left in Playwright's temporary location
(and deleted when the browser closes). The directory is created if it
does not exist. Defaults to `None`.
"""

_REPORTER_ROLE_NAME: str = "PlaywrightAgentOS"
Expand All @@ -58,13 +88,15 @@ def __init__(
slow_mo: int = 0,
install_browser: bool = True,
install_dependencies: bool = False,
download_dir: str | Path | None = None,
) -> None:
self._browser_type = browser_type
self._headless = headless
self._viewport_size = viewport_size
self._slow_mo = slow_mo
self._install_browser = install_browser
self._install_dependencies = install_dependencies
self._download_dir = Path(download_dir) if download_dir is not None else None

# Playwright objects
self._playwright: Playwright | None = None
Expand All @@ -77,6 +109,9 @@ def __init__(
self._listening = False
self._event_queue: list[InputEvent] = []

# Files copied into `download_dir`, in the order they finished
self._downloaded_files: list[Path] = []

def _install_playwright_browser(self) -> None:
"""Install Playwright browser if requested."""
if not self._install_browser:
Expand Down Expand Up @@ -162,13 +197,58 @@ def connect(self) -> None:
)

self._page = self._context.new_page()
self._page.on("download", self._on_download)
# Navigate to a blank page to ensure we have a working page
self._page.goto("data:text/html,<html><body><h1>Starting...</h1></body></html>")
self._reporter.add_message(
self._REPORTER_ROLE_NAME,
"Connected to playwright browser",
)

def _on_download(self, download: Download) -> None:
    """Copy a finished download into `download_dir`.

    Registered as the page's ``download`` event handler. When `download_dir`
    is configured, the file is saved there under its suggested filename
    (auto-renamed on collision); otherwise the download is left untouched in
    Playwright's temporary location. A leading ``~`` in `download_dir` is
    expanded to the user's home directory. Failures are reported but never
    propagated, so a failed download cannot break the automation run.

    Args:
        download (Download): The Playwright download to persist.
    """
    if self._download_dir is None:
        return
    # Use only the filename component to avoid path traversal from a
    # server-suggested name such as "../../etc/passwd".
    suggested_name = Path(download.suggested_filename).name
    if suggested_name in {"", ".", ".."}:
        # Such names do not denote a file inside the directory (they resolve
        # to the directory itself or its parent), so fall back to a neutral
        # filename instead.
        suggested_name = "download"
    try:
        # Everything that touches the filesystem lives inside the `try`,
        # including the collision probe, so that no I/O error can escape
        # the event handler.
        download_dir = self._download_dir.expanduser()
        download_dir.mkdir(parents=True, exist_ok=True)
        target = _to_unique_path(download_dir / suggested_name)
        download.save_as(target)
    except Exception as e:  # noqa: BLE001 - never let a download break the run
        self._reporter.add_message(
            self._REPORTER_ROLE_NAME,
            f"Failed to save download '{suggested_name}': {e}",
        )
        return
    self._downloaded_files.append(target)
    self._reporter.add_message(
        self._REPORTER_ROLE_NAME,
        f"Downloaded file saved to {target}",
    )

@property
def downloaded_files(self) -> list[Path]:
    """Snapshot of the downloads saved into `download_dir` this session.

    Returns:
        list[Path]: A new list holding the target paths of the saved
            downloads, in the order they finished. Empty when no
            `download_dir` was configured or nothing was downloaded yet.
            Mutating the returned list does not affect the internal record.
    """
    return self._downloaded_files.copy()

@override
def disconnect(self) -> None:
"""Terminates the connection to the browser."""
Expand Down
12 changes: 12 additions & 0 deletions src/askui/tools/store/web/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""Web-specific tools.
These tools require a `PlaywrightAgentOs` and are designed for use with
`WebVisionAgent`.
"""

from askui.tools.store.web.save_screenshot_tool import WebSaveScreenshotTool

__all__ = [
"WebSaveScreenshotTool",
]
92 changes: 92 additions & 0 deletions src/askui/tools/store/web/save_screenshot_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from pathlib import Path

from askui.models.shared import PlaywrightBaseTool


class WebSaveScreenshotTool(PlaywrightBaseTool):
    """
    Tool for saving screenshots of the currently active web page to disk.

    This tool captures a screenshot of the current browser page and saves
    it to a specified location on the filesystem. The screenshot is saved as a PNG
    image file. The directory structure will be created automatically if it doesn't
    exist.

    Args:
        base_dir (str): The base directory path where screenshots will be saved.
            All screenshot paths will be relative to this directory.

    Example:
        ```python
        from askui import WebVisionAgent
        from askui.tools.store.web import WebSaveScreenshotTool

        with WebVisionAgent() as agent:
            agent.act(
                "Take a screenshot and save it as demo/demo.png",
                tools=[WebSaveScreenshotTool(base_dir="/path/to/screenshots")]
            )
        ```

    Example:
        ```python
        from askui import WebVisionAgent
        from askui.tools.store.web import WebSaveScreenshotTool

        with WebVisionAgent(
            act_tools=[WebSaveScreenshotTool(base_dir="/path/to/screenshots")]
        ) as agent:
            agent.act("Take a screenshot and save it as demo/demo.png")
        ```
    """

    def __init__(self, base_dir: str) -> None:
        super().__init__(
            name="save_screenshot_tool",
            description=(
                "Saves a screenshot of the currently active web page "
                "to disk as a PNG image file. The screenshot is captured from the "
                "currently active browser page. The directory structure for the "
                "specified path will be created automatically if it doesn't exist. "
                "The PNG extension is automatically appended to the provided path."
            ),
            input_schema={
                "type": "object",
                "properties": {
                    "image_path": {
                        "type": "string",
                        "description": (
                            "The relative path where the screenshot should be saved, "
                            "without the PNG extension. The path is relative to the "
                            "base directory specified during tool initialization. "
                            "For example, if base_dir is '/screenshots' and "
                            "image_path is 'test/my_screenshot', the file will be "
                            "saved as '/screenshots/test/my_screenshot.png'. "
                            "Subdirectories will be created automatically if needed."
                        ),
                    },
                },
                "required": ["image_path"],
            },
        )
        self._base_dir = base_dir
        self.is_cacheable = True

    def __call__(self, image_path: str) -> str:
        """
        Save a screenshot of the current web page to disk.

        Args:
            image_path (str): The relative path where the screenshot should be saved,
                without the PNG extension. The path is relative to the base directory
                specified during tool initialization. A trailing ``.png``
                (case-insensitive) is tolerated and not duplicated.

        Returns:
            str: A confirmation message indicating where the screenshot was saved,
                including the path built from the base directory.
        """
        extension = ".png"
        relative_path = image_path
        # The caller is asked to omit the extension, but prompts such as
        # "save it as demo/demo.png" make it likely to slip through; strip it
        # so the file does not end up as "demo.png.png".
        if relative_path.lower().endswith(extension):
            relative_path = relative_path[: -len(extension)]

        absolute_image_path = Path(self._base_dir) / f"{relative_path}{extension}"
        absolute_image_path.parent.mkdir(parents=True, exist_ok=True)

        image = self.agent_os.screenshot()
        image.save(absolute_image_path, format="PNG")
        return f"Screenshot of the current web page saved to {absolute_image_path}."
33 changes: 32 additions & 1 deletion src/askui/web_agent.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import warnings
from pathlib import Path

from pydantic import ConfigDict, validate_call

Expand Down Expand Up @@ -39,6 +40,35 @@


class WebAgent(Agent):
"""Web automation agent backed by a Playwright browser.

Args:
reporters (list[Reporter] | None, optional): Reporters used for reporting.
Defaults to `None`.
settings (AgentSettings | None, optional): Agent settings. Defaults to
`None`.
retry (Retry | None, optional): Retry strategy. Defaults to `None`.
act_tools (list[Tool] | None, optional): Additional tools made available
during `act()`. Defaults to `None`.
callbacks (list[ConversationCallback] | None, optional): Conversation
callbacks. Defaults to `None`.
truncation_strategy (TruncationStrategy | None, optional): Message history
truncation strategy. Defaults to `None`.
download_dir (str | Path | None, optional): Directory into which files
downloaded by the browser are automatically copied once they finish
(auto-renamed on filename collision). When `None`, downloads are left
in Playwright's temporary location and removed when the browser
closes. Defaults to `None`.

Example:
```python
from askui import WebAgent

with WebAgent(download_dir="~/Downloads/askui") as agent:
agent.act("Open example.com and download the sample PDF")
```
"""

@telemetry.record_call(
exclude={
"reporters",
Expand All @@ -57,9 +87,10 @@ def __init__(
act_tools: list[Tool] | None = None,
callbacks: list[ConversationCallback] | None = None,
truncation_strategy: TruncationStrategy | None = None,
download_dir: str | Path | None = None,
) -> None:
reporter = CompositeReporter(reporters=reporters)
self.os = PlaywrightAgentOs(reporter)
self.os = PlaywrightAgentOs(reporter, download_dir=download_dir)
super().__init__(
reporter=reporter,
retry=retry,
Expand Down
Empty file.
71 changes: 71 additions & 0 deletions tests/e2e/tools/playwright/test_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from pathlib import Path

import pytest

from askui.tools.playwright.agent_os import PlaywrightAgentOs

# HTML fixture: a page with a single link (id ``dl``) that downloads a small
# text file via a ``data:`` URL. The ``download`` attribute makes the browser
# treat the navigation as a download and provides the suggested filename
# (``sample.txt``); the URL-encoded payload decodes to ``Hello AskUI``.
_DOWNLOAD_PAGE = (
'<a id="dl" download="sample.txt" href="data:text/plain,Hello%20AskUI">download</a>'
)


def _trigger_download(agent_os: PlaywrightAgentOs) -> None:
    """Load the download fixture page and click its link.

    Args:
        agent_os (PlaywrightAgentOs): A connected agent OS whose page is used
            to render the fixture and start the download.
    """
    browser_page = agent_os._page
    assert browser_page is not None
    browser_page.set_content(_DOWNLOAD_PAGE)
    browser_page.click("#dl")
    # Fixed grace period so the download event can fire and the file can be
    # written before the caller inspects the result.
    browser_page.wait_for_timeout(2000)


@pytest.mark.timeout(60)
def test_download_is_copied_into_download_dir(tmp_path: Path) -> None:
    """A download ends up in ``download_dir`` under its suggested filename."""
    expected_file = tmp_path / "sample.txt"
    browser_os = PlaywrightAgentOs(
        headless=True, install_browser=False, download_dir=tmp_path
    )
    browser_os.connect()
    try:
        _trigger_download(browser_os)
    finally:
        # Always release the browser, even if triggering the download fails.
        browser_os.disconnect()

    assert expected_file.exists()
    assert expected_file.read_text(encoding="utf-8") == "Hello AskUI"
    assert browser_os.downloaded_files == [expected_file]


@pytest.mark.timeout(60)
def test_colliding_downloads_are_auto_renamed(tmp_path: Path) -> None:
    """A download whose name is already taken is saved with a `` (1)`` suffix."""
    existing_file = tmp_path / "sample.txt"
    existing_file.write_text("pre-existing", encoding="utf-8")

    browser_os = PlaywrightAgentOs(
        headless=True, install_browser=False, download_dir=tmp_path
    )
    browser_os.connect()
    try:
        _trigger_download(browser_os)
    finally:
        # Always release the browser, even if triggering the download fails.
        browser_os.disconnect()

    renamed_copy = tmp_path / "sample (1).txt"
    assert renamed_copy.exists()
    assert renamed_copy.read_text(encoding="utf-8") == "Hello AskUI"
    # The file that was there before the download must be left untouched.
    assert existing_file.read_text(encoding="utf-8") == "pre-existing"


@pytest.mark.timeout(60)
def test_no_download_dir_leaves_files_in_temp(tmp_path: Path) -> None:
    """Without a ``download_dir`` nothing is recorded or copied anywhere."""
    browser_os = PlaywrightAgentOs(headless=True, install_browser=False)
    browser_os.connect()
    try:
        _trigger_download(browser_os)
    finally:
        # Always release the browser, even if triggering the download fails.
        browser_os.disconnect()

    recorded_downloads = browser_os.downloaded_files
    assert recorded_downloads == []
    assert list(tmp_path.iterdir()) == []
Empty file.
Loading
Loading