diff --git a/examples/secrets.py b/examples/secrets.py new file mode 100644 index 00000000..0c86755b --- /dev/null +++ b/examples/secrets.py @@ -0,0 +1,100 @@ +"""Example demonstrating the secret scope. + +Secrets let the agent *use* sensitive values (e.g. type a password) while the value is +never sent to the LLM. The model only ever sees the placeholder +``<|secret|>NAME<|secret|>``; the real value is substituted into tool calls at execution +time. Literal values are also redacted from the LLM history, tool outputs and the cache. + +Two ways to provide a secret are shown: +1. Hardcoded value (handy for a quick demo only). +2. Read from an environment variable (recommended for real usage). + +Required environment variables (see .env): +- ASKUI_WORKSPACE_ID, ASKUI_TOKEN - for the default AskUI providers +- APP_PASSWORD - the example login password, read at runtime (see below) + +Set the secret in your shell before running (do NOT hardcode real secrets in code): + export APP_PASSWORD="my-real-password" + +Note: a secret typed into a *visible* field can still appear in screenshots sent to the +model; on-screen secrets cannot currently be hidden. +""" + +import logging +import os + +from askui import ComputerAgent, Secret + +logging.basicConfig( + level=logging.INFO, + format="[%(levelname)s] %(asctime)s %(pathname)s:%(lineno)d | %(message)s", +) +logger = logging.getLogger(__name__) + + +def secrets_from_env() -> list[Secret]: + """Build secrets by reading their values from environment variables. + + This is the recommended approach: keep real values out of source code and pass them + in from the environment. We only *read* env vars here (never set them in code). + """ + return [ + Secret( + name="password", + value=os.environ["APP_PASSWORD"], + description="the application login password", + ), + ] + + +def secrets_hardcoded() -> list[Secret]: + """Build secrets with hardcoded values. 
+ + Convenient for a quick local demo, but never commit real secrets to source control. + """ + return [ + Secret( + name="password", + value="hunter2-demo-only", + description="the application login password", + ), + ] + + +def run_with_agent_level_secrets() -> None: + """Define secrets on the agent so they apply to every act()/type() call.""" + with ComputerAgent(secrets=secrets_from_env()) as agent: + # The agent emits the placeholder; the real value is typed at execution time. + agent.act("Log in as 'admin' using the password") + + # Deterministic typing also resolves the placeholder at the OS boundary. + agent.click("Password field") + agent.type("<|secret|>password<|secret|>") + + +def run_with_per_call_secrets() -> None: + """Provide secrets only for a single act() call (overrides agent-level on name).""" + with ComputerAgent() as agent: + agent.act( + "Enter the one-time PIN into the verification field", + secrets=[ + Secret( + name="pin", + value=os.environ.get("APP_OTP", "000000"), + description="6-digit one-time PIN", + ), + ], + ) + + +def run_with_hardcoded_secret() -> None: + """Quick demo using a hardcoded secret value (not for production).""" + with ComputerAgent(secrets=secrets_hardcoded()) as agent: + agent.act("Log in using the password") + + +if __name__ == "__main__": + # Pick the variant you want to try: + run_with_agent_level_secrets() + # run_with_per_call_secrets() + # run_with_hardcoded_secret() diff --git a/src/askui/__init__.py b/src/askui/__init__.py index 6f9c78f2..a4e33463 100644 --- a/src/askui/__init__.py +++ b/src/askui/__init__.py @@ -32,6 +32,7 @@ UrlImageSourceParam, ) from .models.exceptions import AutomationError +from .models.shared.secrets import Secret, SecretVault from .models.shared.settings import ( DEFAULT_GET_RESOLUTION, DEFAULT_LOCATE_RESOLUTION, @@ -103,6 +104,8 @@ "ResponseSchema", "ResponseSchemaBase", "Retry", + "Secret", + "SecretVault", "TextBlockParam", "TextCitationParam", "Tool", diff --git 
a/src/askui/agent_base.py b/src/askui/agent_base.py index 5775a9c0..45f39174 100644 --- a/src/askui/agent_base.py +++ b/src/askui/agent_base.py @@ -15,6 +15,7 @@ from askui.locators.locators import Locator from askui.models.shared.agent_message_param import MessageParam from askui.models.shared.conversation import Conversation, Speakers +from askui.models.shared.secrets import Secret, SecretVault from askui.models.shared.settings import ( ActSettings, CacheWritingSettings, @@ -61,6 +62,7 @@ def __init__( settings: AgentSettings | None = None, callbacks: list[ConversationCallback] | None = None, truncation_strategy: TruncationStrategy | None = None, + secrets: list[Secret] | None = None, ) -> None: load_dotenv() self._reporter: Reporter = reporter or CompositeReporter(reporters=None) @@ -68,6 +70,12 @@ def __init__( self._tools = tools or [] + # Secrets the agent may use but the LLM must never see. Real values are + # substituted into tool inputs at execution time; placeholders are all the + # model ever sees. Literal values are redacted from the LLM history and tool + # outputs. See `askui.models.shared.secrets`. + self._secret_vault = SecretVault(secrets) + # Store settings and model providers _settings = settings or AgentSettings() self._vlm_provider = _settings.vlm_provider @@ -117,7 +125,7 @@ def __init__( self.caching_settings = CachingSettings() @telemetry.record_call( - exclude={"goal", "act_settings", "tools", "tracing_settings"} + exclude={"goal", "act_settings", "tools", "tracing_settings", "secrets"} ) @validate_call(config=ConfigDict(arbitrary_types_allowed=True)) def act( @@ -127,6 +135,7 @@ def act( tools: list[Tool] | ToolCollection | None = None, caching_settings: CachingSettings | None = None, tracing_settings: OtelSettings | None = None, + secrets: list[Secret] | None = None, ) -> None: """ Instructs the agent to achieve a specified goal through autonomous actions. 
@@ -154,6 +163,14 @@ def act( tracing_settings (OtelSettings | None, optional): The tracing settings for the act execution. Controls if and how traces are exported via Opentelemetry. + secrets (list[Secret] | None, optional): Secrets available for this act + execution, in addition to any defined on the agent. The model only ever + sees the placeholder `<|secret|>NAME<|secret|>`; the real value is + substituted into tool inputs at execution time and is never sent to the + model. Per-call secrets override agent-level + secrets with the same name. Defaults to `None`. Note: a secret typed + into a visible field may still appear in screenshots sent to the model; + on-screen secrets cannot currently be hidden. Returns: None @@ -228,18 +245,30 @@ def act( # Agent can use existing caches and will record new actions ``` """ + # Merge agent-level and per-call secrets (per-call wins on name collision). + active_vault = self._secret_vault.merge(SecretVault(secrets)) + goal_str = ( goal if isinstance(goal, str) else "\n".join(msg.model_dump_json() for msg in goal) ) - self._reporter.add_message("User", f'act: "{goal_str}"') + # Redact any literal secret value the user may have placed in the goal before + # it reaches the reporter/logs. + redacted_goal_str = active_vault.redact(goal_str) + self._reporter.add_message("User", f'act: "{redacted_goal_str}"') logger.debug( - "Agent received instruction to act towards the goal '%s'", goal_str + "Agent received instruction to act towards the goal '%s'", redacted_goal_str ) messages: list[MessageParam] = ( [MessageParam(role="user", content=goal)] if isinstance(goal, str) else goal ) + # Initial messages bypass Conversation._add_message, so redact them here to keep + # literal secrets out of the history sent to the LLM. + messages = [active_vault.redact_message(message) for message in messages] + # Make the vault available for substitution (tools) and redaction (history). + # The Conversation propagates it to the ToolCollection. 
+ self._conversation.secret_vault = active_vault _act_settings = act_settings or self.act_settings _caching_settings: CachingSettings = caching_settings or self.caching_settings @@ -275,6 +304,20 @@ def _build_tools(self, tools: list[Tool] | ToolCollection | None) -> ToolCollect tool_collection += tools return tool_collection + def _resolve_secrets(self, text: str) -> str: + """Substitute `<|secret|>NAME<|secret|>` placeholders with real values. + + Used by deterministic input methods (e.g. `type`) so callers/agents can pass a + placeholder that resolves to the real value at the OS boundary. + """ + resolved: str = self._secret_vault.substitute(text) + return resolved + + def _redact_secrets(self, text: str) -> str: + """Redact literal secret values to their placeholders (for reporting/logs).""" + redacted: str = self._secret_vault.redact(text) + return redacted + def _patch_act_with_cache( self, caching_settings: CachingSettings, diff --git a/src/askui/android_agent.py b/src/askui/android_agent.py index 98b79143..df82c1cf 100644 --- a/src/askui/android_agent.py +++ b/src/askui/android_agent.py @@ -10,6 +10,7 @@ from askui.container import telemetry from askui.locators.locators import Locator from askui.models.models import Point +from askui.models.shared.secrets import Secret from askui.models.shared.settings import ActSettings, MessageSettings from askui.models.shared.tools import Tool from askui.models.shared.truncation_strategies import TruncationStrategy @@ -53,6 +54,7 @@ class AndroidAgent(Agent): settings (AgentSettings | None, optional): Provider-based model settings. If `None`, uses the default AskUI model stack. retry (Retry, optional): The retry instance to use for retrying failed actions. Defaults to `ConfigurableRetry` with exponential backoff. Currently only supported for `locate()` method. act_tools (list[Tool] | None, optional): Additional tools to make available for the `act()` method. 
+ secrets (list[Secret] | None, optional): Sensitive values (e.g. passwords) the agent may use but the LLM must never see. The model only sees the placeholder `<|secret|>NAME<|secret|>`; the real value is substituted at execution time and kept out of the LLM prompt, reporter, logs and cache. Also usable in deterministic `type()` and overridable per call via `act(..., secrets=[...])`. Note: a secret typed into a visible field may still appear in screenshots sent to the model; on-screen secrets cannot currently be hidden. Example: ```python @@ -72,6 +74,7 @@ class AndroidAgent(Agent): "act_tools", "callbacks", "truncation_strategy", + "secrets", } ) @validate_call(config=ConfigDict(arbitrary_types_allowed=True)) @@ -84,6 +87,7 @@ def __init__( act_tools: list[Tool] | None = None, callbacks: list[ConversationCallback] | None = None, truncation_strategy: TruncationStrategy | None = None, + secrets: list[Secret] | None = None, ) -> None: reporter = CompositeReporter(reporters=reporters) self.os = PpadbAgentOs(device_identifier=device, reporter=reporter) @@ -96,6 +100,7 @@ def __init__( settings=settings, callbacks=callbacks, truncation_strategy=truncation_strategy, + secrets=secrets, ) self.act_tool_collection.add_agent_os(self.act_agent_os_facade) # Override default act settings with Android-specific settings @@ -177,9 +182,10 @@ def type( agent.type("password123") # Types a password ``` """ - self._reporter.add_message("User", f'type: "{text}"') - logger.debug("AndroidAgent received instruction to type", extra={"text": text}) - self.os.type(text) + # Reporter sees the placeholder; the device receives the resolved value. 
+ self._reporter.add_message("User", f'type: "{self._redact_secrets(text)}"') + logger.debug("AndroidAgent received instruction to type") + self.os.type(self._resolve_secrets(text)) @telemetry.record_call() @validate_call diff --git a/src/askui/computer_agent.py b/src/askui/computer_agent.py index ad0a6627..42f4cca0 100644 --- a/src/askui/computer_agent.py +++ b/src/askui/computer_agent.py @@ -10,6 +10,7 @@ from askui.container import telemetry from askui.locators.locators import Locator from askui.models.models import Point +from askui.models.shared.secrets import Secret from askui.models.shared.settings import ActSettings, LocateSettings, MessageSettings from askui.models.shared.tools import Tool from askui.models.shared.truncation_strategies import TruncationStrategy @@ -59,6 +60,13 @@ class ComputerAgent(Agent): act_tools (list[Tool] | None, optional): Additional tools to make available for the `act()` method for every call. Same tools can instead be passed per call via `act(..., tools=[...])` (see example below). + secrets (list[Secret] | None, optional): Sensitive values (e.g. passwords) the + agent may use but the LLM must never see. The model only sees the placeholder + `<|secret|>NAME<|secret|>`; the real value is substituted at execution time and is + kept out of the LLM prompt, reporter, logs and cache. Also usable in + deterministic `type()` and overridable per call via `act(..., secrets=[...])`. + Note: a secret typed into a visible field may still appear in screenshots sent + to the model; on-screen secrets cannot currently be hidden. 
Example: ```python @@ -99,6 +107,7 @@ class ComputerAgent(Agent): "act_tools", "callbacks", "truncation_strategy", + "secrets", } ) @validate_call(config=ConfigDict(arbitrary_types_allowed=True)) @@ -112,6 +121,7 @@ def __init__( act_tools: list[Tool] | None = None, callbacks: list[ConversationCallback] | None = None, truncation_strategy: TruncationStrategy | None = None, + secrets: list[Secret] | None = None, ) -> None: reporter = CompositeReporter(reporters=reporters) self.tools = tools or AgentToolbox( @@ -128,6 +138,7 @@ def __init__( settings=settings, callbacks=callbacks, truncation_strategy=truncation_strategy, + secrets=secrets, ) self.act_agent_os_facade: ComputerAgentOsFacade = ComputerAgentOsFacade( self.tools.os @@ -320,7 +331,9 @@ def type( agent.type("text", locator="Input field", offset=(5, 0)) # Click 5 pixels right of "Input field", then type ``` """ - msg = f'type "{text}"' + # Reporter/logs see the placeholder; the OS receives the resolved value. + redacted_text = self._redact_secrets(text) + msg = f'type "{redacted_text}"' if locator is not None: msg += f" into {locator}" if clear: @@ -337,7 +350,7 @@ def type( ) logger.debug("Agent received instruction to %s", msg) self._reporter.add_message("User", msg) - self.tools.os.type(text) + self.tools.os.type(self._resolve_secrets(text)) @telemetry.record_call() @validate_call diff --git a/src/askui/models/shared/conversation.py b/src/askui/models/shared/conversation.py index ac6438fb..1a74b097 100644 --- a/src/askui/models/shared/conversation.py +++ b/src/askui/models/shared/conversation.py @@ -10,6 +10,7 @@ from askui.model_providers.image_qa_provider import ImageQAProvider from askui.model_providers.vlm_provider import VlmProvider from askui.models.shared.agent_message_param import MessageParam +from askui.models.shared.secrets import SecretVault from askui.models.shared.settings import ActSettings from askui.models.shared.tools import ToolCollection from askui.models.shared.truncation_strategies 
import ( @@ -91,6 +92,8 @@ def __init__( self.cache_manager = cache_manager self._callbacks: "list[ConversationCallback]" = callbacks or [] + self.secret_vault: SecretVault = SecretVault() + # State for current execution (set in start()) self.settings: ActSettings = ActSettings() self.tools: ToolCollection = ToolCollection() @@ -196,13 +199,18 @@ def _setup_control_loop( self._executed_from_cache = False self.speakers.reset_state() - # Store execution parameters - self.settings = settings or ActSettings() + # Store execution parameters. Deep-copy settings so per-call mutations + # (speaker handoff + secret sections appended to the system prompt) do not + # accumulate on the Agent's persistent, reused settings object across calls. + self.settings = (settings or ActSettings()).model_copy(deep=True) self.tools = tools or ToolCollection() + self.tools.secret_vault = self.secret_vault self._reporters = reporters or [] # Auto-populate speaker descriptions and switch_speaker tool self._setup_speaker_handoff() + # Advertise available secret placeholders to the model + self._setup_secrets() @tracer.start_as_current_span("_execute_control_loop") def _execute_control_loop(self) -> None: @@ -261,6 +269,24 @@ def _setup_speaker_handoff(self) -> None: switch_tool = SwitchSpeakerTool(speaker_names=handoff_speakers) self.tools.append_tool(switch_tool) + def _setup_secrets(self) -> None: + """Advertise available secret placeholders to the model. + + Appends an ```` section to ``system_capabilities`` so the + model knows which ``<|secret|>NAME<|secret|>`` placeholders it may use. No-op + when no + secrets are registered or no system prompt is set. Mirrors + ``_setup_speaker_handoff``'s ```` injection. 
+ """ + if not self.secret_vault: + return + section = self.secret_vault.system_prompt_section() + if not section or self.settings.messages.system is None: + return + has_capabilities = self.settings.messages.system.system_capabilities + separator = "\n\n" if has_capabilities else "" + self.settings.messages.system.system_capabilities += f"{separator}{section}" + def _build_speaker_descriptions(self) -> str: """Build formatted speaker descriptions for the system prompt. @@ -368,9 +394,17 @@ def _execute_tools_if_present(self, message: MessageParam) -> MessageParam | Non def _add_message(self, message: MessageParam) -> None: """Add message to conversation history. + Redacts literal secret values from the message before it reaches the reporter, + truncation strategy (LLM history) or cache recording. This is the single point + every speaker-produced message and tool result flows through. + Args: message: Message to add """ + # Defense-in-depth: scrub any literal secret value that slipped into content + # (e.g. a tool echoing typed text) before it leaves the trusted boundary. + message = self.secret_vault.redact_message(message) + # Report to reporter self._reporter.add_message( self.current_speaker.name, message.model_dump(mode="json") diff --git a/src/askui/models/shared/secrets.py b/src/askui/models/shared/secrets.py new file mode 100644 index 00000000..83b4d488 --- /dev/null +++ b/src/askui/models/shared/secrets.py @@ -0,0 +1,260 @@ +"""Secret scope — values the agent may use but the LLM must never see. + +Users register secrets by name. The LLM only ever sees placeholders of the form +``<|secret|>NAME<|secret|>``. Real values are substituted only at tool-execution time +(deepest point before the OS call), and a redaction safety-net scrubs any literal +secret value from anything that leaves the trusted boundary (LLM prompt/history, +reporter, logs, cache files). 
+ +Two operations: +- ``substitute`` (placeholder -> value): applied to a copy of tool input immediately + before a tool runs. Conversation history keeps the placeholder. +- ``redact`` / ``redact_message`` (value -> placeholder): defense-in-depth applied + before content reaches the LLM, reporter, logs or cache. + +Limitations (best-effort): +- **Screenshots are NOT protected.** A secret typed into a visible field can appear in + subsequent screenshots sent to the model (and in `get()`/OCR over such a screen). + On-screen secrets cannot currently be hidden — only text fed to the model/reporter/ + logs/cache is scrubbed. +- Redaction is exact-substring only — transformed forms (base64/url-encoded/partial) + are not caught. +- Very short values (< 4 chars) are not redacted to avoid over-redacting unrelated text + (placeholder usage remains the primary path). +""" + +import logging +import re +from typing import Any + +from pydantic import BaseModel, Field, SecretStr + +from askui.models.shared.agent_message_param import ( + ContentBlockParam, + MessageParam, + TextBlockParam, + ToolResultBlockParam, + ToolUseBlockParam, +) + +logger = logging.getLogger(__name__) + +_PLACEHOLDER_PREFIX = "<|secret|>" +_PLACEHOLDER_SUFFIX = "<|secret|>" +# Match `<|secret|>NAME<|secret|>`; non-greedy so adjacent placeholders are matched +# individually. The delimiters make any (non-empty) name unambiguous, so no charset +# restriction on names is required. +_PLACEHOLDER_PATTERN = re.compile( + re.escape(_PLACEHOLDER_PREFIX) + r"(.+?)" + re.escape(_PLACEHOLDER_SUFFIX) +) +# Values shorter than this are not redacted (the placeholder path still works); short +# values risk over-redacting unrelated occurrences in normal text. 
+_MIN_REDACTION_LENGTH = 4 + + +def _placeholder_for(name: str) -> str: + """Build the placeholder string for a secret ``name``.""" + return f"{_PLACEHOLDER_PREFIX}{name}{_PLACEHOLDER_SUFFIX}" + + +class Secret(BaseModel): + """A named secret value the agent may use but the LLM must never see. + + The agent references the secret in tool calls via its placeholder + (`<|secret|>NAME<|secret|>`); the real value is substituted at execution time. + + Args: + name (str): Identifier used in the placeholder `<|secret|>NAME<|secret|>`. Must + be non-empty. + value (str | SecretStr): The sensitive value. Accepts a plain `str` (wrapped + automatically) or a `SecretStr`; stored as a `SecretStr` so it is masked in + reprs, logs and `model_dump()`/`model_dump_json()`. Substituted into tool + calls at execution time; never sent to the model. Read the real value via + `secret.value.get_secret_value()` only where you actually need it. + description (str, optional): Human-readable hint shown to the model so it knows + what the placeholder is for (e.g. `"the user's login password"`). The + description itself IS sent to the model, so it must not contain the secret. + Defaults to `""`. + + Note: + The real value is kept out of the model prompt, reporter, logs and cache, but a + secret typed into a **visible** field can still appear in subsequent screenshots + sent to the model. On-screen secrets cannot currently be hidden. + + Example: + ```python + from askui import ComputerAgent, Secret + + with ComputerAgent( + secrets=[Secret(name="password", value="hunter2")] + ) as agent: + agent.act("Log in as admin using the password") + ``` + """ + + name: str = Field(min_length=1) + value: SecretStr + description: str = "" + + @property + def placeholder(self) -> str: + """The placeholder string the LLM uses to reference this secret.""" + return _placeholder_for(self.name) + + +class SecretVault: + """Holds registered secrets and performs substitution and redaction. 
+ + Real secret values live only here. See the module docstring for the trust model. + + Args: + secrets (list[Secret] | None, optional): Secrets to register. Later entries win + on name collision. Defaults to `None` (empty vault). + """ + + def __init__(self, secrets: list[Secret] | None = None) -> None: + self._secrets: dict[str, Secret] = { + secret.name: secret for secret in (secrets or []) + } + + def __bool__(self) -> bool: + return bool(self._secrets) + + @property + def names(self) -> list[str]: + """Names of all registered secrets.""" + return list(self._secrets.keys()) + + @property + def secrets(self) -> list[Secret]: + """All registered secrets.""" + return list(self._secrets.values()) + + def merge(self, other: "SecretVault") -> "SecretVault": + """Return a new vault combining this vault with `other`. + + Secrets in `other` take precedence on name collision. + """ + return SecretVault(self.secrets + other.secrets) + + def substitute(self, obj: Any) -> Any: + """Recursively replace `<|secret|>NAME<|secret|>` placeholders with real values. + + Returns a new object; the input is not mutated. Unknown placeholder names are + left intact. + """ + if not self._secrets: + return obj + if isinstance(obj, str): + return self._substitute_str(obj) + if isinstance(obj, dict): + return {key: self.substitute(value) for key, value in obj.items()} + if isinstance(obj, list): + return [self.substitute(item) for item in obj] + if isinstance(obj, tuple): + return tuple(self.substitute(item) for item in obj) + return obj + + def _substitute_str(self, text: str) -> str: + def _replace(match: "re.Match[str]") -> str: + # Tolerate stray whitespace the model may add inside the delimiters + # (e.g. ``<|secret|> password <|secret|>``). 
+            name = match.group(1).strip()
+            secret = self._secrets.get(name)
+            if secret is None:
+                logger.debug("Unknown secret placeholder '%s' left unresolved", name)
+                return match.group(0)
+            return secret.value.get_secret_value()
+
+        return _PLACEHOLDER_PATTERN.sub(_replace, text)
+
+    def redact(self, obj: Any) -> Any:
+        """Recursively replace literal secret values with their placeholders.
+
+        Emits a warning (naming the secret, never its value) whenever a literal value is
+        found and replaced. Returns a new object; the input is not mutated.
+        """
+        if not self._secrets:
+            return obj
+        if isinstance(obj, str):
+            return self._redact_str(obj)
+        if isinstance(obj, dict):
+            return {key: self.redact(value) for key, value in obj.items()}
+        if isinstance(obj, list):
+            return [self.redact(item) for item in obj]
+        if isinstance(obj, tuple):
+            return tuple(self.redact(item) for item in obj)
+        return obj
+
+    def _redact_str(self, text: str) -> str:
+        """Replace every literal secret value in `text` with its placeholder.
+
+        All values are matched in a single left-to-right pass, longest candidate
+        first, so that:
+
+        - a secret whose value is a substring of another secret's value (e.g. a PIN
+          `1234` and a card number `12345678`) cannot split the longer value and
+          leave its remainder in clear text, and
+        - placeholders (already present in `text` or inserted by this call) are
+          never re-scanned, so a value that happens to occur inside a placeholder
+          (e.g. the value `secret`, or a value equal to another secret's name)
+          cannot corrupt it.
+
+        Values shorter than `_MIN_REDACTION_LENGTH` are not redacted. If several
+        secrets share the same value, the first registered one supplies the
+        placeholder. A warning naming the secret (never its value) is logged once
+        per redacted secret per call.
+
+        Args:
+            text (str): The text to scrub.
+
+        Returns:
+            str: `text` with literal secret values replaced by placeholders.
+        """
+        secret_by_value: dict[str, Secret] = {}
+        for secret in self._secrets.values():
+            value = secret.value.get_secret_value()
+            if len(value) >= _MIN_REDACTION_LENGTH:
+                # First registered secret wins when two secrets share a value.
+                secret_by_value.setdefault(value, secret)
+        if not secret_by_value:
+            return text
+
+        # Registered placeholders take part in the match only so they are consumed
+        # as a whole and skipped; nothing inside them is ever treated as a value.
+        placeholders = {secret.placeholder for secret in self._secrets.values()}
+        # Longest first: at any given position the regex tries alternatives in
+        # order, so the longest candidate starting there is the one replaced.
+        candidates = sorted(
+            placeholders | secret_by_value.keys(), key=len, reverse=True
+        )
+        pattern = re.compile("|".join(re.escape(item) for item in candidates))
+        # name -> placeholder of every secret actually redacted (insertion-ordered).
+        redacted_names: dict[str, str] = {}
+
+        def _replace(match: "re.Match[str]") -> str:
+            found = match.group(0)
+            secret = secret_by_value.get(found)
+            if secret is None:
+                # An existing placeholder: keep it untouched.
+                return found
+            redacted_names[secret.name] = secret.placeholder
+            return secret.placeholder
+
+        result = pattern.sub(_replace, text)
+        for name, placeholder in redacted_names.items():
+            logger.warning(
+                "Redacted secret '%s' from content before it left the trusted "
+                "boundary. Reference secrets via their placeholder '%s' instead of "
+                "embedding the value in goals or tool outputs.",
+                name,
+                placeholder,
+            )
+        return result
+
+    def redact_message(self, message: MessageParam) -> MessageParam:
+        """Return a copy of `message` with all text-bearing fields redacted."""
+        if not self._secrets:
+            return message
+        redacted = message.model_copy(deep=True)
+        redacted.content = self.redact_content(redacted.content)
+        return redacted
+
+    def redact_content(
+        self, content: "str | list[ContentBlockParam] | list[Any]"
+    ) -> Any:
+        """Redact a message/tool-result content (str or list of content blocks).
+ + Used to scrub tool outputs (`ToolResultBlockParam.content`) before they are + fed back to the model or recorded, in addition to `redact_message`. + """ + if not self._secrets: + return content + if isinstance(content, str): + return self._redact_str(content) + return [self._redact_block(block) for block in content] + + def _redact_block(self, block: Any) -> Any: + if isinstance(block, TextBlockParam): + block.text = self._redact_str(block.text) + elif isinstance(block, ToolResultBlockParam): + block.content = self.redact_content(block.content) + elif isinstance(block, ToolUseBlockParam): + block.input = self.redact(block.input) + return block + + def system_prompt_section(self) -> str: + """Build the `` system-prompt block (``""`` if empty).""" + if not self._secrets: + return "" + lines = [ + f"- {secret.placeholder}" + + (f" — {secret.description}" if secret.description else "") + for secret in self._secrets.values() + ] + listing = "\n".join(lines) + example = next(iter(self._secrets.values())).placeholder + return ( + "\n" + "The following secret placeholders are available. When you need to enter a " + "sensitive value (e.g. a password), use the EXACT placeholder string shown " + "below as the value (for example, as the text to type). The real value is " + "substituted securely at execution time and is hidden from you. " + "NEVER guess, invent, or ask for the actual value, and never write it out." 
+ "\n\n" + f"{listing}\n\n" + "Example — to enter the first secret with a typing tool, pass the text " + f"exactly as: {example}\n" + "" + ) diff --git a/src/askui/models/shared/tools.py b/src/askui/models/shared/tools.py index 74912911..72ecbd5a 100644 --- a/src/askui/models/shared/tools.py +++ b/src/askui/models/shared/tools.py @@ -31,6 +31,7 @@ ToolResultBlockParam, ToolUseBlockParam, ) +from askui.models.shared.secrets import SecretVault from askui.tools import AgentOs from askui.tools.android.agent_os import AndroidAgentOs from askui.utils.image_utils import ImageSource, base64_to_image @@ -461,15 +462,26 @@ def __init__( mcp_client: McpClientProtocol | None = None, include: set[str] | None = None, agent_os_list: list[AgentOs | AndroidAgentOs] | None = None, + secret_vault: SecretVault | None = None, ) -> None: self._mcp_client = mcp_client self._include = include self._agent_os_list: list[AgentOs | AndroidAgentOs] = [] self._tools: list[Tool] = tools or [] + self._secret_vault: SecretVault = secret_vault or SecretVault() if agent_os_list: for agent_os in agent_os_list: self.add_agent_os(agent_os) + @property + def secret_vault(self) -> SecretVault: + """The secret vault used to substitute placeholders in tool inputs.""" + return self._secret_vault + + @secret_vault.setter + def secret_vault(self, secret_vault: SecretVault) -> None: + self._secret_vault = secret_vault + def add_agent_os(self, agent_os: AgentOs | AndroidAgentOs) -> None: """Add an agent OS to the collection. @@ -641,9 +653,14 @@ def _run_regular_tool( tool: Tool, ) -> ToolResultBlockParam: try: - tool_result: ToolCallResult = tool(**tool_use_block_param.input) # type: ignore + tool_input = self._secret_vault.substitute(tool_use_block_param.input) + tool_result: ToolCallResult = tool(**tool_input) + # Redact secret values that a tool may echo back in its output, so they do + # not leak into the conversation history / model. 
return ToolResultBlockParam( - content=_convert_to_content(tool_result), + content=self._secret_vault.redact_content( + _convert_to_content(tool_result) + ), tool_use_id=tool_use_block_param.id, ) except (AgentError, AutomationError): @@ -656,7 +673,9 @@ def _run_regular_tool( ) logger.info("%s - %s", tool_use_block_param.name, error_message) return ToolResultBlockParam( - content=f"Tool raised an unexpected error: {error_message}", + content=self._secret_vault.redact( + f"Tool raised an unexpected error: {error_message}" + ), is_error=True, tool_use_id=tool_use_block_param.id, ) @@ -667,9 +686,10 @@ async def _call_mcp_tool( tool_use_block_param: ToolUseBlockParam, ) -> ToolCallResult: async with mcp_client: + tool_input = self._secret_vault.substitute(tool_use_block_param.input) return await mcp_client.call_tool( tool_use_block_param.name, - tool_use_block_param.input, # type: ignore[arg-type] + tool_input, ) def _run_mcp_tool( @@ -686,8 +706,9 @@ def _run_mcp_tool( try: call_mcp_tool_sync = syncify(self._call_mcp_tool, raise_sync_error=False) result = call_mcp_tool_sync(self._mcp_client, tool_use_block_param) + # Redact secret values an MCP tool may echo back in its output. 
return ToolResultBlockParam( - content=_convert_to_content(result), + content=self._secret_vault.redact_content(_convert_to_content(result)), tool_use_id=tool_use_block_param.id, ) except AutomationError: @@ -704,7 +725,7 @@ def _run_mcp_tool( e, ) return ToolResultBlockParam( - content=str(e), + content=self._secret_vault.redact(str(e)), is_error=True, tool_use_id=tool_use_block_param.id, ) @@ -714,4 +735,5 @@ def __add__(self, other: "ToolCollection") -> "ToolCollection": tools=self._tools + other._tools, mcp_client=other._mcp_client or self._mcp_client, agent_os_list=self._agent_os_list + other._agent_os_list, + secret_vault=other._secret_vault or self._secret_vault, ) diff --git a/src/askui/multi_device_agent.py b/src/askui/multi_device_agent.py index 924c1e50..3632c6fa 100644 --- a/src/askui/multi_device_agent.py +++ b/src/askui/multi_device_agent.py @@ -7,6 +7,7 @@ from askui.android_agent import AndroidAgent from askui.computer_agent import ComputerAgent from askui.locators.locators import Locator +from askui.models.shared.secrets import Secret from askui.models.shared.settings import GetSettings, LocateSettings from askui.models.shared.tools import Tool from askui.models.types.geometry import Point @@ -31,6 +32,10 @@ class MultiDeviceAgent(Agent): act_tools (list[Tool] | None, optional): Additional tools for `act()`. android_device_sn (str | None, optional): Android device serial number to select on open. + secrets (list[Secret] | None, optional): Sensitive values the agent may use but + the LLM must never see. Applied to `act()` and to the composed + `computer`/`android` agents. The model only sees the placeholder + `<|secret|>NAME<|secret|>`; the real value is substituted at execution time. 
Example: ```python @@ -51,6 +56,7 @@ def __init__( retry: Retry | None = None, act_tools: list[Tool] | None = None, settings: AgentSettings | None = None, + secrets: list[Secret] | None = None, ) -> None: reporter = CompositeReporter(reporters=reporters) @@ -59,13 +65,16 @@ def __init__( reporter=reporter, retry=retry, settings=settings, + secrets=secrets, ) - # Initialize the computer agent + # Initialize the computer agent (secrets also passed so that deterministic + # `agent.computer.*` calls resolve placeholders too). self._computer_agent: ComputerAgent = ComputerAgent( display=desktop_display, reporters=[reporter], settings=settings, + secrets=secrets, ) # Initialize the Android agent @@ -73,6 +82,7 @@ def __init__( device=android_device_sn, reporters=[reporter], settings=settings, + secrets=secrets, ) # Combine the tool collections of the computer and Android agents diff --git a/src/askui/tools/android/tools.py b/src/askui/tools/android/tools.py index c85c22ea..b849a927 100644 --- a/src/askui/tools/android/tools.py +++ b/src/askui/tools/android/tools.py @@ -118,6 +118,9 @@ def __init__(self, agent_os: AndroidAgentOsFacade | None = None) -> None: """ Types the given text on the Android device screen. The to typed text can not contains non ASCII printable characters. + To enter a secret/sensitive value, pass its placeholder + `<|secret|>NAME<|secret|>` as the text; the real value is substituted + securely at runtime and is hidden from you. """ ), input_schema={ diff --git a/src/askui/tools/computer/type_tool.py b/src/askui/tools/computer/type_tool.py index c2b11741..ace3a612 100644 --- a/src/askui/tools/computer/type_tool.py +++ b/src/askui/tools/computer/type_tool.py @@ -8,7 +8,11 @@ class ComputerTypeTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( name="type", - description="Type text on the computer.", + description=( + "Type text on the computer. 
To enter a secret/sensitive value, pass " + "its placeholder `<|secret|>NAME<|secret|>` as the text; the real " + "value is substituted securely at runtime and is hidden from you." + ), input_schema={ "type": "object", "properties": { diff --git a/src/askui/tools/playwright/tools.py b/src/askui/tools/playwright/tools.py index ba08f273..b7d52141 100644 --- a/src/askui/tools/playwright/tools.py +++ b/src/askui/tools/playwright/tools.py @@ -235,7 +235,11 @@ class PlaywrightTypeTool(PlaywrightBaseTool): def __init__(self, agent_os: PlaywrightAgentOs | None = None) -> None: super().__init__( name="type", - description="Type text in the browser page.", + description=( + "Type text in the browser page. To enter a secret/sensitive value, " + "pass its placeholder `<|secret|>NAME<|secret|>` as the text; the real " + "value is substituted securely at runtime and is hidden from you." + ), input_schema={ "type": "object", "properties": { diff --git a/src/askui/web_agent.py b/src/askui/web_agent.py index fe47c5f9..a82bac5e 100644 --- a/src/askui/web_agent.py +++ b/src/askui/web_agent.py @@ -6,6 +6,7 @@ from askui.agent_settings import AgentSettings from askui.callbacks import ConversationCallback from askui.container import telemetry +from askui.models.shared.secrets import Secret from askui.models.shared.settings import ( ActSettings, MessageSettings, @@ -46,6 +47,7 @@ class WebAgent(Agent): "act_tools", "callbacks", "truncation_strategy", + "secrets", } ) @validate_call(config=ConfigDict(arbitrary_types_allowed=True)) @@ -57,6 +59,7 @@ def __init__( act_tools: list[Tool] | None = None, callbacks: list[ConversationCallback] | None = None, truncation_strategy: TruncationStrategy | None = None, + secrets: list[Secret] | None = None, ) -> None: reporter = CompositeReporter(reporters=reporters) self.os = PlaywrightAgentOs(reporter) @@ -69,6 +72,7 @@ def __init__( settings=settings, callbacks=callbacks, truncation_strategy=truncation_strategy, + secrets=secrets, ) 
self.act_tool_collection.add_agent_os(self.act_agent_os_facade) self.act_settings = ActSettings( diff --git a/src/askui/web_testing_agent.py b/src/askui/web_testing_agent.py index f155bb2a..17336c52 100644 --- a/src/askui/web_testing_agent.py +++ b/src/askui/web_testing_agent.py @@ -3,6 +3,7 @@ from pydantic import ConfigDict, validate_call from askui.agent_settings import AgentSettings +from askui.models.shared.secrets import Secret from askui.models.shared.settings import ( ActSettings, MessageSettings, @@ -42,6 +43,7 @@ def __init__( reporters: list[Reporter] | None = None, settings: AgentSettings | None = None, retry: Retry | None = None, + secrets: list[Secret] | None = None, ) -> None: base_dir = Path.cwd() / "chat" / "testing" base_dir.mkdir(parents=True, exist_ok=True) @@ -49,6 +51,7 @@ def __init__( reporters=reporters, settings=settings, retry=retry, + secrets=secrets, act_tools=[ CreateFeatureTool(base_dir), RetrieveFeatureTool(base_dir), diff --git a/tests/unit/test_secrets.py b/tests/unit/test_secrets.py new file mode 100644 index 00000000..bc42d440 --- /dev/null +++ b/tests/unit/test_secrets.py @@ -0,0 +1,243 @@ +"""Unit tests for the secret scope (`Secret` / `SecretVault`).""" + +import logging + +import pytest +from pydantic import SecretStr, ValidationError +from typing_extensions import override + +from askui import Secret, SecretVault +from askui.models.shared.agent_message_param import ( + MessageParam, + TextBlockParam, + ToolResultBlockParam, + ToolUseBlockParam, +) +from askui.models.shared.tools import Tool, ToolCallResult, ToolCollection + + +class _EchoTool(Tool): + """Tool that echoes its input back (to exercise output redaction).""" + + @override + def __call__(self, text: str = "") -> ToolCallResult: + return f"you typed: {text}" + + +class TestSecret: + def test_accepts_plain_str_and_stores_as_secret_str(self) -> None: + secret = Secret(name="password", value="hunter2value") + assert isinstance(secret.value, SecretStr) + assert 
secret.value.get_secret_value() == "hunter2value" + + def test_accepts_secret_str(self) -> None: + secret = Secret(name="password", value=SecretStr("hunter2value")) + assert isinstance(secret.value, SecretStr) + assert secret.value.get_secret_value() == "hunter2value" + + def test_value_is_masked_in_repr_and_dump(self) -> None: + secret = Secret(name="password", value="hunter2value") + assert "hunter2value" not in repr(secret) + assert "hunter2value" not in secret.model_dump_json() + + def test_placeholder(self) -> None: + placeholder = "<|secret|>password<|secret|>" + assert Secret(name="password", value="x").placeholder == placeholder + + def test_empty_name_raises(self) -> None: + with pytest.raises(ValidationError): + Secret(name="", value="x") + + @pytest.mark.parametrize( + "name", ["password", "PIN_2", "token0", "with space", "with-dash"] + ) + def test_name_has_no_charset_restriction(self, name: str) -> None: + assert Secret(name=name, value="x").name == name + + +class TestSubstitute: + def test_replaces_placeholder_in_str(self) -> None: + vault = SecretVault([Secret(name="password", value="hunter2")]) + assert vault.substitute("<|secret|>password<|secret|>") == "hunter2" + + def test_replaces_within_surrounding_text(self) -> None: + vault = SecretVault([Secret(name="password", value="hunter2")]) + assert ( + vault.substitute("pw is <|secret|>password<|secret|>!") == "pw is hunter2!" 
+ ) + + def test_walks_nested_structures(self) -> None: + vault = SecretVault([Secret(name="password", value="hunter2")]) + result = vault.substitute( + { + "text": "<|secret|>password<|secret|>", + "items": ["<|secret|>password<|secret|>", 1], + } + ) + assert result == {"text": "hunter2", "items": ["hunter2", 1]} + + def test_tolerates_whitespace_in_placeholder_name(self) -> None: + vault = SecretVault([Secret(name="password", value="hunter2")]) + assert vault.substitute("<|secret|> password <|secret|>") == "hunter2" + + def test_unknown_placeholder_left_intact(self) -> None: + vault = SecretVault([Secret(name="password", value="hunter2")]) + unknown = "<|secret|>unknown<|secret|>" + assert vault.substitute(unknown) == unknown + + def test_empty_vault_is_noop_same_object(self) -> None: + vault = SecretVault() + obj = {"a": "b"} + assert vault.substitute(obj) is obj + + def test_does_not_mutate_input(self) -> None: + vault = SecretVault([Secret(name="password", value="hunter2")]) + original = {"text": "<|secret|>password<|secret|>"} + vault.substitute(original) + assert original == {"text": "<|secret|>password<|secret|>"} + + +class TestRedact: + def test_replaces_literal_value_with_placeholder(self) -> None: + vault = SecretVault([Secret(name="password", value="hunter2")]) + assert vault.redact("typed hunter2") == "typed <|secret|>password<|secret|>" + + def test_warns_when_redacting(self, caplog: pytest.LogCaptureFixture) -> None: + vault = SecretVault([Secret(name="password", value="hunter2")]) + with caplog.at_level(logging.WARNING): + vault.redact("typed hunter2") + assert "Redacted secret 'password'" in caplog.text + # The value itself must never be logged. 
+ assert "hunter2" not in caplog.text + + def test_short_values_not_redacted(self) -> None: + vault = SecretVault([Secret(name="pin", value="12")]) + assert vault.redact("code 12 shown") == "code 12 shown" + + def test_walks_nested_structures(self) -> None: + vault = SecretVault([Secret(name="password", value="hunter2")]) + result = vault.redact(["typed hunter2", {"k": "hunter2"}]) + assert result == [ + "typed <|secret|>password<|secret|>", + {"k": "<|secret|>password<|secret|>"}, + ] + + +class TestRedactMessage: + def test_redacts_str_content(self) -> None: + vault = SecretVault([Secret(name="password", value="hunter2")]) + msg = MessageParam(role="user", content="login with hunter2") + assert ( + vault.redact_message(msg).content + == "login with <|secret|>password<|secret|>" + ) + + def test_redacts_text_and_tool_result_blocks(self) -> None: + vault = SecretVault([Secret(name="password", value="hunter2")]) + msg = MessageParam( + role="user", + content=[ + TextBlockParam(text="value hunter2"), + ToolResultBlockParam( + tool_use_id="t1", + content=[TextBlockParam(text="Typed hunter2")], + ), + ToolResultBlockParam(tool_use_id="t2", content="echo hunter2"), + ], + ) + redacted = vault.redact_message(msg) + assert isinstance(redacted.content, list) + text_block, tool_result_block, echo_block = redacted.content + assert isinstance(text_block, TextBlockParam) + assert text_block.text == "value <|secret|>password<|secret|>" + assert isinstance(tool_result_block, ToolResultBlockParam) + assert isinstance(tool_result_block.content, list) + inner_block = tool_result_block.content[0] + assert isinstance(inner_block, TextBlockParam) + assert inner_block.text == "Typed <|secret|>password<|secret|>" + assert isinstance(echo_block, ToolResultBlockParam) + assert echo_block.content == "echo <|secret|>password<|secret|>" + + def test_redacts_tool_use_input(self) -> None: + vault = SecretVault([Secret(name="password", value="hunter2")]) + msg = MessageParam( + 
role="assistant", + content=[ + ToolUseBlockParam(id="t1", name="type", input={"text": "hunter2"}) + ], + ) + redacted = vault.redact_message(msg) + assert isinstance(redacted.content, list) + tool_use_block = redacted.content[0] + assert isinstance(tool_use_block, ToolUseBlockParam) + assert tool_use_block.input == {"text": "<|secret|>password<|secret|>"} + + def test_does_not_mutate_original_message(self) -> None: + vault = SecretVault([Secret(name="password", value="hunter2")]) + msg = MessageParam(role="user", content="hunter2 here") + vault.redact_message(msg) + assert msg.content == "hunter2 here" + + +class TestMergeAndSystemPrompt: + def test_merge_precedence_and_names(self) -> None: + base = SecretVault([Secret(name="password", value="old")]) + override = SecretVault( + [Secret(name="password", value="new"), Secret(name="pin", value="1234")] + ) + merged = base.merge(override) + assert set(merged.names) == {"password", "pin"} + assert merged.substitute("<|secret|>password<|secret|>") == "new" + + def test_system_prompt_section_lists_placeholders(self) -> None: + vault = SecretVault( + [Secret(name="password", value="hunter2", description="login password")] + ) + section = vault.system_prompt_section() + assert "" in section + assert "<|secret|>password<|secret|>" in section + assert "login password" in section + assert "Example" in section + assert "hunter2" not in section + + def test_system_prompt_section_empty_vault(self) -> None: + assert SecretVault().system_prompt_section() == "" + + def test_bool(self) -> None: + assert not SecretVault() + assert SecretVault([Secret(name="a", value="bbbb")]) + + +class TestToolOutputRedaction: + def _echo_tool(self) -> _EchoTool: + return _EchoTool( + name="echo", + description="echo text", + input_schema={ + "type": "object", + "properties": {"text": {"type": "string"}}, + "required": ["text"], + }, + ) + + def test_tool_output_echoing_secret_is_redacted(self) -> None: + tool = self._echo_tool() + tools = 
ToolCollection( + tools=[tool], + secret_vault=SecretVault([Secret(name="password", value="hunter2xyz")]), + ) + block = ToolUseBlockParam( + id="t1", name=tool.name, input={"text": "<|secret|>password<|secret|>"} + ) + + results = tools.run([block]) + + # The tool received the decoded value (substitution) and echoed it, but the + # output is redacted before it becomes a tool result. + result = results[0] + assert isinstance(result, ToolResultBlockParam) + assert isinstance(result.content, list) + text_block = result.content[0] + assert isinstance(text_block, TextBlockParam) + assert "hunter2xyz" not in text_block.text + assert text_block.text == "you typed: <|secret|>password<|secret|>"