diff --git a/python/packages/bedrock/agent_framework_bedrock/_chat_client.py b/python/packages/bedrock/agent_framework_bedrock/_chat_client.py index cf8b3c562ae..269b0902bf5 100644 --- a/python/packages/bedrock/agent_framework_bedrock/_chat_client.py +++ b/python/packages/bedrock/agent_framework_bedrock/_chat_client.py @@ -689,6 +689,11 @@ def _parse_usage(self, usage: dict[str, Any] | None) -> UsageDetails | None: details["output_token_count"] = output_tokens if (total_tokens := usage.get("totalTokens")) is not None: details["total_token_count"] = total_tokens + # Bedrock Converse reports these when prompt caching is active. + if (cache_read := usage.get("cacheReadInputTokens")) is not None: + details["cache_read_input_token_count"] = cache_read + if (cache_write := usage.get("cacheWriteInputTokens")) is not None: + details["cache_creation_input_token_count"] = cache_write return details def _parse_message_contents(self, content_blocks: Sequence[dict[str, Any]]) -> list[Any]: diff --git a/python/packages/bedrock/tests/test_bedrock_client.py b/python/packages/bedrock/tests/test_bedrock_client.py index 9e1b42ea251..839dd4371b9 100644 --- a/python/packages/bedrock/tests/test_bedrock_client.py +++ b/python/packages/bedrock/tests/test_bedrock_client.py @@ -169,3 +169,21 @@ def test_prepare_options_tool_choice_required_without_tools_raises() -> None: with pytest.raises(ValueError, match="tool_choice='required' requires at least one tool"): client._prepare_options(messages, options) + + +def test_parse_usage_surfaces_cache_tokens() -> None: + """Bedrock Converse reports cache token counts when prompt caching is used.""" + client = _make_client() + + details = client._parse_usage({ + "inputTokens": 10, + "outputTokens": 5, + "totalTokens": 15, + "cacheReadInputTokens": 8, + "cacheWriteInputTokens": 3, + }) + + assert details is not None + assert details["input_token_count"] == 10 + assert details["cache_read_input_token_count"] == 8 + assert details["cache_creation_input_token_count"] == 3 diff --git a/python/packages/gemini/agent_framework_gemini/_chat_client.py b/python/packages/gemini/agent_framework_gemini/_chat_client.py index fee44ee368c..f1aa36a9e7c 100644 --- a/python/packages/gemini/agent_framework_gemini/_chat_client.py +++ b/python/packages/gemini/agent_framework_gemini/_chat_client.py @@ -1051,6 +1051,10 @@ def _parse_usage(self, usage: types.GenerateContentResponseUsageMetadata | None) details["output_token_count"] = v if (v := usage.total_token_count) is not None: details["total_token_count"] = v + if (v := usage.cached_content_token_count) is not None: + details["cache_read_input_token_count"] = v + if (v := usage.thoughts_token_count) is not None: + details["reasoning_output_token_count"] = v return details or None def _map_finish_reason(self, reason: str | None) -> FinishReasonLiteral | None: diff --git a/python/packages/gemini/tests/test_gemini_client.py b/python/packages/gemini/tests/test_gemini_client.py index 32da66f56fa..c85d1bbec3b 100644 --- a/python/packages/gemini/tests/test_gemini_client.py +++ b/python/packages/gemini/tests/test_gemini_client.py @@ -93,6 +93,8 @@ def _make_response( prompt_tokens: int | None = 10, output_tokens: int | None = 5, total_tokens: int | None = 15, + cached_tokens: int | None = None, + thoughts_tokens: int | None = None, ) -> MagicMock: """Build a mock types.GenerateContentResponse.""" response = MagicMock() @@ -113,6 +115,8 @@ def _make_response( usage.prompt_token_count = prompt_tokens usage.candidates_token_count = output_tokens usage.total_token_count = total_tokens + usage.cached_content_token_count = cached_tokens + usage.thoughts_token_count = thoughts_tokens response.usage_metadata = usage else: response.usage_metadata = None @@ -374,6 +378,27 @@ async def test_get_response_usage_details() -> None: assert response.usage_details["total_token_count"] == 28 +async def test_get_response_usage_details_includes_cached_and_reasoning_tokens() -> None: + """Surfaces Gemini cached-content and thinking token counts into the canonical usage fields.""" + client, mock = _make_gemini_client() + mock.aio.models.generate_content = AsyncMock( + return_value=_make_response( + [_make_part(text="Hi")], + prompt_tokens=20, + output_tokens=8, + total_tokens=28, + cached_tokens=12, + thoughts_tokens=6, + ) + ) + + response = await client.get_response(messages=[Message(role="user", contents=[Content.from_text("Hi")])]) + + assert response.usage_details is not None + assert response.usage_details["cache_read_input_token_count"] == 12 + assert response.usage_details["reasoning_output_token_count"] == 6 + + async def test_get_response_no_usage_when_metadata_absent() -> None: """Returns None for usage_details when the API response includes no usage metadata.""" client, mock = _make_gemini_client()