diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md
index 2e70c2bd95..471e21581b 100644
--- a/packages/coding-agent/CHANGELOG.md
+++ b/packages/coding-agent/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## [Unreleased]
 
+### Fixed
+
+- Fixed `umans` requests with more than 10 live context images still sending every image despite the provider budget; outgoing provider contexts now drop the oldest images above the active provider cap while preserving text and newest images ([#3230](https://github.com/can1357/oh-my-pi/issues/3230)).
+
 ## [16.1.14] - 2026-06-22
 
 ### Added
diff --git a/packages/coding-agent/src/sdk.ts b/packages/coding-agent/src/sdk.ts
index 8b57ac8011..bac8f82ffd 100644
--- a/packages/coding-agent/src/sdk.ts
+++ b/packages/coding-agent/src/sdk.ts
@@ -115,6 +115,7 @@ import {
 	USER_INTERRUPT_LABEL,
 	wrapSteeringForModel,
 } from "./session/messages";
+import { clampProviderContextImages } from "./session/provider-image-budget";
 import { getRestorableSessionModels } from "./session/session-context";
 import { SessionManager } from "./session/session-manager";
 import { SnapcompactInlineTransformer } from "./session/snapcompact-inline";
@@ -2420,8 +2421,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 		return wrapSteeringForModel(withContext);
 	};
 	// Per-request provider-context transforms. Obfuscate FIRST so secrets are
-	// redacted from text before snapcompact rasterizes it into PNG frames.
-	// Both operate on the transient outgoing Context only — never persisted.
+	// redacted from text before snapcompact rasterizes it into PNG frames, then
+	// clamp images to the active provider budget before the request is sent.
 	const snapcompactSystemPromptMode = settings.get("snapcompact.systemPrompt");
 	const snapcompactInline =
 		snapcompactSystemPromptMode !== "none" || settings.get("snapcompact.toolResults")
@@ -2436,14 +2437,11 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 					createSnapcompactSavingsRecorder(() => sessionManager.getSessionFile() ?? null),
 				)
 			: undefined;
-	const transformProviderContext =
-		obfuscator || snapcompactInline
-			? async (context: Context, transformModel: Model): Promise<Context> => {
-					let transformed = obfuscator ? obfuscateProviderContext(obfuscator, context) : context;
-					if (snapcompactInline) transformed = await snapcompactInline.transform(transformed, transformModel);
-					return transformed;
-				}
-			: undefined;
+	const transformProviderContext = async (context: Context, transformModel: Model): Promise<Context> => {
+		let transformed = obfuscator ? obfuscateProviderContext(obfuscator, context) : context;
+		if (snapcompactInline) transformed = await snapcompactInline.transform(transformed, transformModel);
+		return clampProviderContextImages(transformed, transformModel);
+	};
 	const onPayload = async (payload: unknown, _model?: Model) => {
 		return await extensionRunner.emitBeforeProviderRequest(payload);
 	};
diff --git a/packages/coding-agent/src/session/provider-image-budget.ts b/packages/coding-agent/src/session/provider-image-budget.ts
new file mode 100644
index 0000000000..ade57c48cf
--- /dev/null
+++ b/packages/coding-agent/src/session/provider-image-budget.ts
@@ -0,0 +1,100 @@
+import type {
+	Context,
+	DeveloperMessage,
+	ImageContent,
+	Model,
+	TextContent,
+	ToolResultMessage,
+	UserMessage,
+} from "@oh-my-pi/pi-ai";
+import { providerImageBudget } from "@oh-my-pi/snapcompact";
+
+/** Text block substituted for content that consisted solely of dropped images. */
+const IMAGE_OMISSION: TextContent = {
+	type: "text",
+	text: "[image omitted: provider image limit]",
+};
+
+/** Counts image blocks across all messages whose content is an array. */
+function countImages(context: Context): number {
+	let count = 0;
+	for (const message of context.messages) {
+		if (!Array.isArray(message.content)) continue;
+		for (const part of message.content) {
+			if (part.type === "image") count++;
+		}
+	}
+	return count;
+}
+
+/**
+ * Removes image blocks, in order, while `state.remainingDrops` is positive.
+ *
+ * Returns `undefined` when nothing was removed so callers can keep the original
+ * message object. Never returns an empty array: an image-only message would
+ * otherwise be sent with no content blocks at all, so it gets a text marker.
+ */
+function clampContent(
+	content: readonly (TextContent | ImageContent)[],
+	state: { remainingDrops: number },
+): (TextContent | ImageContent)[] | undefined {
+	let changed = false;
+	const clamped: (TextContent | ImageContent)[] = [];
+	for (const part of content) {
+		if (part.type === "image" && state.remainingDrops > 0) {
+			state.remainingDrops--;
+			changed = true;
+			continue;
+		}
+		clamped.push(part);
+	}
+	if (!changed) return undefined;
+	return clamped.length > 0 ? clamped : [IMAGE_OMISSION];
+}
+
+function clampUserMessage(message: UserMessage, state: { remainingDrops: number }): UserMessage {
+	if (!Array.isArray(message.content) || state.remainingDrops <= 0) return message;
+	const content = clampContent(message.content, state);
+	return content ? { ...message, content } : message;
+}
+
+function clampDeveloperMessage(message: DeveloperMessage, state: { remainingDrops: number }): DeveloperMessage {
+	if (!Array.isArray(message.content) || state.remainingDrops <= 0) return message;
+	const content = clampContent(message.content, state);
+	return content ? { ...message, content } : message;
+}
+
+function clampToolResultMessage(message: ToolResultMessage, state: { remainingDrops: number }): ToolResultMessage {
+	if (state.remainingDrops <= 0) return message;
+	const content = clampContent(message.content, state);
+	return content ? { ...message, content } : message;
+}
+
+/**
+ * Drops oldest transient image blocks so outgoing vision requests fit the active provider's image cap.
+ *
+ * Returns `context` itself when the model takes no image input or the cap is not
+ * exceeded; otherwise returns a copy and leaves the input untouched.
+ */
+export function clampProviderContextImages(context: Context, model: Model): Context {
+	if (!model.input.includes("image")) return context;
+	const limit = providerImageBudget(model.provider);
+	const totalImages = countImages(context);
+	if (totalImages <= limit) return context;
+
+	const state = { remainingDrops: totalImages - limit };
+	const messages = context.messages.map(message => {
+		switch (message.role) {
+			case "user":
+				return clampUserMessage(message, state);
+			case "developer":
+				return clampDeveloperMessage(message, state);
+			case "toolResult":
+				return clampToolResultMessage(message, state);
+			case "assistant":
+				return message;
+		}
+		return message;
+	});
+	return { ...context, messages };
+}
diff --git a/packages/coding-agent/test/session/provider-image-budget.test.ts b/packages/coding-agent/test/session/provider-image-budget.test.ts
new file mode 100644
index 0000000000..eefafd2b6e
--- /dev/null
+++ b/packages/coding-agent/test/session/provider-image-budget.test.ts
@@ -0,0 +1,126 @@
+import { describe, expect, it } from "bun:test";
+import type { Context, ImageContent, TextContent } from "@oh-my-pi/pi-ai";
+import { buildModel } from "@oh-my-pi/pi-catalog/build";
+import { clampProviderContextImages } from "@oh-my-pi/pi-coding-agent/session/provider-image-budget";
+
+const UMANS_MODEL = buildModel({
+	id: "umans-glm-5.2",
+	name: "umans-glm-5.2",
+	api: "anthropic-messages",
+	provider: "umans",
+	baseUrl: "https://api.code.umans.ai",
+	reasoning: true,
+	input: ["text", "image"],
+	cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+	contextWindow: 128000,
+	maxTokens: 4096,
+});
+
+function image(data: string): ImageContent {
+	return { type: "image", data, mimeType: "image/png" };
+}
+
+function text(value: string): TextContent {
+	return { type: "text", text: value };
+}
+
+function imageData(context: Context): string[] {
+	const data: string[] = [];
+	for (const message of context.messages) {
+		if (!Array.isArray(message.content)) continue;
+		for (const part of message.content) {
+			if (part.type === "image") data.push(part.data);
+		}
+	}
+	return data;
+}
+
+function textData(context: Context): string[] {
+	const data: string[] = [];
+	for (const message of context.messages) {
+		if (typeof message.content === "string") {
+			data.push(message.content);
+			continue;
+		}
+		for (const part of message.content) {
+			if (part.type === "text") data.push(part.text);
+		}
+	}
+	return data;
+}
+
+describe("provider context image budgets", () => {
+	it("drops oldest images above the active provider cap while preserving text", () => {
+		const context: Context = {
+			systemPrompt: ["system"],
+			tools: [],
+			messages: Array.from({ length: 31 }, (_, index) => ({
+				role: "user",
+				content: [text(`text-${index}`), image(`image-${index}`)],
+				timestamp: index,
+			})),
+		};
+
+		const clamped = clampProviderContextImages(context, UMANS_MODEL);
+
+		expect(imageData(clamped)).toEqual(Array.from({ length: 10 }, (_, index) => `image-${index + 21}`));
+		expect(textData(clamped)).toEqual(Array.from({ length: 31 }, (_, index) => `text-${index}`));
+		expect(clamped).not.toBe(context);
+		expect(imageData(context)).toEqual(Array.from({ length: 31 }, (_, index) => `image-${index}`));
+	});
+
+	it("keeps image-only tool results meaningful when every image block is dropped", () => {
+		const context: Context = {
+			systemPrompt: [],
+			tools: [],
+			messages: Array.from({ length: 11 }, (_, index) => ({
+				role: "toolResult",
+				toolCallId: `call-${index}`,
+				toolName: "inspect_image",
+				content: [image(`image-${index}`)],
+				isError: false,
+				timestamp: index,
+			})),
+		};
+
+		const clamped = clampProviderContextImages(context, UMANS_MODEL);
+		const firstMessage = clamped.messages[0];
+
+		expect(imageData(clamped)).toEqual(Array.from({ length: 10 }, (_, index) => `image-${index + 1}`));
+		expect(firstMessage?.role).toBe("toolResult");
+		expect(firstMessage?.content).toEqual([text("[image omitted: provider image limit]")]);
+	});
+
+	it("replaces image-only user content with an omission marker instead of emptying it", () => {
+		const context: Context = {
+			systemPrompt: [],
+			tools: [],
+			messages: Array.from({ length: 11 }, (_, index) => ({
+				role: "user",
+				content: [image(`image-${index}`)],
+				timestamp: index,
+			})),
+		};
+
+		const clamped = clampProviderContextImages(context, UMANS_MODEL);
+
+		expect(imageData(clamped)).toEqual(Array.from({ length: 10 }, (_, index) => `image-${index + 1}`));
+		expect(clamped.messages[0]?.content).toEqual([text("[image omitted: provider image limit]")]);
+	});
+
+	it("preserves context identity when the provider cap is not exceeded", () => {
+		const context: Context = {
+			systemPrompt: [],
+			tools: [],
+			messages: [
+				{
+					role: "user",
+					content: [text("ok"), ...Array.from({ length: 10 }, (_, index) => image(`image-${index}`))],
+					timestamp: 1,
+				},
+			],
+		};
+
+		expect(clampProviderContextImages(context, UMANS_MODEL)).toBe(context);
+	});
+});