From 5cce507582c49eb01a9c5e01d993c88c9972bf92 Mon Sep 17 00:00:00 2001 From: roboomp Date: Mon, 22 Jun 2026 10:03:44 +0000 Subject: [PATCH 1/4] fix(agent): size snapcompact maxFrames by the live model window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Snapcompact's bundled MAX_FRAMES_DEFAULT (80) × FRAME_TOKEN_ESTIMATE (5024) ≈ 402k tokens worth of frames. AgentSession was calling snapcompact.compact() with no maxFrames override, so the post-render projection inside #runAutoCompaction / compact() always overflowed the budget on any sub-1M-token window (Claude Sonnet 4.5's 200k = 170k usable, the 80-frame projection alone clears that 2.4×), looping the 'snapcompact could not bring the context under the limit — using an LLM summary instead' warning on every threshold tick. AgentSession.#computeSnapcompactMaxFrames now sizes the frame cap from the resolved budget — (window − reserve − non-message − kept-recent − summary-text reserve) / FRAME_TOKEN_ESTIMATE, clamped to MAX_FRAMES_DEFAULT — and threads it into snapcompact.compact() in both the auto-compaction and manual /compact paths. When the kept-recent slice already exceeds the budget, snapcompact is skipped outright instead of running just to be rejected: the projection guard remains as a defensive check. 
Fixes #3247 --- packages/coding-agent/CHANGELOG.md | 4 + .../coding-agent/src/session/agent-session.ts | 125 ++++++++--- .../agent-session-snapcompact-budget.test.ts | 198 ++++++++++++++++++ 3 files changed, 297 insertions(+), 30 deletions(-) create mode 100644 packages/coding-agent/test/agent-session-snapcompact-budget.test.ts diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md index 2e70c2bd95..6aa2c35a1d 100644 --- a/packages/coding-agent/CHANGELOG.md +++ b/packages/coding-agent/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Fixed + +- Fixed snapcompact auto-compaction looping the "snapcompact could not bring the context under the limit — using an LLM summary instead" warning on every threshold tick for sub-1M-token models (Claude Sonnet 4.5, GPT-5.x, Gemini 2.x). `snapcompact.compact()` was called with no `maxFrames` override, so it defaulted to `MAX_FRAMES_DEFAULT = 80`; the projection in `AgentSession` charges `FRAME_TOKEN_ESTIMATE = 5024` per frame block (the conservative high-res Anthropic ceiling), making 80 × 5024 ≈ 402k frame-token projections that always overflow a 200k budget. `AgentSession.#computeSnapcompactMaxFrames` now sizes the frame cap from the live `(window − reserve − non-message − kept-recent − summary-text reserve)` envelope before invoking snapcompact in both the auto-compaction and manual `/compact` paths, and skips snapcompact outright when the kept-recent slice alone already exceeds the budget (with a clearer notice explaining the cause). 
([#3247](https://github.com/can1357/oh-my-pi/issues/3247)) + ## [16.1.14] - 2026-06-22 ### Added diff --git a/packages/coding-agent/src/session/agent-session.ts b/packages/coding-agent/src/session/agent-session.ts index b25b305074..5f9e2c874f 100644 --- a/packages/coding-agent/src/session/agent-session.ts +++ b/packages/coding-agent/src/session/agent-session.ts @@ -44,6 +44,7 @@ import { CompactionCancelledError, type CompactionPreparation, type CompactionResult, + type CompactionSettings, calculateContextTokens, calculatePromptTokens, collectEntriesForBranchSummary, @@ -7797,31 +7798,45 @@ export class AgentSession { let tokensBefore: number; let details: unknown; - // Snapcompact runs locally first; if its frame archive plus the kept - // history still overflows the model window, fall back to an LLM summary - // (far cheaper than ~FRAME_TOKEN_ESTIMATE per frame). + // Snapcompact runs locally first. The frame cap is sized from the live + // model window via #computeSnapcompactMaxFrames so the post-render context + // fits without the warning loop (issue #3247). Zero-frame budget → skip + // snapcompact and take the summarizer path immediately. let snapcompactResult: snapcompact.CompactionResult | undefined; if (snapcompactReady) { - snapcompactResult = await snapcompact.compact(preparation, { - convertToLlm, - model: this.model, - shape: snapcompact.resolveShape(this.model, this.settings.get("snapcompact.shape")), - }); - const ctxWindow = this.model?.contextWindow ?? 0; - const budget = - ctxWindow > 0 - ? 
ctxWindow - effectiveReserveTokens(ctxWindow, effectiveSettings) - : Number.POSITIVE_INFINITY; - if (this.#projectSnapcompactContextTokens(preparation, snapcompactResult) > budget) { - logger.warn("Snapcompact still overflows the window; falling back to an LLM summary", { + const maxFrames = this.#computeSnapcompactMaxFrames(preparation, effectiveSettings); + if (maxFrames < 1) { + logger.warn("Snapcompact skipped: kept history alone exceeds the context budget", { model: this.model?.id, }); this.emitNotice( "warning", - "snapcompact could not bring the context under the limit — using an LLM summary instead", + "snapcompact: kept history alone exceeds the context budget — using an LLM summary instead", "compaction", ); - snapcompactResult = undefined; + } else { + snapcompactResult = await snapcompact.compact(preparation, { + convertToLlm, + model: this.model, + shape: snapcompact.resolveShape(this.model, this.settings.get("snapcompact.shape")), + maxFrames, + }); + const ctxWindow = this.model?.contextWindow ?? 0; + const budget = + ctxWindow > 0 + ? ctxWindow - effectiveReserveTokens(ctxWindow, effectiveSettings) + : Number.POSITIVE_INFINITY; + if (this.#projectSnapcompactContextTokens(preparation, snapcompactResult) > budget) { + logger.warn("Snapcompact still overflows the window after frame-budget sizing; falling back", { + model: this.model?.id, + }); + this.emitNotice( + "warning", + "snapcompact could not bring the context under the limit — using an LLM summary instead", + "compaction", + ); + snapcompactResult = undefined; + } } } @@ -9407,6 +9422,40 @@ export class AgentSession { return { kind: "needsLlm", hookContext, hookPrompt, preserveData }; } + /** + * Cap on snapcompact frames the post-compaction context can carry without + * busting the model window. 
Mirrors the per-frame token charge used by the + * projection ({@link snapcompact.FRAME_TOKEN_ESTIMATE}, the conservative + * high-res Anthropic ceiling), so picking `maxFrames` from this helper makes + * {@link #projectSnapcompactContextTokens} succeed by construction. + * + * Returns `0` when the kept-recent slice plus the non-message overhead + * already eats the entire budget — at that point snapcompact cannot fit a + * single frame and the caller MUST skip it instead of running just to + * reject the result and re-emit the "could not bring the context under the + * limit" warning every threshold tick. Without this cap, the bundled + * `MAX_FRAMES_DEFAULT = 80` × 5024 tokens = ~402k frame-token projection + * always overflows any sub-1M-token window (issue #3247). + */ + #computeSnapcompactMaxFrames(preparation: CompactionPreparation, settings: CompactionSettings): number { + const ctxWindow = this.model?.contextWindow ?? 0; + if (ctxWindow <= 0) return snapcompact.MAX_FRAMES_DEFAULT; + const reserve = effectiveReserveTokens(ctxWindow, settings); + let nonFrameTokens = computeNonMessageTokens(this); + for (const message of preparation.recentMessages) { + nonFrameTokens += estimateTokens(message); + } + // Headroom for the summary-message lead-in plus the verbatim text edges + // snapcompact pins around the imaged middle. Sized for the typical + // snapcompact summary (~2k tokens) plus one HQ-capacity text edge on + // each side; conservative, so a tighter post-render run cannot drift + // past the projection check below. 
+ const SUMMARY_TEXT_RESERVE = 4000; + const frameBudget = ctxWindow - reserve - nonFrameTokens - SUMMARY_TEXT_RESERVE; + if (frameBudget < snapcompact.FRAME_TOKEN_ESTIMATE) return 0; + return Math.min(Math.floor(frameBudget / snapcompact.FRAME_TOKEN_ESTIMATE), snapcompact.MAX_FRAMES_DEFAULT); + } + /** * Project the post-compaction context size of a snapcompact result: kept * recent messages + the summary message with its re-attached frames + the @@ -9652,24 +9701,20 @@ export class AgentSession { let tokensBefore: number; let details: unknown; - // Snapcompact runs locally first; if its frame archive plus the kept - // history still overflows the model window (frames default to - // MAX_FRAMES_DEFAULT and cost ~FRAME_TOKEN_ESTIMATE each), an LLM - // summary is far cheaper — downgrade to context-full and take the - // summarizer path. + // Snapcompact runs locally first. The post-compaction context = kept-recent + // + a summary message carrying the imaged archive at FRAME_TOKEN_ESTIMATE + // per frame; #computeSnapcompactMaxFrames sizes the frame cap from the + // live window so we don't run snapcompact just to overflow and fall back + // every threshold tick. Kept-recent already over budget → skip snapcompact + // outright (a single frame won't fit). Otherwise the projection below is + // only a defensive guard for summary-text drift. 
let snapcompactResult: snapcompact.CompactionResult | undefined; if (action === "snapcompact" && compactionPrep.kind !== "fromHook") { const text = snapcompact.serializeConversation( convertToLlm(preparation.messagesToSummarize.concat(preparation.turnPrefixMessages)), ); const renderScan = snapcompact.scanRenderability(text); - if (renderScan.isSafe) { - snapcompactResult = await snapcompact.compact(preparation, { - convertToLlm, - model: this.model, - shape: snapcompact.resolveShape(this.model, this.settings.get("snapcompact.shape")), - }); - } else { + if (!renderScan.isSafe) { logger.warn("Snapcompact disabled: high non-ASCII rate detected; falling back to an LLM summary", { model: this.model?.id, unrenderableRatio: renderScan.unrenderableRatio, @@ -9680,6 +9725,26 @@ export class AgentSession { "compaction", ); action = "context-full"; + } else { + const maxFrames = this.#computeSnapcompactMaxFrames(preparation, compactionSettings); + if (maxFrames < 1) { + logger.warn("Snapcompact skipped: kept history alone exceeds the context budget", { + model: this.model?.id, + }); + this.emitNotice( + "warning", + "snapcompact: kept history alone exceeds the context budget — using an LLM summary instead", + "compaction", + ); + action = "context-full"; + } else { + snapcompactResult = await snapcompact.compact(preparation, { + convertToLlm, + model: this.model, + shape: snapcompact.resolveShape(this.model, this.settings.get("snapcompact.shape")), + maxFrames, + }); + } } if (snapcompactResult) { @@ -9690,7 +9755,7 @@ export class AgentSession { : Number.POSITIVE_INFINITY; const projected = this.#projectSnapcompactContextTokens(preparation, snapcompactResult); if (projected > budget) { - logger.warn("Snapcompact still overflows the window; falling back to an LLM summary", { + logger.warn("Snapcompact still overflows the window after frame-budget sizing; falling back", { model: this.model?.id, projected, budget, diff --git 
a/packages/coding-agent/test/agent-session-snapcompact-budget.test.ts b/packages/coding-agent/test/agent-session-snapcompact-budget.test.ts new file mode 100644 index 0000000000..dba0a415af --- /dev/null +++ b/packages/coding-agent/test/agent-session-snapcompact-budget.test.ts @@ -0,0 +1,198 @@ +/** + * Regression test for issue #3247. + * + * Snapcompact's bundled `MAX_FRAMES_DEFAULT = 80` × `FRAME_TOKEN_ESTIMATE = 5024` + * ≈ 402k tokens worth of frames. On any sub-1M-token window (e.g. Claude + * Sonnet 4.5's 200k), passing the default cap to `snapcompact.compact()` made + * the post-render projection in `AgentSession` always overflow the budget, + * emit the "snapcompact could not bring the context under the limit" warning + * on every threshold tick, and downgrade to an LLM summary. The fix sizes the + * `maxFrames` cap from the live model window (window − reserve − non-message + * overhead − kept-recent − summary-text reserve) before calling + * `snapcompact.compact()`. + * + * The contract this test defends: for a 200k-window vision model with sane + * kept-recent traffic, AgentSession MUST pass a budget-sized `maxFrames` + * (smaller than `MAX_FRAMES_DEFAULT`, and with `maxFrames × FRAME_TOKEN_ESTIMATE` + * inside the resolved budget) so the projection accepts the snapcompact + * result instead of falling back to the LLM summarizer. 
+ */ + +import { afterEach, beforeEach, describe, expect, it, vi } from "bun:test"; +import * as path from "node:path"; +import { Agent } from "@oh-my-pi/pi-agent-core"; +import { effectiveReserveTokens } from "@oh-my-pi/pi-agent-core/compaction"; +import { getBundledModel } from "@oh-my-pi/pi-catalog/models"; +import { ModelRegistry } from "@oh-my-pi/pi-coding-agent/config/model-registry"; +import { Settings } from "@oh-my-pi/pi-coding-agent/config/settings"; +import { AgentSession } from "@oh-my-pi/pi-coding-agent/session/agent-session"; +import { AuthStorage } from "@oh-my-pi/pi-coding-agent/session/auth-storage"; +import { SessionManager } from "@oh-my-pi/pi-coding-agent/session/session-manager"; +import { TempDir } from "@oh-my-pi/pi-utils"; +import * as snapcompact from "@oh-my-pi/snapcompact"; + +describe("AgentSession snapcompact frame-budget sizing", () => { + let tempDir: TempDir; + let session: AgentSession; + let sessionManager: SessionManager; + let authStorage: AuthStorage; + let modelRegistry: ModelRegistry; + + beforeEach(async () => { + tempDir = TempDir.createSync("@pi-snapcompact-budget-"); + + authStorage = await AuthStorage.create(path.join(tempDir.path(), "testauth.db")); + authStorage.setRuntimeApiKey("anthropic", "test-key"); + modelRegistry = new ModelRegistry(authStorage); + sessionManager = SessionManager.create(tempDir.path(), tempDir.path()); + + const model = getBundledModel("anthropic", "claude-sonnet-4-5"); + if (!model) throw new Error("Expected bundled claude-sonnet-4-5 model"); + // Sanity: the contract only holds for vision models with a window + // genuinely smaller than the snapcompact upper bound. If the bundled + // catalog ever raises Sonnet's window past 1M, this test no longer + // covers the failure mode the fix targets. 
+ expect(model.input).toContain("image"); + expect(model.contextWindow).toBeLessThan(1_000_000); + + const agent = new Agent({ + initialState: { model, systemPrompt: ["Test"], tools: [], messages: [] }, + }); + + // Seed a representative long-running session: many turn-pairs with + // substantial filler so prepareCompaction() splits the branch into + // "discard + summarize" (oldest) vs "kept-recent" (newest). + const filler = "the quick brown fox jumps over the lazy dog. ".repeat(64); + for (let i = 0; i < 64; i++) { + sessionManager.appendMessage({ + role: "user", + content: [{ type: "text", text: `turn ${i}: ${filler}` }], + timestamp: Date.now() - (64 - i) * 1000, + }); + sessionManager.appendMessage({ + role: "assistant", + content: [{ type: "text", text: `reply ${i}: ${filler}` }], + api: "anthropic-messages", + provider: "anthropic", + model: "claude-sonnet-4-5", + stopReason: "stop", + usage: { + input: 1000, + output: 1000, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 2000, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + timestamp: Date.now() - (64 - i) * 1000 + 100, + }); + } + + session = new AgentSession({ + agent, + sessionManager, + settings: Settings.isolated({ + "compaction.strategy": "snapcompact", + "compaction.autoContinue": false, + // Force a small kept-recent window so the seeded conversation + // definitely splits into discard + kept and prepareCompaction() + // returns a non-empty preparation. 
+ "compaction.keepRecentTokens": 4000, + }), + modelRegistry, + }); + }); + + afterEach(async () => { + try { + await session?.dispose(); + } finally { + authStorage?.close(); + await tempDir?.remove(); + vi.restoreAllMocks(); + } + }); + + it("passes a window-sized maxFrames to snapcompact.compact() on sub-1M-token models", async () => { + // Capture the options snapcompact.compact() is invoked with, and short- + // circuit it so the projection downstream evaluates against a known + // empty-frame archive (which fits any budget). The contract is about + // what the caller asks for, not what snapcompact then chooses to emit. + const model = session.model; + if (!model) throw new Error("Expected model to be set on session"); + const ctxWindow = model.contextWindow ?? 0; + expect(ctxWindow).toBeGreaterThan(0); + + const branchEntries = sessionManager.getBranch(); + const firstKeptEntry = branchEntries[branchEntries.length - 1]; + if (!firstKeptEntry?.id) throw new Error("Expected branch entry with id"); + + const compactSpy = vi.spyOn(snapcompact, "compact").mockResolvedValue({ + summary: "stubbed snapcompact", + shortSummary: "stub", + firstKeptEntryId: firstKeptEntry.id, + tokensBefore: 100_000, + details: { readFiles: [], modifiedFiles: [] }, + preserveData: { + snapcompact: { frames: [], totalChars: 0, truncatedChars: 0 }, + }, + }); + + await session.compact(undefined, { mode: "snapcompact" }); + + expect(compactSpy).toHaveBeenCalledTimes(1); + const opts = compactSpy.mock.calls[0]?.[1]; + expect(opts).toBeDefined(); + const maxFrames = opts?.maxFrames; + expect(maxFrames).toBeDefined(); + expect(maxFrames).toBeLessThan(snapcompact.MAX_FRAMES_DEFAULT); + expect(maxFrames).toBeGreaterThan(0); + + // The chosen cap MUST keep the projected frame budget inside the + // resolved (window − reserve) envelope — otherwise the projection + // guard would reject and loop back to the LLM summary every tick. 
+ const reserve = effectiveReserveTokens(ctxWindow, { + enabled: true, + reserveTokens: 16384, + keepRecentTokens: 4000, + }); + const budget = ctxWindow - reserve; + expect((maxFrames ?? 0) * snapcompact.FRAME_TOKEN_ESTIMATE).toBeLessThan(budget); + }); + + it("skips snapcompact entirely when kept-recent already exceeds the budget", async () => { + // Append one synthetic message large enough to overflow the model window + // on its own (kept by findCutPoint since keepRecentTokens=4000 falls + // well short of it). Snapcompact CANNOT fit even a single frame; the + // session MUST skip it instead of running and emitting "could not bring + // the context under the limit" every tick. + const model = session.model; + if (!model) throw new Error("Expected model"); + const ctxWindow = model.contextWindow ?? 0; + const huge = "a".repeat(ctxWindow * 4); + sessionManager.appendMessage({ + role: "user", + content: [{ type: "text", text: huge }], + timestamp: Date.now(), + }); + + const compactSpy = vi.spyOn(snapcompact, "compact"); + const notices: { level: string; message: string }[] = []; + session.subscribe(event => { + if (event.type === "notice") { + notices.push({ level: event.level, message: event.message }); + } + }); + + await expect(session.compact(undefined, { mode: "snapcompact" })).rejects.toThrow(); + + // snapcompact.compact() MUST NOT be invoked when the budget cannot + // fit even one frame — running it just to reject the result and + // re-emit the warning is the exact loop issue #3247 reports. + expect(compactSpy).not.toHaveBeenCalled(); + // The user-facing notice MUST explain the kept-history overflow rather + // than the misleading "could not bring the context under the limit" + // (which implied snapcompact had run and produced an oversized result). 
+ expect(notices.some(n => n.level === "warning" && n.message.includes("kept history"))).toBe(true); + }); +}); From 65f945f1b7dbc11cb8792a725b6dccbafcc11e8e Mon Sep 17 00:00:00 2001 From: roboomp Date: Mon, 22 Jun 2026 10:16:25 +0000 Subject: [PATCH 2/4] fix(agent): preserve snapcompact text-only path when budget is near full MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit chatgpt-codex review on #3249: the helper returned 0 when frameBudget < FRAME_TOKEN_ESTIMATE, causing the caller to skip snapcompact entirely. But snapcompact.planArchive has a 'text.length <= 2 * edgeCap' short- circuit that produces a valid frames:[] archive when the discarded history is small enough — and the projection charges 0 for that. Hard return-0 blocked that opportunity, forcing the LLM summarizer fallback in offline/no-credential sessions where the text-only path would have landed cleanly. #computeSnapcompactMaxFrames now distinguishes two near-full cases: - frameBudget < 0 → return 0 (kept-recent already exhausted budget; no text-only summary can fit either) → caller still skips outright. - 0 ≤ frameBudget < FRAME_TOKEN_ESTIMATE → return 1 → snapcompact runs and picks the frame-less planArchive branch automatically for small discarded histories; the projection guard rejects any actual frame-bearing archive that overflows. Added regression test pinning maxFrames=1 (not 0) in the near-full window case. 
--- packages/coding-agent/CHANGELOG.md | 2 +- .../coding-agent/src/session/agent-session.ts | 25 ++++++--- .../agent-session-snapcompact-budget.test.ts | 54 +++++++++++++++++++ 3 files changed, 72 insertions(+), 9 deletions(-) diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md index 6aa2c35a1d..14e03deb84 100644 --- a/packages/coding-agent/CHANGELOG.md +++ b/packages/coding-agent/CHANGELOG.md @@ -4,7 +4,7 @@ ### Fixed -- Fixed snapcompact auto-compaction looping the "snapcompact could not bring the context under the limit — using an LLM summary instead" warning on every threshold tick for sub-1M-token models (Claude Sonnet 4.5, GPT-5.x, Gemini 2.x). `snapcompact.compact()` was called with no `maxFrames` override, so it defaulted to `MAX_FRAMES_DEFAULT = 80`; the projection in `AgentSession` charges `FRAME_TOKEN_ESTIMATE = 5024` per frame block (the conservative high-res Anthropic ceiling), making 80 × 5024 ≈ 402k frame-token projections that always overflow a 200k budget. `AgentSession.#computeSnapcompactMaxFrames` now sizes the frame cap from the live `(window − reserve − non-message − kept-recent − summary-text reserve)` envelope before invoking snapcompact in both the auto-compaction and manual `/compact` paths, and skips snapcompact outright when the kept-recent slice alone already exceeds the budget (with a clearer notice explaining the cause). ([#3247](https://github.com/can1357/oh-my-pi/issues/3247)) +- Fixed snapcompact auto-compaction looping the "snapcompact could not bring the context under the limit — using an LLM summary instead" warning on every threshold tick for sub-1M-token models (Claude Sonnet 4.5, GPT-5.x, Gemini 2.x). 
`snapcompact.compact()` was called with no `maxFrames` override, so it defaulted to `MAX_FRAMES_DEFAULT = 80`; the projection in `AgentSession` charges `FRAME_TOKEN_ESTIMATE = 5024` per frame block (the conservative high-res Anthropic ceiling), making 80 × 5024 ≈ 402k frame-token projections that always overflow a 200k budget. `AgentSession.#computeSnapcompactMaxFrames` now sizes the frame cap from the live `(window − reserve − non-message − kept-recent − summary-text reserve)` envelope before invoking snapcompact in both the auto-compaction and manual `/compact` paths. When the residual is below one frame's token charge but still positive, the helper returns `maxFrames = 1` so snapcompact's frame-less `text.length <= 2 * edgeCap` short-circuit (`planArchive`) can still produce a valid text-only archive instead of being skipped; snapcompact is skipped outright only when even that text-only summary cannot fit (kept-recent already exhausted the budget), with a clearer notice explaining the cause. ([#3247](https://github.com/can1357/oh-my-pi/issues/3247)) ## [16.1.14] - 2026-06-22 diff --git a/packages/coding-agent/src/session/agent-session.ts b/packages/coding-agent/src/session/agent-session.ts index 5f9e2c874f..a4ca5be32d 100644 --- a/packages/coding-agent/src/session/agent-session.ts +++ b/packages/coding-agent/src/session/agent-session.ts @@ -9429,13 +9429,21 @@ export class AgentSession { * high-res Anthropic ceiling), so picking `maxFrames` from this helper makes * {@link #projectSnapcompactContextTokens} succeed by construction. * - * Returns `0` when the kept-recent slice plus the non-message overhead - * already eats the entire budget — at that point snapcompact cannot fit a - * single frame and the caller MUST skip it instead of running just to - * reject the result and re-emit the "could not bring the context under the - * limit" warning every threshold tick. 
Without this cap, the bundled - * `MAX_FRAMES_DEFAULT = 80` × 5024 tokens = ~402k frame-token projection - * always overflows any sub-1M-token window (issue #3247). + * Returns `0` only when the kept-recent slice plus the non-message overhead + * plus the summary lead-in reserve already exceed the budget — at that + * point even snapcompact's text-only archive path (frames `[]`, the + * `text.length <= 2 * edgeCap` short-circuit in `planArchive`) cannot fit + * and the caller MUST skip it instead of running just to re-emit the + * "could not bring the context under the limit" warning every threshold + * tick. Returns `1` when the frame charge would overflow but the text-only + * path still has room: snapcompact's planner picks the frame-less layout + * automatically when the discarded text fits in the edges, so giving it + * the minimum cap lets it succeed for small archives instead of being + * skipped outright (chatgpt-codex review on #3249). + * + * Without this cap, the bundled `MAX_FRAMES_DEFAULT = 80` × 5024 tokens = + * ~402k frame-token projection always overflows any sub-1M-token window + * (issue #3247). */ #computeSnapcompactMaxFrames(preparation: CompactionPreparation, settings: CompactionSettings): number { const ctxWindow = this.model?.contextWindow ?? 0; @@ -9452,7 +9460,8 @@ export class AgentSession { // past the projection check below. 
const SUMMARY_TEXT_RESERVE = 4000; const frameBudget = ctxWindow - reserve - nonFrameTokens - SUMMARY_TEXT_RESERVE; - if (frameBudget < snapcompact.FRAME_TOKEN_ESTIMATE) return 0; + if (frameBudget < 0) return 0; + if (frameBudget < snapcompact.FRAME_TOKEN_ESTIMATE) return 1; return Math.min(Math.floor(frameBudget / snapcompact.FRAME_TOKEN_ESTIMATE), snapcompact.MAX_FRAMES_DEFAULT); } diff --git a/packages/coding-agent/test/agent-session-snapcompact-budget.test.ts b/packages/coding-agent/test/agent-session-snapcompact-budget.test.ts index dba0a415af..bfde4dbc03 100644 --- a/packages/coding-agent/test/agent-session-snapcompact-budget.test.ts +++ b/packages/coding-agent/test/agent-session-snapcompact-budget.test.ts @@ -195,4 +195,58 @@ describe("AgentSession snapcompact frame-budget sizing", () => { // (which implied snapcompact had run and produced an oversized result). expect(notices.some(n => n.level === "warning" && n.message.includes("kept history"))).toBe(true); }); + + it("still invokes snapcompact with maxFrames=1 when the budget can only fit a text-only archive", async () => { + // Reviewer (chatgpt-codex on #3249): when kept-recent leaves less than + // one FRAME_TOKEN_ESTIMATE of headroom but still some room for a + // summary, snapcompact's `text.length <= 2 * edgeCap` short-circuit in + // `planArchive` can still produce a valid frame-less archive that the + // projection accepts (0 frame tokens billed). The helper MUST NOT + // return 0 in that case — it must give snapcompact the minimum + // `maxFrames = 1` cap so that text-only opportunity is taken. + const model = session.model; + if (!model) throw new Error("Expected model"); + const ctxWindow = model.contextWindow ?? 0; + // Tune the kept-recent message so frameBudget lands in the + // `[0, FRAME_TOKEN_ESTIMATE)` window: kept-recent + non-message + + // summary reserve is just under `ctxWindow - reserve` but the + // residual is below one frame's token charge. 
Aim for ~3000 tokens + // of headroom (less than FRAME_TOKEN_ESTIMATE = 5024). + const reserve = Math.max(Math.floor(ctxWindow * 0.15), 16384); + const headroomTokens = 3000; + const targetRecentTokens = ctxWindow - reserve - 4000 /* SUMMARY_TEXT_RESERVE */ - headroomTokens; + // Rough 4-chars-per-token rule for the tiktoken estimator on ASCII. + const filler = "x".repeat(targetRecentTokens * 4); + sessionManager.appendMessage({ + role: "user", + content: [{ type: "text", text: filler }], + timestamp: Date.now(), + }); + + const branchEntries = sessionManager.getBranch(); + const lastEntry = branchEntries[branchEntries.length - 1]; + if (!lastEntry?.id) throw new Error("Expected branch entry with id"); + + const compactSpy = vi.spyOn(snapcompact, "compact").mockResolvedValue({ + summary: "stubbed snapcompact", + shortSummary: "stub", + firstKeptEntryId: lastEntry.id, + tokensBefore: 100_000, + // Text-only archive: zero frames, modest text edges. The projection + // charges 0 for frames, so the post-compaction context fits. + details: { readFiles: [], modifiedFiles: [] }, + preserveData: { + snapcompact: { frames: [], totalChars: 1000, truncatedChars: 0 }, + }, + }); + + await session.compact(undefined, { mode: "snapcompact" }); + + expect(compactSpy).toHaveBeenCalledTimes(1); + const opts = compactSpy.mock.calls[0]?.[1]; + // Snapcompact MUST be invoked with the floor cap, never skipped, + // even though one frame charge would overflow the budget — the + // text-only `planArchive` path makes this case recoverable. 
+ expect(opts?.maxFrames).toBe(1); + }); }); From db57efc3d3485e45a76f94b0e53c5a416ef7bf17 Mon Sep 17 00:00:00 2001 From: roboomp Date: Mon, 22 Jun 2026 10:24:36 +0000 Subject: [PATCH 3/4] fix(agent): split snapcompact skip reserve from frame-cap reserve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit chatgpt-codex second-pass review on #3249: the previous helper folded the 4k SUMMARY_TEXT_RESERVE into both the maxFrames cap math AND the skip decision (return 0 when frameBudget < 0). That made any residual headroom below 4k fall negative and force the LLM-summarizer fallback, even though a text-only snapcompact archive (the 'text.length <= 2 * edgeCap' short-circuit in planArchive) typically costs only a few hundred tokens of summary lead-in and would have fit cleanly. The two reserves now serve their own jobs: - Skip iff 'baseTokens >= totalBudget' (kept-recent + non-message already eats the entire window − reserve envelope). No reserve fudge here; positive residual is always worth attempting. - Cap reserve (4k) is applied ONLY to the maxFrames calculation so the projection still passes once frames land. When the frame budget goes negative under that reserve but residual headroom is positive, the helper now returns maxFrames=1 instead of 0 so snapcompact's frame-less planArchive branch can still produce a valid archive. Updated regression test to pin the new contract directly: kept-recent tuned for 1500 tokens of headroom (well below the 4k cap reserve), the old helper returned 0 and skipped to the LLM summarizer, the new helper invokes snapcompact with maxFrames=1. 
--- packages/coding-agent/CHANGELOG.md | 2 +- .../coding-agent/src/session/agent-session.ts | 51 +++++++++++-------- .../agent-session-snapcompact-budget.test.ts | 32 ++++++------ 3 files changed, 49 insertions(+), 36 deletions(-) diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md index 14e03deb84..0fc8259333 100644 --- a/packages/coding-agent/CHANGELOG.md +++ b/packages/coding-agent/CHANGELOG.md @@ -4,7 +4,7 @@ ### Fixed -- Fixed snapcompact auto-compaction looping the "snapcompact could not bring the context under the limit — using an LLM summary instead" warning on every threshold tick for sub-1M-token models (Claude Sonnet 4.5, GPT-5.x, Gemini 2.x). `snapcompact.compact()` was called with no `maxFrames` override, so it defaulted to `MAX_FRAMES_DEFAULT = 80`; the projection in `AgentSession` charges `FRAME_TOKEN_ESTIMATE = 5024` per frame block (the conservative high-res Anthropic ceiling), making 80 × 5024 ≈ 402k frame-token projections that always overflow a 200k budget. `AgentSession.#computeSnapcompactMaxFrames` now sizes the frame cap from the live `(window − reserve − non-message − kept-recent − summary-text reserve)` envelope before invoking snapcompact in both the auto-compaction and manual `/compact` paths. When the residual is below one frame's token charge but still positive, the helper returns `maxFrames = 1` so snapcompact's frame-less `text.length <= 2 * edgeCap` short-circuit (`planArchive`) can still produce a valid text-only archive instead of being skipped; snapcompact is skipped outright only when even that text-only summary cannot fit (kept-recent already exhausted the budget), with a clearer notice explaining the cause. 
([#3247](https://github.com/can1357/oh-my-pi/issues/3247)) +- Fixed snapcompact auto-compaction looping the "snapcompact could not bring the context under the limit — using an LLM summary instead" warning on every threshold tick for sub-1M-token models (Claude Sonnet 4.5, GPT-5.x, Gemini 2.x). `snapcompact.compact()` was called with no `maxFrames` override, so it defaulted to `MAX_FRAMES_DEFAULT = 80`; the projection in `AgentSession` charges `FRAME_TOKEN_ESTIMATE = 5024` per frame block (the conservative high-res Anthropic ceiling), making 80 × 5024 ≈ 402k frame-token projections that always overflow a 200k budget. `AgentSession.#computeSnapcompactMaxFrames` now sizes the `maxFrames` cap from `(window − reserve − non-message − kept-recent − 4k summary reserve) / FRAME_TOKEN_ESTIMATE` before invoking snapcompact in both the auto-compaction and manual `/compact` paths. The 4k summary reserve applies **only** to the frame-cap math, not the skip decision: snapcompact is skipped outright only when `kept-recent + non-message ≥ ctxWindow − reserve` (no headroom at all), so the frame-less `text.length <= 2 * edgeCap` short-circuit in `planArchive` can still land a valid text-only archive when residual headroom is positive but below the cap reserve (a few hundred tokens of summary lead-in fits easily in 1–2k of headroom). The projection guard catches any actual frame-bearing archive that overflows. ([#3247](https://github.com/can1357/oh-my-pi/issues/3247)) ## [16.1.14] - 2026-06-22 diff --git a/packages/coding-agent/src/session/agent-session.ts b/packages/coding-agent/src/session/agent-session.ts index a4ca5be32d..ec5cd8f1dc 100644 --- a/packages/coding-agent/src/session/agent-session.ts +++ b/packages/coding-agent/src/session/agent-session.ts @@ -9429,17 +9429,25 @@ export class AgentSession { * high-res Anthropic ceiling), so picking `maxFrames` from this helper makes * {@link #projectSnapcompactContextTokens} succeed by construction. 
* - * Returns `0` only when the kept-recent slice plus the non-message overhead - * plus the summary lead-in reserve already exceed the budget — at that - * point even snapcompact's text-only archive path (frames `[]`, the - * `text.length <= 2 * edgeCap` short-circuit in `planArchive`) cannot fit - * and the caller MUST skip it instead of running just to re-emit the - * "could not bring the context under the limit" warning every threshold - * tick. Returns `1` when the frame charge would overflow but the text-only - * path still has room: snapcompact's planner picks the frame-less layout + * Skip vs. cap use different reserves on purpose. The **skip** decision + * (return `0`) trips only when kept-recent plus non-message tokens already + * eat the entire `ctxWindow − reserve` envelope: at that point no archive + * shape — frame-bearing or text-only — can fit, and the caller MUST + * shortcut to the LLM summarizer instead of re-running snapcompact to + * re-emit the "could not bring the context under the limit" warning every + * threshold tick. The **cap** calculation subtracts a 4k summary-text + * reserve sized for a typical frame-bearing archive (~2k summary lead-in + * + one HQ-capacity verbatim text edge), so the projection still passes + * once frames land — but it MUST NOT gate the skip decision, since a + * frame-less archive (`text.length <= 2 * edgeCap` short-circuit in + * `planArchive`) typically costs only a few hundred tokens of summary + * lead and would fit under residual headroom far smaller than the cap + * reserve (chatgpt-codex review on #3249). + * + * Returns `1` when the frame charge would overflow but the text-only path + * still has room: snapcompact's planner picks the frame-less layout * automatically when the discarded text fits in the edges, so giving it - * the minimum cap lets it succeed for small archives instead of being - * skipped outright (chatgpt-codex review on #3249). 
+ * the minimum cap lets it succeed instead of being skipped outright. * * Without this cap, the bundled `MAX_FRAMES_DEFAULT = 80` × 5024 tokens = * ~402k frame-token projection always overflows any sub-1M-token window @@ -9449,18 +9457,21 @@ export class AgentSession { const ctxWindow = this.model?.contextWindow ?? 0; if (ctxWindow <= 0) return snapcompact.MAX_FRAMES_DEFAULT; const reserve = effectiveReserveTokens(ctxWindow, settings); - let nonFrameTokens = computeNonMessageTokens(this); + let baseTokens = computeNonMessageTokens(this); for (const message of preparation.recentMessages) { - nonFrameTokens += estimateTokens(message); - } - // Headroom for the summary-message lead-in plus the verbatim text edges - // snapcompact pins around the imaged middle. Sized for the typical - // snapcompact summary (~2k tokens) plus one HQ-capacity text edge on - // each side; conservative, so a tighter post-render run cannot drift - // past the projection check below. + baseTokens += estimateTokens(message); + } + const totalBudget = ctxWindow - reserve; + // Skip iff there is no headroom whatsoever; a text-only archive costs + // far less than the cap reserve below, so any positive residual is + // worth attempting and the projection guard catches actual overflow. + if (baseTokens >= totalBudget) return 0; + // Cap reserve: conservative headroom for a frame-bearing archive's + // summary lead-in plus verbatim text edges. Applied ONLY to the + // maxFrames cap (so the projection passes once frames land), never + // to the skip decision above. 
const SUMMARY_TEXT_RESERVE = 4000; - const frameBudget = ctxWindow - reserve - nonFrameTokens - SUMMARY_TEXT_RESERVE; - if (frameBudget < 0) return 0; + const frameBudget = totalBudget - baseTokens - SUMMARY_TEXT_RESERVE; if (frameBudget < snapcompact.FRAME_TOKEN_ESTIMATE) return 1; return Math.min(Math.floor(frameBudget / snapcompact.FRAME_TOKEN_ESTIMATE), snapcompact.MAX_FRAMES_DEFAULT); } diff --git a/packages/coding-agent/test/agent-session-snapcompact-budget.test.ts b/packages/coding-agent/test/agent-session-snapcompact-budget.test.ts index bfde4dbc03..262956e5dd 100644 --- a/packages/coding-agent/test/agent-session-snapcompact-budget.test.ts +++ b/packages/coding-agent/test/agent-session-snapcompact-budget.test.ts @@ -196,25 +196,27 @@ describe("AgentSession snapcompact frame-budget sizing", () => { expect(notices.some(n => n.level === "warning" && n.message.includes("kept history"))).toBe(true); }); - it("still invokes snapcompact with maxFrames=1 when the budget can only fit a text-only archive", async () => { - // Reviewer (chatgpt-codex on #3249): when kept-recent leaves less than - // one FRAME_TOKEN_ESTIMATE of headroom but still some room for a - // summary, snapcompact's `text.length <= 2 * edgeCap` short-circuit in - // `planArchive` can still produce a valid frame-less archive that the - // projection accepts (0 frame tokens billed). The helper MUST NOT - // return 0 in that case — it must give snapcompact the minimum - // `maxFrames = 1` cap so that text-only opportunity is taken. + it("still invokes snapcompact with maxFrames=1 when residual headroom is below the summary-text reserve", async () => { + // Reviewer (chatgpt-codex on #3249, second pass): when kept-recent + + // non-message leaves SOME real headroom but less than the 4k + // SUMMARY_TEXT_RESERVE the helper holds back to size frame caps, the + // previous revision still went negative and returned 0 (skipped + // snapcompact). 
But a text-only snapcompact archive (the + // `text.length <= 2 * edgeCap` short-circuit in `planArchive`) + // typically costs only a few hundred tokens of summary lead, far + // below 4k. The skip decision MUST use raw `baseTokens >= totalBudget` + // — the cap reserve applies only to the maxFrames math, not the skip. const model = session.model; if (!model) throw new Error("Expected model"); const ctxWindow = model.contextWindow ?? 0; - // Tune the kept-recent message so frameBudget lands in the - // `[0, FRAME_TOKEN_ESTIMATE)` window: kept-recent + non-message + - // summary reserve is just under `ctxWindow - reserve` but the - // residual is below one frame's token charge. Aim for ~3000 tokens - // of headroom (less than FRAME_TOKEN_ESTIMATE = 5024). + // Tune kept-recent so the residual `totalBudget − baseTokens` is + // 1500 tokens — strictly positive, but well below the 4k cap reserve. + // The previous helper would compute frameBudget = 1500 − 4000 = −2500 + // and return 0; the fixed helper returns 1 because the residual is + // positive and the text-only archive can still fit. const reserve = Math.max(Math.floor(ctxWindow * 0.15), 16384); - const headroomTokens = 3000; - const targetRecentTokens = ctxWindow - reserve - 4000 /* SUMMARY_TEXT_RESERVE */ - headroomTokens; + const headroomTokens = 1500; + const targetRecentTokens = ctxWindow - reserve - headroomTokens; // Rough 4-chars-per-token rule for the tiktoken estimator on ASCII. 
const filler = "x".repeat(targetRecentTokens * 4); sessionManager.appendMessage({ From 232994496d10f6ac9698582d817af486be5b23d8 Mon Sep 17 00:00:00 2001 From: roboomp Date: Mon, 22 Jun 2026 10:38:24 +0000 Subject: [PATCH 4/4] fix(agent): size snapcompact cap reserve from live shape's text-edge cost MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit chatgpt-codex third-pass review on #3249: the 4k SUMMARY_TEXT_RESERVE in the cap math undersized the actual textHead+textTail cost a frame- bearing archive carries (the projection separately bills 'countTokens(summary + textHead + textTail)'). At ~120k headroom on Anthropic 11on16-bw, the cap picked maxFrames=23, but '23 * 5024 + 2 * 13916 chars (≈7k tokens) + 2k summary template ≈ 124.5k' still exceeded the same 120k headroom — the cap chose a value the projection then immediately rejected, re-opening the warning loop. #computeSnapcompactMaxFrames now resolves the live snapcompact shape (same call the auto/manual paths pass to snapcompact.compact) and sizes the cap reserve from 'geometry(shape).capacity': textEdgeTokens = ceil(2 * capacity * 1.15 / 4) // 1.15 absorbs // tokenizer drift capReserve = textEdgeTokens + 2000 // + summary template For the default per-provider winners that resolves to ~10k (Anthropic Sonnet), ~14k (Opus 4.7), ~16k (Gemini 2.x), and ~10k (OpenAI) — all larger than the prior fixed 4k. Skip decision stays separate (baseTokens >= totalBudget), so positive sub-reserve headroom still runs snapcompact's text-only path. Test 1 retuned to baseline kept-recent ≈ 100k tokens with a strengthened assertion verifying the FULL projection invariant (frames + worst-case text edges + summary template + base ≤ budget). Confirmed test fails against the previous 4k-reserve helper by exactly the reviewer's predicted margin (174,271 vs 170,000 budget = 4,271 token overshoot). 
--- packages/coding-agent/CHANGELOG.md | 2 +- .../coding-agent/src/session/agent-session.ts | 46 +++++++++++---- .../agent-session-snapcompact-budget.test.ts | 58 ++++++++++++++----- 3 files changed, 77 insertions(+), 29 deletions(-) diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md index 0fc8259333..c6379df8a4 100644 --- a/packages/coding-agent/CHANGELOG.md +++ b/packages/coding-agent/CHANGELOG.md @@ -4,7 +4,7 @@ ### Fixed -- Fixed snapcompact auto-compaction looping the "snapcompact could not bring the context under the limit — using an LLM summary instead" warning on every threshold tick for sub-1M-token models (Claude Sonnet 4.5, GPT-5.x, Gemini 2.x). `snapcompact.compact()` was called with no `maxFrames` override, so it defaulted to `MAX_FRAMES_DEFAULT = 80`; the projection in `AgentSession` charges `FRAME_TOKEN_ESTIMATE = 5024` per frame block (the conservative high-res Anthropic ceiling), making 80 × 5024 ≈ 402k frame-token projections that always overflow a 200k budget. `AgentSession.#computeSnapcompactMaxFrames` now sizes the `maxFrames` cap from `(window − reserve − non-message − kept-recent − 4k summary reserve) / FRAME_TOKEN_ESTIMATE` before invoking snapcompact in both the auto-compaction and manual `/compact` paths. The 4k summary reserve applies **only** to the frame-cap math, not the skip decision: snapcompact is skipped outright only when `kept-recent + non-message ≥ ctxWindow − reserve` (no headroom at all), so the frame-less `text.length <= 2 * edgeCap` short-circuit in `planArchive` can still land a valid text-only archive when residual headroom is positive but below the cap reserve (a few hundred tokens of summary lead-in fits easily in 1–2k of headroom). The projection guard catches any actual frame-bearing archive that overflows. 
([#3247](https://github.com/can1357/oh-my-pi/issues/3247)) +- Fixed snapcompact auto-compaction looping the "snapcompact could not bring the context under the limit — using an LLM summary instead" warning on every threshold tick for sub-1M-token models (Claude Sonnet 4.5, GPT-5.x, Gemini 2.x). `snapcompact.compact()` was called with no `maxFrames` override, so it defaulted to `MAX_FRAMES_DEFAULT = 80`; the projection in `AgentSession` charges `FRAME_TOKEN_ESTIMATE = 5024` per frame block (the conservative high-res Anthropic ceiling), making 80 × 5024 ≈ 402k frame-token projections that always overflow a 200k budget. `AgentSession.#computeSnapcompactMaxFrames` now sizes the `maxFrames` cap from a **shape-aware** reserve — `2 × geometry(shape).capacity` worth of verbatim text-edge chars billed at the tiktoken cl100k 4-chars/token baseline (with a 1.15 multiplier for tokenizer drift), plus a 2k summary-template allowance — mirroring what `#projectSnapcompactContextTokens` will charge once frames land. The shape comes from the same `snapcompact.resolveShape(model, settings)` call the auto and manual paths pass into `snapcompact.compact()`. The cap reserve applies **only** to the frame-cap math, not the skip decision: snapcompact is skipped outright only when `kept-recent + non-message ≥ ctxWindow − reserve` (no headroom at all), so the frame-less `text.length <= 2 * edgeCap` short-circuit in `planArchive` can still land a valid text-only archive when residual headroom is positive but below the cap reserve. The projection guard catches any actual frame-bearing archive that overflows. 
([#3247](https://github.com/can1357/oh-my-pi/issues/3247)) ## [16.1.14] - 2026-06-22 diff --git a/packages/coding-agent/src/session/agent-session.ts b/packages/coding-agent/src/session/agent-session.ts index ec5cd8f1dc..31d9075b5a 100644 --- a/packages/coding-agent/src/session/agent-session.ts +++ b/packages/coding-agent/src/session/agent-session.ts @@ -9435,14 +9435,15 @@ export class AgentSession { * shape — frame-bearing or text-only — can fit, and the caller MUST * shortcut to the LLM summarizer instead of re-running snapcompact to * re-emit the "could not bring the context under the limit" warning every - * threshold tick. The **cap** calculation subtracts a 4k summary-text - * reserve sized for a typical frame-bearing archive (~2k summary lead-in - * + one HQ-capacity verbatim text edge), so the projection still passes - * once frames land — but it MUST NOT gate the skip decision, since a - * frame-less archive (`text.length <= 2 * edgeCap` short-circuit in + * threshold tick. The **cap** calculation subtracts a shape-aware reserve + * (`2 × geometry(shape).capacity` chars worth of text edges, billed at the + * tiktoken cl100k baseline, plus a 2k summary-template allowance) sized + * from the same `shape` snapcompact will use, so the projection still + * passes once frames land — but it MUST NOT gate the skip decision, since + * a frame-less archive (`text.length <= 2 * edgeCap` short-circuit in * `planArchive`) typically costs only a few hundred tokens of summary * lead and would fit under residual headroom far smaller than the cap - * reserve (chatgpt-codex review on #3249). + * reserve (chatgpt-codex reviews on #3249). * * Returns `1` when the frame charge would overflow but the text-only path * still has room: snapcompact's planner picks the frame-less layout @@ -9466,12 +9467,33 @@ export class AgentSession { // far less than the cap reserve below, so any positive residual is // worth attempting and the projection guard catches actual overflow. 
if (baseTokens >= totalBudget) return 0; - // Cap reserve: conservative headroom for a frame-bearing archive's - // summary lead-in plus verbatim text edges. Applied ONLY to the - // maxFrames cap (so the projection passes once frames land), never - // to the skip decision above. - const SUMMARY_TEXT_RESERVE = 4000; - const frameBudget = totalBudget - baseTokens - SUMMARY_TEXT_RESERVE; + // Cap reserve mirrors what `estimateTokens(summaryMessage)` will charge + // when frames > 0: `countTokens(summaryTemplate ‖ textHead ‖ textTail)` + // plus `numFrames × FRAME_TOKEN_ESTIMATE`. Resolve the shape this + // snapcompact pass will actually use (matches the `shape` argument + // passed to `snapcompact.compact` in the auto and manual paths) so the + // text-edge cost reflects the live frame geometry rather than a fixed + // approximation. Reviewer (chatgpt-codex on #3249): a 4k reserve + // undersized the ~7k text-edge cost on the default Anthropic + // 11on16-bw shape, so the projection then rejected the `maxFrames` + // the cap had picked and the warning loop reappeared. + // + // - `textHead` and `textTail` each consume up to `geometry.capacity` + // chars when frames > 0 (one HQ-capacity page per edge: see + // `TEXT_EDGE_PAGES = 1` in `planArchive`), so 2 × capacity chars + // total. Per-shape capacity: Anthropic 11on16-bw ~13.9k, Opus + // 1932px ~21k, Gemini 8on22-bw 2048px ~23.8k, OpenAI 1568px ~13.9k. + // - tiktoken cl100k ≈ 4 chars/token on ASCII (verified empirically + // for prose, code, and JSON); a 1.15 multiplier absorbs tokenizer + // drift on denser content (e.g. dense JSON / tool-result blobs). + // - Summary template (intro + FILES section + grid notes) bills + // ~2k tokens for typical sessions. 
+ const shape = snapcompact.resolveShape(this.model, this.settings.get("snapcompact.shape")); + const edgeCap = snapcompact.geometry(shape).capacity; + const textEdgeTokens = Math.ceil((2 * edgeCap * 1.15) / 4); + const SUMMARY_TEMPLATE_TOKENS = 2000; + const capReserve = textEdgeTokens + SUMMARY_TEMPLATE_TOKENS; + const frameBudget = totalBudget - baseTokens - capReserve; if (frameBudget < snapcompact.FRAME_TOKEN_ESTIMATE) return 1; return Math.min(Math.floor(frameBudget / snapcompact.FRAME_TOKEN_ESTIMATE), snapcompact.MAX_FRAMES_DEFAULT); } diff --git a/packages/coding-agent/test/agent-session-snapcompact-budget.test.ts b/packages/coding-agent/test/agent-session-snapcompact-budget.test.ts index 262956e5dd..d5e70f891f 100644 --- a/packages/coding-agent/test/agent-session-snapcompact-budget.test.ts +++ b/packages/coding-agent/test/agent-session-snapcompact-budget.test.ts @@ -21,10 +21,11 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "bun:test"; import * as path from "node:path"; import { Agent } from "@oh-my-pi/pi-agent-core"; -import { effectiveReserveTokens } from "@oh-my-pi/pi-agent-core/compaction"; +import { effectiveReserveTokens, estimateTokens, prepareCompaction } from "@oh-my-pi/pi-agent-core/compaction"; import { getBundledModel } from "@oh-my-pi/pi-catalog/models"; import { ModelRegistry } from "@oh-my-pi/pi-coding-agent/config/model-registry"; import { Settings } from "@oh-my-pi/pi-coding-agent/config/settings"; +import { computeNonMessageTokens } from "@oh-my-pi/pi-coding-agent/modes/utils/context-usage"; import { AgentSession } from "@oh-my-pi/pi-coding-agent/session/agent-session"; import { AuthStorage } from "@oh-my-pi/pi-coding-agent/session/auth-storage"; import { SessionManager } from "@oh-my-pi/pi-coding-agent/session/session-manager"; @@ -113,16 +114,32 @@ describe("AgentSession snapcompact frame-budget sizing", () => { } }); - it("passes a window-sized maxFrames to snapcompact.compact() on sub-1M-token models", async () => 
{ - // Capture the options snapcompact.compact() is invoked with, and short- - // circuit it so the projection downstream evaluates against a known - // empty-frame archive (which fits any budget). The contract is about - // what the caller asks for, not what snapcompact then chooses to emit. + it("passes a maxFrames whose full projection (frames + text edges + base) fits the budget", async () => { + // Tighten kept-recent into the realistic ~100k-token range. Without + // it, the helper has so much headroom that even a flawed (undersized) + // cap reserve passes the `maxFrames × FRAME_TOKEN_ESTIMATE < budget` + // check by accident. Reviewer chatgpt-codex on #3249 cited the exact + // failure mode: ~120k headroom on Anthropic 11on16-bw chose 23 frames + // under the previous 4k-reserve helper, but `23 × 5024 + 7k text + // edges + 2k summary template + base` then exceeded the same headroom. const model = session.model; if (!model) throw new Error("Expected model to be set on session"); const ctxWindow = model.contextWindow ?? 0; expect(ctxWindow).toBeGreaterThan(0); + const settings = { enabled: true as const, reserveTokens: 16384, keepRecentTokens: 4000 }; + const reserve = effectiveReserveTokens(ctxWindow, settings); + const budget = ctxWindow - reserve; + // Filler tuned so `baseTokens ≈ 100k`, leaving ~70k headroom — the + // regime where a shape-aware cap reserve actually matters. 
+ const targetRecentTokens = 100_000; + const filler = "x".repeat(targetRecentTokens * 4); + sessionManager.appendMessage({ + role: "user", + content: [{ type: "text", text: filler }], + timestamp: Date.now(), + }); + const branchEntries = sessionManager.getBranch(); const firstKeptEntry = branchEntries[branchEntries.length - 1]; if (!firstKeptEntry?.id) throw new Error("Expected branch entry with id"); @@ -148,16 +165,25 @@ describe("AgentSession snapcompact frame-budget sizing", () => { expect(maxFrames).toBeLessThan(snapcompact.MAX_FRAMES_DEFAULT); expect(maxFrames).toBeGreaterThan(0); - // The chosen cap MUST keep the projected frame budget inside the - // resolved (window − reserve) envelope — otherwise the projection - // guard would reject and loop back to the LLM summary every tick. - const reserve = effectiveReserveTokens(ctxWindow, { - enabled: true, - reserveTokens: 16384, - keepRecentTokens: 4000, - }); - const budget = ctxWindow - reserve; - expect((maxFrames ?? 0) * snapcompact.FRAME_TOKEN_ESTIMATE).toBeLessThan(budget); + // Verify the FULL projection — base (non-message + kept-recent) + + // frame-bearing summary cost — fits the budget. The projection + // {@link #projectSnapcompactContextTokens} mirrors what the auto and + // manual paths charge: countTokens(summary + textHead + textTail) + + // numFrames × FRAME_TOKEN_ESTIMATE + non-message + kept-recent. + const preparation = prepareCompaction(branchEntries, settings); + if (!preparation) throw new Error("Expected non-empty preparation"); + let baseTokens = computeNonMessageTokens(session); + for (const message of preparation.recentMessages) { + baseTokens += estimateTokens(message); + } + const shape = snapcompact.resolveShape(model); + const edgeCap = snapcompact.geometry(shape).capacity; + // Worst-case `textHead + textTail` tokenized at the cl100k 4-chars/token + // baseline, plus a 2k allowance for the snapcompact summary template + // (intro + FILES section + grid notes). 
+ const worstCaseEdgeTokens = Math.ceil((2 * edgeCap) / 4) + 2000; + const fullProjection = baseTokens + (maxFrames ?? 0) * snapcompact.FRAME_TOKEN_ESTIMATE + worstCaseEdgeTokens; + expect(fullProjection).toBeLessThanOrEqual(budget); }); it("skips snapcompact entirely when kept-recent already exceeds the budget", async () => {