Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions packages/coding-agent/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## [Unreleased]

### Fixed

- Fixed snapcompact auto-compaction looping the "snapcompact could not bring the context under the limit — using an LLM summary instead" warning on every threshold tick for sub-1M-token models (Claude Sonnet 4.5, GPT-5.x, Gemini 2.x). `snapcompact.compact()` was called with no `maxFrames` override, so it defaulted to `MAX_FRAMES_DEFAULT = 80`; the projection in `AgentSession` charges `FRAME_TOKEN_ESTIMATE = 5024` per frame block (the conservative high-res Anthropic ceiling), making 80 × 5024 ≈ 402k frame-token projections that always overflow a 200k budget. `AgentSession.#computeSnapcompactMaxFrames` now sizes the `maxFrames` cap from a **shape-aware** reserve — `2 × geometry(shape).capacity` worth of verbatim text-edge chars billed at the tiktoken cl100k 4-chars/token baseline (with a 1.15 multiplier for tokenizer drift), plus a 2k summary-template allowance — mirroring what `#projectSnapcompactContextTokens` will charge once frames land. The shape comes from the same `snapcompact.resolveShape(model, settings)` call the auto and manual paths pass into `snapcompact.compact()`. The cap reserve applies **only** to the frame-cap math, not the skip decision: snapcompact is skipped outright only when `kept-recent + non-message ≥ ctxWindow − reserve` (no headroom at all), so the frame-less `text.length <= 2 * edgeCap` short-circuit in `planArchive` can still land a valid text-only archive when residual headroom is positive but below the cap reserve. The projection guard catches any actual frame-bearing archive that overflows. ([#3247](https://github.com/can1357/oh-my-pi/issues/3247))

## [16.1.14] - 2026-06-22

### Added
Expand Down
167 changes: 137 additions & 30 deletions packages/coding-agent/src/session/agent-session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import {
CompactionCancelledError,
type CompactionPreparation,
type CompactionResult,
type CompactionSettings,
calculateContextTokens,
calculatePromptTokens,
collectEntriesForBranchSummary,
Expand Down Expand Up @@ -7797,31 +7798,45 @@ export class AgentSession {
let tokensBefore: number;
let details: unknown;

// Snapcompact runs locally first; if its frame archive plus the kept
// history still overflows the model window, fall back to an LLM summary
// (far cheaper than ~FRAME_TOKEN_ESTIMATE per frame).
// Snapcompact runs locally first. The frame cap is sized from the live
// model window via #computeSnapcompactMaxFrames so the post-render context
// fits without the warning loop (issue #3247). Zero-frame budget → skip
// snapcompact and take the summarizer path immediately.
let snapcompactResult: snapcompact.CompactionResult | undefined;
if (snapcompactReady) {
snapcompactResult = await snapcompact.compact(preparation, {
convertToLlm,
model: this.model,
shape: snapcompact.resolveShape(this.model, this.settings.get("snapcompact.shape")),
});
const ctxWindow = this.model?.contextWindow ?? 0;
const budget =
ctxWindow > 0
? ctxWindow - effectiveReserveTokens(ctxWindow, effectiveSettings)
: Number.POSITIVE_INFINITY;
if (this.#projectSnapcompactContextTokens(preparation, snapcompactResult) > budget) {
logger.warn("Snapcompact still overflows the window; falling back to an LLM summary", {
const maxFrames = this.#computeSnapcompactMaxFrames(preparation, effectiveSettings);
if (maxFrames < 1) {
logger.warn("Snapcompact skipped: kept history alone exceeds the context budget", {
model: this.model?.id,
});
this.emitNotice(
"warning",
"snapcompact could not bring the context under the limit — using an LLM summary instead",
"snapcompact: kept history alone exceeds the context budget — using an LLM summary instead",
"compaction",
);
snapcompactResult = undefined;
} else {
snapcompactResult = await snapcompact.compact(preparation, {
convertToLlm,
model: this.model,
shape: snapcompact.resolveShape(this.model, this.settings.get("snapcompact.shape")),
maxFrames,
});
const ctxWindow = this.model?.contextWindow ?? 0;
const budget =
ctxWindow > 0
? ctxWindow - effectiveReserveTokens(ctxWindow, effectiveSettings)
: Number.POSITIVE_INFINITY;
if (this.#projectSnapcompactContextTokens(preparation, snapcompactResult) > budget) {
logger.warn("Snapcompact still overflows the window after frame-budget sizing; falling back", {
model: this.model?.id,
});
this.emitNotice(
"warning",
"snapcompact could not bring the context under the limit — using an LLM summary instead",
"compaction",
);
snapcompactResult = undefined;
}
}
}

Expand Down Expand Up @@ -9407,6 +9422,82 @@ export class AgentSession {
return { kind: "needsLlm", hookContext, hookPrompt, preserveData };
}

/**
* Cap on snapcompact frames the post-compaction context can carry without
* busting the model window. Mirrors the per-frame token charge used by the
* projection ({@link snapcompact.FRAME_TOKEN_ESTIMATE}, the conservative
* high-res Anthropic ceiling), so picking `maxFrames` from this helper makes
* {@link #projectSnapcompactContextTokens} succeed by construction.
*
* Skip vs. cap use different reserves on purpose. The **skip** decision
* (return `0`) trips only when kept-recent plus non-message tokens already
* eat the entire `ctxWindow − reserve` envelope: at that point no archive
* shape — frame-bearing or text-only — can fit, and the caller MUST
* shortcut to the LLM summarizer instead of re-running snapcompact to
* re-emit the "could not bring the context under the limit" warning every
* threshold tick. The **cap** calculation subtracts a shape-aware reserve
* (`2 × geometry(shape).capacity` chars worth of text edges, billed at the
* tiktoken cl100k baseline, plus a 2k summary-template allowance) sized
* from the same `shape` snapcompact will use, so the projection still
* passes once frames land — but it MUST NOT gate the skip decision, since
* a frame-less archive (`text.length <= 2 * edgeCap` short-circuit in
* `planArchive`) typically costs only a few hundred tokens of summary
* lead and would fit under residual headroom far smaller than the cap
* reserve (chatgpt-codex reviews on #3249).
*
* Returns `1` when the frame charge would overflow but the text-only path
* still has room: snapcompact's planner picks the frame-less layout
* automatically when the discarded text fits in the edges, so giving it
* the minimum cap lets it succeed instead of being skipped outright.
*
* Without this cap, the bundled `MAX_FRAMES_DEFAULT = 80` × 5024 tokens ≈
* 402k frame-token projection cannot fit in any window whose post-reserve
* budget is below that figure, e.g. a 200k-token model (issue #3247).
*/
#computeSnapcompactMaxFrames(preparation: CompactionPreparation, settings: CompactionSettings): number {
	const contextWindow = this.model?.contextWindow ?? 0;
	// No usable window size to budget against: keep the bundled default cap.
	if (contextWindow <= 0) {
		return snapcompact.MAX_FRAMES_DEFAULT;
	}

	const reservedTokens = effectiveReserveTokens(contextWindow, settings);

	// Fixed cost that survives compaction: non-message tokens plus every
	// kept-recent message.
	let fixedTokens = computeNonMessageTokens(this);
	for (const kept of preparation.recentMessages) {
		fixedTokens += estimateTokens(kept);
	}

	const envelope = contextWindow - reservedTokens;
	// Skip (return 0) only when the fixed cost already fills the whole
	// envelope. A text-only archive is far cheaper than the cap reserve
	// computed below, so any positive residual is worth an attempt; the
	// projection guard in the caller catches a real overflow.
	if (fixedTokens >= envelope) {
		return 0;
	}

	// The cap reserve approximates what `estimateTokens(summaryMessage)`
	// charges once frames > 0, i.e.
	// `countTokens(summaryTemplate ‖ textHead ‖ textTail)`; the frames
	// themselves are billed separately at FRAME_TOKEN_ESTIMATE each.
	//
	// - The shape is resolved exactly as the auto and manual paths resolve
	//   the `shape` argument for `snapcompact.compact`, so the text-edge cost
	//   follows the live frame geometry instead of a fixed approximation.
	//   (Review on #3249: a flat 4k reserve undersized the ~7k text-edge cost
	//   of the default Anthropic 11on16-bw shape, so the projection rejected
	//   the chosen `maxFrames` and the warning loop came back.)
	// - `textHead` and `textTail` each take up to `geometry.capacity` chars
	//   when frames > 0 (one HQ-capacity page per edge, `TEXT_EDGE_PAGES = 1`
	//   in `planArchive`), hence 2 × capacity chars. Per-shape capacity:
	//   Anthropic 11on16-bw ~13.9k, Opus 1932px ~21k, Gemini 8on22-bw 2048px
	//   ~23.8k, OpenAI 1568px ~13.9k.
	// - tiktoken cl100k averages ~4 chars/token on ASCII prose, code and
	//   JSON; the 1.15 factor absorbs tokenizer drift on denser content such
	//   as dense JSON / tool-result blobs.
	// - The summary template (intro + FILES section + grid notes) bills ~2k
	//   tokens for typical sessions.
	const CHARS_PER_TOKEN = 4;
	const TOKENIZER_DRIFT = 1.15;
	const SUMMARY_TEMPLATE_TOKENS = 2000;

	const activeShape = snapcompact.resolveShape(this.model, this.settings.get("snapcompact.shape"));
	const charsPerEdge = snapcompact.geometry(activeShape).capacity;
	const edgeTokens = Math.ceil((2 * charsPerEdge * TOKENIZER_DRIFT) / CHARS_PER_TOKEN);
	const tokensForFrames = envelope - fixedTokens - (edgeTokens + SUMMARY_TEMPLATE_TOKENS);

	// Under one frame's worth of room: still hand back the minimum cap of 1
	// so the planner can choose its frame-less layout rather than being
	// skipped. Otherwise take as many whole frames as fit, never exceeding
	// the bundled default.
	const perFrame = snapcompact.FRAME_TOKEN_ESTIMATE;
	return tokensForFrames < perFrame
		? 1
		: Math.min(Math.floor(tokensForFrames / perFrame), snapcompact.MAX_FRAMES_DEFAULT);
}

/**
* Project the post-compaction context size of a snapcompact result: kept
* recent messages + the summary message with its re-attached frames + the
Expand Down Expand Up @@ -9652,24 +9743,20 @@ export class AgentSession {
let tokensBefore: number;
let details: unknown;

// Snapcompact runs locally first; if its frame archive plus the kept
// history still overflows the model window (frames default to
// MAX_FRAMES_DEFAULT and cost ~FRAME_TOKEN_ESTIMATE each), an LLM
// summary is far cheaper — downgrade to context-full and take the
// summarizer path.
// Snapcompact runs locally first. The post-compaction context = kept-recent
// + a summary message carrying the imaged archive at FRAME_TOKEN_ESTIMATE
// per frame; #computeSnapcompactMaxFrames sizes the frame cap from the
// live window so we don't run snapcompact just to overflow and fall back
// every threshold tick. Kept-recent already over budget → skip snapcompact
// outright (a single frame won't fit). Otherwise the projection below is
// only a defensive guard for summary-text drift.
let snapcompactResult: snapcompact.CompactionResult | undefined;
if (action === "snapcompact" && compactionPrep.kind !== "fromHook") {
const text = snapcompact.serializeConversation(
convertToLlm(preparation.messagesToSummarize.concat(preparation.turnPrefixMessages)),
);
const renderScan = snapcompact.scanRenderability(text);
if (renderScan.isSafe) {
snapcompactResult = await snapcompact.compact(preparation, {
convertToLlm,
model: this.model,
shape: snapcompact.resolveShape(this.model, this.settings.get("snapcompact.shape")),
});
} else {
if (!renderScan.isSafe) {
logger.warn("Snapcompact disabled: high non-ASCII rate detected; falling back to an LLM summary", {
model: this.model?.id,
unrenderableRatio: renderScan.unrenderableRatio,
Expand All @@ -9680,6 +9767,26 @@ export class AgentSession {
"compaction",
);
action = "context-full";
} else {
const maxFrames = this.#computeSnapcompactMaxFrames(preparation, compactionSettings);
if (maxFrames < 1) {
logger.warn("Snapcompact skipped: kept history alone exceeds the context budget", {
model: this.model?.id,
});
this.emitNotice(
"warning",
"snapcompact: kept history alone exceeds the context budget — using an LLM summary instead",
"compaction",
);
action = "context-full";
} else {
snapcompactResult = await snapcompact.compact(preparation, {
convertToLlm,
model: this.model,
shape: snapcompact.resolveShape(this.model, this.settings.get("snapcompact.shape")),
maxFrames,
});
}
}

if (snapcompactResult) {
Expand All @@ -9690,7 +9797,7 @@ export class AgentSession {
: Number.POSITIVE_INFINITY;
const projected = this.#projectSnapcompactContextTokens(preparation, snapcompactResult);
if (projected > budget) {
logger.warn("Snapcompact still overflows the window; falling back to an LLM summary", {
logger.warn("Snapcompact still overflows the window after frame-budget sizing; falling back", {
model: this.model?.id,
projected,
budget,
Expand Down
Loading
Loading