-
Notifications
You must be signed in to change notification settings - Fork 1.3k
fix(compaction): trigger /goal threshold on billed context, not post-prune estimate #3175
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b74f7bb
8ab754f
d444b3c
00d14ac
e70c710
eef2f7b
a305e68
29e69f5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1612,6 +1612,7 @@ export class AgentSession { | |
| await this.#advisorRuntime.waitForCatchup(30000, threshold, signal); | ||
| } | ||
| } | ||
| await this.#maintainContextMidRun(messages, signal); | ||
| }); | ||
| this.yieldQueue = new YieldQueue({ | ||
| isStreaming: () => this.isStreaming, | ||
|
|
@@ -2769,10 +2770,32 @@ export class AgentSession { | |
| this.#lastAssistantMessage = undefined; | ||
| if (!msg) { | ||
| this.#lastSuccessfulYieldToolCallId = undefined; | ||
| logger.debug("agent_end maintenance routing", { | ||
| reason: "no-assistant-message", | ||
| goalModeEnabled: this.#goalModeState?.enabled === true, | ||
| goalStatus: this.#goalModeState?.goal.status, | ||
| }); | ||
| await emitAgentEndNotification(); | ||
| return; | ||
| } | ||
|
|
||
| const maintenanceRoute = (route: string, extra?: Record<string, unknown>) => { | ||
| logger.debug("agent_end maintenance routing", { | ||
| route, | ||
| stopReason: msg.stopReason, | ||
| provider: msg.provider, | ||
| model: msg.model, | ||
| contentBlocks: msg.content.length, | ||
| hasToolCalls: msg.content.some(content => content.type === "toolCall"), | ||
| hasText: msg.content.some(content => content.type === "text"), | ||
| goalModeEnabled: this.#goalModeState?.enabled === true, | ||
| goalStatus: this.#goalModeState?.goal.status, | ||
| successfulYield: this.#assistantEndedWithSuccessfulYield(msg), | ||
| ...extra, | ||
| }); | ||
| }; | ||
| maintenanceRoute("entered"); | ||
|
|
||
| // Invalidate GitHub Copilot credentials on auth failure so stale tokens | ||
| // aren't reused on the next request | ||
| if ( | ||
|
|
@@ -2786,27 +2809,61 @@ export class AgentSession { | |
| if (this.#skipPostTurnMaintenanceAssistantTimestamp === msg.timestamp) { | ||
| this.#skipPostTurnMaintenanceAssistantTimestamp = undefined; | ||
| this.#lastSuccessfulYieldToolCallId = undefined; | ||
| maintenanceRoute("skip-post-turn-maintenance"); | ||
| await emitAgentEndNotification(); | ||
| return; | ||
| } | ||
|
|
||
| const activeGoal = this.#goalModeState?.enabled === true && this.#goalModeState.goal.status === "active"; | ||
| if (this.#assistantEndedWithSuccessfulYield(msg)) { | ||
| this.#lastSuccessfulYieldToolCallId = undefined; | ||
| if (this.#goalModeState?.enabled && this.#goalModeState.goal.status === "active") { | ||
| if (activeGoal) { | ||
| maintenanceRoute("successful-yield-active-goal-checkCompaction"); | ||
| const compactionTask = this.#checkCompaction(msg); | ||
| this.#trackPostPromptTask(compactionTask); | ||
| await compactionTask; | ||
| } else { | ||
| maintenanceRoute("successful-yield-no-active-goal"); | ||
| } | ||
| await emitAgentEndNotification(); | ||
| return; | ||
| } | ||
| this.#lastSuccessfulYieldToolCallId = undefined; | ||
|
|
||
| // Empty-stop cleanup MUST run before any compaction continuation: an | ||
| // empty toolUse stop must be stripped from active context + session | ||
| // history before we schedule another turn, otherwise the next | ||
| // Anthropic turn carries a tool_use block with no matching | ||
| // tool_result and corrupts message history. The handler also | ||
| // schedules its own retry, so a real empty stop never needs the | ||
| // active-goal threshold pre-empt below. | ||
| if (await this.#handleEmptyAssistantStop(msg)) { | ||
| maintenanceRoute("empty-stop-handled"); | ||
| await emitAgentEndNotification(); | ||
| return; | ||
| } | ||
|
|
||
| let compactionResult = COMPACTION_CHECK_NONE; | ||
| let checkedCompaction = false; | ||
| if (activeGoal) { | ||
| maintenanceRoute("active-goal-pre-empt-checkCompaction"); | ||
| const compactionTask = this.#checkCompaction(msg); | ||
| this.#trackPostPromptTask(compactionTask); | ||
| compactionResult = await compactionTask; | ||
| checkedCompaction = true; | ||
| if (compactionResult.deferredHandoff || compactionResult.continuationScheduled) { | ||
| maintenanceRoute("active-goal-pre-empt-continuation-scheduled", { | ||
| deferredHandoff: compactionResult.deferredHandoff, | ||
| continuationScheduled: compactionResult.continuationScheduled, | ||
| }); | ||
| this.#resolveRetry(); | ||
| await emitAgentEndNotification(); | ||
| return; | ||
|
Comment on lines
+2854
to
+2861
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
When an active Useful? React with 👍 / 👎. |
||
| } | ||
| } | ||
|
|
||
| if (await this.#handleUnexpectedAssistantStop(msg)) { | ||
| maintenanceRoute("unexpected-stop-handled"); | ||
| await emitAgentEndNotification(); | ||
| return; | ||
| } | ||
|
|
@@ -2846,9 +2903,12 @@ export class AgentSession { | |
| } | ||
| this.#resolveRetry(); | ||
|
|
||
| const compactionTask = this.#checkCompaction(msg); | ||
| this.#trackPostPromptTask(compactionTask); | ||
| const compactionResult = await compactionTask; | ||
| if (!checkedCompaction) { | ||
| maintenanceRoute("bottom-checkCompaction"); | ||
| const compactionTask = this.#checkCompaction(msg); | ||
| this.#trackPostPromptTask(compactionTask); | ||
| compactionResult = await compactionTask; | ||
| } | ||
| // Check for incomplete todos only after a final assistant stop, not intermediate tool-use turns. | ||
| const hasToolCalls = msg.content.some(content => content.type === "toolCall"); | ||
| if (hasToolCalls) { | ||
|
|
@@ -8207,6 +8267,58 @@ export class AgentSession { | |
| }); | ||
| } | ||
|
|
||
| /** | ||
| * Compact active `/goal` runs that never settle to `agent_end`. | ||
| * | ||
| * Long autonomous goals can keep producing tool calls inside one agent run. | ||
| * The post-turn `agent_end` threshold check never fires in that shape, so | ||
| * context can grow until provider overflow. `onTurnEnd` is the safe boundary: | ||
| * tool results for the just-finished turn are already paired in | ||
| * `activeMessages`, the live array the agent loop reads before its next | ||
| * model call. Run maintenance here and splice the compacted state back into | ||
| * that array, mirroring [`AgentSession.#applyRewind`]. | ||
| */ | ||
| async #maintainContextMidRun(activeMessages: AgentMessage[], signal?: AbortSignal): Promise<void> { | ||
| if (signal?.aborted || this.#isDisposed || this.isCompacting || this.isGeneratingHandoff) return; | ||
| if (!(this.#goalModeState?.enabled === true && this.#goalModeState.goal.status === "active")) return; | ||
|
|
||
| const model = this.model; | ||
| const contextWindow = model?.contextWindow ?? 0; | ||
| if (contextWindow <= 0) return; | ||
|
|
||
| const compactionSettings = this.settings.getGroup("compaction"); | ||
| if (!compactionSettings.enabled || compactionSettings.strategy === "off") return; | ||
|
|
||
| const lastAssistant = [...activeMessages] | ||
| .reverse() | ||
| .find((message): message is AssistantMessage => message.role === "assistant"); | ||
| if (!lastAssistant || lastAssistant.stopReason === "aborted" || lastAssistant.stopReason === "error") return; | ||
|
|
||
| const billedContextTokens = calculateContextTokens(lastAssistant.usage); | ||
| const storedContextTokens = this.#estimateStoredContextTokens(); | ||
| const contextTokens = compactionContextTokens(billedContextTokens, storedContextTokens); | ||
| if (!shouldCompact(contextTokens, contextWindow, compactionSettings)) return; | ||
|
|
||
| const messagesBefore = activeMessages.length; | ||
| await this.#runAutoCompaction("threshold", false, false, false, { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
When an active Useful? React with 👍 / 👎. |
||
| autoContinue: false, | ||
| suppressContinuation: true, | ||
| triggerContextTokens: contextTokens, | ||
| }); | ||
|
|
||
| if (signal?.aborted) return; | ||
| const compactedMessages = this.agent.state.messages; | ||
| if (compactedMessages !== activeMessages) { | ||
| activeMessages.splice(0, activeMessages.length, ...compactedMessages); | ||
| } | ||
| logger.debug("Mid-run goal compaction ran between tool-call turns", { | ||
| contextTokens, | ||
| contextWindow, | ||
| strategy: compactionSettings.strategy, | ||
| messagesBefore, | ||
| messagesAfter: activeMessages.length, | ||
| }); | ||
| } | ||
| /** | ||
| * Check if context maintenance or promotion is needed and run it. | ||
| * Called after agent_end and before prompt submission. | ||
|
|
@@ -8332,28 +8444,58 @@ export class AgentSession { | |
| // Skip if this was an error (non-overflow errors don't have usage data) | ||
| if (assistantMessage.stopReason === "error") return COMPACTION_CHECK_NONE; | ||
| const pruneResult = await this.#pruneToolOutputs(); | ||
| let contextTokens = calculateContextTokens(assistantMessage.usage); | ||
| if (supersedeResult) { | ||
| contextTokens = Math.max(0, contextTokens - supersedeResult.tokensSaved); | ||
| } | ||
| if (pruneResult) { | ||
| contextTokens = Math.max(0, contextTokens - pruneResult.tokensSaved); | ||
| } | ||
| // Floor by the real stored-conversation estimate so a payload-shrinking | ||
| // before_provider_request hook (e.g. a compression extension such as | ||
| // Headroom) can't deflate the provider-reported usage below the true | ||
| // history size and skip the threshold. The estimate runs after the prune | ||
| // passes above, so it reflects the post-prune message set. | ||
| contextTokens = compactionContextTokens(contextTokens, this.#estimateStoredContextTokens()); | ||
| if (shouldCompact(contextTokens, contextWindow, compactionSettings)) { | ||
| const maintenanceTokensFreed = (supersedeResult?.tokensSaved ?? 0) + (pruneResult?.tokensSaved ?? 0); | ||
| const assistantUsageContextTokens = calculateContextTokens(assistantMessage.usage); | ||
| const storedContextTokens = this.#estimateStoredContextTokens(); | ||
| // Pruning frees bytes for the NEXT prompt; it does not change the size of | ||
| // the prompt the LLM just billed for. Earlier revisions subtracted the | ||
| // per-turn supersede/prune `tokensSaved` from the threshold input, which | ||
| // let a long-running `/goal` session sit above `compaction.thresholdTokens` | ||
| // indefinitely whenever per-turn pruning saved enough to drop the | ||
| // post-prune estimate below the user-configured trigger — the visible | ||
| // context (anchored to the same provider billing) still showed >threshold, | ||
| // but `shouldCompact` no-op'd (#3174). Anchor the initial trigger on the | ||
| // last turn's billed context tokens, floored by the post-prune | ||
| // stored-conversation estimate so a payload-compression hook still can't | ||
| // deflate the trigger. | ||
| const contextTokens = compactionContextTokens(assistantUsageContextTokens, storedContextTokens); | ||
| const postMaintenanceContextTokens = compactionContextTokens( | ||
| Math.max(0, assistantUsageContextTokens - maintenanceTokensFreed), | ||
| storedContextTokens, | ||
| ); | ||
| const thresholdTokens = resolveThresholdTokens(contextWindow, compactionSettings); | ||
| const shouldThresholdCompact = shouldCompact(contextTokens, contextWindow, compactionSettings); | ||
| logger.debug("Auto-compaction threshold decision", { | ||
| phase: "post-agent-end", | ||
| goalModeEnabled: this.#goalModeState?.enabled === true, | ||
| goalStatus: this.#goalModeState?.goal.status, | ||
| stopReason: assistantMessage.stopReason, | ||
| sameModel: sameModel === true, | ||
| contextWindow, | ||
| strategy: compactionSettings.strategy, | ||
| thresholdTokens, | ||
| assistantUsageContextTokens, | ||
| storedContextTokens, | ||
| resolvedContextTokens: contextTokens, | ||
| postMaintenanceContextTokens, | ||
| maintenanceTokensFreed, | ||
| shouldCompact: shouldThresholdCompact, | ||
| contextPromotionEnabled: this.settings.get("contextPromotion.enabled") === true, | ||
| }); | ||
| if (shouldThresholdCompact) { | ||
| // Try promotion first — if a larger model is available, switch instead of compacting | ||
| const promoted = await this.#tryContextPromotion(assistantMessage); | ||
| if (!promoted) { | ||
| return await this.#runAutoCompaction("threshold", false, false, allowDefer, { | ||
| autoContinue, | ||
| triggerContextTokens: contextTokens, | ||
| triggerContextTokens: postMaintenanceContextTokens, | ||
| }); | ||
| } | ||
| logger.debug("Auto-compaction threshold satisfied but context promotion took over", { | ||
| contextTokens, | ||
| contextWindow, | ||
| model: `${assistantMessage.provider}/${assistantMessage.model}`, | ||
| }); | ||
| } | ||
| return COMPACTION_CHECK_NONE; | ||
| } | ||
|
|
@@ -9447,13 +9589,15 @@ export class AgentSession { | |
| willRetry: boolean, | ||
| deferred = false, | ||
| allowDefer = true, | ||
| options: { autoContinue?: boolean; triggerContextTokens?: number } = {}, | ||
| options: { autoContinue?: boolean; triggerContextTokens?: number; suppressContinuation?: boolean } = {}, | ||
| ): Promise<CompactionCheckResult> { | ||
| const compactionSettings = this.settings.getGroup("compaction"); | ||
| if (compactionSettings.strategy === "off") return COMPACTION_CHECK_NONE; | ||
| if (reason !== "idle" && !compactionSettings.enabled) return COMPACTION_CHECK_NONE; | ||
| const generation = this.#promptGeneration; | ||
| const shouldAutoContinue = options.autoContinue !== false && compactionSettings.autoContinue !== false; | ||
| const suppressContinuation = options.suppressContinuation === true; | ||
| const shouldAutoContinue = | ||
| !suppressContinuation && options.autoContinue !== false && compactionSettings.autoContinue !== false; | ||
| // Shake runs inline (cheap, no remote LLM). On overflow recovery, if shake | ||
| // reclaims nothing we fall through to the summary-compaction body below so | ||
| // the oversized input still gets resolved. | ||
|
|
@@ -9464,6 +9608,7 @@ export class AgentSession { | |
| generation, | ||
| shouldAutoContinue, | ||
| options.triggerContextTokens, | ||
| suppressContinuation, | ||
| ); | ||
| if (outcome !== "fallback") return outcome; | ||
| } | ||
|
|
@@ -9917,7 +10062,7 @@ export class AgentSession { | |
|
|
||
| this.#scheduleAgentContinue({ delayMs: 100, generation }); | ||
| continuationScheduled = true; | ||
| } else if (this.agent.hasQueuedMessages()) { | ||
| } else if (!suppressContinuation && this.agent.hasQueuedMessages()) { | ||
| // Auto-compaction can complete while follow-up/steering/custom messages are waiting. | ||
| // Kick the loop so queued messages are actually delivered. | ||
| this.#scheduleAgentContinue({ | ||
|
|
@@ -9977,6 +10122,7 @@ export class AgentSession { | |
| generation: number, | ||
| autoContinue: boolean, | ||
| triggerContextTokens?: number, | ||
| suppressContinuation = false, | ||
| ): Promise<CompactionCheckResult | "fallback"> { | ||
| const action = "shake"; | ||
| this.#autoCompactionAbortController?.abort(); | ||
|
|
@@ -10007,15 +10153,16 @@ export class AgentSession { | |
| // situation actually resolves; "idle" is exempt because its 60s+ timer | ||
| // re-checks usage before re-firing and cannot dead-loop on its own. | ||
| // | ||
| // #2275: the post-shake check MUST be anchored on the same metric that | ||
| // triggered compaction. The local estimator (`#estimatePendingPromptTokens`) | ||
| // undercounts thinking-signature payloads, so on thinking-heavy sessions it | ||
| // reads well below the provider-reported usage that fired the threshold. | ||
| // When that estimate slips under the threshold, the fallback never fires | ||
| // and the auto-continue prompt re-injects every turn. Prefer the trigger's | ||
| // own `contextTokens` (provider-anchored) when the caller supplies it, and | ||
| // add hysteresis (80% recovery band) so we don't oscillate at the boundary | ||
| // while shake keeps reclaiming a trickle of the previous turn's output. | ||
| // #2275: the post-shake check MUST stay provider-anchored when caller | ||
| // usage and local estimates diverge. The local estimator undercounts | ||
| // thinking-signature payloads, so thinking-heavy sessions can read well | ||
| // below the provider usage that fired the threshold. Prefer the caller's | ||
| // context figure when supplied, then subtract shake's own savings and add | ||
| // hysteresis (80% recovery band) so we don't oscillate at the boundary. | ||
| // Threshold callers pass the provider-billed trigger after accounting for | ||
| // any supersede/drop-useless pruning that already rewrote the next prompt; | ||
| // without that pre-shake savings, shake can fall through to context-full | ||
| // even though the post-prune history is already inside the recovery band. | ||
| const contextWindow = this.model?.contextWindow ?? 0; | ||
| const compactionSettings = this.settings.getGroup("compaction"); | ||
| let stillOverThreshold = false; | ||
|
|
@@ -10075,7 +10222,7 @@ export class AgentSession { | |
| } | ||
| this.#scheduleAgentContinue({ delayMs: 100, generation }); | ||
| continuationScheduled = true; | ||
| } else if (this.agent.hasQueuedMessages()) { | ||
| } else if (!suppressContinuation && this.agent.hasQueuedMessages()) { | ||
| this.#scheduleAgentContinue({ | ||
| delayMs: 100, | ||
| generation, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When an active goal hits the compaction threshold and the assistant returned an empty
toolUsestop, this early return skips#handleEmptyAssistantStop; that handler is the only path that removes the orphaned tool-use assistant from active context/session history before retrying. With the default compaction auto-continue path, the next turn can be scheduled with atoolUseassistant that has no tool call, which the existing cleanup comment notes corrupts Anthropic message history. Run the empty-stop cleanup before returning, or avoid the early return for empty tool-use stops.Useful? React with 👍 / 👎.