diff --git a/TASK_CONTEXT_DESIGN.md b/TASK_CONTEXT_DESIGN.md new file mode 100644 index 0000000000..baf748e265 --- /dev/null +++ b/TASK_CONTEXT_DESIGN.md @@ -0,0 +1,599 @@ +# Design: Task-Relevant Code Summaries (codemap-inspired) for oh-my-pi + +> **Status**: Design complete, ready for implementation. +> **Branch**: `taskrelevant_context` in `oh-my-pi-dev` +> **Scope**: Minimal — agent-written file summaries + budget-packed task-relevant retrieval, native to oh-my-pi (not MCP), with automatic Turso setup. + +--- + +## What this is + +A native oh-my-pi feature that lets the agent **write short summaries about code files it has read**, persist them in libSQL/Turso, and **retrieve only the summaries relevant to the current task** on future turns — avoiding re-reading entire file trees. Token-efficient by construction: summaries are hard-capped at 1000 chars, retrieval is FTS-targeted, and a greedy budget packer bounds the response to a token ceiling. + +Inspired by [devalade/codemap](https://github.com/devalade/codemap)'s agent-written summaries + task-context retrieval, but built natively into oh-my-pi's tool/settings/prompt system — **not** as an MCP server. + +## What this is NOT + +- **Not a memory backend.** This is a distinct axis: code-entity summaries keyed by file path with staleness tracking. It does NOT go into the `memory.backend` enum (`off`/`local`/`mnemopi`/`hindsight`). It composes with any memory backend — including `off` (the default). +- **Not the full codemap.** No call graph (`call_edges`), no references table, no imports/exports tracking, no indexing daemon, no semantic embeddings search. Those are future work. +- **Not multi-language.** TypeScript file extraction only in v1. A pluggable `LanguageAdapter` interface is declared for future Go/Python/Rust adapters but only `TsAdapter` ships. 
+ +--- + +## Architecture Decisions (verified through adversarial review) + +### Decision 1: File-primary, single summaries table (Approach A) + +**Choice**: One `summaries` table keyed by `(project_label, file_path)` composite primary key. No symbols table. + +**Rationale**: Three architecture approaches were designed and adversarially reviewed by 5 independent skeptic judges. Approaches B (symbol-centric faithful port) and C (hybrid progressive) were both disqualified for violating the minimal-scope constraint: + +| Approach | Symbols table? | Verdict | +|----------|--------------|---------| +| A — file-centric | No (single `summaries` table) | ✅ In scope | +| B — symbol-centric | Yes (`CREATE TABLE symbols`) | ❌ Constraint violation | +| C — hybrid | Yes (`file_summaries` + `symbol_summaries`) | ❌ Constraint violation | + +Only Approach A respects the "summaries table + TS adapter + budget packer ONLY" constraint. The `summaries` table includes optional `symbol_name`/`symbol_kind`/`symbol_line_range` columns for disambiguation when a TS adapter is available, but there is no second base table. + +### Decision 2: Dual-driver storage (bun:sqlite default + lazy libsql for Turso) + +**Choice**: `bun:sqlite` for the zero-config local default; `@libsql/client` lazy-loaded only when Turso sync is configured. + +**Rationale**: `bun:sqlite` is the codebase standard (11 files: memories, mnemopi, session, autoresearch, tools). FTS5 is proven working in `bun:sqlite` (history-storage.ts:103, mnemopi/schema.ts:130/137/361). No existing `@libsql/client` dependency exists. Adding a native NAPI binding to every install for a Turso feature most users won't enable violates AGENTS.md's "Use Bun APIs where they provide a cleaner alternative." + +A thin `DbConnection` adapter interface abstracts both drivers so schema SQL is shared verbatim. 
The libsql driver lazy-loads via `await import('@libsql/client')` only on the Turso sync path (mirroring the `fastembed-runtime.ts:59-77` optional-peer pattern). + +```mermaid +flowchart LR + subgraph "Zero-config default (no new dep)" + A[codemap.enabled = true] --> B[openCodemapDb] + B --> C[bun:sqlite Database] + C --> D["file: ~/.omp/.../codemap.db"] + end + subgraph "Turso sync (opt-in, lazy dep)" + E[turso.syncUrl + authToken] --> F["await import('@libsql/client')"] + F --> G[libsql createClient] + G --> H["embedded replica: local file + remote sync"] + end +``` + +### Decision 3: Independent first-turn injection seam (NOT via memory backend) + +**Blocker identified and fixed**: All three original designs proposed first-turn task-context injection via `backend.beforeAgentStartPrompt` (agent-session.ts:4945). But `#buildSystemPromptForAgentStart` early-returns when `memory.backend === "off"` (the default) because `offBackend` has no `beforeAgentStartPrompt` hook: + +```typescript +// agent-session.ts:4945-4947 (EXISTING — the dead seam) +async #buildSystemPromptForAgentStart(promptText: string): Promise { + const backend = await resolveMemoryBackend(this.settings); + if (!backend.beforeAgentStartPrompt) return this.#baseSystemPrompt; // ← dead when memory.backend="off" +``` + +**Fix**: Add a new unconditional injection block that runs *before* the memory-backend block, gated only on `codemap.enabled`, with its own try/catch so a libSQL/FTS error can't break the memory backend path: + +```typescript +async #buildSystemPromptForAgentStart(promptText: string): Promise { + // NEW: composable first-turn injection (runs regardless of memory.backend) + const codemapBlock = await this.#injectCodemapTaskContext(promptText); + const baseWithCodemap = codemapBlock + ? [...this.#baseSystemPrompt, codemapBlock] + : this.#baseSystemPrompt; + + const backend = await resolveMemoryBackend(this.settings); + if (!backend.beforeAgentStartPrompt) return baseWithCodemap; + // ... 
existing backend logic, but operate on baseWithCodemap as the "previousBase" +} +``` + +**Why codemap runs before the memory block**: (1) code summaries are stable background knowledge (like mnemopi's `STATIC_INSTRUCTIONS` framing), so ordering them first keeps the higher-recency memory block last; (2) it avoids coupling codemap's failure to the memory backend's try/catch. + +**Once-per-session gating**: `#injectCodemapTaskContext` checks a `hasInjectedForFirstTurn` flag on the codemap session state (mirroring mnemopi/state.ts:313 `hasRecalledForFirstTurn`), so it fires once per fresh session, not every turn. + +--- + +## Module Layout + +New module at `packages/coding-agent/src/task-context/`: + +| File | Responsibility | +|------|---------------| +| `index.ts` | Barrel re-exports + lazy factory `resolveCodemap(settings)` (mirrors `resolveMemoryBackend`) | +| `config.ts` | Settings → typed config loader (mirrors `mnemopi/config.ts:loadMnemopiConfig`) | +| `db.ts` | DB connection factory + schema bootstrap + FTS rebuild. `openCodemapDb(config)` returns a `DbConnection` (either `BunSqliteConnection` or `LibsqlConnection`) | +| `schema.ts` | Pure SQL DDL + migration runner. `initSchema(conn)` executes `CREATE TABLE IF NOT EXISTS` + FTS5 virtual table + sync triggers | +| `store.ts` | Data-access layer. CRUD over the single `summaries` table + FTS search queries | +| `staleness.ts` | Filesystem hash + staleness flag. `computeFileHash(path)` reads file from disk, returns `Bun.hash` hex string | +| `adapter.ts` | Pluggable `LanguageAdapter` interface + `TsAdapter` (the only v1 implementation) | +| `retrieve.ts` | The `get_task_context` retrieval pipeline (lexical extract → FTS seed → rank → budget packer) | +| `tools.ts` | Four native `AgentTool` classes with `createIf` gating | +| `prompt.ts` | System-prompt injection helpers. 
`buildCodemapPromptBlock(config, summaries)` | +| `turso.ts` | Turso connection resolution + auto-provisioning + `settings.set()` persist-back | +| `state.ts` | Per-session state via Symbol key on `AgentSession` (mirrors mnemopi state pattern) | + +**Edits to existing files** (not new files): + +| File | Change | +|------|--------| +| `config/settings-schema.ts` | Add `codemap.*` schema block after the hindsight config block | +| `modes/components/settings-defs.ts` | Add `codemapActive` condition (mirrors `mnemopiActive` at :93) | +| `tools/index.ts` | Register the 4 codemap tools as `createIf` factories | +| `sdk.ts` (~line 2149) | Append `codemapInstructions` to `appendParts[]` | +| `session/agent-session.ts` (~line 4945) | Add `#injectCodemapTaskContext` block before memory-backend block | +| `system-prompt.ts` (~line 419, ~671) | Add `codemapEnabled: boolean` to `BuildSystemPromptOptions`; thread `hasCodemap` into prompt data | +| `prompts/system/system-prompt.md` (~lines 55-57) | Add `{{#if hasCodemap}}` advertisement block | +| `hindsight/content.ts` | Extend `stripMemoryTags` regex to also strip `...` blocks | + +--- + +## Database Schema + +Single `summaries` table — file-keyed, with optional symbol disambiguation columns. No symbols table. Runs identically on `bun:sqlite` (local) and `@libsql/client` (Turso embedded replica). + +```sql +-- ════════════════════════════════════════════════════════════════════════════ +-- codemap schema v1: file-primary minimal. +-- ONE summaries table — file-keyed, with optional symbol disambiguation columns. +-- NO symbols/symbol_summaries second table (scope constraint). +-- Runs on BOTH local file: libSQL (zero-config default) and Turso-synced +-- embedded replica, unchanged. 
+-- ════════════════════════════════════════════════════════════════════════════ + +CREATE TABLE IF NOT EXISTS schema_migrations ( + version INTEGER PRIMARY KEY, + applied_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +-- PRIMARY store: one row per file the agent has summarized. Language-agnostic. +CREATE TABLE IF NOT EXISTS summaries ( + project_label TEXT NOT NULL, -- git primary root basename (isolates per-project) + file_path TEXT NOT NULL, -- posix-relative to project root + summary_text TEXT NOT NULL, -- hard-capped at 1000 chars on write + content_hash TEXT NOT NULL, -- Bun.hash (Wyhash, 64-bit) of file contents at write time, hex-encoded; '' if file missing + symbol_name TEXT, -- optional: symbol the summary is about (TS adapter) + symbol_kind TEXT, -- optional: 'function' | 'class' | 'method' | etc. + symbol_line_range TEXT, -- optional: 'startLine-endLine' for symbol span + source TEXT NOT NULL DEFAULT 'agent', + updated_at TEXT NOT NULL DEFAULT (datetime('now')), + PRIMARY KEY (project_label, file_path) +); + +CREATE INDEX IF NOT EXISTS idx_summaries_project ON summaries(project_label); +CREATE INDEX IF NOT EXISTS idx_summaries_hash ON summaries(project_label, content_hash); + +-- FTS5 over summary_text + file_path (external-content pattern, matching +-- history-storage.ts:103 and mnemopi/schema.ts:130). +CREATE VIRTUAL TABLE IF NOT EXISTS summaries_fts USING fts5( + summary_text, + file_path, + content='summaries', + content_rowid='rowid' +); + +-- Sync triggers (identical pattern to history-storage.ts:105-108). 
+CREATE TRIGGER IF NOT EXISTS summaries_ai AFTER INSERT ON summaries BEGIN + INSERT INTO summaries_fts(rowid, summary_text, file_path) + VALUES (new.rowid, new.summary_text, new.file_path); +END; + +CREATE TRIGGER IF NOT EXISTS summaries_ad AFTER DELETE ON summaries BEGIN + INSERT INTO summaries_fts(summaries_fts, rowid, summary_text, file_path) + VALUES('delete', old.rowid, old.summary_text, old.file_path); +END; + +CREATE TRIGGER IF NOT EXISTS summaries_au AFTER UPDATE ON summaries BEGIN + INSERT INTO summaries_fts(summaries_fts, rowid, summary_text, file_path) + VALUES('delete', old.rowid, old.summary_text, old.file_path); + INSERT INTO summaries_fts(rowid, summary_text, file_path) + VALUES(new.rowid, new.summary_text, new.file_path); +END; +``` + +**Staleness**: `content_hash` is `Bun.hash(contents).toString(16)` of the file on disk at write time. On read, `computeFileHash(path)` recomputes and compares. If they differ → `stale: true`. If the file is missing → `content_hash = ''` and `stale: true` with a `missing` flag. + +--- + +## Settings + +Distinct `codemap.*` settings group (NOT in `memory.backend` enum). Follows the `SETTINGS_SCHEMA` object-literal pattern (`as const`), mirroring the mnemopi/hindsight config blocks. + +### Settings schema block (added to `settings-schema.ts`) + +```typescript +// ════════════════════════════════════════════════════════════════════════════ +// Codemap (code summaries) — DISTINCT feature module at packages/coding-agent/src/task-context/. +// NOT a memory.backend enum value (those are mutually exclusive; codemap is a +// composable different axis: code entities keyed by file + staleness). Lives in +// its own settings group, enabled independently of memory.backend. 
+// ════════════════════════════════════════════════════════════════════════════ +"codemap.enabled": { + type: "boolean", + default: false, // master toggle, off by default + description: "Enable agent-written code summaries with task-relevant retrieval.", +}, +"codemap.autoInject": { + type: "boolean", + default: true, // auto-inject task-relevant summaries into first-turn system prompt + description: "Automatically retrieve and inject task-relevant summaries on the first turn of each session.", +}, +"codemap.dbPath": { + type: "string", + default: "", // empty = <memoriesDir>/codemap/codemap.db, where memoriesDir = getMemoriesDir(agentDir) + description: "Path to the local libSQL database file. Empty uses the default per-project location.", +}, +"codemap.tokenBudget": { + type: "number", + default: 8000, // codemap's documented default budget + description: "Soft token budget for task-context retrieval responses.", +}, +"codemap.maxResults": { + type: "number", + default: 20, // max summaries before budget packing + description: "Maximum number of file summaries to retrieve before budget packing.", +}, +"codemap.maxSummaryChars": { + type: "number", + default: 1000, // HARD write-side cap — schema-enforced token guard + description: "Maximum characters per summary. Summaries are truncated on write.", +}, +// Turso sync (optional — feature works fully offline without these) +"codemap.turso.syncUrl": { + type: "string", + default: "", // empty = local-only mode + description: "Turso/libSQL remote sync URL. When set, enables embedded-replica sync.", +}, +"codemap.turso.authToken": { + type: "string", + default: "", + description: "Auth token for Turso sync. 
Required when syncUrl is set.", +}, +"codemap.turso.autoProvision": { + type: "boolean", + default: false, // opt-in automatic Turso DB creation + description: "Automatically provision a Turso database when credentials are available but no syncUrl is set.", +}, +"codemap.turso.org": { + type: "string", + default: "", // Turso org name for auto-provisioning + description: "Turso organization name for auto-provisioning.", +}, +``` + +### Defaults summary + +| Setting | Default | Purpose | +|---------|---------|---------| +| `codemap.enabled` | `false` | Master toggle | +| `codemap.autoInject` | `true` | First-turn auto-injection | +| `codemap.dbPath` | `""` | Empty = `<memoriesDir>/codemap/codemap.db` (`memoriesDir = getMemoriesDir(agentDir)`) | +| `codemap.tokenBudget` | `8000` | codemap's documented default | +| `codemap.maxResults` | `20` | Max summaries before packing | +| `codemap.maxSummaryChars` | `1000` | Hard write-side char cap | +| `codemap.turso.syncUrl` | `""` | Empty = local-only | +| `codemap.turso.authToken` | `""` | Required when syncUrl set | +| `codemap.turso.autoProvision` | `false` | Opt-in auto DB creation | +| `codemap.turso.org` | `""` | Turso org for provisioning | + +--- + +## Tools + +Four native `AgentTool` classes with `createIf` gating (conditionally enabled based on `codemap.enabled`). Each follows the existing tool patterns (ArkType parameter schema, `AgentTool` interface). + +### 1. `set_file_summary` + +Agent writes a short note about a file it has read. Computes `Bun.hash` of the file at write time and stores it as `content_hash` for staleness detection. Hard-truncates `summary` to `codemap.maxSummaryChars` (1000) on write — the only schema-enforced per-summary token bound. + +```typescript +{ + name: "set_file_summary", + purpose: "Persist a summary written by the agent after reading a file. 
Stores it for future task-relevant retrieval.", + parameters: { + file: { type: "string", required: true, description: "File path (relative to cwd, resolved to project root)" }, + summary: { type: "string", required: true, description: "1-3 sentences: purpose, key symbols, gotchas, invariants" }, + symbol_name: { type: "string", required: false, description: "Optional: specific symbol this summary is about" }, + symbol_kind: { type: "string", required: false, description: "Optional: 'function' | 'class' | 'method' | etc." }, + }, + createIf: (settings) => settings.get("codemap.enabled") === true +} +``` + +### 2. `get_file_summary` + +Retrieve a single file's summary + live staleness flag. Re-reads the file from disk, recomputes `Bun.hash`, compares to stored `content_hash`. + +```typescript +{ + name: "get_file_summary", + purpose: "Retrieve a file's stored summary with a live staleness check.", + parameters: { + file: { type: "string", required: true }, + }, + createIf: (settings) => settings.get("codemap.enabled") === true +} +``` + +### 3. `get_task_context` (primary) + +Run the retrieval pipeline for a task string: lexical extract → FTS seed retrieval → rank + dedupe → budget packer. Returns a bounded, ranked context pack of file summaries. + +```typescript +{ + name: "get_task_context", + purpose: "Retrieve task-relevant file summaries as a bounded context pack.", + parameters: { + task: { type: "string", required: true, description: "What the agent is trying to do" }, + max_files: { type: "number", default: 12, maximum: 50 }, + include_summaries: { type: "boolean", default: true }, + token_budget: { type: "number", description: "Soft limit for estimated response tokens (default: codemap.tokenBudget)" }, + }, + createIf: (settings) => settings.get("codemap.enabled") === true +} +``` + +### 4. `delete_file_summary` + +Remove a file's summary (file deleted, or summary is wrong/stale beyond refresh). 
+ +```typescript +{ + name: "delete_file_summary", + purpose: "Remove a file's stored summary.", + parameters: { + file: { type: "string", required: true }, + }, + createIf: (settings) => settings.get("codemap.enabled") === true +} +``` + +--- + +## Retrieval Algorithm (`get_task_context`) + +Used by BOTH the `get_task_context` tool and the first-turn auto-injection hook (same code path). Adapted from [codemap's task-context algorithm](https://github.com/devalade/codemap/blob/main/docs/07-task-context.md), with graph expansion dropped (no call graph in minimal scope). + +### Step 1: Resolve project label + +`projectLabel = basename of git primary root` (worktree-aware, mirrors `hindsight computeBankScope`) or `cwd basename` if not a repo. Isolates summaries per project in the shared DB. + +### Step 2: Lexical extraction + +Extract `pathHints` first: whitespace-delimited tokens containing `/` or ending in `.ts`/`.tsx`/`.js`. This has to happen before tokenization, because splitting on non-alphanumerics removes the `/` and `.` that identify a path. Then tokenize the task string on non-alphanumeric characters (which also splits snake_case: `get_task_context` → `get`, `task`, `context`), split camelCase while the original casing is still intact (`buildSystemPrompt` → `build`, `system`, `prompt`), lowercase the parts, and keep tokens ≥ 3 chars. Drop a small hardcoded stopword set (`the`, `and`, `for`, `with`, `this`, `that`, `fix`, `add`, `update`, `refactor` — function words plus generic verbs). The surviving tokens are the `keywords`. + +### Step 3: FTS seed retrieval + +Query `summaries_fts` with an FTS5 MATCH built from the keywords (token AND with prefix-wildcard per token, mirroring `history-storage.ts:212-215`): + +```sql +SELECT s.* FROM summaries_fts f +JOIN summaries s ON s.rowid = f.rowid +WHERE summaries_fts MATCH ? + AND s.project_label = ? +ORDER BY rank +LIMIT ? +``` + +Also run a path LIKE fallback for `pathHints` (mirroring the substring fallback at `history-storage.ts:224-226`). 
+ +### Step 4: Rank and dedupe + +**Seed score** per summary (adapted from codemap's multi-channel formula, graph channel dropped): + +``` +seed(s) = 0.70 * fts_score(s) + 0.30 * path_match(s) +``` + +Deduplicate by `file_path` (keep highest score). + +### Step 5: Budget packer (greedy knapsack) + +**Token estimate** (codemap's exact documented formula): + +``` +tokenCost(summary) = ceil(summary_text.length / 4) + 20 +``` + +Where `+20` is the flat per-file header overhead (markdown heading + metadata). Since we have no symbol count at file-level, `symbols*20` collapses to a flat `20`. + +Greedy: sort by `seed(s)` descending, add summaries until `token_budget` (default 8000) is consumed. Stop when `estimated_total > token_budget`. + +``` +tokens ≈ Σ ceil(summary_text.length / 4) + 20 * file_count +``` + +### Step 6: Compose response + +Return JSON: + +```json +{ + "task": "string", + "files": [ + { + "path": "src/auth/password.ts", + "score": 0.92, + "summary": "Validates token, updates hash. Depends on bcrypt + db.", + "stale": false, + "updatedAt": "2026-06-21T12:00:00Z" + } + ], + "meta": { "fileCount": 3, "estimatedTokens": 420, "truncated": false } +} +``` + +--- + +## System Prompt Integration + +### Handlebars block (added to `system-prompt.md` near lines 55-57) + +```handlebars +{{#if hasCodemap}} +## Code Summaries (codemap) +File-level code summaries are available for this repo. Before reading unfamiliar files, call `get_task_context` with your task to retrieve relevant summaries (packed within a token budget). After reading a non-trivial file or making load-bearing changes, call `set_file_summary` to record a short note (purpose, key symbols, gotchas, invariants). Summaries are anchored to file content via `Bun.hash` — if a file changes, its summary is flagged `stale` and should be refreshed. +{{/if}} +``` + +### Threading into `buildSystemPrompt` + +Add `codemapEnabled: boolean` to `BuildSystemPromptOptions` (mirroring `mnemopiEnabled` at ~line 419). 
Thread `hasCodemap` into the prompt data object (~line 671). The `{{#if hasCodemap}}` block renders when `codemap.enabled` is true. + +### First-turn auto-injection + +The `#injectCodemapTaskContext` block in `agent-session.ts` calls `getTaskContext` with the user's first-turn prompt as the task string, retrieves the packed summaries, and appends them as an extra system-prompt part. Gated by `codemap.autoInject` and a `hasInjectedForFirstTurn` session-state flag (fires once per fresh session, mirroring `mnemopi/state.ts:313`). + +--- + +## Turso Auto-Setup + +### Zero-config local (default) + +`resolveDbUrl(config)` returns `{ url: 'file:' + dbPath }` where `dbPath` defaults to `path.join(getMemoriesDir(agentDir), 'codemap', 'codemap.db')`. Opens via `bun:sqlite` `Database` — no new dependency, no network, microsecond reads. The feature works fully offline. + +### Optional Turso sync (when credentials present) + +When `codemap.turso.syncUrl` + `codemap.turso.authToken` are set, `openCodemapDb` switches to the libsql embedded-replica driver: + +```typescript +// Lazy-loaded ONLY on the Turso path — users who never configure Turso +// never load @libsql/client or its native NAPI binding. +const { createClient } = await import("@libsql/client"); +const client = createClient({ + url: "file:" + localDbPath, // local file (embedded replica) + syncUrl: config.turso.syncUrl, // remote Turso endpoint + authToken: config.turso.authToken, +}); +await client.sync(); // pull remote changes, push local writes +``` + +The same schema SQL runs unchanged (libSQL is a SQLite fork; FTS5 + triggers work identically). + +### Auto-provisioning (opt-in) + +When `codemap.turso.autoProvision = true` and `TURSO_API_TOKEN` + `codemap.turso.org` are present, but no `syncUrl` is set yet: + +1. Call Turso platform API (`POST https://api.turso.tech/v1/organizations/{org}/databases`) to create a DB. +2. Generate a full-access JWT (`POST .../databases/{name}/auth/tokens`). +3. 
Derive `syncUrl = 'libsql://' + db.Hostname`. +4. **Persist back via `settings.set()`** (the gap all three original designs left underspecified — now concrete): + +```typescript +await settings.set("codemap.turso.syncUrl", syncUrl); +await settings.set("codemap.turso.authToken", jwt); +// settings.set() triggers #queueSave → config.yml (the existing persist path) +``` + +5. Subsequent starts skip provisioning and go straight to sync mode. Idempotent (no-op if `syncUrl` already set). + +### DB adapter interface + +```typescript +// packages/coding-agent/src/task-context/db-adapter.ts +export type DbRow = Record; +export type BindValue = null | number | string | bigint | Uint8Array | ArrayBuffer; +export type BindArgs = readonly BindValue[]; + +export interface PreparedStatement { + all(...args: BindArgs): Promise; + get(...args: BindArgs): Promise; + run(...args: BindArgs): Promise; +} + +export interface DbConnection { + prepare(sql: string): PreparedStatement; + run(sql: string, ...args: BindArgs): Promise; + exec(sql: string): Promise; // multi-statement (schema bootstrap) + close(): Promise; +} +``` + +`BunSqliteConnection` wraps `bun:sqlite` `Database` (sync calls wrapped in resolved Promises — no event-loop cost, same as `HistoryStorage` pattern). `LibsqlConnection` wraps `@libsql/client` (lazy-loaded). + +--- + +## Staleness Tracking + +**Hash**: `Bun.hash(contents)` (Wyhash, 64-bit — the default algorithm; `Bun.hash.xxHash64` is a separate function and is not what this design uses) per AGENTS.md convention ("Hashing → `Bun.hash()`, NOT `node:crypto`"). Returns `number | bigint` (a `bigint` for the 64-bit default); store as `.toString(16)` hex string, which works for both. The codebase uses `Bun.hash` for content hashing throughout (e.g. `noop-loop-guard.ts:98` `Bun.hash(input).toString(16)` for "is this the same payload?" — the same staleness use case). Non-cryptographic is fine here: we only need to detect "did the file change?", not defend against collisions. `node:crypto`'s `createHash` is used only for cross-service cryptographic needs (Anthropic fingerprinting, Cursor blob IDs), never for file content staleness. 
+ +**Write** (`set_file_summary`): resolve `file_path` relative to cwd, read the file, compute `Bun.hash(contents).toString(16)`. Store as `content_hash`. If the file doesn't exist (agent summarizing a not-yet-saved or deleted file), store `content_hash = ''`. + +**Read** (`get_file_summary`, `get_task_context`): re-read the file from disk, recompute `Bun.hash(contents).toString(16)`, compare to stored `content_hash`: +- Match → `stale: false`; differ → `stale: true` (file changed since summary was written) +- File missing + `content_hash = ''` → `stale: true`, `missing: true` +- File missing + `content_hash != ''` → `stale: true`, `missing: true` (file was deleted) + +**Failure modes**: +- *Rename/move*: file_path no longer matches → summary orphaned. The `get_task_context` FTS over `file_path` won't match the new path. Agent must re-summarize under the new path. (Acceptable for v1 — codemap has `symbol_aliases` for this, deliberately out of scope.) +- *Partial edit*: `content_hash` changes → `stale: true`. The summary may still be partially valid, but the flag correctly indicates the file changed. Agent refreshes. +- *Unchanged file re-summarized*: same `content_hash` → the upsert replaces `summary_text` and `updated_at` while the hash stays the same; it is a true no-op only when the summary text is also identical (idempotent). + +--- + +## Pluggable Language Adapter Interface + +```typescript +// packages/coding-agent/src/task-context/adapter.ts + +/** + * Pluggable language adapter for extracting symbol anchors from source files. + * Only TsAdapter is implemented in v1; Go/Python/Rust adapters are future work. + * The interface is the pluggable seam — no symbols table, but optional + * symbol_name/kind/line-range COLUMNS on the single summaries table are + * populated by an adapter when one is available for the file's language. + */ +export interface LanguageAdapter { + /** File extensions this adapter handles (e.g. ['.ts', '.tsx', '.js', '.jsx']) */ + extensions: readonly string[]; + /** Extract symbol info at a given line, or null if the line isn't in a symbol. 
*/ + getSymbolAtLine(filePath: string, line: number): SymbolAnchor | null; + /** Extract all top-level symbols in a file (for future indexing). */ + getSymbols(filePath: string): SymbolAnchor[]; +} + +export interface SymbolAnchor { + name: string; + kind: 'function' | 'class' | 'method' | 'interface' | 'type' | 'variable' | 'const'; + startLine: number; + endLine: number; +} + +/** Maps file extension → adapter. Adding a new language = add one entry here. */ +export function getAdapter(filePath: string): LanguageAdapter | null { + const ext = filePath.slice(filePath.lastIndexOf('.')); + if (TS_ADAPTER.extensions.includes(ext)) return TS_ADAPTER; + return null; // file-primary summaries work fine without an adapter +} +``` + +`TsAdapter` uses oh-my-pi's existing LSP client (`packages/coding-agent/src/lsp/`) for symbol extraction — no regex parsing, no new AST dependency. When no adapter is available for a file's language, summaries still work (file-primary); only `symbol_name`/`symbol_kind`/`symbol_line_range` columns stay null. + +--- + +## Implementation Order + +1. **Schema + DB layer**: `schema.ts` (DDL), `db-adapter.ts` (interface), `db.ts` (factory + `BunSqliteConnection`), `staleness.ts` (`Bun.hash` content hash). Smoke test: open local DB, run migrations, insert a summary, FTS query it. +2. **Config + settings**: `config.ts` (loader), edit `settings-schema.ts` (add `codemap.*` block), edit `settings-defs.ts` (add `codemapActive` condition). +3. **Store**: `store.ts` (CRUD + FTS search queries over the summaries table). +4. **Adapter**: `adapter.ts` (interface + `TsAdapter` using LSP client). +5. **Retrieval**: `retrieve.ts` (lexical extract → FTS seed → rank → budget packer with codemap's `chars/4 + 20` formula). +6. **Tools**: `tools.ts` (4 `AgentTool` classes with `createIf` gating), register in `tools/index.ts`. +7. **Prompt injection**: `prompt.ts`, edit `system-prompt.ts` (thread `hasCodemap`), edit `system-prompt.md` (add `{{#if hasCodemap}}` block). 
+8. **First-turn injection**: `state.ts` (session state + `hasInjectedForFirstTurn` flag), edit `agent-session.ts` (add `#injectCodemapTaskContext` block BEFORE memory-backend block, with own try/catch). +9. **Turso sync**: `turso.ts` (connection resolution + auto-provisioning + `settings.set()` persist-back), `db.ts` (add `LibsqlConnection` lazy-load path). +10. **SDK integration**: edit `sdk.ts` (append `codemapInstructions` to `appendParts[]`), edit `hindsight/content.ts` (extend `stripMemoryTags`). +11. **Tests**: staleness flag transitions, budget packer token math, FTS retrieval ranking, first-turn injection fires once, injection works with `memory.backend="off"`. +12. **Changelog**: `packages/coding-agent/CHANGELOG.md` under `## [Unreleased]` → `### Added`. + +--- + +## Summary of Verified Design Decisions + +| Decision | Choice | Verified By | +|----------|--------|-------------| +| Granularity | File-primary, single `summaries` table | Adversarial judge panel (5 skeptics) + main-agent regex re-verification | +| Symbols table | **No** (B and C disqualified for constraint violation) | Skeptic IRC pushback caught main-agent verification error | +| Token formula | `ceil(summary_text.length / 4) + 20` (codemap's exact documented formula) | Re-verified against designs.json with corrected regex | +| Storage driver | Dual-driver: `bun:sqlite` default + lazy `@libsql/client` for Turso | Deliberate decision weighing codebase conventions (11 `bun:sqlite` files, 0 libsql deps, FTS5 proven) | +| First-turn injection | Independent block before memory-backend, gated on `codemap.enabled` and `codemap.autoInject` (independent of `memory.backend`) | Blocker advisory verified against agent-session.ts:4945-4947 | +| Persist-back | `settings.set()` + `#queueSave` (concrete, not hand-waved) | Skeptic caught main-agent's false "NONE wire it" claim | +| Staleness hash | `Bun.hash` (Wyhash, 64-bit default; per AGENTS.md convention, matches `noop-loop-guard.ts:98` pattern) | Advisory caught unsupported "cross-runtime" rationale | +| Module placement 
| `packages/coding-agent/src/task-context/` (distinct, not in `memory.backend`) | Advisory constraint + integration-fit judge | diff --git a/bun.lock b/bun.lock index f0183b9c65..dba56eefb6 100644 --- a/bun.lock +++ b/bun.lock @@ -76,6 +76,7 @@ "dependencies": { "@agentclientprotocol/sdk": "catalog:", "@babel/parser": "catalog:", + "@libsql/client": "^0.17.4", "@mozilla/readability": "catalog:", "@oh-my-pi/hashline": "catalog:", "@oh-my-pi/omp-stats": "catalog:", @@ -596,6 +597,32 @@ "@kurkle/color": ["@kurkle/color@0.3.4", "", {}, "sha512-M5UknZPHRu3DEDWoipU6sE8PdkZ6Z/S+v4dD+Ke8IaNlpdSQah50lz1KtcFBa2vsdOnwbbnxJwVM4wty6udA5w=="], + "@libsql/client": ["@libsql/client@0.17.4", "", { "dependencies": { "@libsql/core": "^0.17.4", "@libsql/hrana-client": "^0.10.0", "js-base64": "^3.7.5", "libsql": "^0.5.28", "promise-limit": "^2.7.0" } }, "sha512-lYayFWasDV78A+TjlEhr6ubb3odBV6OHjb+wdp8VQcyWWAEIjuwbCHaraEUS4m4yWoo0BvZo96It4VdzZRmRWw=="], + + "@libsql/core": ["@libsql/core@0.17.4", "", { "dependencies": { "js-base64": "^3.7.5" } }, "sha512-LqF9gIvnJ38nmAH1y/ChizHqDO/MO1wLgA96XrraulEEbqXxLjleSH92YWTolbuJKgPUmGu4aJk9W3UnAcxLOQ=="], + + "@libsql/darwin-arm64": ["@libsql/darwin-arm64@0.5.29", "", { "os": "darwin", "cpu": "arm64" }, "sha512-K+2RIB1OGFPYQbfay48GakLhqf3ArcbHqPFu7EZiaUcRgFcdw8RoltsMyvbj5ix2fY0HV3Q3Ioa/ByvQdaSM0A=="], + + "@libsql/darwin-x64": ["@libsql/darwin-x64@0.5.29", "", { "os": "darwin", "cpu": "x64" }, "sha512-OtT+KFHsKFy1R5FVadr8FJ2Bb1mghtXTyJkxv0trocq7NuHntSki1eUbxpO5ezJesDvBlqFjnWaYYY516QNLhQ=="], + + "@libsql/hrana-client": ["@libsql/hrana-client@0.10.0", "", { "dependencies": { "@libsql/isomorphic-ws": "^0.1.5", "js-base64": "^3.7.5" } }, "sha512-OoA4EMqRAC7kn7V2P6EQqRcpZf2W+AjsNIyCizBg339Tq/aMC7sRnzs3SklderhmQWAqEzvv8A2vhxVmWpkVvw=="], + + "@libsql/isomorphic-ws": ["@libsql/isomorphic-ws@0.1.5", "", { "dependencies": { "@types/ws": "^8.5.4", "ws": "^8.13.0" } }, 
"sha512-DtLWIH29onUYR00i0GlQ3UdcTRC6EP4u9w/h9LxpUZJWRMARk6dQwZ6Jkd+QdwVpuAOrdxt18v0K2uIYR3fwFg=="], + + "@libsql/linux-arm-gnueabihf": ["@libsql/linux-arm-gnueabihf@0.5.29", "", { "os": "linux", "cpu": "arm" }, "sha512-CD4n4zj7SJTHso4nf5cuMoWoMSS7asn5hHygsDuhRl8jjjCTT3yE+xdUvI4J7zsyb53VO5ISh4cwwOtf6k2UhQ=="], + + "@libsql/linux-arm-musleabihf": ["@libsql/linux-arm-musleabihf@0.5.29", "", { "os": "linux", "cpu": "arm" }, "sha512-2Z9qBVpEJV7OeflzIR3+l5yAd4uTOLxklScYTwpZnkm2vDSGlC1PRlueLaufc4EFITkLKXK2MWBpexuNJfMVcg=="], + + "@libsql/linux-arm64-gnu": ["@libsql/linux-arm64-gnu@0.5.29", "", { "os": "linux", "cpu": "arm64" }, "sha512-gURBqaiXIGGwFNEaUj8Ldk7Hps4STtG+31aEidCk5evMMdtsdfL3HPCpvys+ZF/tkOs2MWlRWoSq7SOuCE9k3w=="], + + "@libsql/linux-arm64-musl": ["@libsql/linux-arm64-musl@0.5.29", "", { "os": "linux", "cpu": "arm64" }, "sha512-fwgYZ0H8mUkyVqXZHF3mT/92iIh1N94Owi/f66cPVNsk9BdGKq5gVpoKO+7UxaNzuEH1roJp2QEwsCZMvBLpqg=="], + + "@libsql/linux-x64-gnu": ["@libsql/linux-x64-gnu@0.5.29", "", { "os": "linux", "cpu": "x64" }, "sha512-y14V0vY0nmMC6G0pHeJcEarcnGU2H6cm21ZceRkacWHvQAEhAG0latQkCtoS2njFOXiYIg+JYPfAoWKbi82rkg=="], + + "@libsql/linux-x64-musl": ["@libsql/linux-x64-musl@0.5.29", "", { "os": "linux", "cpu": "x64" }, "sha512-gquqwA/39tH4pFl+J9n3SOMSymjX+6kZ3kWgY3b94nXFTwac9bnFNMffIomgvlFaC4ArVqMnOZD3nuJ3H3VO1w=="], + + "@libsql/win32-x64-msvc": ["@libsql/win32-x64-msvc@0.5.29", "", { "os": "win32", "cpu": "x64" }, "sha512-4/0CvEdhi6+KjMxMaVbFM2n2Z44escBRoEYpR+gZg64DdetzGnYm8mcNLcoySaDJZNaBd6wz5DNdgRmcI4hXcg=="], + "@mixmark-io/domino": ["@mixmark-io/domino@2.2.0", "", {}, "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw=="], "@mozilla/readability": ["@mozilla/readability@0.6.0", "", {}, "sha512-juG5VWh4qAivzTAeMzvY9xs9HY5rAcr2E4I7tiSSCokRFi7XIZCAu92ZkSTsIj1OPceCifL3cpfteP3pDT9/QQ=="], @@ -704,6 +731,8 @@ "@napi-rs/wasm-tools-win32-x64-msvc": ["@napi-rs/wasm-tools-win32-x64-msvc@1.0.1", "", { "os": "win32", "cpu": "x64" 
}, "sha512-rEAf05nol3e3eei2sRButmgXP+6ATgm0/38MKhz9Isne82T4rPIMYsCIFj0kOisaGeVwoi2fnm7O9oWp5YVnYQ=="], + "@neon-rs/load": ["@neon-rs/load@0.0.4", "", {}, "sha512-kTPhdZyTQxB+2wpiRcFWrDcejc4JI6tkPuS7UZCG4l6Zvc5kU/gGQ/ozvHTh1XR5tS+UlfAfGuPajjzQjCiHCw=="], + "@nodable/entities": ["@nodable/entities@2.2.0", "", {}, "sha512-9uGyhaQavEUMC8AIddIjau4NsnsXhou+j5sBAGojCM1oxmQpVKTWR/9JxABD6UAv12vpIms55fPZKFQEhG6uBg=="], "@octokit/auth-token": ["@octokit/auth-token@6.0.0", "", {}, "sha512-P4YJBPdPSpWTQ1NU4XYdvHvXJJDxM6YwpS0FZHRgP7YFkdVxsWcpWGy/NVqlAA7PcPCnMacXlRm1y2PFZRWL/w=="], @@ -898,6 +927,8 @@ "@types/turndown": ["@types/turndown@5.0.6", "", {}, "sha512-ru00MoyeeouE5BX4gRL+6m/BsDfbRayOskWqUvh7CLGW+UXxHQItqALa38kKnOiZPqJrtzJUgAC2+F0rL1S4Pg=="], + "@types/ws": ["@types/ws@8.18.1", "", { "dependencies": { "@types/node": "*" } }, "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg=="], + "@typescript/native-preview": ["@typescript/native-preview@7.0.0-dev.20260609.1", "", { "optionalDependencies": { "@typescript/native-preview-darwin-arm64": "7.0.0-dev.20260609.1", "@typescript/native-preview-darwin-x64": "7.0.0-dev.20260609.1", "@typescript/native-preview-linux-arm": "7.0.0-dev.20260609.1", "@typescript/native-preview-linux-arm64": "7.0.0-dev.20260609.1", "@typescript/native-preview-linux-x64": "7.0.0-dev.20260609.1", "@typescript/native-preview-win32-arm64": "7.0.0-dev.20260609.1", "@typescript/native-preview-win32-x64": "7.0.0-dev.20260609.1" }, "bin": { "tsgo": "bin/tsgo.js" } }, "sha512-1HOuH/u/451O3hx4Z9fesNqarpeit6UfkgwK96sCVWi5p69F0N3v+6bI969lLIjF7K9dbYQNiWUaZ6Wik87iKg=="], "@typescript/native-preview-darwin-arm64": ["@typescript/native-preview-darwin-arm64@7.0.0-dev.20260609.1", "", { "os": "darwin", "cpu": "arm64" }, "sha512-Yf/zHEadP/yUiWUdM/mZVfEVFJuGMf6nhRSFif0vp+FwtfGU4jmlpNF7BTJJdOHrrcWkwEJKzAoMCtEtyxhuyQ=="], @@ -1140,6 +1171,8 @@ "jiti": ["jiti@2.7.0", "", { "bin": { "jiti": "lib/jiti-cli.mjs" } }, 
"sha512-AC/7JofJvZGrrneWNaEnJeOLUx+JlGt7tNa0wZiRPT4MY1wmfKjt2+6O2p2uz2+skll8OZZmJMNqeke7kKbNgQ=="], + "js-base64": ["js-base64@3.7.8", "", {}, "sha512-hNngCeKxIUQiEUN3GPJOkz4wF/YvdUdbNL9hsBcMQTkKzboD7T/q3OYOuuPZLUE6dBxSGpwhk5mwuDud7JVAow=="], + "js-tokens": ["js-tokens@4.0.0", "", {}, "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ=="], "js-yaml": ["js-yaml@4.2.0", "", { "dependencies": { "argparse": "^2.0.1" }, "bin": { "js-yaml": "bin/js-yaml.js" } }, "sha512-ePWsvanv0DWuDRsW8dnt+R4jQ31SCRCQ7hhNcPXZPsoBZiemuZNYGf7adZdqX2D86j6rvKp3RpCxVTSb8WQlOw=="], @@ -1156,6 +1189,8 @@ "kuler": ["kuler@2.0.0", "", {}, "sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A=="], + "libsql": ["libsql@0.5.29", "", { "dependencies": { "@neon-rs/load": "^0.0.4", "detect-libc": "2.0.2" }, "optionalDependencies": { "@libsql/darwin-arm64": "0.5.29", "@libsql/darwin-x64": "0.5.29", "@libsql/linux-arm-gnueabihf": "0.5.29", "@libsql/linux-arm-musleabihf": "0.5.29", "@libsql/linux-arm64-gnu": "0.5.29", "@libsql/linux-arm64-musl": "0.5.29", "@libsql/linux-x64-gnu": "0.5.29", "@libsql/linux-x64-musl": "0.5.29", "@libsql/win32-x64-msvc": "0.5.29" }, "os": [ "linux", "win32", "darwin", ], "cpu": [ "arm", "x64", "arm64", ] }, "sha512-8lMP8iMgiBzzoNbAPQ59qdVcj6UaE/Vnm+fiwX4doX4Narook0a4GPKWBEv+CR8a1OwbfkgL18uBfBjWdF0Fzg=="], + "lie": ["lie@3.3.0", "", { "dependencies": { "immediate": "~3.0.5" } }, "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ=="], "lightningcss": ["lightningcss@1.32.0", "", { "dependencies": { "detect-libc": "^2.0.3" }, "optionalDependencies": { "lightningcss-android-arm64": "1.32.0", "lightningcss-darwin-arm64": "1.32.0", "lightningcss-darwin-x64": "1.32.0", "lightningcss-freebsd-x64": "1.32.0", "lightningcss-linux-arm-gnueabihf": "1.32.0", "lightningcss-linux-arm64-gnu": "1.32.0", "lightningcss-linux-arm64-musl": "1.32.0", 
"lightningcss-linux-x64-gnu": "1.32.0", "lightningcss-linux-x64-musl": "1.32.0", "lightningcss-win32-arm64-msvc": "1.32.0", "lightningcss-win32-x64-msvc": "1.32.0" } }, "sha512-NXYBzinNrblfraPGyrbPoD19C1h9lfI/1mzgWYvXUTe414Gz/X1FD2XBZSZM7rRTrMA8JL3OtAaGifrIKhQ5yQ=="], @@ -1286,6 +1321,8 @@ "progress": ["progress@2.0.3", "", {}, "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA=="], + "promise-limit": ["promise-limit@2.7.0", "", {}, "sha512-7nJ6v5lnJsXwGprnGXga4wx6d1POjvi5Qmf1ivTRxTjH4Z/9Czja/UCMLVmB9N93GeWOU93XaFaEt6jbuoagNw=="], + "protobufjs": ["protobufjs@7.6.4", "", { "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", "@protobufjs/codegen": "^2.0.5", "@protobufjs/eventemitter": "^1.1.1", "@protobufjs/fetch": "^1.1.1", "@protobufjs/float": "^1.0.2", "@protobufjs/path": "^1.1.2", "@protobufjs/pool": "^1.1.0", "@protobufjs/utf8": "^1.1.1", "@types/node": ">=13.7.0", "long": "^5.3.2" } }, "sha512-RJJPTTpvFfHcWLkIa2JFWK4XvtSzS0yEWDmunqHXli1h3JlkbcQZXDZdcWxv+JK3Xsl5/UFDPZ0iGm7DAengYw=="], "puppeteer-core": ["puppeteer-core@25.1.0", "", { "dependencies": { "@puppeteer/browsers": "3.0.4", "chromium-bidi": "16.0.1", "devtools-protocol": "0.0.1624250", "typed-query-selector": "^2.12.2", "webdriver-bidi-protocol": "0.4.2", "ws": "^8.21.0" } }, "sha512-jKzy5y4WG6uNuFbTWgW1D7mqoT9o0nllc/6a1DGF775T1mPmgw3scdFEtEq67yVFikavQmbYq6NLfbTfxHSlqQ=="], @@ -1524,6 +1561,8 @@ "jszip/readable-stream": ["readable-stream@2.3.8", "", { "dependencies": { "core-util-is": "~1.0.0", "inherits": "~2.0.3", "isarray": "~1.0.0", "process-nextick-args": "~2.0.0", "safe-buffer": "~5.1.1", "string_decoder": "~1.1.1", "util-deprecate": "~1.0.1" } }, "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA=="], + "libsql/detect-libc": ["detect-libc@2.0.2", "", {}, "sha512-UX6sGumvvqSaXgdKGUsgZWqcUyIXZ/vZTrlRT/iobiKhGL0zL4d3osHj3uqllWJK+i+sixDS/3COVEOFbupFyw=="], + 
"log-update/slice-ansi": ["slice-ansi@7.1.2", "", { "dependencies": { "ansi-styles": "^6.2.1", "is-fullwidth-code-point": "^5.0.0" } }, "sha512-iOBWFgUX7caIZiuutICxVgX1SdxwAVFFKwt1EvMYYec/NWO5meOJ6K5uQxhrYBdQJne4KxiqZc+KptFOWFSI9w=="], "log-update/wrap-ansi": ["wrap-ansi@9.0.2", "", { "dependencies": { "ansi-styles": "^6.2.1", "string-width": "^7.0.0", "strip-ansi": "^7.1.0" } }, "sha512-42AtmgqjV+X1VpdOfyTGOYRi0/zsoLqtXQckTmqTeybT+BDIbM/Guxo7x3pE2vtpr1ok6xRqM9OpBe+Jyoqyww=="], diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md index d2cf011f1d..6838b91715 100644 --- a/packages/coding-agent/CHANGELOG.md +++ b/packages/coding-agent/CHANGELOG.md @@ -2,6 +2,20 @@ ## [Unreleased] +### Added + +- Added task-relevant code summaries (codemap): agent-written file-level summaries persisted in Turso/libSQL with native vector search, retrieved as minimal task-relevant context via hybrid FTS5 + vector_top_k retrieval with reciprocal rank fusion and budget packing. Distinct feature module (`codemap.*` settings) that composes with any memory backend including "off". Features automatic Turso database provisioning, lazy embedding on retrieval, staleness tracking via `Bun.hash`, and a pluggable language adapter interface (TS adapter ships in v1 via the existing LSP client). + +### Changed + +- Changed `codemap.turso.autoProvision` default from `true` to `false` (opt-in) to match the design spec — auto-provisioning fires network calls to Turso's API, creates cloud databases, and persists credentials via `settings.set()`, so it must be explicitly enabled. +- Extracted `injectCodemapTaskContext` from `AgentSession.#injectCodemapTaskContext` into a testable standalone function in the `task-context` module. + +### Fixed + +- Fixed path traversal vulnerability in `toStoredPath` — file paths like `../../etc/passwd` were stored without boundary checking. 
The guard now rejects paths that resolve outside the project cwd, and `toStoredPath` runs before DB access in all tool `execute()` methods (fail-fast on invalid input). +- Removed unused `fmtOps` function from `benchmark.ts` (lint warning). + ## [16.1.9] - 2026-06-21 ### Added diff --git a/packages/coding-agent/package.json b/packages/coding-agent/package.json index 9f5e31321c..a9f59676b5 100644 --- a/packages/coding-agent/package.json +++ b/packages/coding-agent/package.json @@ -46,6 +46,7 @@ }, "dependencies": { "@agentclientprotocol/sdk": "catalog:", + "@libsql/client": "^0.17.4", "@babel/parser": "catalog:", "@mozilla/readability": "catalog:", "@oh-my-pi/hashline": "catalog:", diff --git a/packages/coding-agent/src/config/settings-schema.ts b/packages/coding-agent/src/config/settings-schema.ts index 51bdafbbab..a31108b5b6 100644 --- a/packages/coding-agent/src/config/settings-schema.ts +++ b/packages/coding-agent/src/config/settings-schema.ts @@ -2519,6 +2519,76 @@ export const SETTINGS_SCHEMA = { "hindsight.mentalModelRefreshIntervalMs": { type: "number", default: 5 * 60 * 1000 }, "hindsight.mentalModelMaxRenderChars": { type: "number", default: 16_000 }, + // Codemap (code summaries) — DISTINCT feature module at packages/coding-agent/src/task-context/. + // NOT a memory.backend enum value. Composable with any memory backend including "off". 
+ "codemap.enabled": { + type: "boolean", + default: false, + ui: { + tab: "memory", + group: "Codemap", + label: "Enable Code Summaries", + description: + "Enable agent-written code summaries with task-relevant retrieval (Turso/libSQL + vector search).", + }, + }, + "codemap.autoInject": { + type: "boolean", + default: true, + ui: { + tab: "memory", + group: "Codemap", + label: "Codemap Auto Inject", + description: "Automatically retrieve and inject task-relevant summaries on the first turn of each session.", + condition: "codemapActive", + }, + }, + "codemap.dbPath": { type: "string", default: "" }, + "codemap.tokenBudget": { type: "number", default: 8000 }, + "codemap.maxResults": { type: "number", default: 20 }, + "codemap.maxSummaryChars": { type: "number", default: 1000 }, + "codemap.embedding.variant": { + type: "enum", + values: ["en", "multilingual"] as const, + default: "en", + ui: { + tab: "memory", + group: "Codemap", + label: "Codemap Embedding Variant", + description: "Local embedding model family. 
en = 768d English; multilingual = 1024d cross-language.", + options: [ + { + value: "en", + label: "English (bge-base-en-v1.5)", + description: "BAAI/bge-base-en-v1.5 (768d), English-only", + }, + { + value: "multilingual", + label: "Multilingual (multilingual-e5-large)", + description: "intfloat/multilingual-e5-large (1024d), cross-language", + }, + ], + condition: "codemapActive", + }, + }, + "codemap.embedding.model": { type: "string", default: undefined }, + "codemap.embedding.apiUrl": { type: "string", default: undefined }, + "codemap.embedding.apiKey": { type: "string", default: undefined }, + "codemap.turso.syncUrl": { type: "string", default: "" }, + "codemap.turso.authToken": { type: "string", default: "" }, + "codemap.turso.autoProvision": { + type: "boolean", + default: false, + ui: { + tab: "memory", + group: "Codemap", + label: "Codemap Turso Auto-Provision", + description: "Automatically provision a Turso database when TURSO_API_TOKEN is available.", + condition: "codemapActive", + }, + }, + "codemap.turso.org": { type: "string", default: "" }, + // TTSR "ttsr.enabled": { type: "boolean", diff --git a/packages/coding-agent/src/hindsight/content.ts b/packages/coding-agent/src/hindsight/content.ts index 38e86ef584..22c182abd4 100644 --- a/packages/coding-agent/src/hindsight/content.ts +++ b/packages/coding-agent/src/hindsight/content.ts @@ -24,6 +24,7 @@ const MEMORIES_REGEX = /[\s\S]*?<\/memories>/g; const LEGACY_HINDSIGHT_MEMORIES_REGEX = /[\s\S]*?<\/hindsight_memories>/g; const LEGACY_RELEVANT_MEMORIES_REGEX = /[\s\S]*?<\/relevant_memories>/g; const MENTAL_MODELS_REGEX = /[\s\S]*?<\/mental_models>/g; +const CODEMAP_REGEX = /[\s\S]*?<\/codemap>/g; /** * Strip ``, ``, and legacy memory blocks. 
@@ -39,6 +40,7 @@ export function stripMemoryTags(content: string): string { return content .replace(MEMORIES_REGEX, "") .replace(MENTAL_MODELS_REGEX, "") + .replace(CODEMAP_REGEX, "") .replace(LEGACY_HINDSIGHT_MEMORIES_REGEX, "") .replace(LEGACY_RELEVANT_MEMORIES_REGEX, ""); } diff --git a/packages/coding-agent/src/modes/components/settings-defs.ts b/packages/coding-agent/src/modes/components/settings-defs.ts index 3c9d545770..ff8436157a 100644 --- a/packages/coding-agent/src/modes/components/settings-defs.ts +++ b/packages/coding-agent/src/modes/components/settings-defs.ts @@ -97,6 +97,13 @@ const CONDITIONS: Record boolean> = { return false; } }, + codemapActive: () => { + try { + return Settings.instance.get("codemap.enabled") === true; + } catch { + return false; + } + }, autolearnActive: () => { try { return Settings.instance.get("autolearn.enabled") === true; diff --git a/packages/coding-agent/src/prompts/system/system-prompt.md b/packages/coding-agent/src/prompts/system/system-prompt.md index 4173e874a0..e227b5aedf 100644 --- a/packages/coding-agent/src/prompts/system/system-prompt.md +++ b/packages/coding-agent/src/prompts/system/system-prompt.md @@ -66,6 +66,10 @@ Special URLs for internal resources; with most FS/bash tools they auto-resolve t - `issue://` (or `issue:////`): GitHub issue, disk-cached. Bare lists recent issues; `?state=open|closed|all&limit=&author=&label=`. - `pr://` (or `pr:////`): GitHub PR, same cache; `?comments=0` drops comments. Bare lists recent PRs; `?state=open|closed|merged|all&limit=&author=&label=`. - `omp://`: harness docs; AVOID unless the user asks about the harness itself. +{{#if hasCodemap}} +## Code Summaries (codemap) +File-level code summaries are available for this repo. Before reading unfamiliar files, call `get_task_context` with your task to retrieve relevant summaries (packed within a token budget). 
After reading a non-trivial file or making load-bearing changes, call `set_file_summary` to record a short note (purpose, key symbols, gotchas, invariants). Summaries are anchored to file content via `Bun.hash` — if a file changes, its summary is flagged `stale` and should be refreshed. +{{/if}} {{#if toolInfo.length}} {{#if toolListMode}} diff --git a/packages/coding-agent/src/sdk.ts b/packages/coding-agent/src/sdk.ts index d4133af98c..8f2766e401 100644 --- a/packages/coding-agent/src/sdk.ts +++ b/packages/coding-agent/src/sdk.ts @@ -125,6 +125,7 @@ import { loadProjectContextFiles as loadContextFilesInternal, } from "./system-prompt"; import { AgentOutputManager } from "./task/output-manager"; +import { resolveCodemap } from "./task-context"; import { AUTO_THINKING, type ConfiguredThinkingLevel, @@ -2195,6 +2196,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} secretsEnabled, workspaceTree: workspaceTreePromise, memoryRootEnabled: memoryBackend.id === "local", + codemapEnabled: settings.get("codemap.enabled"), model: settings.get("includeModelInPrompt") ? getActiveModelString() : undefined, personality: agentKind === "sub" ? "none" : settings.get("personality"), }); @@ -2806,6 +2808,19 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} void logger.time("startMemoryStartupTask", startMemoryBackend); } + // Initialize codemap (code summaries) if enabled. Distinct from the memory + // backend — runs independently of memory.backend. Opens the Turso/libSQL DB, + // runs auto-provisioning if configured, and stores session state. Non-blocking + // so the session starts without waiting for DB init; the first-turn injection + // in #buildSystemPromptForAgentStart handles a not-yet-ready state gracefully. 
+ void (async () => { + try { + await resolveCodemap(session, settings); + } catch (error) { + logger.debug("codemap: initialization failed", { error: String(error) }); + } + })(); + // Wire MCP manager callbacks to session for reactive tool updates. // Skip when reusing a parent's manager — the parent owns the callbacks. if (mcpManager && !options.mcpManager) { diff --git a/packages/coding-agent/src/session/agent-session.ts b/packages/coding-agent/src/session/agent-session.ts index fa5146cb80..38bf676bf4 100644 --- a/packages/coding-agent/src/session/agent-session.ts +++ b/packages/coding-agent/src/session/agent-session.ts @@ -240,6 +240,8 @@ import { type SecretObfuscator, } from "../secrets/obfuscator"; import { invalidateHostMetadata } from "../ssh/connection-manager"; +import { injectCodemapTaskContext, shutdownCodemap } from "../task-context"; +import { getCodemapSessionState, markFirstTurnInjected } from "../task-context/state"; import { AUTO_THINKING, type ConfiguredThinkingLevel, @@ -4283,6 +4285,9 @@ export class AgentSession { // memories, and that round-trips through the worker we are about to // hard-kill (issue #3031). await shutdownMnemopiEmbedClient(); + // Shut down codemap (code summaries): close the libSQL client and the + // embedding subprocess. Safe to call even if codemap was never initialized. + await shutdownCodemap(this); this.#disconnectFromAgent(); if (this.#unsubscribeAppendOnly) { this.#unsubscribeAppendOnly(); @@ -4942,15 +4947,39 @@ export class AgentSession { this.#lastAppliedToolSignature = this.#computeAppliedToolSignature(activeToolNames, activeTools); } + /** + * Composable first-turn injection for codemap (code summaries). + * + * Runs REGARDLESS of memory.backend — codemap is a distinct feature axis + * that composes with any memory backend including "off" (the default). + * Gated only on `codemap.enabled` and `codemap.autoInject`. 
+ * + * Fires once per session via `hasInjectedForFirstTurn` flag (mirrors + * mnemopi/state.ts:313 `hasRecalledForFirstTurn`). + */ + async #injectCodemapTaskContext(promptText: string): Promise { + return injectCodemapTaskContext( + this.settings, + getCodemapSessionState(this), + this.sessionManager.getCwd(), + promptText, + () => markFirstTurnInjected(this), + ); + } + async #buildSystemPromptForAgentStart(promptText: string): Promise { + // Composable first-turn injection — runs regardless of memory.backend. + const codemapBlock = await this.#injectCodemapTaskContext(promptText); + const baseWithCodemap = codemapBlock ? [...this.#baseSystemPrompt, codemapBlock] : this.#baseSystemPrompt; + const backend = await resolveMemoryBackend(this.settings); - if (!backend.beforeAgentStartPrompt) return this.#baseSystemPrompt; + if (!backend.beforeAgentStartPrompt) return baseWithCodemap; try { const injected = await backend.beforeAgentStartPrompt(this, promptText); - if (!injected) return this.#baseSystemPrompt; + if (!injected) return baseWithCodemap; - const previousBaseSystemPrompt = this.#baseSystemPrompt; + const previousBaseSystemPrompt = baseWithCodemap; try { await this.refreshBaseSystemPrompt(); } catch (refreshErr) { @@ -4977,7 +5006,7 @@ export class AgentSession { backend: backend.id, error: String(err), }); - return this.#baseSystemPrompt; + return baseWithCodemap; } } diff --git a/packages/coding-agent/src/system-prompt.ts b/packages/coding-agent/src/system-prompt.ts index 9a258d6fcb..9a88822a4f 100644 --- a/packages/coding-agent/src/system-prompt.ts +++ b/packages/coding-agent/src/system-prompt.ts @@ -417,6 +417,8 @@ export interface BuildSystemPromptOptions { workspaceTree?: WorkspaceTree | Promise; /** Whether the local memory://root summary is active. */ memoryRootEnabled?: boolean; + /** Whether the codemap (code summaries) feature is enabled. */ + codemapEnabled?: boolean; /** Active model identifier (e.g. 
"anthropic/claude-opus-4") surfaced to the agent. */ model?: string; /** Personality preset rendered into the default system prompt. "none" omits the block. Default: "default" */ @@ -459,6 +461,7 @@ export async function buildSystemPrompt(options: BuildSystemPromptOptions = {}): secretsEnabled = false, workspaceTree: providedWorkspaceTree, memoryRootEnabled = false, + codemapEnabled = false, model, personality = "default", } = options; @@ -669,6 +672,7 @@ export async function buildSystemPrompt(options: BuildSystemPromptOptions = {}): taskBatch, secretsEnabled, hasMemoryRoot: memoryRootEnabled, + hasCodemap: codemapEnabled, hasObsidian: hasObsidian(), }; const rendered = prompt.render(resolvedCustomPrompt ? customSystemPromptTemplate : systemPromptTemplate, data); diff --git a/packages/coding-agent/src/task-context/__tests__/adapter.test.ts b/packages/coding-agent/src/task-context/__tests__/adapter.test.ts new file mode 100644 index 0000000000..c138f2d673 --- /dev/null +++ b/packages/coding-agent/src/task-context/__tests__/adapter.test.ts @@ -0,0 +1,156 @@ +import { describe, expect, it } from "bun:test"; +import { + getAdapter, + getAdapterWithProvider, + type LanguageAdapter, + type LspDocumentSymbolProvider, + TsAdapter, +} from "../adapter"; + +function makeProvider( + symbols: Array<{ name: string; kind: number; startLine: number; endLine: number }> = [], +): LspDocumentSymbolProvider { + return { + async getDocumentSymbols() { + return symbols.map(s => ({ + name: s.name, + kind: s.kind, + range: { start: { line: s.startLine }, end: { line: s.endLine } }, + })); + }, + }; +} + +describe("codemap getAdapter (no provider)", () => { + it("returns null for all extensions — requires a provider", () => { + expect(getAdapter("src/foo.ts")).toBeNull(); + expect(getAdapter("src/foo.tsx")).toBeNull(); + expect(getAdapter("src/foo.js")).toBeNull(); + }); + + it("returns null when file has no extension", () => { + expect(getAdapter("Makefile")).toBeNull(); + }); + + 
it("returns null for non-TS extensions", () => { + expect(getAdapter("src/foo.py")).toBeNull(); + expect(getAdapter("src/foo.rs")).toBeNull(); + expect(getAdapter("src/foo.go")).toBeNull(); + }); +}); + +describe("codemap getAdapterWithProvider — extension routing", () => { + it("returns a TsAdapter for TS-family extensions", () => { + const provider = makeProvider(); + for (const ext of [".ts", ".tsx", ".js", ".jsx", ".mjs", ".mts", ".cjs"]) { + const adapter = getAdapterWithProvider(`src/foo${ext}`, provider); + expect(adapter).toBeInstanceOf(TsAdapter); + } + }); + + it("is case-insensitive on extension", () => { + const provider = makeProvider(); + expect(getAdapterWithProvider("src/FOO.TS", provider)).toBeInstanceOf(TsAdapter); + expect(getAdapterWithProvider("src/FOO.TSX", provider)).toBeInstanceOf(TsAdapter); + }); + + it("returns null for non-TS extensions even with a provider", () => { + const provider = makeProvider(); + expect(getAdapterWithProvider("src/foo.py", provider)).toBeNull(); + expect(getAdapterWithProvider("src/foo.rs", provider)).toBeNull(); + }); + + it("returns null when file has no extension", () => { + const provider = makeProvider(); + expect(getAdapterWithProvider("Dockerfile", provider)).toBeNull(); + }); + + it("handles dotted filenames (last dot wins)", () => { + const provider = makeProvider(); + // "foo.bar.ts" → ext is ".ts" + expect(getAdapterWithProvider("src/foo.bar.ts", provider)).toBeInstanceOf(TsAdapter); + // "foo.tar.gz" → ext is ".gz", not a TS extension + expect(getAdapterWithProvider("src/foo.tar.gz", provider)).toBeNull(); + }); +}); + +describe("codemap TsAdapter — extensions field", () => { + it("exposes the full TS-family extension list", () => { + const adapter = new TsAdapter(makeProvider()); + expect(adapter.extensions).toEqual([".ts", ".tsx", ".js", ".jsx", ".mjs", ".mts", ".cjs"]); + }); +}); + +describe("codemap TsAdapter — getSymbolsAsync (LSP kind mapping)", () => { + it("maps LSP SymbolKind numbers to 
anchor kinds", async () => { + const provider = makeProvider([ + { name: "MyClass", kind: 5, startLine: 0, endLine: 10 }, // 5 → class + { name: "myMethod", kind: 6, startLine: 1, endLine: 5 }, // 6 → method + { name: "ctor", kind: 9, startLine: 2, endLine: 3 }, // 9 → method (constructor) + { name: "Iface", kind: 11, startLine: 3, endLine: 4 }, // 11 → interface + { name: "myFn", kind: 12, startLine: 4, endLine: 6 }, // 12 → function + { name: "myVar", kind: 13, startLine: 5, endLine: 5 }, // 13 → variable + { name: "MY_CONST", kind: 14, startLine: 6, endLine: 6 }, // 14 → const + { name: "MyStruct", kind: 23, startLine: 7, endLine: 8 }, // 23 → type (Struct) + { name: "unknown", kind: 999, startLine: 8, endLine: 8 }, // default → variable + ]); + const adapter = new TsAdapter(provider); + const symbols = await adapter.getSymbolsAsync("src/foo.ts"); + + expect(symbols.map(s => [s.name, s.kind])).toEqual([ + ["MyClass", "class"], + ["myMethod", "method"], + ["ctor", "method"], + ["Iface", "interface"], + ["myFn", "function"], + ["myVar", "variable"], + ["MY_CONST", "const"], + ["MyStruct", "type"], + ["unknown", "variable"], + ]); + }); + + it("converts LSP 0-indexed lines to 1-indexed", async () => { + const provider = makeProvider([{ name: "fn", kind: 12, startLine: 0, endLine: 4 }]); + const adapter = new TsAdapter(provider); + const symbols = await adapter.getSymbolsAsync("src/foo.ts"); + expect(symbols[0].startLine).toBe(1); + expect(symbols[0].endLine).toBe(5); + }); + + it("returns empty array when provider returns no symbols", async () => { + const adapter = new TsAdapter(makeProvider([])); + expect(await adapter.getSymbolsAsync("src/empty.ts")).toEqual([]); + }); + + it("returns empty array when provider throws", async () => { + const provider: LspDocumentSymbolProvider = { + async getDocumentSymbols() { + throw new Error("LSP server crashed"); + }, + }; + const adapter = new TsAdapter(provider); + expect(await 
adapter.getSymbolsAsync("src/foo.ts")).toEqual([]); + }); +}); + +describe("codemap TsAdapter — sync stubs", () => { + it("getSymbols always returns empty (sync stub)", () => { + const adapter = new TsAdapter(makeProvider()); + expect(adapter.getSymbols("src/foo.ts")).toEqual([]); + }); + + it("getSymbolAtLine always returns null (sync stub)", () => { + const adapter = new TsAdapter(makeProvider()); + expect(adapter.getSymbolAtLine("src/foo.ts", 1)).toBeNull(); + }); +}); + +describe("codemap TsAdapter — LanguageAdapter interface conformance", () => { + it("satisfies the LanguageAdapter interface", () => { + const adapter: LanguageAdapter = new TsAdapter(makeProvider()); + expect(adapter.extensions.length).toBeGreaterThan(0); + expect(typeof adapter.getSymbolAtLine).toBe("function"); + expect(typeof adapter.getSymbols).toBe("function"); + }); +}); diff --git a/packages/coding-agent/src/task-context/__tests__/benchmark.ts b/packages/coding-agent/src/task-context/__tests__/benchmark.ts new file mode 100644 index 0000000000..d948b11f8d --- /dev/null +++ b/packages/coding-agent/src/task-context/__tests__/benchmark.ts @@ -0,0 +1,278 @@ +/** + * Codemap task-context benchmark. + * + * Measures latency of the core operations at realistic data volumes: + * 1. Schema init (cold start) + * 2. Upsert throughput (batch insert N summaries) + * 3. FTS search latency (single keyword + multi-keyword) + * 4. Vector search latency (vector_top_k with embeddings) + * 5. 
Full getTaskContext pipeline (FTS + vector + RRF + budget packer + staleness) + * + * Usage: bun run src/task-context/__tests__/benchmark.ts + */ + +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { createClient } from "@libsql/client"; +import type { CodemapConfig } from "../config"; +import { closeCodemapDb } from "../db"; +import { getTaskContext } from "../retrieve"; +import { initSchema } from "../schema"; +import { searchFts, searchVector, summaryCount, updateEmbedding, upsertSummary } from "../store"; + +const PROJECT = "bench-project"; +const DIMENSIONS = 768; + +function makeConfig(dbPath: string): CodemapConfig { + return { + enabled: true, + autoInject: true, + dbPath, + tokenBudget: 8000, + maxResults: 20, + maxSummaryChars: 1000, + turso: { syncUrl: "", authToken: "", autoProvision: false, org: "" }, + embedding: { + model: "BAAI/bge-base-en-v1.5", + variant: "en", + apiUrl: undefined, + apiKey: undefined, + dimensions: DIMENSIONS, + }, + }; +} + +const SUMMARY_TEMPLATES = [ + "Handles authentication token validation and session management for the API gateway", + "Database connection pool manager with PostgreSQL driver and retry logic", + "React component for rendering user profile cards with avatar and bio sections", + "Utility functions for string manipulation including camelCase and snake_case conversion", + "Error handling middleware that catches async errors and formats them as JSON responses", + "Configuration loader that reads YAML files and merges environment variable overrides", + "WebSocket server implementation with room-based message routing and presence tracking", + "File system watcher that detects changes and triggers incremental rebuilds", + "GraphQL resolver for the User type with DataLoader batching for N+1 prevention", + "Test fixture generator that creates mock data factories for integration tests", + "Cryptography module implementing AES-256-GCM encryption with 
/**
 * Formats a duration given in milliseconds for the benchmark report.
 *
 * Values below 1ms are printed as whole microseconds; everything else is
 * printed in milliseconds with one decimal place.
 *
 * @param ms - Duration in milliseconds.
 * @returns The formatted duration, e.g. `500μs` or `12.3ms`.
 */
function fmtMs(ms: number): string {
	if (ms < 1) {
		const micros = (ms * 1000).toFixed(0);
		// A value just under 1ms rounds up to 1000; report it as "1.0ms" instead of "1000μs".
		if (micros !== "1000") return `${micros}μs`;
	}
	return `${ms.toFixed(1)}ms`;
}

/**
 * Times an async operation and prints a one-line avg/min/max summary.
 *
 * `fn` is awaited once before timing starts (warmup), so it runs one more
 * time than the number of timed runs. Operations with side effects therefore
 * execute that extra time as well.
 *
 * @param label - Text shown in the left column of the report line.
 * @param fn - Operation to measure; its resolved value is ignored.
 * @param iterations - Number of timed runs. Non-finite values and values
 *   below 1 are treated as 1 so the statistics are always defined.
 * @returns Average duration of the timed runs, in milliseconds.
 */
async function bench(label: string, fn: () => Promise<unknown>, iterations = 1): Promise<number> {
	// Guard against an empty sample set (NaN average, ±Infinity min/max).
	const runs = Number.isFinite(iterations) && iterations >= 1 ? Math.ceil(iterations) : 1;

	// Warmup
	await fn();

	const times: number[] = [];
	for (let i = 0; i < runs; i++) {
		const start = performance.now();
		await fn();
		times.push(performance.now() - start);
	}
	const avg = times.reduce((a, b) => a + b, 0) / times.length;
	const min = Math.min(...times);
	const max = Math.max(...times);
	console.log(
		` ${label.padEnd(50)} avg=${fmtMs(avg).padStart(8)} min=${fmtMs(min).padStart(8)} max=${fmtMs(max).padStart(8)}${
			runs > 1 ? ` (${runs} runs)` : ""
		}`,
	);
	return avg;
}
FTS search latency + console.log(`\n── FTS5 Search: ${count} summaries ──`); + await bench( + "searchFts (single keyword: 'database')", + async () => { + await searchFts(client, PROJECT, "database", 20); + }, + 10, + ); + + await bench( + "searchFts (multi-keyword: 'database connection pool retry')", + async () => { + await searchFts(client, PROJECT, "database connection pool retry", 20); + }, + 10, + ); + + await bench( + "searchFts (no match: 'zzznomatchxyz')", + async () => { + await searchFts(client, PROJECT, "zzznomatchxyz", 20); + }, + 10, + ); + + // 4. Vector search — only for counts where we have embeddings + if (count <= 500) { + console.log(`\n── Vector Search: ${count} summaries (embedding first ${count}) ──`); + // Add embeddings to all summaries + const rows = await client.execute({ + sql: "SELECT id FROM summaries WHERE project_label = ? ORDER BY id", + args: [PROJECT], + }); + await bench( + `updateEmbedding x${rows.rows.length}`, + async () => { + for (let i = 0; i < rows.rows.length; i++) { + const id = Number(rows.rows[i]!.id); + await updateEmbedding(client, id, generateVector(i), "test-model"); + } + }, + 1, + ); + + const queryVec = generateVector(42); + await bench( + "searchVector (vector_top_k, k=20)", + async () => { + await searchVector(client, PROJECT, queryVec, 20); + }, + 10, + ); + + // 5. 
/**
 * Test helper: builds a `Settings` instance from loose key/value overrides
 * by passing them to `Settings.isolated`.
 *
 * @param overrides - Setting values keyed by dotted path (e.g. `"codemap.enabled"`).
 * @returns The `Settings` object returned by `Settings.isolated`.
 */
function makeSettings(overrides: Record<string, unknown> = {}): Settings {
	// The overrides are an untyped bag, so they are cast to satisfy whatever
	// parameter type `Settings.isolated` declares.
	return Settings.isolated(overrides as never);
}
and bge-base-en-v1.5", () => { + const config = loadCodemapConfig( + makeSettings({ "codemap.enabled": true, "codemap.embedding.variant": "en" }), + AGENT_DIR, + ); + expect(config.embedding.variant).toBe("en"); + expect(config.embedding.dimensions).toBe(768); + expect(config.embedding.model).toBe("BAAI/bge-base-en-v1.5"); + }); + + it("maps multilingual variant to 1024 dimensions and multilingual-e5-large", () => { + const config = loadCodemapConfig( + makeSettings({ "codemap.enabled": true, "codemap.embedding.variant": "multilingual" }), + AGENT_DIR, + ); + expect(config.embedding.variant).toBe("multilingual"); + expect(config.embedding.dimensions).toBe(1024); + expect(config.embedding.model).toBe("intfloat/multilingual-e5-large"); + }); +}); + +describe("codemap loadCodemapConfig — embedding model override precedence", () => { + it("explicit codemap.embedding.model overrides variant default", () => { + const config = loadCodemapConfig( + makeSettings({ + "codemap.enabled": true, + "codemap.embedding.model": "custom/model-x", + }), + AGENT_DIR, + ); + expect(config.embedding.model).toBe("custom/model-x"); + // dimensions still driven by variant + expect(config.embedding.dimensions).toBe(768); + }); + + it("empty-string codemap.embedding.model falls through to variant default", () => { + const config = loadCodemapConfig( + makeSettings({ "codemap.enabled": true, "codemap.embedding.model": " " }), + AGENT_DIR, + ); + expect(config.embedding.model).toBe("BAAI/bge-base-en-v1.5"); + }); + + it("CODEMAP_EMBEDDING_MODEL env var is used when setting is unset", () => { + const prev = Bun.env.CODEMAP_EMBEDDING_MODEL; + Bun.env.CODEMAP_EMBEDDING_MODEL = "env/model"; + try { + const config = loadCodemapConfig(makeSettings({ "codemap.enabled": true }), AGENT_DIR); + expect(config.embedding.model).toBe("env/model"); + } finally { + if (prev === undefined) delete Bun.env.CODEMAP_EMBEDDING_MODEL; + else Bun.env.CODEMAP_EMBEDDING_MODEL = prev; + } + }); + + it("explicit setting 
describe("codemap loadCodemapConfig — floor/clamp guards", () => {
	// One row per guard: the raw setting fed in and the value the loaded config must report.
	const guardCases = [
		{
			title: "clamps tokenBudget below 1000 up to 1000",
			key: "codemap.tokenBudget",
			field: "tokenBudget",
			raw: 500,
			want: 1000,
		},
		{
			title: "floors fractional tokenBudget",
			key: "codemap.tokenBudget",
			field: "tokenBudget",
			raw: 8500.7,
			want: 8500,
		},
		{
			title: "clamps maxResults below 1 up to 1",
			key: "codemap.maxResults",
			field: "maxResults",
			raw: 0,
			want: 1,
		},
		{
			title: "clamps maxSummaryChars below 100 up to 100",
			key: "codemap.maxSummaryChars",
			field: "maxSummaryChars",
			raw: 50,
			want: 100,
		},
	] as const;

	for (const { title, key, field, raw, want } of guardCases) {
		it(title, () => {
			const settings = makeSettings({ "codemap.enabled": true, [key]: raw });
			const loaded = loadCodemapConfig(settings, AGENT_DIR);
			expect(loaded[field]).toBe(want);
		});
	}
});
expect(config.turso.org).toBe("myorg"); + expect(config.turso.autoProvision).toBe(false); + }); + + it("coerces undefined syncUrl/authToken/org to empty string", () => { + const config = loadCodemapConfig(makeSettings({ "codemap.enabled": true }), AGENT_DIR); + expect(config.turso.syncUrl).toBe(""); + expect(config.turso.authToken).toBe(""); + expect(config.turso.org).toBe(""); + }); +}); diff --git a/packages/coding-agent/src/task-context/__tests__/injection.test.ts b/packages/coding-agent/src/task-context/__tests__/injection.test.ts new file mode 100644 index 0000000000..b5881620de --- /dev/null +++ b/packages/coding-agent/src/task-context/__tests__/injection.test.ts @@ -0,0 +1,213 @@ +import { afterAll, beforeAll, describe, expect, it, mock } from "bun:test"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { type Client, createClient } from "@libsql/client"; +import { Settings } from "../../config/settings"; +import { closeCodemapDb } from "../db"; +import { injectCodemapTaskContext } from "../index"; +import { initSchema } from "../schema"; +import type { CodemapSessionState } from "../state"; +import { upsertSummary } from "../store"; + +// --- Test fixtures ---------------------------------------------------------- + +const TASK = "implement user authentication with JWT"; +let tmpDir: string; +let cwd: string; +let projectLabel: string; +let dbPath: string; +let client: Client; + +beforeAll(async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "codemap-inj-")); + cwd = path.join(tmpDir, "project"); + projectLabel = path.basename(cwd); + fs.mkdirSync(cwd, { recursive: true }); + dbPath = path.join(tmpDir, "codemap-test.db"); + client = createClient({ url: `file:${dbPath}` }); + await initSchema(client); + + // Seed summaries that match the task keywords (auth, JWT, user). 
/**
 * Test helper: builds a `CodemapSessionState` around the shared test client
 * and a fresh `makeConfig()` result, with `hasInjectedForFirstTurn` cleared.
 *
 * @param overrides - Fields that replace the defaults above (spread last).
 * @returns The assembled session state.
 */
function makeState(overrides: Partial<CodemapSessionState> = {}): CodemapSessionState {
	return {
		client,
		config: makeConfig(),
		hasInjectedForFirstTurn: false,
		...overrides,
	};
}
expect(markInjected).not.toHaveBeenCalled(); + }); + + it("returns null when codemap.autoInject is false", async () => { + const settings = Settings.isolated({ + "codemap.enabled": true, + "codemap.autoInject": false, + }); + const markInjected = mock(() => {}); + const result = await injectCodemapTaskContext(settings, makeState(), cwd, TASK, markInjected); + expect(result).toBeNull(); + expect(markInjected).not.toHaveBeenCalled(); + }); + + it("returns null when no session state exists (codemap not initialized)", async () => { + const settings = Settings.isolated({ "codemap.enabled": true }); + const markInjected = mock(() => {}); + const result = await injectCodemapTaskContext(settings, undefined, cwd, TASK, markInjected); + expect(result).toBeNull(); + expect(markInjected).not.toHaveBeenCalled(); + }); + + it("returns null when already injected for first turn", async () => { + const settings = Settings.isolated({ "codemap.enabled": true, "codemap.autoInject": true }); + const markInjected = mock(() => {}); + const state = makeState({ hasInjectedForFirstTurn: true }); + const result = await injectCodemapTaskContext(settings, state, cwd, TASK, markInjected); + expect(result).toBeNull(); + expect(markInjected).not.toHaveBeenCalled(); + }); +}); + +// --- Composes with memory.backend="off" ------------------------------------- +// The function takes Settings directly — it never touches memory.backend. +// This test verifies that: with backend="off" in settings, injection still works. 
+ +describe("codemap injectCodemapTaskContext — composes with memory.backend off", () => { + it("injects summaries even when memory.backend is off", async () => { + const settings = Settings.isolated({ + "codemap.enabled": true, + "codemap.autoInject": true, + "memory.backend": "off", + }); + const markInjected = mock(() => {}); + const result = await injectCodemapTaskContext(settings, makeState(), cwd, TASK, markInjected); + expect(result).not.toBeNull(); + expect(markInjected).toHaveBeenCalled(); + expect(result).toContain("Relevant Code Summaries"); + }); +}); + +// --- Once-per-session guard ------------------------------------------------- + +describe("codemap injectCodemapTaskContext — fires once per session", () => { + it("first call returns injection block and marks injected", async () => { + const settings = Settings.isolated({ "codemap.enabled": true, "codemap.autoInject": true }); + const markInjected = mock(() => {}); + const state = makeState(); + const result = await injectCodemapTaskContext(settings, state, cwd, TASK, markInjected); + expect(result).not.toBeNull(); + expect(markInjected).toHaveBeenCalledTimes(1); + }); + + it("second call (after markInjected set the flag) returns null", async () => { + const settings = Settings.isolated({ "codemap.enabled": true, "codemap.autoInject": true }); + const state = makeState(); + // Simulate the first call having set the flag via markInjected. + const markFirst = mock(() => { + state.hasInjectedForFirstTurn = true; + }); + const first = await injectCodemapTaskContext(settings, state, cwd, TASK, markFirst); + expect(first).not.toBeNull(); + + // Second call — state.hasInjectedForFirstTurn is now true. 
+ const markSecond = mock(() => {}); + const second = await injectCodemapTaskContext(settings, state, cwd, TASK, markSecond); + expect(second).toBeNull(); + expect(markSecond).not.toHaveBeenCalled(); + }); +}); + +// --- Error isolation -------------------------------------------------------- + +describe("codemap injectCodemapTaskContext — error isolation", () => { + it("returns null (never throws) when getTaskContext fails", async () => { + const settings = Settings.isolated({ "codemap.enabled": true, "codemap.autoInject": true }); + // Pass a broken client that will throw on query. + const brokenState = makeState({ + client: { + execute: () => { + throw new Error("DB corrupted"); + }, + } as unknown as Client, + }); + const markInjected = mock(() => {}); + // Must NOT throw — injection must never break agent start. + const result = await injectCodemapTaskContext(settings, brokenState, cwd, TASK, markInjected); + expect(result).toBeNull(); + expect(markInjected).not.toHaveBeenCalled(); + }); +}); + +// --- Injection block content ------------------------------------------------ + +describe("codemap injectCodemapTaskContext — block content", () => { + it("includes the task text and matched file summaries", async () => { + const settings = Settings.isolated({ "codemap.enabled": true, "codemap.autoInject": true }); + const markInjected = mock(() => {}); + const result = await injectCodemapTaskContext(settings, makeState(), cwd, TASK, markInjected); + expect(result).toContain(`task: "${TASK}"`); + // FTS should match auth.ts (JWT authentication) — it has both "auth" and "jwt" keywords. 
/**
 * Test helper: returns a complete `CodemapConfig` pointing at the suite's
 * temporary database (`dbPath`), with Turso sync left unconfigured and the
 * 768-dimension "en" embedding settings.
 *
 * @param overrides - Top-level fields that replace the defaults (spread last,
 *   so nested objects such as `turso` or `embedding` are replaced whole).
 * @returns The assembled configuration.
 */
function makeConfig(overrides: Partial<CodemapConfig> = {}): CodemapConfig {
	return {
		enabled: true,
		autoInject: true,
		dbPath,
		tokenBudget: 8000,
		maxResults: 20,
		maxSummaryChars: 1000,
		turso: { syncUrl: "", authToken: "", autoProvision: false, org: "" },
		embedding: {
			model: "BAAI/bge-base-en-v1.5",
			variant: "en",
			apiUrl: undefined,
			apiKey: undefined,
			dimensions: 768,
		},
		...overrides,
	};
}
+// --- Schema + init ---------------------------------------------------------- + +describe("codemap schema + init", () => { + it("creates all tables after initSchema", async () => { + const tables = await client.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"); + const names = tables.rows.map(r => String(r.name)); + expect(names).toContain("summaries"); + expect(names).toContain("summaries_fts"); + expect(names).toContain("schema_migrations"); + }); + + it("creates the vector index", async () => { + const indexes = await client.execute( + "SELECT name FROM sqlite_master WHERE type='index' AND name LIKE 'idx_summaries%' ORDER BY name", + ); + const names = indexes.rows.map(r => String(r.name)); + expect(names).toContain("idx_summaries_project"); + expect(names).toContain("idx_summaries_hash"); + expect(names).toContain("idx_summaries_embedding"); + }); + + it("creates FTS5 sync triggers", async () => { + const triggers = await client.execute( + "SELECT name FROM sqlite_master WHERE type='trigger' AND name LIKE 'summaries_%' ORDER BY name", + ); + const names = triggers.rows.map(r => String(r.name)); + expect(names).toContain("summaries_ai"); + expect(names).toContain("summaries_ad"); + expect(names).toContain("summaries_au"); + }); + + it("records schema version 1", async () => { + const result = await client.execute("SELECT version FROM schema_migrations"); + expect(Number(result.rows[0]?.version)).toBe(1); + }); + + it("initSchema is idempotent (running twice does not error)", async () => { + await initSchema(client); + const count = await summaryCount(client, PROJECT); + expect(count).toBe(0); + }); +}); + +// --- CRUD operations -------------------------------------------------------- + +describe("codemap CRUD", () => { + it("upserts and retrieves a summary", async () => { + const row = await upsertSummary(client, { + projectLabel: PROJECT, + filePath: "src/auth.ts", + summaryText: "Handles password hashing and token validation.", + 
contentHash: "abc123", + maxSummaryChars: 1000, + }); + expect(row.id).toBeGreaterThan(0); + expect(row.filePath).toBe("src/auth.ts"); + expect(row.summaryText).toBe("Handles password hashing and token validation."); + expect(row.contentHash).toBe("abc123"); + + const retrieved = await getSummary(client, PROJECT, "src/auth.ts"); + expect(retrieved).not.toBeNull(); + expect(retrieved?.summaryText).toBe("Handles password hashing and token validation."); + }); + + it("upsert updates existing summary on conflict (same project+path)", async () => { + await upsertSummary(client, { + projectLabel: PROJECT, + filePath: "src/auth.ts", + summaryText: "Updated: handles password hashing, token validation, and session management.", + contentHash: "def456", + maxSummaryChars: 1000, + }); + + const retrieved = await getSummary(client, PROJECT, "src/auth.ts"); + expect(retrieved?.summaryText).toBe( + "Updated: handles password hashing, token validation, and session management.", + ); + expect(retrieved?.contentHash).toBe("def456"); + }); + + it("upsert truncates summary to maxSummaryChars", async () => { + const longText = "x".repeat(2000); + await upsertSummary(client, { + projectLabel: PROJECT, + filePath: "src/long.ts", + summaryText: longText, + contentHash: "hash", + maxSummaryChars: 50, + }); + + const retrieved = await getSummary(client, PROJECT, "src/long.ts"); + expect(retrieved?.summaryText.length).toBe(50); + }); + + it("isolates summaries by project_label", async () => { + await upsertSummary(client, { + projectLabel: PROJECT, + filePath: "src/shared.ts", + summaryText: "Project A summary.", + contentHash: "h1", + maxSummaryChars: 1000, + }); + await upsertSummary(client, { + projectLabel: "other-project", + filePath: "src/shared.ts", + summaryText: "Project B summary.", + contentHash: "h2", + maxSummaryChars: 1000, + }); + + const a = await getSummary(client, PROJECT, "src/shared.ts"); + const b = await getSummary(client, "other-project", "src/shared.ts"); + 
expect(a?.summaryText).toBe("Project A summary."); + expect(b?.summaryText).toBe("Project B summary."); + }); + + it("returns null for missing summary", async () => { + const result = await getSummary(client, PROJECT, "nonexistent.ts"); + expect(result).toBeNull(); + }); + + it("deletes a summary and returns true", async () => { + await upsertSummary(client, { + projectLabel: PROJECT, + filePath: "src/to-delete.ts", + summaryText: "Will be deleted.", + contentHash: "h", + maxSummaryChars: 1000, + }); + const deleted = await deleteSummary(client, PROJECT, "src/to-delete.ts"); + expect(deleted).toBe(true); + const gone = await getSummary(client, PROJECT, "src/to-delete.ts"); + expect(gone).toBeNull(); + }); + + it("returns false when deleting a non-existent summary", async () => { + const deleted = await deleteSummary(client, PROJECT, "never-existed.ts"); + expect(deleted).toBe(false); + }); + + it("counts summaries per project", async () => { + // PROJECT has: src/auth.ts, src/long.ts, src/shared.ts (to-delete was deleted) + const count = await summaryCount(client, PROJECT); + expect(count).toBeGreaterThanOrEqual(3); + }); + + it("stores optional symbol metadata", async () => { + await upsertSummary(client, { + projectLabel: PROJECT, + filePath: "src/symbols.ts", + summaryText: "Contains exported functions.", + contentHash: "h", + maxSummaryChars: 1000, + symbolName: "authenticateUser", + symbolKind: "function", + symbolLineRange: "10-25", + }); + + const retrieved = await getSummary(client, PROJECT, "src/symbols.ts"); + expect(retrieved?.symbolName).toBe("authenticateUser"); + expect(retrieved?.symbolKind).toBe("function"); + expect(retrieved?.symbolLineRange).toBe("10-25"); + }); +}); + +// --- FTS5 search ------------------------------------------------------------ + +describe("codemap FTS5 search", () => { + it("finds summaries by keyword match", async () => { + await upsertSummary(client, { + projectLabel: PROJECT, + filePath: "src/search-target.ts", + 
summaryText: "Database connection pool manages PostgreSQL connections.", + contentHash: "h", + maxSummaryChars: 1000, + }); + + const results = await searchFts(client, PROJECT, "database", 10); + expect(results.length).toBeGreaterThan(0); + const target = results.find(r => r.filePath === "src/search-target.ts"); + expect(target).toBeDefined(); + expect(target?.score).toBeGreaterThan(0); + }); + + it("returns empty for no matches", async () => { + const results = await searchFts(client, PROJECT, "zzznomatchxyz", 10); + expect(results).toHaveLength(0); + }); + + it("returns empty for query with only short tokens (<3 chars)", async () => { + const results = await searchFts(client, PROJECT, "ab cd ef", 10); + expect(results).toHaveLength(0); + }); + + it("ranks more relevant results higher", async () => { + // Insert two summaries — one with the keyword prominently, one tangentially + await upsertSummary(client, { + projectLabel: PROJECT, + filePath: "src/relevant.ts", + summaryText: "authentication authentication authentication token validation", + contentHash: "h", + maxSummaryChars: 1000, + }); + await upsertSummary(client, { + projectLabel: PROJECT, + filePath: "src/tangential.ts", + summaryText: "Some code that mentions authentication once in passing", + contentHash: "h", + maxSummaryChars: 1000, + }); + + const results = await searchFts(client, PROJECT, "authentication", 10); + expect(results.length).toBeGreaterThanOrEqual(2); + // The file with more keyword occurrences should rank higher + const relevantIdx = results.findIndex(r => r.filePath === "src/relevant.ts"); + const tangentialIdx = results.findIndex(r => r.filePath === "src/tangential.ts"); + expect(relevantIdx).toBeLessThan(tangentialIdx); + }); + + it("isolates FTS search by project_label", async () => { + await upsertSummary(client, { + projectLabel: "iso-project", + filePath: "src/iso.ts", + summaryText: "unique isolation keyword zonkflag", + contentHash: "h", + maxSummaryChars: 1000, + }); + + const 
describe("codemap vector search", () => {
	it("finds nearest neighbors by cosine similarity", async () => {
		// Insert a summary, then attach an embedding to it.
		const row = await upsertSummary(client, {
			projectLabel: PROJECT,
			filePath: "src/vec-target.ts",
			summaryText: "Vector search test target.",
			contentHash: "h",
			maxSummaryChars: 1000,
		});
		// The embedding column is F32_BLOB(768), so the vector needs all 768 entries.
		// Store the unit vector along the first dimension.
		const vec = new Array<number>(768).fill(0);
		vec[0] = 1.0;

		await updateEmbedding(client, row.id, vec, "test-model");

		// Query with the identical vector so the stored row is the closest possible match.
		const queryVec = new Array<number>(768).fill(0);
		queryVec[0] = 1.0;

		const results = await searchVector(client, PROJECT, queryVec, 5);
		expect(results.length).toBeGreaterThan(0);
		const target = results.find(r => r.filePath === "src/vec-target.ts");
		expect(target).toBeDefined();
		expect(target?.score).toBeGreaterThan(0.9); // nearly identical vectors
	});

	it("returns empty when queryVector is empty", async () => {
		const results = await searchVector(client, PROJECT, [], 5);
		expect(results).toHaveLength(0);
	});

	it("returns empty for project with no embedded summaries", async () => {
		const queryVec = new Array<number>(768).fill(0);
		queryVec[0] = 1.0;
		const results = await searchVector(client, "no-embeddings-project", queryVec, 5);
		expect(results).toHaveLength(0);
	});
});
backfill", () => { + it("finds summaries without embeddings", async () => { + const unembedded = await getUnembeddedSummaries(client, PROJECT, 100); + // Most summaries we inserted don't have embeddings (except vec-target.ts) + expect(unembedded.length).toBeGreaterThan(0); + expect(unembedded.every(r => r.filePath !== "src/vec-target.ts")).toBe(true); + }); + + it("updates embedding for a summary row", async () => { + const row = await upsertSummary(client, { + projectLabel: PROJECT, + filePath: "src/embed-test.ts", + summaryText: "Will get an embedding.", + contentHash: "h", + maxSummaryChars: 1000, + }); + + const vec = new Array(768).fill(0.1); + await updateEmbedding(client, row.id, vec, "test-model"); + + // Should no longer appear in unembedded list + const unembedded = await getUnembeddedSummaries(client, PROJECT, 100); + expect(unembedded.find(r => r.id === row.id)).toBeUndefined(); + }); +}); + +// --- Full retrieval pipeline ------------------------------------------------ + +describe("codemap getTaskContext pipeline", () => { + it("returns task-relevant summaries via FTS", async () => { + const config = makeConfig(); + const result = await getTaskContext(client, config, "how does authentication work", PROJECT, tmpDir, { + maxFiles: 5, + tokenBudget: 4000, + }); + + expect(result.task).toBe("how does authentication work"); + expect(result.files.length).toBeGreaterThan(0); + expect(result.meta.fileCount).toBe(result.files.length); + expect(result.meta.estimatedTokens).toBeGreaterThan(0); + // auth.ts may or may not appear depending on FTS ranking, but at least + // some file should be returned + expect(result.files.length).toBeGreaterThan(0); + }); + + it("respects maxFiles limit", async () => { + const config = makeConfig(); + const result = await getTaskContext(client, config, "database connection pool", PROJECT, tmpDir, { + maxFiles: 1, + tokenBudget: 10000, + }); + expect(result.files.length).toBeLessThanOrEqual(1); + }); + + it("respects token 
budget", async () => { + const config = makeConfig({ tokenBudget: 100 }); + const result = await getTaskContext(client, config, "authentication database connection", PROJECT, tmpDir, { + maxFiles: 50, + tokenBudget: 100, + }); + // With a 100-token budget, only 1-2 short summaries should fit + // (each costs ceil(chars/4) + 20) + expect(result.meta.estimatedTokens).toBeLessThanOrEqual(200); // allows 1 file exceeding budget + }); + + it("sets truncated=true when results exceed budget or maxFiles", async () => { + const config = makeConfig(); + const result = await getTaskContext( + client, + config, + "authentication database connection pool validation", + PROJECT, + tmpDir, + { maxFiles: 1, tokenBudget: 10000 }, + ); + // We have many summaries matching these terms; with maxFiles=1, should truncate + expect(result.meta.truncated).toBe(true); + }); + + it("returns empty files when no summaries match the task", async () => { + const config = makeConfig(); + const result = await getTaskContext(client, config, "zzznomatchxyz qqqnothingqqq", PROJECT, tmpDir); + expect(result.files).toHaveLength(0); + expect(result.meta.fileCount).toBe(0); + expect(result.meta.estimatedTokens).toBe(0); + }); + + it("includes staleness flags in results", async () => { + // Insert a summary for a file that exists on disk + const testFile = path.join(tmpDir, "exists.ts"); + fs.writeFileSync(testFile, "export const x = 1;"); + const row = await upsertSummary(client, { + projectLabel: PROJECT, + filePath: "exists.ts", + summaryText: "A real file that exists.", + contentHash: "wronghash", // intentionally wrong to trigger stale + maxSummaryChars: 1000, + }); + expect(row.id).toBeGreaterThan(0); + + const config = makeConfig(); + const result = await getTaskContext(client, config, "real file exists", PROJECT, tmpDir, { + maxFiles: 50, + tokenBudget: 10000, + }); + + const file = result.files.find(f => f.path === "exists.ts"); + expect(file).toBeDefined(); + expect(file?.stale).toBe(true); // 
hash doesn't match + expect(file?.missing).toBe(false); // file exists on disk + }); + + it("marks missing=true for summarized files that no longer exist", async () => { + await upsertSummary(client, { + projectLabel: PROJECT, + filePath: "deleted-file.ts", + summaryText: "A file that was deleted.", + contentHash: "somehash", + maxSummaryChars: 1000, + }); + + const config = makeConfig(); + const result = await getTaskContext(client, config, "deleted file", PROJECT, tmpDir, { + maxFiles: 50, + tokenBudget: 10000, + }); + + const file = result.files.find(f => f.path === "deleted-file.ts"); + expect(file).toBeDefined(); + expect(file?.missing).toBe(true); + expect(file?.stale).toBe(true); + }); +}); diff --git a/packages/coding-agent/src/task-context/__tests__/prompt.test.ts b/packages/coding-agent/src/task-context/__tests__/prompt.test.ts new file mode 100644 index 0000000000..34db342ece --- /dev/null +++ b/packages/coding-agent/src/task-context/__tests__/prompt.test.ts @@ -0,0 +1,109 @@ +import { describe, expect, it } from "bun:test"; +import { buildCodemapInjectionBlock } from "../prompt"; +import type { TaskContextResult } from "../retrieve"; + +function makeResult(overrides: Partial<TaskContextResult> = {}): TaskContextResult { + return { + task: "implement auth", + files: [], + meta: { fileCount: 0, estimatedTokens: 0, truncated: false }, + ...overrides, + }; +} + +function makeFile(overrides: Partial<TaskContextResult["files"][number]> = {}): TaskContextResult["files"][number] { + return { + path: "src/auth.ts", + score: 0.5, + summary: "Handles JWT verification.", + stale: false, + missing: false, + updatedAt: "2026-01-01T00:00:00Z", + ...overrides, + }; +} + +describe("codemap buildCodemapInjectionBlock — empty result", () => { + it("returns empty string when files array is empty", () => { + expect(buildCodemapInjectionBlock(makeResult())).toBe(""); + }); +}); + +describe("codemap buildCodemapInjectionBlock — structure", () => { + it("includes the task text in the header", () => { + const block = 
buildCodemapInjectionBlock( + makeResult({ task: "refactor the database layer", files: [makeFile()] }), + ); + expect(block).toContain('The following file summaries are relevant to the task: "refactor the database layer"'); + }); + + it("includes each file path as a heading with its summary", () => { + const block = buildCodemapInjectionBlock( + makeResult({ + files: [ + makeFile({ path: "src/a.ts", summary: "Summary A." }), + makeFile({ path: "src/b.ts", summary: "Summary B." }), + ], + }), + ); + expect(block).toContain("### src/a.ts"); + expect(block).toContain("Summary A."); + expect(block).toContain("### src/b.ts"); + expect(block).toContain("Summary B."); + }); + + it("preserves file order from the result", () => { + const block = buildCodemapInjectionBlock( + makeResult({ + files: [ + makeFile({ path: "src/zzz.ts", summary: "last" }), + makeFile({ path: "src/aaa.ts", summary: "first" }), + ], + }), + ); + const zzzIdx = block.indexOf("src/zzz.ts"); + const aaaIdx = block.indexOf("src/aaa.ts"); + expect(zzzIdx).toBeLessThan(aaaIdx); + }); +}); + +describe("codemap buildCodemapInjectionBlock — staleness tags", () => { + it("appends [STALE: file changed] when stale and not missing", () => { + const block = buildCodemapInjectionBlock(makeResult({ files: [makeFile({ stale: true, missing: false })] })); + expect(block).toContain("### src/auth.ts [STALE: file changed]"); + }); + + it("appends [STALE: file missing] when stale and missing", () => { + const block = buildCodemapInjectionBlock(makeResult({ files: [makeFile({ stale: true, missing: true })] })); + expect(block).toContain("### src/auth.ts [STALE: file missing]"); + }); + + it("appends no tag when fresh", () => { + const block = buildCodemapInjectionBlock(makeResult({ files: [makeFile({ stale: false, missing: false })] })); + expect(block).toContain("### src/auth.ts\n"); + expect(block).not.toContain("[STALE"); + }); +}); + +describe("codemap buildCodemapInjectionBlock — meta footer", () => { + it("includes 
file count and estimated tokens in footer", () => { + const block = buildCodemapInjectionBlock( + makeResult({ + files: [makeFile()], + meta: { fileCount: 1, estimatedTokens: 42, truncated: false }, + }), + ); + expect(block).toContain("_1 summaries, ~42 tokens_"); + expect(block).not.toContain("(truncated)"); + }); + + it("appends (truncated) when meta.truncated is true", () => { + const block = buildCodemapInjectionBlock( + makeResult({ + files: [makeFile()], + meta: { fileCount: 1, estimatedTokens: 42, truncated: true }, + }), + ); + expect(block).toContain("_1 summaries, ~42 tokens (truncated)_"); + }); +}); diff --git a/packages/coding-agent/src/task-context/__tests__/retrieve.test.ts b/packages/coding-agent/src/task-context/__tests__/retrieve.test.ts new file mode 100644 index 0000000000..022845be48 --- /dev/null +++ b/packages/coding-agent/src/task-context/__tests__/retrieve.test.ts @@ -0,0 +1,171 @@ +import { describe, expect, it } from "bun:test"; +import { extractKeywords, packBudget, reciprocalRankFusion, splitTokens, tokenCost } from "../retrieve"; +import type { RankedSummary } from "../store"; + +// These tests exercise the REAL exported functions from retrieve.ts. +// No re-implementation — if retrieve.ts drifts, these tests catch it. 
+ +function makeRankedSummary(id: number, filePath: string, summaryText: string, score: number): RankedSummary { + return { + id, + projectLabel: "test", + filePath, + summaryText, + contentHash: "h", + symbolName: null, + symbolKind: null, + symbolLineRange: null, + source: "agent", + updatedAt: "2026-01-01", + score, + }; +} + +describe("codemap extractKeywords", () => { + it("tokenizes on non-alphanumeric and lowercases", () => { + expect(extractKeywords("Database Connection-Pool")).toEqual(["database", "connection", "pool"]); + }); + + it("drops tokens shorter than 3 chars", () => { + expect(extractKeywords("ab cd ef gh ij")).toEqual([]); + }); + + it("drops stopwords", () => { + expect(extractKeywords("how does the authentication work")).toEqual(["authentication"]); + }); + + it("returns empty for empty string", () => { + expect(extractKeywords("")).toEqual([]); + }); + + it("returns empty for only-stopword input", () => { + expect(extractKeywords("the and for with this that")).toEqual([]); + }); +}); + +describe("codemap splitTokens", () => { + it("splits camelCase tokens", () => { + const result = splitTokens(["buildSystemPrompt"]); + expect(result).toContain("buildSystemPrompt"); + expect(result).toContain("build"); + expect(result).toContain("System"); + expect(result).toContain("Prompt"); + }); + + it("splits snake_case tokens", () => { + const result = splitTokens(["get_task_context"]); + expect(result).toContain("get_task_context"); + expect(result).toContain("get"); + expect(result).toContain("task"); + expect(result).toContain("context"); + }); + + it("deduplicates results", () => { + const result = splitTokens(["token", "token"]); + expect(result).toEqual(["token"]); + }); + + it("passes through simple tokens unchanged", () => { + expect(splitTokens(["simple"])).toEqual(["simple"]); + }); +}); + +describe("codemap budget packer", () => { + it("uses codemap's documented token formula: ceil(chars/4) + 20", () => { + expect(tokenCost("")).toBe(20); + 
expect(tokenCost("hello")).toBe(22); + expect(tokenCost("hello world")).toBe(23); + expect(tokenCost("a".repeat(80))).toBe(40); + expect(tokenCost("a".repeat(1000))).toBe(270); + }); + + it("packs all summaries when under budget", () => { + const summaries = [ + makeRankedSummary(1, "a.ts", "short", 0.9), + makeRankedSummary(2, "b.ts", "also short", 0.8), + makeRankedSummary(3, "c.ts", "brief", 0.7), + ]; + const result = packBudget(summaries, 1000, 10, "/tmp"); + expect(result.files.length).toBe(3); + expect(result.truncated).toBe(false); + expect(result.estimatedTokens).toBe(tokenCost("short") + tokenCost("also short") + tokenCost("brief")); + }); + + it("stops when token budget is exhausted", () => { + const summaries = Array.from({ length: 10 }, (_, i) => + makeRankedSummary(i + 1, `file${i}.ts`, "x".repeat(100), 1 - i * 0.1), + ); + const result = packBudget(summaries, 100, 10, "/tmp"); + expect(result.files.length).toBe(2); + expect(result.truncated).toBe(true); + expect(result.estimatedTokens).toBe(90); + }); + + it("respects maxFiles limit", () => { + const summaries = Array.from({ length: 20 }, (_, i) => + makeRankedSummary(i + 1, `f${i}.ts`, "short", 1 - i * 0.05), + ); + const result = packBudget(summaries, 10000, 5, "/tmp"); + expect(result.files.length).toBe(5); + expect(result.truncated).toBe(true); + }); + + it("packs highest-scored summaries first", () => { + const summaries = [ + makeRankedSummary(1, "low.ts", "low priority", 0.3), + makeRankedSummary(2, "high.ts", "high priority", 0.95), + makeRankedSummary(3, "med.ts", "medium", 0.6), + ]; + const sorted = [...summaries].sort((a, b) => b.score - a.score); + const result = packBudget(sorted, 100, 2, "/tmp"); + expect(result.files[0].path).toBe("high.ts"); + expect(result.files[1].path).toBe("med.ts"); + }); + + it("always includes at least one file if any exist", () => { + const summaries = [makeRankedSummary(1, "big.ts", "x".repeat(10000), 0.9)]; + const result = packBudget(summaries, 100, 10, 
"/tmp"); + expect(result.files.length).toBe(1); + expect(result.estimatedTokens).toBe(tokenCost("x".repeat(10000))); + }); +}); + +describe("codemap reciprocal rank fusion", () => { + it("fuses FTS and vector results by rank position", () => { + const fts = [ + makeRankedSummary(1, "a.ts", "s", 0.9), + makeRankedSummary(2, "b.ts", "s", 0.8), + makeRankedSummary(3, "c.ts", "s", 0.7), + ]; + const vector = [makeRankedSummary(2, "b.ts", "s", 0.95), makeRankedSummary(4, "d.ts", "s", 0.85)]; + const fused = reciprocalRankFusion(fts, vector, 0.7, 0.3); + expect(fused[0].id).toBe(2); + expect(fused[0].score).toBeGreaterThan(fused[1].score); + }); + + it("deduplicates by id", () => { + const fts = [makeRankedSummary(5, "x.ts", "s", 0.9)]; + const vector = [makeRankedSummary(5, "x.ts", "s", 0.8)]; + const fused = reciprocalRankFusion(fts, vector, 0.7, 0.3); + expect(fused.length).toBe(1); + expect(fused[0].id).toBe(5); + }); + + it("returns empty when both channels are empty", () => { + expect(reciprocalRankFusion([], [], 0.7, 0.3)).toHaveLength(0); + }); + + it("handles FTS-only results", () => { + const fts = [makeRankedSummary(1, "a.ts", "s", 0.9), makeRankedSummary(2, "b.ts", "s", 0.8)]; + const fused = reciprocalRankFusion(fts, [], 0.7, 0.3); + expect(fused.length).toBe(2); + expect(fused[0].id).toBe(1); + }); + + it("FTS weight (0.7) is higher than vector weight (0.3)", () => { + const fts = [makeRankedSummary(1, "a.ts", "s", 0.5)]; + const vector = [makeRankedSummary(2, "b.ts", "s", 0.5)]; + const fused = reciprocalRankFusion(fts, vector, 0.7, 0.3); + expect(fused[0].id).toBe(1); + }); +}); diff --git a/packages/coding-agent/src/task-context/__tests__/staleness.test.ts b/packages/coding-agent/src/task-context/__tests__/staleness.test.ts new file mode 100644 index 0000000000..f503c41de8 --- /dev/null +++ b/packages/coding-agent/src/task-context/__tests__/staleness.test.ts @@ -0,0 +1,76 @@ +import { describe, expect, it } from "bun:test"; +import * as fs from 
"node:fs/promises"; +import * as os from "node:os"; +import * as path from "node:path"; +import { checkStaleness, computeFileHash } from "../staleness"; + +describe("codemap staleness", () => { + it("returns empty hash for a missing file (no throw)", async () => { + const hash = await computeFileHash("/nonexistent/path/to/missing-file.ts"); + expect(hash).toBe(""); + }); + + it("returns a non-empty hex hash for an existing file", async () => { + const tmp = path.join(os.tmpdir(), `codemap-test-${Date.now()}.ts`); + await fs.writeFile(tmp, "export const x = 1;"); + const hash = await computeFileHash(tmp); + expect(hash).toBeTruthy(); + expect(hash).toMatch(/^[0-9a-f]+$/); + await fs.unlink(tmp); + }); + + it("returns the same hash for unchanged content", async () => { + const tmp = path.join(os.tmpdir(), `codemap-test-stable-${Date.now()}.ts`); + await fs.writeFile(tmp, "export const y = 2;"); + const hash1 = await computeFileHash(tmp); + const hash2 = await computeFileHash(tmp); + expect(hash1).toBe(hash2); + await fs.unlink(tmp); + }); + + it("returns a different hash after file content changes", async () => { + const tmp = path.join(os.tmpdir(), `codemap-test-change-${Date.now()}.ts`); + await fs.writeFile(tmp, "export const original = 1;"); + const hash1 = await computeFileHash(tmp); + await fs.writeFile(tmp, "export const modified = 2;"); + const hash2 = await computeFileHash(tmp); + expect(hash1).not.toBe(hash2); + await fs.unlink(tmp); + }); + + it("marks stale=true when stored hash differs from current", async () => { + const tmp = path.join(os.tmpdir(), `codemap-test-stale-${Date.now()}.ts`); + await fs.writeFile(tmp, "original content"); + const result = await checkStaleness(tmp, "deadbeef"); + expect(result.stale).toBe(true); + expect(result.missing).toBe(false); + await fs.unlink(tmp); + }); + + it("marks stale=false when stored hash matches current", async () => { + const tmp = path.join(os.tmpdir(), `codemap-test-fresh-${Date.now()}.ts`); + await 
fs.writeFile(tmp, "stable content"); + const hash = await computeFileHash(tmp); + const result = await checkStaleness(tmp, hash); + expect(result.stale).toBe(false); + expect(result.missing).toBe(false); + await fs.unlink(tmp); + }); + + it("marks missing=true when file does not exist", async () => { + const result = await checkStaleness("/nonexistent/file.ts", "somehash"); + expect(result.stale).toBe(true); + expect(result.missing).toBe(true); + expect(result.contentHash).toBe(""); + }); + + it("marks missing=true when file was deleted after summary was written", async () => { + const tmp = path.join(os.tmpdir(), `codemap-test-deleted-${Date.now()}.ts`); + await fs.writeFile(tmp, "content that will disappear"); + const hash = await computeFileHash(tmp); + await fs.unlink(tmp); + const result = await checkStaleness(tmp, hash); + expect(result.stale).toBe(true); + expect(result.missing).toBe(true); + }); +}); diff --git a/packages/coding-agent/src/task-context/__tests__/state.test.ts b/packages/coding-agent/src/task-context/__tests__/state.test.ts new file mode 100644 index 0000000000..e31a0ef78b --- /dev/null +++ b/packages/coding-agent/src/task-context/__tests__/state.test.ts @@ -0,0 +1,121 @@ +import { describe, expect, it } from "bun:test"; +import type { Client } from "@libsql/client"; +import type { AgentSession } from "../../session/agent-session"; +import type { CodemapConfig } from "../config"; +import { getCodemapSessionState, hasFirstTurnInjected, markFirstTurnInjected, setCodemapSessionState } from "../state"; + +// Minimal stand-in for AgentSession — state.ts only reads/writes a Symbol-keyed +// property, so a plain object suffices. 
+function makeSession(): AgentSession { + return {} as AgentSession; +} + +function makeConfig(): CodemapConfig { + return { + enabled: true, + autoInject: true, + dbPath: "/tmp/codemap.db", + tokenBudget: 8000, + maxResults: 20, + maxSummaryChars: 1000, + turso: { syncUrl: "", authToken: "", autoProvision: false, org: "" }, + embedding: { + model: "BAAI/bge-base-en-v1.5", + variant: "en", + apiUrl: undefined, + apiKey: undefined, + dimensions: 768, + }, + }; +} + +function makeClient(): Client { + return {} as Client; +} + +describe("codemap state — getCodemapSessionState", () => { + it("returns undefined for a fresh session with no state set", () => { + expect(getCodemapSessionState(makeSession())).toBeUndefined(); + }); + + it("returns undefined when session is undefined", () => { + expect(getCodemapSessionState(undefined)).toBeUndefined(); + }); +}); + +describe("codemap state — setCodemapSessionState roundtrip", () => { + it("stores state and getCodemapSessionState returns it", () => { + const session = makeSession(); + const state = { + client: makeClient(), + config: makeConfig(), + hasInjectedForFirstTurn: false, + }; + setCodemapSessionState(session, state); + expect(getCodemapSessionState(session)).toBe(state); + }); + + it("returns the previous state when overwriting", () => { + const session = makeSession(); + const first = { + client: makeClient(), + config: makeConfig(), + hasInjectedForFirstTurn: false, + }; + const second = { + client: makeClient(), + config: makeConfig(), + hasInjectedForFirstTurn: true, + }; + setCodemapSessionState(session, first); + const previous = setCodemapSessionState(session, second); + expect(previous).toBe(first); + expect(getCodemapSessionState(session)).toBe(second); + }); + + it("clears state when passed undefined and returns previous", () => { + const session = makeSession(); + const state = { + client: makeClient(), + config: makeConfig(), + hasInjectedForFirstTurn: false, + }; + setCodemapSessionState(session, 
state); + const previous = setCodemapSessionState(session, undefined); + expect(previous).toBe(state); + expect(getCodemapSessionState(session)).toBeUndefined(); + }); +}); + +describe("codemap state — first-turn injection flag", () => { + it("hasFirstTurnInjected returns false when no state exists", () => { + expect(hasFirstTurnInjected(makeSession())).toBe(false); + }); + + it("hasFirstTurnInjected returns false initially when state exists", () => { + const session = makeSession(); + setCodemapSessionState(session, { + client: makeClient(), + config: makeConfig(), + hasInjectedForFirstTurn: false, + }); + expect(hasFirstTurnInjected(session)).toBe(false); + }); + + it("markFirstTurnInjected flips the flag to true", () => { + const session = makeSession(); + setCodemapSessionState(session, { + client: makeClient(), + config: makeConfig(), + hasInjectedForFirstTurn: false, + }); + markFirstTurnInjected(session); + expect(hasFirstTurnInjected(session)).toBe(true); + }); + + it("markFirstTurnInjected is a no-op when no state exists", () => { + const session = makeSession(); + markFirstTurnInjected(session); // must not throw + expect(hasFirstTurnInjected(session)).toBe(false); + }); +}); diff --git a/packages/coding-agent/src/task-context/__tests__/token-usage.test.ts b/packages/coding-agent/src/task-context/__tests__/token-usage.test.ts new file mode 100644 index 0000000000..a6f4439e7e --- /dev/null +++ b/packages/coding-agent/src/task-context/__tests__/token-usage.test.ts @@ -0,0 +1,308 @@ +import { describe, expect, it } from "bun:test"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { type Client, createClient } from "@libsql/client"; +import type { CodemapConfig } from "../config"; +import { closeCodemapDb } from "../db"; +import { getTaskContext, tokenCost } from "../retrieve"; +import { initSchema } from "../schema"; +import { type SummaryRow, upsertSummary } from "../store"; + +// Token usage tests 
verify the budget packer produces correct estimatedTokens +// and that getTaskContext responses stay within the configured token budget. + +const PROJECT = "token-test-project"; + +function makeConfig(dbPath: string, overrides: Partial<CodemapConfig> = {}): CodemapConfig { + return { + enabled: true, + autoInject: true, + dbPath, + tokenBudget: 8000, + maxResults: 20, + maxSummaryChars: 1000, + turso: { syncUrl: "", authToken: "", autoProvision: false, org: "" }, + embedding: { + model: "BAAI/bge-base-en-v1.5", + variant: "en", + apiUrl: undefined, + apiKey: undefined, + dimensions: 768, + }, + ...overrides, + }; +} + +function makeTmpDb(): { client: Client; dbPath: string; cleanup: () => Promise<void> } { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "codemap-token-")); + const dbPath = path.join(tmpDir, "codemap-token.db"); + const client = createClient({ url: `file:${dbPath}` }); + return { + client, + dbPath, + cleanup: async () => { + await closeCodemapDb(client); + try { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } catch { + // Best-effort — libSQL may hold handles briefly + } + }, + }; +} + +async function setupSummaries( + client: Client, + summaries: Array<{ filePath: string; text: string; hash?: string }>, +): Promise<SummaryRow[]> { + const rows: SummaryRow[] = []; + for (const s of summaries) { + const row = await upsertSummary(client, { + projectLabel: PROJECT, + filePath: s.filePath, + summaryText: s.text, + contentHash: s.hash ?? 
"h", + maxSummaryChars: 1000, + }); + rows.push(row); + } + return rows; +} + +describe("codemap token formula", () => { + it("matches codemap's documented formula: ceil(chars/4) + 20", () => { + expect(tokenCost("")).toBe(20); + expect(tokenCost("a")).toBe(21); + expect(tokenCost("abcd")).toBe(21); + expect(tokenCost("abcde")).toBe(22); + expect(tokenCost("a".repeat(100))).toBe(45); + expect(tokenCost("a".repeat(1000))).toBe(270); + }); + + it("the +20 is per-file overhead (markdown heading + metadata)", () => { + // Two files with 40 chars each: 2 * (ceil(40/4) + 20) = 2 * 30 = 60 + const single = tokenCost("a".repeat(40)); + const double = tokenCost("a".repeat(40)) + tokenCost("a".repeat(40)); + expect(single).toBe(30); + expect(double).toBe(60); + expect(double).toBe(single * 2); + }); + + it("produces reasonable estimates for typical summary lengths", () => { + // A typical 1-3 sentence summary is ~50-200 chars + const short = tokenCost("Validates token and updates hash."); // 33 chars + const medium = tokenCost( + "Handles password hashing with bcrypt, token validation via JWT, and session management with Redis cache.", + ); // 104 chars + const long = tokenCost( + "This module implements the authentication flow: validates JWT tokens from the Authorization header, checks token expiry against the Redis cache, refreshes expired tokens via the refresh endpoint, and logs authentication events to the audit trail. 
Depends on bcrypt for password hashing and jsonwebtoken for token signing.", + ); + expect(short).toBeLessThan(40); + expect(medium).toBeLessThan(60); + expect(long).toBeLessThan(120); + }); + + it("respects the 1000-char maxSummaryChars cap", () => { + // A summary capped at 1000 chars: ceil(1000/4) + 20 = 270 tokens + expect(tokenCost("a".repeat(1000))).toBe(270); + // This is well under the default 8000 token budget + expect(270).toBeLessThan(8000); + }); +}); + +describe("codemap budget packer token bounds", () => { + it("estimatedTokens never exceeds budget when multiple files fit", async () => { + const { client, dbPath, cleanup } = makeTmpDb(); + await initSchema(client); + const config = makeConfig(dbPath, { tokenBudget: 200 }); + await setupSummaries(client, [ + { filePath: "a.ts", text: "Short summary A." }, + { filePath: "b.ts", text: "Short summary B." }, + { filePath: "c.ts", text: "Short summary C." }, + ]); + + const result = await getTaskContext(client, config, "short summary", PROJECT, os.tmpdir(), { + maxFiles: 10, + tokenBudget: 200, + }); + + expect(result.meta.estimatedTokens).toBeLessThanOrEqual(200); + await cleanup(); + }); + + it("always includes at least one file even if it exceeds budget", async () => { + const { client, dbPath, cleanup } = makeTmpDb(); + await initSchema(client); + const config = makeConfig(dbPath, { tokenBudget: 10 }); + const longText = `searchterm ${"a".repeat(500)}`; // tokenCost = ceil(512/4)+20 = 148 + await setupSummaries(client, [{ filePath: "big.ts", text: longText }]); + + const result = await getTaskContext(client, config, "searchterm", PROJECT, os.tmpdir(), { + maxFiles: 5, + tokenBudget: 10, + }); + + // Even with a 10-token budget, the packer includes at least 1 file + expect(result.files.length).toBeGreaterThanOrEqual(1); + expect(result.meta.estimatedTokens).toBe(tokenCost(longText)); + await cleanup(); + }); + + it("packs more files with a larger budget", async () => { + const { client, dbPath, cleanup } 
= makeTmpDb(); + await initSchema(client); + const config = makeConfig(dbPath); + + // Insert 20 summaries, each ~50 chars (tokenCost = ceil(50/4)+20 = 33) + const summaries = Array.from({ length: 20 }, (_, i) => ({ + filePath: `file${i}.ts`, + text: `Summary about database and authentication topic ${i}.`, + })); + await setupSummaries(client, summaries); + + const smallBudget = await getTaskContext(client, config, "database authentication", PROJECT, os.tmpdir(), { + maxFiles: 20, + tokenBudget: 100, + }); + const largeBudget = await getTaskContext(client, config, "database authentication", PROJECT, os.tmpdir(), { + maxFiles: 20, + tokenBudget: 8000, + }); + + // With 100-token budget: ~3 files (33 each) + expect(smallBudget.files.length).toBeLessThan(largeBudget.files.length); + expect(smallBudget.meta.estimatedTokens).toBeLessThan(largeBudget.meta.estimatedTokens); + await cleanup(); + }); + + it("truncates when results exceed budget", async () => { + const { client, dbPath, cleanup } = makeTmpDb(); + await initSchema(client); + const config = makeConfig(dbPath); + + const summaries = Array.from({ length: 30 }, (_, i) => ({ + filePath: `mod${i}.ts`, + text: `Database connection pool authentication module number ${i}.`, + })); + await setupSummaries(client, summaries); + + const result = await getTaskContext(client, config, "database authentication pool", PROJECT, os.tmpdir(), { + maxFiles: 5, + tokenBudget: 8000, + }); + + // 30 summaries match, only 5 fit in maxFiles → truncated + expect(result.meta.truncated).toBe(true); + expect(result.files.length).toBe(5); + await cleanup(); + }); +}); + +describe("codemap getTaskContext token efficiency", () => { + it("FTS-only retrieval is token-efficient vs reading full files", async () => { + const { client, dbPath, cleanup } = makeTmpDb(); + await initSchema(client); + const config = makeConfig(dbPath); + + // Simulate 10 files, each with a 1000-char summary (max cap) + // Full file reads would be ~10,000 chars ≈ 2,500 
tokens + // Summary retrieval: 10 * (ceil(1000/4)+20) = 10 * 270 = 2,700 tokens + // But with budget=8000, all 10 fit + const summaries = Array.from({ length: 10 }, (_, i) => ({ + filePath: `src/module${i}.ts`, + text: `a`.repeat(1000), + })); + await setupSummaries(client, summaries); + + const result = await getTaskContext(client, config, "module", PROJECT, os.tmpdir(), { + maxFiles: 10, + tokenBudget: 8000, + }); + + // All 10 summaries fit within the 8000 token budget + expect(result.files.length).toBe(10); + expect(result.meta.estimatedTokens).toBe(2700); // 10 * 270 + expect(result.meta.estimatedTokens).toBeLessThan(8000); + await cleanup(); + }); + + it("budget packing reduces token usage when budget is tight", async () => { + const { client, dbPath, cleanup } = makeTmpDb(); + await initSchema(client); + const config = makeConfig(dbPath); + + // 20 summaries, each 200 chars (tokenCost = ceil(200/4)+20 = 70) + // Budget = 500 → can fit 7 summaries (7*70=490 ≤ 500, 8*70=560 > 500) + const summaries = Array.from({ length: 20 }, (_, i) => ({ + filePath: `src/file${i}.ts`, + text: `x`.repeat(200), + })); + await setupSummaries(client, summaries); + + const result = await getTaskContext(client, config, "file", PROJECT, os.tmpdir(), { + maxFiles: 20, + tokenBudget: 500, + }); + + expect(result.files.length).toBe(7); + expect(result.meta.estimatedTokens).toBe(490); + expect(result.meta.estimatedTokens).toBeLessThanOrEqual(500); + expect(result.meta.truncated).toBe(true); + await cleanup(); + }); + + it("empty result has zero token cost", async () => { + const { client, dbPath, cleanup } = makeTmpDb(); + await initSchema(client); + const config = makeConfig(dbPath); + + const result = await getTaskContext(client, config, "zzznomatchxyz", PROJECT, os.tmpdir()); + + expect(result.files).toHaveLength(0); + expect(result.meta.estimatedTokens).toBe(0); + expect(result.meta.fileCount).toBe(0); + await cleanup(); + }); + + it("single file result has exact token cost 
matching the formula", async () => { + const { client, dbPath, cleanup } = makeTmpDb(); + await initSchema(client); + const config = makeConfig(dbPath); + + const text = "Validates JWT tokens and manages session state with Redis."; + await setupSummaries(client, [{ filePath: "src/auth.ts", text }]); + + const result = await getTaskContext(client, config, "validates jwt tokens", PROJECT, os.tmpdir(), { + maxFiles: 10, + tokenBudget: 8000, + }); + + expect(result.files).toHaveLength(1); + expect(result.meta.estimatedTokens).toBe(tokenCost(text)); + await cleanup(); + }); + + it("default 8000 token budget accommodates 20+ typical summaries", async () => { + const { client, dbPath, cleanup } = makeTmpDb(); + await initSchema(client); + const config = makeConfig(dbPath); // default tokenBudget=8000 + + // 30 typical summaries (~57 chars each, tokenCost = ceil(57/4)+20 = 35) + // 8000 / 35 ≈ 228 summaries could fit — well beyond 30 + const summaries = Array.from({ length: 30 }, (_, i) => ({ + filePath: `src/svc${i}.ts`, + text: `Service module that handles business logic for domain ${i}.`, + })); + await setupSummaries(client, summaries); + + const result = await getTaskContext(client, config, "service module domain", PROJECT, os.tmpdir(), { + maxFiles: 30, + tokenBudget: 8000, + }); + + expect(result.meta.estimatedTokens).toBeLessThanOrEqual(8000); + expect(result.files.length).toBeGreaterThan(10); // most/all fit + await cleanup(); + }); +}); diff --git a/packages/coding-agent/src/task-context/__tests__/tools.test.ts b/packages/coding-agent/src/task-context/__tests__/tools.test.ts new file mode 100644 index 0000000000..51a63e51c8 --- /dev/null +++ b/packages/coding-agent/src/task-context/__tests__/tools.test.ts @@ -0,0 +1,141 @@ +import { afterAll, beforeAll, describe, expect, it } from "bun:test"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { type Client, createClient } from "@libsql/client"; +import { 
Settings } from "../../config/settings"; +import type { ToolSession } from "../../tools"; +import { closeCodemapDb } from "../db"; +import { initSchema } from "../schema"; +import { DeleteFileSummaryTool, GetFileSummaryTool, GetTaskContextTool, SetFileSummaryTool } from "../tools"; + +// --- Test fixtures ---------------------------------------------------------- + +let tmpDir: string; +let cwd: string; +let dbPath: string; +let client: Client; + +beforeAll(async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "codemap-tools-")); + cwd = path.join(tmpDir, "project"); + fs.mkdirSync(cwd, { recursive: true }); + dbPath = path.join(tmpDir, "codemap-test.db"); + client = createClient({ url: `file:${dbPath}` }); + await initSchema(client); +}); + +afterAll(async () => { + await closeCodemapDb(client); + try { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } catch { + // Best-effort — Windows may hold libSQL handles. + } +}); + +function makeSession(settings: Settings): ToolSession { + // The tool's getClient reads settings + opens a DB at dbPath. + // We point dbPath at our temp DB so execute() uses a real client. 
+ return { settings, cwd } as unknown as ToolSession; +} + +function enabledSettings(): Settings { + return Settings.isolated({ "codemap.enabled": true, "codemap.dbPath": dbPath }); +} + +function disabledSettings(): Settings { + return Settings.isolated({ "codemap.enabled": false }); +} + +const TOOL_CREATE_IF = [ + SetFileSummaryTool.createIf, + GetFileSummaryTool.createIf, + GetTaskContextTool.createIf, + DeleteFileSummaryTool.createIf, +] as const; + +// --- createIf gating -------------------------------------------------------- + +describe("codemap tools — createIf gating", () => { + it("returns null for all four tools when codemap.enabled is false", () => { + const session = makeSession(disabledSettings()); + for (const createIf of TOOL_CREATE_IF) { + expect(createIf(session)).toBeNull(); + } + }); + + it("returns a tool instance for all four tools when codemap.enabled is true", () => { + const session = makeSession(enabledSettings()); + for (const createIf of TOOL_CREATE_IF) { + const tool = createIf(session); + expect(tool).not.toBeNull(); + expect(typeof tool!.execute).toBe("function"); + } + }); + + it("returned instances expose the expected tool name", () => { + const session = makeSession(enabledSettings()); + expect(SetFileSummaryTool.createIf(session)?.name).toBe("set_file_summary"); + expect(GetFileSummaryTool.createIf(session)?.name).toBe("get_file_summary"); + expect(GetTaskContextTool.createIf(session)?.name).toBe("get_task_context"); + expect(DeleteFileSummaryTool.createIf(session)?.name).toBe("delete_file_summary"); + }); +}); + +// --- toStoredPath path-traversal guard -------------------------------------- +// toStoredPath is private but runs at the START of each execute() (before +// getClient), so traversal attempts must throw before any DB access. 
// Path-traversal guard: every tool must reject a `file` argument that resolves
// outside the session cwd.
//
// `expect(promise).rejects` returns a promise of its own. Each assertion below is
// awaited so the test only finishes once the rejection has actually been checked;
// without the `await` the test body returns first and passes vacuously.
describe("codemap tools — path traversal guard rejects escapes", () => {
	it("rejects ../../etc/passwd via SetFileSummaryTool", async () => {
		const tool = SetFileSummaryTool.createIf(makeSession(enabledSettings()))!;
		await expect(tool.execute("id", { file: "../../etc/passwd", summary: "x" })).rejects.toThrow(
			/outside the project directory/,
		);
	});

	it("rejects absolute /etc/passwd via GetFileSummaryTool", async () => {
		const tool = GetFileSummaryTool.createIf(makeSession(enabledSettings()))!;
		await expect(tool.execute("id", { file: "/etc/passwd" })).rejects.toThrow(/outside the project directory/);
	});

	it("rejects deeply nested src/../../../etc/shadow via DeleteFileSummaryTool", async () => {
		const tool = DeleteFileSummaryTool.createIf(makeSession(enabledSettings()))!;
		await expect(tool.execute("id", { file: "src/../../../etc/shadow" })).rejects.toThrow(
			/outside the project directory/,
		);
	});
});
+ await fs.promises.mkdir(path.join(cwd, "lib"), { recursive: true }); + await fs.promises.writeFile(path.join(cwd, "lib", "utils.ts"), "export const x = 1;"); + + const session = makeSession(enabledSettings()); + const setTool = SetFileSummaryTool.createIf(session)!; + const setResult = await setTool.execute("id", { file: "src/../lib/utils.ts", summary: "Utility helpers." }); + expect(setResult.details).toHaveProperty("id"); + + // The stored path should be normalized to "lib/utils.ts". + const getTool = GetFileSummaryTool.createIf(session)!; + const getResult = await getTool.execute("id", { file: "lib/utils.ts" }); + expect(getResult.details).toMatchObject({ found: true }); + }); +}); diff --git a/packages/coding-agent/src/task-context/adapter.ts b/packages/coding-agent/src/task-context/adapter.ts new file mode 100644 index 0000000000..f2880cc232 --- /dev/null +++ b/packages/coding-agent/src/task-context/adapter.ts @@ -0,0 +1,121 @@ +import { logger } from "@oh-my-pi/pi-utils"; + +export interface SymbolAnchor { + name: string; + kind: "function" | "class" | "method" | "interface" | "type" | "variable" | "const"; + startLine: number; + endLine: number; +} + +export interface LanguageAdapter { + /** File extensions this adapter handles (e.g. ['.ts', '.tsx', '.js', '.jsx']) */ + extensions: readonly string[]; + /** Extract the symbol at a given line, or null if the line isn't in a symbol. */ + getSymbolAtLine(filePath: string, line: number): SymbolAnchor | null; + /** Extract all top-level symbols in a file. */ + getSymbols(filePath: string): SymbolAnchor[]; +} + +// Map LSP SymbolKind numbers to our string kind. See lsp/types.ts SYMBOL_KIND_NAMES. +// SymbolKind values: 1=File, 2=Module, 3=Namespace, 4=Package, 5=Class, 6=Method, +// 7=Property, 8=Field, 9=Constructor, 10=Enum, 11=Interface, 12=Function, +// 13=Variable, 14=Constant, 15=String, 16=Number, 17=Boolean, 18=Array, etc. 
/**
 * LSP `SymbolKind` numbers that map to a dedicated anchor kind.
 * Constructors (9) are reported as methods and structs (23) as types.
 */
const ANCHOR_KIND_BY_LSP_KIND: ReadonlyMap<number, SymbolAnchor["kind"]> = new Map<number, SymbolAnchor["kind"]>([
	[5, "class"],
	[6, "method"],
	[9, "method"],
	[11, "interface"],
	[12, "function"],
	[13, "variable"],
	[14, "const"],
	[23, "type"],
]);

/**
 * Translate a numeric LSP `SymbolKind` into the string kind stored on a
 * `SymbolAnchor`. Any kind without an entry in the table falls back to
 * `"variable"`.
 */
function lspKindToAnchorKind(kind: number): SymbolAnchor["kind"] {
	return ANCHOR_KIND_BY_LSP_KIND.get(kind) ?? "variable";
}
/** Extensions served by the TypeScript/JavaScript adapter (lower-case, with leading dot). */
const TS_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".mjs", ".mts", ".cjs"];

/**
 * Resolve a language adapter for `filePath` when no LSP symbol provider is
 * available.
 *
 * Always returns null. The only adapter in this module is the TS/JS one, and it
 * cannot be constructed without a symbol provider, so TS/JS paths are served by
 * `getAdapterWithProvider` instead. The previous implementation computed the
 * extension and then returned null from both branches; that dead conditional
 * has been removed.
 *
 * @param filePath Path of the file an adapter is wanted for (currently unused).
 * @returns null for every input.
 */
export function getAdapter(filePath: string): LanguageAdapter | null {
	// Kept in the signature for API stability; nothing to inspect yet.
	void filePath;
	return null;
}
/**
 * Resolve a language adapter for `filePath`, backed by the given LSP symbol
 * provider.
 *
 * The extension is everything from the last `.` in the path, compared
 * case-insensitively against `TS_EXTENSIONS`.
 *
 * @param filePath Path of the file an adapter is wanted for.
 * @param provider Source of LSP document symbols handed to the adapter.
 * @returns A new `TsAdapter` for TS/JS extensions; null when the path has no
 *          `.` or the extension is not handled.
 */
export function getAdapterWithProvider(filePath: string, provider: LspDocumentSymbolProvider): LanguageAdapter | null {
	const dotIndex = filePath.lastIndexOf(".");
	if (dotIndex === -1) return null;

	const extension = filePath.slice(dotIndex).toLowerCase();
	return TS_EXTENSIONS.includes(extension) ? new TsAdapter(provider) : null;
}
/**
 * Open a libSQL/Turso client for the codemap summaries store.
 *
 * Connection resolution order:
 *   1. If `turso.syncUrl` + `turso.authToken` are both set → embedded replica
 *      mode (local file + remote sync).
 *   2. Otherwise → local file-only mode.
 *
 * After opening, runs schema bootstrap (`initSchema`) and — for embedded replica
 * mode — an initial sync followed by post-sync maintenance. A failed initial
 * sync is logged and the local-only client is still returned.
 *
 * @param config Resolved codemap configuration (DB path, Turso credentials,
 *               embedding dimensions).
 * @returns The opened client; the caller owns it and must close it.
 * @throws Whatever `createClient` or `initSchema` throws. When schema bootstrap
 *         fails the client is closed before the error is re-thrown, so no
 *         handle is leaked.
 */
export async function openCodemapDb(config: CodemapConfig): Promise<Client> {
	// Dynamic import: @libsql/client loads a native NAPI binding (libsql) that
	// must NOT load at CLI startup when codemap is disabled. Matches the
	// loadFastembedOnce pattern in mnemopi/src/core/fastembed-runtime.ts:59-77
	// — optional native peers are lazy-loaded via `await import()`.
	const { createClient } = await import("@libsql/client");
	const hasTursoSync = Boolean(config.turso.syncUrl && config.turso.authToken);
	const client = createClient({
		url: `file:${config.dbPath}`,
		...(hasTursoSync ? { syncUrl: config.turso.syncUrl, authToken: config.turso.authToken } : {}),
	});

	// Schema bootstrap. The client is already open here, so release it if the
	// bootstrap fails instead of leaving the file handle dangling.
	try {
		await initSchema(client, config.embedding.dimensions);
	} catch (err) {
		await closeCodemapDb(client);
		throw err;
	}

	if (hasTursoSync) {
		try {
			await client.sync();
			await postSyncMaintenance(client);
		} catch (err) {
			logger.warn("codemap: initial Turso sync failed, continuing with local-only", {
				error: err instanceof Error ? err.message : String(err),
			});
		}
	}

	return client;
}

/**
 * Rebuild the FTS5 index and the vector index after a remote sync.
 *
 * libSQL's `client.sync()` applies a remote changeset to the local embedded
 * replica at a low level. This application path does NOT fire SQL triggers
 * (triggers only fire on local DML executed via the connection, not on
 * replica apply). Since the FTS5 external-content table relies on triggers
 * and the DiskANN vector index relies on base-table change notifications,
 * BOTH indexes can be stale after a sync that pulled remote-side writes.
 *
 * @param client Open codemap client whose schema has already been bootstrapped.
 * @throws If either rebuild statement fails.
 */
export async function postSyncMaintenance(client: Client): Promise<void> {
	// Rebuild FTS5 external-content index from the base table.
	await client.execute("INSERT INTO summaries_fts(summaries_fts) VALUES('rebuild')");
	// Rebuild the DiskANN vector index from scratch.
	await client.execute("REINDEX idx_summaries_embedding");
}
/**
 * Decoupled embedding client for codemap. Creates an independent
 * MnemopiEmbedClient instance — NOT the mnemopi singleton — so codemap
 * embeddings work regardless of whether memory.backend is mnemopi, off,
 * or anything else.
 *
 * Embedding is lazy: the subprocess is spawned only when embed() is first
 * called, and the model is loaded only on first use. set_file_summary writes
 * do NOT trigger embedding — embeddings are generated on retrieval.
 */
let codemapEmbedClient: MnemopiEmbedClient | null = null;
let codemapEmbedModel: MnemopiSubprocessEmbeddingModel | null = null;
/** Cache key of `codemapEmbedModel`; empty while no model is loaded. */
let codemapEmbedModelKey = "";

/** Return the module-wide embed client, creating it on first use. */
function getEmbedClient(): MnemopiEmbedClient {
	codemapEmbedClient ??= new MnemopiEmbedClient();
	return codemapEmbedClient;
}

/**
 * Return the embedding model for `config`, initializing it when none is cached
 * or when the cached one was loaded for a different model/apiUrl pair.
 *
 * The cache key and the model are committed together, and only after a
 * successful initialization. A failed or throwing `initialize` therefore leaves
 * the cache empty (key `""`, model `null`) and the next call simply retries.
 *
 * NOTE(review): `config.apiUrl` is part of the cache key but is not forwarded
 * to `initialize` — confirm whether remote endpoints are meant to be supported.
 *
 * @returns The ready model, or null when initialization yields no model.
 * @throws Whatever `MnemopiEmbedClient.initialize` throws.
 */
async function ensureModel(config: CodemapEmbeddingConfig): Promise<MnemopiSubprocessEmbeddingModel | null> {
	const key = `${config.model}:${config.apiUrl ?? "local"}`;
	if (codemapEmbedModel && key === codemapEmbedModelKey) return codemapEmbedModel;

	// Invalidate the previous entry before the await so a failure below cannot
	// leave a model cached under a key it was not loaded for.
	codemapEmbedModel = null;
	codemapEmbedModelKey = "";

	const model = await getEmbedClient().initialize(config.model, undefined);
	if (!model) {
		logger.warn("codemap: embedding model initialization failed", { model: config.model });
		return null;
	}

	codemapEmbedModel = model;
	codemapEmbedModelKey = key;
	return model;
}
/**
 * Embed a single text string into a vector.
 *
 * Never throws: model initialization runs inside the same `try` as the
 * embedding call, so any failure (model not installed, subprocess failure,
 * embedding error) is logged at debug level and reported as null.
 *
 * @param text Text to embed.
 * @param config Embedding configuration used to select/initialize the model.
 * @returns The embedding vector, or null when embeddings are unavailable.
 */
export async function embedText(text: string, config: CodemapEmbeddingConfig): Promise<number[] | null> {
	try {
		const model = await ensureModel(config);
		if (!model) return null;

		const vectors: number[][] = [];
		for await (const batch of model.embed([text], 1)) {
			vectors.push(...batch);
		}
		return vectors[0] ?? null;
	} catch (err) {
		logger.debug("codemap: embedding generation failed", {
			error: err instanceof Error ? err.message : String(err),
		});
		return null;
	}
}

/**
 * Embed multiple text strings, requesting batches of 32 from the model.
 *
 * Never throws: on any failure every element of the result is null. The result
 * always has the same length and order as `texts`; an input without a
 * corresponding vector gets null.
 *
 * @param texts Texts to embed; an empty array returns `[]` without touching the
 *              model.
 * @param config Embedding configuration used to select/initialize the model.
 * @returns One vector (or null) per input text.
 */
export async function embedBatch(texts: string[], config: CodemapEmbeddingConfig): Promise<(number[] | null)[]> {
	if (texts.length === 0) return [];

	try {
		const model = await ensureModel(config);
		if (!model) return texts.map(() => null);

		const vectors: number[][] = [];
		for await (const batch of model.embed(texts, 32)) {
			vectors.push(...batch);
		}
		// Map results back to input texts by position.
		return texts.map((_, i) => vectors[i] ?? null);
	} catch (err) {
		logger.debug("codemap: batch embedding generation failed", {
			error: err instanceof Error ? err.message : String(err),
		});
		return texts.map(() => null);
	}
}
/**
 * Shut down the codemap embedding subprocess. Called on agent shutdown.
 *
 * All module-level embedding state (client, model, cache key) is cleared before
 * the client is terminated, so a rejected `terminate()` cannot leave a dead
 * client cached for later embed calls. A termination error still propagates to
 * the caller. Calling this with no active client is a no-op.
 */
export async function shutdownCodemapEmbedClient(): Promise<void> {
	const client = codemapEmbedClient;
	codemapEmbedClient = null;
	codemapEmbedModel = null;
	codemapEmbedModelKey = "";

	if (client) {
		await client.terminate();
	}
}
/**
 * Initialize the codemap feature for a session.
 *
 * Loads the codemap config, resolves the Turso configuration, opens the DB and
 * stores the resulting state on the session.
 *
 * Never throws: any initialization error is logged as a warning and the
 * feature is reported as inactive. If the DB was already opened when the error
 * occurred, the client is closed again so the handle is not leaked.
 *
 * @param session Session that will own the codemap state.
 * @param settings Settings used to build the codemap config.
 * @returns true if codemap is active for this session, false if it is disabled
 *          or failed to initialize.
 */
export async function resolveCodemap(session: AgentSession, settings: Settings): Promise<boolean> {
	const config = loadCodemapConfig(settings, getAgentDir());
	if (!config.enabled) return false;

	let client: Awaited<ReturnType<typeof openCodemapDb>> | undefined;
	try {
		// Run Turso auto-provisioning if configured.
		const resolvedConfig = await resolveTursoConfig(config, settings);
		// Open the DB (local file or embedded replica).
		client = await openCodemapDb(resolvedConfig);
		// Store state on the session.
		const state: CodemapSessionState = {
			client,
			config: resolvedConfig,
			hasInjectedForFirstTurn: false,
		};
		setCodemapSessionState(session, state);
		return true;
	} catch (err) {
		// The session never received the client, so nothing else will close it.
		if (client) await closeCodemapDb(client);
		logger.warn("codemap: initialization failed, feature disabled for this session", {
			error: err instanceof Error ? err.message : String(err),
		});
		return false;
	}
}

/**
 * Shut down codemap for a session: close the session's DB client, then shut
 * down the embedding client. A session without codemap state is a no-op.
 *
 * @param session Session whose codemap resources should be released.
 */
export async function shutdownCodemap(session: AgentSession): Promise<void> {
	const state = getCodemapSessionState(session);
	if (!state) return;

	await closeCodemapDb(state.client);
	await shutdownCodemapEmbedClient();
}
/**
 * First-turn injection for codemap (code summaries).
 *
 * Runs REGARDLESS of memory.backend — codemap is a distinct feature axis that
 * composes with any memory backend including "off" (the default). Gated only
 * on `codemap.enabled` and `codemap.autoInject`.
 *
 * Fires once per session via the `hasInjectedForFirstTurn` flag. This function
 * only reads the flag; `markInjected` is invoked after a successful retrieval
 * and is expected to set it so subsequent calls return null.
 *
 * The project label passed to retrieval is the basename of `cwd`.
 *
 * Query embedding is best-effort: if it fails or yields nothing, retrieval
 * still runs without a vector (FTS-only) instead of abandoning the injection.
 *
 * @param settings Source of the `codemap.enabled` / `codemap.autoInject` gates.
 * @param state Codemap session state, or undefined when codemap was not
 *              initialized.
 * @param cwd Project working directory.
 * @param promptText The user's task text, used as the retrieval query.
 * @param markInjected Callback that records that the injection fired.
 * @returns The injection block, or null when:
 *   - codemap is disabled or autoInject is off
 *   - no session state exists
 *   - the first-turn injection already fired
 *   - the retrieval produced an empty block
 *   - any error occurred (swallowed + logged at debug — injection must never
 *     break the agent start)
 */
export async function injectCodemapTaskContext(
	settings: Settings,
	state: CodemapSessionState | undefined,
	cwd: string,
	promptText: string,
	markInjected: () => void,
): Promise<string | null> {
	try {
		if (!settings.get("codemap.enabled")) return null;
		if (!settings.get("codemap.autoInject")) return null;
		if (!state || state.hasInjectedForFirstTurn) return null;

		const projectLabel = path.basename(cwd);

		// Lazy query embedding for vector search. Isolated in its own try so an
		// embedding failure degrades to FTS-only retrieval rather than aborting.
		let queryEmbedding: number[] | null = null;
		try {
			queryEmbedding = await embedText(promptText, state.config.embedding);
		} catch (err) {
			logger.debug("codemap: query embedding failed, using FTS-only retrieval", {
				error: err instanceof Error ? err.message : String(err),
			});
		}

		const result = await getTaskContext(
			state.client,
			state.config,
			promptText,
			projectLabel,
			cwd,
			queryEmbedding ? { queryEmbedding } : {},
		);
		markInjected();
		return buildCodemapInjectionBlock(result) || null;
	} catch (err) {
		logger.debug("codemap: first-turn injection failed", {
			error: err instanceof Error ? err.message : String(err),
		});
		return null;
	}
}
+ */ +export const CODEMAP_ADVERTISEMENT = `## Code Summaries (codemap) +File-level code summaries are available for this repo. Before reading unfamiliar files, call \`get_task_context\` with your task to retrieve relevant summaries (packed within a token budget). After reading a non-trivial file or making load-bearing changes, call \`set_file_summary\` to record a short note (purpose, key symbols, gotchas, invariants). Summaries are anchored to file content via Bun.hash — if a file changes, its summary is flagged stale and should be refreshed.`; + +/** + * Build the first-turn injection block from a task-context result. + * This is appended to the system prompt as an extra part. + */ +export function buildCodemapInjectionBlock(result: TaskContextResult): string { + if (result.files.length === 0) return ""; + + const lines: string[] = [ + "## Relevant Code Summaries", + `The following file summaries are relevant to the task: "${result.task}"`, + "", + ]; + + for (const file of result.files) { + const staleTag = file.stale ? (file.missing ? " [STALE: file missing]" : " [STALE: file changed]") : ""; + lines.push(`### ${file.path}${staleTag}`); + lines.push(file.summary); + lines.push(""); + } + + lines.push( + `_${result.meta.fileCount} summaries, ~${result.meta.estimatedTokens} tokens${result.meta.truncated ? 
/** Result of a task-context retrieval: the packed file summaries plus packing metadata. */
export interface TaskContextResult {
	/** The task text the retrieval was run for. */
	task: string;
	files: Array<{
		path: string;
		score: number;
		summary: string;
		stale: boolean;
		missing: boolean;
		updatedAt: string;
	}>;
	meta: {
		fileCount: number;
		estimatedTokens: number;
		truncated: boolean;
	};
}

// --- Lexical extraction ---

/**
 * Words dropped from task text before building the search query.
 *
 * Held in a Set rather than a plain object: an object lookup such as
 * `STOPWORDS["constructor"]` resolves through `Object.prototype` and is truthy,
 * which silently discarded "constructor" as if it were a stopword.
 */
const STOPWORDS: ReadonlySet<string> = new Set([
	"the",
	"and",
	"for",
	"with",
	"this",
	"that",
	"from",
	"into",
	"but",
	"not",
	"are",
	"was",
	"were",
	"have",
	"has",
	"will",
	"would",
	"could",
	"should",
	"how",
	"does",
	"what",
	"when",
	"where",
	"why",
	"who",
	"can",
	"use",
	"using",
	"work",
	"works",
]);

/**
 * Extract lower-case search keywords from free-form task text.
 *
 * Text is tokenized on non-alphanumeric characters (so snake_case identifiers
 * split at the underscore). Each token contributes itself and, when it is a
 * camelCase identifier, its parts (`buildSystemPrompt` → `buildsystemprompt`,
 * `build`, `system`, `prompt`). The camelCase split happens before lower-casing;
 * previously the text was lower-cased first, so the case boundaries were already
 * gone by the time any splitting ran.
 *
 * Keywords shorter than 3 characters and stopwords are dropped. Duplicates are
 * kept, in order of appearance.
 *
 * @param task Free-form task description.
 * @returns Lower-case keywords in order of appearance.
 */
export function extractKeywords(task: string): string[] {
	const keywords: string[] = [];
	for (const word of task.match(/[A-Za-z0-9]+/g) ?? []) {
		const humps = word.replace(/([a-z])([A-Z])/g, "$1 $2").split(" ");
		const candidates = humps.length > 1 ? [word, ...humps] : [word];
		for (const candidate of candidates) {
			const keyword = candidate.toLowerCase();
			if (keyword.length >= 3 && !STOPWORDS.has(keyword)) keywords.push(keyword);
		}
	}
	return keywords;
}

/**
 * Expand identifier-like tokens into their camelCase and snake_case parts for
 * richer FTS queries.
 *
 * Every input token is kept as-is; parts of at least 3 characters are appended
 * after it with their original casing. The result is de-duplicated, preserving
 * first-occurrence order. camelCase splitting only has an effect on tokens that
 * still carry their original casing.
 *
 * @param tokens Tokens to expand.
 * @returns The tokens plus their parts, without duplicates.
 */
export function splitTokens(tokens: string[]): string[] {
	const expanded = new Set<string>();
	for (const token of tokens) {
		expanded.add(token);

		// Split camelCase: buildSystemPrompt → build, System, Prompt
		const camelParts = token.replace(/([a-z])([A-Z])/g, "$1 $2").split(" ");
		if (camelParts.length > 1) {
			for (const part of camelParts) {
				if (part.length >= 3) expanded.add(part);
			}
		}

		// Split snake_case: get_task_context → get, task, context
		if (token.includes("_")) {
			for (const part of token.split("_")) {
				if (part.length >= 3) expanded.add(part);
			}
		}
	}
	return [...expanded];
}

// --- Reciprocal Rank Fusion ---

/**
 * Fuse two ranked lists with weighted Reciprocal Rank Fusion.
 *
 * The item at 0-based position `i` of a list contributes
 * `weight / (60 + i + 1)` to the score of its `id`; an id present in both lists
 * accumulates both contributions. For an id seen in both lists the summary
 * object from the FTS list is the one returned.
 *
 * @param ftsResults Lexical results, best first.
 * @param vectorResults Vector results, best first.
 * @param ftsWeight Weight applied to FTS ranks.
 * @param vectorWeight Weight applied to vector ranks.
 * @returns Copies of the summaries with `score` replaced by the fused score,
 *          sorted by descending score.
 */
export function reciprocalRankFusion(
	ftsResults: RankedSummary[],
	vectorResults: RankedSummary[],
	ftsWeight: number,
	vectorWeight: number,
): RankedSummary[] {
	const k = 60; // Standard RRF constant
	const fused = new Map<RankedSummary["id"], { summary: RankedSummary; score: number }>();

	const accumulate = (results: RankedSummary[], weight: number): void => {
		results.forEach((summary, rank) => {
			const contribution = weight / (k + rank + 1);
			const entry = fused.get(summary.id);
			if (entry) entry.score += contribution;
			else fused.set(summary.id, { summary, score: contribution });
		});
	};
	accumulate(ftsResults, ftsWeight);
	accumulate(vectorResults, vectorWeight);

	return [...fused.values()].sort((a, b) => b.score - a.score).map(({ summary, score }) => ({ ...summary, score }));
}

// --- Budget packer ---

/**
 * Estimated token cost of including one summary.
 * Codemap's exact documented token formula: ceil(summary_text.length / 4) + 20
 */
export function tokenCost(summaryText: string): number {
	return Math.ceil(summaryText.length / 4) + 20;
}
/**
 * Greedily pack ranked summaries into a token budget.
 *
 * Summaries are taken in rank order until `maxFiles` is reached or the next one
 * would push the running total over `tokenBudget`. The first summary is always
 * accepted, even when it alone exceeds the budget (as long as `maxFiles` allows
 * at least one file). Packing stops at the first summary that does not fit;
 * later, smaller ones are not considered.
 *
 * `stale` and `missing` are emitted as false placeholders for the caller to
 * fill in.
 *
 * @param ranked Summaries, best first.
 * @param tokenBudget Token ceiling for the packed set.
 * @param maxFiles Maximum number of files to include.
 * @param _cwd Unused.
 * @returns The packed files, their total estimated tokens, and whether any
 *          ranked summary was left out.
 */
export function packBudget(
	ranked: RankedSummary[],
	tokenBudget: number,
	maxFiles: number,
	_cwd: string,
): { files: TaskContextResult["files"]; estimatedTokens: number; truncated: boolean } {
	let totalTokens = 0;
	const files: TaskContextResult["files"] = [];

	for (const summary of ranked) {
		if (files.length >= maxFiles) break;
		const cost = tokenCost(summary.summaryText);
		// Over budget, and at least one file is already packed.
		if (totalTokens + cost > tokenBudget && files.length > 0) break;

		totalTokens += cost;
		files.push({
			path: summary.filePath,
			score: Number(summary.score.toFixed(4)),
			summary: summary.summaryText,
			stale: false, // Will be set by staleness check
			missing: false,
			updatedAt: summary.updatedAt,
		});
	}

	return {
		files,
		estimatedTokens: totalTokens,
		truncated: ranked.length > files.length,
	};
}

// --- Main retrieval pipeline ---

export interface GetTaskContextOptions {
	/** Maximum number of files to return (default 12). */
	maxFiles?: number;
	/** Token ceiling for the packed summaries (default: `config.tokenBudget`). */
	tokenBudget?: number;
	/** Optional query embedding vector. When provided, enables vector seed retrieval;
	 *  when omitted, vector search is skipped (no embedding client available). */
	queryEmbedding?: number[];
}

/**
 * Retrieve the file summaries most relevant to `task`, packed into a token budget.
 *
 * Pipeline: keyword extraction → FTS and (optional) vector seed retrieval run
 * concurrently → weighted RRF fusion (FTS 0.7, vector 0.3) → greedy budget
 * packing → staleness check of each packed file against the file on disk.
 *
 * A failing vector search is treated as "no vector results"; a failing FTS
 * search rejects the returned promise.
 *
 * @param client Open codemap DB client.
 * @param config Codemap config; supplies the default token budget and the seed limit.
 * @param task Free-form task description.
 * @param projectLabel Project the summaries are scoped to.
 * @param cwd Directory that stored file paths are resolved against.
 * @param opts Per-call overrides and the optional query embedding.
 * @returns The packed summaries with staleness flags, or an empty result when
 *          neither search produced a seed.
 */
export async function getTaskContext(
	client: Client,
	config: CodemapConfig,
	task: string,
	projectLabel: string,
	cwd: string,
	opts: GetTaskContextOptions = {},
): Promise<TaskContextResult> {
	const maxFiles = opts.maxFiles ?? 12;
	const tokenBudget = opts.tokenBudget ?? config.tokenBudget;
	const seedLimit = config.maxResults;
	const { queryEmbedding } = opts;

	// Step 1: Lexical extraction
	const queryStr = splitTokens(extractKeywords(task)).join(" ");

	// Step 2: Parallel seed retrieval
	// Vector search is skipped when no queryEmbedding is provided (no embedding client).
	const vectorSeeds: Promise<RankedSummary[]> =
		queryEmbedding && queryEmbedding.length > 0
			? searchVector(client, projectLabel, queryEmbedding, seedLimit).catch((): RankedSummary[] => [])
			: Promise.resolve([]);
	const ftsSeeds: Promise<RankedSummary[]> = queryStr
		? searchFts(client, projectLabel, queryStr, seedLimit)
		: Promise.resolve([]);
	const [ftsResults, vectorResults] = await Promise.all([ftsSeeds, vectorSeeds]);

	// Step 3: RRF fusion (FTS weighted higher since it's lexical task matching)
	const fused = reciprocalRankFusion(ftsResults, vectorResults, 0.7, 0.3);

	// Neither search produced a seed.
	if (fused.length === 0) {
		return { task, files: [], meta: { fileCount: 0, estimatedTokens: 0, truncated: false } };
	}

	// Step 4: Budget packer
	const packed = packBudget(fused, tokenBudget, maxFiles, cwd);

	// Step 5: Staleness check for each included file.
	// Index stored hashes by path once (first entry per path wins) instead of
	// scanning `fused` for every packed file.
	const storedHashByPath = new Map<string, string>();
	for (const summary of fused) {
		if (!storedHashByPath.has(summary.filePath)) {
			storedHashByPath.set(summary.filePath, summary.contentHash ?? "");
		}
	}
	const filesWithStaleness = await Promise.all(
		packed.files.map(async file => {
			const fullPath = path.resolve(cwd, file.path);
			const staleness = await checkStaleness(fullPath, storedHashByPath.get(file.path) ?? "");
			return { ...file, stale: staleness.stale, missing: staleness.missing };
		}),
	);

	return {
		task,
		files: filesWithStaleness,
		meta: {
			fileCount: filesWithStaleness.length,
			estimatedTokens: packed.estimatedTokens,
			truncated: packed.truncated,
		},
	};
}
/**
 * Build the DDL for the codemap summaries store.
 *
 * Layout notes:
 * - `summaries.id` is `INTEGER PRIMARY KEY AUTOINCREMENT` — a singular PK with
 *   an implicit ROWID. The `libsql_vector_idx` DiskANN index only works on
 *   tables that have a ROWID or a single-column PRIMARY KEY, so a composite PK
 *   is intentionally NOT used here. Per-file uniqueness is enforced separately
 *   via the `UNIQUE (project_label, file_path)` constraint.
 * - `embedding F32_BLOB(<dimensions>)` is nullable so embeddings can be computed
 *   lazily; the default of 768 matches bge-base-en-v1.5.
 * - `embed_model` records which model produced the embedding so callers can
 *   detect staleness when the model changes.
 * - `summaries_fts` is an FTS5 external-content table synced via triggers
 *   (after-insert / after-delete / after-update), mirroring the pattern used
 *   by hindsight history-storage. Triggers do NOT fire on replica apply, so
 *   `postSyncMaintenance()` in db.ts rebuilds both FTS and the vector index
 *   after a remote sync.
 * - Every statement uses `IF NOT EXISTS`, so running this against an existing
 *   database leaves an already-created `summaries` table (including its
 *   embedding width) untouched.
 *
 * @param dimensions Embedding vector width written into the `F32_BLOB` column type.
 * @returns Semicolon-separated DDL statements.
 * @throws RangeError if `dimensions` is not a positive integer. The value is
 *         interpolated directly into SQL, so it is validated first.
 */
export function buildSchemaSql(dimensions: number = 768): string {
	if (!Number.isInteger(dimensions) || dimensions <= 0) {
		throw new RangeError(`codemap: embedding dimensions must be a positive integer, got ${dimensions}`);
	}
	return `
CREATE TABLE IF NOT EXISTS schema_migrations (
  version INTEGER PRIMARY KEY,
  applied_at TEXT NOT NULL DEFAULT (datetime('now'))
);

CREATE TABLE IF NOT EXISTS summaries (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  project_label TEXT NOT NULL,
  file_path TEXT NOT NULL,
  summary_text TEXT NOT NULL,
  content_hash TEXT NOT NULL DEFAULT '',
  embedding F32_BLOB(${dimensions}),
  embed_model TEXT,
  symbol_name TEXT,
  symbol_kind TEXT,
  symbol_line_range TEXT,
  source TEXT NOT NULL DEFAULT 'agent',
  updated_at TEXT NOT NULL DEFAULT (datetime('now')),
  UNIQUE (project_label, file_path)
);

CREATE INDEX IF NOT EXISTS idx_summaries_project ON summaries(project_label);
CREATE INDEX IF NOT EXISTS idx_summaries_hash ON summaries(project_label, content_hash);

CREATE INDEX IF NOT EXISTS idx_summaries_embedding ON summaries(libsql_vector_idx(embedding));

CREATE VIRTUAL TABLE IF NOT EXISTS summaries_fts USING fts5(
  summary_text,
  file_path,
  content='summaries',
  content_rowid='rowid'
);

CREATE TRIGGER IF NOT EXISTS summaries_ai AFTER INSERT ON summaries BEGIN
  INSERT INTO summaries_fts(rowid, summary_text, file_path)
  VALUES (new.rowid, new.summary_text, new.file_path);
END;

CREATE TRIGGER IF NOT EXISTS summaries_ad AFTER DELETE ON summaries BEGIN
  INSERT INTO summaries_fts(summaries_fts, rowid, summary_text, file_path)
  VALUES('delete', old.rowid, old.summary_text, old.file_path);
END;

CREATE TRIGGER IF NOT EXISTS summaries_au AFTER UPDATE ON summaries BEGIN
  INSERT INTO summaries_fts(summaries_fts, rowid, summary_text, file_path)
  VALUES('delete', old.rowid, old.summary_text, old.file_path);
  INSERT INTO summaries_fts(rowid, summary_text, file_path)
  VALUES(new.rowid, new.summary_text, new.file_path);
END;
`;
}

/** Default schema SQL (768 dimensions — bge-base-en-v1.5). */
export const SCHEMA_SQL = buildSchemaSql(768);

/**
 * Bootstrap the codemap summaries schema on the given libSQL client.
 *
 * Runs all DDL via `executeMultiple` (which accepts semicolon-separated
 * statements) and then records schema version 1 via `execute` (single
 * statement). `INSERT OR IGNORE` makes this idempotent across re-runs.
 *
 * @param client Open libSQL client.
 * @param dimensions Embedding width matching the configured model (768 for en,
 *                   1024 for multilingual).
 * @throws RangeError for an invalid `dimensions`; otherwise whatever the client
 *         throws while executing the statements.
 */
export async function initSchema(client: Client, dimensions: number = 768): Promise<void> {
	// Execute all DDL statements
	await client.executeMultiple(buildSchemaSql(dimensions));
	// Record schema version
	await client.execute({
		sql: "INSERT OR IGNORE INTO schema_migrations (version, applied_at) VALUES (?, datetime('now'))",
		args: [1],
	});
}
*/ + contentHash: string; + /** Whether the file has changed since the summary was written. */ + stale: boolean; + /** Whether the file no longer exists on disk. */ + missing: boolean; +} + +/** + * Compute the content hash of a file using Bun.hash (xxHash64). + * Returns '' if the file does not exist (no error thrown). + * + * Per AGENTS.md: use `Bun.hash()` for hashing, NOT `node:crypto`. + * The codebase uses Bun.hash for content hashing throughout + * (e.g. noop-loop-guard.ts:98 `Bun.hash(input).toString(16)`). + */ +export async function computeFileHash(filePath: string): Promise { + try { + const contents = await Bun.file(filePath).text(); + return Bun.hash(contents).toString(16); + } catch { + // File doesn't exist or can't be read — return empty hash. + // Don't log every missing file (common when summarizing not-yet-saved files). + return ""; + } +} + +/** + * Check whether a summary is stale by comparing the stored content_hash + * to the current file on disk. + * + * - Hashes match → not stale + * - Hashes differ → stale (file changed) + * - File missing + stored hash was '' → stale, missing (file was never saved) + * - File missing + stored hash was non-empty → stale, missing (file was deleted) + */ +export async function checkStaleness(filePath: string, storedHash: string): Promise { + const currentHash = await computeFileHash(filePath); + const missing = currentHash === ""; + const stale = missing || currentHash !== storedHash; + return { contentHash: currentHash, stale, missing }; +} diff --git a/packages/coding-agent/src/task-context/state.ts b/packages/coding-agent/src/task-context/state.ts new file mode 100644 index 0000000000..a784283c8c --- /dev/null +++ b/packages/coding-agent/src/task-context/state.ts @@ -0,0 +1,42 @@ +import type { Client } from "@libsql/client"; +import type { AgentSession } from "../session/agent-session"; +import type { CodemapConfig } from "./config"; + +const kCodemapSessionState = Symbol("codemap.sessionState"); + 
/** AgentSession widened with the symbol-keyed slot that holds codemap state. */
interface AgentSessionWithCodemapState extends AgentSession {
  [kCodemapSessionState]?: CodemapSessionState;
}

export interface CodemapSessionState {
  /** The libSQL client for this session (cached, shared across tool calls). */
  client: Client;
  /** The resolved codemap config for this session. */
  config: CodemapConfig;
  /** Whether the first-turn injection has already fired for this session. */
  hasInjectedForFirstTurn: boolean;
}

/** Read the codemap state attached to `session`; undefined when there is no session or no state. */
export function getCodemapSessionState(session: AgentSession | undefined): CodemapSessionState | undefined {
  if (!session) return undefined;
  return (session as AgentSessionWithCodemapState)[kCodemapSessionState];
}

/**
 * Attach (or, with `undefined`, clear) the codemap state on `session`.
 *
 * @returns The state that was attached before this call, if any.
 */
export function setCodemapSessionState(
  session: AgentSession,
  state: CodemapSessionState | undefined,
): CodemapSessionState | undefined {
  const slot = session as AgentSessionWithCodemapState;
  const previous = slot[kCodemapSessionState];
  slot[kCodemapSessionState] = state;
  return previous;
}

/** Mark that the first-turn injection has fired. No-op when the session carries no codemap state. */
export function markFirstTurnInjected(session: AgentSession): void {
  const state = getCodemapSessionState(session);
  if (state) state.hasInjectedForFirstTurn = true;
}

/** Check whether the first-turn injection has already fired (false when no state is attached). */
export function hasFirstTurnInjected(session: AgentSession): boolean {
  return getCodemapSessionState(session)?.hasInjectedForFirstTurn ?? false;
}

// ---- packages/coding-agent/src/task-context/store.ts ----

import type { Client } from "@libsql/client";

/**
 * Data-access layer over the `summaries` table.
 *
 * Pure CRUD + FTS search + vector search queries. No business logic — just SQL.
 * Ranking fusion, budget packing, staleness checks, and lexical extraction
 * belong to `retrieve.ts`; this module exposes the primitive query channels
 * that the retrieval pipeline composes.
 */

export interface SummaryRow {
  id: number;
  projectLabel: string;
  filePath: string;
  summaryText: string;
  contentHash: string;
  symbolName: string | null;
  symbolKind: string | null;
  symbolLineRange: string | null;
  source: string;
  updatedAt: string;
}

export interface RankedSummary extends SummaryRow {
  score: number;
}

/** Parameters for {@link upsertSummary}. */
export interface UpsertSummaryParams {
  projectLabel: string;
  filePath: string;
  summaryText: string;
  contentHash: string;
  /** Hard cap: summaryText is truncated to this many characters before insert. */
  maxSummaryChars: number;
  symbolName?: string | null;
  symbolKind?: string | null;
  symbolLineRange?: string | null;
}

/**
 * Map a raw libSQL row (snake_case columns) to the camelCase {@link SummaryRow}.
 *
 * Columns are read by name. Required text columns fall back to "" and `source`
 * to "agent" when the value is null/undefined, so a partial row never yields
 * the literal string "null"; the three optional symbol columns stay `null`.
 * `id` goes through `Number()`, which also accepts a bigint rowid.
 */
function mapRow(row: Record<string, unknown>): SummaryRow {
  const text = (value: unknown): string => String(value ?? "");
  const nullableText = (value: unknown): string | null => (value != null ? String(value) : null);
  return {
    id: Number(row.id),
    projectLabel: text(row.project_label),
    filePath: text(row.file_path),
    summaryText: text(row.summary_text),
    contentHash: text(row.content_hash),
    symbolName: nullableText(row.symbol_name),
    symbolKind: nullableText(row.symbol_kind),
    symbolLineRange: nullableText(row.symbol_line_range),
    source: String(row.source ?? "agent"),
    updatedAt: text(row.updated_at),
  };
}
/**
 * Insert a summary, or update the existing row for the same
 * (project_label, file_path) via `ON CONFLICT … DO UPDATE` on the UNIQUE
 * constraint. The row keeps its `id` across updates.
 *
 * `summaryText` is truncated to `maxSummaryChars` UTF-16 code units before
 * insert. The cap is enforced here, not by the schema. If the cut would land
 * between the two halves of a surrogate pair, the dangling high surrogate is
 * dropped so the stored text stays well-formed.
 *
 * On update, `embedding` / `embed_model` are kept only when the summary text is
 * unchanged. When the text changes they are reset to NULL so the lazy backfill
 * (which selects `embedding IS NULL`) re-embeds the row; otherwise vector
 * search would keep ranking the row by its previous text.
 *
 * @returns The stored row, read back to capture `id` and the server-set `updated_at`.
 * @throws Error when the row cannot be read back after the write.
 */
export async function upsertSummary(client: Client, params: UpsertSummaryParams): Promise<SummaryRow> {
  let truncated = params.summaryText.slice(0, params.maxSummaryChars);
  if (truncated.length < params.summaryText.length) {
    const lastUnit = truncated.charCodeAt(truncated.length - 1);
    // 0xD800–0xDBFF is the high-surrogate range; alone it is not valid text.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) truncated = truncated.slice(0, -1);
  }

  await client.execute({
    // In DO UPDATE, `summaries.x` is the existing row and `excluded.x` the
    // proposed one; every right-hand side sees the pre-update values.
    sql: `INSERT INTO summaries (project_label, file_path, summary_text, content_hash, symbol_name, symbol_kind, symbol_line_range)
          VALUES (?, ?, ?, ?, ?, ?, ?)
          ON CONFLICT (project_label, file_path) DO UPDATE SET
            embedding = CASE WHEN summaries.summary_text = excluded.summary_text THEN summaries.embedding ELSE NULL END,
            embed_model = CASE WHEN summaries.summary_text = excluded.summary_text THEN summaries.embed_model ELSE NULL END,
            summary_text = excluded.summary_text,
            content_hash = excluded.content_hash,
            symbol_name = excluded.symbol_name,
            symbol_kind = excluded.symbol_kind,
            symbol_line_range = excluded.symbol_line_range,
            updated_at = datetime('now')`,
    args: [
      params.projectLabel,
      params.filePath,
      truncated,
      params.contentHash,
      params.symbolName ?? null,
      params.symbolKind ?? null,
      params.symbolLineRange ?? null,
    ],
  });

  const stored = await getSummary(client, params.projectLabel, params.filePath);
  if (!stored) throw new Error(`codemap: upsert failed to read back summary for ${params.filePath}`);
  return stored;
}

/** SELECT a single summary by its (project_label, file_path) key, or null if absent. */
export async function getSummary(client: Client, projectLabel: string, filePath: string): Promise<SummaryRow | null> {
  const result = await client.execute({
    sql: `SELECT * FROM summaries WHERE project_label = ? AND file_path = ?`,
    args: [projectLabel, filePath],
  });
  const first = result.rows[0];
  return first ? mapRow(first) : null;
}
/** DELETE a summary by key. Returns true iff a row was actually removed. */
export async function deleteSummary(client: Client, projectLabel: string, filePath: string): Promise<boolean> {
  const result = await client.execute({
    sql: `DELETE FROM summaries WHERE project_label = ? AND file_path = ?`,
    args: [projectLabel, filePath],
  });
  return Number(result.rowsAffected) > 0;
}

/**
 * Build an FTS5 MATCH expression from a free-text query.
 *
 * The query is lowercased and split into runs of Unicode letters/digits, so
 * non-ASCII words survive instead of being dropped. Tokens shorter than 3
 * characters are discarded (they match too broadly to be useful seeds) and
 * repeats are removed so a word mentioned twice is not weighted twice. Each
 * remaining token becomes `"token"*` (quoted, prefix match) and the terms are
 * joined with ` OR `: task queries describe intent, not exact content, so "any
 * keyword matches" fits better than "all keywords required".
 *
 * Tokens consist of letters and digits only, so they can never contain a
 * double quote and need no FTS5 string escaping.
 *
 * @returns The MATCH expression, or "" when no usable token remains (callers
 *          short-circuit on "").
 */
function buildFtsQuery(query: string): string {
  const tokens = query.toLowerCase().match(/[\p{L}\p{N}]+/gu) ?? [];
  // A Set keeps first-seen order, so the term order follows the query.
  const unique = [...new Set(tokens.filter(tok => tok.length >= 3))];
  return unique.map(tok => `"${tok}"*`).join(" OR ");
}

/**
 * FTS5 lexical search over `summaries_fts` (external-content table over
 * `summaries`). Ranks by `bm25(summaries_fts)` ascending (lower = more
 * relevant) and normalizes the score to [0, 1) where higher = more relevant,
 * consistent with {@link searchVector}'s score semantics so the retrieval
 * pipeline can fuse channels directly.
 *
 * @returns Up to `limit` rows for `projectLabel`, best first; [] when the
 *          query yields no usable tokens.
 */
export async function searchFts(
  client: Client,
  projectLabel: string,
  query: string,
  limit: number,
): Promise<RankedSummary[]> {
  const ftsQuery = buildFtsQuery(query);
  if (!ftsQuery) return [];
  const result = await client.execute({
    sql: `SELECT s.*, bm25(summaries_fts) as fts_rank
          FROM summaries_fts f
          JOIN summaries s ON s.rowid = f.rowid
          WHERE summaries_fts MATCH ?
            AND s.project_label = ?
          ORDER BY fts_rank
          LIMIT ?`,
    args: [ftsQuery, projectLabel, limit],
  });
  return result.rows.map(row => {
    // bm25 returns negative values where more-negative = better.
    // |bm25|/(1+|bm25|) maps that to [0,1) with higher = more relevant.
    const magnitude = Math.abs(Number(row.fts_rank ?? 0));
    return { ...mapRow(row), score: magnitude / (1 + magnitude) };
  });
}

/**
 * Vector (semantic) search via the DiskANN index `idx_summaries_embedding`.
 *
 * `vector_top_k` returns the `id` (= rowid, since `id INTEGER PRIMARY KEY`
 * aliases rowid) of the k nearest neighbors to the query vector; the cosine
 * distance is computed alongside with `vector_distance_cos`. Cosine distance
 * ranges [0, 2] (0 = identical, 2 = opposite); it is mapped to a [0, 1]
 * relevance score where higher = more similar, matching {@link searchFts}.
 * Only rows with a non-null `embedding` are reachable — unembedded rows are
 * invisible here and handled by lazy backfill.
 *
 * NOTE(review): the top-k cut happens before the `project_label` filter, so in
 * a database shared by several projects fewer than `limit` rows (possibly none)
 * can come back even when the project has matching embeddings. Confirm whether
 * over-fetching is wanted.
 */
export async function searchVector(
  client: Client,
  projectLabel: string,
  queryVector: number[],
  limit: number,
): Promise<RankedSummary[]> {
  if (queryVector.length === 0) return [];
  const vectorLiteral = `[${queryVector.join(",")}]`;
  const result = await client.execute({
    sql: `SELECT s.*, vector_distance_cos(s.embedding, vector32(?)) as vec_distance
          FROM vector_top_k('idx_summaries_embedding', vector32(?), ?) v
          JOIN summaries s ON s.rowid = v.id
          WHERE s.project_label = ?
          ORDER BY vec_distance`,
    args: [vectorLiteral, vectorLiteral, limit, projectLabel],
  });
  return result.rows.map(row => {
    const distance = Number(row.vec_distance ?? 2);
    return { ...mapRow(row), score: Math.max(0, 1 - distance / 2) };
  });
}
/**
 * List summaries in a project that have no embedding yet, most recently
 * updated first. Used by the lazy-embed backfill path after FTS retrieval:
 * batch-embed these rows' `summaryText`, then call {@link updateEmbedding} for
 * each.
 *
 * `updated_at` has one-second resolution, so `id DESC` breaks ties to keep the
 * order (and therefore which rows a small `limit` picks) deterministic.
 */
export async function getUnembeddedSummaries(
  client: Client,
  projectLabel: string,
  limit: number,
): Promise<SummaryRow[]> {
  const result = await client.execute({
    sql: `SELECT * FROM summaries WHERE project_label = ? AND embedding IS NULL ORDER BY updated_at DESC, id DESC LIMIT ?`,
    args: [projectLabel, limit],
  });
  return result.rows.map(row => mapRow(row));
}

/**
 * Populate one row's `embedding` (F32_BLOB via `vector32`) and record the
 * `embed_model` that produced it. Per the original author's note, the DiskANN
 * vector index updates incrementally on this DML, so callers embedding many
 * rows should batch the updates to amortize index maintenance.
 *
 * @throws Error when `embedding` is empty or contains a non-finite number.
 *         Such a vector would otherwise be serialized as `[]` / `[NaN,…]` and
 *         handed to `vector32`.
 */
export async function updateEmbedding(client: Client, id: number, embedding: number[], model: string): Promise<void> {
  if (embedding.length === 0 || embedding.some(value => !Number.isFinite(value))) {
    throw new Error(`codemap: refusing to store an empty or non-finite embedding for summary ${id}`);
  }
  const vectorLiteral = `[${embedding.join(",")}]`;
  await client.execute({
    sql: `UPDATE summaries SET embedding = vector32(?), embed_model = ? WHERE id = ?`,
    args: [vectorLiteral, model, id],
  });
}

/** Count of summaries stored for a project (0 when the query returns no row). */
export async function summaryCount(client: Client, projectLabel: string): Promise<number> {
  const result = await client.execute({
    sql: `SELECT COUNT(*) as count FROM summaries WHERE project_label = ?`,
    args: [projectLabel],
  });
  return Number(result.rows[0]?.count ?? 0);
}

// ---- packages/coding-agent/src/task-context/tools.ts ----

import * as path from "node:path";
import type { Client } from "@libsql/client";
import type { AgentTool, AgentToolResult } from "@oh-my-pi/pi-agent-core";
import { getAgentDir } from "@oh-my-pi/pi-utils";
import { type } from "arktype";
import type { ToolSession } from "../tools";
import type { CodemapConfig } from "./config";
import { loadCodemapConfig } from "./config";
import { closeCodemapDb, openCodemapDb } from "./db";
import { embedText } from "./embed";
import { getTaskContext } from "./retrieve";
import { checkStaleness, computeFileHash } from "./staleness";
import { deleteSummary, getSummary, upsertSummary } from "./store";

// --- Shared per-session DB client cache -------------------------------------

/** The currently open client, or null before the first open. */
let cachedClient: Client | null = null;
/** The config `cachedClient` was opened with; compared to detect connection changes. */
let cachedConfig: CodemapConfig | null = null;
/** The open that is currently in flight, shared by concurrent callers. */
let clientPromise: Promise<{ client: Client; config: CodemapConfig }> | null = null;
/** Flatten the connection-shaping fields of a config into a comparable key. */
function clientCacheKey(config: CodemapConfig): string {
  return `${config.dbPath}|${config.turso.syncUrl}|${config.turso.authToken}`;
}

/** Cache key of the open currently in flight; null when none is pending. */
let inFlightKey: string | null = null;

/**
 * Resolve (or reuse) a codemap DB client for the session.
 *
 * The client is cached at module level keyed by `config.dbPath` + Turso
 * connection fields so that repeated tool calls don't reopen the libSQL
 * native binding each time. If connection-shaping fields changed (e.g.
 * settings edited mid-session), the previous client is closed before
 * opening the new one.
 *
 * Concurrent callers share one in-flight open only when it targets the same
 * connection key. A caller whose config points elsewhere waits for the pending
 * open to settle and then re-evaluates, so it is never handed a client that was
 * opened for a different database.
 *
 * @returns The open client together with the caller's freshly loaded config.
 */
async function getClient(session: ToolSession): Promise<{ client: Client; config: CodemapConfig }> {
  const config = loadCodemapConfig(session.settings, getAgentDir());
  const wantedKey = clientCacheKey(config);

  for (;;) {
    if (cachedClient && cachedConfig && clientCacheKey(cachedConfig) === wantedKey) {
      return { client: cachedClient, config };
    }
    const pending = clientPromise;
    if (!pending) break;
    if (inFlightKey === wantedKey) {
      const opened = await pending;
      return { client: opened.client, config };
    }
    // The pending open is for another database: wait (ignoring its outcome)
    // and loop to decide again against the settled cache.
    await pending.catch(() => undefined);
    if (clientPromise === pending) {
      // Defensive: the opener normally clears this itself once settled.
      clientPromise = null;
      inFlightKey = null;
    }
  }

  const opening = (async () => {
    if (cachedClient) {
      await closeCodemapDb(cachedClient);
      cachedClient = null;
      cachedConfig = null;
    }
    const client = await openCodemapDb(config);
    cachedClient = client;
    cachedConfig = config;
    return { client, config };
  })();
  clientPromise = opening;
  inFlightKey = wantedKey;
  try {
    return await opening;
  } finally {
    if (clientPromise === opening) {
      clientPromise = null;
      inFlightKey = null;
    }
  }
}

/**
 * Resolve a project label from cwd: the basename of the working directory.
 * Per the original author's note this mirrors hindsight's computeBankScope —
 * a human-readable scope key that groups summaries across sessions for the
 * same repo.
 *
 * NOTE(review): two checkouts whose directories share a basename map to the
 * same label and would share summaries in a common database — confirm that is
 * acceptable. Left unchanged because the label is the storage key.
 */
function resolveProjectLabel(cwd: string): string {
  return path.basename(cwd);
}

/**
 * Normalize a path to forward-slash, cwd-relative form for storage.
 *
 * @returns `relativePath` (storage key) and `absolutePath` (for disk access).
 * @throws Error when the path escapes the project directory (path traversal)
 *         or resolves to the project directory itself.
 */
function toStoredPath(cwd: string, filePath: string): { relativePath: string; absolutePath: string } {
  const projectRoot = path.resolve(cwd);
  const absolutePath = path.resolve(projectRoot, filePath);
  const rel = path.relative(projectRoot, absolutePath);
  // Only a leading ".." *segment* escapes. A plain prefix test would also
  // reject legitimate names such as "..config/x.ts" or "...notes.md".
  const escapes = rel === ".." || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel);
  if (escapes) {
    throw new Error(`Path "${filePath}" resolves outside the project directory.`);
  }
  if (rel === "") {
    throw new Error(`Path "${filePath}" resolves to the project directory itself, not a file inside it.`);
  }
  return { relativePath: rel.replace(/\\/g, "/"), absolutePath };
}

// --- Tool 1: set_file_summary -----------------------------------------------

const setFileSummarySchema = type({
  file: type("string").describe("File path (relative to cwd)"),
  summary: type("string").describe("1-3 sentences: purpose, key symbols, gotchas, invariants"),
  "symbol_name?": type("string").describe("Optional: specific symbol this summary is about"),
  "symbol_kind?": type("string").describe("Optional: function | class | method | etc."),
});
export type SetFileSummaryParams = typeof setFileSummarySchema.infer;

/** Tool that stores an agent-written summary for one file, stamped with the file's current content hash. */
export class SetFileSummaryTool implements AgentTool<typeof setFileSummarySchema> {
  readonly name = "set_file_summary";
  // NOTE(review): this tool writes to the summaries database yet is classed
  // "read" — confirm that matches the approval policy for persisting tools.
  readonly approval = "read" as const;
  readonly label = "Set File Summary";
  readonly description =
    "Persist a summary written by the agent after reading a file. Stores it for future task-relevant retrieval.";
  readonly parameters = setFileSummarySchema;
  readonly strict = true;
  readonly loadMode = "discoverable" as const;
  readonly summary = "Store a code summary for a file";

  constructor(private readonly session: ToolSession) {}

  /** Factory used by the tool registry: null unless `codemap.enabled` is set. */
  static createIf(session: ToolSession): SetFileSummaryTool | null {
    if (!session.settings.get("codemap.enabled")) return null;
    return new SetFileSummaryTool(session);
  }

  /** Validate the path, hash the file (hash is "" when unreadable) and upsert the summary. */
  async execute(_id: string, params: SetFileSummaryParams): Promise<AgentToolResult> {
    // Validate path before opening DB — fail fast on traversal attempts.
    const { relativePath, absolutePath } = toStoredPath(this.session.cwd, params.file);
    const { client, config } = await getClient(this.session);
    const contentHash = await computeFileHash(absolutePath);
    const row = await upsertSummary(client, {
      projectLabel: resolveProjectLabel(this.session.cwd),
      filePath: relativePath,
      summaryText: params.summary,
      contentHash,
      maxSummaryChars: config.maxSummaryChars,
      symbolName: params.symbol_name ?? null,
      symbolKind: params.symbol_kind ?? null,
    });
    const hashPreview = contentHash.slice(0, 8) || "none";
    return {
      content: [{ type: "text", text: `Summary stored for ${relativePath} (hash: ${hashPreview}).` }],
      details: { id: row.id, contentHash },
    };
  }
}

// --- Tool 2: get_file_summary -----------------------------------------------

const getFileSummarySchema = type({
  file: type("string").describe("File path (relative to cwd)"),
});
export type GetFileSummaryParams = typeof getFileSummarySchema.infer;

/** Tool that returns the stored summary for one file, tagged FRESH or STALE against the file on disk. */
export class GetFileSummaryTool implements AgentTool<typeof getFileSummarySchema> {
  readonly name = "get_file_summary";
  readonly approval = "read" as const;
  readonly label = "Get File Summary";
  readonly description =
    "Retrieve the stored summary for a file, with a staleness check against the current file content hash.";
  readonly parameters = getFileSummarySchema;
  readonly strict = true;
  readonly loadMode = "discoverable" as const;
  readonly summary = "Retrieve a stored code summary for a file";

  constructor(private readonly session: ToolSession) {}

  /** Factory used by the tool registry: null unless `codemap.enabled` is set. */
  static createIf(session: ToolSession): GetFileSummaryTool | null {
    if (!session.settings.get("codemap.enabled")) return null;
    return new GetFileSummaryTool(session);
  }

  /** Look up the summary and append a staleness marker (plus symbol info when recorded). */
  async execute(_id: string, params: GetFileSummaryParams): Promise<AgentToolResult> {
    // Validate path before opening DB — fail fast on traversal attempts.
    const { relativePath, absolutePath } = toStoredPath(this.session.cwd, params.file);
    const { client } = await getClient(this.session);
    const row = await getSummary(client, resolveProjectLabel(this.session.cwd), relativePath);
    if (!row) {
      return {
        content: [{ type: "text", text: `No summary stored for ${relativePath}.` }],
        details: { found: false, relativePath },
      };
    }

    const staleness = await checkStaleness(absolutePath, row.contentHash);
    // `missing` implies `stale`, so it has to be tested first.
    let freshness = "[FRESH] File matches the summary's content hash.";
    if (staleness.missing) {
      freshness = "[STALE] File no longer exists on disk.";
    } else if (staleness.stale) {
      freshness = "[STALE] File has changed since this summary was written.";
    }
    const lines: string[] = [`Summary for ${relativePath}:`, "", row.summaryText, "", freshness];
    if (row.symbolName) {
      lines.push("", `Symbol: ${row.symbolName}${row.symbolKind ? ` (${row.symbolKind})` : ""}`);
    }
    return {
      content: [{ type: "text", text: lines.join("\n") }],
      details: {
        found: true,
        id: row.id,
        stale: staleness.stale,
        missing: staleness.missing,
        updatedAt: row.updatedAt,
        symbolName: row.symbolName,
        symbolKind: row.symbolKind,
      },
    };
  }
}

// --- Tool 3: get_task_context -----------------------------------------------

const getTaskContextSchema = type({
  task: type("string").describe("The current task or goal in natural language"),
  "max_files?": type("number").describe("Optional: cap on number of files returned (default 12)"),
  "token_budget?": type("number").describe("Optional: token budget for packed summaries (default from config)"),
});
export type GetTaskContextParams = typeof getTaskContextSchema.infer;

/** Tool that returns the stored summaries most relevant to a task description. */
export class GetTaskContextTool implements AgentTool<typeof getTaskContextSchema> {
  readonly name = "get_task_context";
  readonly approval = "read" as const;
  readonly label = "Get Task Context";
  readonly description =
    "Retrieve task-relevant file summaries via hybrid (lexical + vector) retrieval with reciprocal rank fusion and budget packing.";
  readonly parameters = getTaskContextSchema;
  readonly strict = true;
  readonly loadMode = "discoverable" as const;
  readonly summary = "Retrieve task-relevant code summaries";

  constructor(private readonly session: ToolSession) {}

  /** Factory used by the tool registry: null unless `codemap.enabled` is set. */
  static createIf(session: ToolSession): GetTaskContextTool | null {
    if (!session.settings.get("codemap.enabled")) return null;
    return new GetTaskContextTool(session);
  }

  /** Embed the task (best effort), run retrieval and return the result as pretty-printed JSON. */
  async execute(_id: string, params: GetTaskContextParams): Promise<AgentToolResult> {
    const { client, config } = await getClient(this.session);
    const opts: { maxFiles?: number; tokenBudget?: number; queryEmbedding?: number[] } = {};
    if (params.max_files !== undefined) opts.maxFiles = params.max_files;
    if (params.token_budget !== undefined) opts.tokenBudget = params.token_budget;
    // Generate query embedding for vector search (lazy — may return null
    // if the embedding model is unavailable, in which case retrieval is
    // FTS-only, which is acceptable).
    const queryEmbedding = await embedText(params.task, config.embedding);
    if (queryEmbedding) opts.queryEmbedding = queryEmbedding;
    const result = await getTaskContext(
      client,
      config,
      params.task,
      resolveProjectLabel(this.session.cwd),
      this.session.cwd,
      opts,
    );
    const header = `Task context for: ${params.task}`;
    const body = JSON.stringify(result, null, 2);
    return {
      content: [{ type: "text", text: `${header}\n\n${body}` }],
      details: result,
    };
  }
}

// --- Tool 4: delete_file_summary --------------------------------------------

const deleteFileSummarySchema = type({
  file: type("string").describe("File path (relative to cwd)"),
});
export type DeleteFileSummaryParams = typeof deleteFileSummarySchema.infer;

/** Tool that removes the stored summary for one file. */
export class DeleteFileSummaryTool implements AgentTool<typeof deleteFileSummarySchema> {
  readonly name = "delete_file_summary";
  // NOTE(review): destructive operation classed as "read" — confirm this is
  // the intended approval level.
  readonly approval = "read" as const;
  readonly label = "Delete File Summary";
  readonly description =
    "Delete the stored summary for a file. Use when a file is removed or its summary is no longer relevant.";
  readonly parameters = deleteFileSummarySchema;
  readonly strict = true;
  readonly loadMode = "discoverable" as const;
  readonly summary = "Delete a stored code summary for a file";

  constructor(private readonly session: ToolSession) {}

  /** Factory used by the tool registry: null unless `codemap.enabled` is set. */
  static createIf(session: ToolSession): DeleteFileSummaryTool | null {
    if (!session.settings.get("codemap.enabled")) return null;
    return new DeleteFileSummaryTool(session);
  }

  /** Delete the row for the file and report whether anything was removed. */
  async execute(_id: string, params: DeleteFileSummaryParams): Promise<AgentToolResult> {
    // Validate path before opening DB — fail fast on traversal attempts.
    const { relativePath } = toStoredPath(this.session.cwd, params.file);
    const { client } = await getClient(this.session);
    const removed = await deleteSummary(client, resolveProjectLabel(this.session.cwd), relativePath);
    const text = removed
      ? `Summary deleted for ${relativePath}.`
      : `No summary found for ${relativePath} — nothing to delete.`;
    return {
      content: [{ type: "text", text }],
      details: { removed, relativePath },
    };
  }
}

// ---- packages/coding-agent/src/task-context/turso.ts ----

import { logger } from "@oh-my-pi/pi-utils";
import type { Settings } from "../config/settings";
import type { CodemapConfig, CodemapTursoConfig } from "./config";

/**
 * Turso platform API endpoint for database management.
 */
const TURSO_API_BASE = "https://api.turso.tech/v1";
/**
 * Auto-provision a Turso database when autoProvision is enabled and
 * TURSO_API_TOKEN + org are available, but no syncUrl is set yet.
 *
 * Steps:
 * 1. POST to the Turso platform API to create a database.
 * 2. Generate an auth token (JWT) for the database.
 * 3. Derive syncUrl = 'libsql://' + hostname from the create response.
 * 4. Persist both values via settings.set() so subsequent starts skip
 *    provisioning.
 *
 * Idempotent: no-op if syncUrl is already set. A config that already points at
 * a database is never replaced, even when its auth token is missing.
 *
 * Any failure (HTTP error, malformed response, persistence error) is logged
 * and the original config is returned, so the caller falls back to local-only
 * operation. Nothing is thrown.
 *
 * NOTE(review): if database creation succeeds but a later step fails, the
 * created database is left behind on the Turso account and the next start will
 * create another one (names are timestamp-based). Confirm whether cleanup is
 * wanted.
 *
 * @returns `config` unchanged, or a copy with `syncUrl` and `authToken` filled in.
 */
export async function autoProvisionTurso(config: CodemapTursoConfig, settings: Settings): Promise<CodemapTursoConfig> {
  // Skip if a database is already configured.
  if (config.syncUrl) return config;
  // Skip if auto-provisioning is disabled.
  if (!config.autoProvision) return config;

  const apiToken = Bun.env.TURSO_API_TOKEN?.trim();
  const org = config.org || Bun.env.TURSO_ORG?.trim();
  if (!apiToken || !org) {
    logger.debug("codemap: Turso auto-provision skipped — missing TURSO_API_TOKEN or org");
    return config;
  }

  try {
    // 1. Create database. `org` comes from settings/env, so it is encoded
    //    before being placed in the URL path.
    const dbName = `codemap-${Date.now()}`;
    const databasesUrl = `${TURSO_API_BASE}/organizations/${encodeURIComponent(org)}/databases`;
    const createResp = await fetch(databasesUrl, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiToken}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ name: dbName, group: "default" }),
    });
    if (!createResp.ok) {
      const body = await createResp.text();
      throw new Error(`Turso API create DB failed: ${createResp.status} ${body}`);
    }
    // The platform API wraps the new database as `{ database: { Hostname } }`;
    // a top-level `Hostname` is accepted as well for tolerance.
    const created = (await createResp.json()) as { database?: { Hostname?: string }; Hostname?: string };
    const hostname = created.database?.Hostname ?? created.Hostname;
    if (!hostname) {
      throw new Error("Turso API create DB response did not include a hostname");
    }
    const syncUrl = `libsql://${hostname}`;

    // 2. Generate auth token for the database.
    const tokenResp = await fetch(`${databasesUrl}/${encodeURIComponent(dbName)}/auth/tokens`, {
      method: "POST",
      headers: { Authorization: `Bearer ${apiToken}` },
    });
    if (!tokenResp.ok) {
      const body = await tokenResp.text();
      throw new Error(`Turso API create token failed: ${tokenResp.status} ${body}`);
    }
    const tokenData = (await tokenResp.json()) as { jwt?: string };
    const authToken = tokenData.jwt;
    if (!authToken) {
      throw new Error("Turso API create token response did not include a jwt");
    }

    // 3. Persist back via settings.set() — triggers #queueSave → config.yml
    await settings.set("codemap.turso.syncUrl", syncUrl);
    await settings.set("codemap.turso.authToken", authToken);

    logger.info("codemap: Turso database auto-provisioned", { syncUrl, dbName });

    return { ...config, syncUrl, authToken };
  } catch (err) {
    logger.warn("codemap: Turso auto-provision failed, falling back to local-only", {
      error: err instanceof Error ? err.message : String(err),
    });
    return config;
  }
}

/**
 * Resolve the effective Turso connection config, running auto-provisioning if needed.
 * Returns the updated config (with syncUrl+authToken if provisioned).
 */
export async function resolveTursoConfig(config: CodemapConfig, settings: Settings): Promise<CodemapConfig> {
  const turso = await autoProvisionTurso(config.turso, settings);
  return { ...config, turso };
}

/*
 * Remaining hunks of this patch. They edit existing files (tool-name registry
 * and tool factory wiring) and are not complete definitions, so their text is
 * preserved here unchanged.
 *
diff --git a/packages/coding-agent/src/tools/builtin-names.ts b/packages/coding-agent/src/tools/builtin-names.ts
index ccb62ca2e9..d9e2c2ca90 100644
--- a/packages/coding-agent/src/tools/builtin-names.ts
+++ b/packages/coding-agent/src/tools/builtin-names.ts
@@ -29,6 +29,10 @@ export const BUILTIN_TOOL_NAMES = [
  "reflect",
  "learn",
  "manage_skill",
+ "set_file_summary",
+ "get_file_summary",
+ "get_task_context",
+ "delete_file_summary",
 ] as const;

 export type BuiltinToolName = (typeof BUILTIN_TOOL_NAMES)[number];
diff --git a/packages/coding-agent/src/tools/index.ts b/packages/coding-agent/src/tools/index.ts
index 1ba174606d..b1b90a306a 100644
--- a/packages/coding-agent/src/tools/index.ts
+++ b/packages/coding-agent/src/tools/index.ts
@@ -27,6 +27,12 @@ import type { ToolChoiceQueue } from "../session/tool-choice-queue";
 import { TaskTool } from "../task";
 import type { AgentOutputManager } from "../task/output-manager";
 import { canSpawnAtDepth } from "../task/types";
+import {
+ DeleteFileSummaryTool,
+ GetFileSummaryTool,
+ GetTaskContextTool,
+ SetFileSummaryTool,
+} from "../task-context/tools";
 import { countToolsForAutoDiscovery, resolveEffectiveToolDiscoveryMode } from "../tool-discovery/mode";
 import type { DiscoverableTool, DiscoverableToolSearchIndex } from "../tool-discovery/tool-index";
 import type { EventBus } from "../utils/event-bus";
@@ -457,6 +463,10 @@ export const BUILTIN_TOOLS: Record = {
  recall: MemoryRecallTool.createIf,
  reflect: MemoryReflectTool.createIf,
  learn: LearnTool.createIf,
+ set_file_summary: SetFileSummaryTool.createIf,
+ get_file_summary: GetFileSummaryTool.createIf,
+ get_task_context: GetTaskContextTool.createIf,
+ delete_file_summary: DeleteFileSummaryTool.createIf,
  manage_skill: ManageSkillTool.createIf,
 };

@@ -553,6 +563,11 @@ export async function createTools(session: ToolSession, toolNames?: string[]): P
  requestedTools.push("learn");
  }
  }
+ if (session.settings.get("codemap.enabled")) {
+ for (const name of ["set_file_summary", "get_file_summary", "get_task_context", "delete_file_summary"]) {
+ if (!requestedTools.includes(name)) requestedTools.push(name);
+ }
+ }
  }
  // Resolve effective tool discovery mode.
  // tools.discoveryMode controls the new modes; mcp.discoveryMode remains a back-compat alias for "mcp-only".
@@ -596,6 +611,14 @@ export async function createTools(session: ToolSession, toolNames?: string[]): P
  if (name === "task") {
  return canSpawnAtDepth(session.settings.get("task.maxRecursionDepth") ?? 2, session.taskDepth ?? 0);
  }
+ if (
+ name === "set_file_summary" ||
+ name === "get_file_summary" ||
+ name === "get_task_context" ||
+ name === "delete_file_summary"
+ ) {
+ return session.settings.get("codemap.enabled") === true;
+ }
  return true;
  };
  if (includeYield && requestedTools && !requestedTools.includes("yield")) {
 */