diff --git a/docs/deploy/cloud.mdx b/docs/deploy/cloud.mdx index a8a21f7863..20bd92c40c 100644 --- a/docs/deploy/cloud.mdx +++ b/docs/deploy/cloud.mdx @@ -61,7 +61,7 @@ The credential is **shared with the [`heygen` CLI](https://github.com/heygen-com 3. `~/.heygen/credentials` - Point the CLI at a different backend with `HEYGEN_API_URL` (default `https://api.heygen.com`). Use `hyperframes auth refresh` to force-refresh an OAuth token before a long job; `hyperframes auth logout` clears the stored credential. + Point the CLI at a different backend with `HEYGEN_API_URL` (default `https://api.heygen.com`). Use `hyperframes auth refresh` to force-refresh an OAuth token before a long job; `hyperframes auth logout` clears the stored credential. For the keys voice, music, and capture use across the skills — and the fully local fallback — see [Authentication & API keys](/guides/authentication). ## How a cloud render flows diff --git a/docs/docs.json b/docs/docs.json index 7e8ac25996..75a3ac0d26 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -75,6 +75,7 @@ "group": "Guides", "pages": [ "guides/pipeline", + "guides/authentication", "guides/video-components", "guides/html-in-canvas", "guides/website-to-video", diff --git a/docs/guides/authentication.mdx b/docs/guides/authentication.mdx new file mode 100644 index 0000000000..a16a42ca8e --- /dev/null +++ b/docs/guides/authentication.mdx @@ -0,0 +1,88 @@ +--- +title: Authentication & API keys +description: "Sign in to HeyGen, and how the keys for voice, music, and capture resolve across the CLI and skills — including the priority order and the fully local fallback." +--- + +HyperFrames uses a HeyGen credential for premium voiceover (TTS) and the music / sound-effects library. Other providers are optional, and **everything runs without any key** — voice and music fall back to fully local engines. This page covers signing in, the keys each capability uses, and the order they resolve. 
+ +## Sign in + +Signing in is the same OAuth step as creating an account — new users land on the sign-up screen. + + + + The default flow opens your browser for OAuth and captures the token on a loopback port: + + ```bash + npx hyperframes auth login + # ✓ Signed in. + ``` + + For CI or headless machines, save a long-lived API key instead: + + ```bash + npx hyperframes auth login --api-key # hidden-input prompt + echo "$HEYGEN_API_KEY" | npx hyperframes auth login --api-key # from stdin + ``` + + + ```bash + npx hyperframes auth status + ``` + + Shows the active credential's source and verified identity, and — when you're signed out — which local engines voice and music will use. Add `--json` for `{ configured, recommended_action, offline_engines }` in scripts. + + + +The credential lives in `~/.heygen/credentials` (mode `0600`) — no per-repo `.env` to manage. Browser OAuth is a `hyperframes auth login` feature. The separate [`heygen` CLI](https://github.com/heygen-com/heygen-cli) (its own install — there's no `npx heygen`) is API-key-only, so `heygen auth login` just stores a key you paste. Both read the same `~/.heygen/credentials`, so signing in with one carries to the other. + + + No account needed to try HyperFrames. With no credential, voice uses **Kokoro** and music uses **MusicGen**, both fully local and offline — see [Working offline](#working-offline). + + +## How credentials resolve + +The HeyGen credential drives TTS and music / SFX **retrieval**. It resolves first-match-wins: + +1. `HEYGEN_API_KEY` — environment variable +2. `HYPERFRAMES_API_KEY` — alias, for parity with other tools +3. `~/.heygen/credentials` — written by `hyperframes auth login` (or `heygen auth login`) + +Point at a different config directory with `HEYGEN_CONFIG_DIR`, or a different backend with `HEYGEN_API_URL`. + +## Keys by capability + +Each capability picks the **first available provider** in order; the last is always a local engine that needs no key. 
Cloud providers below the HeyGen line need their own key *and* a local Python dependency. + +| Capability | Provider order | Key(s) — first match wins | Local dependency | +|------------|----------------|---------------------------|------------------| +| **Voice (TTS)** | HeyGen → ElevenLabs → Kokoro | `HEYGEN_API_KEY` → `HYPERFRAMES_API_KEY` → `~/.heygen` · then `ELEVENLABS_API_KEY` | Kokoro: `pip install kokoro-onnx soundfile` | +| **Music (BGM)** | HeyGen library → Lyria → MusicGen | HeyGen credential (above) · then `GEMINI_API_KEY` → `GOOGLE_API_KEY` | MusicGen: `pip install transformers torch soundfile numpy` | +| **Sound effects** | HeyGen library → bundled library | HeyGen credential (above) | bundled — no deps | +| **Capture descriptions** | OpenRouter → Gemini | `OPENROUTER_API_KEY` → `GEMINI_API_KEY` | — (optional; for [website-to-video](/guides/website-to-video)) | + +Run `npx hyperframes doctor` to check which local dependencies are installed. The media skills also run `hyperframes auth status` as a preflight before generating, so you always know whether a run will use HeyGen or a local engine before it starts. + +## Working offline + +No key configured is a normal state, not an error. The workflow runs entirely on local models: + +- **Voice** — Kokoro-82M (54 voices), with Whisper for word-level caption alignment. +- **Music** — MusicGen (`facebook/musicgen-small`). +- **Sound effects** — a bundled library. + +Local engines are free and offline; HeyGen gives higher-quality voices and a professionally produced music library. Sign in any time to switch a project from local to HeyGen. + +## Environment variables + +| Variable | Used for | +|----------|----------| +| `HEYGEN_API_KEY` | HeyGen credential — voice + music/SFX retrieval. Highest priority. | +| `HYPERFRAMES_API_KEY` | Alias for `HEYGEN_API_KEY`. | +| `HEYGEN_API_URL` | API base URL (default `https://api.heygen.com`). | +| `HEYGEN_CONFIG_DIR` | Credentials directory (default `~/.heygen`). 
| +| `ELEVENLABS_API_KEY` | ElevenLabs TTS, used when no HeyGen credential is present. | +| `GEMINI_API_KEY` / `GOOGLE_API_KEY` | Lyria music generation (and capture descriptions). | +| `OPENROUTER_API_KEY` | Capture descriptions; takes priority over Gemini for that step. | + +See the [`hyperframes auth`](/packages/cli#hyperframes-auth) command reference for subcommand details, and [Cloud rendering](/deploy/cloud) for using the same credential to render in HeyGen's cloud. diff --git a/docs/packages/cli.mdx b/docs/packages/cli.mdx index 1fc75cfa35..ac7cb156b4 100644 --- a/docs/packages/cli.mdx +++ b/docs/packages/cli.mdx @@ -995,6 +995,8 @@ hyperframes auth logout --yes # skip the confirmation prompt | `HEYGEN_API_URL` | API base URL (default `https://api.heygen.com`). | | `HEYGEN_CONFIG_DIR` | Credentials directory (default `~/.heygen`). | +For the keys other capabilities use — ElevenLabs and Gemini for voice/music fallback, OpenRouter/Gemini for capture — and how the skills prioritize them, see [Authentication & API keys](/guides/authentication). + ## hyperframes cloud Render a HyperFrames composition on HeyGen's hosted cloud — no local Chrome, no local ffmpeg, no AWS to manage. Sign in once with `hyperframes auth login` and the same credential drives every `cloud` subcommand. 
diff --git a/packages/cli/src/audio/providers.test.ts b/packages/cli/src/audio/providers.test.ts new file mode 100644 index 0000000000..ddb5b9dd29 --- /dev/null +++ b/packages/cli/src/audio/providers.test.ts @@ -0,0 +1,50 @@ +import { describe, expect, it } from "vitest"; +import { decideMusic, decideVoice, KOKORO_PIP, MUSICGEN_PIP } from "./providers.js"; + +describe("decideVoice — mirrors the skill's heygen → elevenlabs → kokoro order", () => { + it("prefers HeyGen when configured", () => { + const r = decideVoice({ hasHeygen: true, elevenlabs: true, kokoro: true }); + expect(r.engine).toBe("heygen"); + expect(r.ready).toBe(true); + }); + + it("falls to ElevenLabs only when key + module are both present", () => { + expect(decideVoice({ hasHeygen: false, elevenlabs: true, kokoro: true }).engine).toBe( + "elevenlabs", + ); + }); + + it("falls to Kokoro when no cloud provider is usable", () => { + expect(decideVoice({ hasHeygen: false, elevenlabs: false, kokoro: true }).engine).toBe( + "kokoro", + ); + }); + + it("flags Kokoro as not-ready with a pip hint when deps are missing", () => { + const r = decideVoice({ hasHeygen: false, elevenlabs: false, kokoro: false }); + expect(r.engine).toBe("kokoro"); + expect(r.ready).toBe(false); + expect(r.setupHint).toBe(KOKORO_PIP); + }); + + it("omits the hint when Kokoro is ready", () => { + expect( + decideVoice({ hasHeygen: false, elevenlabs: false, kokoro: true }).setupHint, + ).toBeUndefined(); + }); +}); + +describe("decideMusic — mirrors the skill's heygen → lyria → musicgen order", () => { + it("prefers HeyGen, then Lyria, then MusicGen", () => { + expect(decideMusic({ hasHeygen: true, lyria: true, musicgen: true }).engine).toBe("heygen"); + expect(decideMusic({ hasHeygen: false, lyria: true, musicgen: true }).engine).toBe("lyria"); + expect(decideMusic({ hasHeygen: false, lyria: false, musicgen: true }).engine).toBe("musicgen"); + }); + + it("flags MusicGen as not-ready with a pip hint when deps are missing", () => { + 
const r = decideMusic({ hasHeygen: false, lyria: false, musicgen: false }); + expect(r.engine).toBe("musicgen"); + expect(r.ready).toBe(false); + expect(r.setupHint).toBe(MUSICGEN_PIP); + }); +}); diff --git a/packages/cli/src/audio/providers.ts b/packages/cli/src/audio/providers.ts new file mode 100644 index 0000000000..a3eb3d4890 --- /dev/null +++ b/packages/cli/src/audio/providers.ts @@ -0,0 +1,102 @@ +/** + * Which voice / music engine a workflow will actually use, and whether + * its local dependencies are present. Mirrors the resolution order the + * hyperframes-media skill scripts use, so `auth status` and `doctor` + * report the same engine the render pipeline would pick: + * + * voice: HeyGen Starfish → ElevenLabs (key + `elevenlabs`) → Kokoro (local) + * music: HeyGen library → Lyria (key + `google.genai`) → MusicGen (local) + * + * The decision is split from the probing: `decide*` is pure (unit-tested + * without spawning Python); `gather*` collects the live facts. + */ + +import { hasPythonModules } from "../tts/python.js"; + +/** Python import names probed for each local engine. */ +export const KOKORO_MODULES = ["kokoro_onnx", "soundfile"]; +export const MUSICGEN_MODULES = ["transformers", "torch", "soundfile", "numpy"]; + +/** pip one-liners shown when a local engine's deps are missing. */ +export const KOKORO_PIP = "pip install kokoro-onnx soundfile"; +export const MUSICGEN_PIP = "pip install transformers torch soundfile numpy"; + +export type VoiceEngine = "heygen" | "elevenlabs" | "kokoro"; +export type MusicEngine = "heygen" | "lyria" | "musicgen"; + +export interface EngineReadiness<E extends string> { + engine: E; + /** Human label, e.g. "Kokoro". */ + label: string; + /** A local engine (no account needed) vs a cloud provider keyed by env. */ + local: boolean; + /** Usable right now: cloud key present, or local deps installed. */ + ready: boolean; + /** Shown when `ready` is false — how to make it ready.
*/ + setupHint?: string; +} + +export interface VoiceFacts { + hasHeygen: boolean; + /** ELEVENLABS_API_KEY set AND the `elevenlabs` module importable. */ + elevenlabs: boolean; + /** Kokoro's local deps importable. */ + kokoro: boolean; +} + +export interface MusicFacts { + hasHeygen: boolean; + /** A Gemini/Google key set AND `google.genai` importable. */ + lyria: boolean; + /** MusicGen's local deps importable. */ + musicgen: boolean; +} + +export function decideVoice(f: VoiceFacts): EngineReadiness<VoiceEngine> { + if (f.hasHeygen) return { engine: "heygen", label: "HeyGen Starfish", local: false, ready: true }; + if (f.elevenlabs) return { engine: "elevenlabs", label: "ElevenLabs", local: false, ready: true }; + return { + engine: "kokoro", + label: "Kokoro", + local: true, + ready: f.kokoro, + ...(f.kokoro ? {} : { setupHint: KOKORO_PIP }), + }; +} + +export function decideMusic(f: MusicFacts): EngineReadiness<MusicEngine> { + if (f.hasHeygen) return { engine: "heygen", label: "HeyGen library", local: false, ready: true }; + if (f.lyria) return { engine: "lyria", label: "Lyria (Gemini)", local: false, ready: true }; + return { + engine: "musicgen", + label: "MusicGen", + local: true, + ready: f.musicgen, + ...(f.musicgen ? {} : { setupHint: MUSICGEN_PIP }), + }; +} + +/** Collect live voice facts. Skips Python probes when HeyGen is configured. */ +function gatherVoiceFacts(hasHeygen: boolean): VoiceFacts { + if (hasHeygen) return { hasHeygen, elevenlabs: false, kokoro: false }; + const elevenlabs = Boolean(process.env["ELEVENLABS_API_KEY"]) && hasPythonModules(["elevenlabs"]); + const kokoro = hasPythonModules(KOKORO_MODULES); + return { hasHeygen, elevenlabs, kokoro }; +} + +/** Collect live music facts. Skips Python probes when HeyGen is configured.
*/ +function gatherMusicFacts(hasHeygen: boolean): MusicFacts { + if (hasHeygen) return { hasHeygen, lyria: false, musicgen: false }; + const hasLyriaKey = Boolean(process.env["GEMINI_API_KEY"] || process.env["GOOGLE_API_KEY"]); + const lyria = hasLyriaKey && hasPythonModules(["google.genai"]); + const musicgen = hasPythonModules(MUSICGEN_MODULES); + return { hasHeygen, lyria, musicgen }; +} + +export function resolveVoice(hasHeygen: boolean): EngineReadiness<VoiceEngine> { + return decideVoice(gatherVoiceFacts(hasHeygen)); +} + +export function resolveMusic(hasHeygen: boolean): EngineReadiness<MusicEngine> { + return decideMusic(gatherMusicFacts(hasHeygen)); +} diff --git a/packages/cli/src/commands/auth/status-guidance.ts b/packages/cli/src/commands/auth/status-guidance.ts new file mode 100644 index 0000000000..08f5244262 --- /dev/null +++ b/packages/cli/src/commands/auth/status-guidance.ts @@ -0,0 +1,105 @@ +/** + * Onboarding guidance shown by `auth status` when nothing is configured. + * + * Kept separate from `status.ts` so the wording is pure (it depends only + * on colors, not on the credential resolver / API client / system probe) + * and can be unit-tested without booting the whole CLI dependency graph. + * Environment detection lives in `status.ts`; this module only renders. + */ + +import { c } from "../../ui/colors.js"; + +export interface UnconfiguredContext { + /** A human can act on guidance now — a TTY, or a coding agent driving the CLI. */ + interactive: boolean; +} + +/** The local engine a workflow will fall back to, and whether it's ready. */ +export interface OfflineEngineLine { + capability: "voice" | "music"; + /** Engine label, e.g. "Kokoro" / "MusicGen". */ + label: string; + /** Deps installed (local) or key present (cloud) — usable right now. */ + ready: boolean; + /** How to make it ready, shown when `ready` is false. */ + setupHint?: string; +} + +/** The recommended first step; sign-in and sign-up are the same OAuth flow.
*/ +const RECOMMENDED_ACTION = "npx hyperframes auth login"; + +/** + * Render the "what offline will use" block from probed engine readiness. + * Falls back to a generic one-liner when readiness wasn't probed (e.g. a + * caller that didn't want to spawn Python). + */ +function offlineEngineLines(engines?: OfflineEngineLine[]): string[] { + if (!engines || engines.length === 0) { + return [ + c.dim("Prefer offline? Just continue — local engines (Kokoro · MusicGen) need no account."), + ]; + } + const lines = ["Prefer offline? Workflows will use these local engines:"]; + for (const e of engines) { + const cap = e.capability.padEnd(5); + if (e.ready) { + lines.push(` ${cap} → ${e.label} ${c.success("✓ ready")}`); + } else { + lines.push(` ${cap} → ${e.label} ${c.warn("⚠ deps missing")}`); + if (e.setupHint) lines.push(` ${c.dim(e.setupHint)}`); + } + } + if (engines.some((e) => !e.ready)) { + lines.push(c.dim(" (or run `hyperframes doctor` to check the local toolchain)")); + } + return lines; +} + +/** + * Human guidance for an unconfigured machine — registration-first. + * Both paths use `npx hyperframes` (zero-install via npm): browser OAuth + * (sign-in / sign-up) and `--api-key` both write `~/.heygen`. The separate + * `heygen` CLI shares that file but needs its own install (no `npx heygen`), + * so it's left to the docs — not dangled here as a command a fresh machine + * can't run. Names the local fallback so "no key" never reads as a failure, + * and never steers users toward a per-repo `.env`. Mirrors the + * hyperframes-media skill's Preflight section. 
+ */ +export function buildUnconfiguredLines( + ctx: UnconfiguredContext, + engines?: OfflineEngineLine[], +): string[] { + if (!ctx.interactive) { + return [ + c.warn("Not signed in to HeyGen (non-interactive)."), + c.dim( + "Set HEYGEN_API_KEY to use HeyGen, or workflows fall back to local engines (Kokoro voice · MusicGen music).", + ), + ]; + } + return [ + c.warn("Not signed in to HeyGen — voice & music will use local engines (free, offline)."), + "", + "Sign in or sign up (browser OAuth, writes ~/.heygen — no per-repo .env):", + ` ${c.accent("npx hyperframes auth login")} ${c.dim("# browser sign-in / sign-up")}`, + "", + "Or paste an existing HeyGen API key (get one at app.heygen.com/settings/api):", + ` ${c.accent("npx hyperframes auth login --api-key")} ${c.dim("# paste at the prompt")}`, + "", + ...offlineEngineLines(engines), + ]; +} + +/** Machine-readable form of the unconfigured guidance for `--json`. */ +export function buildUnconfiguredJson( + ctx: UnconfiguredContext, + engines?: OfflineEngineLine[], +): Record<string, unknown> { + return { + configured: false, + interactive: ctx.interactive, + recommended_action: RECOMMENDED_ACTION, + fallback: "local", + ...(engines ?
{ offline_engines: engines } : {}), + }; +} diff --git a/packages/cli/src/commands/auth/status.test.ts b/packages/cli/src/commands/auth/status.test.ts new file mode 100644 index 0000000000..41b44ae936 --- /dev/null +++ b/packages/cli/src/commands/auth/status.test.ts @@ -0,0 +1,120 @@ +import { describe, expect, it } from "vitest"; +import { + buildUnconfiguredJson, + buildUnconfiguredLines, + type OfflineEngineLine, + type UnconfiguredContext, +} from "./status-guidance.js"; + +const INTERACTIVE: UnconfiguredContext = { interactive: true }; +const NON_INTERACTIVE: UnconfiguredContext = { interactive: false }; + +function joined(ctx: UnconfiguredContext, engines?: OfflineEngineLine[]): string { + return buildUnconfiguredLines(ctx, engines).join("\n"); +} + +describe("buildUnconfiguredLines — interactive (TTY / agent-driven)", () => { + const text = joined(INTERACTIVE); + + it("makes browser OAuth the hyperframes path", () => { + expect(text).toContain("hyperframes auth login"); + expect(text).toMatch(/browser oauth/i); + expect(text).toMatch(/sign in or sign up/i); + }); + + it("never steers users toward a per-repo .env", () => { + // The improvised flow recommended writing keys into videos//.env; + // this guidance must actively rule that out, not suggest it. 
+ expect(text).toContain("no per-repo .env"); + expect(text).not.toMatch(/paste keys.*\.env/i); + }); + + it("names the local fallback so 'no key' never reads as a failure", () => { + expect(text).toMatch(/Kokoro/); + expect(text).toMatch(/MusicGen/); + expect(text).toMatch(/free, offline/i); + }); + + it("shows only zero-install `npx hyperframes` paths, not the separately-installed heygen CLI", () => { + expect(text).not.toMatch(/heygen auth login/); + expect(text).toContain("npx hyperframes auth login"); + expect(text).toContain("npx hyperframes auth login --api-key"); + }); + + it("offers the --api-key path as a secondary option", () => { + expect(text).toContain("hyperframes auth login --api-key"); + }); +}); + +describe("buildUnconfiguredLines — non-interactive (CI / piped)", () => { + const lines = buildUnconfiguredLines(NON_INTERACTIVE); + const text = lines.join("\n"); + + it("is terse — two lines, no browser walkthrough", () => { + expect(lines).toHaveLength(2); + expect(text).not.toMatch(/opens your browser/i); + }); + + it("points at HEYGEN_API_KEY and the local fallback", () => { + expect(text).toContain("HEYGEN_API_KEY"); + expect(text).toMatch(/local engines/i); + }); +}); + +describe("buildUnconfiguredLines — offline engine readiness", () => { + const ready: OfflineEngineLine[] = [ + { capability: "voice", label: "Kokoro", ready: true }, + { capability: "music", label: "MusicGen", ready: true }, + ]; + const missing: OfflineEngineLine[] = [ + { capability: "voice", label: "Kokoro", ready: true }, + { + capability: "music", + label: "MusicGen", + ready: false, + setupHint: "pip install transformers torch soundfile numpy", + }, + ]; + + it("shows the resolved engine per capability when ready", () => { + const text = joined(INTERACTIVE, ready); + expect(text).toMatch(/voice .*Kokoro/); + expect(text).toMatch(/music .*MusicGen/); + expect(text).toMatch(/ready/); + }); + + it("surfaces the pip setup hint and doctor pointer when a dep is missing", () => { 
+ const text = joined(INTERACTIVE, missing); + expect(text).toContain("pip install transformers torch soundfile numpy"); + expect(text).toMatch(/deps missing/); + expect(text).toContain("hyperframes doctor"); + }); + + it("falls back to a generic line when readiness wasn't probed", () => { + const text = joined(INTERACTIVE); + expect(text).toMatch(/Kokoro/); + expect(text).toMatch(/MusicGen/); + }); +}); + +describe("buildUnconfiguredJson", () => { + it("recommends auth login and reports the local fallback", () => { + for (const ctx of [INTERACTIVE, NON_INTERACTIVE]) { + const payload = buildUnconfiguredJson(ctx); + expect(payload).toMatchObject({ + configured: false, + interactive: ctx.interactive, + recommended_action: "npx hyperframes auth login", + fallback: "local", + }); + } + }); + + it("includes probed engines when provided", () => { + const engines: OfflineEngineLine[] = [ + { capability: "voice", label: "Kokoro", ready: true }, + { capability: "music", label: "MusicGen", ready: false, setupHint: "pip install ..." }, + ]; + expect(buildUnconfiguredJson(INTERACTIVE, engines)).toMatchObject({ offline_engines: engines }); + }); +}); diff --git a/packages/cli/src/commands/auth/status.ts b/packages/cli/src/commands/auth/status.ts index 832f6a0d07..d083b1079f 100644 --- a/packages/cli/src/commands/auth/status.ts +++ b/packages/cli/src/commands/auth/status.ts @@ -4,6 +4,14 @@ * * Exits non-zero when nothing is configured or the API rejects the * credential, so scripts can check "am I logged in?" with `$?`. + * + * When nothing is configured the output is onboarding-first: an + * interactive session (a TTY, or a coding agent driving the CLI) gets + * registration guidance led by `hyperframes auth login` — sign-in and + * sign-up are the same OAuth step — while CI / non-interactive runs get + * a terse note and continue on local fallbacks. 
This is the shared + * preflight every TTS/BGM workflow relays, so the wording lives in one + * place instead of each workflow improvising its own. */ import { defineCommand } from "citty"; @@ -15,7 +23,15 @@ import { type ResolvedCredential, type UserInfo, } from "../../auth/index.js"; +import { getSystemMeta } from "../../telemetry/system.js"; import { c } from "../../ui/colors.js"; +import { resolveMusic, resolveVoice } from "../../audio/providers.js"; +import { + buildUnconfiguredJson, + buildUnconfiguredLines, + type OfflineEngineLine, + type UnconfiguredContext, +} from "./status-guidance.js"; interface VerifiedStatus { credential: ResolvedCredential; @@ -54,13 +70,44 @@ export default defineCommand({ }, }); +/** + * Decide whether to show full onboarding guidance or a terse note. + * CI is never "interactive" even on a TTY; an agent runtime counts as + * interactive because a human is watching its relayed output. + */ +function detectUnconfiguredContext(): UnconfiguredContext { + const sys = getSystemMeta(); + return { interactive: !sys.is_ci && (sys.is_tty || sys.agent_runtime !== null) }; +} + +/** + * Probe the local voice/music engines a workflow would fall back to. + * `hasHeygen` is false here by construction — we only reach this when no + * credential resolved — so this reports the offline engines and whether + * their Python deps are installed. + */ +function collectOfflineEngines(): OfflineEngineLine[] { + const voice = resolveVoice(false); + const music = resolveMusic(false); + return [ + { capability: "voice", label: voice.label, ready: voice.ready, ...hint(voice.setupHint) }, + { capability: "music", label: music.label, ready: music.ready, ...hint(music.setupHint) }, + ]; +} + +function hint(setupHint: string | undefined): { setupHint?: string } { + return setupHint ? 
{ setupHint } : {}; +} + function handleUnconfigured(asJson: boolean): never { - if (asJson) { - console.log(JSON.stringify({ configured: false })); - } else { - console.log(c.warn("Not signed in to HeyGen.")); - console.log(`Run ${c.accent("hyperframes auth login --api-key")} to sign in.`); - } + const ctx = detectUnconfiguredContext(); + // Probe engines for JSON (skills parse it) and interactive guidance; skip + // the Python probes for terse non-interactive/CI output to stay fast. + const engines = asJson || ctx.interactive ? collectOfflineEngines() : undefined; + const output = asJson + ? JSON.stringify(buildUnconfiguredJson(ctx, engines)) + : buildUnconfiguredLines(ctx, engines).join("\n"); + console.log(output); process.exit(1); } diff --git a/packages/cli/src/commands/doctor.ts b/packages/cli/src/commands/doctor.ts index 6619b164d8..f3fe14822f 100644 --- a/packages/cli/src/commands/doctor.ts +++ b/packages/cli/src/commands/doctor.ts @@ -5,6 +5,8 @@ import { platform } from "node:os"; import type { Example } from "./_examples.js"; import { c } from "../ui/colors.js"; import { parseToolVersion, runEnvironmentChecks } from "../browser/preflight.js"; +import { KOKORO_MODULES, KOKORO_PIP, MUSICGEN_MODULES, MUSICGEN_PIP } from "../audio/providers.js"; +import { hasPythonModules } from "../tts/python.js"; import { VERSION } from "../version.js"; import { getUpdateMeta, withMeta } from "../utils/updateCheck.js"; import { @@ -158,6 +160,24 @@ async function checkWhisper(): Promise<CheckResult> { }; } +function checkLocalVoice(): CheckResult { + if (hasPythonModules(KOKORO_MODULES)) return { ok: true, detail: "Kokoro deps installed" }; + return { + ok: false, + detail: "Not installed (optional \u2014 local voice fallback)", + hint: KOKORO_PIP, + }; +} + +function checkLocalMusic(): CheckResult { + if (hasPythonModules(MUSICGEN_MODULES)) return { ok: true, detail: "MusicGen deps installed" }; + return { + ok: false, + detail: "Not installed (optional \u2014 local music fallback)",
+ hint: MUSICGEN_PIP, + }; +} + export interface CheckOutcome { name: string; ok: boolean; @@ -227,6 +247,8 @@ export default defineCommand({ checks.push({ name: "Environment", run: checkEnvironment }); checks.push({ name: "whisper-cpp", run: checkWhisper }); + checks.push({ name: "TTS (Kokoro)", run: checkLocalVoice }); + checks.push({ name: "BGM (MusicGen)", run: checkLocalMusic }); const outcomes: CheckOutcome[] = []; for (const check of checks) { diff --git a/packages/cli/src/tts/python.ts b/packages/cli/src/tts/python.ts new file mode 100644 index 0000000000..13e6111d40 --- /dev/null +++ b/packages/cli/src/tts/python.ts @@ -0,0 +1,74 @@ +/** + * Shared Python-runtime probes. Used by Kokoro synthesis (which must + * actually `import` a module before using it) and by the `auth status` / + * `doctor` readiness checks (which only need to know whether a module is + * installed, cheaply, without paying the cost of importing heavy packages + * like torch). + */ + +import { execFileSync } from "node:child_process"; + +/** Locate a `python3` (or `python`) on PATH that reports as Python 3. */ +export function findPython(): string | undefined { + for (const name of ["python3", "python"]) { + try { + const cmd = process.platform === "win32" ? "where" : "which"; + const output = execFileSync(cmd, [name], { + encoding: "utf-8", + stdio: ["pipe", "pipe", "pipe"], + timeout: 5000, + }); + const first = output + .split(/\r?\n/) + .map((s) => s.trim()) + .find(Boolean); + if (!first) continue; + + // Verify it's Python 3 + const version = execFileSync(first, ["--version"], { + encoding: "utf-8", + stdio: ["pipe", "pipe", "pipe"], + timeout: 5000, + }).trim(); + + if (version.includes("Python 3")) return first; + } catch { + // not found or not Python 3 + } + } + return undefined; +} + +/** True if `import <pkg>` succeeds — actually executes the module.
*/ +export function hasPythonPackage(python: string, pkg: string): boolean { + try { + execFileSync(python, ["-c", `import ${pkg}`], { + stdio: ["pipe", "pipe", "pipe"], + timeout: 10_000, + }); + return true; + } catch { + return false; + } +} + +/** + * True if every module is installed, checked via `importlib.util.find_spec` + * so heavy packages (torch) are never imported — fast enough for a preflight. + * Returns false when no Python 3 is found. + */ +export function hasPythonModules(modules: string[]): boolean { + const python = findPython(); + if (!python) return false; + const list = JSON.stringify(modules); + const probe = `import importlib.util,sys; sys.exit(0 if all(importlib.util.find_spec(m) for m in ${list}) else 1)`; + try { + execFileSync(python, ["-c", probe], { + stdio: ["pipe", "pipe", "pipe"], + timeout: 10_000, + }); + return true; + } catch { + return false; + } +} diff --git a/packages/cli/src/tts/synthesize.ts b/packages/cli/src/tts/synthesize.ts index 829417914d..cc887b81f2 100644 --- a/packages/cli/src/tts/synthesize.ts +++ b/packages/cli/src/tts/synthesize.ts @@ -1,3 +1,4 @@ +// fallow-ignore-file complexity import { execFileSync } from "node:child_process"; import { existsSync, writeFileSync, mkdirSync, readdirSync, unlinkSync } from "node:fs"; import { join, dirname, basename } from "node:path"; @@ -9,52 +10,7 @@ import { inferLangFromVoiceId, type SupportedLang, } from "./manager.js"; - -// --------------------------------------------------------------------------- -// Python runtime detection -// --------------------------------------------------------------------------- - -function findPython(): string | undefined { - for (const name of ["python3", "python"]) { - try { - const cmd = process.platform === "win32" ? 
"where" : "which"; - const output = execFileSync(cmd, [name], { - encoding: "utf-8", - stdio: ["pipe", "pipe", "pipe"], - timeout: 5000, - }); - const first = output - .split(/\r?\n/) - .map((s) => s.trim()) - .find(Boolean); - if (!first) continue; - - // Verify it's Python 3 - const version = execFileSync(first, ["--version"], { - encoding: "utf-8", - stdio: ["pipe", "pipe", "pipe"], - timeout: 5000, - }).trim(); - - if (version.includes("Python 3")) return first; - } catch { - // not found or not Python 3 - } - } - return undefined; -} - -function hasPythonPackage(python: string, pkg: string): boolean { - try { - execFileSync(python, ["-c", `import ${pkg}`], { - stdio: ["pipe", "pipe", "pipe"], - timeout: 10_000, - }); - return true; - } catch { - return false; - } -} +import { findPython, hasPythonPackage } from "./python.js"; // --------------------------------------------------------------------------- // Inline Python script for Kokoro synthesis diff --git a/packages/cli/src/utils/lintProject.test.ts b/packages/cli/src/utils/lintProject.test.ts index e31601aac9..aec22d15e2 100644 --- a/packages/cli/src/utils/lintProject.test.ts +++ b/packages/cli/src/utils/lintProject.test.ts @@ -906,6 +906,19 @@ describe("multiple_root_compositions", () => { expect(finding).toBeUndefined(); }); + it("ignores root-level caption-skin.html source files", async () => { + const project = makeProject(validHtml()); + writeFileSync( + join(project.dir, "caption-skin.html"), + '
', + ); + const { results } = await lintProject(project); + const finding = results[0]?.result.findings.find( + (f) => f.code === "multiple_root_compositions", + ); + expect(finding).toBeUndefined(); + }); + it("ignores HTML files without data-composition-id", async () => { const project = makeProject(validHtml()); writeFileSync(join(project.dir, "readme.html"), "Not a composition"); diff --git a/packages/cli/src/utils/lintProject.ts b/packages/cli/src/utils/lintProject.ts index 2fe61902ec..8558c852bf 100644 --- a/packages/cli/src/utils/lintProject.ts +++ b/packages/cli/src/utils/lintProject.ts @@ -477,6 +477,7 @@ function lintMultipleRootCompositions(projectDir: string): HyperframeLintFinding const rootHtmlFiles = readdirSync(projectDir).filter((f) => f.endsWith(".html")); const rootCompositions: string[] = []; for (const file of rootHtmlFiles) { + if (file === "caption-skin.html") continue; const content = readFileSync(join(projectDir, file), "utf-8"); if (/data-composition-id/i.test(content)) { rootCompositions.push(file); diff --git a/packages/core/src/lint/context.ts b/packages/core/src/lint/context.ts index 9de887df56..7ee0d75164 100644 --- a/packages/core/src/lint/context.ts +++ b/packages/core/src/lint/context.ts @@ -5,6 +5,7 @@ import { findRootTag, collectCompositionIds, readAttr, + stripHtmlComments, STYLE_BLOCK_PATTERN, SCRIPT_BLOCK_PATTERN, } from "./utils"; @@ -29,7 +30,10 @@ export type { HyperframeLintFinding }; export function buildLintContext(html: string, options: HyperframeLinterOptions = {}): LintContext { const rawSource = html || ""; - let source = rawSource; + // Strip HTML comments before scanning so a commented-out