diff --git a/src/specify_cli/_utils.py b/src/specify_cli/_utils.py index 30c59f553a..d921e591d9 100644 --- a/src/specify_cli/_utils.py +++ b/src/specify_cli/_utils.py @@ -9,7 +9,7 @@ import subprocess import tempfile import yaml -from pathlib import Path +from pathlib import Path, PurePosixPath, PureWindowsPath from typing import Any from ._console import console @@ -17,6 +17,44 @@ CLAUDE_NPM_LOCAL_PATH = Path.home() / ".claude" / "local" / "node_modules" / ".bin" / "claude" +def relative_extension_path_violation(value: Any) -> str | None: + """Return why ``value`` is unsafe as an extension-relative ``file`` path. + + Single source of truth for the path-safety policy shared by + ``ExtensionManifest._validate()`` (manifest-load validation) and + ``CommandRegistrar.register_commands()`` (runtime guard), so the two cannot + drift. Returns a human-readable reason string when ``value`` is unsafe, or + ``None`` when it is an acceptable relative path within the extension + directory. + + Policy: the value must be a non-empty string with no leading/trailing + whitespace, no absolute/anchored form, and no ``..`` traversal. The value is + evaluated under both POSIX and Windows path semantics because a native + ``Path`` is OS-dependent (a ``PurePosixPath`` on POSIX does not interpret + Windows drive/UNC forms, and ``C:foo`` is anchored but not ``is_absolute()`` + yet resolves against the CWD on its drive). Rejecting any non-empty anchor + covers POSIX-absolute (``/abs``), Windows drive-relative (``C:foo``), Windows + absolute (``C:\\foo``), and UNC/rooted forms. + """ + if not isinstance(value, str) or not value: + return "must be a non-empty string" + if value.strip() != value: + return "must not have leading or trailing whitespace" + posix_path = PurePosixPath(value) + win_path = PureWindowsPath(value) + if ( + posix_path.anchor + or win_path.anchor + or ".." in posix_path.parts + or ".." in win_path.parts + ): + return ( + "must be a relative path within the extension directory " + "(no absolute paths, drive letters, or '..' segments)" + ) + return None + + def dump_frontmatter(data: dict[str, Any]) -> str: """Serialize skill/command frontmatter to a YAML string. diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index 3c06418014..881517b427 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -16,6 +16,7 @@ import yaml from ._init_options import is_ai_skills_enabled, load_init_options +from ._utils import relative_extension_path_violation def _build_agent_configs() -> dict[str, Any]: @@ -540,17 +541,42 @@ def register_commands( registered = [] is_cline_ext = agent_name == "cline" and source_id != "core" + source_root = source_dir.resolve() for cmd_info in commands: cmd_name = cmd_info["name"] aliases = cmd_info.get("aliases", []) cmd_file = cmd_info["file"] - source_file = source_dir / cmd_file - if not source_file.exists(): + # Guard against path traversal using the single shared policy in + # relative_extension_path_violation(), so the runtime guard stays + # aligned with ExtensionManifest._validate() and the skill/preset + # readers. Skip a malformed/unsafe ``file`` (non-string, empty, + # whitespace, absolute/anchored, or ``..`` traversal); the + # resolve()/relative_to() check below is the final containment + # backstop. + if relative_extension_path_violation(cmd_file): + continue + try: + source_file = (source_root / cmd_file).resolve() + source_file.relative_to(source_root) # raises ValueError if outside + except (OSError, ValueError): + continue + + if not source_file.is_file(): continue - content = source_file.read_text(encoding="utf-8") + try: + content = source_file.read_text(encoding="utf-8") + except (OSError, UnicodeDecodeError) as exc: + import warnings + + warnings.warn( + f"Skipping command '{cmd_name}': could not read source file " + f"'{cmd_file}' ({exc.__class__.__name__}: {exc}).", + stacklevel=2, + ) + continue frontmatter, body = self.parse_frontmatter(content) if frontmatter.get("strategy") == "wrap": diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 42ba2fe888..03bfa6c4b7 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -28,7 +28,7 @@ from ._init_options import is_ai_skills_enabled from ._invocation_style import is_slash_skills_agent -from ._utils import dump_frontmatter +from ._utils import dump_frontmatter, relative_extension_path_violation from .catalogs import CatalogEntry as BaseCatalogEntry from .catalogs import CatalogStackBase @@ -290,6 +290,18 @@ def _validate(self): if "name" not in cmd or "file" not in cmd: raise ValidationError("Command missing 'name' or 'file'") + # Validate the 'file' field at manifest-load time using the single + # shared policy in relative_extension_path_violation(), so manifest + # validation cannot drift from the runtime registrar guard. This is + # defense-in-depth: the command/skill/preset readers also contain + # the resolved path, but rejecting an unsafe value here surfaces a + # clear error instead of silently skipping the command. + cmd_file = cmd["file"] + reason = relative_extension_path_violation(cmd_file) + if reason: + label = repr(cmd_file) if isinstance(cmd_file, str) else f"for command '{cmd.get('name')}'" + raise ValidationError(f"Invalid command 'file' {label}: {reason}") + # Validate command name format if not EXTENSION_COMMAND_NAME_PATTERN.match(cmd["name"]): corrected = self._try_correct_command_name(cmd["name"], ext["id"]) diff --git a/tests/test_extensions.py b/tests/test_extensions.py index e063571b14..9cf0167ce5 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -377,6 +377,40 @@ def test_invalid_command_name(self, temp_dir, valid_manifest_data): with pytest.raises(ValidationError, match="Invalid command name"): ExtensionManifest(manifest_path) + @pytest.mark.parametrize( + "bad_file", + ["../../../outside.md", "../escape.md", "a/../../escape.md", "/abs/outside.md", "C:escape.md", "C:\\Windows\\x.md", "..\\..\\escape.md"], + ) + def test_command_file_traversal_rejected(self, temp_dir, valid_manifest_data, bad_file): + """Manifest 'file' field with traversal/absolute path raises ValidationError. + + Defense-in-depth for GHSA-w5fv-7w9x-7fc5. + """ + import yaml + + valid_manifest_data["provides"]["commands"][0]["file"] = bad_file + + manifest_path = temp_dir / "extension.yml" + with open(manifest_path, 'w', encoding='utf-8') as f: + yaml.safe_dump(valid_manifest_data, f) + + with pytest.raises(ValidationError, match="Invalid command 'file'"): + ExtensionManifest(manifest_path) + + @pytest.mark.parametrize("bad_file", [" commands/hello.md", "commands/hello.md ", "\tcommands/hello.md"]) + def test_command_file_whitespace_rejected(self, temp_dir, valid_manifest_data, bad_file): + """Manifest 'file' with leading/trailing whitespace raises ValidationError.""" + import yaml + + valid_manifest_data["provides"]["commands"][0]["file"] = bad_file + + manifest_path = temp_dir / "extension.yml" + with open(manifest_path, 'w', encoding='utf-8') as f: + yaml.safe_dump(valid_manifest_data, f) + + with pytest.raises(ValidationError, match="leading or trailing whitespace"): + ExtensionManifest(manifest_path) + def test_command_name_autocorrect_speckit_prefix(self, temp_dir, valid_manifest_data): """Test that 'speckit.command' is auto-corrected to 'speckit.{ext_id}.command'.""" import yaml diff --git a/tests/test_registrar_path_traversal.py b/tests/test_registrar_path_traversal.py index fc423b4056..1f97a98d87 100644 --- a/tests/test_registrar_path_traversal.py +++ b/tests/test_registrar_path_traversal.py @@ -6,6 +6,7 @@ import pytest from specify_cli.agents import CommandRegistrar +from specify_cli._utils import relative_extension_path_violation TRAVERSAL_PAYLOADS = [ @@ -135,8 +136,185 @@ def test_rejects_traversal_names(self, tmp_path, bad_name): _assert_no_stray_files(tmp_path, Path(bad_name).name.replace("/", "")) -class TestSafeRegistration: - """Positive regression — well-formed names continue to register.""" +ABS_OUTSIDE = "__ABS_OUTSIDE__" + +FILE_FIELD_PAYLOADS = [ + "../outside.txt", + "../../outside.txt", + "commands/../../outside.txt", + "C:outside.txt", + ABS_OUTSIDE, +] + + +def _resolve_payload(bad_file: str, outside_file: Path) -> str: + """Map the absolute-path sentinel to the real, existing outside file. + + Using the temp file's own absolute path (instead of ``/etc/passwd``) + guarantees the file exists on every platform — so the test fails if the + absolute-path guard regresses, rather than passing because the target + happens not to exist (e.g. on Windows runners). + """ + return str(outside_file) if bad_file == ABS_OUTSIDE else bad_file + + +def _assert_no_marker_leak(project: Path, marker: str) -> None: + """Fail if ``marker`` content was written into any file under ``project``.""" + leaked = [ + p for p in project.rglob("*") + if p.is_file() and marker in p.read_text(encoding="utf-8", errors="ignore") + ] + assert leaked == [], f"Outside file leaked into generated command: {leaked}" + + +class TestCommandFileTraversal: + """The manifest ``file`` field must not read files outside source_dir. + + Regression for GHSA-w5fv-7w9x-7fc5: ``register_commands`` read + ``source_dir / cmd_file`` with no containment check, so a manifest with + a traversal (``file: ../../../outside.txt``) or an absolute path read an + arbitrary host file verbatim into the generated agent command. + """ + + @pytest.mark.parametrize("bad_file", FILE_FIELD_PAYLOADS) + def test_claude_skips_traversal_in_file_field(self, tmp_path, bad_file): + project, ext_dir = _project_and_source(tmp_path) + (project / ".claude" / "skills").mkdir(parents=True) + + outside_file = tmp_path / "outside.txt" + outside_file.write_text("OUTSIDE-FILE-MARKER", encoding="utf-8") + + registrar = CommandRegistrar() + registered = registrar.register_commands( + "claude", + [{"name": "speckit.myext.hello", "file": _resolve_payload(bad_file, outside_file), "aliases": []}], + "myext", + ext_dir, + project, + ) + + assert registered == [] + _assert_no_marker_leak(project, "OUTSIDE-FILE-MARKER") + + @pytest.mark.parametrize("bad_file", FILE_FIELD_PAYLOADS) + def test_gemini_skips_traversal_in_file_field(self, tmp_path, bad_file): + project, ext_dir = _project_and_source(tmp_path) + (project / ".gemini" / "commands").mkdir(parents=True) + + outside_file = tmp_path / "outside.txt" + outside_file.write_text("OUTSIDE-FILE-MARKER", encoding="utf-8") + + registrar = CommandRegistrar() + registered = registrar.register_commands( + "gemini", + [{"name": "speckit.myext.hello", "file": _resolve_payload(bad_file, outside_file), "aliases": []}], + "myext", + ext_dir, + project, + ) + + assert registered == [] + _assert_no_marker_leak(project, "OUTSIDE-FILE-MARKER") + + @pytest.mark.parametrize("bad_value", [None, 123, "", ["x"]]) + def test_non_string_file_is_skipped(self, tmp_path, bad_value): + """A non-string/empty ``file`` must be skipped, not raise TypeError.""" + project, ext_dir = _project_and_source(tmp_path) + (project / ".gemini" / "commands").mkdir(parents=True) + + registrar = CommandRegistrar() + registered = registrar.register_commands( + "gemini", + [{"name": "speckit.myext.hello", "file": bad_value, "aliases": []}], + "myext", + ext_dir, + project, + ) + + assert registered == [] + + def test_dotdot_rejected_even_when_target_is_in_bounds(self, tmp_path): + """An in-bounds ``..`` payload is rejected by the ``..`` check itself. + + ``commands/../cmd.md`` resolves to ``ext_dir/cmd.md`` — inside + source_dir — so the resolve()/relative_to() containment backstop would + allow it. Creating that target file ensures the command is skipped + because of the ``..`` rejection, not merely because the file is absent. + """ + project, ext_dir = _project_and_source(tmp_path) + (project / ".gemini" / "commands").mkdir(parents=True) + (ext_dir / "cmd.md").write_text( + "---\ndescription: test\n---\n\nbody\n", encoding="utf-8" + ) + + registrar = CommandRegistrar() + registered = registrar.register_commands( + "gemini", + [{"name": "speckit.myext.hello", "file": "commands/../cmd.md", "aliases": []}], + "myext", + ext_dir, + project, + ) + + assert registered == [] + + +class TestRelativeExtensionPathPolicy: + """Unit tests for the shared ``relative_extension_path_violation`` policy.""" + + @pytest.mark.parametrize( + "value", + [ + "commands/hello.md", + "hello.md", + "a/b/c/hello.md", + ], + ) + def test_safe_relative_paths_have_no_violation(self, value): + assert relative_extension_path_violation(value) is None + + @pytest.mark.parametrize( + "value", + [ + None, + 123, + ["x"], + "", + " ", + " hello.md", + "hello.md ", + "/abs/outside.md", + "/etc/passwd", + "C:foo.md", + "C:\\Windows\\system32", + "\\\\server\\share\\x.md", + "../escape.md", + "commands/../../escape.md", + ], + ) + def test_unsafe_values_report_violation(self, value): + assert relative_extension_path_violation(value) is not None + + +class TestReadSkipWarning: + """Unregisterable but in-bounds files warn instead of failing silently.""" + + def test_unreadable_target_warns_and_skips(self, tmp_path): + project, ext_dir = _project_and_source(tmp_path) + (project / ".gemini" / "commands").mkdir(parents=True) + (ext_dir / "cmd.md").write_bytes(b"\xff\xfe\x00\x80bad") + + registrar = CommandRegistrar() + with pytest.warns(UserWarning): + registered = registrar.register_commands( + "gemini", + [{"name": "speckit.myext.hello", "file": "cmd.md", "aliases": []}], + "myext", + ext_dir, + project, + ) + + assert registered == [] def test_symlinked_subdir_under_commands_dir_is_preserved(self, tmp_path): """Lexical check must not block legitimately symlinked sub-directories.