From 90ebaafef4ece66df8b8586e052157a9d57ffb09 Mon Sep 17 00:00:00 2001 From: Alex Macdonald-Smith Date: Fri, 8 May 2026 00:46:56 -0400 Subject: [PATCH 1/2] feat: opt-in publish to looptech-ai/understand-quickly registry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a `format=graph` choice and an opt-in `publish` flag to the existing `/export/wiki` endpoint, plus a small `api/publish.py` helper, so a generated DeepWiki can land in the public knowledge-graph registry at `looptech-ai/understand-quickly` with no extra infrastructure. - `format=graph` emits a `generic@1`-shaped graph (pages -> nodes, `relatedPages` -> edges) with `metadata.{tool, tool_version, generated_at, repo_url}`. Existing `markdown` / `json` exports are unchanged. - `publish=true` (default false) fires a `repository_dispatch` `sync-entry` event at the registry, gated on `UNDERSTAND_QUICKLY_TOKEN` in the server env. With the token unset the graph is still produced and the dispatch is skipped — no network call, no failure. - Owner/repo derives from `repo_url` (HTTPS + SSH GitHub shapes) with an explicit `repo: "owner/repo"` override on the request. - `api/publish.py` is stdlib-only (`urllib`, `subprocess`, `re`, `json`); no new dependencies. 15 unit tests cover URL parsing, graph shape, dangling-edge handling, no-op paths, and a mocked dispatch request + soft failure on HTTP error. - README has a short opt-in section under "API Server Details". Protocol reference: https://github.com/looptech-ai/understand-quickly/blob/main/docs/integrations/protocol.md --- README.md | 18 +++ api/api.py | 62 ++++++++++- api/publish.py | 256 +++++++++++++++++++++++++++++++++++++++++++ test/test_publish.py | 222 +++++++++++++++++++++++++++++++++++++ 4 files changed, 553 insertions(+), 5 deletions(-) create mode 100644 api/publish.py create mode 100644 test/test_publish.py diff --git a/README.md b/README.md index 1c017aed8..0c9c0f925 100644 --- a/README.md +++ b/README.md @@ -555,6 +555,24 @@ The API server provides: For more details, see the [API README](./api/README.md). +### Publishing to understand-quickly (opt-in) + +DeepWiki can emit its generated wiki as a [`generic@1`](https://github.com/looptech-ai/understand-quickly/blob/main/schemas/generic@1.json) knowledge graph and (optionally) register it with [`looptech-ai/understand-quickly`](https://github.com/looptech-ai/understand-quickly), a public registry of code-knowledge graphs that ships an MCP server and a stable `registry.json` API. + +```bash +# Existing markdown / json export — unchanged. +curl -X POST http://localhost:8001/export/wiki \ + -H "content-type: application/json" \ + -d '{"repo_url":"https://github.com/owner/repo","format":"json","pages":[...]}' > wiki.json + +# New: emit the knowledge graph and (optionally) ping the registry. +curl -X POST http://localhost:8001/export/wiki \ + -H "content-type: application/json" \ + -d '{"repo_url":"https://github.com/owner/repo","format":"graph","publish":true,"pages":[...]}' > graph.json +``` + +Set `UNDERSTAND_QUICKLY_TOKEN` in the API server env (a fine-grained PAT with `Repository dispatches: write` on `looptech-ai/understand-quickly` only) to enable the dispatch step. With the token unset, `format=graph` still emits the file — the dispatch is simply skipped. See the [integration protocol](https://github.com/looptech-ai/understand-quickly/blob/main/docs/integrations/protocol.md) for the full contract. + ## 🔌 OpenRouter Integration DeepWiki now supports [OpenRouter](https://openrouter.ai/) as a model provider, giving you access to hundreds of AI models through a single API: diff --git a/api/api.py b/api/api.py index d40e73f96..492c64eca 100644 --- a/api/api.py +++ b/api/api.py @@ -115,7 +115,31 @@ class WikiExportRequest(BaseModel): """ repo_url: str = Field(..., description="URL of the repository") pages: List[WikiPage] = Field(..., description="List of wiki pages to export") - format: Literal["markdown", "json"] = Field(..., description="Export format (markdown or json)") + format: Literal["markdown", "json", "graph"] = Field( + ..., + description=( + "Export format. 'markdown' / 'json' are the existing wiki dumps; " + "'graph' emits a generic@1 knowledge graph for the " + "looptech-ai/understand-quickly registry." + ), + ) + publish: bool = Field( + False, + description=( + "If true, after producing the export also fire a " + "repository_dispatch event at looptech-ai/understand-quickly " + "so the registry resyncs the entry. Opt-in; requires " + "UNDERSTAND_QUICKLY_TOKEN in the server env. No-ops cleanly " + "if the token is missing." + ), + ) + repo: Optional[str] = Field( + None, + description=( + "Optional 'owner/repo' override for the registry id. If " + "omitted, derived from `repo_url`." + ), + ) # --- Model Configuration Models --- class Model(BaseModel): @@ -227,7 +251,7 @@ async def get_model_config(): @app.post("/export/wiki") async def export_wiki(request: WikiExportRequest): """ - Export wiki content as Markdown or JSON. + Export wiki content as Markdown, JSON, or a knowledge graph. Args: request: The export request containing wiki pages and format @@ -245,11 +269,40 @@ async def export_wiki(request: WikiExportRequest): # Get current timestamp for the filename timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + publish_status: Optional[Dict[str, Any]] = None + headers: Dict[str, str] = {} + if request.format == "markdown": # Generate Markdown content content = generate_markdown_export(request.repo_url, request.pages) filename = f"{repo_name}_wiki_{timestamp}.md" media_type = "text/markdown" + elif request.format == "graph": + # generic@1 knowledge graph for looptech-ai/understand-quickly. + from api.publish import ( + build_graph_payload, + derive_owner_repo, + publish as publish_to_registry, + ) + + payload = build_graph_payload( + [page.model_dump() for page in request.pages], + repo_url=request.repo_url, + ) + content = json.dumps(payload, indent=2) + filename = f"{repo_name}_graph_{timestamp}.json" + media_type = "application/json" + + if request.publish: + owner_repo = request.repo or derive_owner_repo(request.repo_url) + publish_status = publish_to_registry(payload, owner_repo=owner_repo) + headers["X-Understand-Quickly-Dispatched"] = ( + "true" if publish_status.get("dispatched") else "false" + ) + if publish_status.get("reason"): + headers["X-Understand-Quickly-Reason"] = str( + publish_status["reason"] + ) else: # JSON format # Generate JSON content content = generate_json_export(request.repo_url, request.pages) @@ -257,12 +310,11 @@ async def export_wiki(request: WikiExportRequest): media_type = "application/json" # Create response with appropriate headers for file download + headers["Content-Disposition"] = f"attachment; filename={filename}" response = Response( content=content, media_type=media_type, - headers={ - "Content-Disposition": f"attachment; filename={filename}" - } + headers=headers, ) return response diff --git a/api/publish.py b/api/publish.py new file mode 100644 index 000000000..6823c325d --- /dev/null +++ b/api/publish.py @@ -0,0 +1,256 @@ +""" +Opt-in publishing helpers for the looptech-ai/understand-quickly registry. + +This module is self-contained. It uses the Python stdlib only (no extra +dependencies beyond what the rest of the API already pulls in) and is +imported lazily from ``api.api`` so that an unused publish path costs +nothing at import time. + +The contract is documented at: + https://github.com/looptech-ai/understand-quickly/blob/main/docs/integrations/protocol.md + +DeepWiki emits a wiki graph in the ``generic@1`` format: pages are nodes +(``kind="wiki-page"``), and each ``relatedPages`` reference becomes an +edge (``kind="related"``). +""" + +from __future__ import annotations + +import json +import logging +import os +import re +import subprocess +from datetime import datetime, timezone +from typing import Any, Dict, Iterable, Mapping, Optional, Tuple +from urllib import error as urllib_error +from urllib import request as urllib_request + +logger = logging.getLogger(__name__) + +TOOL_NAME = "deepwiki-open" +DEFAULT_TOOL_VERSION = "1.0.0" +DISPATCH_URL = ( + "https://api.github.com/repos/looptech-ai/understand-quickly/dispatches" +) + + +def derive_owner_repo(remote_url: Optional[str]) -> Optional[str]: + """ + Parse a GitHub remote URL and return ``owner/repo``. + + Handles both HTTPS (``https://github.com/owner/repo(.git)``) and SSH + (``git@github.com:owner/repo(.git)``) shapes. Returns ``None`` for + anything we don't recognise — callers are expected to fall back to + an explicit ``owner_repo`` argument or no-op. + """ + if not remote_url: + return None + url = remote_url.strip() + # SSH: git@github.com:owner/repo(.git) + m = re.match(r"git@github\.com:([^/]+)/([^/]+?)(?:\.git)?/?$", url) + if m: + return f"{m.group(1)}/{m.group(2)}" + # HTTPS / git: https://github.com/owner/repo(.git) + m = re.match( + r"^(?:https?|git)://github\.com/([^/]+)/([^/]+?)(?:\.git)?/?$", url + ) + if m: + return f"{m.group(1)}/{m.group(2)}" + return None + + +def git_head_sha(repo_path: Optional[str] = None) -> Optional[str]: + """ + Return the 40-hex SHA of HEAD in ``repo_path`` (or cwd), or ``None`` + if not a git checkout / git is unavailable. + """ + try: + result = subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=repo_path or None, + capture_output=True, + text=True, + timeout=5, + check=False, + ) + except (OSError, subprocess.SubprocessError) as exc: # pragma: no cover + logger.debug("git rev-parse failed: %s", exc) + return None + if result.returncode != 0: + return None + sha = result.stdout.strip() + if re.fullmatch(r"[0-9a-f]{40}", sha): + return sha + return None + + +def build_graph_payload( + pages: Iterable[Mapping[str, Any]], + *, + repo_url: Optional[str] = None, + tool_version: str = DEFAULT_TOOL_VERSION, + commit: Optional[str] = None, + generated_at: Optional[str] = None, +) -> Dict[str, Any]: + """ + Build a ``generic@1``-shaped graph from a list of WikiPage-like dicts. + + Each page becomes a node; each ``relatedPages`` reference becomes a + directed edge ``page -> related_page`` with ``kind="related"``. File + paths attached to a page are surfaced under ``data.filePaths`` so + downstream tools can map nodes back to source files. + """ + nodes = [] + edges = [] + page_ids = set() + + pages_list = list(pages) + for page in pages_list: + page_ids.add(page.get("id")) + + for page in pages_list: + page_id = page.get("id") + if not page_id: + continue + node: Dict[str, Any] = { + "id": page_id, + "kind": "wiki-page", + "label": page.get("title", page_id), + } + data: Dict[str, Any] = {} + file_paths = page.get("filePaths") or [] + if file_paths: + data["filePaths"] = list(file_paths) + importance = page.get("importance") + if importance: + data["importance"] = importance + if data: + node["data"] = data + nodes.append(node) + + for related_id in page.get("relatedPages") or []: + # Skip dangling refs so the graph stays internally consistent. + if related_id not in page_ids: + continue + edges.append( + { + "source": page_id, + "target": related_id, + "kind": "related", + } + ) + + metadata: Dict[str, Any] = { + "tool": TOOL_NAME, + "tool_version": tool_version, + "generated_at": generated_at + or datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), + } + if commit: + metadata["commit"] = commit + if repo_url: + metadata["repo_url"] = repo_url + + return {"nodes": nodes, "edges": edges, "metadata": metadata} + + +def dispatch_sync( + id_: str, + token: str, + *, + url: str = DISPATCH_URL, + timeout: float = 10.0, +) -> Tuple[bool, Optional[str]]: + """ + Fire a ``repository_dispatch`` ``sync-entry`` event at the registry. + + Returns ``(ok, error_message)``. Network / HTTP errors are caught and + surfaced as a soft failure — the caller is expected to keep going. + """ + body = json.dumps( + {"event_type": "sync-entry", "client_payload": {"id": id_}} + ).encode("utf-8") + req = urllib_request.Request( + url, + data=body, + method="POST", + headers={ + "Accept": "application/vnd.github+json", + "Authorization": f"Bearer {token}", + "X-GitHub-Api-Version": "2022-11-28", + "Content-Type": "application/json", + "User-Agent": f"{TOOL_NAME}/{DEFAULT_TOOL_VERSION}", + }, + ) + try: + with urllib_request.urlopen(req, timeout=timeout) as resp: + status = getattr(resp, "status", 0) or resp.getcode() + if 200 <= status < 300: + return True, None + return False, f"unexpected status {status}" + except urllib_error.HTTPError as exc: + return False, f"HTTP {exc.code}: {exc.reason}" + except urllib_error.URLError as exc: + return False, f"network error: {exc.reason}" + except Exception as exc: # pragma: no cover - defensive + return False, str(exc) + + +def publish( + payload: Mapping[str, Any], + *, + owner_repo: Optional[str] = None, + token: Optional[str] = None, +) -> Dict[str, Any]: + """ + Best-effort publish path. + + Always returns a small status dict. Never raises — callers can wire + this in next to a normal export and trust that a failure here will + not knock over the parent request. + + ``payload`` is the full graph dict (used here only for log lines / + sanity). ``owner_repo`` is an explicit ``owner/repo`` to register + against; if omitted, no dispatch is attempted. + """ + token = token or os.environ.get("UNDERSTAND_QUICKLY_TOKEN") + if not token: + msg = ( + "UNDERSTAND_QUICKLY_TOKEN not set; skipping repository_dispatch. " + "The graph was still produced — register your repo with " + "`npx @understand-quickly/cli add` and the nightly sync will " + "pick it up." + ) + logger.info("[understand-quickly] %s", msg) + return {"dispatched": False, "reason": "no-token", "message": msg} + + if not owner_repo: + msg = ( + "owner/repo could not be determined; skipping dispatch. " + "Pass `repo` explicitly or set the git remote." + ) + logger.info("[understand-quickly] %s", msg) + return {"dispatched": False, "reason": "no-owner-repo", "message": msg} + + ok, err = dispatch_sync(owner_repo, token) + if ok: + logger.info( + "[understand-quickly] dispatched sync-entry for %s", owner_repo + ) + return {"dispatched": True, "id": owner_repo} + + msg = ( + f"dispatch failed for {owner_repo}: {err}. " + "If this repo is not yet in the registry, register it with " + "`npx @understand-quickly/cli add` or the wizard at " + "https://looptech-ai.github.io/understand-quickly/add.html." + ) + logger.warning("[understand-quickly] %s", msg) + return { + "dispatched": False, + "reason": "dispatch-failed", + "id": owner_repo, + "error": err, + "message": msg, + } diff --git a/test/test_publish.py b/test/test_publish.py new file mode 100644 index 000000000..244b39f80 --- /dev/null +++ b/test/test_publish.py @@ -0,0 +1,222 @@ +""" +Unit tests for the opt-in publish path used to register graphs with the +looptech-ai/understand-quickly registry. + +These tests are deliberately self-contained: they import only ``api.publish`` +(stdlib-only dependencies) and never reach for the heavier ``data_pipeline`` +imports, so they run without ``adalflow`` or any AI provider keys. +""" + +from __future__ import annotations + +import json +import os +import re +import sys +from unittest.mock import patch +from urllib import error as urllib_error + +# Make ``api.publish`` importable when this file is run directly. +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +from api.publish import ( # noqa: E402 (path tweak above is intentional) + DISPATCH_URL, + TOOL_NAME, + build_graph_payload, + derive_owner_repo, + dispatch_sync, + publish, +) + + +SAMPLE_PAGES = [ + { + "id": "page-overview", + "title": "Overview", + "content": "Top-level intro.", + "filePaths": ["README.md"], + "importance": "high", + "relatedPages": ["page-architecture"], + }, + { + "id": "page-architecture", + "title": "Architecture", + "content": "Architecture details.", + "filePaths": ["api/api.py", "api/data_pipeline.py"], + "importance": "medium", + "relatedPages": ["page-overview", "page-missing"], # 1 dangling + }, +] + + +class TestDeriveOwnerRepo: + def test_https_url(self): + assert ( + derive_owner_repo("https://github.com/AsyncFuncAI/deepwiki-open") + == "AsyncFuncAI/deepwiki-open" + ) + + def test_https_url_with_git_suffix(self): + assert ( + derive_owner_repo( + "https://github.com/AsyncFuncAI/deepwiki-open.git" + ) + == "AsyncFuncAI/deepwiki-open" + ) + + def test_https_url_with_trailing_slash(self): + assert ( + derive_owner_repo("https://github.com/AsyncFuncAI/deepwiki-open/") + == "AsyncFuncAI/deepwiki-open" + ) + + def test_ssh_url(self): + assert ( + derive_owner_repo("git@github.com:AsyncFuncAI/deepwiki-open.git") + == "AsyncFuncAI/deepwiki-open" + ) + + def test_returns_none_for_unrecognised(self): + assert derive_owner_repo(None) is None + assert derive_owner_repo("") is None + assert derive_owner_repo("https://gitlab.com/owner/repo") is None + assert derive_owner_repo("not a url") is None + + +class TestBuildGraphPayload: + def test_basic_shape_is_generic_v1_compatible(self): + payload = build_graph_payload(SAMPLE_PAGES, repo_url="https://example/x") + assert set(payload.keys()) >= {"nodes", "edges", "metadata"} + assert isinstance(payload["nodes"], list) + assert isinstance(payload["edges"], list) + assert len(payload["nodes"]) == 2 + + def test_metadata_fields(self): + payload = build_graph_payload( + SAMPLE_PAGES, + repo_url="https://github.com/AsyncFuncAI/deepwiki-open", + tool_version="1.2.3", + commit="a" * 40, + ) + meta = payload["metadata"] + assert meta["tool"] == TOOL_NAME == "deepwiki-open" + assert meta["tool_version"] == "1.2.3" + # ISO-8601 UTC, ending in Z + assert re.match( + r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$", meta["generated_at"] + ) + assert meta["commit"] == "a" * 40 + assert meta["repo_url"] == "https://github.com/AsyncFuncAI/deepwiki-open" + + def test_metadata_omits_commit_when_unknown(self): + payload = build_graph_payload(SAMPLE_PAGES) + assert "commit" not in payload["metadata"] + + def test_nodes_carry_label_and_filepaths(self): + payload = build_graph_payload(SAMPLE_PAGES) + nodes = {n["id"]: n for n in payload["nodes"]} + assert nodes["page-overview"]["kind"] == "wiki-page" + assert nodes["page-overview"]["label"] == "Overview" + assert nodes["page-overview"]["data"]["filePaths"] == ["README.md"] + assert nodes["page-overview"]["data"]["importance"] == "high" + + def test_dangling_related_refs_are_dropped(self): + payload = build_graph_payload(SAMPLE_PAGES) + edge_pairs = {(e["source"], e["target"]) for e in payload["edges"]} + assert ("page-overview", "page-architecture") in edge_pairs + assert ("page-architecture", "page-overview") in edge_pairs + # The dangling "page-missing" reference must NOT produce an edge. + assert all(t != "page-missing" for _, t in edge_pairs) + assert all(e["kind"] == "related" for e in payload["edges"]) + + +class TestPublishNoOpPaths: + def test_no_token_no_op(self): + env = {k: v for k, v in os.environ.items() if k != "UNDERSTAND_QUICKLY_TOKEN"} + with patch.dict(os.environ, env, clear=True): + with patch("api.publish.urllib_request.urlopen") as mocked: + result = publish( + {"nodes": [], "edges": []}, + owner_repo="looptech-ai/understand-quickly", + ) + assert result["dispatched"] is False + assert result["reason"] == "no-token" + mocked.assert_not_called() + + def test_no_owner_repo_no_op(self): + with patch.dict(os.environ, {"UNDERSTAND_QUICKLY_TOKEN": "x"}): + with patch("api.publish.urllib_request.urlopen") as mocked: + result = publish({"nodes": [], "edges": []}, owner_repo=None) + assert result["dispatched"] is False + assert result["reason"] == "no-owner-repo" + mocked.assert_not_called() + + +class _FakeResponse: + def __init__(self, status: int = 204): + self.status = status + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def getcode(self): + return self.status + + +class TestDispatchSync: + def test_success_sends_correct_request(self): + captured = {} + + def fake_urlopen(req, timeout=None): + captured["url"] = req.full_url + captured["method"] = req.get_method() + captured["headers"] = {k.lower(): v for k, v in req.header_items()} + captured["body"] = json.loads(req.data.decode("utf-8")) + return _FakeResponse(204) + + with patch("api.publish.urllib_request.urlopen", side_effect=fake_urlopen): + ok, err = dispatch_sync("looptech-ai/understand-quickly", "tok-abc") + + assert ok is True + assert err is None + assert captured["url"] == DISPATCH_URL + assert captured["method"] == "POST" + assert captured["headers"]["authorization"] == "Bearer tok-abc" + assert captured["headers"]["accept"] == "application/vnd.github+json" + assert captured["headers"]["x-github-api-version"] == "2022-11-28" + assert captured["body"] == { + "event_type": "sync-entry", + "client_payload": {"id": "looptech-ai/understand-quickly"}, + } + + def test_http_error_is_soft_failure(self): + def boom(req, timeout=None): + raise urllib_error.HTTPError( + req.full_url, 422, "Unprocessable Entity", {}, None + ) + + with patch("api.publish.urllib_request.urlopen", side_effect=boom): + ok, err = dispatch_sync("owner/unknown", "tok") + + assert ok is False + assert err is not None and "422" in err + + def test_publish_dispatch_failure_returns_dispatch_failed(self): + def boom(req, timeout=None): + raise urllib_error.HTTPError( + req.full_url, 422, "Unprocessable Entity", {}, None + ) + + with patch.dict(os.environ, {"UNDERSTAND_QUICKLY_TOKEN": "tok"}): + with patch("api.publish.urllib_request.urlopen", side_effect=boom): + result = publish( + {"nodes": [], "edges": []}, + owner_repo="owner/unknown", + ) + + assert result["dispatched"] is False + assert result["reason"] == "dispatch-failed" + assert "npx @understand-quickly/cli add" in result["message"] From 5d699570a7ffdcdf55ce273ae389fddf9fb0dd85 Mon Sep 17 00:00:00 2001 From: amacsmith Date: Sun, 10 May 2026 04:26:44 -0400 Subject: [PATCH 2/2] fix(api): address PR review on graph export + publish flow - Wrap blocking publish_to_registry() in asyncio.to_thread so the /export/wiki async endpoint does not stall the FastAPI event loop while urllib does network I/O (Copilot HIGH, Gemini HIGH). - Reject WikiExportRequest.repo overrides that disagree with derive_owner_repo(repo_url). Without this, an unauthenticated caller could trigger sync-entry dispatches for any registry id once UNDERSTAND_QUICKLY_TOKEN is configured (Copilot HIGH). - Add WikiExportRequest.commit and thread it (or git_head_sha() as a best-effort fallback) into build_graph_payload so the metadata.commit field actually populates instead of leaving the git_head_sha helper as dead code (Gemini MEDIUM x3). Existing publish unit tests still pass (15/15). --- api/api.py | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/api/api.py b/api/api.py index 492c64eca..fd70c2896 100644 --- a/api/api.py +++ b/api/api.py @@ -137,7 +137,18 @@ class WikiExportRequest(BaseModel): None, description=( "Optional 'owner/repo' override for the registry id. If " - "omitted, derived from `repo_url`." + "omitted, derived from `repo_url`. Must match the owner/repo " + "implied by `repo_url` when set — mismatches are rejected to " + "prevent dispatching syncs for unrelated entries." + ), + ) + commit: Optional[str] = Field( + None, + description=( + "Optional 40-hex git commit SHA to embed in the graph " + "metadata. If omitted, the server attempts to resolve HEAD " + "from its local checkout (best-effort) and otherwise leaves " + "`metadata.commit` unset." ), ) @@ -282,20 +293,41 @@ async def export_wiki(request: WikiExportRequest): from api.publish import ( build_graph_payload, derive_owner_repo, + git_head_sha, publish as publish_to_registry, ) + derived_owner_repo = derive_owner_repo(request.repo_url) + commit = request.commit or git_head_sha() + payload = build_graph_payload( [page.model_dump() for page in request.pages], repo_url=request.repo_url, + commit=commit, ) content = json.dumps(payload, indent=2) filename = f"{repo_name}_graph_{timestamp}.json" media_type = "application/json" if request.publish: - owner_repo = request.repo or derive_owner_repo(request.repo_url) - publish_status = publish_to_registry(payload, owner_repo=owner_repo) + # Reject explicit `repo` overrides that don't match the + # derived owner/repo from `repo_url`. Without this an + # unauthenticated client could trigger a sync-entry + # dispatch for any registry id once the server is + # configured with a token. + if request.repo and derived_owner_repo and request.repo != derived_owner_repo: + raise HTTPException( + status_code=400, + detail=( + f"`repo` override ({request.repo!r}) does not " + f"match owner/repo derived from `repo_url` " + f"({derived_owner_repo!r})." + ), + ) + owner_repo = request.repo or derived_owner_repo + publish_status = await asyncio.to_thread( + publish_to_registry, payload, owner_repo=owner_repo + ) headers["X-Understand-Quickly-Dispatched"] = ( "true" if publish_status.get("dispatched") else "false" )