diff --git a/src/server/main.py b/src/server/main.py index 7f4bb4b..36c2967 100644 --- a/src/server/main.py +++ b/src/server/main.py @@ -117,5 +117,17 @@ async def global_exception_handler(request: Request, exc: Exception): ) +@app.get("/readyz") +async def readyz(): + """Readiness probe: 200 when every model that should be loaded is loaded. + + Intentionally unauthenticated so orchestrators (e.g. Kubernetes) can probe + it without credentials. + """ + result = await _registry.readiness() + status_code = 200 if result["ready"] else 503 + return JSONResponse(status_code=status_code, content=result) + + app.include_router(openarc_router) app.include_router(openai_router) diff --git a/src/server/model_registry.py b/src/server/model_registry.py index f6824dc..9ddceb5 100644 --- a/src/server/model_registry.py +++ b/src/server/model_registry.py @@ -7,7 +7,7 @@ import uuid from dataclasses import dataclass, field from datetime import datetime -from typing import Any, Awaitable, Callable, Dict, List, Optional +from typing import Any, Awaitable, Callable, Dict, List, Optional, Set from src.server.models.registration import ( EngineType, @@ -59,6 +59,12 @@ class ModelRegistry: def __init__(self): self._models: Dict[str, ModelRecord] = {} self._lock = asyncio.Lock() + # Names of models that *should* be loaded for the server to be ready. + # A model joins this set once it has successfully loaded and leaves it + # only when an administrator explicitly unloads it. A model that drops + # out of self._models for any other reason (e.g. an error-triggered + # unload) stays here so readiness reports the server as not ready. + self._expected_models: Set[str] = set() # Event subscribers self._on_loaded: List[Callable[[ModelRecord], Awaitable[None]]] = [] self._on_unloaded: List[Callable[[ModelRecord], Awaitable[None]]] = [] @@ -121,19 +127,35 @@ async def register_load(self, loader: ModelLoadConfig) -> str: return record.model_id - async def register_unload(self, model_name: str) -> bool: - """Unregister/unload a model by model_name. Returns True if found and unload task started.""" + async def register_unload(self, model_name: str, administrative: bool = False) -> bool: + """Unregister/unload a model by model_name. Returns True if found and unload task started. + + Args: + model_name: Name of the model to unload. + administrative: True when an operator explicitly requested the + unload (e.g. via the API), as opposed to an internal + error-triggered unload. An administrative unload removes the + model from the readiness expectation set so it is no longer + required for the server to be considered ready. + """ async with self._lock: + # An explicit unload means the operator no longer wants this model + # loaded, so stop requiring it for readiness. Do this regardless of + # whether the model is still present, so an operator can clear a + # model that already dropped out due to an earlier error. + if administrative: + self._expected_models.discard(model_name) + # Find model_id by model_name model_id = None for mid, record in self._models.items(): if record.model_name == model_name: model_id = mid break - + if model_id is None: return False - + # Start background unload task asyncio.create_task(self._unload_task(model_id)) return True @@ -151,6 +173,9 @@ async def _load_task(self, model_id: str, load_config: ModelLoadConfig) -> None: record.model_instance = model_instance record.status = ModelStatus.LOADED record.loading_task = None + # The model is now serving, so it is expected to remain + # loaded for readiness purposes. + self._expected_models.add(record.model_name) else: return @@ -220,6 +245,30 @@ async def status(self) -> dict: "openai_model_names": [record.model_name for record in self._models.values()], } + async def readiness(self) -> dict: + """Return readiness: ready only when every expected model is loaded. + + A model is "expected" once it has successfully loaded and until it is + administratively unloaded. The server is ready when there is at least + one expected model and all expected models currently have a LOADED + record. Any expected model that is missing or not yet LOADED (e.g. + unloaded due to an error, or still loading) makes the server not ready, + as does having no models expected at all. + """ + async with self._lock: + loaded = { + record.model_name + for record in self._models.values() + if record.status == ModelStatus.LOADED + } + expected = set(self._expected_models) + missing = sorted(expected - loaded) + return { + "ready": bool(expected) and not missing, + "expected_models": sorted(expected), + "missing_models": missing, + } + # Registry mapping (engine, model_type) to model class paths MODEL_CLASS_REGISTRY = { (EngineType.OV_GENAI, ModelType.LLM): "src.engine.ov_genai.llm.OVGenAI_LLM", diff --git a/src/server/routes/openarc.py b/src/server/routes/openarc.py index 15861de..b23d48e 100644 --- a/src/server/routes/openarc.py +++ b/src/server/routes/openarc.py @@ -172,7 +172,7 @@ async def load_model(load_config: ModelLoadConfig): @router.post("/unload", dependencies=[Depends(verify_api_key)]) async def unload_model(unload_config: ModelUnloadConfig): try: - success = await _registry.register_unload(unload_config.model_name) + success = await _registry.register_unload(unload_config.model_name, administrative=True) if success: return {"model_name": unload_config.model_name, "status": "unloading"} else: diff --git a/tests/unit/test_readiness_unit.py b/tests/unit/test_readiness_unit.py new file mode 100644 index 0000000..f905bde --- /dev/null +++ b/tests/unit/test_readiness_unit.py @@ -0,0 +1,241 @@ +import asyncio +from types import SimpleNamespace + +import pytest # type: ignore[import] + +import src.server.model_registry as registry_module +from src.server.model_registry import ModelRecord, ModelRegistry +from src.server.models.registration import ( + EngineType, + ModelLoadConfig, + ModelStatus, + ModelType, +) + + +def _sample_load_config(name: str = "mock-model") -> ModelLoadConfig: + return ModelLoadConfig( + model_path="/models/mock", + model_name=name, + model_type=ModelType.LLM, + engine=EngineType.OV_GENAI, + device="CPU", + runtime_config={}, + ) + + +def _patch_factory(monkeypatch: pytest.MonkeyPatch) -> None: + async def _noop_unload(*_args, **_kwargs): + return None + + async def fake_create(_config): # type: ignore[override] + return SimpleNamespace(unload_model=_noop_unload) + + monkeypatch.setattr(registry_module, "create_model_instance", fake_create) + + +# --- readiness() logic ------------------------------------------------------- + + +def test_readiness_empty_registry_is_not_ready() -> None: + """No models expected at all means the server is not ready.""" + registry = ModelRegistry() + + result = asyncio.run(registry.readiness()) + + assert result == { + "ready": False, + "expected_models": [], + "missing_models": [], + } + + +def test_readiness_ready_when_expected_model_loaded(monkeypatch: pytest.MonkeyPatch) -> None: + _patch_factory(monkeypatch) + registry = ModelRegistry() + + async def _run(): + await registry.register_load(_sample_load_config("a")) + return await registry.readiness() + + result = asyncio.run(_run()) + + assert result["ready"] is True + assert result["expected_models"] == ["a"] + assert result["missing_models"] == [] + + +def test_readiness_not_ready_when_an_expected_model_missing(monkeypatch: pytest.MonkeyPatch) -> None: + """A model expected but only present as a non-LOADED record is missing.""" + _patch_factory(monkeypatch) + registry = ModelRegistry() + + async def _run(): + await registry.register_load(_sample_load_config("a")) + await registry.register_load(_sample_load_config("b")) + # Simulate 'b' degrading without leaving the registry (e.g. mid-reload). + async with registry._lock: + for record in registry._models.values(): + if record.model_name == "b": + record.status = ModelStatus.FAILED + return await registry.readiness() + + result = asyncio.run(_run()) + + assert result["ready"] is False + assert result["expected_models"] == ["a", "b"] + assert result["missing_models"] == ["b"] + + +# --- expected-set lifecycle via (un)load ------------------------------------ + + +def test_error_unload_keeps_model_expected(monkeypatch: pytest.MonkeyPatch) -> None: + """A non-administrative unload (e.g. worker error) leaves the model expected.""" + _patch_factory(monkeypatch) + registry = ModelRegistry() + + async def _run(): + await registry.register_load(_sample_load_config("a")) + # Default administrative=False mirrors the worker error path. + await registry.register_unload("a") + await asyncio.sleep(0) + await asyncio.sleep(0) + return await registry.readiness() + + result = asyncio.run(_run()) + + # Model dropped out of the registry but is still required -> not ready. + assert result["ready"] is False + assert result["expected_models"] == ["a"] + assert result["missing_models"] == ["a"] + + +def test_administrative_unload_drops_model_from_expected(monkeypatch: pytest.MonkeyPatch) -> None: + _patch_factory(monkeypatch) + registry = ModelRegistry() + + async def _run(): + await registry.register_load(_sample_load_config("a")) + await registry.register_load(_sample_load_config("b")) + await registry.register_unload("a", administrative=True) + await asyncio.sleep(0) + await asyncio.sleep(0) + return await registry.readiness() + + result = asyncio.run(_run()) + + # 'a' is no longer required; 'b' is still loaded -> ready. + assert result["ready"] is True + assert result["expected_models"] == ["b"] + assert result["missing_models"] == [] + + +def test_administrative_unload_of_last_model_is_not_ready(monkeypatch: pytest.MonkeyPatch) -> None: + _patch_factory(monkeypatch) + registry = ModelRegistry() + + async def _run(): + await registry.register_load(_sample_load_config("a")) + await registry.register_unload("a", administrative=True) + await asyncio.sleep(0) + await asyncio.sleep(0) + return await registry.readiness() + + result = asyncio.run(_run()) + + assert result["ready"] is False + assert result["expected_models"] == [] + + +def test_administrative_unload_clears_expectation_for_absent_model() -> None: + """An operator can clear a stale expectation even after the record is gone.""" + registry = ModelRegistry() + + async def _run(): + # Model errored out earlier: still expected, but no longer in _models. + registry._expected_models.add("ghost") + before = await registry.readiness() + # Explicit unload returns False (not present) but must clear expectation. + result = await registry.register_unload("ghost", administrative=True) + after = await registry.readiness() + return before, result, after + + before, result, after = asyncio.run(_run()) + + assert before["missing_models"] == ["ghost"] + assert result is False + assert after["expected_models"] == [] + + +def test_reload_after_administrative_unload_restores_expectation(monkeypatch: pytest.MonkeyPatch) -> None: + _patch_factory(monkeypatch) + registry = ModelRegistry() + + async def _run(): + await registry.register_load(_sample_load_config("a")) + await registry.register_unload("a", administrative=True) + await asyncio.sleep(0) + await asyncio.sleep(0) + await registry.register_load(_sample_load_config("a")) + return await registry.readiness() + + result = asyncio.run(_run()) + + assert result["ready"] is True + assert result["expected_models"] == ["a"] + + +# --- /readyz endpoint -------------------------------------------------------- + + +def _make_loaded_record(name: str) -> ModelRecord: + return ModelRecord( + model_name=name, + model_type="llm", + engine="ov_genai", + device="CPU", + status=ModelStatus.LOADED, + ) + + +def test_readyz_endpoint_status_codes() -> None: + from starlette.testclient import TestClient + + from src.server import deps + from src.server.main import app + + registry = deps._registry + # Snapshot and isolate the shared singleton's state for this test. + saved_models = dict(registry._models) + saved_expected = set(registry._expected_models) + registry._models.clear() + registry._expected_models.clear() + try: + with TestClient(app) as client: + # No models -> not ready. + resp = client.get("/readyz") + assert resp.status_code == 503 + assert resp.json()["ready"] is False + + # One loaded + expected model -> ready. + record = _make_loaded_record("a") + registry._models[record.model_id] = record + registry._expected_models.add("a") + + resp = client.get("/readyz") + assert resp.status_code == 200 + body = resp.json() + assert body["ready"] is True + assert body["expected_models"] == ["a"] + + # Expected model drops out (error unload) -> not ready again. + registry._models.clear() + resp = client.get("/readyz") + assert resp.status_code == 503 + assert resp.json()["missing_models"] == ["a"] + finally: + registry._models.clear() + registry._models.update(saved_models) + registry._expected_models.clear() + registry._expected_models.update(saved_expected)