Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,5 +117,17 @@ async def global_exception_handler(request: Request, exc: Exception):
)


@app.get("/readyz")
async def readyz():
"""Readiness probe: 200 when every model that should be loaded is loaded.

Intentionally unauthenticated so orchestrators (e.g. Kubernetes) can probe
it without credentials.
"""
result = await _registry.readiness()
status_code = 200 if result["ready"] else 503
return JSONResponse(status_code=status_code, content=result)


app.include_router(openarc_router)
app.include_router(openai_router)
59 changes: 54 additions & 5 deletions src/server/model_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import uuid
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Awaitable, Callable, Dict, List, Optional
from typing import Any, Awaitable, Callable, Dict, List, Optional, Set

from src.server.models.registration import (
EngineType,
Expand Down Expand Up @@ -59,6 +59,12 @@ class ModelRegistry:
def __init__(self):
self._models: Dict[str, ModelRecord] = {}
self._lock = asyncio.Lock()
# Names of models that *should* be loaded for the server to be ready.
# A model joins this set once it has successfully loaded and leaves it
# only when an administrator explicitly unloads it. A model that drops
# out of self._models for any other reason (e.g. an error-triggered
# unload) stays here so readiness reports the server as not ready.
self._expected_models: Set[str] = set()
# Event subscribers
self._on_loaded: List[Callable[[ModelRecord], Awaitable[None]]] = []
self._on_unloaded: List[Callable[[ModelRecord], Awaitable[None]]] = []
Expand Down Expand Up @@ -121,19 +127,35 @@ async def register_load(self, loader: ModelLoadConfig) -> str:

return record.model_id

async def register_unload(self, model_name: str) -> bool:
"""Unregister/unload a model by model_name. Returns True if found and unload task started."""
async def register_unload(self, model_name: str, administrative: bool = False) -> bool:
"""Unregister/unload a model by model_name. Returns True if found and unload task started.

Args:
model_name: Name of the model to unload.
administrative: True when an operator explicitly requested the
unload (e.g. via the API), as opposed to an internal
error-triggered unload. An administrative unload removes the
model from the readiness expectation set so it is no longer
required for the server to be considered ready.
"""
async with self._lock:
# An explicit unload means the operator no longer wants this model
# loaded, so stop requiring it for readiness. Do this regardless of
# whether the model is still present, so an operator can clear a
# model that already dropped out due to an earlier error.
if administrative:
self._expected_models.discard(model_name)

# Find model_id by model_name
model_id = None
for mid, record in self._models.items():
if record.model_name == model_name:
model_id = mid
break

if model_id is None:
return False

# Start background unload task
asyncio.create_task(self._unload_task(model_id))
return True
Expand All @@ -151,6 +173,9 @@ async def _load_task(self, model_id: str, load_config: ModelLoadConfig) -> None:
record.model_instance = model_instance
record.status = ModelStatus.LOADED
record.loading_task = None
# The model is now serving, so it is expected to remain
# loaded for readiness purposes.
self._expected_models.add(record.model_name)
else:
return

Expand Down Expand Up @@ -220,6 +245,30 @@ async def status(self) -> dict:
"openai_model_names": [record.model_name for record in self._models.values()],
}

async def readiness(self) -> dict:
"""Return readiness: ready only when every expected model is loaded.

A model is "expected" once it has successfully loaded and until it is
administratively unloaded. The server is ready when there is at least
one expected model and all expected models currently have a LOADED
record. Any expected model that is missing or not yet LOADED (e.g.
unloaded due to an error, or still loading) makes the server not ready,
as does having no models expected at all.
"""
async with self._lock:
loaded = {
record.model_name
for record in self._models.values()
if record.status == ModelStatus.LOADED
}
expected = set(self._expected_models)
missing = sorted(expected - loaded)
return {
"ready": bool(expected) and not missing,
"expected_models": sorted(expected),
"missing_models": missing,
}

# Registry mapping (engine, model_type) to model class paths
MODEL_CLASS_REGISTRY = {
(EngineType.OV_GENAI, ModelType.LLM): "src.engine.ov_genai.llm.OVGenAI_LLM",
Expand Down
2 changes: 1 addition & 1 deletion src/server/routes/openarc.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ async def load_model(load_config: ModelLoadConfig):
@router.post("/unload", dependencies=[Depends(verify_api_key)])
async def unload_model(unload_config: ModelUnloadConfig):
try:
success = await _registry.register_unload(unload_config.model_name)
success = await _registry.register_unload(unload_config.model_name, administrative=True)
if success:
return {"model_name": unload_config.model_name, "status": "unloading"}
else:
Expand Down
241 changes: 241 additions & 0 deletions tests/unit/test_readiness_unit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
import asyncio
from types import SimpleNamespace

import pytest # type: ignore[import]

import src.server.model_registry as registry_module
from src.server.model_registry import ModelRecord, ModelRegistry
from src.server.models.registration import (
EngineType,
ModelLoadConfig,
ModelStatus,
ModelType,
)


def _sample_load_config(name: str = "mock-model") -> ModelLoadConfig:
return ModelLoadConfig(
model_path="/models/mock",
model_name=name,
model_type=ModelType.LLM,
engine=EngineType.OV_GENAI,
device="CPU",
runtime_config={},
)


def _patch_factory(monkeypatch: pytest.MonkeyPatch) -> None:
async def _noop_unload(*_args, **_kwargs):
return None

async def fake_create(_config): # type: ignore[override]
return SimpleNamespace(unload_model=_noop_unload)

monkeypatch.setattr(registry_module, "create_model_instance", fake_create)


# --- readiness() logic -------------------------------------------------------


def test_readiness_empty_registry_is_not_ready() -> None:
"""No models expected at all means the server is not ready."""
registry = ModelRegistry()

result = asyncio.run(registry.readiness())

assert result == {
"ready": False,
"expected_models": [],
"missing_models": [],
}


def test_readiness_ready_when_expected_model_loaded(monkeypatch: pytest.MonkeyPatch) -> None:
_patch_factory(monkeypatch)
registry = ModelRegistry()

async def _run():
await registry.register_load(_sample_load_config("a"))
return await registry.readiness()

result = asyncio.run(_run())

assert result["ready"] is True
assert result["expected_models"] == ["a"]
assert result["missing_models"] == []


def test_readiness_not_ready_when_an_expected_model_missing(monkeypatch: pytest.MonkeyPatch) -> None:
"""A model expected but only present as a non-LOADED record is missing."""
_patch_factory(monkeypatch)
registry = ModelRegistry()

async def _run():
await registry.register_load(_sample_load_config("a"))
await registry.register_load(_sample_load_config("b"))
# Simulate 'b' degrading without leaving the registry (e.g. mid-reload).
async with registry._lock:
for record in registry._models.values():
if record.model_name == "b":
record.status = ModelStatus.FAILED
return await registry.readiness()

result = asyncio.run(_run())

assert result["ready"] is False
assert result["expected_models"] == ["a", "b"]
assert result["missing_models"] == ["b"]


# --- expected-set lifecycle via (un)load ------------------------------------


def test_error_unload_keeps_model_expected(monkeypatch: pytest.MonkeyPatch) -> None:
"""A non-administrative unload (e.g. worker error) leaves the model expected."""
_patch_factory(monkeypatch)
registry = ModelRegistry()

async def _run():
await registry.register_load(_sample_load_config("a"))
# Default administrative=False mirrors the worker error path.
await registry.register_unload("a")
await asyncio.sleep(0)
await asyncio.sleep(0)
return await registry.readiness()

result = asyncio.run(_run())

# Model dropped out of the registry but is still required -> not ready.
assert result["ready"] is False
assert result["expected_models"] == ["a"]
assert result["missing_models"] == ["a"]


def test_administrative_unload_drops_model_from_expected(monkeypatch: pytest.MonkeyPatch) -> None:
_patch_factory(monkeypatch)
registry = ModelRegistry()

async def _run():
await registry.register_load(_sample_load_config("a"))
await registry.register_load(_sample_load_config("b"))
await registry.register_unload("a", administrative=True)
await asyncio.sleep(0)
await asyncio.sleep(0)
return await registry.readiness()

result = asyncio.run(_run())

# 'a' is no longer required; 'b' is still loaded -> ready.
assert result["ready"] is True
assert result["expected_models"] == ["b"]
assert result["missing_models"] == []


def test_administrative_unload_of_last_model_is_not_ready(monkeypatch: pytest.MonkeyPatch) -> None:
_patch_factory(monkeypatch)
registry = ModelRegistry()

async def _run():
await registry.register_load(_sample_load_config("a"))
await registry.register_unload("a", administrative=True)
await asyncio.sleep(0)
await asyncio.sleep(0)
return await registry.readiness()

result = asyncio.run(_run())

assert result["ready"] is False
assert result["expected_models"] == []


def test_administrative_unload_clears_expectation_for_absent_model() -> None:
"""An operator can clear a stale expectation even after the record is gone."""
registry = ModelRegistry()

async def _run():
# Model errored out earlier: still expected, but no longer in _models.
registry._expected_models.add("ghost")
before = await registry.readiness()
# Explicit unload returns False (not present) but must clear expectation.
result = await registry.register_unload("ghost", administrative=True)
after = await registry.readiness()
return before, result, after

before, result, after = asyncio.run(_run())

assert before["missing_models"] == ["ghost"]
assert result is False
assert after["expected_models"] == []


def test_reload_after_administrative_unload_restores_expectation(monkeypatch: pytest.MonkeyPatch) -> None:
_patch_factory(monkeypatch)
registry = ModelRegistry()

async def _run():
await registry.register_load(_sample_load_config("a"))
await registry.register_unload("a", administrative=True)
await asyncio.sleep(0)
await asyncio.sleep(0)
await registry.register_load(_sample_load_config("a"))
return await registry.readiness()

result = asyncio.run(_run())

assert result["ready"] is True
assert result["expected_models"] == ["a"]


# --- /readyz endpoint --------------------------------------------------------


def _make_loaded_record(name: str) -> ModelRecord:
return ModelRecord(
model_name=name,
model_type="llm",
engine="ov_genai",
device="CPU",
status=ModelStatus.LOADED,
)


def test_readyz_endpoint_status_codes() -> None:
from starlette.testclient import TestClient

from src.server import deps
from src.server.main import app

registry = deps._registry
# Snapshot and isolate the shared singleton's state for this test.
saved_models = dict(registry._models)
saved_expected = set(registry._expected_models)
registry._models.clear()
registry._expected_models.clear()
try:
with TestClient(app) as client:
# No models -> not ready.
resp = client.get("/readyz")
assert resp.status_code == 503
assert resp.json()["ready"] is False

# One loaded + expected model -> ready.
record = _make_loaded_record("a")
registry._models[record.model_id] = record
registry._expected_models.add("a")

resp = client.get("/readyz")
assert resp.status_code == 200
body = resp.json()
assert body["ready"] is True
assert body["expected_models"] == ["a"]

# Expected model drops out (error unload) -> not ready again.
registry._models.clear()
resp = client.get("/readyz")
assert resp.status_code == 503
assert resp.json()["missing_models"] == ["a"]
finally:
registry._models.clear()
registry._models.update(saved_models)
registry._expected_models.clear()
registry._expected_models.update(saved_expected)