Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
216 changes: 216 additions & 0 deletions backend/maint-scripts/update_recipes_and_title_flavours.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
#!/usr/bin/env python3
"""Maintenance script to populate CMS database with recipes and title flavours from
zimfarm API.

This script is idempotent and can be run multiple times without creating duplicates.
It:
- Finds all recipes on Zimfarm API
- Creates missing recipes on CMS database
- Fetches all the successful tasks for each recipe
- For each task:
  - Finds a title with the same name from the task's file information
  - Creates a title flavour for the title and associates it with the recipe
  - If an existing flavour is attached to a different recipe, a warning is logged

Environment variables (optional):
- ZIMFARM_API_URL: URL of Zimfarm API to fetch recipes and tasks from
  (defaults to https://api.farm.openzim.org/v2)
"""

import os
from dataclasses import dataclass
from json import JSONDecodeError
from typing import Any
from uuid import UUID

import requests # pyright: ignore[reportMissingModuleSource]
from sqlalchemy.orm import Session as OrmSession

from cms_backend import logger
from cms_backend.db import Session
from cms_backend.db.flavour import get_title_flavour_or_none
from cms_backend.db.models import TitleFlavour, ZimfarmRecipe
from cms_backend.db.title import get_title_by_name_or_none
from cms_backend.db.zimfarm_recipe import get_zimfarm_recipe_by_id_or_none
from cms_backend.utils.zim import get_missing_keys


@dataclass
class Response:
    """Outcome of one HTTP call made through ``query_api``."""

    # HTTP status code of the reply (query_api reports -1 when it has none)
    status_code: int
    # True when the call is considered successful
    success: bool
    # Decoded JSON body; an empty dict when no body could be decoded
    json: dict[str, Any]


def query_api(
    url: str,
    method: str = "get",
    *,
    headers: dict[str, Any] | None = None,
    payload: dict[str, Any] | None = None,
    params: dict[str, Any] | None = None,
    timeout: int = 30,
) -> Response:
    """Send an HTTP request and wrap the outcome in a ``Response``.

    This function does not raise on request or decoding failures: the error
    is logged and a ``Response`` with an empty ``json`` is returned instead.

    Args:
        url: full URL to query.
        method: HTTP verb, case-insensitive. Unknown verbs fall back to GET.
        headers: optional request headers (copied, never mutated).
        payload: optional JSON body.
        params: optional query-string parameters.
        timeout: request timeout in seconds.

    Returns:
        A ``Response`` carrying the status code, the success flag and the
        decoded JSON body. ``status_code`` is -1 only when no HTTP response
        was received at all.
    """
    req_headers: dict[str, Any] = dict(headers or {})

    func = {
        "GET": requests.get,
        "POST": requests.post,
        "PATCH": requests.patch,
        "DELETE": requests.delete,
        "PUT": requests.put,
    }.get(method.upper(), requests.get)

    resp = None
    try:
        resp = func(
            url, headers=req_headers, json=payload, params=params, timeout=timeout
        )
        return Response(
            status_code=resp.status_code,
            success=resp.ok,
            json=resp.json() if resp.text and resp.text.strip() else {},
        )
    # JSONDecodeError is already covered by Exception; it is listed to make
    # explicit that an undecodable body is an expected failure mode here.
    except (JSONDecodeError, Exception) as exc:
        # A requests.Response is falsy for 4xx/5xx replies, so "did we get a
        # response" must be an identity check, not a truthiness check.
        # Otherwise the real status code and body of an error reply are lost.
        logger.exception(
            f"unexpected error while making request to {url} : "
            f"{resp.text if resp is not None else exc}"
        )
        return Response(
            status_code=resp.status_code if resp is not None else -1,
            success=resp.ok if resp is not None else False,
            json={},
        )


def process_task(
    session: OrmSession,
    *,
    task: dict[str, Any],
    recipe: ZimfarmRecipe,
    zimfarm_api_url: str,
):
    """Create title flavours on the CMS from the files of one Zimfarm task.

    The task details are fetched from ``{zimfarm_api_url}/tasks/{id}``. For
    every file listed, the ``Name`` metadata is used to look up a CMS title
    and the ``Flavour`` metadata to look up or create the matching title
    flavour, which is attached to ``recipe``. Files without a name, without a
    matching title or without a flavour are skipped. An existing flavour
    attached to another recipe only triggers a warning.
    """
    task_id = task["id"]
    response = query_api(f"{zimfarm_api_url}/tasks/{task_id}")
    if not response.success:
        logger.error(f"Unable to fetch task {task_id} from {zimfarm_api_url}")
        return

    files = response.json.get("files", {})
    for filename in files:
        metadata = files[filename].get("info", {}).get("metadata", {})

        missing_keys = get_missing_keys(metadata, "Name")
        if missing_keys:
            logger.warning(
                f"Task {task_id} metadata is missing keys: {','.join(missing_keys)}"
            )
            continue

        title_name = metadata["Name"]
        title = get_title_by_name_or_none(session, name=title_name)
        if title is None:
            logger.debug(
                f"Title with name '{title_name}' from task {task_id} "
                "does not yet exist on CMS"
            )
            continue

        flavour = metadata.get("Flavour")
        if not flavour:
            logger.debug(f"Task {task_id} has no flavour")
            continue

        existing = get_title_flavour_or_none(session, title.id, flavour)
        if not existing:
            # Nothing recorded yet for this (title, flavour) pair: create it.
            created = TitleFlavour(flavour=flavour)
            created.title = title
            created.recipe = recipe
            session.add(created)
            session.flush()
            logger.info(f"Created title flavour '{flavour}' for title '{title.name}'")
            continue

        logger.debug(
            f"Title flavour '{existing.flavour}' already exists for title "
            f"'{title.name}'"
        )
        if existing.recipe_id != recipe.id:
            logger.warning(
                f"Title flavour '{existing.flavour}' for title '{title.name}' is "
                f"attached to a different recipe from zimfarm '{recipe.name}'"
            )


def process_recipe(session: OrmSession, zf_recipe: ZimfarmRecipe, zimfarm_api_url: str):
    """Walk through all succeeded Zimfarm tasks of a recipe, page by page.

    Tasks are requested 50 at a time from ``{zimfarm_api_url}/tasks`` and each
    one is handed to ``process_task`` inside its own nested transaction
    (``session.begin_nested()``). Paging stops on the first failed request or
    on the first empty page.
    """
    page_size = 50
    offset = 0
    tasks_url = f"{zimfarm_api_url}/tasks"

    while True:
        query = {
            "skip": offset,
            "limit": page_size,
            "status": ["succeeded"],
            "recipe_id": zf_recipe.id,
            "sort_criteria": "done",
        }
        response = query_api(tasks_url, params=query)
        if not response.success:
            logger.error(
                f"Unable to process tasks for recipe {zf_recipe.name}: {response.json}"
            )
            return

        tasks = response.json["items"]
        if len(tasks) == 0:
            logger.info(f"No more tasks to process for recipe {zf_recipe.name}")
            return

        for task in tasks:
            with session.begin_nested():
                process_task(
                    session,
                    task=task,
                    recipe=zf_recipe,
                    zimfarm_api_url=zimfarm_api_url,
                )

        offset += page_size


def populate_recipes_from_zimfarm(session: OrmSession, zimfarm_api_url: str):
    """Mirror Zimfarm recipes into the CMS and process the tasks of each one.

    Recipes are requested 50 at a time from ``{zimfarm_api_url}/recipes``. A
    recipe whose id is not yet known to the CMS is created, then every recipe
    (new or existing) is passed to ``process_recipe``. Paging stops on the
    first failed request or on the first empty page.
    """
    page_size = 50
    offset = 0
    recipes_url = f"{zimfarm_api_url}/recipes"

    while True:
        response = query_api(recipes_url, params={"skip": offset, "limit": page_size})
        if not response.success:
            logger.error(
                f"Unable to fetch recipes from {zimfarm_api_url}: {response.json}. "
                "Exiting..."
            )
            return

        recipes = response.json["items"]
        if len(recipes) == 0:
            logger.info(f"No more recipes returned from {zimfarm_api_url}")
            return

        for recipe in recipes:
            recipe_id = UUID(recipe["id"])
            zf_recipe = get_zimfarm_recipe_by_id_or_none(session, recipe_id)
            if zf_recipe is None:
                zf_recipe = ZimfarmRecipe(id=recipe_id, name=recipe["name"])
                session.add(zf_recipe)
                session.flush()
                logger.info(f"Created zimfarm recipe '{zf_recipe.name}'")
            process_recipe(session, zf_recipe, zimfarm_api_url)

        offset += page_size


def main():
    """Run the synchronisation against the configured Zimfarm API.

    The API base URL is read from the ``ZIMFARM_API_URL`` environment
    variable and defaults to the public Zimfarm API when the variable is
    unset or empty. All work happens inside the single transaction opened by
    ``Session.begin()``.
    """
    default_api_url = "https://api.farm.openzim.org/v2"
    # `or` (rather than a getenv default) also covers a variable that is set
    # but empty. Trailing slashes are dropped because endpoint paths are
    # appended as "/recipes" and "/tasks", which would otherwise give "//...".
    zimfarm_api_url = (os.getenv("ZIMFARM_API_URL") or default_api_url).rstrip("/")
    with Session.begin() as session:
        populate_recipes_from_zimfarm(session, zimfarm_api_url)


if __name__ == "__main__":
    main()
90 changes: 0 additions & 90 deletions backend/maint-scripts/update_title_flavours_from_books.py

This file was deleted.

1 change: 1 addition & 0 deletions backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ dependencies = [
"Werkzeug == 3.1.5",
"xxhash == 3.7.0",
"pycountry == 26.2.16",
"requests == 2.34.2",
]
dynamic = ["version"]

Expand Down
4 changes: 4 additions & 0 deletions backend/src/cms_backend/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
from cms_backend.api.routes.zimfarm_notifications import (
router as zimfarm_notification_router,
)
from cms_backend.api.routes.zimfarm_recipes import (
router as zimfarm_recipe_router,
)
from cms_backend.context import Context
from cms_backend.db.exceptions import (
RecordAlreadyExistsError,
Expand Down Expand Up @@ -69,6 +72,7 @@ def create_app(*, debug: bool = True):
main_router = APIRouter(prefix="/v1")
main_router.include_router(router=config_router)
main_router.include_router(router=zimfarm_notification_router)
main_router.include_router(router=zimfarm_recipe_router)
main_router.include_router(router=healthcheck_router)
main_router.include_router(router=titles_router)
main_router.include_router(router=books_router)
Expand Down
2 changes: 1 addition & 1 deletion backend/src/cms_backend/api/routes/account.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from werkzeug.security import check_password_hash, generate_password_hash

from cms_backend.api.routes.dependencies import get_current_account, require_permission
from cms_backend.api.routes.fields import LimitFieldMax200, NotEmptyString, SkipField
from cms_backend.api.routes.http_errors import BadRequestError, ForbiddenError
from cms_backend.api.routes.models import ListResponse, calculate_pagination_metadata
from cms_backend.db import gen_dbsession
Expand All @@ -25,6 +24,7 @@
from cms_backend.db.models import Account
from cms_backend.roles import RoleEnum
from cms_backend.schemas import BaseModel
from cms_backend.schemas.fields import LimitFieldMax200, NotEmptyString, SkipField
from cms_backend.schemas.models import AccountUpdateSchema
from cms_backend.schemas.orms import AccountSchema
from cms_backend.utils import is_valid_uuid
Expand Down
2 changes: 1 addition & 1 deletion backend/src/cms_backend/api/routes/books.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from sqlalchemy.orm import Session as OrmSession

from cms_backend.api.routes.dependencies import get_current_account, require_permission
from cms_backend.api.routes.fields import LimitFieldMax200, NotEmptyString, SkipField
from cms_backend.api.routes.models import ListResponse, calculate_pagination_metadata
from cms_backend.db import gen_dbsession
from cms_backend.db.book import backup_book as db_backup_book
Expand All @@ -28,6 +27,7 @@
from cms_backend.db.books import get_zim_urls as db_get_zim_urls
from cms_backend.db.models import Account
from cms_backend.schemas import BaseModel
from cms_backend.schemas.fields import LimitFieldMax200, NotEmptyString, SkipField
from cms_backend.schemas.models import (
BookLanguagesSchema,
BookUpdateSchema,
Expand Down
2 changes: 1 addition & 1 deletion backend/src/cms_backend/api/routes/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from sqlalchemy.orm import Session as OrmSession

from cms_backend.api.routes.dependencies import get_current_account, require_permission
from cms_backend.api.routes.fields import LimitFieldMax200, NotEmptyString, SkipField
from cms_backend.api.routes.models import ListResponse, calculate_pagination_metadata
from cms_backend.api.routes.utils import build_library_xml
from cms_backend.db import gen_dbsession
Expand All @@ -35,6 +34,7 @@
from cms_backend.db.exceptions import RecordDoesNotExistError
from cms_backend.db.models import Account
from cms_backend.schemas import BaseModel
from cms_backend.schemas.fields import LimitFieldMax200, NotEmptyString, SkipField
from cms_backend.schemas.models import CollectionUpdateSchema
from cms_backend.schemas.orms import (
CollectionFullSchema,
Expand Down
2 changes: 1 addition & 1 deletion backend/src/cms_backend/api/routes/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
from fastapi import APIRouter, Depends, Query
from sqlalchemy.orm import Session as OrmSession

from cms_backend.api.routes.fields import LimitFieldMax200, NotEmptyString, SkipField
from cms_backend.api.routes.models import ListResponse, calculate_pagination_metadata
from cms_backend.db import gen_dbsession
from cms_backend.db.event import get_events as db_get_events
from cms_backend.schemas import BaseModel
from cms_backend.schemas.fields import LimitFieldMax200, NotEmptyString, SkipField
from cms_backend.schemas.orms import EventLightSchema

router = APIRouter(prefix="/events", tags=["events"])
Expand Down
Loading