Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
259 changes: 259 additions & 0 deletions backend/maint-scripts/update_recipes_and_title_flavours.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
#!/usr/bin/env python3
"""Maintenance script to populate CMS database with recipes and title flavours from
zimfarm API.

This script is idempotent and can be run multiple times without creating duplicates.
It
- Finds all recipes on Zimfarm API
- Creates missing recipes on CMS database
- Fetches all the successful tasks for the recipe
- For each task
- Find a title with the same name from the tasks' file information
- Creates a title flavour for the title and associates it with the recipe
- If existing flavour differs from current recipe, warning messages are logged

Environment variables (optional, defaults shown):
- ZIMFARM_API_URL: URL of Zimfarm API to fetch recipes and tasks from.
Defaults to 'https://api.farm.openzim.org/v2'
- USERNAME: the username of the account that will create history entries for recipes.
Defaults to 'maint-scripts'
"""

import os
from dataclasses import dataclass
from json import JSONDecodeError
from typing import Any
from uuid import UUID

import requests # pyright: ignore[reportMissingModuleSource]
from sqlalchemy import select
from sqlalchemy.orm import Session as OrmSession
from sqlalchemy.orm.attributes import flag_modified

from cms_backend import logger
from cms_backend.db import Session
from cms_backend.db.account import get_account_by_username
from cms_backend.db.flavour import get_title_flavour_or_none
from cms_backend.db.models import Book, TitleFlavour, ZimfarmRecipe
from cms_backend.db.title import get_title_by_name_or_none
from cms_backend.db.zimfarm_recipe import (
create_zimfarm_recipe,
create_zimfarm_recipe_history_entry,
get_zimfarm_recipe_by_id_or_none,
)
from cms_backend.utils.zim import get_missing_keys


@dataclass
class Response:
    """A response from the webapi

    Plain value object built by ``query_api`` so callers can inspect the outcome
    of an HTTP call without handling ``requests`` objects or exceptions.
    """

    # HTTP status code of the reply; query_api stores -1 as a sentinel on its
    # failure path when it has no usable reply to report
    status_code: int
    # whether the call succeeded (query_api copies the ``ok`` flag of the reply,
    # or stores False on its failure path)
    success: bool
    # decoded JSON body; query_api stores {} for an empty body or on failure
    json: dict[str, Any]


def query_api(
    url: str,
    method: str = "get",
    *,
    headers: dict[str, Any] | None = None,
    payload: dict[str, Any] | None = None,
    params: dict[str, Any] | None = None,
    timeout: int = 30,
) -> Response:
    """Send an HTTP request and wrap the outcome in a ``Response``.

    This function does not raise: any error (network failure, timeout, body that
    is not valid JSON, ...) is logged and reported through the returned object.

    Args:
        url: full URL to query.
        method: HTTP verb, case-insensitive. Unknown verbs fall back to GET.
        headers: extra request headers, if any.
        payload: JSON-serializable request body, if any.
        params: query-string parameters, if any.
        timeout: timeout in seconds handed to ``requests``.

    Returns:
        A ``Response`` holding the status code, the ``ok`` flag and the decoded
        JSON body (``{}`` for an empty body). On error ``json`` is ``{}``;
        ``status_code`` is the real status when a reply was received and ``-1``
        when the request itself failed.
    """
    # copy so the caller's mapping is never shared with the request
    req_headers: dict[str, Any] = dict(headers or {})
    func = {
        "GET": requests.get,
        "POST": requests.post,
        "PATCH": requests.patch,
        "DELETE": requests.delete,
        "PUT": requests.put,
    }.get(method.upper(), requests.get)

    resp = None
    try:
        resp = func(
            url, headers=req_headers, json=payload, params=params, timeout=timeout
        )
        return Response(
            status_code=resp.status_code,
            success=resp.ok,
            # whitespace-only bodies are treated as empty rather than decoded
            json=resp.json() if resp.text and resp.text.strip() else {},
        )
    # JSONDecodeError is already covered by Exception; it is listed only to make
    # the most likely failure explicit
    except (JSONDecodeError, Exception) as exc:
        # A requests response is falsy for 4xx/5xx statuses, so compare against
        # None: otherwise an error reply with an undecodable body would lose its
        # real status code and its body in the log.
        logger.exception(
            f"unexpected error while making request to {url} : "
            f"{resp.text if resp is not None else exc}"
        )
        return Response(
            status_code=resp.status_code if resp is not None else -1,
            success=resp.ok if resp is not None else False,
            json={},
        )


def process_task(
    session: OrmSession,
    *,
    task: dict[str, Any],
    recipe: ZimfarmRecipe,
    zimfarm_api_url: str,
    author_id: UUID,
) -> None:
    """Attach CMS titles and title flavours to ``recipe`` from one Zimfarm task.

    The task details are fetched from the Zimfarm API. For every file of the task:

    - the CMS title whose name equals the file's ``Name`` metadata is looked up;
      files without that metadata or without a matching title are skipped
    - the recipe is linked to that title
    - a title flavour is created for the file's ``Flavour`` metadata (leading
      ``_`` removed) unless one already exists; an existing flavour attached to
      another recipe only triggers a warning
    - books of that title and flavour whose zimfarm notification lacks
      ``recipe_id`` / ``recipe_name`` get those two values filled in

    Args:
        session: ORM session used for all reads and writes.
        task: task item as listed by the Zimfarm API; only ``id`` is read.
        recipe: CMS recipe the task belongs to.
        zimfarm_api_url: base URL of the Zimfarm API.
        author_id: account recorded as author of recipe history entries.
    """
    task_id = task["id"]
    response = query_api(f"{zimfarm_api_url}/tasks/{task_id}")
    if not response.success:
        logger.error(
            f"Unable to fetch task {task_id} from {zimfarm_api_url}: {response.json}"
        )
        return

    # ``or {}`` (rather than a .get default) also covers keys that are present
    # with a JSON null value
    files = response.json.get("files") or {}
    for file_entry in files.values():
        metadata = ((file_entry or {}).get("info") or {}).get("metadata") or {}
        missing_keys = get_missing_keys(metadata, "Name")
        if missing_keys:
            logger.warning(
                f"Task {task_id} metadata is missing keys: {','.join(missing_keys)}"
            )
            continue

        title = get_title_by_name_or_none(session, name=metadata["Name"])
        if title is None:
            logger.debug(
                f"Title with name '{metadata['Name']}' from task {task_id} "
                "does not yet exist on CMS"
            )
            continue

        # the recipe is linked to the title even when the file has no flavour
        recipe.title = title

        flavour = metadata.get("Flavour")
        if flavour is None:
            logger.debug(f"Task {task_id} has no flavour")
            continue

        # drop a single leading underscore, if any
        flavour = flavour.removeprefix("_")

        tf = get_title_flavour_or_none(session, title.id, flavour)
        if tf:
            logger.debug(
                f"Title flavour '{tf.flavour}' already exists for title '{title.name}'"
            )
            if tf.recipe_id != recipe.id:
                logger.warning(
                    f"Title flavour '{tf.flavour}' for title '{title.name}' is "
                    f"attached to a different recipe from zimfarm '{recipe.name}'"
                )
        else:
            tf = TitleFlavour(flavour=flavour)
            tf.title = title
            tf.recipe = recipe
            session.add(tf)
            # flush now so the new flavour is visible to the lookup done for
            # later files/tasks in the same transaction
            session.flush()
            logger.info(f"Created title flavour '{flavour}' for title '{title.name}'")
            create_zimfarm_recipe_history_entry(
                session,
                recipe,
                author_id=author_id,
                comment=f"Added '{flavour}' for title '{title.name}'",
            )

        # update books notifications whose flavour matches
        books = session.scalars(
            select(Book).where(Book.flavour == flavour, Book.title_id == title.id)
        ).all()
        for book in books:
            zimfarm_notification = book.zimfarm_notification
            if zimfarm_notification and get_missing_keys(
                zimfarm_notification.content, "recipe_id", "recipe_name"
            ):
                zimfarm_notification.content["recipe_id"] = str(recipe.id)
                zimfarm_notification.content["recipe_name"] = recipe.name
                # content is mutated in place (presumably a JSON column - TODO
                # confirm), so the attribute is explicitly marked as changed
                flag_modified(zimfarm_notification, "content")


def process_recipe(
    session: OrmSession, zf_recipe: ZimfarmRecipe, zimfarm_api_url: str, author_id: UUID
) -> None:
    """Process every succeeded Zimfarm task of ``zf_recipe``.

    Tasks are listed page by page (50 per request) and each one is handed to
    ``process_task`` inside its own nested transaction. Paging stops at the first
    failed request or at the first page without items.

    Args:
        session: ORM session used for all reads and writes.
        zf_recipe: CMS recipe whose tasks are processed.
        zimfarm_api_url: base URL of the Zimfarm API.
        author_id: account recorded as author of recipe history entries.
    """
    skip = 0
    limit = 50
    while True:
        response = query_api(
            f"{zimfarm_api_url}/tasks",
            params={
                "skip": skip,
                "limit": limit,
                "status": ["succeeded"],
                "recipe_id": zf_recipe.id,
                "sort_criteria": "done",
            },
        )
        if not response.success:
            logger.error(
                f"Unable to process tasks for recipe {zf_recipe.name}: {response.json}"
            )
            break
        # a successful reply with an empty body yields {} from query_api, so read
        # the key defensively instead of raising KeyError
        tasks = response.json.get("items")
        if not tasks:
            logger.info(f"No more tasks to process for recipe {zf_recipe.name}")
            break

        for task in tasks:
            # one savepoint per task; an exception raised while processing still
            # propagates to the caller
            with session.begin_nested():
                process_task(
                    session,
                    task=task,
                    recipe=zf_recipe,
                    zimfarm_api_url=zimfarm_api_url,
                    author_id=author_id,
                )
        skip += limit


def populate_recipes_from_zimfarm(
    session: OrmSession, zimfarm_api_url: str, author_id: UUID
) -> None:
    """Fetch recipes from zimfarm and attach CMS titles/title flavours to recipes.

    Recipes are listed page by page (50 per request). A recipe unknown to the CMS
    is created first; every recipe is then passed to ``process_recipe``. Paging
    stops at the first failed request or at the first page without items.

    Args:
        session: ORM session used for all reads and writes.
        zimfarm_api_url: base URL of the Zimfarm API.
        author_id: account recorded as author of recipe history entries.
    """
    skip = 0
    limit = 50

    while True:
        response = query_api(
            f"{zimfarm_api_url}/recipes", params={"skip": skip, "limit": limit}
        )
        if not response.success:
            logger.error(
                f"Unable to fetch recipes from {zimfarm_api_url}: {response.json}. "
                "Exiting..."
            )
            break
        # a successful reply with an empty body yields {} from query_api, so read
        # the key defensively instead of raising KeyError
        recipes = response.json.get("items")
        if not recipes:
            logger.info(f"No more recipes returned from {zimfarm_api_url}")
            break
        for recipe in recipes:
            zf_recipe = get_zimfarm_recipe_by_id_or_none(session, UUID(recipe["id"]))
            if zf_recipe is None:
                zf_recipe = create_zimfarm_recipe(
                    session, recipe_id=recipe["id"], recipe_name=recipe["name"]
                )
                logger.info(f"Created zimfarm recipe '{zf_recipe.name}'")
            process_recipe(session, zf_recipe, zimfarm_api_url, author_id=author_id)
        skip += limit


def main():
    """Run the maintenance script against the configured Zimfarm API.

    Reads ``ZIMFARM_API_URL`` and ``USERNAME`` from the environment (both have
    defaults), resolves the account with that username and populates recipes and
    title flavours within a single ``Session.begin()`` block.
    """
    api_url = os.getenv("ZIMFARM_API_URL", "https://api.farm.openzim.org/v2")
    username = os.getenv("USERNAME", default="maint-scripts")

    with Session.begin() as session:
        # the account is used as author of the recipe history entries
        author = get_account_by_username(session, username=username)
        populate_recipes_from_zimfarm(session, api_url, author_id=author.id)


if __name__ == "__main__":
    main()
90 changes: 0 additions & 90 deletions backend/maint-scripts/update_title_flavours_from_books.py

This file was deleted.

1 change: 1 addition & 0 deletions backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ dependencies = [
"Werkzeug == 3.1.5",
"xxhash == 3.7.0",
"pycountry == 26.2.16",
"requests == 2.34.2",
]
dynamic = ["version"]

Expand Down
4 changes: 4 additions & 0 deletions backend/src/cms_backend/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
from cms_backend.api.routes.zimfarm_notifications import (
router as zimfarm_notification_router,
)
from cms_backend.api.routes.zimfarm_recipes import (
router as zimfarm_recipe_router,
)
from cms_backend.context import Context
from cms_backend.db.exceptions import (
RecordAlreadyExistsError,
Expand Down Expand Up @@ -69,6 +72,7 @@ def create_app(*, debug: bool = True):
main_router = APIRouter(prefix="/v1")
main_router.include_router(router=config_router)
main_router.include_router(router=zimfarm_notification_router)
main_router.include_router(router=zimfarm_recipe_router)
main_router.include_router(router=healthcheck_router)
main_router.include_router(router=titles_router)
main_router.include_router(router=books_router)
Expand Down
2 changes: 1 addition & 1 deletion backend/src/cms_backend/api/routes/account.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from werkzeug.security import check_password_hash, generate_password_hash

from cms_backend.api.routes.dependencies import get_current_account, require_permission
from cms_backend.api.routes.fields import LimitFieldMax200, NotEmptyString, SkipField
from cms_backend.api.routes.http_errors import BadRequestError, ForbiddenError
from cms_backend.api.routes.models import ListResponse, calculate_pagination_metadata
from cms_backend.db import gen_dbsession
Expand All @@ -25,6 +24,7 @@
from cms_backend.db.models import Account
from cms_backend.roles import RoleEnum
from cms_backend.schemas import BaseModel
from cms_backend.schemas.fields import LimitFieldMax200, NotEmptyString, SkipField
from cms_backend.schemas.models import AccountUpdateSchema
from cms_backend.schemas.orms import AccountSchema
from cms_backend.utils import is_valid_uuid
Expand Down
Loading