etl module added

This commit is contained in:
garaev kamil 2025-12-05 22:59:33 +03:00
parent 0f619dd954
commit ff36173720
16 changed files with 1573 additions and 0 deletions

View file

@ -0,0 +1,38 @@
from __future__ import annotations
from typing import Optional
from models import Studio
from sources.jikan_async_client import search_producer, fetch_producer_full
async def enrich_studio_with_jikan_desc(studio: Studio) -> Studio:
    """Fill in a missing studio description from Jikan's producer ``about`` text.

    The lookup is best-effort. The studio is returned unchanged when:

    - it is falsy (e.g. ``None``) or already has a description;
    - it has no usable (non-blank string) name to search by;
    - the producer search returns nothing;
    - the first match has no integer ``mal_id``;
    - the full producer record is empty or has no non-blank ``about`` string.

    Only ``description`` is ever assigned; no other attribute is written.

    Args:
        studio: Studio to enrich. It is mutated in place.

    Returns:
        The same ``studio`` object that was passed in.
    """
    if not studio or studio.description:
        return studio

    # A blank query cannot identify a specific producer, so any hit returned
    # for it would be an arbitrary match; skip the lookup instead of risking
    # attaching an unrelated description.
    name = studio.name
    if not isinstance(name, str) or not name.strip():
        return studio

    matches = await search_producer(name, limit=1)
    if not matches:
        return studio

    mal_id = matches[0].get("mal_id")
    if not isinstance(mal_id, int):
        return studio

    full = await fetch_producer_full(mal_id)
    if not full:
        return studio

    about = full.get("about")
    if not isinstance(about, str) or not about.strip():
        return studio

    # Light normalization: collapse runs of whitespace/newlines into single
    # spaces and trim both ends.
    studio.description = " ".join(about.split())
    return studio