etl module added

This commit is contained in:
garaev kamil 2025-12-05 22:59:33 +03:00
parent 0f619dd954
commit ff36173720
16 changed files with 1573 additions and 0 deletions

View file

@ -0,0 +1,93 @@
# anime_etl/services/anilist_importer.py
from __future__ import annotations
from typing import Any, Dict, List
import psycopg
from psycopg.rows import dict_row
from sources.anilist_source import AniListSource
from canonicalizer import source_title_to_canonical
from db.repository import (
get_or_create_studio,
get_or_create_image,
insert_title_if_not_exists,
)
from models import CanonicalTitle
from jikan_studio_enricher import enrich_studio_with_jikan_desc
# Short alias for the async psycopg connection class; used in the
# parameter annotations of the importer methods below.
Conn = psycopg.AsyncConnection
class AniListImporter:
def __init__(self, source: AniListSource | None = None) -> None:
self._source = source or AniListSource()
async def import_by_filters_in_tx(
self,
conn: Conn,
filters: Dict[str, Any],
) -> List[Dict[str, Any]]:
"""
Выполнить импорт в рамках одной транзакции:
- поиск в AniList
- канонизация
- обогащение студии (Jikan)
- get_or_create_studio (+ illust_id)
- скачивание постера тайтла -> images
- insert_title_if_not_exists
"""
async with conn.transaction():
return await self._import_by_filters(conn, filters)
async def _import_by_filters(
self,
conn: Conn,
filters: Dict[str, Any],
) -> List[Dict[str, Any]]:
source_titles = await self._source.search(filters)
results: List[Dict[str, Any]] = []
for st in source_titles:
canonical: CanonicalTitle = source_title_to_canonical(st)
# 1) обогатить студию описанием из Jikan (если есть студия и ещё нет description)
if canonical.studio is None:
continue
canonical.studio = await enrich_studio_with_jikan_desc(canonical.studio)
# 2) создать/обновить студию (studio_name, illust_id, studio_desc)
studio_id = await get_or_create_studio(conn, canonical.studio)
# 3) скачать постер тайтла и создать запись в images
poster_id = await get_or_create_image(conn, canonical.poster, subdir="posters")
# 4) создать тайтл, если его ещё нет (с учётом studio_id и poster_id)
title_id = await insert_title_if_not_exists(conn, canonical, studio_id, poster_id)
results.append(
{
"id": title_id,
"title_names": canonical.title_names,
"release_year": canonical.release_year,
"release_season": canonical.release_season,
"season": canonical.season,
}
)
return results
async def import_from_anilist(
    dsn: str,
    filters: Dict[str, Any],
) -> List[Dict[str, Any]]:
    """Open a database connection, start a transaction and run the import.

    Connects to ``dsn`` with ``dict_row`` as the row factory and returns
    whatever ``AniListImporter.import_by_filters_in_tx`` returns for
    ``filters``.
    """
    # The importer is built before connecting, so a failure while creating
    # the default source happens before any connection is opened.
    anilist_importer = AniListImporter()
    connecting = psycopg.AsyncConnection.connect(dsn, row_factory=dict_row)
    async with await connecting as conn:
        imported = await anilist_importer.import_by_filters_in_tx(conn, filters)
        return imported