etl module added
This commit is contained in:
parent
0f619dd954
commit
ff36173720
16 changed files with 1573 additions and 0 deletions
93
modules/anime_etl/services/anilist_importer.py
Normal file
93
modules/anime_etl/services/anilist_importer.py
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
# anime_etl/services/anilist_importer.py
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import psycopg
|
||||
from psycopg.rows import dict_row
|
||||
|
||||
from sources.anilist_source import AniListSource
|
||||
from canonicalizer import source_title_to_canonical
|
||||
from db.repository import (
|
||||
get_or_create_studio,
|
||||
get_or_create_image,
|
||||
insert_title_if_not_exists,
|
||||
)
|
||||
from models import CanonicalTitle
|
||||
from jikan_studio_enricher import enrich_studio_with_jikan_desc
|
||||
|
||||
|
||||
# Short alias for psycopg's async connection class; used as the type of the
# ``conn`` parameters in this module's annotations.
Conn = psycopg.AsyncConnection
|
||||
|
||||
|
||||
class AniListImporter:
    """Import titles returned by an AniList search into the database.

    Every search result is canonicalised, its studio is enriched through
    Jikan, the studio and poster image are looked up or created, and the
    title is inserted if it does not exist yet.
    """

    def __init__(self, source: AniListSource | None = None) -> None:
        """Keep the given title source, or build a default ``AniListSource``."""
        # Truthiness check (not ``is None``): any falsy ``source`` is replaced too.
        self._source = source if source else AniListSource()

    async def import_by_filters_in_tx(
        self,
        conn: Conn,
        filters: Dict[str, Any],
    ) -> List[Dict[str, Any]]:
        """Run the import inside a single ``conn.transaction()`` block.

        Steps:
          - search AniList
          - canonicalisation
          - studio enrichment (Jikan)
          - get_or_create_studio (+ illust_id)
          - title poster -> images
          - insert_title_if_not_exists

        Args:
            conn: Connection on which the transaction is opened and which is
                handed to the repository helpers.
            filters: Search filters passed unchanged to the source's
                ``search`` method.

        Returns:
            One summary dict per imported title (see ``_import_by_filters``).
        """
        async with conn.transaction():
            # The result is returned from inside the transaction block.
            imported = await self._import_by_filters(conn, filters)
            return imported

    async def _import_by_filters(
        self,
        conn: Conn,
        filters: Dict[str, Any],
    ) -> List[Dict[str, Any]]:
        """Search the source and import each result, one after another.

        Results whose canonical form has no studio are skipped and do not
        appear in the returned list.

        Returns:
            A list of dicts with the keys ``id``, ``title_names``,
            ``release_year``, ``release_season`` and ``season``.
        """
        found = await self._source.search(filters)

        imported: List[Dict[str, Any]] = []
        for source_title in found:
            summary = await self._import_one(conn, source_title)
            if summary is not None:
                imported.append(summary)
        return imported

    async def _import_one(
        self,
        conn: Conn,
        source_title: Any,
    ) -> Dict[str, Any] | None:
        """Import one source title; return its summary, or ``None`` if skipped."""
        canonical: CanonicalTitle = source_title_to_canonical(source_title)

        # A title without a studio is skipped entirely: nothing is written.
        if canonical.studio is None:
            return None

        # 1) enrich the studio with a description from Jikan
        canonical.studio = await enrich_studio_with_jikan_desc(canonical.studio)

        # 2) create/update the studio (studio_name, illust_id, studio_desc)
        studio_id = await get_or_create_studio(conn, canonical.studio)

        # 3) title poster -> record in images (stored under the "posters" subdir)
        poster_id = await get_or_create_image(
            conn,
            canonical.poster,
            subdir="posters",
        )

        # 4) create the title if it does not exist yet (using studio_id and poster_id)
        title_id = await insert_title_if_not_exists(
            conn,
            canonical,
            studio_id,
            poster_id,
        )

        return {
            "id": title_id,
            "title_names": canonical.title_names,
            "release_year": canonical.release_year,
            "release_season": canonical.release_season,
            "season": canonical.season,
        }
|
||||
|
||||
|
||||
async def import_from_anilist(
    dsn: str,
    filters: Dict[str, Any],
) -> List[Dict[str, Any]]:
    """Open a database connection and run the AniList import in a transaction.

    Args:
        dsn: Connection string passed to ``psycopg.AsyncConnection.connect``.
        filters: Search filters forwarded to
            ``AniListImporter.import_by_filters_in_tx``.

    Returns:
        Whatever ``import_by_filters_in_tx`` returns for these filters.
    """
    # Build the importer first, so no connection is opened if this fails.
    importer = AniListImporter()

    # Rows are produced as dicts (``dict_row``) on this connection.
    connection = await psycopg.AsyncConnection.connect(dsn, row_factory=dict_row)
    async with connection as conn:
        imported = await importer.import_by_filters_in_tx(conn, filters)
        return imported
|
||||
Loading…
Add table
Add a link
Reference in a new issue