etl module added
This commit is contained in:
parent
0f619dd954
commit
ff36173720
16 changed files with 1573 additions and 0 deletions
0
modules/anime_etl/normalizers/__init__.py
Normal file
0
modules/anime_etl/normalizers/__init__.py
Normal file
184
modules/anime_etl/normalizers/anilist_normalizer.py
Normal file
184
modules/anime_etl/normalizers/anilist_normalizer.py
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
# modules/anime_etl/normalizers/anilist_normalizer.py
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from models import Source, SourceTitle, Studio, Image, Tag
|
||||
from utils.season_resolver import resolve_season_from_media
|
||||
|
||||
# Translation table from the AniList `status` value to the internal title status.
# Keys absent from this table are not translated by `_status` (it returns None).
STATUS_MAP: Dict[str, str] = {
    "FINISHED": "finished",
    "RELEASING": "ongoing",
    "NOT_YET_RELEASED": "planned",
    # NOTE(review): cancelled titles are folded into "planned" — confirm intended.
    "CANCELLED": "planned",
    # A title on hiatus is treated the same as one that is still releasing.
    "HIATUS": "ongoing",
}
|
||||
|
||||
# Translation table from the upper-case AniList `season` value to the
# lower-case season name used internally.
SEASON_MAP: Dict[str, str] = {
    name: name.lower() for name in ("WINTER", "SPRING", "SUMMER", "FALL")
}
|
||||
|
||||
|
||||
def _title_names(media: Dict[str, Any]) -> Dict[str, List[str]]:
|
||||
t = media.get("title") or {}
|
||||
native = t.get("native")
|
||||
english = t.get("english")
|
||||
romaji = t.get("romaji")
|
||||
|
||||
res: Dict[str, List[str]] = {}
|
||||
if native:
|
||||
res.setdefault("ja", []).append(native)
|
||||
if english:
|
||||
res.setdefault("en", []).append(english)
|
||||
if romaji:
|
||||
res.setdefault("romaji", []).append(romaji)
|
||||
return res
|
||||
|
||||
|
||||
def _studio(media: Dict[str, Any]) -> Optional[Studio]:
|
||||
studios_nodes = (media.get("studios") or {}).get("nodes") or []
|
||||
if not studios_nodes:
|
||||
return None
|
||||
name = studios_nodes[0].get("name")
|
||||
if not name:
|
||||
return None
|
||||
return Studio(id=None, name=name, poster=None, description=None)
|
||||
|
||||
|
||||
def _tags(media: Dict[str, Any]) -> List[Tag]:
|
||||
genres = media.get("genres") or []
|
||||
res: List[Tag] = []
|
||||
for g in genres:
|
||||
if g:
|
||||
res.append(Tag(names={"en": g}))
|
||||
return res
|
||||
|
||||
|
||||
def _poster(media: Dict[str, Any]) -> Optional[Image]:
|
||||
cover = media.get("coverImage") or {}
|
||||
url = cover.get("extraLarge") or cover.get("large")
|
||||
if not url:
|
||||
return None
|
||||
return Image(id=None, storage_type=None, image_path=url)
|
||||
|
||||
|
||||
def _status(media: Dict[str, Any]) -> Optional[str]:
|
||||
raw = media.get("status")
|
||||
if not raw:
|
||||
return None
|
||||
return STATUS_MAP.get(raw)
|
||||
|
||||
|
||||
def _rating(media: Dict[str, Any]) -> Optional[float]:
|
||||
avg = media.get("averageScore")
|
||||
if avg is None:
|
||||
return None
|
||||
try:
|
||||
return float(avg) / 10.0
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _rating_count(media: Dict[str, Any]) -> Optional[int]:
|
||||
pop = media.get("popularity")
|
||||
if pop is None:
|
||||
return None
|
||||
try:
|
||||
return int(pop)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _year_and_season(media: Dict[str, Any]) -> tuple[Optional[int], Optional[str]]:
|
||||
year = media.get("seasonYear")
|
||||
raw_season = media.get("season")
|
||||
release_year = year if isinstance(year, int) else None
|
||||
release_season = None
|
||||
if isinstance(raw_season, str):
|
||||
release_season = SEASON_MAP.get(raw_season.upper())
|
||||
return release_year, release_season
|
||||
|
||||
|
||||
def _episodes(media: Dict[str, Any]) -> tuple[Optional[int], Optional[int]]:
|
||||
episodes_all = media.get("episodes")
|
||||
if not isinstance(episodes_all, int):
|
||||
episodes_all = None
|
||||
|
||||
next_ep = media.get("nextAiringEpisode") or {}
|
||||
ep_num = next_ep.get("episode") if isinstance(next_ep, dict) else None
|
||||
if not isinstance(ep_num, int):
|
||||
ep_num = None
|
||||
|
||||
# базовая логика
|
||||
if ep_num is not None:
|
||||
episodes_aired = ep_num - 1
|
||||
else:
|
||||
episodes_aired = episodes_all
|
||||
|
||||
# приведение к инварианту БД:
|
||||
# либо обе NULL, либо обе заданы и episodes_aired <= episodes_all
|
||||
if episodes_aired is None and episodes_all is None:
|
||||
return None, None
|
||||
|
||||
if episodes_all is None and episodes_aired is not None:
|
||||
episodes_all = episodes_aired
|
||||
|
||||
if episodes_aired is None and episodes_all is not None:
|
||||
episodes_aired = episodes_all
|
||||
|
||||
if (
|
||||
episodes_aired is not None
|
||||
and episodes_all is not None
|
||||
and episodes_aired > episodes_all
|
||||
):
|
||||
episodes_aired = episodes_all
|
||||
|
||||
return episodes_aired, episodes_all
|
||||
|
||||
|
||||
|
||||
def _episodes_len(media: Dict[str, Any]) -> Optional[Dict[str, float]]:
|
||||
duration = media.get("duration")
|
||||
if duration is None:
|
||||
return None
|
||||
try:
|
||||
return {"default": float(duration)}
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def normalize_media(media: Dict[str, Any]) -> SourceTitle:
    """Convert an AniList Media JSON object into our ``SourceTitle``.

    Each field is produced by the matching private helper of this module;
    ``season`` comes from ``resolve_season_from_media``. The external id
    is ``str(media["id"])``, so a missing ``"id"`` key raises ``KeyError``.
    """
    # Helpers are evaluated up front, in a fixed order, before the
    # SourceTitle is assembled.
    names = _title_names(media)
    studio_obj = _studio(media)
    tag_list = _tags(media)
    poster_img = _poster(media)
    status = _status(media)
    score = _rating(media)
    score_count = _rating_count(media)
    year_and_season = _year_and_season(media)
    episode_counts = _episodes(media)
    lengths = _episodes_len(media)
    resolved_season = resolve_season_from_media(media)

    year, season_name = year_and_season
    aired, total = episode_counts

    return SourceTitle(
        source=Source.ANILIST,
        external_id=str(media["id"]),
        title_names=names,
        studio=studio_obj,
        tags=tag_list,
        poster=poster_img,
        title_status=status,
        rating=score,
        rating_count=score_count,
        release_year=year,
        release_season=season_name,
        season=resolved_season,
        episodes_aired=aired,
        episodes_all=total,
        episodes_len=lengths,
    )
|
||||
Loading…
Add table
Add a link
Reference in a new issue