89 lines
2.3 KiB
Python
89 lines
2.3 KiB
Python
# anime_etl/utils/season_resolver.py
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from typing import Optional
|
|
|
|
|
|
# Lookup table for the Roman numerals I through X (upper-case keys only).
_ROMAN = {
    "I": 1,
    "II": 2,
    "III": 3,
    "IV": 4,
    "V": 5,
    "VI": 6,
    "VII": 7,
    "VIII": 8,
    "IX": 9,
    "X": 10,
}
|
|
|
|
|
|
def _roman_to_int(token: str) -> Optional[int]:
    """Translate a Roman numeral token into its integer value.

    The token is upper-cased before the lookup, so matching is
    case-insensitive.

    Args:
        token: Candidate numeral such as "iii" or "IV".

    Returns:
        The value stored in ``_ROMAN`` for the token, or ``None`` when the
        table has no entry for it.
    """
    numeral = token.upper()
    if numeral in _ROMAN:
        return _ROMAN[numeral]
    return None
|
|
|
|
|
|
# Title fragments recognised as season markers, for example:
#   - "Season 2"
#   - "2nd Season"
#   - "S3"
#   - "III"
# Each regex captures the season token in group 1.
_SEASON_PATTERNS = [
    re.compile(expression, flags)
    for expression, flags in (
        # "Season 2"
        (r"\bseason\s*(\d{1,2})\b", re.IGNORECASE),
        # "2nd Season"
        (r"\b(\d{1,2})(?:st|nd|rd|th)\s+season\b", re.IGNORECASE),
        # "S3"
        (r"\bs(\d{1,2})\b", re.IGNORECASE),
        # Bare one- or two-digit number: ambiguous, so it sits after the
        # explicit "season" forms above.
        # NOTE(review): the original comment called this the very last
        # fallback, yet the Roman-numeral pattern below follows it.
        (r"\b(\d{1,2})\b", 0),
        # Roman numerals such as "III"
        (r"\b([IVX]{1,5})\b", re.IGNORECASE),
    )
]
|
|
|
|
|
|
def extract_season_number_from_title(name: str) -> Optional[int]:
    """Extract a season number from a title string.

    The patterns in ``_SEASON_PATTERNS`` are tried in list order. Within one
    pattern every match is examined from left to right, and the first
    captured token that converts to a number wins: it is looked up as a
    Roman numeral via ``_roman_to_int`` first and parsed with ``int``
    otherwise.

    Args:
        name: Title text; surrounding whitespace is ignored.

    Returns:
        The season number, or ``None`` when the title is empty or no
        pattern yields a convertible token.
    """
    name = name.strip()
    if not name:
        return None

    for pat in _SEASON_PATTERNS:
        # Scan every match, not just the first one: a token that fits the
        # regex but is rejected by both the Roman lookup and int() (e.g.
        # "XX") must not hide a usable token later in the same title.
        for m in pat.finditer(name):
            token = m.group(1)

            # Roman numerals take precedence over plain digits.
            roman = _roman_to_int(token)
            if roman is not None:
                return roman

            try:
                return int(token)
            except ValueError:
                # Not a number in either notation; try the next match.
                continue

    return None
|
|
|
|
|
|
def resolve_season_from_media(media: dict) -> Optional[int]:
    """Determine the season number from an AniList Media record.

    Logic:
    - Any format other than TV/ONA is treated as not being a numbered
      season (OVA/MOVIE/...), so ``None`` is returned.
    - Otherwise ``title.english``, ``title.romaji`` and ``title.native``
      are checked in that order, and the first number that
      ``extract_season_number_from_title`` finds is returned.

    Args:
        media: Mapping with an optional ``"format"`` value and an optional
            ``"title"`` mapping; non-string title values are skipped.

    Returns:
        The season number, or ``None`` when the format is excluded or no
        title yields a number.
    """
    if media.get("format") not in ("TV", "ONA"):
        return None

    # A missing or null "title" is handled like an empty mapping.
    titles = media.get("title") or {}

    for field in ("english", "romaji", "native"):
        text = titles.get(field)
        if not isinstance(text, str):
            continue

        season = extract_season_number_from_title(text)
        if season is not None:
            return season

    return None
|