etl module added
This commit is contained in:
parent
0f619dd954
commit
ff36173720
16 changed files with 1573 additions and 0 deletions
0
modules/anime_etl/utils/__init__.py
Normal file
0
modules/anime_etl/utils/__init__.py
Normal file
89
modules/anime_etl/utils/season_resolver.py
Normal file
89
modules/anime_etl/utils/season_resolver.py
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
# anime_etl/utils/season_resolver.py
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# Roman numerals I..X mapped to their integer values (position in the tuple
# is the value, counting from 1). Keys are upper-case.
_ROMAN = {
    numeral: value
    for value, numeral in enumerate(
        ("I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X"),
        start=1,
    )
}
|
||||
|
||||
|
||||
def _roman_to_int(token: str) -> Optional[int]:
    """Look up *token* in the Roman numeral table, ignoring letter case.

    Returns the integer value, or ``None`` when the upper-cased token is not
    a key of ``_ROMAN``.
    """
    normalized = token.upper()
    return _ROMAN.get(normalized)
|
||||
|
||||
|
||||
# Title fragments recognised as a season marker, e.g.:
#   - "Season 2"
#   - "2nd Season"
#   - "S3"
#   - "III"
#
# Order matters: the list is walked top to bottom, so the most explicit
# spellings come first and the loosest heuristic comes last. Each pattern
# exposes the season token as group 1.
_SEASON_PATTERNS = [
    # "Season 2"
    re.compile(r"\bseason\s*(\d{1,2})\b", re.IGNORECASE),
    # "2nd Season"
    re.compile(r"\b(\d{1,2})(?:st|nd|rd|th)\s+season\b", re.IGNORECASE),
    # "S3"
    re.compile(r"\bs(\d{1,2})\b", re.IGNORECASE),
    # Roman numerals: "III". Deliberately case-sensitive: with IGNORECASE,
    # ordinary lowercase words such as the "x" in "SPY x FAMILY" would be read
    # as a numeral (season 10).
    re.compile(r"\b([IVX]{1,5})\b"),
    # A lone 1-2 digit number. This is the loosest heuristic (it also matches
    # "Part 2" and similar), so it must stay the very last fallback.
    re.compile(r"\b(\d{1,2})\b"),
]
|
||||
|
||||
|
||||
def extract_season_number_from_title(name: str) -> Optional[int]:
    """Extract a season number from a title string.

    The patterns in ``_SEASON_PATTERNS`` are tried in list order. For each
    pattern every match in the title is inspected (not only the first one),
    so a token that cannot be converted does not hide a usable token further
    along in the same title.

    Args:
        name: Title text; surrounding whitespace is ignored.

    Returns:
        The season number from the first convertible token, or ``None`` when
        the title is empty/blank or no pattern yields a usable token.
    """
    name = name.strip()
    if not name:
        return None

    for pat in _SEASON_PATTERNS:
        for m in pat.finditer(name):
            token = m.group(1)

            # Digit tokens need no Roman lookup; convert them directly.
            if token.isdecimal():
                return int(token)

            # Otherwise the token came from the Roman-numeral pattern. It may
            # still be outside the lookup table (e.g. "XX"); in that case keep
            # scanning the remaining matches instead of giving up.
            roman = _roman_to_int(token)
            if roman is not None:
                return roman

    return None
|
||||
|
||||
|
||||
def resolve_season_from_media(media: dict) -> Optional[int]:
    """Determine the season number from an AniList Media record.

    Logic:
      - If the format is not TV/ONA, the entry is treated as not being a
        numbered season (OVA/MOVIE/...) and ``None`` is returned.
      - Otherwise the ``title.english`` / ``romaji`` / ``native`` strings are
        tried in that order, and the first season number extracted from one
        of them is returned.

    Returns ``None`` when no title variant yields a number.
    """
    if media.get("format") not in ("TV", "ONA"):
        return None

    titles = media.get("title") or {}
    variants = [titles.get(key) for key in ("english", "romaji", "native")]
    # Missing or non-string title variants are skipped.
    texts: list[str] = [text for text in variants if isinstance(text, str)]

    for text in texts:
        season = extract_season_number_from_title(text)
        if season is not None:
            return season

    return None
|
||||
Loading…
Add table
Add a link
Reference in a new issue