- "frontend/src/components/TableOfContents.tsx" - "frontend/src/App.css" GSD-Task: S04/T02
109 lines
3.3 KiB
Python
109 lines
3.3 KiB
Python
"""TheAudioDB avatar lookup for music creators.
|
||
|
||
Searches TheAudioDB by artist name, applies confidence scoring, and
returns the best-match thumbnail URL, or None when no match is confident enough.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
from dataclasses import dataclass
|
||
|
||
import httpx
|
||
|
||
# Module logger. The name is a fixed string rather than __name__.
logger = logging.getLogger("chrysopedia.avatar")

# Root of TheAudioDB's v1 JSON API. The API key and the endpoint file name
# are appended to this as path segments when a request URL is built.
AUDIODB_BASE = "https://www.theaudiodb.com/api/v1/json"

# Public test/community key.
AUDIODB_KEY = "523532"
|
||
|
||
|
||
@dataclass
class AvatarResult:
    """An avatar image selected for a creator, plus how it was matched.

    Fields are positional in the order declared below.
    """

    # Thumbnail image URL for the matched artist.
    url: str
    # Identifier of the service that supplied the image, e.g. "theaudiodb".
    source: str
    # Artist name as returned by the API (not the name that was searched for).
    artist_name: str
    # Match score in the range 0.0–1.0.
    confidence: float
|
||
|
||
|
||
def _normalize(name: str) -> str:
|
||
"""Lowercase, strip, collapse whitespace."""
|
||
return " ".join(name.lower().split())
|
||
|
||
|
||
def _name_similarity(query: str, candidate: str) -> float:
    """Score how closely two artist names match, from 0.0 to 1.0.

    Each name is normalized and split on whitespace into a set of tokens.
    The score is the number of tokens the two sets share divided by the
    size of the smaller set, so a name whose tokens all appear in the
    other scores 1.0. If either name yields no tokens the score is 0.0.

    Token matching is deliberately simple: adequate for distinctive artist
    names and avoids pulling in a fuzzy-matching dependency such as thefuzz.
    """
    query_words = set(_normalize(query).split())
    candidate_words = set(_normalize(candidate).split())

    smaller_size = min(len(query_words), len(candidate_words))
    if smaller_size == 0:
        # At least one side is empty; nothing to compare (and avoids /0).
        return 0.0

    shared = query_words.intersection(candidate_words)
    return len(shared) / smaller_size
|
||
|
||
|
||
def _genre_overlap(creator_genres: list[str] | None, api_genre: str | None) -> bool:
|
||
"""Check if any of the creator's genres appear in the API genre string."""
|
||
if not creator_genres or not api_genre:
|
||
return False
|
||
api_lower = api_genre.lower()
|
||
return any(g.lower() in api_lower for g in creator_genres)
|
||
|
||
|
||
def lookup_avatar(
    creator_name: str,
    creator_genres: list[str] | None = None,
) -> AvatarResult | None:
    """Search TheAudioDB for an artist and return the best avatar match.

    Each returned artist that has a thumbnail is scored by name similarity
    to *creator_name*, plus a 0.1 bonus when one of *creator_genres*
    appears in the artist's genre. The highest-scoring artist wins (the
    earliest one on ties) and is returned only if its confidence, capped at
    1.0, is at least 0.5.

    Synchronous — designed to run inside a Celery task.

    Args:
        creator_name: Artist name to search for.
        creator_genres: Optional genres used to break ties / boost matches.

    Returns:
        An AvatarResult for the best match, or None when the name is blank,
        the request or JSON decoding fails, the response has no usable
        artists, or no candidate is confident enough. Failures are logged,
        never raised.
    """
    if not creator_name or not creator_name.strip():
        # A blank name can never reach the confidence threshold (similarity
        # is 0.0), so skip the network round-trip entirely.
        logger.info("TheAudioDB: skipping lookup for blank creator name %r", creator_name)
        return None

    url = f"{AUDIODB_BASE}/{AUDIODB_KEY}/search.php"
    try:
        resp = httpx.get(url, params={"s": creator_name}, timeout=10.0)
        resp.raise_for_status()
        data = resp.json()
    except Exception as exc:
        # Deliberately broad: the lookup is best-effort and must not fail
        # the calling task on transport, HTTP-status, or decoding errors.
        logger.warning("TheAudioDB lookup failed for %r: %s", creator_name, exc)
        return None

    # The payload is expected to be an object with an "artists" list; treat
    # any other shape (null, list, scalar) as "no results" instead of raising.
    artists = data.get("artists") if isinstance(data, dict) else None
    if not artists or not isinstance(artists, list):
        logger.info("TheAudioDB: no results for %r", creator_name)
        return None

    best: AvatarResult | None = None
    best_score = 0.0

    for artist in artists:
        if not isinstance(artist, dict):
            continue

        thumb = artist.get("strArtistThumb")
        if not thumb:
            # No image to offer, so this artist cannot be a result.
            continue

        # `or ""` rather than a .get() default: the key may be present with
        # a JSON null value, which a default would not replace.
        name = artist.get("strArtist") or ""

        similarity = _name_similarity(creator_name, name)
        genre_bonus = 0.1 if _genre_overlap(creator_genres, artist.get("strGenre")) else 0.0
        score = similarity + genre_bonus

        # Strict ">" keeps the earliest artist when scores tie.
        if score > best_score:
            best_score = score
            best = AvatarResult(
                url=thumb,
                source="theaudiodb",
                artist_name=name,
                # The genre bonus can push the raw score to 1.1; clamp it.
                confidence=min(score, 1.0),
            )

    if best and best.confidence >= 0.5:
        logger.info(
            "TheAudioDB match for %r → %r (confidence=%.2f)",
            creator_name, best.artist_name, best.confidence,
        )
        return best

    logger.info("TheAudioDB: no confident match for %r (best=%.2f)", creator_name, best_score)
    return None
|