chrysopedia/backend/services/avatar.py
jlightner 89ef2751fa feat: Added IntersectionObserver scroll-spy to ToC highlighting the act…
- "frontend/src/components/TableOfContents.tsx"
- "frontend/src/App.css"

GSD-Task: S04/T02
2026-04-03 05:54:14 +00:00

109 lines
3.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""TheAudioDB avatar lookup for music creators.
Searches TheAudioDB by artist name, applies confidence scoring, and
returns the best-match thumbnail URL or None.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass
import httpx
# Module logger, registered under a fixed name rather than __name__.
logger = logging.getLogger("chrysopedia.avatar")
# Root of the TheAudioDB v1 JSON API; lookup_avatar builds its request URL
# as "<AUDIODB_BASE>/<AUDIODB_KEY>/search.php".
AUDIODB_BASE = "https://www.theaudiodb.com/api/v1/json"
AUDIODB_KEY = "523532" # public test/community key
@dataclass
class AvatarResult:
    """A candidate avatar image chosen for a creator.

    Built by ``lookup_avatar`` from a single artist entry of a TheAudioDB
    search response.
    """

    url: str  # thumbnail URL taken from the entry's "strArtistThumb" field
    source: str  # "theaudiodb"
    artist_name: str  # name as returned by the API
    confidence: float  # 0.0 to 1.0 (match score, capped at 1.0)
def _normalize(name: str) -> str:
"""Lowercase, strip, collapse whitespace."""
return " ".join(name.lower().split())
def _name_similarity(query: str, candidate: str) -> float:
    """Score how closely two artist names match, from 0.0 to 1.0.

    Both names are normalized and split into sets of whitespace-separated
    tokens. The score is the number of shared tokens divided by the size
    of the smaller set (an overlap coefficient), so a name whose tokens
    are all contained in the other scores 1.0. If either name yields no
    tokens the score is 0.0.

    Deliberately simple: sufficient for distinctive artist names, and it
    avoids adding the thefuzz dependency.
    """
    query_words = set(_normalize(query).split())
    candidate_words = set(_normalize(candidate).split())
    if not (query_words and candidate_words):
        return 0.0
    shared = query_words.intersection(candidate_words)
    # Divide by the smaller set so a partial name can still match fully.
    smaller = min(len(query_words), len(candidate_words))
    return len(shared) / smaller
def _genre_overlap(creator_genres: list[str] | None, api_genre: str | None) -> bool:
"""Check if any of the creator's genres appear in the API genre string."""
if not creator_genres or not api_genre:
return False
api_lower = api_genre.lower()
return any(g.lower() in api_lower for g in creator_genres)
def lookup_avatar(
    creator_name: str,
    creator_genres: list[str] | None = None,
) -> AvatarResult | None:
    """Search TheAudioDB for an artist and return the best avatar match.

    Every returned artist that has a thumbnail is scored as its name
    similarity to ``creator_name`` plus a 0.1 bonus when its genre overlaps
    ``creator_genres``. The highest-scoring artist is returned only if its
    confidence (the score capped at 1.0) is at least 0.5.

    Synchronous — designed to run inside a Celery task.

    Args:
        creator_name: Artist name to search for.
        creator_genres: Optional genres used to break ties between
            similarly named artists.

    Returns:
        The best-matching ``AvatarResult``, or None when the name is blank,
        the request fails, the response has no usable artists, or no
        candidate reaches the confidence threshold. Request failures are
        logged and never raised.
    """
    if not creator_name or not creator_name.strip():
        # A blank name has zero name similarity, so no candidate could
        # reach the threshold; skip the network round-trip entirely.
        logger.info("TheAudioDB: skipping lookup for blank name %r", creator_name)
        return None

    url = f"{AUDIODB_BASE}/{AUDIODB_KEY}/search.php"
    try:
        resp = httpx.get(url, params={"s": creator_name}, timeout=10.0)
        resp.raise_for_status()
        data = resp.json()
    except Exception as exc:
        # Deliberately broad: the lookup is best-effort, so any transport,
        # HTTP-status or JSON-decoding failure just means "no avatar".
        logger.warning("TheAudioDB lookup failed for %r: %s", creator_name, exc)
        return None

    # The payload is external input; tolerate a body that is not a JSON
    # object or an "artists" value that is not a list.
    artists = data.get("artists") if isinstance(data, dict) else None
    if not isinstance(artists, list) or not artists:
        logger.info("TheAudioDB: no results for %r", creator_name)
        return None

    best: AvatarResult | None = None
    best_score = 0.0
    for artist in artists:
        if not isinstance(artist, dict):
            continue
        thumb = artist.get("strArtistThumb")
        if not thumb or not isinstance(thumb, str):
            continue
        # A key that is present with a JSON null value bypasses dict.get's
        # default, so coerce anything that is not a string explicitly.
        raw_name = artist.get("strArtist")
        name = raw_name if isinstance(raw_name, str) else ""
        raw_genre = artist.get("strGenre")
        genre = raw_genre if isinstance(raw_genre, str) else None

        similarity = _name_similarity(creator_name, name)
        genre_bonus = 0.1 if _genre_overlap(creator_genres, genre) else 0.0
        score = similarity + genre_bonus
        # Strict comparison: on a tie the earliest candidate is kept.
        if score > best_score:
            best_score = score
            best = AvatarResult(
                url=thumb,
                source="theaudiodb",
                artist_name=name,
                confidence=min(score, 1.0),
            )

    if best is not None and best.confidence >= 0.5:
        logger.info(
            "TheAudioDB match for %r -> %r (confidence=%.2f)",
            creator_name, best.artist_name, best.confidence,
        )
        return best
    logger.info("TheAudioDB: no confident match for %r (best=%.2f)", creator_name, best_score)
    return None