"""TheAudioDB avatar lookup for music creators. Searches TheAudioDB by artist name, applies confidence scoring, and returns the best-match thumbnail URL or None. """ from __future__ import annotations import logging from dataclasses import dataclass import httpx logger = logging.getLogger("chrysopedia.avatar") AUDIODB_BASE = "https://www.theaudiodb.com/api/v1/json" AUDIODB_KEY = "523532" # public test/community key @dataclass class AvatarResult: url: str source: str # "theaudiodb" artist_name: str # name as returned by the API confidence: float # 0.0–1.0 def _normalize(name: str) -> str: """Lowercase, strip, collapse whitespace.""" return " ".join(name.lower().split()) def _name_similarity(query: str, candidate: str) -> float: """Simple token-based similarity score (0.0–1.0). Checks token overlap between the query and candidate names. Sufficient for distinctive artist names; avoids adding thefuzz dep. """ q_tokens = set(_normalize(query).split()) c_tokens = set(_normalize(candidate).split()) if not q_tokens or not c_tokens: return 0.0 overlap = q_tokens & c_tokens # Jaccard-ish: overlap relative to the shorter set return len(overlap) / min(len(q_tokens), len(c_tokens)) def _genre_overlap(creator_genres: list[str] | None, api_genre: str | None) -> bool: """Check if any of the creator's genres appear in the API genre string.""" if not creator_genres or not api_genre: return False api_lower = api_genre.lower() return any(g.lower() in api_lower for g in creator_genres) def lookup_avatar( creator_name: str, creator_genres: list[str] | None = None, ) -> AvatarResult | None: """Search TheAudioDB for an artist and return the best avatar match. Returns None if no confident match is found. Synchronous — designed to run inside a Celery task. """ url = f"{AUDIODB_BASE}/{AUDIODB_KEY}/search.php" try: resp = httpx.get(url, params={"s": creator_name}, timeout=10.0) resp.raise_for_status() data = resp.json() except Exception as exc: logger.warning("TheAudioDB lookup failed for %r: %s", creator_name, exc) return None artists = data.get("artists") if not artists: logger.info("TheAudioDB: no results for %r", creator_name) return None best: AvatarResult | None = None best_score = 0.0 for artist in artists: name = artist.get("strArtist", "") thumb = artist.get("strArtistThumb") if not thumb: continue similarity = _name_similarity(creator_name, name) genre_bonus = 0.1 if _genre_overlap(creator_genres, artist.get("strGenre")) else 0.0 score = similarity + genre_bonus if score > best_score: best_score = score best = AvatarResult( url=thumb, source="theaudiodb", artist_name=name, confidence=min(score, 1.0), ) if best and best.confidence >= 0.5: logger.info( "TheAudioDB match for %r → %r (confidence=%.2f)", creator_name, best.artist_name, best.confidence, ) return best logger.info("TheAudioDB: no confident match for %r (best=%.2f)", creator_name, best_score) return None