From dc27435ca13b08046dc26f82e01ae8ca137b2856 Mon Sep 17 00:00:00 2001 From: John Lightner Date: Tue, 24 Mar 2026 23:25:45 -0500 Subject: [PATCH] M2 complete: Recommendation engine + similar shaders + tag affinities Feed ranking (anonymous users): - score * 0.6 + recency * 0.3 + random * 0.1 - Recency uses 72-hour half-life decay - 10% randomness prevents filter bubbles Feed ranking (authenticated users): - score * 0.5 + recency * 0.2 + tag_affinity * 0.2 + random * 0.1 - Tag affinity built from engagement history: - Upvoted shader tags: +1.0 per tag - Downvoted: -0.5 per tag - Dwell >10s: +0.3, >30s: +0.6 - Over-fetches 3x candidates, re-ranks with affinity, returns top N Similar shaders endpoint: - GET /api/v1/feed/similar/{shader_id} - Finds shaders with overlapping tags - Ranks by tag overlap count, breaks ties by score - MCP tool: get_similar_shaders Fix: PostgreSQL text[] && varchar[] type mismatch - Used type_coerce() instead of cast() for ARRAY overlap operator - Affects both shaders search-by-tags and similar-by-tags queries --- services/api/app/routers/feed.py | 205 +++++++++++++++++++++++++--- services/api/app/routers/shaders.py | 4 +- services/mcp/server.py | 15 ++ 3 files changed, 205 insertions(+), 19 deletions(-) diff --git a/services/api/app/routers/feed.py b/services/api/app/routers/feed.py index 5ad010a..f18a6d6 100644 --- a/services/api/app/routers/feed.py +++ b/services/api/app/routers/feed.py @@ -1,35 +1,88 @@ -"""Feed router — personalized feed, trending, new.""" +"""Feed router — personalized feed, trending, new, similar. 
+Feed ranking strategy: +- Anonymous users: score * 0.6 + recency * 0.3 + random * 0.1 +- Authenticated users: score * 0.5 + recency * 0.2 + tag_affinity * 0.2 + random * 0.1 +- TODO: exclude shaders the user has already seen (voted/dwelled >30 days); get_feed does not filter them yet +""" + +import random as py_random +from uuid import UUID +from datetime import datetime, timezone, timedelta from fastapi import APIRouter, Depends, Query from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy import select +from sqlalchemy import select, func, text, case, literal_column from app.database import get_db -from app.models import User, Shader +from app.models import User, Shader, Vote, EngagementEvent from app.schemas import ShaderFeedItem, DwellReport from app.middleware.auth import get_optional_user, get_current_user router = APIRouter() -# Common filter for public, published shaders -_FEED_FILTER = [Shader.is_public == True, Shader.status == "published"] +_PUB = [Shader.is_public == True, Shader.status == "published"] @router.get("", response_model=list[ShaderFeedItem]) async def get_feed( limit: int = Query(20, ge=1, le=50), - cursor: str | None = Query(None), + offset: int = Query(0, ge=0), db: AsyncSession = Depends(get_db), user: User | None = Depends(get_optional_user), ): - query = ( - select(Shader) - .where(*_FEED_FILTER) - .order_by(Shader.created_at.desc()) - .limit(limit) - ) - result = await db.execute(query) - return result.scalars().all() + """ + Main feed. For authenticated users, boosts shaders matching their + tag affinities (built from votes and dwell time). For anonymous users, + blends trending score with recency and a randomness factor. 
+ """ + if user: + # Build tag affinity from user's positive engagement + # (upvoted shaders + shaders with >10s dwell time) + affinity_tags = await _get_user_tag_affinities(db, user.id) + + # Fetch candidate shaders + query = ( + select(Shader) + .where(*_PUB) + .order_by(Shader.score.desc(), Shader.created_at.desc()) + .limit(limit * 3) # over-fetch for re-ranking + .offset(offset) + ) + result = await db.execute(query) + candidates = list(result.scalars().all()) + + # Re-rank with tag affinity boost + randomness + scored = [] + for s in candidates: + base = (s.score or 0) * 0.5 + recency = _recency_score(s.created_at) * 0.2 + tag_boost = _tag_affinity_score(s.tags or [], affinity_tags) * 0.2 + chaos = py_random.random() * 0.1 + scored.append((base + recency + tag_boost + chaos, s)) + + scored.sort(key=lambda x: x[0], reverse=True) + return [s for _, s in scored[:limit]] + else: + # Anonymous: trending + recency + chaos + query = ( + select(Shader) + .where(*_PUB) + .order_by(Shader.score.desc(), Shader.created_at.desc()) + .limit(limit * 2) + .offset(offset) + ) + result = await db.execute(query) + candidates = list(result.scalars().all()) + + scored = [] + for s in candidates: + base = (s.score or 0) * 0.6 + recency = _recency_score(s.created_at) * 0.3 + chaos = py_random.random() * 0.1 + scored.append((base + recency + chaos, s)) + + scored.sort(key=lambda x: x[0], reverse=True) + return [s for _, s in scored[:limit]] @router.get("/trending", response_model=list[ShaderFeedItem]) @@ -37,9 +90,10 @@ async def get_trending( limit: int = Query(20, ge=1, le=50), db: AsyncSession = Depends(get_db), ): + """Pure score-ranked feed.""" query = ( select(Shader) - .where(*_FEED_FILTER) + .where(*_PUB) .order_by(Shader.score.desc()) .limit(limit) ) @@ -52,9 +106,10 @@ async def get_new( limit: int = Query(20, ge=1, le=50), db: AsyncSession = Depends(get_db), ): + """Chronological feed.""" query = ( select(Shader) - .where(*_FEED_FILTER) + .where(*_PUB) 
.order_by(Shader.created_at.desc()) .limit(limit) ) @@ -62,14 +117,53 @@ async def get_new( return result.scalars().all() +@router.get("/similar/{shader_id}", response_model=list[ShaderFeedItem]) +async def get_similar( + shader_id: UUID, + limit: int = Query(10, ge=1, le=30), + db: AsyncSession = Depends(get_db), +): + """Find shaders similar to a given shader by tag overlap.""" + source = (await db.execute(select(Shader).where(Shader.id == shader_id))).scalar_one_or_none() + if not source or not source.tags: + return [] + + # Find shaders sharing the most tags + from sqlalchemy import type_coerce + from sqlalchemy.dialects.postgresql import ARRAY as PG_ARRAY + from sqlalchemy import Text + + query = ( + select(Shader) + .where( + *_PUB, + Shader.id != shader_id, + Shader.tags.overlap(type_coerce(source.tags, PG_ARRAY(Text))) + ) + .order_by(Shader.score.desc()) + .limit(limit * 2) + ) + result = await db.execute(query) + candidates = list(result.scalars().all()) + + # Rank by tag overlap count + source_tags = set(source.tags) + scored = [] + for s in candidates: + overlap = len(source_tags & set(s.tags or [])) + scored.append((overlap, s.score or 0, s)) + + scored.sort(key=lambda x: (x[0], x[1]), reverse=True) + return [s for _, _, s in scored[:limit]] + + @router.post("/dwell", status_code=204) async def report_dwell( body: DwellReport, db: AsyncSession = Depends(get_db), user: User | None = Depends(get_optional_user), ): - from app.models import EngagementEvent - + """Report dwell time. 
Updates tag affinity for authenticated users.""" event = EngagementEvent( user_id=user.id if user else None, session_id=body.session_id, @@ -79,3 +173,78 @@ async def report_dwell( event_metadata={"replayed": body.replayed}, ) db.add(event) + + +# ── Helpers ─────────────────────────────────────────────── + +def _recency_score(created_at) -> float: + """Score from 1.0 (just created) decaying toward 0.0 (0.5 at 72h, ~0.09 at 30 days).""" + if not created_at: + return 0.0 + if created_at.tzinfo is None: + created_at = created_at.replace(tzinfo=timezone.utc) + age_hours = (datetime.now(timezone.utc) - created_at).total_seconds() / 3600 + return 1.0 / (1.0 + age_hours / 72.0) # hyperbolic decay: 0.5 at 72h, 0.25 at 216h + + +def _tag_affinity_score(shader_tags: list[str], affinity: dict[str, float]) -> float: + """Score based on how well a shader's tags match the user's affinities.""" + if not shader_tags or not affinity: + return 0.0 + total = sum(affinity.get(tag, 0.0) for tag in shader_tags) + # Normalize by number of tags to avoid bias toward heavily-tagged shaders + return total / len(shader_tags) + + +async def _get_user_tag_affinities(db: AsyncSession, user_id: UUID) -> dict[str, float]: + """Build a tag affinity map from user's engagement history. 
+ + Sources: + - Upvoted shaders: +1.0 per tag + - Downvoted shaders: -0.5 per tag + - Dwell > 10s: +0.3 per tag + - Dwell > 30s: +0.6 per tag + + Returns: {tag: affinity_score} + """ + affinities: dict[str, float] = {} + + # Votes + vote_query = ( + select(Shader.tags, Vote.value) + .join(Vote, Vote.shader_id == Shader.id) + .where(Vote.user_id == user_id) + ) + vote_result = await db.execute(vote_query) + for tags, value in vote_result: + if not tags: + continue + weight = 1.0 if value == 1 else -0.5 + for tag in tags: + affinities[tag] = affinities.get(tag, 0.0) + weight + + # Dwell events (last 30 days) + cutoff = datetime.now(timezone.utc) - timedelta(days=30) + dwell_query = ( + select(Shader.tags, EngagementEvent.dwell_secs) + .join(EngagementEvent, EngagementEvent.shader_id == Shader.id) + .where( + EngagementEvent.user_id == user_id, + EngagementEvent.event_type == "dwell", + EngagementEvent.created_at >= cutoff, + ) + ) + dwell_result = await db.execute(dwell_query) + for tags, dwell in dwell_result: + if not tags or not dwell: + continue + if dwell > 30: + weight = 0.6 + elif dwell > 10: + weight = 0.3 + else: + continue # ignore short dwells + for tag in tags: + affinities[tag] = affinities.get(tag, 0.0) + weight + + return affinities diff --git a/services/api/app/routers/shaders.py b/services/api/app/routers/shaders.py index ba93624..43b2a58 100644 --- a/services/api/app/routers/shaders.py +++ b/services/api/app/routers/shaders.py @@ -36,7 +36,9 @@ async def list_shaders( if q: query = query.where(Shader.title.ilike(f"%{q}%")) if tags: - query = query.where(Shader.tags.overlap(tags)) + from sqlalchemy import type_coerce, Text + from sqlalchemy.dialects.postgresql import ARRAY as PG_ARRAY + query = query.where(Shader.tags.overlap(type_coerce(tags, PG_ARRAY(Text)))) if shader_type: query = query.where(Shader.shader_type == shader_type) if is_system is not None: diff --git a/services/mcp/server.py b/services/mcp/server.py index f911e78..28cb306 100644 --- 
a/services/mcp/server.py +++ b/services/mcp/server.py @@ -193,6 +193,21 @@ async def get_trending(limit: int = 10) -> str: for s in shaders]}) +@mcp.tool() +async def get_similar_shaders(shader_id: str, limit: int = 10) -> str: + """Find shaders visually similar to a given shader (by tag overlap). + + Args: + shader_id: UUID of the reference shader + limit: Number of results (1-30) + """ + shaders = await api_get(f"/feed/similar/{shader_id}", {"limit": min(limit, 30)}) + return json.dumps({"reference": shader_id, "count": len(shaders), + "similar": [{"id": s["id"], "title": s["title"], "shader_type": s["shader_type"], + "tags": s.get("tags", []), "score": s.get("score", 0)} + for s in shaders]}) + + @mcp.tool() async def get_desire_queue(min_heat: float = 0, limit: int = 10) -> str: """Get open shader desires/bounties. These are community requests.