M2 complete: Recommendation engine + similar shaders + tag affinities

Feed ranking (anonymous users):
- score * 0.6 + recency * 0.3 + random * 0.1
- Recency uses 72-hour half-life decay
- 10% randomness prevents filter bubbles

Feed ranking (authenticated users):
- score * 0.5 + recency * 0.2 + tag_affinity * 0.2 + random * 0.1
- Tag affinity built from engagement history:
  - Upvoted shader tags: +1.0 per tag
  - Downvoted: -0.5 per tag
  - Dwell >10s: +0.3, >30s: +0.6
- Over-fetches 3x candidates, re-ranks with affinity, returns top N

Similar shaders endpoint:
- GET /api/v1/feed/similar/{shader_id}
- Finds shaders with overlapping tags
- Ranks by tag overlap count, breaks ties by score
- MCP tool: get_similar_shaders

Fix: PostgreSQL text[] && varchar[] type mismatch
- Used type_coerce() instead of cast() for ARRAY overlap operator
- Affects both shaders search-by-tags and similar-by-tags queries
2026-03-24 23:25:45 -05:00 · 2026-03-24 23:25:45 -05:00 · dc27435ca1
commit dc27435ca1
parent cf591424a1
3 changed files with 205 additions and 19 deletions
--- a/services/api/app/routers/feed.py
+++ b/services/api/app/routers/feed.py
@ -1,35 +1,88 @@
-"""Feed router — personalized feed, trending, new."""
+"""Feed router — personalized feed, trending, new, similar.

+Feed ranking strategy:
+- Anonymous users: score * 0.6 + recency * 0.3 + random * 0.1
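+- Authenticated users: score * 0.5 + recency * 0.2 + tag_affinity * 0.2 + random * 0.1
+  (tag affinity is built from the user's vote and dwell history)
+- TODO: exclude shaders the user has already seen (voted/dwelled >30 days);
+  get_feed does not filter these yet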
+"""
+
+import random as py_random
+from uuid import UUID
+from datetime import datetime, timezone, timedelta
 from fastapi import APIRouter, Depends, Query
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy import select
+from sqlalchemy import select, func, text, case, literal_column

 from app.database import get_db
-from app.models import User, Shader
+from app.models import User, Shader, Vote, EngagementEvent
 from app.schemas import ShaderFeedItem, DwellReport
 from app.middleware.auth import get_optional_user, get_current_user

 router = APIRouter()

-# Common filter for public, published shaders
-_FEED_FILTER = [Shader.is_public == True, Shader.status == "published"]
+_PUB = [Shader.is_public == True, Shader.status == "published"]


@router.get("", response_model=list[ShaderFeedItem])
 async def get_feed(
    limit: int = Query(20, ge=1, le=50),
-    cursor: str | None = Query(None),
+    offset: int = Query(0, ge=0),
    db: AsyncSession = Depends(get_db),
    user: User | None = Depends(get_optional_user),
 ):
+    """
+    Main feed: fetch the top-scored public, published shaders, then re-rank
+    them in Python with a blended score.
+
+    Authenticated users: score * 0.5 + recency * 0.2 + tag_affinity * 0.2
+    + random * 0.1, computed over up to ``limit * 3`` candidates.
+    Anonymous users: score * 0.6 + recency * 0.3 + random * 0.1, computed
+    over up to ``limit * 2`` candidates.
+
+    Returns at most ``limit`` shaders, highest blended score first. Because
+    of the random term, two identical requests can return different orders.
+    """
+    if user:
+        # Per-tag weights built from the user's votes and dwell events;
+        # downvotes contribute negative weight (see _get_user_tag_affinities)
+        affinity_tags = await _get_user_tag_affinities(db, user.id)
+
+        # Fetch candidate shaders
+        # NOTE(review): the candidate window is limit * 3 rows wide but starts
+        # at the raw offset, so consecutive pages (offset += limit) read
+        # overlapping windows and can repeat shaders — confirm this is intended.
        query = (
            select(Shader)
-        .where(*_FEED_FILTER)
-        .order_by(Shader.created_at.desc())
-        .limit(limit)
+            .where(*_PUB)
+            .order_by(Shader.score.desc(), Shader.created_at.desc())
+            .limit(limit * 3)  # over-fetch for re-ranking
+            .offset(offset)
        )
        result = await db.execute(query)
-    return result.scalars().all()
+        candidates = list(result.scalars().all())
+
+        # Re-rank with tag affinity boost + randomness
+        # NOTE(review): s.score is used unscaled while recency is at most 1.0
+        # and the random term is below 0.1; if scores are not in a 0..1 range
+        # they will dominate the blend — verify the scale of Shader.score.
+        scored = []
+        for s in candidates:
+            base = (s.score or 0) * 0.5
+            recency = _recency_score(s.created_at) * 0.2
+            tag_boost = _tag_affinity_score(s.tags or [], affinity_tags) * 0.2
+            chaos = py_random.random() * 0.1
+            scored.append((base + recency + tag_boost + chaos, s))
+
+        # Sort on the blended score only (x[0]); Shader objects are never compared
+        scored.sort(key=lambda x: x[0], reverse=True)
+        return [s for _, s in scored[:limit]]
+    else:
+        # Anonymous: trending + recency + chaos
+        # Same over-fetch pattern as above, with a limit * 2 window
+        query = (
+            select(Shader)
+            .where(*_PUB)
+            .order_by(Shader.score.desc(), Shader.created_at.desc())
+            .limit(limit * 2)
+            .offset(offset)
+        )
+        result = await db.execute(query)
+        candidates = list(result.scalars().all())
+
+        scored = []
+        for s in candidates:
+            base = (s.score or 0) * 0.6
+            recency = _recency_score(s.created_at) * 0.3
+            chaos = py_random.random() * 0.1
+            scored.append((base + recency + chaos, s))
+
+        scored.sort(key=lambda x: x[0], reverse=True)
+        return [s for _, s in scored[:limit]]


@router.get("/trending", response_model=list[ShaderFeedItem])
@ -37,9 +90,10 @@ async def get_trending(
    limit: int = Query(20, ge=1, le=50),
    db: AsyncSession = Depends(get_db),
 ):
+    """Pure score-ranked feed."""
    query = (
        select(Shader)
-        .where(*_FEED_FILTER)
+        .where(*_PUB)
        .order_by(Shader.score.desc())
        .limit(limit)
    )
@ -52,9 +106,10 @@ async def get_new(
    limit: int = Query(20, ge=1, le=50),
    db: AsyncSession = Depends(get_db),
 ):
+    """Chronological feed."""
    query = (
        select(Shader)
-        .where(*_FEED_FILTER)
+        .where(*_PUB)
        .order_by(Shader.created_at.desc())
        .limit(limit)
    )
@ -62,14 +117,53 @@ async def get_new(
    return result.scalars().all()


+@router.get("/similar/{shader_id}", response_model=list[ShaderFeedItem])
+async def get_similar(
+    shader_id: UUID,
+    limit: int = Query(10, ge=1, le=30),
+    db: AsyncSession = Depends(get_db),
+):
+    """Return public, published shaders that share tags with the given shader.
+
+    Candidates are pre-selected in SQL (at least one tag in common, highest
+    score first, at most ``limit * 2`` rows) and then re-ordered in Python by
+    the number of shared tags, using score as the tie-breaker.
+
+    An unknown ``shader_id`` or a source shader without tags yields ``[]``.
+    """
+    lookup = await db.execute(select(Shader).where(Shader.id == shader_id))
+    source = lookup.scalar_one_or_none()
+    if not source or not source.tags:
+        return []
+
+    from sqlalchemy import type_coerce
+    from sqlalchemy.dialects.postgresql import ARRAY as PG_ARRAY
+    from sqlalchemy import Text
+
+    # Bind the source tags as ARRAY(Text) so the overlap (&&) comparison
+    # is rendered with a typed array parameter
+    shares_a_tag = Shader.tags.overlap(type_coerce(source.tags, PG_ARRAY(Text)))
+    stmt = (
+        select(Shader)
+        .where(*_PUB, Shader.id != shader_id, shares_a_tag)
+        .order_by(Shader.score.desc())
+        .limit(limit * 2)
+    )
+    rows = await db.execute(stmt)
+    candidates = list(rows.scalars().all())
+
+    wanted = set(source.tags)
+
+    def _rank(shader):
+        # (shared tag count, score): more overlap wins, score breaks ties
+        return (len(wanted & set(shader.tags or [])), shader.score or 0)
+
+    candidates.sort(key=_rank, reverse=True)
+    return candidates[:limit]
+
+
@router.post("/dwell", status_code=204)
 async def report_dwell(
    body: DwellReport,
    db: AsyncSession = Depends(get_db),
    user: User | None = Depends(get_optional_user),
 ):
-    from app.models import EngagementEvent
-
+    """Report dwell time. Updates tag affinity for authenticated users."""
    event = EngagementEvent(
        user_id=user.id if user else None,
        session_id=body.session_id,
@ -79,3 +173,78 @@ async def report_dwell(
        event_metadata={"replayed": body.replayed},
    )
    db.add(event)
+
+
+# ── Helpers ───────────────────────────────────────────────
+
+def _recency_score(created_at) -> float:
+    """Return a recency weight that decays hyperbolically with age.
+
+    The weight is ``1 / (1 + age_hours / 72)``: 1.0 for a shader created
+    right now, 0.5 at 72 hours, and roughly 0.09 at 30 days.
+
+    Args:
+        created_at: Creation timestamp, or None. A naive datetime is
+            treated as UTC.
+
+    Returns:
+        0.0 when ``created_at`` is missing, otherwise a value in (0, 1].
+    """
+    if not created_at:
+        return 0.0
+    if created_at.tzinfo is None:
+        created_at = created_at.replace(tzinfo=timezone.utc)
+    age_hours = (datetime.now(timezone.utc) - created_at).total_seconds() / 3600
+    # A timestamp in the future (e.g. clock skew) gives a negative age, which
+    # would push the score above 1.0, make it negative, or divide by zero at
+    # exactly -72h. Treat such rows as brand new instead.
+    age_hours = max(age_hours, 0.0)
+    return 1.0 / (1.0 + age_hours / 72.0)  # reaches 0.5 at 72 hours
+
+
+def _tag_affinity_score(shader_tags: list[str], affinity: dict[str, float]) -> float:
+    """Return the mean affinity weight across a shader's tags.
+
+    Tags missing from ``affinity`` count as 0.0. Returns 0.0 when either the
+    tag list or the affinity map is empty.
+    """
+    if not (shader_tags and affinity):
+        return 0.0
+    weights = [affinity.get(name, 0.0) for name in shader_tags]
+    # Averaging (not summing) keeps heavily-tagged shaders from winning on volume
+    return sum(weights) / len(weights)
+
+
+async def _get_user_tag_affinities(db: AsyncSession, user_id: UUID) -> dict[str, float]:
+    """Accumulate per-tag affinity weights from a user's votes and dwell events.
+
+    Weight added to every tag of the shader involved:
+    - Vote with value 1: +1.0
+    - Any other vote value: -0.5
+    - Dwell longer than 30s: +0.6
+    - Dwell longer than 10s (up to 30s): +0.3
+
+    All of the user's votes are counted; dwell events only from the last
+    30 days. Shaders without tags and dwells of 10s or less add nothing.
+
+    Returns: {tag: summed_weight}
+    """
+    affinities: dict[str, float] = {}
+
+    def _credit(tags, weight: float) -> None:
+        # Add the same weight to each tag of one shader
+        for tag in tags:
+            affinities[tag] = affinities.get(tag, 0.0) + weight
+
+    # Votes
+    vote_rows = await db.execute(
+        select(Shader.tags, Vote.value)
+        .join(Vote, Vote.shader_id == Shader.id)
+        .where(Vote.user_id == user_id)
+    )
+    for tags, value in vote_rows:
+        if tags:
+            _credit(tags, 1.0 if value == 1 else -0.5)
+
+    # Dwell events (last 30 days)
+    cutoff = datetime.now(timezone.utc) - timedelta(days=30)
+    dwell_rows = await db.execute(
+        select(Shader.tags, EngagementEvent.dwell_secs)
+        .join(EngagementEvent, EngagementEvent.shader_id == Shader.id)
+        .where(
+            EngagementEvent.user_id == user_id,
+            EngagementEvent.event_type == "dwell",
+            EngagementEvent.created_at >= cutoff,
+        )
+    )
+    for tags, dwell in dwell_rows:
+        if not tags or not dwell:
+            continue
+        if dwell > 30:
+            _credit(tags, 0.6)
+        elif dwell > 10:
+            _credit(tags, 0.3)
+        # dwells of 10s or less are ignored
+
+    return affinities
--- a/services/api/app/routers/shaders.py
+++ b/services/api/app/routers/shaders.py
@ -36,7 +36,9 @@ async def list_shaders(
    if q:
        query = query.where(Shader.title.ilike(f"%{q}%"))
    if tags:
-        query = query.where(Shader.tags.overlap(tags))
+        from sqlalchemy import type_coerce, Text
+        from sqlalchemy.dialects.postgresql import ARRAY as PG_ARRAY
+        query = query.where(Shader.tags.overlap(type_coerce(tags, PG_ARRAY(Text))))
    if shader_type:
        query = query.where(Shader.shader_type == shader_type)
    if is_system is not None:
--- a/services/mcp/server.py
+++ b/services/mcp/server.py
@ -193,6 +193,21 @@ async def get_trending(limit: int = 10) -> str:
                                    for s in shaders]})


+@mcp.tool()
+async def get_similar_shaders(shader_id: str, limit: int = 10) -> str:
+    """Find shaders visually similar to a given shader (by tag overlap).
+
+    Args:
+        shader_id: UUID of the reference shader
+        limit: Number of results (1-30); values outside that range are clamped
+
+    Returns:
+        JSON string with the reference id, the result count, and a list of
+        similar shaders (id, title, shader_type, tags, score).
+    """
+    # The similar-shaders endpoint validates limit with ge=1 / le=30, so clamp
+    # both ends here; min() alone forwarded 0 or negative values to the API.
+    bounded_limit = max(1, min(limit, 30))
+    shaders = await api_get(f"/feed/similar/{shader_id}", {"limit": bounded_limit})
+    return json.dumps({"reference": shader_id, "count": len(shaders),
+                        "similar": [{"id": s["id"], "title": s["title"], "shader_type": s["shader_type"],
+                                     "tags": s.get("tags", []), "score": s.get("score", 0)}
+                                    for s in shaders]})
+
+
@mcp.tool()
 async def get_desire_queue(min_heat: float = 0, limit: int = 10) -> str:
    """Get open shader desires/bounties. These are community requests.