fractafrag/services/api/app/routers/feed.py
John Lightner dc27435ca1 M2 complete: Recommendation engine + similar shaders + tag affinities
Feed ranking (anonymous users):
- score * 0.6 + recency * 0.3 + random * 0.1
- Recency uses 72-hour half-life decay
- 10% randomness prevents filter bubbles

Feed ranking (authenticated users):
- score * 0.5 + recency * 0.2 + tag_affinity * 0.2 + random * 0.1
- Tag affinity built from engagement history:
  - Upvoted shader tags: +1.0 per tag
  - Downvoted: -0.5 per tag
  - Dwell >10s: +0.3, >30s: +0.6
- Over-fetches 3x candidates, re-ranks with affinity, returns top N

Similar shaders endpoint:
- GET /api/v1/feed/similar/{shader_id}
- Finds shaders with overlapping tags
- Ranks by tag overlap count, breaks ties by score
- MCP tool: get_similar_shaders

Fix: PostgreSQL text[] && varchar[] type mismatch
- Used type_coerce() instead of cast() for ARRAY overlap operator
- Affects both shaders search-by-tags and similar-by-tags queries
2026-03-24 23:25:45 -05:00

250 lines
8.1 KiB
Python

"""Feed router — personalized feed, trending, new, similar.
Feed ranking strategy:
- Anonymous users: score * 0.6 + recency * 0.3 + random * 0.1
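- Authenticated users: score * 0.5 + recency * 0.2 + tag_affinity * 0.2 + random * 0.1,
  where tag affinity is built from the user's votes and dwell events
- Excluding shaders the user has already seen (voted/dwelled >30 days) is the
  intended behaviour, but no such filter is applied by the queries in this module yet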
"""
import random as py_random
from uuid import UUID
from datetime import datetime, timezone, timedelta
from fastapi import APIRouter, Depends, Query
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, func, text, case, literal_column
from app.database import get_db
from app.models import User, Shader, Vote, EngagementEvent
from app.schemas import ShaderFeedItem, DwellReport
from app.middleware.auth import get_optional_user, get_current_user
# Router that collects every feed endpoint defined in this module.
router = APIRouter()
# Filter clauses shared by all feed queries: only public shaders whose status
# is "published". Unpacked into `.where(*_PUB)` by each endpoint.
# `== True` is deliberate: on a mapped column it builds a SQL expression
# instead of evaluating a Python boolean.
_PUB = [Shader.is_public == True, Shader.status == "published"]
@router.get("", response_model=list[ShaderFeedItem])
async def get_feed(
    limit: int = Query(20, ge=1, le=50),
    offset: int = Query(0, ge=0),
    db: AsyncSession = Depends(get_db),
    user: User | None = Depends(get_optional_user),
):
    """
    Main feed endpoint.

    Signed-in users get candidates ranked by
    ``score * 0.5 + recency * 0.2 + tag_affinity * 0.2 + random * 0.1``,
    where the tag affinity comes from their votes and dwell events.
    Anonymous users get ``score * 0.6 + recency * 0.3 + random * 0.1``.

    In both cases more candidates than requested are fetched (3x for
    signed-in users, 2x for anonymous ones), re-ranked in Python, and the
    best ``limit`` are returned.

    NOTE(review): ``offset`` is applied to the over-fetched candidate query,
    so two consecutive pages (offset advanced by ``limit``) draw from
    overlapping candidate windows and may repeat shaders.
    """
    personalised = bool(user)

    # The affinity map is only needed (and only loaded) for signed-in users.
    affinity_tags = None
    if personalised:
        affinity_tags = await _get_user_tag_affinities(db, user.id)

    # Over-fetch so the re-ranking below has something to choose from.
    overfetch = 3 if personalised else 2
    stmt = (
        select(Shader)
        .where(*_PUB)
        .order_by(Shader.score.desc(), Shader.created_at.desc())
        .limit(limit * overfetch)
        .offset(offset)
    )
    rows = await db.execute(stmt)
    pool = list(rows.scalars().all())

    ranked = []
    for shader in pool:
        if personalised:
            rank = (
                (shader.score or 0) * 0.5
                + _recency_score(shader.created_at) * 0.2
                + _tag_affinity_score(shader.tags or [], affinity_tags) * 0.2
                + py_random.random() * 0.1
            )
        else:
            rank = (
                (shader.score or 0) * 0.6
                + _recency_score(shader.created_at) * 0.3
                + py_random.random() * 0.1
            )
        ranked.append((rank, shader))

    # Highest blended rank first; ties keep the database ordering.
    ordered = sorted(ranked, key=lambda pair: pair[0], reverse=True)
    return [shader for _, shader in ordered[:limit]]
@router.get("/trending", response_model=list[ShaderFeedItem])
async def get_trending(
    limit: int = Query(20, ge=1, le=50),
    db: AsyncSession = Depends(get_db),
):
    """Return up to ``limit`` public, published shaders, highest score first."""
    stmt = select(Shader).where(*_PUB)
    stmt = stmt.order_by(Shader.score.desc()).limit(limit)
    rows = (await db.execute(stmt)).scalars()
    return rows.all()
@router.get("/new", response_model=list[ShaderFeedItem])
async def get_new(
    limit: int = Query(20, ge=1, le=50),
    db: AsyncSession = Depends(get_db),
):
    """Return up to ``limit`` public, published shaders, newest first."""
    stmt = select(Shader).where(*_PUB)
    stmt = stmt.order_by(Shader.created_at.desc()).limit(limit)
    rows = (await db.execute(stmt)).scalars()
    return rows.all()
@router.get("/similar/{shader_id}", response_model=list[ShaderFeedItem])
async def get_similar(
    shader_id: UUID,
    limit: int = Query(10, ge=1, le=30),
    db: AsyncSession = Depends(get_db),
):
    """Return shaders that share tags with the shader identified by ``shader_id``.

    Returns an empty list when the source shader does not exist or has no
    tags. Otherwise up to ``limit * 2`` public, published shaders with at
    least one overlapping tag are fetched (highest score first) and then
    ordered by number of shared tags, with score as the tie-breaker.
    """
    lookup = await db.execute(select(Shader).where(Shader.id == shader_id))
    source = lookup.scalar_one_or_none()
    if not source or not source.tags:
        return []

    from sqlalchemy import type_coerce
    from sqlalchemy.dialects.postgresql import ARRAY as PG_ARRAY
    from sqlalchemy import Text

    # type_coerce (rather than cast) keeps the bound tag list typed as text[]
    # for the array overlap operator.
    wanted_tags = type_coerce(source.tags, PG_ARRAY(Text))
    stmt = (
        select(Shader)
        .where(*_PUB, Shader.id != shader_id, Shader.tags.overlap(wanted_tags))
        .order_by(Shader.score.desc())
        .limit(limit * 2)
    )
    pool = list((await db.execute(stmt)).scalars().all())

    reference = set(source.tags)

    def similarity(shader):
        # (shared tag count, score) — compared as a tuple, larger is better.
        return (len(reference & set(shader.tags or [])), shader.score or 0)

    ordered = sorted(pool, key=similarity, reverse=True)
    return ordered[:limit]
@router.post("/dwell", status_code=204)
async def report_dwell(
    body: DwellReport,
    db: AsyncSession = Depends(get_db),
    user: User | None = Depends(get_optional_user),
):
    """Record a dwell-time engagement event for a shader.

    The event is attributed to the signed-in user when there is one and is
    stored without a user id otherwise. These events feed the tag affinity
    calculation for authenticated users.

    NOTE(review): the event is only added to the session here; presumably the
    ``get_db`` dependency commits it — confirm.
    """
    viewer_id = None
    if user:
        viewer_id = user.id

    db.add(
        EngagementEvent(
            user_id=viewer_id,
            session_id=body.session_id,
            shader_id=body.shader_id,
            event_type="dwell",
            dwell_secs=body.dwell_secs,
            event_metadata={"replayed": body.replayed},
        )
    )
# ── Helpers ───────────────────────────────────────────────
def _recency_score(created_at) -> float:
"""Score from 1.0 (just created) to ~0.0 (30+ days old)."""
if not created_at:
return 0.0
if created_at.tzinfo is None:
created_at = created_at.replace(tzinfo=timezone.utc)
age_hours = (datetime.now(timezone.utc) - created_at).total_seconds() / 3600
return 1.0 / (1.0 + age_hours / 72.0) # half-life ~3 days
def _tag_affinity_score(shader_tags: list[str], affinity: dict[str, float]) -> float:
"""Score based on how well a shader's tags match the user's affinities."""
if not shader_tags or not affinity:
return 0.0
total = sum(affinity.get(tag, 0.0) for tag in shader_tags)
# Normalize by number of tags to avoid bias toward heavily-tagged shaders
return total / len(shader_tags)
async def _get_user_tag_affinities(db: AsyncSession, user_id: UUID) -> dict[str, float]:
    """Aggregate a per-tag affinity score from one user's engagement.

    Contributions, applied to every tag of the shader involved:
    - vote with value 1: +1.0
    - any other vote value: -0.5
    - dwell event from the last 30 days longer than 30s: +0.6
    - dwell event from the last 30 days longer than 10s (up to 30s): +0.3

    Shorter dwells, rows without tags and rows without a dwell time are
    ignored.

    Returns: {tag: affinity_score}
    """
    totals: dict[str, float] = {}

    def credit(tag_names, delta):
        # Add `delta` to the running total of each tag.
        for name in tag_names:
            totals[name] = totals.get(name, 0.0) + delta

    # --- Votes -----------------------------------------------------------
    votes_stmt = (
        select(Shader.tags, Vote.value)
        .join(Vote, Vote.shader_id == Shader.id)
        .where(Vote.user_id == user_id)
    )
    for shader_tags, vote_value in await db.execute(votes_stmt):
        if shader_tags:
            credit(shader_tags, 1.0 if vote_value == 1 else -0.5)

    # --- Dwell events, limited to the last 30 days -----------------------
    window_start = datetime.now(timezone.utc) - timedelta(days=30)
    dwell_stmt = (
        select(Shader.tags, EngagementEvent.dwell_secs)
        .join(EngagementEvent, EngagementEvent.shader_id == Shader.id)
        .where(
            EngagementEvent.user_id == user_id,
            EngagementEvent.event_type == "dwell",
            EngagementEvent.created_at >= window_start,
        )
    )
    for shader_tags, seconds in await db.execute(dwell_stmt):
        if shader_tags and seconds:
            if seconds > 30:
                credit(shader_tags, 0.6)
            elif seconds > 10:
                credit(shader_tags, 0.3)
            # Dwells of 10 seconds or less carry no signal.

    return totals