M2 complete: Recommendation engine + similar shaders + tag affinities

Feed ranking (anonymous users):
- score * 0.6 + recency * 0.3 + random * 0.1
- Recency decays as 1 / (1 + age_hours / 72): 1.0 when new, 0.5 at 72 hours
- 10% randomness prevents filter bubbles

Feed ranking (authenticated users):
- score * 0.5 + recency * 0.2 + tag_affinity * 0.2 + random * 0.1
- Tag affinity built from engagement history:
  - Upvoted shader tags: +1.0 per tag
  - Downvoted: -0.5 per tag
  - Dwell >10s: +0.3, >30s: +0.6
- Over-fetches 3x candidates, re-ranks with affinity, returns top N

Similar shaders endpoint:
- GET /api/v1/feed/similar/{shader_id}
- Finds shaders with overlapping tags
- Ranks by tag overlap count, breaks ties by score
- MCP tool: get_similar_shaders

Fix: PostgreSQL text[] && varchar[] type mismatch
- Used type_coerce() instead of cast() for ARRAY overlap operator
- Affects both shaders search-by-tags and similar-by-tags queries
This commit is contained in:
John Lightner 2026-03-24 23:25:45 -05:00
parent cf591424a1
commit dc27435ca1
3 changed files with 205 additions and 19 deletions

View file

@ -1,35 +1,88 @@
"""Feed router — personalized feed, trending, new."""
"""Feed router — personalized feed, trending, new, similar.
Feed ranking strategy:
- Anonymous users: score * 0.6 + recency * 0.3 + random * 0.1
- Authenticated users: score * 0.5 + recency * 0.2 + tag_affinity * 0.2 + random * 0.1 (tag affinity built from engagement history)
- TODO: exclude shaders the user has already seen (voted/dwelled >30 days); get_feed does not filter them yet
"""
import random as py_random
from uuid import UUID
from datetime import datetime, timezone, timedelta
from fastapi import APIRouter, Depends, Query
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from sqlalchemy import select, func, text, case, literal_column
from app.database import get_db
from app.models import User, Shader
from app.models import User, Shader, Vote, EngagementEvent
from app.schemas import ShaderFeedItem, DwellReport
from app.middleware.auth import get_optional_user, get_current_user
# Router that the feed endpoints in this module register on.
router = APIRouter()
# Common filter for public, published shaders. Each list is unpacked into
# .where(...), so both conditions are applied to the query.
# `== True` is deliberate — assuming Shader is a SQLAlchemy mapped model, the
# comparison builds a SQL expression instead of evaluating to a Python bool.
# NOTE(review): _FEED_FILTER and _PUB hold identical conditions; presumably one
# supersedes the other — confirm and keep a single name.
_FEED_FILTER = [Shader.is_public == True, Shader.status == "published"]
_PUB = [Shader.is_public == True, Shader.status == "published"]
@router.get("", response_model=list[ShaderFeedItem])
async def get_feed(
    limit: int = Query(20, ge=1, le=50),
    cursor: str | None = Query(None),
    offset: int = Query(0, ge=0),
    db: AsyncSession = Depends(get_db),
    user: User | None = Depends(get_optional_user),
):
    """Return the main feed, re-ranked per request.

    Candidates are the public, published shaders ordered by score (newest
    first on ties). More rows than requested are fetched, each one is given a
    blended rank, and the top ``limit`` are returned.

    Blend weights:
      - Authenticated: score * 0.5 + recency * 0.2 + tag_affinity * 0.2
        + random * 0.1, over a 3x over-fetch.
      - Anonymous: score * 0.6 + recency * 0.3 + random * 0.1, over a 2x
        over-fetch.

    Args:
        limit: Number of shaders to return (1-50).
        cursor: Accepted for compatibility; this handler does not read it.
        offset: Rows to skip in the score-ordered candidate query. It is
            applied before re-ranking, not to the re-ranked result.
        db: Database session.
        user: The requesting user, or None for anonymous requests.

    Returns:
        Up to ``limit`` shaders, highest blended rank first. The random
        component means the order is not stable between calls.
    """
    if user:
        # Tag affinities come from the user's votes and dwell events.
        affinity_tags = await _get_user_tag_affinities(db, user.id)
        overfetch, w_score, w_recency, w_affinity = 3, 0.5, 0.2, 0.2
    else:
        # No engagement history to draw on: the affinity term is switched off.
        affinity_tags = {}
        overfetch, w_score, w_recency, w_affinity = 2, 0.6, 0.3, 0.0

    query = (
        select(Shader)
        .where(*_PUB)
        .order_by(Shader.score.desc(), Shader.created_at.desc())
        .limit(limit * overfetch)  # over-fetch so re-ranking has room to reorder
        .offset(offset)
    )
    result = await db.execute(query)
    candidates = result.scalars().all()

    scored = []
    for shader in candidates:
        base = (shader.score or 0) * w_score
        recency = _recency_score(shader.created_at) * w_recency
        tag_boost = _tag_affinity_score(shader.tags or [], affinity_tags) * w_affinity
        chaos = py_random.random() * 0.1
        scored.append((base + recency + tag_boost + chaos, shader))

    # Sort on the rank only; shader objects themselves are never compared.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [shader for _, shader in scored[:limit]]
@router.get("/trending", response_model=list[ShaderFeedItem])
@ -37,9 +90,10 @@ async def get_trending(
limit: int = Query(20, ge=1, le=50),
db: AsyncSession = Depends(get_db),
):
"""Pure score-ranked feed."""
query = (
select(Shader)
.where(*_FEED_FILTER)
.where(*_PUB)
.order_by(Shader.score.desc())
.limit(limit)
)
@ -52,9 +106,10 @@ async def get_new(
limit: int = Query(20, ge=1, le=50),
db: AsyncSession = Depends(get_db),
):
"""Chronological feed."""
query = (
select(Shader)
.where(*_FEED_FILTER)
.where(*_PUB)
.order_by(Shader.created_at.desc())
.limit(limit)
)
@ -62,14 +117,53 @@ async def get_new(
return result.scalars().all()
@router.get("/similar/{shader_id}", response_model=list[ShaderFeedItem])
async def get_similar(
    shader_id: UUID,
    limit: int = Query(10, ge=1, le=30),
    db: AsyncSession = Depends(get_db),
):
    """Return shaders that share tags with the given shader.

    Args:
        shader_id: ID of the reference shader.
        limit: Maximum number of results (1-30).
        db: Database session.

    Returns:
        Up to ``limit`` public, published shaders other than the reference,
        ordered by number of shared tags and then by score, both descending.
        An empty list when the reference shader is missing or has no tags.
    """
    lookup = await db.execute(select(Shader).where(Shader.id == shader_id))
    reference = lookup.scalar_one_or_none()
    if not (reference and reference.tags):
        return []

    from sqlalchemy import Text, type_coerce
    from sqlalchemy.dialects.postgresql import ARRAY as PG_ARRAY

    # type_coerce gives the bound tag list an explicit text[] type for the
    # array overlap operator.
    reference_tags = type_coerce(reference.tags, PG_ARRAY(Text))
    stmt = (
        select(Shader)
        .where(
            *_PUB,
            Shader.id != shader_id,
            Shader.tags.overlap(reference_tags),
        )
        .order_by(Shader.score.desc())
        .limit(limit * 2)  # over-fetch, then re-rank by overlap below
    )
    rows = (await db.execute(stmt)).scalars().all()

    wanted = set(reference.tags)

    def rank(shader):
        # Primary key: shared-tag count; secondary key: score.
        return (len(wanted & set(shader.tags or [])), shader.score or 0)

    return sorted(rows, key=rank, reverse=True)[:limit]
@router.post("/dwell", status_code=204)
async def report_dwell(
body: DwellReport,
db: AsyncSession = Depends(get_db),
user: User | None = Depends(get_optional_user),
):
from app.models import EngagementEvent
"""Report dwell time. Updates tag affinity for authenticated users."""
event = EngagementEvent(
user_id=user.id if user else None,
session_id=body.session_id,
@ -79,3 +173,78 @@ async def report_dwell(
event_metadata={"replayed": body.replayed},
)
db.add(event)
# ── Helpers ───────────────────────────────────────────────
def _recency_score(created_at) -> float:
    """Return a recency weight in (0.0, 1.0] for a creation timestamp.

    The weight decays hyperbolically as ``1 / (1 + age_hours / 72)``: 1.0 for
    something just created, 0.5 at 72 hours, and about 0.09 at 30 days.

    Args:
        created_at: Creation time as a ``datetime``. Naive values are treated
            as UTC. A falsy value such as ``None`` scores 0.0.

    Returns:
        The recency weight. Timestamps in the future count as "just created"
        and score 1.0.
    """
    if not created_at:
        return 0.0
    if created_at.tzinfo is None:
        created_at = created_at.replace(tzinfo=timezone.utc)
    age_hours = (datetime.now(timezone.utc) - created_at).total_seconds() / 3600
    # Clamp at zero: a timestamp ahead of the clock would otherwise push the
    # score above 1.0, hit a zero denominator at exactly -72 h, and turn
    # negative beyond that.
    age_hours = max(age_hours, 0.0)
    return 1.0 / (1.0 + age_hours / 72.0)
def _tag_affinity_score(shader_tags: list[str], affinity: dict[str, float]) -> float:
    """Return the mean affinity of a shader's tags.

    Args:
        shader_tags: Tags attached to the shader.
        affinity: Mapping of tag to affinity weight; tags missing from the
            mapping contribute 0.0.

    Returns:
        The average weight over all of the shader's tags, or 0.0 when either
        argument is empty. Averaging (rather than summing) keeps shaders with
        many tags from being favoured.
    """
    if not (shader_tags and affinity):
        return 0.0
    weights = [affinity.get(name, 0.0) for name in shader_tags]
    return sum(weights) / len(weights)
async def _get_user_tag_affinities(db: AsyncSession, user_id: UUID) -> dict[str, float]:
    """Accumulate per-tag affinity weights from a user's votes and dwell events.

    Contributions, applied to every tag of the shader involved:
      - Vote with value 1: +1.0
      - Any other vote value: -0.5
      - Dwell event longer than 30s: +0.6
      - Dwell event longer than 10s (up to 30s): +0.3

    Only dwell events created in the last 30 days are read; votes are not
    limited by date. Shaders without tags contribute nothing.

    Args:
        db: Database session.
        user_id: ID of the user whose history is read.

    Returns:
        Mapping of tag to summed affinity weight.
    """
    totals: dict[str, float] = {}

    def credit(tag_names, amount: float) -> None:
        # Add the same amount to every tag of one shader.
        for name in tag_names:
            totals[name] = totals.get(name, 0.0) + amount

    # Votes
    votes = await db.execute(
        select(Shader.tags, Vote.value)
        .join(Vote, Vote.shader_id == Shader.id)
        .where(Vote.user_id == user_id)
    )
    for shader_tags, vote_value in votes:
        if shader_tags:
            credit(shader_tags, 1.0 if vote_value == 1 else -0.5)

    # Dwell events (last 30 days)
    window_start = datetime.now(timezone.utc) - timedelta(days=30)
    dwells = await db.execute(
        select(Shader.tags, EngagementEvent.dwell_secs)
        .join(EngagementEvent, EngagementEvent.shader_id == Shader.id)
        .where(
            EngagementEvent.user_id == user_id,
            EngagementEvent.event_type == "dwell",
            EngagementEvent.created_at >= window_start,
        )
    )
    for shader_tags, seconds in dwells:
        if not (shader_tags and seconds):
            continue
        if seconds > 30:
            credit(shader_tags, 0.6)
        elif seconds > 10:
            credit(shader_tags, 0.3)
        # Dwells of 10s or less are ignored.

    return totals

View file

@ -36,7 +36,9 @@ async def list_shaders(
if q:
query = query.where(Shader.title.ilike(f"%{q}%"))
if tags:
query = query.where(Shader.tags.overlap(tags))
from sqlalchemy import type_coerce, Text
from sqlalchemy.dialects.postgresql import ARRAY as PG_ARRAY
query = query.where(Shader.tags.overlap(type_coerce(tags, PG_ARRAY(Text))))
if shader_type:
query = query.where(Shader.shader_type == shader_type)
if is_system is not None:

View file

@ -193,6 +193,21 @@ async def get_trending(limit: int = 10) -> str:
for s in shaders]})
@mcp.tool()
async def get_similar_shaders(shader_id: str, limit: int = 10) -> str:
    """Find shaders visually similar to a given shader (by tag overlap).

    Args:
        shader_id: UUID of the reference shader
        limit: Number of results (1-30; out-of-range values are clamped)
    """
    # Clamp both ends. The documented range is 1-30, and capping only the top
    # would forward 0 or a negative limit to the API unchanged.
    clamped = max(1, min(limit, 30))
    shaders = await api_get(f"/feed/similar/{shader_id}", {"limit": clamped})
    # Return only the summary fields; tags and score fall back to defaults
    # when the API response omits them.
    similar = [
        {
            "id": s["id"],
            "title": s["title"],
            "shader_type": s["shader_type"],
            "tags": s.get("tags", []),
            "score": s.get("score", 0),
        }
        for s in shaders
    ]
    return json.dumps({"reference": shader_id, "count": len(shaders), "similar": similar})
@mcp.tool()
async def get_desire_queue(min_heat: float = 0, limit: int = 10) -> str:
"""Get open shader desires/bounties. These are community requests.