perf: Added SearchLog model, Alembic migration 013, Pydantic schemas, f…

- "backend/models.py"
- "backend/schemas.py"
- "backend/routers/search.py"
- "alembic/versions/013_add_search_log.py"

GSD-Task: S01/T01
This commit is contained in:
jlightner 2026-04-03 04:02:55 +00:00
parent dfb10f04b4
commit 4fbc77d10d
4 changed files with 138 additions and 3 deletions

View file

@ -0,0 +1,31 @@
"""Add search_log table for query analytics and popular searches.
Revision ID: 013_add_search_log
Revises: 012_multi_source_fmt
"""
from alembic import op
import sqlalchemy as sa
revision = "013_add_search_log"
down_revision = "012_multi_source_fmt"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"search_log",
sa.Column("id", sa.Integer, primary_key=True, autoincrement=True),
sa.Column("query", sa.String(500), nullable=False),
sa.Column("scope", sa.String(50), nullable=False),
sa.Column("result_count", sa.Integer, nullable=False, server_default="0"),
sa.Column("created_at", sa.TIMESTAMP(), server_default=sa.func.now(), nullable=False),
)
op.create_index("ix_search_log_query", "search_log", ["query"])
op.create_index("ix_search_log_created_at", "search_log", ["created_at"])
def downgrade() -> None:
op.drop_index("ix_search_log_created_at", table_name="search_log")
op.drop_index("ix_search_log_query", table_name="search_log")
op.drop_table("search_log")

View file

@ -395,6 +395,19 @@ class ContentReport(Base):
# ── Pipeline Event ───────────────────────────────────────────────────────────
class SearchLog(Base):
"""Logged search query for analytics and popular searches."""
__tablename__ = "search_log"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
query: Mapped[str] = mapped_column(String(500), nullable=False, index=True)
scope: Mapped[str] = mapped_column(String(50), nullable=False)
result_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
created_at: Mapped[datetime] = mapped_column(
default=_now, server_default=func.now(), index=True
)
class PipelineRunStatus(str, enum.Enum):
"""Status of a pipeline run."""
running = "running"

View file

@ -2,17 +2,22 @@
from __future__ import annotations
import asyncio
import json
import logging
from typing import Annotated
from fastapi import APIRouter, Depends, Query
from sqlalchemy import func, select
from sqlalchemy import func, select, text
from sqlalchemy.ext.asyncio import AsyncSession
from config import get_settings
from database import get_session
from models import Creator, TechniquePage
from database import async_session, get_session
from models import Creator, SearchLog, TechniquePage
from redis_client import get_redis
from schemas import (
PopularSearchesResponse,
PopularSearchItem,
SearchResponse,
SearchResultItem,
SuggestionItem,
@ -24,12 +29,29 @@ logger = logging.getLogger("chrysopedia.search.router")
router = APIRouter(prefix="/search", tags=["search"])
POPULAR_CACHE_KEY = "chrysopedia:popular_searches"
POPULAR_CACHE_TTL = 300 # 5 minutes
def _get_search_service() -> SearchService:
"""Build a SearchService from current settings."""
return SearchService(get_settings())
async def _log_search(query: str, scope: str, result_count: int) -> None:
"""Fire-and-forget: persist a search log row.
Opens its own session so it doesn't interfere with the request session.
Catches all exceptions a logging failure must never break a search request.
"""
try:
async with async_session() as session:
session.add(SearchLog(query=query, scope=scope, result_count=result_count))
await session.commit()
except Exception:
logger.warning("Failed to log search query %r", query, exc_info=True)
@router.get("", response_model=SearchResponse)
async def search(
q: Annotated[str, Query(max_length=500)] = "",
@ -46,6 +68,11 @@ async def search(
"""
svc = _get_search_service()
result = await svc.search(query=q, scope=scope, sort=sort, limit=limit, db=db)
# Fire-and-forget search logging — only non-empty queries
if q.strip():
asyncio.create_task(_log_search(q.strip(), scope, result["total"]))
return SearchResponse(
items=[SearchResultItem(**item) for item in result["items"]],
partial_matches=[SearchResultItem(**item) for item in result.get("partial_matches", [])],
@ -118,3 +145,55 @@ async def suggestions(
_add(name, "creator")
return SuggestionsResponse(suggestions=items)
@router.get("/popular", response_model=PopularSearchesResponse)
async def popular_searches(
db: AsyncSession = Depends(get_session),
) -> PopularSearchesResponse:
"""Return the top 10 search queries from the last 7 days.
Results are cached in Redis for 5 minutes. Falls through to a
direct DB query when Redis is unavailable.
"""
# Try Redis cache first
try:
redis = await get_redis()
cached = await redis.get(POPULAR_CACHE_KEY)
await redis.aclose()
if cached is not None:
items = json.loads(cached)
return PopularSearchesResponse(
items=[PopularSearchItem(**i) for i in items],
cached=True,
)
except Exception:
logger.warning("Redis unavailable for popular searches cache", exc_info=True)
# Cache miss or Redis down — query DB
stmt = (
select(
func.lower(SearchLog.query).label("q"),
func.count().label("cnt"),
)
.where(SearchLog.created_at > func.now() - text("interval '7 days'"))
.group_by(func.lower(SearchLog.query))
.order_by(func.count().desc())
.limit(10)
)
result = await db.execute(stmt)
items = [PopularSearchItem(query=row.q, count=row.cnt) for row in result.all()]
# Write to Redis cache (best-effort)
try:
redis = await get_redis()
await redis.set(
POPULAR_CACHE_KEY,
json.dumps([i.model_dump() for i in items]),
ex=POPULAR_CACHE_TTL,
)
await redis.aclose()
except Exception:
logger.warning("Failed to cache popular searches in Redis", exc_info=True)
return PopularSearchesResponse(items=items, cached=False)

View file

@ -240,6 +240,18 @@ class SuggestionsResponse(BaseModel):
suggestions: list[SuggestionItem] = Field(default_factory=list)
class PopularSearchItem(BaseModel):
"""A single popular search query with occurrence count."""
query: str
count: int
class PopularSearchesResponse(BaseModel):
"""Response for the popular searches endpoint."""
items: list[PopularSearchItem] = Field(default_factory=list)
cached: bool = False
# ── Technique Page Detail ────────────────────────────────────────────────────
class KeyMomentSummary(BaseModel):