From 4fbc77d10d26050b429d7f75f808ab58116dcef5 Mon Sep 17 00:00:00 2001 From: jlightner Date: Fri, 3 Apr 2026 04:02:55 +0000 Subject: [PATCH] =?UTF-8?q?perf:=20Added=20SearchLog=20model,=20Alembic=20?= =?UTF-8?q?migration=20013,=20Pydantic=20schemas,=20f=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - "backend/models.py" - "backend/schemas.py" - "backend/routers/search.py" - "alembic/versions/013_add_search_log.py" GSD-Task: S01/T01 --- alembic/versions/013_add_search_log.py | 31 ++++++++++ backend/models.py | 13 ++++ backend/routers/search.py | 85 +++++++++++++++++++++++++- backend/schemas.py | 12 ++++ 4 files changed, 138 insertions(+), 3 deletions(-) create mode 100644 alembic/versions/013_add_search_log.py diff --git a/alembic/versions/013_add_search_log.py b/alembic/versions/013_add_search_log.py new file mode 100644 index 0000000..599462c --- /dev/null +++ b/alembic/versions/013_add_search_log.py @@ -0,0 +1,31 @@ +"""Add search_log table for query analytics and popular searches. 
+
+Revision ID: 013_add_search_log
+Revises: 012_multi_source_fmt
+"""
+from alembic import op
+import sqlalchemy as sa
+
+revision = "013_add_search_log"
+down_revision = "012_multi_source_fmt"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:  # create the search_log table, then its two indexes
+    op.create_table(
+        "search_log",
+        sa.Column("id", sa.Integer, primary_key=True, autoincrement=True),
+        sa.Column("query", sa.String(500), nullable=False),
+        sa.Column("scope", sa.String(50), nullable=False),
+        sa.Column("result_count", sa.Integer, nullable=False, server_default="0"),
+        sa.Column("created_at", sa.TIMESTAMP(), server_default=sa.func.now(), nullable=False),
+    )
+    op.create_index("ix_search_log_query", "search_log", ["query"])
+    op.create_index("ix_search_log_created_at", "search_log", ["created_at"])
+
+
+def downgrade() -> None:  # reverse of upgrade(): drop both indexes, then the table
+    op.drop_index("ix_search_log_created_at", table_name="search_log")
+    op.drop_index("ix_search_log_query", table_name="search_log")
+    op.drop_table("search_log")
diff --git a/backend/models.py b/backend/models.py
index a2b0f9b..c7c0eea 100644
--- a/backend/models.py
+++ b/backend/models.py
@@ -395,6 +395,26 @@ class ContentReport(Base):
 
 # ── Pipeline Event ───────────────────────────────────────────────────────────
 
+class SearchLog(Base):
+    """Logged search query for analytics and popular searches.
+
+    Each row stores the query text, the scope it was run under, the number
+    of results returned, and the time the row was created.
+    """
+    __tablename__ = "search_log"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    query: Mapped[str] = mapped_column(String(500), nullable=False, index=True)
+    scope: Mapped[str] = mapped_column(String(50), nullable=False)
+    # server_default mirrors migration 013 so ORM metadata and the schema agree.
+    result_count: Mapped[int] = mapped_column(
+        Integer, nullable=False, default=0, server_default="0"
+    )
+    created_at: Mapped[datetime] = mapped_column(
+        default=_now, server_default=func.now(), index=True
+    )
+
+
 class PipelineRunStatus(str, enum.Enum):
     """Status of a pipeline run."""
     running = "running"
diff --git a/backend/routers/search.py b/backend/routers/search.py
index 00bb613..c6471fe 100644
--- a/backend/routers/search.py
+++ 
b/backend/routers/search.py
@@ -2,17 +2,22 @@
 
 from __future__ import annotations
 
+import asyncio
+import json
 import logging
 from typing import Annotated
 
 from fastapi import APIRouter, Depends, Query
-from sqlalchemy import func, select
+from sqlalchemy import func, select, text
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from config import get_settings
-from database import get_session
-from models import Creator, TechniquePage
+from database import async_session, get_session
+from models import Creator, SearchLog, TechniquePage
+from redis_client import get_redis
 from schemas import (
+    PopularSearchesResponse,
+    PopularSearchItem,
     SearchResponse,
     SearchResultItem,
     SuggestionItem,
@@ -24,12 +29,33 @@ logger = logging.getLogger("chrysopedia.search.router")
 
 router = APIRouter(prefix="/search", tags=["search"])
 
+POPULAR_CACHE_KEY = "chrysopedia:popular_searches"
+POPULAR_CACHE_TTL = 300  # 5 minutes
+
+# Strong references to in-flight logging tasks. The event loop keeps only
+# weak references, so an unreferenced task can be garbage-collected mid-run.
+_background_tasks: set[asyncio.Task[None]] = set()
+
 
 def _get_search_service() -> SearchService:
     """Build a SearchService from current settings."""
     return SearchService(get_settings())
 
 
+async def _log_search(query: str, scope: str, result_count: int) -> None:
+    """Fire-and-forget: persist a search log row.
+
+    Opens its own session so it doesn't interfere with the request session.
+    Catches all exceptions — a logging failure must never break a search request.
+    """
+    try:
+        async with async_session() as session:
+            session.add(SearchLog(query=query, scope=scope, result_count=result_count))
+            await session.commit()
+    except Exception:
+        logger.warning("Failed to log search query %r", query, exc_info=True)
+
+
 @router.get("", response_model=SearchResponse)
 async def search(
     q: Annotated[str, Query(max_length=500)] = "",
@@ -46,6 +72,14 @@ async def search(
     """
     svc = _get_search_service()
     result = await svc.search(query=q, scope=scope, sort=sort, limit=limit, db=db)
+
+    # Fire-and-forget search logging — only non-empty queries
+    if q.strip():
+        task = asyncio.create_task(_log_search(q.strip(), scope, result["total"]))
+        # Keep the task referenced until it completes, then drop it.
+        _background_tasks.add(task)
+        task.add_done_callback(_background_tasks.discard)
+
     return SearchResponse(
         items=[SearchResultItem(**item) for item in result["items"]],
         partial_matches=[SearchResultItem(**item) for item in result.get("partial_matches", [])],
@@ -118,3 +152,61 @@ async def suggestions(
         _add(name, "creator")
 
     return SuggestionsResponse(suggestions=items)
+
+
+@router.get("/popular", response_model=PopularSearchesResponse)
+async def popular_searches(
+    db: AsyncSession = Depends(get_session),
+) -> PopularSearchesResponse:
+    """Return the top 10 search queries from the last 7 days.
+
+    Results are cached in Redis for 5 minutes. Falls through to a
+    direct DB query when Redis is unavailable.
+    """
+    # Try Redis cache first
+    try:
+        redis = await get_redis()
+        try:
+            cached = await redis.get(POPULAR_CACHE_KEY)
+        finally:
+            # Close on the error path too so the client is never leaked.
+            await redis.aclose()
+        if cached is not None:
+            payload = json.loads(cached)
+            return PopularSearchesResponse(
+                items=[PopularSearchItem(**i) for i in payload],
+                cached=True,
+            )
+    except Exception:
+        logger.warning("Redis unavailable for popular searches cache", exc_info=True)
+
+    # Cache miss or Redis down — query DB
+    # NOTE(review): the raw interval literal assumes a PostgreSQL-style backend — confirm.
+    stmt = (
+        select(
+            func.lower(SearchLog.query).label("q"),
+            func.count().label("cnt"),
+        )
+        .where(SearchLog.created_at > func.now() - text("interval '7 days'"))
+        .group_by(func.lower(SearchLog.query))
+        .order_by(func.count().desc())
+        .limit(10)
+    )
+    result = await db.execute(stmt)
+    items = [PopularSearchItem(query=row.q, count=row.cnt) for row in result.all()]
+
+    # Write to Redis cache (best-effort)
+    try:
+        redis = await get_redis()
+        try:
+            await redis.set(
+                POPULAR_CACHE_KEY,
+                json.dumps([i.model_dump() for i in items]),
+                ex=POPULAR_CACHE_TTL,
+            )
+        finally:
+            await redis.aclose()
+    except Exception:
+        logger.warning("Failed to cache popular searches in Redis", exc_info=True)
+
+    return PopularSearchesResponse(items=items, cached=False)
diff --git a/backend/schemas.py b/backend/schemas.py
index e97af61..f18f83e 100644
--- a/backend/schemas.py
+++ b/backend/schemas.py
@@ -240,6 +240,18 @@ class SuggestionsResponse(BaseModel):
     suggestions: list[SuggestionItem] = Field(default_factory=list)
 
 
+class PopularSearchItem(BaseModel):
+    """A single popular search query with occurrence count."""
+    query: str
+    count: int
+
+
+class PopularSearchesResponse(BaseModel):
+    """Response for the popular searches endpoint."""
+    items: list[PopularSearchItem] = Field(default_factory=list)
+    cached: bool = False
+
+
 # ── Technique Page Detail ────────────────────────────────────────────────────
 
 class KeyMomentSummary(BaseModel):