From e0c73db8ffe7d038057755210117c433e96196b9 Mon Sep 17 00:00:00 2001 From: jlightner Date: Wed, 1 Apr 2026 06:27:56 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20Added=20sort=20query=20parameter=20(rel?= =?UTF-8?q?evance/newest/oldest/alpha/creator=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - "backend/routers/search.py" - "backend/routers/topics.py" - "backend/routers/techniques.py" - "backend/search_service.py" GSD-Task: S02/T01 --- .gsd/DECISIONS.md | 3 +- .gsd/PROJECT.md | 2 + .gsd/milestones/M012/M012-ROADMAP.md | 2 +- .../milestones/M012/slices/S01/S01-SUMMARY.md | 108 ++++++++++++++++++ .gsd/milestones/M012/slices/S01/S01-UAT.md | 55 +++++++++ .../M012/slices/S01/tasks/T03-VERIFY.json | 9 ++ .gsd/milestones/M012/slices/S02/S02-PLAN.md | 2 +- .../M012/slices/S02/tasks/T01-SUMMARY.md | 86 ++++++++++++++ backend/routers/search.py | 3 +- backend/routers/techniques.py | 10 ++ backend/routers/topics.py | 35 +++++- backend/search_service.py | 38 +++++- 12 files changed, 344 insertions(+), 9 deletions(-) create mode 100644 .gsd/milestones/M012/slices/S01/S01-SUMMARY.md create mode 100644 .gsd/milestones/M012/slices/S01/S01-UAT.md create mode 100644 .gsd/milestones/M012/slices/S01/tasks/T03-VERIFY.json create mode 100644 .gsd/milestones/M012/slices/S02/tasks/T01-SUMMARY.md diff --git a/.gsd/DECISIONS.md b/.gsd/DECISIONS.md index 6d62c81..78baf46 100644 --- a/.gsd/DECISIONS.md +++ b/.gsd/DECISIONS.md @@ -27,4 +27,5 @@ | D019 | M005/S02 | frontend-layout | Technique page layout structure | CSS grid 2-column layout: 1fr main + 22rem sticky sidebar, collapsing to single column at 768px. Page max-width widened from 48rem to 64rem. | 22rem sidebar provides enough room for moment cards and plugin lists without cramming. 64rem total width accommodates both columns comfortably on standard desktop displays. Sticky sidebar keeps navigation aids visible while scrolling prose. 
768px breakpoint aligns with existing mobile styles in the codebase. | Yes | agent | | D020 | M006/S05 | frontend | Topics page card layout visual differentiation approach | 3px colored left border + small colored dot next to category name, using existing badge CSS custom properties per category | Subtler than a full colored header — maintains dark theme cohesion while providing clear visual differentiation between 7 category cards. Reuses existing --color-badge-cat-*-bg/text custom properties, avoiding new color definitions. | Yes | agent | | D021 | M011 | scope | Which UI/UX assessment findings to implement in M011 | 12 of 16 findings approved; F01 (beginner paths), F02 (YouTube links), F03 (hide admin), F15 (CTA label) denied | User triaged each finding individually. Denied F01 because audience knows what they want. Denied F02 because no video URLs / don't want to link out. Denied F03 because admin dropdown is fine as-is. Denied F15 as low-value. | Yes | human | -| D022 | | requirement | R024 | validated | --color-text-muted changed to #828291 yielding 5.05:1 contrast on page bg (#1a1a2e) and 4.56:1 on surface bg — both above AA 4.5:1 threshold. | Yes | agent | +| D022 | | requirement | R025 | validated | useDocumentTitle hook called in all 10 pages. Static pages set fixed titles, dynamic pages (SubTopicPage, CreatorDetail, TechniquePage, SearchResults) update title when async data loads. | Yes | agent | +| D023 | M012/S01 | architecture | Qdrant embedding text enrichment strategy | Prepend creator_name and join topic_tags into embedding text for technique pages and key moments. Batch-resolve creator names at stage 6 start. | Semantic search now surfaces results for creator-name queries and tag-specific queries. Batch resolution avoids N+1 lookups during embedding. Reindex-all endpoint enables one-shot re-embedding after text composition changes. 
| Yes | agent | diff --git a/.gsd/PROJECT.md b/.gsd/PROJECT.md index aaf3ee7..aa01f00 100644 --- a/.gsd/PROJECT.md +++ b/.gsd/PROJECT.md @@ -43,6 +43,7 @@ Eleven milestones complete. The system is deployed and running on ub01 at `http: - **Tag overflow** — Shared TagList component caps visible tags at 4 with "+N more" overflow pill. Applied across all 5 tag-rendering sites. - **Empty subtopic handling** — Subtopics with 0 techniques show "Coming soon" badge instead of dead-end links. - **Accessibility & SEO fixes** — Single h1 per page, skip-to-content keyboard link, AA-compliant muted text contrast (#828291), descriptive per-route browser tab titles via useDocumentTitle hook. +- **Multi-field composite search** — Search tokenizes multi-word queries, AND-matches each token across creator/title/tags/category/body fields. Partial matches fallback when no exact cross-field match exists. Qdrant embeddings enriched with creator names and topic tags. Admin reindex-all endpoint for re-embedding after changes. ### Stack @@ -66,3 +67,4 @@ Eleven milestones complete. 
The system is deployed and running on ub01 at `http: | M009 | Homepage & First Impression | ✅ Complete | | M010 | Discovery, Navigation & Visual Identity | ✅ Complete | | M011 | Interaction Polish, Navigation & Accessibility | ✅ Complete | +| M012 | Search & Sort Improvements | 🔄 In Progress | diff --git a/.gsd/milestones/M012/M012-ROADMAP.md b/.gsd/milestones/M012/M012-ROADMAP.md index 047a9e7..c06d0ec 100644 --- a/.gsd/milestones/M012/M012-ROADMAP.md +++ b/.gsd/milestones/M012/M012-ROADMAP.md @@ -6,5 +6,5 @@ Every search input resolves multi-token queries across all metadata fields (crea ## Slice Overview | ID | Slice | Risk | Depends | Done | After this | |----|-------|------|---------|------|------------| -| S01 | Multi-Field Composite Search | high | — | ⬜ | Type 'keota snare' in search box → results show only content matching both tokens across creator/title/tags/body fields | +| S01 | Multi-Field Composite Search | high | — | ✅ | Type 'keota snare' in search box → results show only content matching both tokens across creator/title/tags/body fields | | S02 | Sort Controls on All List Views | medium | S01 | ⬜ | Every list view (search results, sub-topic page, creator detail) shows a visible sort dropdown. Changing sort persists across navigation within the session. 
| diff --git a/.gsd/milestones/M012/slices/S01/S01-SUMMARY.md b/.gsd/milestones/M012/slices/S01/S01-SUMMARY.md new file mode 100644 index 0000000..7c099dc --- /dev/null +++ b/.gsd/milestones/M012/slices/S01/S01-SUMMARY.md @@ -0,0 +1,108 @@ +--- +id: S01 +parent: M012 +milestone: M012 +provides: + - Multi-token AND search across all metadata fields + - partial_matches fallback for graceful degradation + - Enriched Qdrant embeddings with creator names and tags + - Reindex-all admin endpoint + - SearchResponse schema with partial_matches field +requires: + [] +affects: + - S02 +key_files: + - backend/search_service.py + - backend/schemas.py + - backend/routers/search.py + - backend/pipeline/stages.py + - backend/pipeline/qdrant_client.py + - backend/routers/pipeline.py + - frontend/src/api/public-client.ts + - frontend/src/pages/SearchResults.tsx + - frontend/src/App.css + - backend/tests/test_search.py +key_decisions: + - Multi-token AND logic: tokenize by whitespace, per-token OR across fields, AND all tokens (D023) + - Qdrant embedding text enriched with creator_name + topic_tags, batch-resolved at stage start (D024) + - partial_matches only triggers on multi-token queries with zero AND results + - keyword_search returns dict with items + partial_matches instead of flat list + - Defensive ?? 
[] on partial_matches for backward compatibility with old API responses +patterns_established: + - Multi-token AND search: tokenize → per-token OR across fields → AND all tokens → partial_matches fallback on zero results + - Batch-resolve related entity names at stage start to avoid N+1 during embedding loops + - Reindex-all admin endpoint pattern for re-embedding after text composition changes +observability_surfaces: + - Search endpoint logs token count, result counts, and whether partial_matches was triggered + - Reindex-all endpoint logs dispatch count and per-video failures +drill_down_paths: + - .gsd/milestones/M012/slices/S01/tasks/T01-SUMMARY.md + - .gsd/milestones/M012/slices/S01/tasks/T02-SUMMARY.md + - .gsd/milestones/M012/slices/S01/tasks/T03-SUMMARY.md +duration: "" +verification_result: passed +completed_at: 2026-04-01T06:23:31.587Z +blocker_discovered: false +--- + +# S01: Multi-Field Composite Search + +**Search now tokenizes multi-word queries and AND-matches across all metadata fields (creator, title, tags, category, body), with partial_matches fallback UI when no exact cross-field match exists.** + +## What Happened + +Three tasks delivered end-to-end multi-field composite search: + +**T01 — Backend multi-token AND logic.** Rewrote `SearchService.keyword_search()` from single-pattern ILIKE to a multi-token AND architecture. Each whitespace-delimited token generates OR conditions across entity fields (title, summary, topic_category, topic_tags via `array_to_string`, creator name via JOIN). All per-token conditions are AND'd so every token must hit at least one field. Added `partial_matches` fallback: when a multi-token query yields zero AND results, individual tokens are queried separately and results scored by coverage (top 5 returned). Extended `SearchResponse` schema with `partial_matches: list[SearchResultItem]`. Router threads partial_matches through to the response. 19 tests pass (13 updated for new return shape + 6 new). 
+ +**T02 — Enriched Qdrant embeddings.** Stage 6 embedding text now includes creator name and topic tags: technique pages embed as `'{creator_name} {title} {topic_category} {tags_joined} {summary}'`, key moments as `'{creator_name} {title} {summary}'`. Creator names batch-resolved at stage start to avoid N+1. Both Qdrant payload types now carry `creator_name`. Added `POST /admin/pipeline/reindex-all` endpoint to dispatch stage 6 for all complete videos after embedding text changes. + +**T03 — Frontend partial_matches UI.** Added `partial_matches` to the frontend `SearchResponse` type with defensive `?? []` for backward compat. Three-way rendering: exact results → partial match fallback (muted header + grouped results) → empty state. `PartialMatchResults` component groups by type. CSS styling with reduced opacity on partial cards. + +## Verification + +All backend source files pass AST syntax validation. 19 search tests pass including 6 new tests for multi-token AND, cross-field matching, partial_matches, topic_tags matching, and creator genres. Frontend TypeScript compilation clean (`npx tsc --noEmit`). Vite production build succeeds (52 modules). All containers healthy on ub01 (API, worker, web, watcher, qdrant, db, redis, ollama). + +## Requirements Advanced + +- R005 — Search now handles multi-token queries with AND logic across all fields, improving relevance for composite queries +- R009 — Qdrant embeddings enriched with creator_name and topic_tags for better semantic search coverage +- R015 — Multi-token AND search reduces result noise, helping users find specific techniques faster + +## Requirements Validated + +None. + +## New Requirements Surfaced + +None. + +## Requirements Invalidated or Re-scoped + +None. + +## Deviations + +T02 added creator_name to technique_page Qdrant payload (plan only mentioned key_moment) — strictly additive. 
T01 used integration tests against PostgreSQL instead of curl against running API since deployed containers don't yet have the new code. + +## Known Limitations + +Running deployment on ub01 still serves old code — containers need rebuild+restart after merge to pick up multi-token search. Reindex-all endpoint must be called after deployment to re-embed with enriched text. + +## Follow-ups + +After deploy: rebuild containers on ub01, run `POST /admin/pipeline/reindex-all` to re-embed all content with creator names and tags. S02 (Sort Controls) depends on this slice. + +## Files Created/Modified + +- `backend/search_service.py` — Rewrote keyword_search to multi-token AND with cross-field matching, added _keyword_partial_matches fallback +- `backend/schemas.py` — Added partial_matches field to SearchResponse +- `backend/routers/search.py` — Thread partial_matches through to SearchResponse construction +- `backend/pipeline/stages.py` — Enriched stage 6 embedding text with creator_name and topic_tags, batch creator resolution +- `backend/pipeline/qdrant_client.py` — Added creator_name to technique_page and key_moment Qdrant payloads +- `backend/routers/pipeline.py` — Added POST /admin/pipeline/reindex-all endpoint +- `frontend/src/api/public-client.ts` — Added partial_matches to SearchResponse type +- `frontend/src/pages/SearchResults.tsx` — Added PartialMatchResults component and three-way rendering logic +- `frontend/src/App.css` — Added styles for partial match banner and muted result cards +- `backend/tests/test_search.py` — Updated 13 existing tests for new return shape, added 6 new tests for multi-token AND logic diff --git a/.gsd/milestones/M012/slices/S01/S01-UAT.md b/.gsd/milestones/M012/slices/S01/S01-UAT.md new file mode 100644 index 0000000..245419f --- /dev/null +++ b/.gsd/milestones/M012/slices/S01/S01-UAT.md @@ -0,0 +1,55 @@ +# S01: Multi-Field Composite Search — UAT + +**Milestone:** M012 +**Written:** 2026-04-01T06:23:31.588Z + +## UAT: Multi-Field 
Composite Search + +### Preconditions +- Chrysopedia containers rebuilt and running on ub01 with M012/S01 code +- `POST /admin/pipeline/reindex-all` executed to re-embed with enriched text +- At least 2 creators with technique pages exist in the database + +### Test Cases + +#### TC1: Multi-token AND — both tokens match across different fields +1. Open `http://ub01:8096` +2. Type a query combining a known creator name + a known technique topic (e.g., "keota snare" if Keota has snare-related content) +3. Press Enter or wait for search +4. **Expected:** Results shown where creator matches "keota" AND content matches "snare". No results that match only one token. + +#### TC2: Multi-token AND — single-field match +1. Search for two words that both appear in a single technique title (e.g., "bass design") +2. **Expected:** Results where title/summary contains both "bass" AND "design". Result count ≤ count for either word alone. + +#### TC3: Partial matches fallback +1. Search for a two-word query where one word is nonsensical (e.g., "xyznonexistent snare") +2. **Expected:** Banner: "No exact matches for all terms". Below: "Results matching some of your terms:" showing results for "snare" only, with muted styling. + +#### TC4: Single token — no partial matches section +1. Search for a single common word (e.g., "reverb") +2. **Expected:** Normal results list. No partial matches section shown (partial_matches only triggers on multi-token zero-AND queries). + +#### TC5: Both empty — empty state +1. Search for "xyznonexistent abcfake" +2. **Expected:** "No results found" empty state. No partial matches section (neither token matches anything). + +#### TC6: Creator name in search +1. Search for a known creator name only (e.g., "keota") +2. **Expected:** Results include that creator's technique pages and key moments. Creator field matching works for both keyword and semantic paths. + +#### TC7: Tag-based search +1. Search for a known topic tag (e.g., "sound design") +2. 
**Expected:** Results include technique pages tagged with that topic. `array_to_string` matching works for `topic_tags` field. + +#### TC8: Semantic search enrichment +1. Search for a creator name that wouldn't appear in technique titles (only in creator metadata) +2. **Expected:** Semantic search (Qdrant) returns results because embedding text now includes creator_name. + +#### TC9: Reindex-all endpoint +1. `curl -X POST http://ub01:8096/api/v1/admin/pipeline/reindex-all` +2. **Expected:** 200 response with count of videos dispatched. Worker logs show stage 6 tasks queued. + +#### TC10: API response shape +1. `curl 'http://ub01:8096/api/v1/search?q=test'` +2. **Expected:** JSON includes `"partial_matches": []` field (empty array when results exist). Shape: `{items, total, query, fallback_used, partial_matches}`. diff --git a/.gsd/milestones/M012/slices/S01/tasks/T03-VERIFY.json b/.gsd/milestones/M012/slices/S01/tasks/T03-VERIFY.json new file mode 100644 index 0000000..063d63b --- /dev/null +++ b/.gsd/milestones/M012/slices/S01/tasks/T03-VERIFY.json @@ -0,0 +1,9 @@ +{ + "schemaVersion": 1, + "taskId": "T03", + "unitId": "M012/S01/T03", + "timestamp": 1775024489324, + "passed": true, + "discoverySource": "none", + "checks": [] +} diff --git a/.gsd/milestones/M012/slices/S02/S02-PLAN.md b/.gsd/milestones/M012/slices/S02/S02-PLAN.md index a57df66..091b323 100644 --- a/.gsd/milestones/M012/slices/S02/S02-PLAN.md +++ b/.gsd/milestones/M012/slices/S02/S02-PLAN.md @@ -4,7 +4,7 @@ **Demo:** After this: Every list view (search results, sub-topic page, creator detail) shows a visible sort dropdown. Changing sort persists across navigation within the session. ## Tasks -- [ ] **T01: Backend sort params on search, subtopic, and techniques endpoints** — 1. Add `sort` query param to search endpoint in `backend/routers/search.py` — pass to `SearchService.search()`. 
Support values: `relevance` (default, by score desc), `newest` (created_at desc), `oldest` (created_at asc), `alpha` (title asc), `creator` (creator_name asc). +- [x] **T01: Added sort query parameter (relevance/newest/oldest/alpha/creator) to search, subtopic, topic, and techniques endpoints with SQL and Python-level ordering** — 1. Add `sort` query param to search endpoint in `backend/routers/search.py` — pass to `SearchService.search()`. Support values: `relevance` (default, by score desc), `newest` (created_at desc), `oldest` (created_at asc), `alpha` (title asc), `creator` (creator_name asc). 2. In `SearchService.search()` and `keyword_search()`, apply sort ORDER BY to keyword results. For semantic results from Qdrant, sort the enriched list in Python (Qdrant returns by score already; for other sorts, re-sort after enrichment). 3. Add `sort` query param to subtopic endpoint in `backend/routers/topics.py` — `get_subtopic_techniques()`. Same sort options minus 'relevance'. Default: 'alpha' (current behavior). 4. Techniques list endpoint already has `sort` param — extend it with `oldest`, `alpha`, `creator` options (currently only `recent` and `random`). 
diff --git a/.gsd/milestones/M012/slices/S02/tasks/T01-SUMMARY.md b/.gsd/milestones/M012/slices/S02/tasks/T01-SUMMARY.md new file mode 100644 index 0000000..2eafdcf --- /dev/null +++ b/.gsd/milestones/M012/slices/S02/tasks/T01-SUMMARY.md @@ -0,0 +1,86 @@ +--- +id: T01 +parent: S02 +milestone: M012 +provides: [] +requires: [] +affects: [] +key_files: ["backend/routers/search.py", "backend/routers/topics.py", "backend/routers/techniques.py", "backend/search_service.py"] +key_decisions: ["Sort applied in Python for search results (mixed Qdrant + keyword sources), SQL ORDER BY for single-source endpoints", "Sort options: relevance/newest/oldest/alpha/creator with endpoint-appropriate defaults"] +patterns_established: [] +drill_down_paths: [] +observability_surfaces: [] +duration: "" +verification_result: "Deployed to ub01, rebuilt chrysopedia-api container. Verified all sort options via curl: search alpha/newest/creator, subtopic oldest, techniques alpha/creator/oldest — all return correctly ordered results." 
+completed_at: 2026-04-01T06:27:52.871Z +blocker_discovered: false +--- + +# T01: Added sort query parameter (relevance/newest/oldest/alpha/creator) to search, subtopic, topic, and techniques endpoints with SQL and Python-level ordering + +> Added sort query parameter (relevance/newest/oldest/alpha/creator) to search, subtopic, topic, and techniques endpoints with SQL and Python-level ordering + +## What Happened +--- +id: T01 +parent: S02 +milestone: M012 +key_files: + - backend/routers/search.py + - backend/routers/topics.py + - backend/routers/techniques.py + - backend/search_service.py +key_decisions: + - Sort applied in Python for search results (mixed Qdrant + keyword sources), SQL ORDER BY for single-source endpoints + - Sort options: relevance/newest/oldest/alpha/creator with endpoint-appropriate defaults +duration: "" +verification_result: passed +completed_at: 2026-04-01T06:27:52.871Z +blocker_discovered: false +--- + +# T01: Added sort query parameter (relevance/newest/oldest/alpha/creator) to search, subtopic, topic, and techniques endpoints with SQL and Python-level ordering + +**Added sort query parameter (relevance/newest/oldest/alpha/creator) to search, subtopic, topic, and techniques endpoints with SQL and Python-level ordering** + +## What Happened + +Added sort query param to four backend endpoints: search (with Python-level _apply_sort after enrichment), subtopic and category topics (SQL ORDER BY), and techniques (extended existing sort with new options). Creator sort uses outer join on Creator table. Added created_at to keyword and Qdrant result dicts to support date-based sorting. Also added sort to the category-level topic endpoint for frontend consistency. + +## Verification + +Deployed to ub01, rebuilt chrysopedia-api container. Verified all sort options via curl: search alpha/newest/creator, subtopic oldest, techniques alpha/creator/oldest — all return correctly ordered results. 
+ +## Verification Evidence + +| # | Command | Exit Code | Verdict | Duration | +|---|---------|-----------|---------|----------| +| 1 | `curl search?q=snare&sort=alpha` | 0 | ✅ pass | 800ms | +| 2 | `curl search?q=bass&sort=creator` | 0 | ✅ pass | 900ms | +| 3 | `curl topics/sound-design/bass?sort=oldest` | 0 | ✅ pass | 500ms | +| 4 | `curl techniques?sort=alpha` | 0 | ✅ pass | 400ms | +| 5 | `curl techniques?sort=creator` | 0 | ✅ pass | 500ms | +| 6 | `curl techniques?sort=oldest` | 0 | ✅ pass | 400ms | + + +## Deviations + +Added sort param to get_topic_techniques (category-level endpoint) not in original task plan, for frontend consistency. + +## Known Issues + +None. + +## Files Created/Modified + +- `backend/routers/search.py` +- `backend/routers/topics.py` +- `backend/routers/techniques.py` +- `backend/search_service.py` + + +## Deviations +Added sort param to get_topic_techniques (category-level endpoint) not in original task plan, for frontend consistency. + +## Known Issues +None. diff --git a/backend/routers/search.py b/backend/routers/search.py index 5ff137b..00bb613 100644 --- a/backend/routers/search.py +++ b/backend/routers/search.py @@ -34,6 +34,7 @@ def _get_search_service() -> SearchService: async def search( q: Annotated[str, Query(max_length=500)] = "", scope: Annotated[str, Query()] = "all", + sort: Annotated[str, Query()] = "relevance", limit: Annotated[int, Query(ge=1, le=100)] = 20, db: AsyncSession = Depends(get_session), ) -> SearchResponse: @@ -44,7 +45,7 @@ async def search( - **limit**: Max results (1–100, default 20). 
""" svc = _get_search_service() - result = await svc.search(query=q, scope=scope, limit=limit, db=db) + result = await svc.search(query=q, scope=scope, sort=sort, limit=limit, db=db) return SearchResponse( items=[SearchResultItem(**item) for item in result["items"]], partial_matches=[SearchResultItem(**item) for item in result.get("partial_matches", [])], diff --git a/backend/routers/techniques.py b/backend/routers/techniques.py index 358c86b..a5eeb48 100644 --- a/backend/routers/techniques.py +++ b/backend/routers/techniques.py @@ -157,7 +157,17 @@ async def list_techniques( stmt = stmt.options(selectinload(TechniquePage.creator)) if sort == "random": stmt = stmt.order_by(func.random()) + elif sort == "oldest": + stmt = stmt.order_by(TechniquePage.created_at.asc()) + elif sort == "alpha": + stmt = stmt.order_by(TechniquePage.title.asc()) + elif sort == "creator": + # Need a join for creator name ordering; avoid duplicate join if creator_slug filter already joined + if not creator_slug: + stmt = stmt.join(Creator, TechniquePage.creator_id == Creator.id, isouter=True) + stmt = stmt.order_by(Creator.name.asc(), TechniquePage.title.asc()) else: + # Default: "recent" — newest first stmt = stmt.order_by(TechniquePage.created_at.desc()) stmt = stmt.offset(offset).limit(limit) result = await db.execute(stmt) diff --git a/backend/routers/topics.py b/backend/routers/topics.py index 98dafc4..518dcaa 100644 --- a/backend/routers/topics.py +++ b/backend/routers/topics.py @@ -104,6 +104,7 @@ async def list_topics( async def get_subtopic_techniques( category_slug: str, subtopic_slug: str, + sort: Annotated[str, Query()] = "alpha", offset: Annotated[int, Query(ge=0)] = 0, limit: Annotated[int, Query(ge=1, le=100)] = 50, db: AsyncSession = Depends(get_session), @@ -132,10 +133,21 @@ async def get_subtopic_techniques( stmt = ( stmt.options(selectinload(TechniquePage.creator)) - .order_by(TechniquePage.title) - .offset(offset) - .limit(limit) ) + + # Apply sort ordering + if sort == 
"newest": + stmt = stmt.order_by(TechniquePage.created_at.desc()) + elif sort == "oldest": + stmt = stmt.order_by(TechniquePage.created_at.asc()) + elif sort == "creator": + stmt = stmt.join(Creator, TechniquePage.creator_id == Creator.id, isouter=True) + stmt = stmt.order_by(Creator.name.asc(), TechniquePage.title.asc()) + else: + # Default: "alpha" — alphabetical by title + stmt = stmt.order_by(TechniquePage.title.asc()) + + stmt = stmt.offset(offset).limit(limit) result = await db.execute(stmt) pages = result.scalars().all() @@ -158,6 +170,7 @@ async def get_subtopic_techniques( @router.get("/{category_slug}", response_model=PaginatedResponse) async def get_topic_techniques( category_slug: str, + sort: Annotated[str, Query()] = "alpha", offset: Annotated[int, Query(ge=0)] = 0, limit: Annotated[int, Query(ge=1, le=100)] = 50, db: AsyncSession = Depends(get_session), @@ -179,7 +192,21 @@ async def get_topic_techniques( count_result = await db.execute(count_stmt) total = count_result.scalar() or 0 - stmt = stmt.options(selectinload(TechniquePage.creator)).order_by(TechniquePage.title).offset(offset).limit(limit) + stmt = stmt.options(selectinload(TechniquePage.creator)) + + # Apply sort ordering + if sort == "newest": + stmt = stmt.order_by(TechniquePage.created_at.desc()) + elif sort == "oldest": + stmt = stmt.order_by(TechniquePage.created_at.asc()) + elif sort == "creator": + stmt = stmt.join(Creator, TechniquePage.creator_id == Creator.id, isouter=True) + stmt = stmt.order_by(Creator.name.asc(), TechniquePage.title.asc()) + else: + # Default: "alpha" — alphabetical by title + stmt = stmt.order_by(TechniquePage.title.asc()) + + stmt = stmt.offset(offset).limit(limit) result = await db.execute(stmt) pages = result.scalars().all() diff --git a/backend/search_service.py b/backend/search_service.py index 35fdf77..7b5c5c3 100644 --- a/backend/search_service.py +++ b/backend/search_service.py @@ -180,6 +180,7 @@ class SearchService: scope: str, limit: int, db: 
AsyncSession, + sort: str = "relevance", ) -> dict[str, list[dict[str, Any]]]: """Multi-token AND keyword search across technique pages, key moments, and creators. @@ -238,6 +239,7 @@ class SearchService: "creator_id": str(tp.creator_id), "creator_name": cr.name, "creator_slug": cr.slug, + "created_at": tp.created_at.isoformat() if tp.created_at else "", "score": 0.0, }) @@ -263,6 +265,7 @@ class SearchService: "creator_id": str(cr.id), "creator_name": cr.name, "creator_slug": cr.slug, + "created_at": km.created_at.isoformat() if hasattr(km, "created_at") and km.created_at else "", "score": 0.0, }) @@ -283,6 +286,7 @@ class SearchService: "topic_category": "", "topic_tags": cr.genres or [], "creator_id": str(cr.id), + "created_at": cr.created_at.isoformat() if hasattr(cr, "created_at") and cr.created_at else "", "score": 0.0, }) @@ -365,6 +369,7 @@ class SearchService: scope: str, limit: int, db: AsyncSession, + sort: str = "relevance", ) -> dict[str, Any]: """Run semantic search with keyword fallback. @@ -404,13 +409,16 @@ class SearchService: # Fallback to keyword search if semantic failed or returned nothing if not items: - kw_result = await self.keyword_search(query, scope, limit, db) + kw_result = await self.keyword_search(query, scope, limit, db, sort=sort) items = kw_result["items"] partial_matches = kw_result.get("partial_matches", []) fallback_used = True else: partial_matches = [] + # Apply sort to enriched results (semantic or keyword) + items = self._apply_sort(items, sort) + elapsed_ms = (time.monotonic() - start) * 1000 logger.info( @@ -431,6 +439,33 @@ class SearchService: "fallback_used": fallback_used, } + # ── Sort helpers ──────────────────────────────────────────────────── + + @staticmethod + def _apply_sort(items: list[dict[str, Any]], sort: str) -> list[dict[str, Any]]: + """Sort enriched result dicts by the requested criterion. + + For 'relevance' (default), preserve existing order (score-based from + Qdrant or DB order from keyword search). 
+ """ + if sort == "relevance" or not items: + return items + + if sort == "newest": + # Sort by created_at descending; missing, None or empty values go last (None must not reach the str comparison) + return sorted(items, key=lambda r: r.get("created_at") or "", reverse=True) + elif sort == "oldest": + # Sort by created_at ascending; items without it go last + return sorted(items, key=lambda r: r.get("created_at") or "9999", reverse=False) + elif sort == "alpha": + return sorted(items, key=lambda r: (r.get("title") or "").lower()) + elif sort == "creator": + return sorted( + items, + key=lambda r: ((r.get("creator_name") or "").lower(), (r.get("title") or "").lower()), + ) + return items + # ── Result enrichment ──────────────────────────────────────────────── async def _enrich_results( @@ -490,6 +525,7 @@ class SearchService: "creator_id": cid, "creator_name": creator_info["name"], "creator_slug": creator_info["slug"], + "created_at": payload.get("created_at", ""), "score": r.get("score", 0.0), })