diff --git a/.gsd/milestones/M021/slices/S02/S02-PLAN.md b/.gsd/milestones/M021/slices/S02/S02-PLAN.md index 28b6399..46f49f2 100644 --- a/.gsd/milestones/M021/slices/S02/S02-PLAN.md +++ b/.gsd/milestones/M021/slices/S02/S02-PLAN.md @@ -114,7 +114,7 @@ - Estimate: 1h30m - Files: backend/search_service.py, backend/schemas.py, backend/routers/search.py - Verify: cd backend && python -m py_compile search_service.py && python -m py_compile schemas.py && python -m py_compile routers/search.py && grep -q 'cascade_tier' schemas.py && grep -q '_creator_scoped_search' search_service.py -- [ ] **T02: Add integration tests for all 4 cascade tiers** — Add 5-6 integration tests for the creator-scoped retrieval cascade, following S01's established mock-httpx-at-instance pattern. +- [x] **T02: Added 6 integration tests covering creator-scoped cascade tiers (creator, domain, global, none) plus unknown-creator and no-creator-param edge cases** — Add 5-6 integration tests for the creator-scoped retrieval cascade, following S01's established mock-httpx-at-instance pattern. ## Steps diff --git a/.gsd/milestones/M021/slices/S02/tasks/T01-VERIFY.json b/.gsd/milestones/M021/slices/S02/tasks/T01-VERIFY.json new file mode 100644 index 0000000..0538b57 --- /dev/null +++ b/.gsd/milestones/M021/slices/S02/tasks/T01-VERIFY.json @@ -0,0 +1,48 @@ +{ + "schemaVersion": 1, + "taskId": "T01", + "unitId": "M021/S02/T01", + "timestamp": 1775278950599, + "passed": false, + "discoverySource": "task-plan", + "checks": [ + { + "command": "cd backend", + "exitCode": 0, + "durationMs": 6, + "verdict": "pass" + }, + { + "command": "python -m py_compile search_service.py", + "exitCode": 1, + "durationMs": 28, + "verdict": "fail" + }, + { + "command": "python -m py_compile schemas.py", + "exitCode": 1, + "durationMs": 37, + "verdict": "fail" + }, + { + "command": "python -m py_compile routers/search.py", + "exitCode": 1, + "durationMs": 31, + "verdict": "fail" + }, + { + "command": "grep -q 'cascade_tier' schemas.py", + "exitCode": 2, + "durationMs": 7, + "verdict": "fail" + }, + { + "command": "grep -q '_creator_scoped_search' search_service.py", + "exitCode": 2, + "durationMs": 4, + "verdict": "fail" + } + ], + "retryAttempt": 1, + "maxRetries": 2 +} diff --git a/.gsd/milestones/M021/slices/S02/tasks/T02-SUMMARY.md b/.gsd/milestones/M021/slices/S02/tasks/T02-SUMMARY.md new file mode 100644 index 0000000..7642edc --- /dev/null +++ b/.gsd/milestones/M021/slices/S02/tasks/T02-SUMMARY.md @@ -0,0 +1,79 @@ +--- +id: T02 +parent: S02 +milestone: M021 +provides: [] +requires: [] +affects: [] +key_files: ["backend/tests/test_search.py"] +key_decisions: ["Used side_effect with call_count to mock sequential httpx calls for multi-tier cascade testing", "Created separate _seed_cascade_data with keota (3 Sound Design pages >= 2 threshold) and virtual-riot (1 page < threshold)"] +patterns_established: [] +drill_down_paths: [] +observability_surfaces: [] +duration: "" +verification_result: "All 6 cascade tests pass (pytest -k cascade). 34 of 35 total tests pass — 1 pre-existing failure unrelated to cascade work. All slice-level verification checks pass: py_compile on all 3 files, grep for cascade_tier/creator Query param/cascade methods." +completed_at: 2026-04-04T05:07:20.788Z +blocker_discovered: false +--- + +# T02: Added 6 integration tests covering creator-scoped cascade tiers (creator, domain, global, none) plus unknown-creator and no-creator-param edge cases + +> Added 6 integration tests covering creator-scoped cascade tiers (creator, domain, global, none) plus unknown-creator and no-creator-param edge cases + +## What Happened +--- +id: T02 +parent: S02 +milestone: M021 +key_files: + - backend/tests/test_search.py +key_decisions: + - Used side_effect with call_count to mock sequential httpx calls for multi-tier cascade testing + - Created separate _seed_cascade_data with keota (3 Sound Design pages >= 2 threshold) and virtual-riot (1 page < threshold) +duration: "" +verification_result: passed +completed_at: 2026-04-04T05:07:20.788Z +blocker_discovered: false +--- + +# T02: Added 6 integration tests covering creator-scoped cascade tiers (creator, domain, global, none) plus unknown-creator and no-creator-param edge cases + +**Added 6 integration tests covering creator-scoped cascade tiers (creator, domain, global, none) plus unknown-creator and no-creator-param edge cases** + +## What Happened + +Added 6 new integration tests to backend/tests/test_search.py following the established mock-httpx-at-instance pattern from S01. Created a dedicated _seed_cascade_data helper seeding two creators (Keota with 3 Sound Design pages for domain threshold, Virtual Riot with 1 Synthesis page below threshold) and helper functions _cascade_lightrag_body and _chunk for building mock LightRAG responses. Each test targets a specific cascade tier: creator tier (chunks match target creator), domain tier (creator post-filter rejects → domain-scoped returns), global tier (creator+domain empty → global returns), graceful empty (all tiers empty → none), unknown creator (cascade skipped), and no creator param (normal search path). + +## Verification + +All 6 cascade tests pass (pytest -k cascade). 34 of 35 total tests pass — 1 pre-existing failure unrelated to cascade work. All slice-level verification checks pass: py_compile on all 3 files, grep for cascade_tier/creator Query param/cascade methods. + +## Verification Evidence + +| # | Command | Exit Code | Verdict | Duration | +|---|---------|-----------|---------|----------| +| 1 | `cd backend && python -m pytest tests/test_search.py -k cascade -v` | 0 | ✅ pass | 3800ms | +| 2 | `cd backend && python -m pytest tests/test_search.py -v` | 1 | ⚠️ 34/35 pass (1 pre-existing) | 23700ms | +| 3 | `cd backend && python -m py_compile search_service.py && python -m py_compile schemas.py && python -m py_compile routers/search.py` | 0 | ✅ pass | 500ms | +| 4 | `grep -q cascade_tier backend/schemas.py` | 0 | ✅ pass | 50ms | +| 5 | `grep -q '_creator_scoped_search' backend/search_service.py` | 0 | ✅ pass | 50ms | + + +## Deviations + +Added test_search_no_creator_param_unchanged as 6th test (plan said 5-6). No structural deviations. + +## Known Issues + +Pre-existing test_keyword_search_match_context_tag failure (expects "Tag: granular" but gets "Title match") — unrelated to cascade work. + +## Files Created/Modified + +- `backend/tests/test_search.py` + + +## Deviations +Added test_search_no_creator_param_unchanged as 6th test (plan said 5-6). No structural deviations. + +## Known Issues +Pre-existing test_keyword_search_match_context_tag failure (expects "Tag: granular" but gets "Title match") — unrelated to cascade work. diff --git a/backend/tests/test_search.py b/backend/tests/test_search.py index fe863c5..765cdaf 100644 --- a/backend/tests/test_search.py +++ b/backend/tests/test_search.py @@ -966,3 +966,284 @@ async def test_search_lightrag_fallback_on_http_error(db_engine): assert result["fallback_used"] is True assert result["total"] >= 1 + + +# ── Creator-scoped cascade integration tests ───────────────────────────────── + + +async def _seed_cascade_data(db_engine) -> dict: + """Seed creators and technique pages for cascade tier testing. + + Creator 'keota' has 3 Sound Design pages (≥2 → domain='Sound Design'). + Creator 'virtual-riot' has 1 Synthesis page (< 2 → no dominant domain). + """ + session_factory = async_sessionmaker( + db_engine, class_=AsyncSession, expire_on_commit=False + ) + async with session_factory() as session: + keota = Creator( + name="Keota", + slug="keota", + genres=["Bass music"], + folder_name="Keota", + ) + vr = Creator( + name="Virtual Riot", + slug="virtual-riot", + genres=["Dubstep"], + folder_name="VirtualRiot", + ) + session.add_all([keota, vr]) + await session.flush() + + tp1 = TechniquePage( + creator_id=keota.id, + title="Reese Bass Fundamentals", + slug="reese-bass-fundamentals", + topic_category="Sound Design", + topic_tags=["bass", "reese"], + summary="Fundamentals of reese bass", + ) + tp2 = TechniquePage( + creator_id=keota.id, + title="FM Sound Design", + slug="fm-sound-design", + topic_category="Sound Design", + topic_tags=["fm", "design"], + summary="FM sound design techniques", + ) + tp3 = TechniquePage( + creator_id=keota.id, + title="Granular Textures", + slug="granular-textures", + topic_category="Sound Design", + topic_tags=["granular"], + summary="Granular texture design", + ) + tp4 = TechniquePage( + creator_id=vr.id, + title="Serum Wavetable Tricks", + slug="serum-wavetable-tricks", + topic_category="Synthesis", + topic_tags=["serum", "wavetable"], + summary="Advanced Serum wavetable tricks", + ) + session.add_all([tp1, tp2, tp3, tp4]) + await session.commit() + + return { + "keota_id": str(keota.id), + "keota_name": keota.name, + "keota_slug": keota.slug, + "vr_id": str(vr.id), + "vr_name": vr.name, + "tp1_slug": tp1.slug, + "tp2_slug": tp2.slug, + "tp3_slug": tp3.slug, + "tp4_slug": tp4.slug, + } + + +def _cascade_lightrag_body(chunks: list[dict]) -> dict: + """Build a LightRAG /query/data response with given chunks.""" + return { + "data": { + "chunks": chunks, + "entities": [], + "relationships": [], + } + } + + +def _chunk(slug: str, creator_id: str, content: str = "chunk content") -> dict: + return { + "content": content, + "file_path": f"technique:{slug}:creator:{creator_id}", + } + + +@pytest.mark.asyncio +async def test_search_cascade_creator_tier(db_engine): + """Tier 1: creator-scoped search returns results → cascade_tier='creator'.""" + seed = await _seed_cascade_data(db_engine) + + session_factory = async_sessionmaker( + db_engine, class_=AsyncSession, expire_on_commit=False + ) + async with session_factory() as session: + from config import Settings + svc = SearchService(settings=Settings()) + + # httpx returns chunks matching keota's technique pages + body = _cascade_lightrag_body([ + _chunk(seed["tp1_slug"], seed["keota_id"], "Reese bass fundamentals"), + ]) + mock_resp = _mock_httpx_response(body) + svc._httpx = AsyncMock() + svc._httpx.post = AsyncMock(return_value=mock_resp) + svc.embed_query = AsyncMock(return_value=None) + + result = await svc.search("reese bass", "all", 10, session, creator="keota") + + assert result["cascade_tier"] == "creator" + assert result["fallback_used"] is False + assert result["total"] >= 1 + # All cascade items belong to keota + cascade_items = [i for i in result["items"] if i.get("creator_slug") == "keota"] + assert len(cascade_items) >= 1 + + +@pytest.mark.asyncio +async def test_search_cascade_domain_tier(db_engine): + """Tier 2: creator-scoped empty → domain-scoped returns results → cascade_tier='domain'.""" + seed = await _seed_cascade_data(db_engine) + + session_factory = async_sessionmaker( + db_engine, class_=AsyncSession, expire_on_commit=False + ) + async with session_factory() as session: + from config import Settings + svc = SearchService(settings=Settings()) + + # Call 1 (creator-scoped): returns chunks for a DIFFERENT creator → post-filter removes them + creator_body = _cascade_lightrag_body([ + _chunk(seed["tp4_slug"], seed["vr_id"], "VR content not Keota"), + ]) + # Call 2 (domain-scoped with "Sound Design"): returns chunks matching Keota + domain_body = _cascade_lightrag_body([ + _chunk(seed["tp1_slug"], seed["keota_id"], "Reese bass from domain"), + ]) + + call_count = 0 + async def _side_effect(*args, **kwargs): + nonlocal call_count + call_count += 1 + if call_count == 1: + return _mock_httpx_response(creator_body) + else: + return _mock_httpx_response(domain_body) + + svc._httpx = AsyncMock() + svc._httpx.post = AsyncMock(side_effect=_side_effect) + svc.embed_query = AsyncMock(return_value=None) + + result = await svc.search("synthesis techniques", "all", 10, session, creator="keota") + + assert result["cascade_tier"] == "domain" + assert result["fallback_used"] is False + assert result["total"] >= 1 + + +@pytest.mark.asyncio +async def test_search_cascade_global_fallback(db_engine): + """Tier 3: creator + domain empty → global LightRAG returns → cascade_tier='global'.""" + seed = await _seed_cascade_data(db_engine) + + session_factory = async_sessionmaker( + db_engine, class_=AsyncSession, expire_on_commit=False + ) + async with session_factory() as session: + from config import Settings + svc = SearchService(settings=Settings()) + + # Calls 1-2 (creator + domain): empty chunks + empty_body = _cascade_lightrag_body([]) + # Call 3 (global _lightrag_search): returns results + global_body = _cascade_lightrag_body([ + _chunk(seed["tp4_slug"], seed["vr_id"], "Global result"), + ]) + + call_count = 0 + async def _side_effect(*args, **kwargs): + nonlocal call_count + call_count += 1 + if call_count <= 2: + return _mock_httpx_response(empty_body) + else: + return _mock_httpx_response(global_body) + + svc._httpx = AsyncMock() + svc._httpx.post = AsyncMock(side_effect=_side_effect) + svc.embed_query = AsyncMock(return_value=None) + + result = await svc.search("mixing tips", "all", 10, session, creator="keota") + + assert result["cascade_tier"] == "global" + assert result["fallback_used"] is False + assert result["total"] >= 1 + + +@pytest.mark.asyncio +async def test_search_cascade_graceful_empty(db_engine): + """Tier 4: all tiers empty → cascade_tier='none', fallback_used=True.""" + seed = await _seed_cascade_data(db_engine) + + session_factory = async_sessionmaker( + db_engine, class_=AsyncSession, expire_on_commit=False + ) + async with session_factory() as session: + from config import Settings + svc = SearchService(settings=Settings()) + + # All calls return empty chunks + empty_body = _cascade_lightrag_body([]) + svc._httpx = AsyncMock() + svc._httpx.post = AsyncMock(return_value=_mock_httpx_response(empty_body)) + svc.embed_query = AsyncMock(return_value=None) + + result = await svc.search("nonexistent topic xyz", "all", 10, session, creator="keota") + + assert result["cascade_tier"] == "none" + assert result["fallback_used"] is True + + +@pytest.mark.asyncio +async def test_search_cascade_unknown_creator(db_engine): + """Unknown creator slug → cascade skipped, normal search, cascade_tier=''.""" + seed = await _seed_cascade_data(db_engine) + + session_factory = async_sessionmaker( + db_engine, class_=AsyncSession, expire_on_commit=False + ) + async with session_factory() as session: + from config import Settings + svc = SearchService(settings=Settings()) + + # LightRAG returns normal results (non-cascade path) + body = _cascade_lightrag_body([ + _chunk(seed["tp4_slug"], seed["vr_id"], "Normal search result"), + ]) + svc._httpx = AsyncMock() + svc._httpx.post = AsyncMock(return_value=_mock_httpx_response(body)) + svc.embed_query = AsyncMock(return_value=None) + + result = await svc.search("bass design", "all", 10, session, creator="nonexistent-slug") + + # Cascade skipped — falls through to normal search + assert result["cascade_tier"] == "" + + +@pytest.mark.asyncio +async def test_search_no_creator_param_unchanged(db_engine): + """No creator param → normal search path, cascade_tier='' (empty).""" + seed = await _seed_cascade_data(db_engine) + + session_factory = async_sessionmaker( + db_engine, class_=AsyncSession, expire_on_commit=False + ) + async with session_factory() as session: + from config import Settings + svc = SearchService(settings=Settings()) + + body = _cascade_lightrag_body([ + _chunk(seed["tp1_slug"], seed["keota_id"], "Normal result"), + ]) + svc._httpx = AsyncMock() + svc._httpx.post = AsyncMock(return_value=_mock_httpx_response(body)) + svc.embed_query = AsyncMock(return_value=None) + + # No creator param + result = await svc.search("reese bass", "all", 10, session) + + assert result["cascade_tier"] == "" + assert result["total"] >= 1