test: Added 7 LightRAG integration tests verifying primary search path…

- "backend/tests/test_search.py"

GSD-Task: S01/T02
This commit is contained in:
jlightner 2026-04-04 04:50:40 +00:00
parent 4917fd3a32
commit b3f52cc301

View file

@ -3,6 +3,10 @@
Tests run against a real PostgreSQL test database via httpx.AsyncClient.
SearchService is mocked at the router dependency level so we can test
endpoint behavior without requiring external embedding API or Qdrant.
LightRAG integration tests mock httpx calls at the service-instance level
to exercise _lightrag_search, result mapping, and fallback behavior with
real DB lookups.
"""
from __future__ import annotations
@ -10,6 +14,7 @@ from __future__ import annotations
import uuid
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
import pytest_asyncio
from httpx import AsyncClient
@ -710,3 +715,254 @@ async def test_keyword_search_match_context_multi_token(db_engine):
# The match_context should contain both creator and another field
contexts = [r["match_context"] for r in items]
assert any("Creator: Mr. Bill" in c for c in contexts)
# ── LightRAG integration tests ──────────────────────────────────────────────
def _make_lightrag_response(seed: dict) -> dict:
"""Build a realistic LightRAG /query/data response body.
Uses seed data to construct file_source paths that match seeded technique
pages (slug + creator_id format).
"""
return {
"data": {
"chunks": [
{
"content": "Layering multiple snare samples for punch and body",
"file_path": f"technique:reese-bass-design:creator:{seed['creator1_id']}",
},
{
"content": "Granular techniques for pad textures",
"file_path": f"technique:granular-pad-textures:creator:{seed['creator2_id']}",
},
],
"entities": [
{"entity_name": "Reese Bass Design"},
{"entity_name": "Granular Pad Textures"},
],
"relationships": [
{"source": "Reese Bass Design", "target": "FM Bass Layering", "relationship": "related_to"},
],
}
}
def _mock_httpx_response(body: dict, status_code: int = 200) -> httpx.Response:
    """Wrap *body* as the JSON payload of an ``httpx.Response``.

    The response is bound to a placeholder POST request for
    ``http://mock/query/data`` and carries *status_code* (200 by default).
    """
    placeholder_request = httpx.Request("POST", "http://mock/query/data")
    return httpx.Response(
        status_code=status_code,
        json=body,
        request=placeholder_request,
    )
@pytest.mark.asyncio
async def test_search_lightrag_primary_path(db_engine):
    """A successful LightRAG reply is served without the fallback flag.

    Seeds the database, stubs the service's HTTP client with a canned
    LightRAG payload, then checks that the search result is not marked as a
    fallback, contains the seeded ``reese-bass-design`` page, and that every
    item exposes the expected fields.
    """
    seed = await _seed_search_data(db_engine)
    make_session = async_sessionmaker(
        db_engine, class_=AsyncSession, expire_on_commit=False
    )

    async with make_session() as session:
        from config import Settings

        svc = SearchService(settings=Settings())

        # Stub the outbound HTTP client so no real LightRAG server is contacted.
        fake_client = AsyncMock()
        fake_client.post = AsyncMock(
            return_value=_mock_httpx_response(_make_lightrag_response(seed))
        )
        svc._httpx = fake_client
        # Keep the real embedding API out of the test as well.
        svc.embed_query = AsyncMock(return_value=None)

        result = await svc.search("reese bass design", "all", 10, session)

    assert result["fallback_used"] is False
    assert result["total"] >= 1
    assert "reese-bass-design" in [item["slug"] for item in result["items"]]

    # Every item must expose the fields the search response schema relies on.
    expected_fields = (
        "type",
        "title",
        "slug",
        "score",
        "creator_name",
        "match_context",
    )
    for item in result["items"]:
        for field in expected_fields:
            assert field in item

    # At least one item must be tagged as coming from LightRAG.
    graph_hits = [
        item
        for item in result["items"]
        if item.get("match_context") == "LightRAG graph match"
    ]
    assert len(graph_hits) >= 1
@pytest.mark.asyncio
async def test_search_lightrag_fallback_on_timeout(db_engine):
    """A LightRAG timeout yields a fallback result that still has matches.

    The stubbed HTTP client raises ``httpx.TimeoutException`` on ``post``.
    The search must report ``fallback_used=True`` and still return the seeded
    ``reese-bass-design`` page.
    """
    # Seeding is needed for its side effect only; the returned ids are unused.
    await _seed_search_data(db_engine)
    session_factory = async_sessionmaker(
        db_engine, class_=AsyncSession, expire_on_commit=False
    )

    async with session_factory() as session:
        from config import Settings

        svc = SearchService(settings=Settings())
        # Make the LightRAG HTTP call time out.
        svc._httpx = AsyncMock()
        svc._httpx.post = AsyncMock(
            side_effect=httpx.TimeoutException("read timed out")
        )
        # Keep the real embedding API out of the test.
        svc.embed_query = AsyncMock(return_value=None)

        result = await svc.search("reese bass design", "all", 10, session)

    # The request must have been attempted; otherwise fallback_used=True could
    # merely mean LightRAG was skipped rather than that it failed.
    svc._httpx.post.assert_awaited()

    assert result["fallback_used"] is True
    assert result["total"] >= 1
    # Seeded data matches the query, so the fallback must surface it.
    slugs = [item["slug"] for item in result["items"]]
    assert "reese-bass-design" in slugs
@pytest.mark.asyncio
async def test_search_lightrag_fallback_on_connection_error(db_engine):
    """A LightRAG connection failure yields a non-empty fallback result.

    The stubbed HTTP client raises ``httpx.ConnectError`` on ``post``. The
    search must report ``fallback_used=True`` and return at least one item.
    """
    # Seeding is needed for its side effect only; the returned ids are unused.
    await _seed_search_data(db_engine)
    session_factory = async_sessionmaker(
        db_engine, class_=AsyncSession, expire_on_commit=False
    )

    async with session_factory() as session:
        from config import Settings

        svc = SearchService(settings=Settings())
        # Make the LightRAG HTTP call fail at connection time.
        svc._httpx = AsyncMock()
        svc._httpx.post = AsyncMock(
            side_effect=httpx.ConnectError("connection refused")
        )
        # Keep the real embedding API out of the test.
        svc.embed_query = AsyncMock(return_value=None)

        result = await svc.search("reese bass design", "all", 10, session)

    # The request must have been attempted; otherwise fallback_used=True could
    # merely mean LightRAG was skipped rather than that it failed.
    svc._httpx.post.assert_awaited()

    assert result["fallback_used"] is True
    assert result["total"] >= 1
@pytest.mark.asyncio
async def test_search_lightrag_fallback_on_empty_response(db_engine):
    """An empty LightRAG ``data`` object yields a non-empty fallback result.

    The stubbed HTTP client answers with ``{"data": {}}``. The search must
    report ``fallback_used=True`` and still return at least one item from the
    seeded data.
    """
    # Seeding is needed for its side effect only; the returned ids are unused.
    await _seed_search_data(db_engine)
    session_factory = async_sessionmaker(
        db_engine, class_=AsyncSession, expire_on_commit=False
    )

    async with session_factory() as session:
        from config import Settings

        svc = SearchService(settings=Settings())
        # LightRAG answers successfully but with nothing usable in it.
        mock_resp = _mock_httpx_response({"data": {}})
        svc._httpx = AsyncMock()
        svc._httpx.post = AsyncMock(return_value=mock_resp)
        # Keep the real embedding API out of the test.
        svc.embed_query = AsyncMock(return_value=None)

        result = await svc.search("reese bass design", "all", 10, session)

    # The request must have been attempted; otherwise fallback_used=True could
    # merely mean LightRAG was skipped rather than that it returned nothing.
    svc._httpx.post.assert_awaited()

    assert result["fallback_used"] is True
    # The fallback should still find results in the seeded data.
    assert result["total"] >= 1
@pytest.mark.asyncio
async def test_search_lightrag_skipped_for_short_query(db_engine):
    """A too-short query never reaches the LightRAG HTTP client.

    Searches for the two-character query ``"ab"`` and checks that the stubbed
    client's ``post`` was never called and that the result is flagged with
    ``fallback_used=True``.
    """
    # Seeding is needed for its side effect only; the returned ids are unused.
    await _seed_search_data(db_engine)
    session_factory = async_sessionmaker(
        db_engine, class_=AsyncSession, expire_on_commit=False
    )

    async with session_factory() as session:
        from config import Settings

        svc = SearchService(settings=Settings())
        # Stub the HTTP client; it must stay untouched for short queries.
        mock_httpx = AsyncMock()
        mock_httpx.post = AsyncMock()
        svc._httpx = mock_httpx
        # Keep the real embedding API out of the test.
        svc.embed_query = AsyncMock(return_value=None)

        # "ab" is 2 chars; this test assumes the default
        # lightrag_min_query_length is 3, so the query falls below it.
        result = await svc.search("ab", "all", 10, session)

    # LightRAG should not have been called at all.
    mock_httpx.post.assert_not_called()
    # Skipping LightRAG is reported as a fallback.
    assert result["fallback_used"] is True
@pytest.mark.asyncio
async def test_search_lightrag_result_ordering_preserved(db_engine):
    """LightRAG-sourced items come back with non-increasing scores.

    Uses the same canned two-chunk LightRAG payload as the primary-path test
    and inspects only the items tagged ``"LightRAG graph match"``.
    """
    seed = await _seed_search_data(db_engine)
    session_factory = async_sessionmaker(
        db_engine, class_=AsyncSession, expire_on_commit=False
    )

    async with session_factory() as session:
        from config import Settings

        svc = SearchService(settings=Settings())
        mock_resp = _mock_httpx_response(_make_lightrag_response(seed))
        svc._httpx = AsyncMock()
        svc._httpx.post = AsyncMock(return_value=mock_resp)
        # Keep the real embedding API out of the test.
        svc.embed_query = AsyncMock(return_value=None)

        result = await svc.search("reese bass design", "all", 10, session)

    lightrag_items = [
        item
        for item in result["items"]
        if item.get("match_context") == "LightRAG graph match"
    ]
    # Without this guard the test would pass vacuously when the primary path
    # produced no LightRAG items at all.
    assert lightrag_items, "expected at least one LightRAG-sourced item"

    scores = [item["score"] for item in lightrag_items]
    assert scores == sorted(scores, reverse=True), f"Scores not descending: {scores}"
    # The top score is only pinned when there is an actual ranking to compare.
    if len(scores) >= 2:
        assert scores[0] == 1.0
@pytest.mark.asyncio
async def test_search_lightrag_fallback_on_http_error(db_engine):
    """A 500 reply from LightRAG yields a non-empty fallback result.

    The stubbed HTTP client returns a real ``httpx.Response`` with status 500.
    The search must report ``fallback_used=True`` and return at least one
    item.
    """
    # Seeding is needed for its side effect only; the returned ids are unused.
    await _seed_search_data(db_engine)
    session_factory = async_sessionmaker(
        db_engine, class_=AsyncSession, expire_on_commit=False
    )

    async with session_factory() as session:
        from config import Settings

        svc = SearchService(settings=Settings())
        # A request is attached so that raise_for_status() can be called on
        # the response and raise for the 500 status.
        error_resp = httpx.Response(
            status_code=500,
            text="Internal Server Error",
            request=httpx.Request("POST", "http://mock/query/data"),
        )
        svc._httpx = AsyncMock()
        svc._httpx.post = AsyncMock(return_value=error_resp)
        # Keep the real embedding API out of the test.
        svc.embed_query = AsyncMock(return_value=None)

        result = await svc.search("reese bass design", "all", 10, session)

    # The request must have been attempted; otherwise fallback_used=True could
    # merely mean LightRAG was skipped rather than that it errored.
    svc._httpx.post.assert_awaited()

    assert result["fallback_used"] is True
    assert result["total"] >= 1