test: Added 7 LightRAG integration tests verifying primary search path…
- "backend/tests/test_search.py" GSD-Task: S01/T02
This commit is contained in:
parent
4917fd3a32
commit
b3f52cc301
1 changed file with 256 additions and 0 deletions
|
|
@ -3,6 +3,10 @@
|
||||||
Tests run against a real PostgreSQL test database via httpx.AsyncClient.
|
Tests run against a real PostgreSQL test database via httpx.AsyncClient.
|
||||||
SearchService is mocked at the router dependency level so we can test
|
SearchService is mocked at the router dependency level so we can test
|
||||||
endpoint behavior without requiring external embedding API or Qdrant.
|
endpoint behavior without requiring external embedding API or Qdrant.
|
||||||
|
|
||||||
|
LightRAG integration tests mock httpx calls at the service-instance level
|
||||||
|
to exercise _lightrag_search, result mapping, and fallback behavior with
|
||||||
|
real DB lookups.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
@ -10,6 +14,7 @@ from __future__ import annotations
|
||||||
import uuid
|
import uuid
|
||||||
from unittest.mock import AsyncMock, MagicMock, patch
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
import httpx
|
||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
from httpx import AsyncClient
|
from httpx import AsyncClient
|
||||||
|
|
@ -710,3 +715,254 @@ async def test_keyword_search_match_context_multi_token(db_engine):
|
||||||
# The match_context should contain both creator and another field
|
# The match_context should contain both creator and another field
|
||||||
contexts = [r["match_context"] for r in items]
|
contexts = [r["match_context"] for r in items]
|
||||||
assert any("Creator: Mr. Bill" in c for c in contexts)
|
assert any("Creator: Mr. Bill" in c for c in contexts)
|
||||||
|
|
||||||
|
|
||||||
|
# ── LightRAG integration tests ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _make_lightrag_response(seed: dict) -> dict:
|
||||||
|
"""Build a realistic LightRAG /query/data response body.
|
||||||
|
|
||||||
|
Uses seed data to construct file_source paths that match seeded technique
|
||||||
|
pages (slug + creator_id format).
|
||||||
|
"""
|
||||||
|
return {
|
||||||
|
"data": {
|
||||||
|
"chunks": [
|
||||||
|
{
|
||||||
|
"content": "Layering multiple snare samples for punch and body",
|
||||||
|
"file_path": f"technique:reese-bass-design:creator:{seed['creator1_id']}",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content": "Granular techniques for pad textures",
|
||||||
|
"file_path": f"technique:granular-pad-textures:creator:{seed['creator2_id']}",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
"entities": [
|
||||||
|
{"entity_name": "Reese Bass Design"},
|
||||||
|
{"entity_name": "Granular Pad Textures"},
|
||||||
|
],
|
||||||
|
"relationships": [
|
||||||
|
{"source": "Reese Bass Design", "target": "FM Bass Layering", "relationship": "related_to"},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _mock_httpx_response(body: dict, status_code: int = 200) -> httpx.Response:
    """Wrap *body* as the JSON payload of a genuine ``httpx.Response``.

    The response carries *status_code* (200 unless overridden) and is bound
    to a placeholder POST request aimed at ``http://mock/query/data``.
    """
    placeholder_request = httpx.Request("POST", "http://mock/query/data")
    return httpx.Response(
        status_code=status_code,
        json=body,
        request=placeholder_request,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_search_lightrag_primary_path(db_engine):
    """A successful LightRAG reply is served as-is, with fallback_used=False."""
    seed = await _seed_search_data(db_engine)
    make_session = async_sessionmaker(
        db_engine, class_=AsyncSession, expire_on_commit=False
    )

    async with make_session() as session:
        from config import Settings

        service = SearchService(settings=Settings())

        # Replace the service's HTTP client with a stub whose POST hands back
        # the canned LightRAG payload.
        canned_reply = _mock_httpx_response(_make_lightrag_response(seed))
        service._httpx = AsyncMock()
        service._httpx.post = AsyncMock(return_value=canned_reply)

        # Stub embed_query as well so no real embedding API is contacted.
        service.embed_query = AsyncMock(return_value=None)

        result = await service.search("reese bass design", "all", 10, session)

        assert result["fallback_used"] is False
        assert result["total"] >= 1
        assert "reese-bass-design" in [hit["slug"] for hit in result["items"]]

        # Every item must expose the fields of the SearchResponse schema.
        expected_fields = (
            "type",
            "title",
            "slug",
            "score",
            "creator_name",
            "match_context",
        )
        for hit in result["items"]:
            for field_name in expected_fields:
                assert field_name in hit

        # At least one item must be tagged as coming from LightRAG.
        graph_hits = [
            hit
            for hit in result["items"]
            if hit.get("match_context") == "LightRAG graph match"
        ]
        assert len(graph_hits) >= 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_search_lightrag_fallback_on_timeout(db_engine):
    """When LightRAG times out, search falls back to keyword engine.

    The stubbed HTTP client raises ``httpx.TimeoutException`` on ``post``.
    The test expects ``fallback_used`` to be True and the
    ``reese-bass-design`` slug to still appear in the results.
    """
    # The seed helper's return value is not needed in this test.
    await _seed_search_data(db_engine)

    session_factory = async_sessionmaker(
        db_engine, class_=AsyncSession, expire_on_commit=False
    )
    async with session_factory() as session:
        from config import Settings

        svc = SearchService(settings=Settings())

        # Mock httpx.post to raise TimeoutException
        svc._httpx = AsyncMock()
        svc._httpx.post = AsyncMock(side_effect=httpx.TimeoutException("read timed out"))

        # Mock embed_query to avoid hitting real embedding API (Qdrant fallback path)
        svc.embed_query = AsyncMock(return_value=None)

        result = await svc.search("reese bass design", "all", 10, session)

        # Guard against a vacuous pass: the fallback must be triggered by the
        # simulated timeout, not by the LightRAG call being skipped.
        svc._httpx.post.assert_called()

        # Should fall back — keyword engine has "Reese Bass Design" from seed data
        assert result["fallback_used"] is True
        assert result["total"] >= 1
        # Results should come from keyword search (seeded data matches "reese bass")
        slugs = [item["slug"] for item in result["items"]]
        assert "reese-bass-design" in slugs
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_search_lightrag_fallback_on_connection_error(db_engine):
    """When LightRAG connection fails, search falls back to keyword engine.

    The stubbed HTTP client raises ``httpx.ConnectError`` on ``post``. The
    test expects ``fallback_used`` to be True and at least one result.
    """
    # The seed helper's return value is not needed in this test.
    await _seed_search_data(db_engine)

    session_factory = async_sessionmaker(
        db_engine, class_=AsyncSession, expire_on_commit=False
    )
    async with session_factory() as session:
        from config import Settings

        svc = SearchService(settings=Settings())

        # Mock httpx.post to raise ConnectError
        svc._httpx = AsyncMock()
        svc._httpx.post = AsyncMock(
            side_effect=httpx.ConnectError("connection refused")
        )

        # Mock embed_query to avoid hitting real embedding API
        svc.embed_query = AsyncMock(return_value=None)

        result = await svc.search("reese bass design", "all", 10, session)

        # Guard against a vacuous pass: the fallback must be triggered by the
        # simulated connection failure, not by the LightRAG call being skipped.
        svc._httpx.post.assert_called()

        assert result["fallback_used"] is True
        assert result["total"] >= 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_search_lightrag_fallback_on_empty_response(db_engine):
    """When LightRAG returns empty data, search falls back to keyword engine.

    The stubbed HTTP client answers ``post`` with a 200 response whose body
    is ``{"data": {}}``. The test expects ``fallback_used`` to be True and at
    least one result.
    """
    # The seed helper's return value is not needed in this test.
    await _seed_search_data(db_engine)

    session_factory = async_sessionmaker(
        db_engine, class_=AsyncSession, expire_on_commit=False
    )
    async with session_factory() as session:
        from config import Settings

        svc = SearchService(settings=Settings())

        # Mock httpx.post to return empty data
        mock_resp = _mock_httpx_response({"data": {}})
        svc._httpx = AsyncMock()
        svc._httpx.post = AsyncMock(return_value=mock_resp)

        # Mock embed_query to avoid hitting real embedding API
        svc.embed_query = AsyncMock(return_value=None)

        result = await svc.search("reese bass design", "all", 10, session)

        # Guard against a vacuous pass: the fallback must be triggered by the
        # empty payload, not by the LightRAG call being skipped.
        svc._httpx.post.assert_called()

        assert result["fallback_used"] is True
        # Keyword fallback should still find results from seed data
        assert result["total"] >= 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_search_lightrag_skipped_for_short_query(db_engine):
    """Queries shorter than lightrag_min_query_length skip LightRAG entirely.

    Searches for the two-character query ``"ab"`` and expects that the
    stubbed HTTP client's ``post`` is never called and that ``fallback_used``
    is True.
    """
    # The seed helper's return value is not needed in this test.
    await _seed_search_data(db_engine)

    session_factory = async_sessionmaker(
        db_engine, class_=AsyncSession, expire_on_commit=False
    )
    async with session_factory() as session:
        from config import Settings

        svc = SearchService(settings=Settings())

        # Mock httpx — should NOT be called for short queries
        mock_httpx = AsyncMock()
        mock_httpx.post = AsyncMock()
        svc._httpx = mock_httpx

        # Mock embed_query to avoid real calls
        svc.embed_query = AsyncMock(return_value=None)

        # "ab" is 2 chars, below the default min_query_length of 3
        result = await svc.search("ab", "all", 10, session)

        # LightRAG should not have been called
        mock_httpx.post.assert_not_called()
        # fallback_used should be True since LightRAG was skipped
        assert result["fallback_used"] is True
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_search_lightrag_result_ordering_preserved(db_engine):
    """LightRAG results maintain retrieval-rank ordering with decreasing scores.

    The stubbed HTTP client returns the canned two-chunk payload. The test
    requires the primary path to be used, at least one item tagged
    ``"LightRAG graph match"``, scores in non-increasing order, and a top
    score of 1.0.
    """
    seed = await _seed_search_data(db_engine)

    session_factory = async_sessionmaker(
        db_engine, class_=AsyncSession, expire_on_commit=False
    )
    async with session_factory() as session:
        from config import Settings

        svc = SearchService(settings=Settings())

        mock_resp = _mock_httpx_response(_make_lightrag_response(seed))
        svc._httpx = AsyncMock()
        svc._httpx.post = AsyncMock(return_value=mock_resp)
        svc.embed_query = AsyncMock(return_value=None)

        result = await svc.search("reese bass design", "all", 10, session)

        # The ordering claim only makes sense on the LightRAG primary path.
        assert result["fallback_used"] is False

        lightrag_items = [
            i for i in result["items"] if i.get("match_context") == "LightRAG graph match"
        ]
        # Fail loudly instead of passing vacuously when nothing was mapped;
        # previously every check below was skipped for fewer than two items.
        assert lightrag_items, "expected at least one LightRAG-sourced item"

        # LightRAG items should have scores in descending order
        scores = [item["score"] for item in lightrag_items]
        assert scores == sorted(scores, reverse=True), f"Scores not descending: {scores}"
        # First result should have highest score (1.0)
        assert scores[0] == 1.0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_search_lightrag_fallback_on_http_error(db_engine):
    """When LightRAG returns a 500 status, search falls back gracefully.

    The stubbed HTTP client answers ``post`` with a 500 response. The test
    expects ``fallback_used`` to be True and at least one result.
    """
    # The seed helper's return value is not needed in this test.
    await _seed_search_data(db_engine)

    session_factory = async_sessionmaker(
        db_engine, class_=AsyncSession, expire_on_commit=False
    )
    async with session_factory() as session:
        from config import Settings

        svc = SearchService(settings=Settings())

        # Return a 500 response — raise_for_status() will raise
        error_resp = httpx.Response(
            status_code=500,
            text="Internal Server Error",
            request=httpx.Request("POST", "http://mock/query/data"),
        )
        svc._httpx = AsyncMock()
        svc._httpx.post = AsyncMock(return_value=error_resp)
        svc.embed_query = AsyncMock(return_value=None)

        result = await svc.search("reese bass design", "all", 10, session)

        # Guard against a vacuous pass: the fallback must be triggered by the
        # 500 response, not by the LightRAG call being skipped.
        svc._httpx.post.assert_called()

        assert result["fallback_used"] is True
        assert result["total"] >= 1
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue