chrysopedia/backend/scripts/lightrag_query.py
jlightner 17b43d9778 feat: Added LightRAG /query/data as primary search engine with file_sou…
- "backend/config.py"
- "backend/search_service.py"

GSD-Task: S01/T01
2026-04-04 04:44:24 +00:00

181 lines
5.2 KiB
Python

#!/usr/bin/env python3
"""Query LightRAG with optional creator scoping.

A developer CLI for testing LightRAG queries, including creator-biased
retrieval using ll_keywords. Also serves as the foundation for
creator-scoped chat in M021.

Usage:
    # Basic query
    python3 /app/scripts/lightrag_query.py --query "snare design"

    # Creator-scoped query
    python3 /app/scripts/lightrag_query.py --query "snare design" --creator "COPYCATT"

    # Different modes
    python3 /app/scripts/lightrag_query.py --query "bass techniques" --mode local

    # JSON output
    python3 /app/scripts/lightrag_query.py --query "reverb" --json

    # Context only (no LLM generation)
    python3 /app/scripts/lightrag_query.py --query "reverb" --context-only
"""
from __future__ import annotations
import argparse
import json
import os
import sys
import time
from typing import Any
import httpx
def query_lightrag(
    lightrag_url: str,
    query: str,
    mode: str = "hybrid",
    creator: str | None = None,
    context_only: bool = False,
    top_k: int | None = None,
) -> dict[str, Any]:
    """Query LightRAG with optional creator scoping.

    POSTs a JSON payload to ``<lightrag_url>/query`` and condenses the reply
    into a small dict. When a creator name is provided, it is passed as a
    low-level keyword (``ll_keywords``) and appended to the query text to
    bias retrieval toward documents mentioning that creator.

    This function never raises for transport, HTTP-status, or malformed-reply
    problems; they are reported through the ``error`` key instead.

    Parameters
    ----------
    lightrag_url:
        LightRAG API base URL. A trailing slash is tolerated.
    query:
        The search/question text.
    mode:
        Query mode: local, global, hybrid, naive, mix, bypass.
    creator:
        Optional creator name to bias retrieval toward.
    context_only:
        If True, sets ``only_need_context`` so the retrieved context is
        returned without LLM generation.
    top_k:
        Number of top items to retrieve (optional). Omitted from the payload
        when ``None`` or ``0``.

    Returns
    -------
    Dict with keys:
        ``response`` (str), ``references`` (list), ``latency_ms`` (float,
        wall time of the HTTP call), ``error`` (str, empty on success).
    """
    # Avoid "//query" when the base URL was configured with a trailing slash.
    url = f"{lightrag_url.rstrip('/')}/query"

    payload: dict[str, Any] = {
        "query": query,
        "mode": mode,
        "include_references": True,
    }
    if creator:
        # Use ll_keywords to bias retrieval toward the creator ...
        payload["ll_keywords"] = [creator]
        # ... and also append creator context to the query for better matching.
        payload["query"] = f"{query} (by {creator})"
    if context_only:
        payload["only_need_context"] = True
    if top_k:
        payload["top_k"] = top_k

    start = time.monotonic()

    def _elapsed_ms() -> float:
        return (time.monotonic() - start) * 1000

    def _failure(message: str, latency: float) -> dict[str, Any]:
        # Same shape as the success result so callers need no special-casing.
        return {"response": "", "references": [], "latency_ms": latency, "error": message}

    try:
        resp = httpx.post(url, json=payload, timeout=300)
        latency_ms = _elapsed_ms()
        resp.raise_for_status()
        data = resp.json()
    except (httpx.HTTPError, httpx.InvalidURL, ValueError) as e:
        # httpx.HTTPError covers transport failures and non-2xx statuses.
        # InvalidURL is not an HTTPError subclass, and resp.json() raises
        # ValueError (json.JSONDecodeError) when the body is not JSON.
        return _failure(str(e), _elapsed_ms())

    if not isinstance(data, dict):
        # A JSON array/scalar has no .get(); report it instead of crashing.
        return _failure(
            f"Unexpected response payload of type {type(data).__name__}",
            latency_ms,
        )

    return {
        # "or" also normalises explicit JSON nulls to the documented types.
        "response": data.get("response") or "",
        "references": data.get("references") or [],
        "latency_ms": latency_ms,
        "error": "",
    }
def format_response(result: dict[str, Any], json_output: bool = False) -> str:
    """Format the query result for display.

    Parameters
    ----------
    result:
        Dict with the keys ``response``, ``references``, ``latency_ms`` and
        ``error``.
    json_output:
        If True, return ``result`` serialised as indented JSON and skip all
        other formatting (including the error short-circuit).

    Returns
    -------
    The JSON document, a single ``ERROR: ...`` line when ``result["error"]``
    is non-empty, or a human-readable report: latency, word count, the
    response framed by horizontal rules, and a numbered reference list.
    """
    if json_output:
        return json.dumps(result, indent=2, default=str)

    if result["error"]:
        return f"ERROR: {result['error']}"

    # The separator was an empty string multiplied by 60, which rendered as a
    # blank line; use a real 60-column rule to frame the response text.
    rule = "-" * 60
    response = result["response"]

    lines = [
        f"Latency: {result['latency_ms']:.0f}ms",
        f"Word count: {len(response.split())}",
        "",
        rule,
        response,
        rule,
    ]

    # Tolerate both a missing key and an explicit None.
    refs = result.get("references") or []
    if refs:
        lines.append(f"\nReferences ({len(refs)}):")
        for ref in refs:
            fp = ref.get("file_path", "?")
            rid = ref.get("reference_id", "?")
            lines.append(f" [{rid}] {fp}")

    return "\n".join(lines)
def main() -> None:
    """CLI entry point: parse arguments, run one LightRAG query, print it.

    The LightRAG base URL defaults to the ``LIGHTRAG_URL`` environment
    variable, falling back to ``http://chrysopedia-lightrag:9621``.

    With ``--json`` the informational banner is written to stderr so that
    stdout contains only the JSON document and can be piped to other tools.
    Exits with status 1 when the query result carries an error.
    """
    parser = argparse.ArgumentParser(description="Query LightRAG with optional creator scoping")
    parser.add_argument("--query", "-q", required=True, help="Query text")
    parser.add_argument("--creator", "-c", default=None, help="Creator name to bias retrieval")
    parser.add_argument(
        "--mode", "-m",
        default="hybrid",
        choices=["local", "global", "hybrid", "naive", "mix", "bypass"],
        help="Query mode (default: hybrid)",
    )
    parser.add_argument("--context-only", action="store_true", help="Return context without LLM generation")
    parser.add_argument("--top-k", type=int, default=None, help="Number of top items to retrieve")
    parser.add_argument("--json", action="store_true", dest="json_output", help="Output as JSON")
    parser.add_argument(
        "--lightrag-url",
        default=os.environ.get("LIGHTRAG_URL", "http://chrysopedia-lightrag:9621"),
        help="LightRAG API base URL",
    )
    args = parser.parse_args()

    # Keep stdout machine-readable in JSON mode; text mode is unchanged.
    banner = sys.stderr if args.json_output else sys.stdout
    print(f"Query: {args.query}", file=banner)
    if args.creator:
        print(f"Creator scope: {args.creator}", file=banner)
    print(f"Mode: {args.mode}", file=banner)
    print("Querying LightRAG...", file=banner)

    result = query_lightrag(
        lightrag_url=args.lightrag_url,
        query=args.query,
        mode=args.mode,
        creator=args.creator,
        context_only=args.context_only,
        top_k=args.top_k,
    )
    print(format_response(result, json_output=args.json_output))

    # Signal failure to the calling shell/script instead of always exiting 0.
    if result["error"]:
        sys.exit(1)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()