#!/usr/bin/env python3 """Query LightRAG with optional creator scoping. A developer CLI for testing LightRAG queries, including creator-biased retrieval using ll_keywords. Also serves as the foundation for creator-scoped chat in M021. Usage: # Basic query python3 /app/scripts/lightrag_query.py --query "snare design" # Creator-scoped query python3 /app/scripts/lightrag_query.py --query "snare design" --creator "COPYCATT" # Different modes python3 /app/scripts/lightrag_query.py --query "bass techniques" --mode local # JSON output python3 /app/scripts/lightrag_query.py --query "reverb" --json # Context only (no LLM generation) python3 /app/scripts/lightrag_query.py --query "reverb" --context-only """ from __future__ import annotations import argparse import json import os import sys import time from typing import Any import httpx def query_lightrag( lightrag_url: str, query: str, mode: str = "hybrid", creator: str | None = None, context_only: bool = False, top_k: int | None = None, ) -> dict[str, Any]: """Query LightRAG with optional creator scoping. When a creator name is provided, it's passed as a low-level keyword to bias retrieval toward documents mentioning that creator. Parameters ---------- lightrag_url: LightRAG API base URL. query: The search/question text. mode: Query mode: local, global, hybrid, naive, mix, bypass. creator: Optional creator name to bias retrieval toward. context_only: If True, returns retrieved context without LLM generation. top_k: Number of top items to retrieve (optional). Returns ------- Dict with keys: response, references, latency_ms, error. """ url = f"{lightrag_url}/query" payload: dict[str, Any] = { "query": query, "mode": mode, "include_references": True, } if creator: # Use ll_keywords to bias retrieval toward the creator payload["ll_keywords"] = [creator] # Also prepend creator context to the query for better matching payload["query"] = f"{query} (by {creator})" if context_only: payload["only_need_context"] = True if top_k: payload["top_k"] = top_k start = time.monotonic() try: resp = httpx.post(url, json=payload, timeout=300) latency_ms = (time.monotonic() - start) * 1000 resp.raise_for_status() data = resp.json() except httpx.HTTPError as e: latency_ms = (time.monotonic() - start) * 1000 return {"response": "", "references": [], "latency_ms": latency_ms, "error": str(e)} return { "response": data.get("response", ""), "references": data.get("references", []), "latency_ms": latency_ms, "error": "", } def format_response(result: dict[str, Any], json_output: bool = False) -> str: """Format the query result for display.""" if json_output: return json.dumps(result, indent=2, default=str) lines = [] if result["error"]: lines.append(f"ERROR: {result['error']}") return "\n".join(lines) lines.append(f"Latency: {result['latency_ms']:.0f}ms") lines.append(f"Word count: {len(result['response'].split())}") lines.append("") # Response text lines.append("─" * 60) lines.append(result["response"]) lines.append("─" * 60) # References refs = result.get("references", []) if refs: lines.append(f"\nReferences ({len(refs)}):") for ref in refs: fp = ref.get("file_path", "?") rid = ref.get("reference_id", "?") lines.append(f" [{rid}] {fp}") return "\n".join(lines) def main(): parser = argparse.ArgumentParser(description="Query LightRAG with optional creator scoping") parser.add_argument("--query", "-q", required=True, help="Query text") parser.add_argument("--creator", "-c", default=None, help="Creator name to bias retrieval") parser.add_argument( "--mode", "-m", default="hybrid", choices=["local", "global", "hybrid", "naive", "mix", "bypass"], help="Query mode (default: hybrid)", ) parser.add_argument("--context-only", action="store_true", help="Return context without LLM generation") parser.add_argument("--top-k", type=int, default=None, help="Number of top items to retrieve") parser.add_argument("--json", action="store_true", dest="json_output", help="Output as JSON") parser.add_argument( "--lightrag-url", default=os.environ.get("LIGHTRAG_URL", "http://chrysopedia-lightrag:9621"), help="LightRAG API base URL", ) args = parser.parse_args() if args.creator: print(f"Query: {args.query}") print(f"Creator scope: {args.creator}") print(f"Mode: {args.mode}") else: print(f"Query: {args.query}") print(f"Mode: {args.mode}") print("Querying LightRAG...") result = query_lightrag( lightrag_url=args.lightrag_url, query=args.query, mode=args.mode, creator=args.creator, context_only=args.context_only, top_k=args.top_k, ) print(format_response(result, json_output=args.json_output)) if __name__ == "__main__": main()