feat: Added LightRAG /query/data as primary search engine with file_sou…
- "backend/config.py" - "backend/search_service.py" GSD-Task: S01/T01
This commit is contained in:
parent
90dea3bde1
commit
4917fd3a32
25 changed files with 1723 additions and 46 deletions
|
|
@ -11,7 +11,7 @@ ssh ub01
|
||||||
cd /vmPool/r/repos/xpltdco/chrysopedia
|
cd /vmPool/r/repos/xpltdco/chrysopedia
|
||||||
```
|
```
|
||||||
|
|
||||||
**GitHub:** https://github.com/xpltdco/chrysopedia (private, xpltdco org)
|
**Git:** https://git.xpltd.co/xpltdco/chrysopedia (Forgejo, xpltdco org)
|
||||||
|
|
||||||
## Why?
|
## Why?
|
||||||
|
|
||||||
|
|
|
||||||
37
alembic/versions/018_add_impersonation_log.py
Normal file
37
alembic/versions/018_add_impersonation_log.py
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
"""Add impersonation_log table for admin impersonation audit trail.
|
||||||
|
|
||||||
|
Revision ID: 018_add_impersonation_log
|
||||||
|
Revises: 017_add_consent_tables
|
||||||
|
"""
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from sqlalchemy.dialects.postgresql import UUID
|
||||||
|
|
||||||
|
|
||||||
|
revision = "018_add_impersonation_log"
|
||||||
|
down_revision = "017_add_consent_tables"
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
op.create_table(
|
||||||
|
"impersonation_log",
|
||||||
|
sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
|
||||||
|
sa.Column("admin_user_id", UUID(as_uuid=True), sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False),
|
||||||
|
sa.Column("target_user_id", UUID(as_uuid=True), sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False),
|
||||||
|
sa.Column("action", sa.String(10), nullable=False), # 'start' or 'stop'
|
||||||
|
sa.Column("ip_address", sa.String(45), nullable=True),
|
||||||
|
sa.Column("created_at", sa.DateTime, server_default=sa.func.now(), nullable=False),
|
||||||
|
)
|
||||||
|
op.create_index("ix_impersonation_log_admin", "impersonation_log", ["admin_user_id"])
|
||||||
|
op.create_index("ix_impersonation_log_target", "impersonation_log", ["target_user_id"])
|
||||||
|
op.create_index("ix_impersonation_log_created", "impersonation_log", ["created_at"])
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
op.drop_index("ix_impersonation_log_created")
|
||||||
|
op.drop_index("ix_impersonation_log_target")
|
||||||
|
op.drop_index("ix_impersonation_log_admin")
|
||||||
|
op.drop_table("impersonation_log")
|
||||||
|
|
@ -56,6 +56,32 @@ def create_access_token(
|
||||||
return jwt.encode(payload, settings.app_secret_key, algorithm=_ALGORITHM)
|
return jwt.encode(payload, settings.app_secret_key, algorithm=_ALGORITHM)
|
||||||
|
|
||||||
|
|
||||||
|
_IMPERSONATION_EXPIRE_MINUTES = 60 # 1 hour
|
||||||
|
|
||||||
|
|
||||||
|
def create_impersonation_token(
|
||||||
|
admin_user_id: uuid.UUID | str,
|
||||||
|
target_user_id: uuid.UUID | str,
|
||||||
|
target_role: str,
|
||||||
|
) -> str:
|
||||||
|
"""Create a scoped JWT for admin impersonation.
|
||||||
|
|
||||||
|
The token has sub=target_user_id so get_current_user loads the target,
|
||||||
|
plus original_user_id so the system knows it's impersonation.
|
||||||
|
"""
|
||||||
|
settings = get_settings()
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
payload = {
|
||||||
|
"sub": str(target_user_id),
|
||||||
|
"role": target_role,
|
||||||
|
"original_user_id": str(admin_user_id),
|
||||||
|
"type": "impersonation",
|
||||||
|
"iat": now,
|
||||||
|
"exp": now + timedelta(minutes=_IMPERSONATION_EXPIRE_MINUTES),
|
||||||
|
}
|
||||||
|
return jwt.encode(payload, settings.app_secret_key, algorithm=_ALGORITHM)
|
||||||
|
|
||||||
|
|
||||||
def decode_access_token(token: str) -> dict:
|
def decode_access_token(token: str) -> dict:
|
||||||
"""Decode and validate a JWT. Raises on expiry or malformed tokens."""
|
"""Decode and validate a JWT. Raises on expiry or malformed tokens."""
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
|
|
@ -85,7 +111,11 @@ async def get_current_user(
|
||||||
token: Annotated[str, Depends(oauth2_scheme)],
|
token: Annotated[str, Depends(oauth2_scheme)],
|
||||||
session: Annotated[AsyncSession, Depends(get_session)],
|
session: Annotated[AsyncSession, Depends(get_session)],
|
||||||
) -> User:
|
) -> User:
|
||||||
"""Decode JWT, load User from DB, raise 401 if missing or inactive."""
|
"""Decode JWT, load User from DB, raise 401 if missing or inactive.
|
||||||
|
|
||||||
|
If the token contains an original_user_id claim (impersonation),
|
||||||
|
sets _impersonating_admin_id on the returned user object.
|
||||||
|
"""
|
||||||
payload = decode_access_token(token)
|
payload = decode_access_token(token)
|
||||||
user_id = payload.get("sub")
|
user_id = payload.get("sub")
|
||||||
result = await session.execute(select(User).where(User.id == user_id))
|
result = await session.execute(select(User).where(User.id == user_id))
|
||||||
|
|
@ -95,6 +125,8 @@ async def get_current_user(
|
||||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||||
detail="User not found or inactive",
|
detail="User not found or inactive",
|
||||||
)
|
)
|
||||||
|
# Attach impersonation metadata (non-column runtime attribute)
|
||||||
|
user._impersonating_admin_id = payload.get("original_user_id") # type: ignore[attr-defined]
|
||||||
return user
|
return user
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -112,3 +144,16 @@ def require_role(required_role: UserRole):
|
||||||
return current_user
|
return current_user
|
||||||
|
|
||||||
return _check
|
return _check
|
||||||
|
|
||||||
|
|
||||||
|
async def reject_impersonation(
|
||||||
|
current_user: Annotated[User, Depends(get_current_user)],
|
||||||
|
) -> User:
|
||||||
|
"""Dependency that blocks write operations during impersonation."""
|
||||||
|
admin_id = getattr(current_user, "_impersonating_admin_id", None)
|
||||||
|
if admin_id is not None:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_403_FORBIDDEN,
|
||||||
|
detail="Write operations are not allowed during impersonation",
|
||||||
|
)
|
||||||
|
return current_user
|
||||||
|
|
|
||||||
|
|
@ -60,6 +60,11 @@ class Settings(BaseSettings):
|
||||||
qdrant_url: str = "http://localhost:6333"
|
qdrant_url: str = "http://localhost:6333"
|
||||||
qdrant_collection: str = "chrysopedia"
|
qdrant_collection: str = "chrysopedia"
|
||||||
|
|
||||||
|
# LightRAG
|
||||||
|
lightrag_url: str = "http://chrysopedia-lightrag:9621"
|
||||||
|
lightrag_search_timeout: float = 2.0
|
||||||
|
lightrag_min_query_length: int = 3
|
||||||
|
|
||||||
# Prompt templates
|
# Prompt templates
|
||||||
prompts_path: str = "./prompts"
|
prompts_path: str = "./prompts"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ from fastapi import FastAPI
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
|
||||||
from config import get_settings
|
from config import get_settings
|
||||||
from routers import auth, consent, creator_dashboard, creators, health, ingest, pipeline, reports, search, stats, techniques, topics, videos
|
from routers import admin, auth, consent, creator_dashboard, creators, health, ingest, pipeline, reports, search, stats, techniques, topics, videos
|
||||||
|
|
||||||
|
|
||||||
def _setup_logging() -> None:
|
def _setup_logging() -> None:
|
||||||
|
|
@ -78,6 +78,7 @@ app.add_middleware(
|
||||||
app.include_router(health.router)
|
app.include_router(health.router)
|
||||||
|
|
||||||
# Versioned API
|
# Versioned API
|
||||||
|
app.include_router(admin.router, prefix="/api/v1")
|
||||||
app.include_router(auth.router, prefix="/api/v1")
|
app.include_router(auth.router, prefix="/api/v1")
|
||||||
app.include_router(consent.router, prefix="/api/v1")
|
app.include_router(consent.router, prefix="/api/v1")
|
||||||
app.include_router(creator_dashboard.router, prefix="/api/v1")
|
app.include_router(creator_dashboard.router, prefix="/api/v1")
|
||||||
|
|
|
||||||
|
|
@ -654,3 +654,23 @@ class ConsentAuditLog(Base):
|
||||||
video_consent: Mapped[VideoConsent] = sa_relationship(
|
video_consent: Mapped[VideoConsent] = sa_relationship(
|
||||||
back_populates="audit_entries"
|
back_populates="audit_entries"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ImpersonationLog(Base):
|
||||||
|
"""Audit trail for admin impersonation sessions."""
|
||||||
|
__tablename__ = "impersonation_log"
|
||||||
|
|
||||||
|
id: Mapped[uuid.UUID] = _uuid_pk()
|
||||||
|
admin_user_id: Mapped[uuid.UUID] = mapped_column(
|
||||||
|
ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True,
|
||||||
|
)
|
||||||
|
target_user_id: Mapped[uuid.UUID] = mapped_column(
|
||||||
|
ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True,
|
||||||
|
)
|
||||||
|
action: Mapped[str] = mapped_column(
|
||||||
|
String(10), nullable=False, doc="'start' or 'stop'"
|
||||||
|
)
|
||||||
|
ip_address: Mapped[str | None] = mapped_column(String(45), nullable=True)
|
||||||
|
created_at: Mapped[datetime] = mapped_column(
|
||||||
|
default=_now, server_default=func.now()
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -221,6 +221,7 @@ class QdrantManager:
|
||||||
"type": "key_moment",
|
"type": "key_moment",
|
||||||
"moment_id": moment["moment_id"],
|
"moment_id": moment["moment_id"],
|
||||||
"source_video_id": moment["source_video_id"],
|
"source_video_id": moment["source_video_id"],
|
||||||
|
"creator_id": moment.get("creator_id", ""),
|
||||||
"technique_page_id": moment.get("technique_page_id", ""),
|
"technique_page_id": moment.get("technique_page_id", ""),
|
||||||
"technique_page_slug": moment.get("technique_page_slug", ""),
|
"technique_page_slug": moment.get("technique_page_slug", ""),
|
||||||
"title": moment["title"],
|
"title": moment["title"],
|
||||||
|
|
|
||||||
|
|
@ -1673,11 +1673,12 @@ def stage6_embed_and_index(self, video_id: str, run_id: str | None = None) -> st
|
||||||
video_creator_map: dict[str, str] = {}
|
video_creator_map: dict[str, str] = {}
|
||||||
if video_ids:
|
if video_ids:
|
||||||
rows = session.execute(
|
rows = session.execute(
|
||||||
select(SourceVideo.id, Creator.name)
|
select(SourceVideo.id, Creator.name, Creator.id.label("creator_id"))
|
||||||
.join(Creator, SourceVideo.creator_id == Creator.id)
|
.join(Creator, SourceVideo.creator_id == Creator.id)
|
||||||
.where(SourceVideo.id.in_(video_ids))
|
.where(SourceVideo.id.in_(video_ids))
|
||||||
).all()
|
).all()
|
||||||
video_creator_map = {str(r[0]): r[1] for r in rows}
|
video_creator_map = {str(r[0]): r[1] for r in rows}
|
||||||
|
video_creator_id_map = {str(r[0]): str(r[2]) for r in rows}
|
||||||
|
|
||||||
embed_client = EmbeddingClient(settings)
|
embed_client = EmbeddingClient(settings)
|
||||||
qdrant = QdrantManager(settings)
|
qdrant = QdrantManager(settings)
|
||||||
|
|
@ -1737,6 +1738,7 @@ def stage6_embed_and_index(self, video_id: str, run_id: str | None = None) -> st
|
||||||
moment_dicts.append({
|
moment_dicts.append({
|
||||||
"moment_id": str(m.id),
|
"moment_id": str(m.id),
|
||||||
"source_video_id": str(m.source_video_id),
|
"source_video_id": str(m.source_video_id),
|
||||||
|
"creator_id": video_creator_id_map.get(str(m.source_video_id), ""),
|
||||||
"technique_page_id": tp_id,
|
"technique_page_id": tp_id,
|
||||||
"technique_page_slug": page_id_to_slug.get(tp_id, ""),
|
"technique_page_slug": page_id_to_slug.get(tp_id, ""),
|
||||||
"title": m.title,
|
"title": m.title,
|
||||||
|
|
|
||||||
180
backend/routers/admin.py
Normal file
180
backend/routers/admin.py
Normal file
|
|
@ -0,0 +1,180 @@
|
||||||
|
"""Admin router — user management and impersonation."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Annotated
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, Request, status
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from auth import (
|
||||||
|
create_impersonation_token,
|
||||||
|
decode_access_token,
|
||||||
|
get_current_user,
|
||||||
|
require_role,
|
||||||
|
)
|
||||||
|
from database import get_session
|
||||||
|
from models import ImpersonationLog, User, UserRole
|
||||||
|
|
||||||
|
logger = logging.getLogger("chrysopedia.admin")
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/admin", tags=["admin"])
|
||||||
|
|
||||||
|
_require_admin = require_role(UserRole.admin)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Schemas ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class UserListItem(BaseModel):
|
||||||
|
id: str
|
||||||
|
email: str
|
||||||
|
display_name: str
|
||||||
|
role: str
|
||||||
|
creator_id: str | None
|
||||||
|
is_active: bool
|
||||||
|
|
||||||
|
class Config:
|
||||||
|
from_attributes = True
|
||||||
|
|
||||||
|
|
||||||
|
class ImpersonateResponse(BaseModel):
|
||||||
|
access_token: str
|
||||||
|
token_type: str = "bearer"
|
||||||
|
target_user: UserListItem
|
||||||
|
|
||||||
|
|
||||||
|
class StopImpersonateResponse(BaseModel):
|
||||||
|
message: str
|
||||||
|
|
||||||
|
|
||||||
|
# ── Helpers ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _client_ip(request: Request) -> str | None:
|
||||||
|
"""Best-effort client IP from X-Forwarded-For or direct connection."""
|
||||||
|
forwarded = request.headers.get("x-forwarded-for")
|
||||||
|
if forwarded:
|
||||||
|
return forwarded.split(",")[0].strip()
|
||||||
|
if request.client:
|
||||||
|
return request.client.host
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# ── Endpoints ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/users", response_model=list[UserListItem])
|
||||||
|
async def list_users(
|
||||||
|
_admin: Annotated[User, Depends(_require_admin)],
|
||||||
|
session: Annotated[AsyncSession, Depends(get_session)],
|
||||||
|
):
|
||||||
|
"""List all users. Admin only."""
|
||||||
|
result = await session.execute(
|
||||||
|
select(User).order_by(User.display_name)
|
||||||
|
)
|
||||||
|
users = result.scalars().all()
|
||||||
|
return [
|
||||||
|
UserListItem(
|
||||||
|
id=str(u.id),
|
||||||
|
email=u.email,
|
||||||
|
display_name=u.display_name,
|
||||||
|
role=u.role.value,
|
||||||
|
creator_id=str(u.creator_id) if u.creator_id else None,
|
||||||
|
is_active=u.is_active,
|
||||||
|
)
|
||||||
|
for u in users
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/impersonate/{user_id}", response_model=ImpersonateResponse)
|
||||||
|
async def start_impersonation(
|
||||||
|
user_id: UUID,
|
||||||
|
request: Request,
|
||||||
|
admin: Annotated[User, Depends(_require_admin)],
|
||||||
|
session: Annotated[AsyncSession, Depends(get_session)],
|
||||||
|
):
|
||||||
|
"""Start impersonating a user. Admin only. Returns a scoped JWT."""
|
||||||
|
# Cannot impersonate yourself
|
||||||
|
if admin.id == user_id:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
|
detail="Cannot impersonate yourself",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Load target user
|
||||||
|
result = await session.execute(select(User).where(User.id == user_id))
|
||||||
|
target = result.scalar_one_or_none()
|
||||||
|
if target is None:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_404_NOT_FOUND,
|
||||||
|
detail="Target user not found",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create impersonation token
|
||||||
|
token = create_impersonation_token(
|
||||||
|
admin_user_id=admin.id,
|
||||||
|
target_user_id=target.id,
|
||||||
|
target_role=target.role.value,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Audit log
|
||||||
|
session.add(ImpersonationLog(
|
||||||
|
admin_user_id=admin.id,
|
||||||
|
target_user_id=target.id,
|
||||||
|
action="start",
|
||||||
|
ip_address=_client_ip(request),
|
||||||
|
))
|
||||||
|
await session.commit()
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Impersonation started: admin=%s target=%s",
|
||||||
|
admin.id, target.id,
|
||||||
|
)
|
||||||
|
|
||||||
|
return ImpersonateResponse(
|
||||||
|
access_token=token,
|
||||||
|
target_user=UserListItem(
|
||||||
|
id=str(target.id),
|
||||||
|
email=target.email,
|
||||||
|
display_name=target.display_name,
|
||||||
|
role=target.role.value,
|
||||||
|
creator_id=str(target.creator_id) if target.creator_id else None,
|
||||||
|
is_active=target.is_active,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/impersonate/stop", response_model=StopImpersonateResponse)
|
||||||
|
async def stop_impersonation(
|
||||||
|
request: Request,
|
||||||
|
current_user: Annotated[User, Depends(get_current_user)],
|
||||||
|
session: Annotated[AsyncSession, Depends(get_session)],
|
||||||
|
):
|
||||||
|
"""Stop impersonation. Requires a valid impersonation token."""
|
||||||
|
admin_id = getattr(current_user, "_impersonating_admin_id", None)
|
||||||
|
if admin_id is None:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
|
detail="Not currently impersonating",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Audit log
|
||||||
|
session.add(ImpersonationLog(
|
||||||
|
admin_user_id=admin_id,
|
||||||
|
target_user_id=current_user.id,
|
||||||
|
action="stop",
|
||||||
|
ip_address=_client_ip(request),
|
||||||
|
))
|
||||||
|
await session.commit()
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Impersonation stopped: admin=%s target=%s",
|
||||||
|
admin_id, current_user.id,
|
||||||
|
)
|
||||||
|
|
||||||
|
return StopImpersonateResponse(message="Impersonation ended")
|
||||||
|
|
@ -14,6 +14,7 @@ from auth import (
|
||||||
create_access_token,
|
create_access_token,
|
||||||
get_current_user,
|
get_current_user,
|
||||||
hash_password,
|
hash_password,
|
||||||
|
reject_impersonation,
|
||||||
verify_password,
|
verify_password,
|
||||||
)
|
)
|
||||||
from database import get_session
|
from database import get_session
|
||||||
|
|
@ -120,13 +121,17 @@ async def get_profile(
|
||||||
current_user: Annotated[User, Depends(get_current_user)],
|
current_user: Annotated[User, Depends(get_current_user)],
|
||||||
):
|
):
|
||||||
"""Return the current user's profile."""
|
"""Return the current user's profile."""
|
||||||
return current_user
|
resp = UserResponse.model_validate(current_user)
|
||||||
|
admin_id = getattr(current_user, "_impersonating_admin_id", None)
|
||||||
|
if admin_id is not None:
|
||||||
|
resp.impersonating = True
|
||||||
|
return resp
|
||||||
|
|
||||||
|
|
||||||
@router.put("/me", response_model=UserResponse)
|
@router.put("/me", response_model=UserResponse)
|
||||||
async def update_profile(
|
async def update_profile(
|
||||||
body: UpdateProfileRequest,
|
body: UpdateProfileRequest,
|
||||||
current_user: Annotated[User, Depends(get_current_user)],
|
current_user: Annotated[User, Depends(reject_impersonation)],
|
||||||
session: Annotated[AsyncSession, Depends(get_session)],
|
session: Annotated[AsyncSession, Depends(get_session)],
|
||||||
):
|
):
|
||||||
"""Update the current user's display name and/or password."""
|
"""Update the current user's display name and/or password."""
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ from sqlalchemy import func, select
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from sqlalchemy.orm import selectinload
|
from sqlalchemy.orm import selectinload
|
||||||
|
|
||||||
from auth import get_current_user, require_role
|
from auth import get_current_user, reject_impersonation, require_role
|
||||||
from database import get_session
|
from database import get_session
|
||||||
from models import (
|
from models import (
|
||||||
ConsentAuditLog,
|
ConsentAuditLog,
|
||||||
|
|
@ -175,7 +175,7 @@ async def get_video_consent(
|
||||||
async def update_video_consent(
|
async def update_video_consent(
|
||||||
video_id: uuid.UUID,
|
video_id: uuid.UUID,
|
||||||
body: VideoConsentUpdate,
|
body: VideoConsentUpdate,
|
||||||
current_user: Annotated[User, Depends(get_current_user)],
|
current_user: Annotated[User, Depends(reject_impersonation)],
|
||||||
session: Annotated[AsyncSession, Depends(get_session)],
|
session: Annotated[AsyncSession, Depends(get_session)],
|
||||||
request: Request,
|
request: Request,
|
||||||
):
|
):
|
||||||
|
|
|
||||||
|
|
@ -566,6 +566,7 @@ class UserResponse(BaseModel):
|
||||||
creator_id: uuid.UUID | None = None
|
creator_id: uuid.UUID | None = None
|
||||||
is_active: bool = True
|
is_active: bool = True
|
||||||
created_at: datetime
|
created_at: datetime
|
||||||
|
impersonating: bool = False
|
||||||
|
|
||||||
|
|
||||||
class UpdateProfileRequest(BaseModel):
|
class UpdateProfileRequest(BaseModel):
|
||||||
|
|
|
||||||
547
backend/scripts/compare_search.py
Normal file
547
backend/scripts/compare_search.py
Normal file
|
|
@ -0,0 +1,547 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""A/B comparison of Chrysopedia's Qdrant search vs LightRAG retrieval.
|
||||||
|
|
||||||
|
Runs a set of queries against both backends and produces a scored comparison
|
||||||
|
report. Designed to run inside the chrysopedia-api container (has network
|
||||||
|
access to both services) or via tunneled URLs.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# Dry run — show query set without executing
|
||||||
|
python3 /app/scripts/compare_search.py --dry-run
|
||||||
|
|
||||||
|
# Run first 5 queries
|
||||||
|
python3 /app/scripts/compare_search.py --limit 5
|
||||||
|
|
||||||
|
# Full comparison
|
||||||
|
python3 /app/scripts/compare_search.py
|
||||||
|
|
||||||
|
# Custom URLs
|
||||||
|
python3 /app/scripts/compare_search.py --api-url http://localhost:8000 --lightrag-url http://localhost:9621
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from dataclasses import asdict, dataclass, field
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
logger = logging.getLogger("compare_search")
|
||||||
|
|
||||||
|
# ── Query set ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Real user queries (from search_log)
|
||||||
|
USER_QUERIES = [
|
||||||
|
"squelch",
|
||||||
|
"keota snare",
|
||||||
|
"reverb",
|
||||||
|
"how does keota snare",
|
||||||
|
"bass",
|
||||||
|
"groove",
|
||||||
|
"drums",
|
||||||
|
"fx",
|
||||||
|
"textures",
|
||||||
|
"daw setup",
|
||||||
|
"synthesis",
|
||||||
|
"how does keota",
|
||||||
|
"over-leveling snare to control compression behavior",
|
||||||
|
]
|
||||||
|
|
||||||
|
# Curated domain queries — test different retrieval patterns
|
||||||
|
CURATED_QUERIES = [
|
||||||
|
# Broad topic queries
|
||||||
|
"bass design techniques",
|
||||||
|
"reverb chains and spatial effects",
|
||||||
|
"how to layer drums",
|
||||||
|
# Cross-entity synthesis (LightRAG strength)
|
||||||
|
"what plugins are commonly used for bass sounds",
|
||||||
|
"compare different approaches to snare layering",
|
||||||
|
"how do different producers approach sound design",
|
||||||
|
# Exact lookup (Qdrant strength)
|
||||||
|
"COPYCATT",
|
||||||
|
"Emperor arrangement",
|
||||||
|
# How-to / procedural
|
||||||
|
"how to create tension in a buildup",
|
||||||
|
"step by step resampling workflow",
|
||||||
|
# Concept queries
|
||||||
|
"frequency spectrum balance",
|
||||||
|
"signal chain for drums",
|
||||||
|
]
|
||||||
|
|
||||||
|
ALL_QUERIES = USER_QUERIES + CURATED_QUERIES
|
||||||
|
|
||||||
|
|
||||||
|
# ── Data structures ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class SearchResult:
|
||||||
|
title: str
|
||||||
|
score: float
|
||||||
|
snippet: str
|
||||||
|
result_type: str = ""
|
||||||
|
creator: str = ""
|
||||||
|
slug: str = ""
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class QdrantSearchResponse:
|
||||||
|
query: str
|
||||||
|
results: list[SearchResult] = field(default_factory=list)
|
||||||
|
partial_matches: list[SearchResult] = field(default_factory=list)
|
||||||
|
total: int = 0
|
||||||
|
latency_ms: float = 0.0
|
||||||
|
error: str = ""
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class LightRAGResponse:
|
||||||
|
query: str
|
||||||
|
response_text: str = ""
|
||||||
|
references: list[dict[str, Any]] = field(default_factory=list)
|
||||||
|
latency_ms: float = 0.0
|
||||||
|
error: str = ""
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class QueryComparison:
|
||||||
|
query: str
|
||||||
|
query_type: str # "user" or "curated"
|
||||||
|
qdrant: QdrantSearchResponse | None = None
|
||||||
|
lightrag: LightRAGResponse | None = None
|
||||||
|
# Scores (populated by scoring phase)
|
||||||
|
qdrant_relevance: float = 0.0
|
||||||
|
qdrant_coverage: int = 0
|
||||||
|
qdrant_diversity: int = 0
|
||||||
|
lightrag_relevance: float = 0.0
|
||||||
|
lightrag_coverage: int = 0
|
||||||
|
lightrag_answer_quality: float = 0.0
|
||||||
|
winner: str = "" # "qdrant", "lightrag", "tie"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Qdrant search client ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def query_qdrant_search(api_url: str, query: str, limit: int = 20) -> QdrantSearchResponse:
|
||||||
|
"""Query the Chrysopedia search API (Qdrant + keyword)."""
|
||||||
|
url = f"{api_url}/api/v1/search"
|
||||||
|
params = {"q": query, "scope": "all", "limit": limit}
|
||||||
|
|
||||||
|
start = time.monotonic()
|
||||||
|
try:
|
||||||
|
resp = httpx.get(url, params=params, timeout=15)
|
||||||
|
latency = (time.monotonic() - start) * 1000
|
||||||
|
resp.raise_for_status()
|
||||||
|
data = resp.json()
|
||||||
|
except httpx.HTTPError as e:
|
||||||
|
latency = (time.monotonic() - start) * 1000
|
||||||
|
return QdrantSearchResponse(query=query, latency_ms=latency, error=str(e))
|
||||||
|
|
||||||
|
items = data.get("items", [])
|
||||||
|
partial = data.get("partial_matches", [])
|
||||||
|
|
||||||
|
results = [
|
||||||
|
SearchResult(
|
||||||
|
title=item.get("title", ""),
|
||||||
|
score=item.get("score", 0.0),
|
||||||
|
snippet=item.get("summary", "")[:200],
|
||||||
|
result_type=item.get("type", ""),
|
||||||
|
creator=item.get("creator_name", ""),
|
||||||
|
slug=item.get("slug", ""),
|
||||||
|
)
|
||||||
|
for item in items
|
||||||
|
]
|
||||||
|
partial_results = [
|
||||||
|
SearchResult(
|
||||||
|
title=item.get("title", ""),
|
||||||
|
score=item.get("score", 0.0),
|
||||||
|
snippet=item.get("summary", "")[:200],
|
||||||
|
result_type=item.get("type", ""),
|
||||||
|
creator=item.get("creator_name", ""),
|
||||||
|
slug=item.get("slug", ""),
|
||||||
|
)
|
||||||
|
for item in partial
|
||||||
|
]
|
||||||
|
|
||||||
|
return QdrantSearchResponse(
|
||||||
|
query=query,
|
||||||
|
results=results,
|
||||||
|
partial_matches=partial_results,
|
||||||
|
total=data.get("total", 0),
|
||||||
|
latency_ms=latency,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ── LightRAG client ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def query_lightrag(lightrag_url: str, query: str, mode: str = "hybrid") -> LightRAGResponse:
|
||||||
|
"""Query the LightRAG API."""
|
||||||
|
url = f"{lightrag_url}/query"
|
||||||
|
payload = {"query": query, "mode": mode}
|
||||||
|
|
||||||
|
start = time.monotonic()
|
||||||
|
try:
|
||||||
|
# LightRAG queries involve LLM inference — can take 2-4 minutes each
|
||||||
|
resp = httpx.post(url, json=payload, timeout=300)
|
||||||
|
latency = (time.monotonic() - start) * 1000
|
||||||
|
resp.raise_for_status()
|
||||||
|
data = resp.json()
|
||||||
|
except httpx.HTTPError as e:
|
||||||
|
latency = (time.monotonic() - start) * 1000
|
||||||
|
return LightRAGResponse(query=query, latency_ms=latency, error=str(e))
|
||||||
|
|
||||||
|
return LightRAGResponse(
|
||||||
|
query=query,
|
||||||
|
response_text=data.get("response", ""),
|
||||||
|
references=[
|
||||||
|
{"id": ref.get("reference_id", ""), "file_path": ref.get("file_path", "")}
|
||||||
|
for ref in data.get("references", [])
|
||||||
|
],
|
||||||
|
latency_ms=latency,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Scoring ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _token_overlap(query: str, text: str) -> float:
|
||||||
|
"""Fraction of query tokens found in text (case-insensitive)."""
|
||||||
|
if not text:
|
||||||
|
return 0.0
|
||||||
|
query_tokens = {t.lower() for t in query.split() if len(t) > 2}
|
||||||
|
if not query_tokens:
|
||||||
|
return 0.0
|
||||||
|
text_lower = text.lower()
|
||||||
|
matched = sum(1 for t in query_tokens if t in text_lower)
|
||||||
|
return matched / len(query_tokens)
|
||||||
|
|
||||||
|
|
||||||
|
def score_qdrant_results(comp: QueryComparison) -> None:
|
||||||
|
"""Score Qdrant results on relevance, coverage, and diversity."""
|
||||||
|
if not comp.qdrant or comp.qdrant.error:
|
||||||
|
return
|
||||||
|
|
||||||
|
results = comp.qdrant.results
|
||||||
|
if not results:
|
||||||
|
# Check partial matches
|
||||||
|
results = comp.qdrant.partial_matches
|
||||||
|
|
||||||
|
if not results:
|
||||||
|
comp.qdrant_relevance = 0.0
|
||||||
|
comp.qdrant_coverage = 0
|
||||||
|
comp.qdrant_diversity = 0
|
||||||
|
return
|
||||||
|
|
||||||
|
# Relevance: average token overlap across top-5 results
|
||||||
|
overlaps = []
|
||||||
|
for r in results[:5]:
|
||||||
|
combined = f"{r.title} {r.snippet} {r.creator}"
|
||||||
|
overlaps.append(_token_overlap(comp.query, combined))
|
||||||
|
comp.qdrant_relevance = round((sum(overlaps) / len(overlaps)) * 5, 2) if overlaps else 0.0
|
||||||
|
|
||||||
|
# Coverage: unique technique pages
|
||||||
|
slugs = {r.slug for r in results if r.slug}
|
||||||
|
comp.qdrant_coverage = len(slugs)
|
||||||
|
|
||||||
|
# Diversity: unique creators
|
||||||
|
creators = {r.creator for r in results if r.creator}
|
||||||
|
comp.qdrant_diversity = len(creators)
|
||||||
|
|
||||||
|
|
||||||
|
def score_lightrag_results(comp: QueryComparison) -> None:
|
||||||
|
"""Score LightRAG results on relevance, coverage, and answer quality."""
|
||||||
|
if not comp.lightrag or comp.lightrag.error:
|
||||||
|
return
|
||||||
|
|
||||||
|
text = comp.lightrag.response_text
|
||||||
|
refs = comp.lightrag.references
|
||||||
|
|
||||||
|
if not text:
|
||||||
|
comp.lightrag_relevance = 0.0
|
||||||
|
comp.lightrag_coverage = 0
|
||||||
|
comp.lightrag_answer_quality = 0.0
|
||||||
|
return
|
||||||
|
|
||||||
|
# Relevance: token overlap between query and response
|
||||||
|
comp.lightrag_relevance = round(_token_overlap(comp.query, text) * 5, 2)
|
||||||
|
|
||||||
|
# Coverage: unique technique pages referenced
|
||||||
|
unique_sources = {r["file_path"] for r in refs if r.get("file_path")}
|
||||||
|
comp.lightrag_coverage = len(unique_sources)
|
||||||
|
|
||||||
|
# Answer quality (0-5 composite):
|
||||||
|
quality = 0.0
|
||||||
|
|
||||||
|
# Length: longer synthesized answers are generally better (up to a point)
|
||||||
|
word_count = len(text.split())
|
||||||
|
if word_count > 20:
|
||||||
|
quality += 1.0
|
||||||
|
if word_count > 100:
|
||||||
|
quality += 0.5
|
||||||
|
if word_count > 200:
|
||||||
|
quality += 0.5
|
||||||
|
|
||||||
|
# References: more cross-page references = better synthesis
|
||||||
|
if len(unique_sources) >= 2:
|
||||||
|
quality += 1.0
|
||||||
|
if len(unique_sources) >= 4:
|
||||||
|
quality += 0.5
|
||||||
|
|
||||||
|
# Structure: has headings, bullet points, or numbered lists
|
||||||
|
if "**" in text or "##" in text:
|
||||||
|
quality += 0.5
|
||||||
|
if "- " in text or "* " in text:
|
||||||
|
quality += 0.5
|
||||||
|
|
||||||
|
# Doesn't say "no information available" or similar
|
||||||
|
negative_phrases = ["no information", "not mentioned", "no data", "cannot find"]
|
||||||
|
has_negative = any(phrase in text.lower() for phrase in negative_phrases)
|
||||||
|
if not has_negative:
|
||||||
|
quality += 0.5
|
||||||
|
else:
|
||||||
|
quality -= 1.0
|
||||||
|
|
||||||
|
comp.lightrag_answer_quality = round(min(quality, 5.0), 2)
|
||||||
|
|
||||||
|
|
||||||
|
def determine_winner(comp: QueryComparison) -> None:
|
||||||
|
"""Determine which backend wins for this query."""
|
||||||
|
# Composite score: relevance weight 0.4, coverage 0.3, quality/diversity 0.3
|
||||||
|
qdrant_score = (
|
||||||
|
comp.qdrant_relevance * 0.4
|
||||||
|
+ min(comp.qdrant_coverage, 5) * 0.3
|
||||||
|
+ min(comp.qdrant_diversity, 3) * 0.3
|
||||||
|
)
|
||||||
|
lightrag_score = (
|
||||||
|
comp.lightrag_relevance * 0.4
|
||||||
|
+ min(comp.lightrag_coverage, 5) * 0.3
|
||||||
|
+ comp.lightrag_answer_quality * 0.3
|
||||||
|
)
|
||||||
|
|
||||||
|
if abs(qdrant_score - lightrag_score) < 0.5:
|
||||||
|
comp.winner = "tie"
|
||||||
|
elif qdrant_score > lightrag_score:
|
||||||
|
comp.winner = "qdrant"
|
||||||
|
else:
|
||||||
|
comp.winner = "lightrag"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Report generation ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def generate_markdown_report(comparisons: list[QueryComparison], output_dir: Path) -> Path:
|
||||||
|
"""Generate a human-readable markdown comparison report."""
|
||||||
|
lines: list[str] = []
|
||||||
|
|
||||||
|
lines.append("# Search A/B Comparison: Qdrant vs LightRAG")
|
||||||
|
lines.append(f"\n_Generated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}_")
|
||||||
|
lines.append(f"\n**Queries evaluated:** {len(comparisons)}")
|
||||||
|
|
||||||
|
# Aggregate stats
|
||||||
|
wins = {"qdrant": 0, "lightrag": 0, "tie": 0}
|
||||||
|
qdrant_latencies = []
|
||||||
|
lightrag_latencies = []
|
||||||
|
for c in comparisons:
|
||||||
|
wins[c.winner] += 1
|
||||||
|
if c.qdrant and not c.qdrant.error:
|
||||||
|
qdrant_latencies.append(c.qdrant.latency_ms)
|
||||||
|
if c.lightrag and not c.lightrag.error:
|
||||||
|
lightrag_latencies.append(c.lightrag.latency_ms)
|
||||||
|
|
||||||
|
lines.append("\n## Aggregate Results\n")
|
||||||
|
lines.append(f"| Metric | Qdrant Search | LightRAG |")
|
||||||
|
lines.append(f"|--------|:-------------:|:--------:|")
|
||||||
|
lines.append(f"| **Wins** | {wins['qdrant']} | {wins['lightrag']} |")
|
||||||
|
lines.append(f"| **Ties** | {wins['tie']} | {wins['tie']} |")
|
||||||
|
|
||||||
|
avg_q_str = f"{sum(qdrant_latencies) / len(qdrant_latencies):.0f}ms" if qdrant_latencies else "N/A"
|
||||||
|
avg_l_str = f"{sum(lightrag_latencies) / len(lightrag_latencies):.0f}ms" if lightrag_latencies else "N/A"
|
||||||
|
lines.append(f"| **Avg latency** | {avg_q_str} | {avg_l_str} |")
|
||||||
|
|
||||||
|
avg_qr = sum(c.qdrant_relevance for c in comparisons) / len(comparisons) if comparisons else 0
|
||||||
|
avg_lr = sum(c.lightrag_relevance for c in comparisons) / len(comparisons) if comparisons else 0
|
||||||
|
lines.append(f"| **Avg relevance** | {avg_qr:.2f}/5 | {avg_lr:.2f}/5 |")
|
||||||
|
|
||||||
|
avg_qc = sum(c.qdrant_coverage for c in comparisons) / len(comparisons) if comparisons else 0
|
||||||
|
avg_lc = sum(c.lightrag_coverage for c in comparisons) / len(comparisons) if comparisons else 0
|
||||||
|
lines.append(f"| **Avg coverage** | {avg_qc:.1f} pages | {avg_lc:.1f} refs |")
|
||||||
|
|
||||||
|
# Per-query detail
|
||||||
|
lines.append("\n## Per-Query Comparison\n")
|
||||||
|
lines.append("| # | Query | Type | Qdrant Rel | LR Rel | Qdrant Cov | LR Cov | LR Quality | Winner |")
|
||||||
|
lines.append("|---|-------|------|:----------:|:------:|:----------:|:------:|:----------:|:------:|")
|
||||||
|
|
||||||
|
for i, c in enumerate(comparisons, 1):
|
||||||
|
q_display = c.query[:45] + "…" if len(c.query) > 45 else c.query
|
||||||
|
winner_emoji = {"qdrant": "🔵", "lightrag": "🟢", "tie": "⚪"}[c.winner]
|
||||||
|
lines.append(
|
||||||
|
f"| {i} | {q_display} | {c.query_type} | {c.qdrant_relevance:.1f} | "
|
||||||
|
f"{c.lightrag_relevance:.1f} | {c.qdrant_coverage} | {c.lightrag_coverage} | "
|
||||||
|
f"{c.lightrag_answer_quality:.1f} | {winner_emoji} {c.winner} |"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Detailed results for interesting queries
|
||||||
|
lines.append("\n## Notable Comparisons\n")
|
||||||
|
# Pick queries where there's a clear winner with interesting differences
|
||||||
|
notable = [c for c in comparisons if c.winner != "tie"][:5]
|
||||||
|
for c in notable:
|
||||||
|
lines.append(f"### Query: \"{c.query}\"\n")
|
||||||
|
lines.append(f"**Winner: {c.winner}**\n")
|
||||||
|
|
||||||
|
if c.qdrant and c.qdrant.results:
|
||||||
|
lines.append("**Qdrant results:**")
|
||||||
|
for r in c.qdrant.results[:3]:
|
||||||
|
lines.append(f"- {r.title} (by {r.creator}, score: {r.score:.2f})")
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
if c.lightrag and c.lightrag.response_text:
|
||||||
|
# Show first 300 chars of LightRAG response
|
||||||
|
preview = c.lightrag.response_text[:300]
|
||||||
|
if len(c.lightrag.response_text) > 300:
|
||||||
|
preview += "…"
|
||||||
|
lines.append(f"**LightRAG response preview:**")
|
||||||
|
lines.append(f"> {preview}\n")
|
||||||
|
if c.lightrag.references:
|
||||||
|
ref_slugs = [r["file_path"] for r in c.lightrag.references[:5]]
|
||||||
|
lines.append(f"References: {', '.join(ref_slugs)}\n")
|
||||||
|
|
||||||
|
# Data coverage note
|
||||||
|
lines.append("\n## Data Coverage Note\n")
|
||||||
|
lines.append(
|
||||||
|
"LightRAG has 18 of 93 technique pages indexed. "
|
||||||
|
"Results may improve significantly after full reindexing. "
|
||||||
|
"Qdrant has all 93 pages embedded."
|
||||||
|
)
|
||||||
|
|
||||||
|
report_path = output_dir / "comparison_report.md"
|
||||||
|
report_path.write_text("\n".join(lines), encoding="utf-8")
|
||||||
|
return report_path
|
||||||
|
|
||||||
|
|
||||||
|
def generate_json_report(comparisons: list[QueryComparison], output_dir: Path) -> Path:
|
||||||
|
"""Write full structured comparison data to JSON."""
|
||||||
|
|
||||||
|
def _serialize(obj):
|
||||||
|
if hasattr(obj, "__dict__"):
|
||||||
|
return {k: _serialize(v) for k, v in obj.__dict__.items()}
|
||||||
|
if isinstance(obj, list):
|
||||||
|
return [_serialize(i) for i in obj]
|
||||||
|
if isinstance(obj, dict):
|
||||||
|
return {k: _serialize(v) for k, v in obj.items()}
|
||||||
|
return obj
|
||||||
|
|
||||||
|
data = {
|
||||||
|
"generated_at": datetime.now(timezone.utc).isoformat(),
|
||||||
|
"query_count": len(comparisons),
|
||||||
|
"comparisons": [_serialize(c) for c in comparisons],
|
||||||
|
}
|
||||||
|
|
||||||
|
report_path = output_dir / "comparison_report.json"
|
||||||
|
report_path.write_text(json.dumps(data, indent=2, default=str), encoding="utf-8")
|
||||||
|
return report_path
|
||||||
|
|
||||||
|
|
||||||
|
# ── Main ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(description="A/B compare Qdrant search vs LightRAG")
|
||||||
|
parser.add_argument(
|
||||||
|
"--api-url",
|
||||||
|
default=os.environ.get("API_URL", "http://127.0.0.1:8000"),
|
||||||
|
help="Chrysopedia API base URL (default: http://127.0.0.1:8000)",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--lightrag-url",
|
||||||
|
default=os.environ.get("LIGHTRAG_URL", "http://chrysopedia-lightrag:9621"),
|
||||||
|
help="LightRAG API base URL (default: http://chrysopedia-lightrag:9621)",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--output-dir",
|
||||||
|
default=os.environ.get("OUTPUT_DIR", "/app/scripts/output"),
|
||||||
|
help="Output directory for reports",
|
||||||
|
)
|
||||||
|
parser.add_argument("--limit", type=int, default=None, help="Process only first N queries")
|
||||||
|
parser.add_argument("--dry-run", action="store_true", help="Show query set without executing")
|
||||||
|
parser.add_argument("--verbose", "-v", action="store_true", help="Debug logging")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.DEBUG if args.verbose else logging.INFO,
|
||||||
|
format="%(asctime)s %(levelname)s %(message)s",
|
||||||
|
datefmt="%H:%M:%S",
|
||||||
|
)
|
||||||
|
|
||||||
|
queries = ALL_QUERIES[:args.limit] if args.limit else ALL_QUERIES
|
||||||
|
|
||||||
|
if args.dry_run:
|
||||||
|
print(f"Query set ({len(queries)} queries):")
|
||||||
|
for i, q in enumerate(queries, 1):
|
||||||
|
qtype = "user" if q in USER_QUERIES else "curated"
|
||||||
|
print(f" {i:2d}. [{qtype:>7s}] {q}")
|
||||||
|
return
|
||||||
|
|
||||||
|
output_dir = Path(args.output_dir)
|
||||||
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
comparisons: list[QueryComparison] = []
|
||||||
|
|
||||||
|
for i, query in enumerate(queries, 1):
|
||||||
|
qtype = "user" if query in USER_QUERIES else "curated"
|
||||||
|
logger.info("[%d/%d] Query: %r (%s)", i, len(queries), query, qtype)
|
||||||
|
|
||||||
|
# Query both backends
|
||||||
|
qdrant_resp = query_qdrant_search(args.api_url, query)
|
||||||
|
lightrag_resp = query_lightrag(args.lightrag_url, query)
|
||||||
|
|
||||||
|
if qdrant_resp.error:
|
||||||
|
logger.warning(" Qdrant error: %s", qdrant_resp.error)
|
||||||
|
else:
|
||||||
|
logger.info(" Qdrant: %d results in %.0fms", qdrant_resp.total, qdrant_resp.latency_ms)
|
||||||
|
|
||||||
|
if lightrag_resp.error:
|
||||||
|
logger.warning(" LightRAG error: %s", lightrag_resp.error)
|
||||||
|
else:
|
||||||
|
ref_count = len(lightrag_resp.references)
|
||||||
|
word_count = len(lightrag_resp.response_text.split())
|
||||||
|
logger.info(" LightRAG: %d words, %d refs in %.0fms", word_count, ref_count, lightrag_resp.latency_ms)
|
||||||
|
|
||||||
|
comp = QueryComparison(query=query, query_type=qtype, qdrant=qdrant_resp, lightrag=lightrag_resp)
|
||||||
|
|
||||||
|
# Score
|
||||||
|
score_qdrant_results(comp)
|
||||||
|
score_lightrag_results(comp)
|
||||||
|
determine_winner(comp)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
" Scores → Qdrant: rel=%.1f cov=%d div=%d | LightRAG: rel=%.1f cov=%d qual=%.1f | Winner: %s",
|
||||||
|
comp.qdrant_relevance, comp.qdrant_coverage, comp.qdrant_diversity,
|
||||||
|
comp.lightrag_relevance, comp.lightrag_coverage, comp.lightrag_answer_quality,
|
||||||
|
comp.winner,
|
||||||
|
)
|
||||||
|
|
||||||
|
comparisons.append(comp)
|
||||||
|
|
||||||
|
# Generate reports
|
||||||
|
logger.info("Generating reports...")
|
||||||
|
md_path = generate_markdown_report(comparisons, output_dir)
|
||||||
|
json_path = generate_json_report(comparisons, output_dir)
|
||||||
|
|
||||||
|
# Summary
|
||||||
|
wins = {"qdrant": 0, "lightrag": 0, "tie": 0}
|
||||||
|
for c in comparisons:
|
||||||
|
wins[c.winner] += 1
|
||||||
|
|
||||||
|
print(f"\n{'=' * 60}")
|
||||||
|
print(f"Comparison complete: {len(comparisons)} queries")
|
||||||
|
print(f" Qdrant wins: {wins['qdrant']}")
|
||||||
|
print(f" LightRAG wins: {wins['lightrag']}")
|
||||||
|
print(f" Ties: {wins['tie']}")
|
||||||
|
print(f"\nReports:")
|
||||||
|
print(f" {md_path}")
|
||||||
|
print(f" {json_path}")
|
||||||
|
print(f"{'=' * 60}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
181
backend/scripts/lightrag_query.py
Normal file
181
backend/scripts/lightrag_query.py
Normal file
|
|
@ -0,0 +1,181 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Query LightRAG with optional creator scoping.
|
||||||
|
|
||||||
|
A developer CLI for testing LightRAG queries, including creator-biased
|
||||||
|
retrieval using ll_keywords. Also serves as the foundation for
|
||||||
|
creator-scoped chat in M021.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# Basic query
|
||||||
|
python3 /app/scripts/lightrag_query.py --query "snare design"
|
||||||
|
|
||||||
|
# Creator-scoped query
|
||||||
|
python3 /app/scripts/lightrag_query.py --query "snare design" --creator "COPYCATT"
|
||||||
|
|
||||||
|
# Different modes
|
||||||
|
python3 /app/scripts/lightrag_query.py --query "bass techniques" --mode local
|
||||||
|
|
||||||
|
# JSON output
|
||||||
|
python3 /app/scripts/lightrag_query.py --query "reverb" --json
|
||||||
|
|
||||||
|
# Context only (no LLM generation)
|
||||||
|
python3 /app/scripts/lightrag_query.py --query "reverb" --context-only
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
|
||||||
|
def query_lightrag(
|
||||||
|
lightrag_url: str,
|
||||||
|
query: str,
|
||||||
|
mode: str = "hybrid",
|
||||||
|
creator: str | None = None,
|
||||||
|
context_only: bool = False,
|
||||||
|
top_k: int | None = None,
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
"""Query LightRAG with optional creator scoping.
|
||||||
|
|
||||||
|
When a creator name is provided, it's passed as a low-level keyword
|
||||||
|
to bias retrieval toward documents mentioning that creator.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
lightrag_url:
|
||||||
|
LightRAG API base URL.
|
||||||
|
query:
|
||||||
|
The search/question text.
|
||||||
|
mode:
|
||||||
|
Query mode: local, global, hybrid, naive, mix, bypass.
|
||||||
|
creator:
|
||||||
|
Optional creator name to bias retrieval toward.
|
||||||
|
context_only:
|
||||||
|
If True, returns retrieved context without LLM generation.
|
||||||
|
top_k:
|
||||||
|
Number of top items to retrieve (optional).
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Dict with keys: response, references, latency_ms, error.
|
||||||
|
"""
|
||||||
|
url = f"{lightrag_url}/query"
|
||||||
|
payload: dict[str, Any] = {
|
||||||
|
"query": query,
|
||||||
|
"mode": mode,
|
||||||
|
"include_references": True,
|
||||||
|
}
|
||||||
|
|
||||||
|
if creator:
|
||||||
|
# Use ll_keywords to bias retrieval toward the creator
|
||||||
|
payload["ll_keywords"] = [creator]
|
||||||
|
# Also prepend creator context to the query for better matching
|
||||||
|
payload["query"] = f"{query} (by {creator})"
|
||||||
|
|
||||||
|
if context_only:
|
||||||
|
payload["only_need_context"] = True
|
||||||
|
|
||||||
|
if top_k:
|
||||||
|
payload["top_k"] = top_k
|
||||||
|
|
||||||
|
start = time.monotonic()
|
||||||
|
try:
|
||||||
|
resp = httpx.post(url, json=payload, timeout=300)
|
||||||
|
latency_ms = (time.monotonic() - start) * 1000
|
||||||
|
resp.raise_for_status()
|
||||||
|
data = resp.json()
|
||||||
|
except httpx.HTTPError as e:
|
||||||
|
latency_ms = (time.monotonic() - start) * 1000
|
||||||
|
return {"response": "", "references": [], "latency_ms": latency_ms, "error": str(e)}
|
||||||
|
|
||||||
|
return {
|
||||||
|
"response": data.get("response", ""),
|
||||||
|
"references": data.get("references", []),
|
||||||
|
"latency_ms": latency_ms,
|
||||||
|
"error": "",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def format_response(result: dict[str, Any], json_output: bool = False) -> str:
|
||||||
|
"""Format the query result for display."""
|
||||||
|
if json_output:
|
||||||
|
return json.dumps(result, indent=2, default=str)
|
||||||
|
|
||||||
|
lines = []
|
||||||
|
|
||||||
|
if result["error"]:
|
||||||
|
lines.append(f"ERROR: {result['error']}")
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
lines.append(f"Latency: {result['latency_ms']:.0f}ms")
|
||||||
|
lines.append(f"Word count: {len(result['response'].split())}")
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
# Response text
|
||||||
|
lines.append("─" * 60)
|
||||||
|
lines.append(result["response"])
|
||||||
|
lines.append("─" * 60)
|
||||||
|
|
||||||
|
# References
|
||||||
|
refs = result.get("references", [])
|
||||||
|
if refs:
|
||||||
|
lines.append(f"\nReferences ({len(refs)}):")
|
||||||
|
for ref in refs:
|
||||||
|
fp = ref.get("file_path", "?")
|
||||||
|
rid = ref.get("reference_id", "?")
|
||||||
|
lines.append(f" [{rid}] {fp}")
|
||||||
|
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(description="Query LightRAG with optional creator scoping")
|
||||||
|
parser.add_argument("--query", "-q", required=True, help="Query text")
|
||||||
|
parser.add_argument("--creator", "-c", default=None, help="Creator name to bias retrieval")
|
||||||
|
parser.add_argument(
|
||||||
|
"--mode", "-m",
|
||||||
|
default="hybrid",
|
||||||
|
choices=["local", "global", "hybrid", "naive", "mix", "bypass"],
|
||||||
|
help="Query mode (default: hybrid)",
|
||||||
|
)
|
||||||
|
parser.add_argument("--context-only", action="store_true", help="Return context without LLM generation")
|
||||||
|
parser.add_argument("--top-k", type=int, default=None, help="Number of top items to retrieve")
|
||||||
|
parser.add_argument("--json", action="store_true", dest="json_output", help="Output as JSON")
|
||||||
|
parser.add_argument(
|
||||||
|
"--lightrag-url",
|
||||||
|
default=os.environ.get("LIGHTRAG_URL", "http://chrysopedia-lightrag:9621"),
|
||||||
|
help="LightRAG API base URL",
|
||||||
|
)
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if args.creator:
|
||||||
|
print(f"Query: {args.query}")
|
||||||
|
print(f"Creator scope: {args.creator}")
|
||||||
|
print(f"Mode: {args.mode}")
|
||||||
|
else:
|
||||||
|
print(f"Query: {args.query}")
|
||||||
|
print(f"Mode: {args.mode}")
|
||||||
|
|
||||||
|
print("Querying LightRAG...")
|
||||||
|
|
||||||
|
result = query_lightrag(
|
||||||
|
lightrag_url=args.lightrag_url,
|
||||||
|
query=args.query,
|
||||||
|
mode=args.mode,
|
||||||
|
creator=args.creator,
|
||||||
|
context_only=args.context_only,
|
||||||
|
top_k=args.top_k,
|
||||||
|
)
|
||||||
|
|
||||||
|
print(format_response(result, json_output=args.json_output))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
@ -34,7 +34,7 @@ _script_dir = os.path.dirname(os.path.abspath(os.path.realpath(__file__)))
|
||||||
_backend_dir = os.path.dirname(_script_dir)
|
_backend_dir = os.path.dirname(_script_dir)
|
||||||
sys.path.insert(0, _backend_dir)
|
sys.path.insert(0, _backend_dir)
|
||||||
|
|
||||||
from models import Creator, KeyMoment, TechniquePage # noqa: E402
|
from models import Creator, KeyMoment, SourceVideo, TechniquePage # noqa: E402
|
||||||
|
|
||||||
logger = logging.getLogger("reindex_lightrag")
|
logger = logging.getLogger("reindex_lightrag")
|
||||||
|
|
||||||
|
|
@ -49,12 +49,12 @@ def get_sync_engine(db_url: str):
|
||||||
|
|
||||||
|
|
||||||
def load_technique_pages(session: Session, limit: int | None = None) -> list[TechniquePage]:
|
def load_technique_pages(session: Session, limit: int | None = None) -> list[TechniquePage]:
|
||||||
"""Load all technique pages with creator and key moments eagerly."""
|
"""Load all technique pages with creator, key moments, and source videos eagerly."""
|
||||||
query = (
|
query = (
|
||||||
session.query(TechniquePage)
|
session.query(TechniquePage)
|
||||||
.options(
|
.options(
|
||||||
joinedload(TechniquePage.creator),
|
joinedload(TechniquePage.creator),
|
||||||
joinedload(TechniquePage.key_moments),
|
joinedload(TechniquePage.key_moments).joinedload(KeyMoment.source_video),
|
||||||
)
|
)
|
||||||
.order_by(TechniquePage.title)
|
.order_by(TechniquePage.title)
|
||||||
)
|
)
|
||||||
|
|
@ -102,26 +102,42 @@ def _format_v2_sections(body_sections: list[dict]) -> str:
|
||||||
|
|
||||||
|
|
||||||
def format_technique_page(page: TechniquePage) -> str:
|
def format_technique_page(page: TechniquePage) -> str:
|
||||||
"""Convert a TechniquePage + relations into a rich text document for LightRAG."""
|
"""Convert a TechniquePage + relations into a rich text document for LightRAG.
|
||||||
|
|
||||||
|
Includes structured provenance metadata for entity extraction and
|
||||||
|
creator-scoped retrieval.
|
||||||
|
"""
|
||||||
lines = []
|
lines = []
|
||||||
|
|
||||||
# Header metadata
|
# ── Structured provenance block ──────────────────────────────────────
|
||||||
lines.append(f"Technique: {page.title}")
|
lines.append(f"Technique: {page.title}")
|
||||||
if page.creator:
|
if page.creator:
|
||||||
lines.append(f"Creator: {page.creator.name}")
|
lines.append(f"Creator: {page.creator.name}")
|
||||||
|
lines.append(f"Creator ID: {page.creator_id}")
|
||||||
lines.append(f"Category: {page.topic_category or 'Uncategorized'}")
|
lines.append(f"Category: {page.topic_category or 'Uncategorized'}")
|
||||||
if page.topic_tags:
|
if page.topic_tags:
|
||||||
lines.append(f"Tags: {', '.join(page.topic_tags)}")
|
lines.append(f"Tags: {', '.join(page.topic_tags)}")
|
||||||
if page.plugins:
|
if page.plugins:
|
||||||
lines.append(f"Plugins: {', '.join(page.plugins)}")
|
lines.append(f"Plugins: {', '.join(page.plugins)}")
|
||||||
|
|
||||||
|
# Source video provenance
|
||||||
|
if page.key_moments:
|
||||||
|
video_ids: dict[str, str] = {}
|
||||||
|
for km in page.key_moments:
|
||||||
|
sv = getattr(km, "source_video", None)
|
||||||
|
if sv and str(sv.id) not in video_ids:
|
||||||
|
video_ids[str(sv.id)] = sv.filename
|
||||||
|
if video_ids:
|
||||||
|
lines.append(f"Source Videos: {', '.join(video_ids.values())}")
|
||||||
|
lines.append(f"Source Video IDs: {', '.join(video_ids.keys())}")
|
||||||
lines.append("")
|
lines.append("")
|
||||||
|
|
||||||
# Summary
|
# ── Summary ──────────────────────────────────────────────────────────
|
||||||
if page.summary:
|
if page.summary:
|
||||||
lines.append(f"Summary: {page.summary}")
|
lines.append(f"Summary: {page.summary}")
|
||||||
lines.append("")
|
lines.append("")
|
||||||
|
|
||||||
# Body sections — handle both formats
|
# ── Body sections ────────────────────────────────────────────────────
|
||||||
if page.body_sections:
|
if page.body_sections:
|
||||||
fmt = getattr(page, "body_sections_format", "v1") or "v1"
|
fmt = getattr(page, "body_sections_format", "v1") or "v1"
|
||||||
if fmt == "v2" and isinstance(page.body_sections, list):
|
if fmt == "v2" and isinstance(page.body_sections, list):
|
||||||
|
|
@ -134,19 +150,26 @@ def format_technique_page(page: TechniquePage) -> str:
|
||||||
else:
|
else:
|
||||||
lines.append(str(page.body_sections))
|
lines.append(str(page.body_sections))
|
||||||
|
|
||||||
# Key moments from source videos
|
# ── Key moments with source attribution ──────────────────────────────
|
||||||
if page.key_moments:
|
if page.key_moments:
|
||||||
lines.append("Key Moments from Source Videos:")
|
lines.append("Key Moments from Source Videos:")
|
||||||
for km in page.key_moments:
|
for km in page.key_moments:
|
||||||
lines.append(f"- {km.title}: {km.summary}")
|
sv = getattr(km, "source_video", None)
|
||||||
|
source_info = f" (Source: {sv.filename})" if sv else ""
|
||||||
|
lines.append(f"- {km.title}: {km.summary}{source_info}")
|
||||||
lines.append("")
|
lines.append("")
|
||||||
|
|
||||||
return "\n".join(lines).strip()
|
return "\n".join(lines).strip()
|
||||||
|
|
||||||
|
|
||||||
def file_source_for_page(page: TechniquePage) -> str:
|
def file_source_for_page(page: TechniquePage) -> str:
|
||||||
"""Deterministic file_source identifier for a technique page."""
|
"""Deterministic file_source identifier for a technique page.
|
||||||
return f"technique:{page.slug}"
|
|
||||||
|
Encodes creator_id for provenance tracking. Format:
|
||||||
|
technique:{slug}:creator:{creator_id}
|
||||||
|
"""
|
||||||
|
creator_id = str(page.creator_id) if page.creator_id else "unknown"
|
||||||
|
return f"technique:{page.slug}:creator:{creator_id}"
|
||||||
|
|
||||||
|
|
||||||
# ── LightRAG API ─────────────────────────────────────────────────────────────
|
# ── LightRAG API ─────────────────────────────────────────────────────────────
|
||||||
|
|
@ -173,6 +196,47 @@ def get_processed_sources(lightrag_url: str) -> set[str]:
|
||||||
return sources
|
return sources
|
||||||
|
|
||||||
|
|
||||||
|
def clear_all_documents(lightrag_url: str) -> bool:
|
||||||
|
"""Delete all documents from LightRAG. Returns True on success.
|
||||||
|
|
||||||
|
Uses the /documents/delete_document endpoint with doc_ids (not file_path).
|
||||||
|
"""
|
||||||
|
# Get all document IDs
|
||||||
|
try:
|
||||||
|
resp = httpx.get(f"{lightrag_url}/documents", timeout=30)
|
||||||
|
resp.raise_for_status()
|
||||||
|
data = resp.json()
|
||||||
|
except httpx.HTTPError as e:
|
||||||
|
logger.error("Failed to fetch documents for clearing: %s", e)
|
||||||
|
return False
|
||||||
|
|
||||||
|
doc_ids = []
|
||||||
|
for status_group in data.get("statuses", {}).values():
|
||||||
|
for doc in status_group:
|
||||||
|
did = doc.get("id")
|
||||||
|
if did:
|
||||||
|
doc_ids.append(did)
|
||||||
|
|
||||||
|
if not doc_ids:
|
||||||
|
logger.info("No documents to clear.")
|
||||||
|
return True
|
||||||
|
|
||||||
|
logger.info("Clearing %d documents from LightRAG...", len(doc_ids))
|
||||||
|
try:
|
||||||
|
resp = httpx.request(
|
||||||
|
"DELETE",
|
||||||
|
f"{lightrag_url}/documents/delete_document",
|
||||||
|
json={"doc_ids": doc_ids, "delete_llm_cache": True},
|
||||||
|
timeout=120,
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
logger.info("Cleared %d documents.", len(doc_ids))
|
||||||
|
return True
|
||||||
|
except httpx.HTTPError as e:
|
||||||
|
logger.error("Failed to delete documents: %s", e)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def submit_document(lightrag_url: str, text: str, file_source: str) -> dict[str, Any] | None:
|
def submit_document(lightrag_url: str, text: str, file_source: str) -> dict[str, Any] | None:
|
||||||
"""Submit a text document to LightRAG. Returns response dict or None on error."""
|
"""Submit a text document to LightRAG. Returns response dict or None on error."""
|
||||||
url = f"{lightrag_url}/documents/text"
|
url = f"{lightrag_url}/documents/text"
|
||||||
|
|
@ -232,6 +296,16 @@ def main():
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Format and preview pages without submitting to LightRAG",
|
help="Format and preview pages without submitting to LightRAG",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--force",
|
||||||
|
action="store_true",
|
||||||
|
help="Skip resume check — resubmit all pages even if already processed",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--clear-first",
|
||||||
|
action="store_true",
|
||||||
|
help="Delete all existing LightRAG documents before reindexing",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--limit",
|
"--limit",
|
||||||
type=int,
|
type=int,
|
||||||
|
|
@ -271,9 +345,13 @@ def main():
|
||||||
session.close()
|
session.close()
|
||||||
return
|
return
|
||||||
|
|
||||||
# Resume support — get already-processed sources
|
# Clear existing documents if requested
|
||||||
|
if args.clear_first and not args.dry_run:
|
||||||
|
clear_all_documents(args.lightrag_url)
|
||||||
|
|
||||||
|
# Resume support — get already-processed sources (skip if --force)
|
||||||
processed_sources: set[str] = set()
|
processed_sources: set[str] = set()
|
||||||
if not args.dry_run:
|
if not args.dry_run and not args.force:
|
||||||
logger.info("Checking LightRAG for already-processed documents...")
|
logger.info("Checking LightRAG for already-processed documents...")
|
||||||
processed_sources = get_processed_sources(args.lightrag_url)
|
processed_sources = get_processed_sources(args.lightrag_url)
|
||||||
logger.info("Found %d existing document(s) in LightRAG", len(processed_sources))
|
logger.info("Found %d existing document(s) in LightRAG", len(processed_sources))
|
||||||
|
|
|
||||||
|
|
@ -10,9 +10,11 @@ from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
import httpx
|
||||||
import openai
|
import openai
|
||||||
from qdrant_client import AsyncQdrantClient
|
from qdrant_client import AsyncQdrantClient
|
||||||
from qdrant_client.http import exceptions as qdrant_exceptions
|
from qdrant_client.http import exceptions as qdrant_exceptions
|
||||||
|
|
@ -50,6 +52,13 @@ class SearchService:
|
||||||
self._qdrant = AsyncQdrantClient(url=settings.qdrant_url)
|
self._qdrant = AsyncQdrantClient(url=settings.qdrant_url)
|
||||||
self._collection = settings.qdrant_collection
|
self._collection = settings.qdrant_collection
|
||||||
|
|
||||||
|
# LightRAG client
|
||||||
|
self._httpx = httpx.AsyncClient(
|
||||||
|
timeout=httpx.Timeout(settings.lightrag_search_timeout),
|
||||||
|
)
|
||||||
|
self._lightrag_url = settings.lightrag_url
|
||||||
|
self._lightrag_min_query_length = settings.lightrag_min_query_length
|
||||||
|
|
||||||
# ── Embedding ────────────────────────────────────────────────────────
|
# ── Embedding ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
async def embed_query(self, text: str) -> list[float] | None:
|
async def embed_query(self, text: str) -> list[float] | None:
|
||||||
|
|
@ -392,6 +401,177 @@ class SearchService:
|
||||||
|
|
||||||
return partial
|
return partial
|
||||||
|
|
||||||
|
# ── LightRAG search ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Regex to parse file_source format: technique:{slug}:creator:{creator_id}
|
||||||
|
_FILE_SOURCE_RE = re.compile(r"^technique:(?P<slug>[^:]+):creator:(?P<creator_id>.+)$")
|
||||||
|
|
||||||
|
async def _lightrag_search(
|
||||||
|
self,
|
||||||
|
query: str,
|
||||||
|
limit: int,
|
||||||
|
db: AsyncSession,
|
||||||
|
) -> list[dict[str, Any]]:
|
||||||
|
"""Query LightRAG /query/data for entities, relationships, and chunks.
|
||||||
|
|
||||||
|
Maps results back to SearchResultItem dicts using file_source parsing
|
||||||
|
and DB batch lookup. Returns empty list on any failure (timeout,
|
||||||
|
connection, parse error) with a WARNING log — caller falls back.
|
||||||
|
"""
|
||||||
|
start = time.monotonic()
|
||||||
|
try:
|
||||||
|
resp = await self._httpx.post(
|
||||||
|
f"{self._lightrag_url}/query/data",
|
||||||
|
json={"query": query, "mode": "hybrid", "top_k": limit},
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
body = resp.json()
|
||||||
|
except httpx.TimeoutException:
|
||||||
|
elapsed_ms = (time.monotonic() - start) * 1000
|
||||||
|
logger.warning(
|
||||||
|
"lightrag_search_fallback reason=timeout query=%r latency_ms=%.1f",
|
||||||
|
query, elapsed_ms,
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
except httpx.HTTPError as exc:
|
||||||
|
elapsed_ms = (time.monotonic() - start) * 1000
|
||||||
|
logger.warning(
|
||||||
|
"lightrag_search_fallback reason=http_error query=%r error=%s latency_ms=%.1f",
|
||||||
|
query, exc, elapsed_ms,
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
except Exception as exc:
|
||||||
|
elapsed_ms = (time.monotonic() - start) * 1000
|
||||||
|
logger.warning(
|
||||||
|
"lightrag_search_fallback reason=unexpected query=%r error=%s latency_ms=%.1f",
|
||||||
|
query, exc, elapsed_ms,
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Parse response
|
||||||
|
try:
|
||||||
|
data = body.get("data", {})
|
||||||
|
if not data:
|
||||||
|
elapsed_ms = (time.monotonic() - start) * 1000
|
||||||
|
logger.warning(
|
||||||
|
"lightrag_search_fallback reason=empty_data query=%r latency_ms=%.1f",
|
||||||
|
query, elapsed_ms,
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
|
||||||
|
chunks = data.get("chunks", [])
|
||||||
|
entities = data.get("entities", [])
|
||||||
|
# relationships = data.get("relationships", []) # available for future use
|
||||||
|
|
||||||
|
# Extract technique slugs from chunk file_path/file_source fields
|
||||||
|
slug_set: set[str] = set()
|
||||||
|
slug_order: list[str] = [] # preserve retrieval rank
|
||||||
|
for chunk in chunks:
|
||||||
|
file_path = chunk.get("file_path", "")
|
||||||
|
m = self._FILE_SOURCE_RE.match(file_path)
|
||||||
|
if m and m.group("slug") not in slug_set:
|
||||||
|
slug = m.group("slug")
|
||||||
|
slug_set.add(slug)
|
||||||
|
slug_order.append(slug)
|
||||||
|
|
||||||
|
# Also try to extract slugs from entity names by matching DB later
|
||||||
|
entity_names: list[str] = []
|
||||||
|
for ent in entities:
|
||||||
|
name = ent.get("entity_name", "")
|
||||||
|
if name:
|
||||||
|
entity_names.append(name)
|
||||||
|
|
||||||
|
if not slug_set and not entity_names:
|
||||||
|
elapsed_ms = (time.monotonic() - start) * 1000
|
||||||
|
logger.warning(
|
||||||
|
"lightrag_search_fallback reason=no_parseable_results query=%r "
|
||||||
|
"chunks=%d entities=%d latency_ms=%.1f",
|
||||||
|
query, len(chunks), len(entities), elapsed_ms,
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Batch-lookup technique pages by slug
|
||||||
|
tp_map: dict[str, tuple] = {} # slug → (TechniquePage, Creator)
|
||||||
|
if slug_set:
|
||||||
|
tp_stmt = (
|
||||||
|
select(TechniquePage, Creator)
|
||||||
|
.join(Creator, TechniquePage.creator_id == Creator.id)
|
||||||
|
.where(TechniquePage.slug.in_(list(slug_set)))
|
||||||
|
)
|
||||||
|
tp_rows = await db.execute(tp_stmt)
|
||||||
|
for tp, cr in tp_rows.all():
|
||||||
|
tp_map[tp.slug] = (tp, cr)
|
||||||
|
|
||||||
|
# If we have entity names but no chunk matches, try matching
|
||||||
|
# entity names against technique page titles or creator names
|
||||||
|
if entity_names and not tp_map:
|
||||||
|
entity_name_pats = [f"%{name}%" for name in entity_names[:20]]
|
||||||
|
tp_stmt2 = (
|
||||||
|
select(TechniquePage, Creator)
|
||||||
|
.join(Creator, TechniquePage.creator_id == Creator.id)
|
||||||
|
.where(
|
||||||
|
or_(
|
||||||
|
TechniquePage.title.ilike(func.any_(entity_name_pats)),
|
||||||
|
Creator.name.ilike(func.any_(entity_name_pats)),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.limit(limit)
|
||||||
|
)
|
||||||
|
tp_rows2 = await db.execute(tp_stmt2)
|
||||||
|
for tp, cr in tp_rows2.all():
|
||||||
|
if tp.slug not in tp_map:
|
||||||
|
tp_map[tp.slug] = (tp, cr)
|
||||||
|
if tp.slug not in slug_set:
|
||||||
|
slug_order.append(tp.slug)
|
||||||
|
slug_set.add(tp.slug)
|
||||||
|
|
||||||
|
# Build result items in retrieval-rank order
|
||||||
|
results: list[dict[str, Any]] = []
|
||||||
|
seen_slugs: set[str] = set()
|
||||||
|
for idx, slug in enumerate(slug_order):
|
||||||
|
if slug in seen_slugs:
|
||||||
|
continue
|
||||||
|
seen_slugs.add(slug)
|
||||||
|
pair = tp_map.get(slug)
|
||||||
|
if not pair:
|
||||||
|
continue
|
||||||
|
tp, cr = pair
|
||||||
|
# Score: higher rank → higher score (1.0 down to ~0.5)
|
||||||
|
score = max(1.0 - (idx * 0.05), 0.5)
|
||||||
|
results.append({
|
||||||
|
"type": "technique_page",
|
||||||
|
"title": tp.title,
|
||||||
|
"slug": tp.slug,
|
||||||
|
"technique_page_slug": tp.slug,
|
||||||
|
"summary": tp.summary or "",
|
||||||
|
"topic_category": tp.topic_category,
|
||||||
|
"topic_tags": tp.topic_tags or [],
|
||||||
|
"creator_id": str(tp.creator_id),
|
||||||
|
"creator_name": cr.name,
|
||||||
|
"creator_slug": cr.slug,
|
||||||
|
"created_at": tp.created_at.isoformat() if tp.created_at else "",
|
||||||
|
"score": score,
|
||||||
|
"match_context": "LightRAG graph match",
|
||||||
|
})
|
||||||
|
if len(results) >= limit:
|
||||||
|
break
|
||||||
|
|
||||||
|
elapsed_ms = (time.monotonic() - start) * 1000
|
||||||
|
logger.info(
|
||||||
|
"lightrag_search query=%r latency_ms=%.1f result_count=%d chunks=%d entities=%d",
|
||||||
|
query, elapsed_ms, len(results), len(chunks), len(entities),
|
||||||
|
)
|
||||||
|
return results
|
||||||
|
|
||||||
|
except (KeyError, ValueError, TypeError) as exc:
|
||||||
|
elapsed_ms = (time.monotonic() - start) * 1000
|
||||||
|
body_snippet = str(body)[:200] if body else "<empty>"
|
||||||
|
logger.warning(
|
||||||
|
"lightrag_search_fallback reason=parse_error query=%r error=%s body=%.200s latency_ms=%.1f",
|
||||||
|
query, exc, body_snippet, elapsed_ms,
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
|
||||||
# ── Orchestrator ─────────────────────────────────────────────────────
|
# ── Orchestrator ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
async def search(
|
async def search(
|
||||||
|
|
@ -418,17 +598,23 @@ class SearchService:
|
||||||
if scope not in ("all", "topics", "creators"):
|
if scope not in ("all", "topics", "creators"):
|
||||||
scope = "all"
|
scope = "all"
|
||||||
|
|
||||||
# Map scope to Qdrant type filter
|
# ── Primary: try LightRAG for queries ≥ min length ─────────────
|
||||||
# topics scope: no filter — both technique_page and technique_section
|
lightrag_results: list[dict[str, Any]] = []
|
||||||
# should appear in semantic results
|
fallback_used = True # assume fallback until LightRAG succeeds
|
||||||
type_filter_map = {
|
|
||||||
"all": None,
|
|
||||||
"topics": None,
|
|
||||||
"creators": None,
|
|
||||||
}
|
|
||||||
qdrant_type_filter = type_filter_map.get(scope)
|
|
||||||
|
|
||||||
# Run both searches in parallel
|
use_lightrag = len(query) >= self._lightrag_min_query_length
|
||||||
|
|
||||||
|
if use_lightrag:
|
||||||
|
lightrag_results = await self._lightrag_search(query, limit, db)
|
||||||
|
if lightrag_results:
|
||||||
|
fallback_used = False
|
||||||
|
|
||||||
|
# ── Keyword search always runs (for merge/dedup) ─────────────
|
||||||
|
async def _keyword():
|
||||||
|
return await self.keyword_search(query, scope, limit, db, sort=sort)
|
||||||
|
|
||||||
|
# ── Fallback: Qdrant semantic (only when LightRAG didn't deliver) ──
|
||||||
|
qdrant_type_filter = None # no type filter — all result types welcome
|
||||||
async def _semantic():
|
async def _semantic():
|
||||||
vector = await self.embed_query(query)
|
vector = await self.embed_query(query)
|
||||||
if vector is None:
|
if vector is None:
|
||||||
|
|
@ -444,14 +630,17 @@ class SearchService:
|
||||||
filtered.append(item)
|
filtered.append(item)
|
||||||
return filtered
|
return filtered
|
||||||
|
|
||||||
async def _keyword():
|
if fallback_used:
|
||||||
return await self.keyword_search(query, scope, limit, db, sort=sort)
|
# LightRAG returned nothing — run Qdrant semantic + keyword in parallel
|
||||||
|
semantic_results, kw_result = await asyncio.gather(
|
||||||
semantic_results, kw_result = await asyncio.gather(
|
_semantic(),
|
||||||
_semantic(),
|
_keyword(),
|
||||||
_keyword(),
|
return_exceptions=True,
|
||||||
return_exceptions=True,
|
)
|
||||||
)
|
else:
|
||||||
|
# LightRAG succeeded — only need keyword for supplementary merge
|
||||||
|
semantic_results = []
|
||||||
|
kw_result = await _keyword()
|
||||||
|
|
||||||
# Handle exceptions gracefully
|
# Handle exceptions gracefully
|
||||||
if isinstance(semantic_results, Exception):
|
if isinstance(semantic_results, Exception):
|
||||||
|
|
@ -464,8 +653,7 @@ class SearchService:
|
||||||
kw_items = kw_result["items"]
|
kw_items = kw_result["items"]
|
||||||
partial_matches = kw_result.get("partial_matches", [])
|
partial_matches = kw_result.get("partial_matches", [])
|
||||||
|
|
||||||
# Merge: keyword results first (they have explicit match_context),
|
# Merge: LightRAG results first (primary), then keyword, then Qdrant semantic
|
||||||
# then semantic results that aren't already present
|
|
||||||
seen_keys: set[str] = set()
|
seen_keys: set[str] = set()
|
||||||
merged: list[dict[str, Any]] = []
|
merged: list[dict[str, Any]] = []
|
||||||
|
|
||||||
|
|
@ -475,6 +663,12 @@ class SearchService:
|
||||||
title = item.get("title", "")
|
title = item.get("title", "")
|
||||||
return f"{t}:{s}:{title}"
|
return f"{t}:{s}:{title}"
|
||||||
|
|
||||||
|
for item in lightrag_results:
|
||||||
|
key = _dedup_key(item)
|
||||||
|
if key not in seen_keys:
|
||||||
|
seen_keys.add(key)
|
||||||
|
merged.append(item)
|
||||||
|
|
||||||
for item in kw_items:
|
for item in kw_items:
|
||||||
key = _dedup_key(item)
|
key = _dedup_key(item)
|
||||||
if key not in seen_keys:
|
if key not in seen_keys:
|
||||||
|
|
@ -490,13 +684,13 @@ class SearchService:
|
||||||
# Apply sort
|
# Apply sort
|
||||||
merged = self._apply_sort(merged, sort)
|
merged = self._apply_sort(merged, sort)
|
||||||
|
|
||||||
fallback_used = len(kw_items) > 0 and len(semantic_results) == 0
|
# fallback_used is already set above
|
||||||
|
|
||||||
elapsed_ms = (time.monotonic() - start) * 1000
|
elapsed_ms = (time.monotonic() - start) * 1000
|
||||||
logger.info(
|
logger.info(
|
||||||
"Search query=%r scope=%s keyword=%d semantic=%d merged=%d partial=%d latency_ms=%.1f",
|
"Search query=%r scope=%s lightrag=%d keyword=%d semantic=%d merged=%d partial=%d fallback=%s latency_ms=%.1f",
|
||||||
query, scope, len(kw_items), len(semantic_results),
|
query, scope, len(lightrag_results), len(kw_items), len(semantic_results),
|
||||||
len(merged), len(partial_matches), elapsed_ms,
|
len(merged), len(partial_matches), fallback_used, elapsed_ms,
|
||||||
)
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,9 @@ const CreatorDashboard = React.lazy(() => import("./pages/CreatorDashboard"));
|
||||||
const CreatorSettings = React.lazy(() => import("./pages/CreatorSettings"));
|
const CreatorSettings = React.lazy(() => import("./pages/CreatorSettings"));
|
||||||
const ConsentDashboard = React.lazy(() => import("./pages/ConsentDashboard"));
|
const ConsentDashboard = React.lazy(() => import("./pages/ConsentDashboard"));
|
||||||
const WatchPage = React.lazy(() => import("./pages/WatchPage"));
|
const WatchPage = React.lazy(() => import("./pages/WatchPage"));
|
||||||
|
const AdminUsers = React.lazy(() => import("./pages/AdminUsers"));
|
||||||
import AdminDropdown from "./components/AdminDropdown";
|
import AdminDropdown from "./components/AdminDropdown";
|
||||||
|
import ImpersonationBanner from "./components/ImpersonationBanner";
|
||||||
import AppFooter from "./components/AppFooter";
|
import AppFooter from "./components/AppFooter";
|
||||||
import SearchAutocomplete from "./components/SearchAutocomplete";
|
import SearchAutocomplete from "./components/SearchAutocomplete";
|
||||||
import ProtectedRoute from "./components/ProtectedRoute";
|
import ProtectedRoute from "./components/ProtectedRoute";
|
||||||
|
|
@ -100,6 +102,7 @@ function AppShell() {
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="app">
|
<div className="app">
|
||||||
|
<ImpersonationBanner />
|
||||||
<a href="#main-content" className="skip-link">Skip to content</a>
|
<a href="#main-content" className="skip-link">Skip to content</a>
|
||||||
<header className="app-header" ref={headerRef}>
|
<header className="app-header" ref={headerRef}>
|
||||||
<Link to="/" className="app-header__brand">
|
<Link to="/" className="app-header__brand">
|
||||||
|
|
@ -179,6 +182,7 @@ function AppShell() {
|
||||||
<Route path="/admin/reports" element={<Suspense fallback={<LoadingFallback />}><AdminReports /></Suspense>} />
|
<Route path="/admin/reports" element={<Suspense fallback={<LoadingFallback />}><AdminReports /></Suspense>} />
|
||||||
<Route path="/admin/pipeline" element={<Suspense fallback={<LoadingFallback />}><AdminPipeline /></Suspense>} />
|
<Route path="/admin/pipeline" element={<Suspense fallback={<LoadingFallback />}><AdminPipeline /></Suspense>} />
|
||||||
<Route path="/admin/techniques" element={<Suspense fallback={<LoadingFallback />}><AdminTechniquePages /></Suspense>} />
|
<Route path="/admin/techniques" element={<Suspense fallback={<LoadingFallback />}><AdminTechniquePages /></Suspense>} />
|
||||||
|
<Route path="/admin/users" element={<Suspense fallback={<LoadingFallback />}><AdminUsers /></Suspense>} />
|
||||||
|
|
||||||
{/* Info routes */}
|
{/* Info routes */}
|
||||||
<Route path="/about" element={<Suspense fallback={<LoadingFallback />}><About /></Suspense>} />
|
<Route path="/about" element={<Suspense fallback={<LoadingFallback />}><About /></Suspense>} />
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,22 @@ export interface UserResponse {
|
||||||
creator_id: string | null;
|
creator_id: string | null;
|
||||||
is_active: boolean;
|
is_active: boolean;
|
||||||
created_at: string;
|
created_at: string;
|
||||||
|
impersonating?: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface UserListItem {
|
||||||
|
id: string;
|
||||||
|
email: string;
|
||||||
|
display_name: string;
|
||||||
|
role: string;
|
||||||
|
creator_id: string | null;
|
||||||
|
is_active: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface ImpersonateResponse {
|
||||||
|
access_token: string;
|
||||||
|
token_type: string;
|
||||||
|
target_user: UserListItem;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface UpdateProfileRequest {
|
export interface UpdateProfileRequest {
|
||||||
|
|
@ -68,3 +84,33 @@ export async function authUpdateProfile(
|
||||||
body: JSON.stringify(data),
|
body: JSON.stringify(data),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Admin: Impersonation ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export async function fetchUsers(token: string): Promise<UserListItem[]> {
|
||||||
|
return request<UserListItem[]>(`${BASE}/admin/users`, {
|
||||||
|
headers: { Authorization: `Bearer ${token}` },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function impersonateUser(
|
||||||
|
token: string,
|
||||||
|
userId: string,
|
||||||
|
): Promise<ImpersonateResponse> {
|
||||||
|
return request<ImpersonateResponse>(
|
||||||
|
`${BASE}/admin/impersonate/${userId}`,
|
||||||
|
{
|
||||||
|
method: "POST",
|
||||||
|
headers: { Authorization: `Bearer ${token}` },
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function stopImpersonation(
|
||||||
|
token: string,
|
||||||
|
): Promise<{ message: string }> {
|
||||||
|
return request<{ message: string }>(`${BASE}/admin/impersonate/stop`, {
|
||||||
|
method: "POST",
|
||||||
|
headers: { Authorization: `Bearer ${token}` },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -110,6 +110,14 @@ export default function AdminDropdown() {
|
||||||
>
|
>
|
||||||
Techniques
|
Techniques
|
||||||
</Link>
|
</Link>
|
||||||
|
<Link
|
||||||
|
to="/admin/users"
|
||||||
|
className="admin-dropdown__item"
|
||||||
|
role="menuitem"
|
||||||
|
onClick={() => setOpen(false)}
|
||||||
|
>
|
||||||
|
Users
|
||||||
|
</Link>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|
|
||||||
49
frontend/src/components/ImpersonationBanner.module.css
Normal file
49
frontend/src/components/ImpersonationBanner.module.css
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
.banner {
|
||||||
|
position: fixed;
|
||||||
|
top: 0;
|
||||||
|
left: 0;
|
||||||
|
right: 0;
|
||||||
|
z-index: 9999;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
gap: 1rem;
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
background: #b45309;
|
||||||
|
color: #fff;
|
||||||
|
font-size: 0.85rem;
|
||||||
|
font-weight: 600;
|
||||||
|
letter-spacing: 0.02em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.text {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.icon {
|
||||||
|
font-size: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.exitBtn {
|
||||||
|
padding: 0.25rem 0.75rem;
|
||||||
|
border: 1px solid rgba(255, 255, 255, 0.5);
|
||||||
|
border-radius: 4px;
|
||||||
|
background: transparent;
|
||||||
|
color: #fff;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
font-weight: 600;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: background 150ms, border-color 150ms;
|
||||||
|
}
|
||||||
|
|
||||||
|
.exitBtn:hover {
|
||||||
|
background: rgba(255, 255, 255, 0.15);
|
||||||
|
border-color: #fff;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Push page content down when banner is showing */
|
||||||
|
:global(body.impersonating) {
|
||||||
|
padding-top: 40px;
|
||||||
|
}
|
||||||
36
frontend/src/components/ImpersonationBanner.tsx
Normal file
36
frontend/src/components/ImpersonationBanner.tsx
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
import { useEffect } from "react";
|
||||||
|
import { useAuth } from "../context/AuthContext";
|
||||||
|
import styles from "./ImpersonationBanner.module.css";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fixed amber banner shown when an admin is impersonating a creator.
|
||||||
|
* Adds body.impersonating class to push page content down.
|
||||||
|
*/
|
||||||
|
export default function ImpersonationBanner() {
|
||||||
|
const { isImpersonating, user, exitImpersonation } = useAuth();
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (isImpersonating) {
|
||||||
|
document.body.classList.add("impersonating");
|
||||||
|
} else {
|
||||||
|
document.body.classList.remove("impersonating");
|
||||||
|
}
|
||||||
|
return () => {
|
||||||
|
document.body.classList.remove("impersonating");
|
||||||
|
};
|
||||||
|
}, [isImpersonating]);
|
||||||
|
|
||||||
|
if (!isImpersonating) return null;
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className={styles.banner} role="alert">
|
||||||
|
<span className={styles.text}>
|
||||||
|
<span className={styles.icon} aria-hidden="true">👁</span>
|
||||||
|
Viewing as <strong>{user?.display_name ?? "Unknown"}</strong>
|
||||||
|
</span>
|
||||||
|
<button className={styles.exitBtn} onClick={exitImpersonation}>
|
||||||
|
Exit
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
@ -11,19 +11,26 @@ import {
|
||||||
authLogin,
|
authLogin,
|
||||||
authGetMe,
|
authGetMe,
|
||||||
authRegister,
|
authRegister,
|
||||||
|
impersonateUser,
|
||||||
|
stopImpersonation as apiStopImpersonation,
|
||||||
ApiError,
|
ApiError,
|
||||||
type UserResponse,
|
type UserResponse,
|
||||||
type RegisterRequest,
|
type RegisterRequest,
|
||||||
} from "../api";
|
} from "../api";
|
||||||
|
|
||||||
|
const ADMIN_TOKEN_KEY = "chrysopedia_admin_token";
|
||||||
|
|
||||||
interface AuthContextValue {
|
interface AuthContextValue {
|
||||||
user: UserResponse | null;
|
user: UserResponse | null;
|
||||||
token: string | null;
|
token: string | null;
|
||||||
isAuthenticated: boolean;
|
isAuthenticated: boolean;
|
||||||
|
isImpersonating: boolean;
|
||||||
loading: boolean;
|
loading: boolean;
|
||||||
login: (email: string, password: string) => Promise<void>;
|
login: (email: string, password: string) => Promise<void>;
|
||||||
register: (data: RegisterRequest) => Promise<UserResponse>;
|
register: (data: RegisterRequest) => Promise<UserResponse>;
|
||||||
logout: () => void;
|
logout: () => void;
|
||||||
|
startImpersonation: (userId: string) => Promise<void>;
|
||||||
|
exitImpersonation: () => Promise<void>;
|
||||||
}
|
}
|
||||||
|
|
||||||
const AuthContext = createContext<AuthContextValue | null>(null);
|
const AuthContext = createContext<AuthContextValue | null>(null);
|
||||||
|
|
@ -77,20 +84,58 @@ export function AuthProvider({ children }: { children: ReactNode }) {
|
||||||
|
|
||||||
const logout = useCallback(() => {
|
const logout = useCallback(() => {
|
||||||
localStorage.removeItem(AUTH_TOKEN_KEY);
|
localStorage.removeItem(AUTH_TOKEN_KEY);
|
||||||
|
sessionStorage.removeItem(ADMIN_TOKEN_KEY);
|
||||||
setToken(null);
|
setToken(null);
|
||||||
setUser(null);
|
setUser(null);
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
|
const startImpersonation = useCallback(async (userId: string) => {
|
||||||
|
if (!token) return;
|
||||||
|
// Save admin token so we can restore it later
|
||||||
|
sessionStorage.setItem(ADMIN_TOKEN_KEY, token);
|
||||||
|
const resp = await impersonateUser(token, userId);
|
||||||
|
localStorage.setItem(AUTH_TOKEN_KEY, resp.access_token);
|
||||||
|
setToken(resp.access_token);
|
||||||
|
const me = await authGetMe(resp.access_token);
|
||||||
|
setUser(me);
|
||||||
|
}, [token]);
|
||||||
|
|
||||||
|
const exitImpersonation = useCallback(async () => {
|
||||||
|
// Try to call stop endpoint for audit log
|
||||||
|
if (token) {
|
||||||
|
try {
|
||||||
|
await apiStopImpersonation(token);
|
||||||
|
} catch {
|
||||||
|
// Best effort — still restore admin session
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Restore admin token
|
||||||
|
const adminToken = sessionStorage.getItem(ADMIN_TOKEN_KEY);
|
||||||
|
sessionStorage.removeItem(ADMIN_TOKEN_KEY);
|
||||||
|
if (adminToken) {
|
||||||
|
localStorage.setItem(AUTH_TOKEN_KEY, adminToken);
|
||||||
|
setToken(adminToken);
|
||||||
|
const me = await authGetMe(adminToken);
|
||||||
|
setUser(me);
|
||||||
|
} else {
|
||||||
|
// Fallback: just logout
|
||||||
|
logout();
|
||||||
|
}
|
||||||
|
}, [token, logout]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<AuthContext.Provider
|
<AuthContext.Provider
|
||||||
value={{
|
value={{
|
||||||
user,
|
user,
|
||||||
token,
|
token,
|
||||||
isAuthenticated: !!user,
|
isAuthenticated: !!user,
|
||||||
|
isImpersonating: !!user?.impersonating,
|
||||||
loading,
|
loading,
|
||||||
login,
|
login,
|
||||||
register,
|
register,
|
||||||
logout,
|
logout,
|
||||||
|
startImpersonation,
|
||||||
|
exitImpersonation,
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
{children}
|
{children}
|
||||||
|
|
|
||||||
96
frontend/src/pages/AdminUsers.module.css
Normal file
96
frontend/src/pages/AdminUsers.module.css
Normal file
|
|
@ -0,0 +1,96 @@
|
||||||
|
.page {
|
||||||
|
max-width: 900px;
|
||||||
|
margin: 0 auto;
|
||||||
|
padding: 2rem 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.title {
|
||||||
|
font-size: 1.5rem;
|
||||||
|
font-weight: 700;
|
||||||
|
margin-bottom: 1.5rem;
|
||||||
|
color: var(--text-primary, #e2e8f0);
|
||||||
|
}
|
||||||
|
|
||||||
|
.table {
|
||||||
|
width: 100%;
|
||||||
|
border-collapse: collapse;
|
||||||
|
font-size: 0.9rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.table th {
|
||||||
|
text-align: left;
|
||||||
|
padding: 0.6rem 0.75rem;
|
||||||
|
border-bottom: 2px solid var(--color-border, #2d2d3d);
|
||||||
|
color: var(--text-secondary, #828291);
|
||||||
|
font-weight: 600;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.05em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.table td {
|
||||||
|
padding: 0.6rem 0.75rem;
|
||||||
|
border-bottom: 1px solid var(--color-border, #2d2d3d);
|
||||||
|
color: var(--text-primary, #e2e8f0);
|
||||||
|
}
|
||||||
|
|
||||||
|
.roleBadge {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 0.15rem 0.5rem;
|
||||||
|
border-radius: 4px;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
font-weight: 600;
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.03em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.roleBadge[data-role="admin"] {
|
||||||
|
background: rgba(239, 68, 68, 0.15);
|
||||||
|
color: #ef4444;
|
||||||
|
}
|
||||||
|
|
||||||
|
.roleBadge[data-role="creator"] {
|
||||||
|
background: rgba(34, 211, 238, 0.15);
|
||||||
|
color: #22d3ee;
|
||||||
|
}
|
||||||
|
|
||||||
|
.viewAsBtn {
|
||||||
|
padding: 0.3rem 0.6rem;
|
||||||
|
border: 1px solid var(--color-accent, #22d3ee);
|
||||||
|
border-radius: 4px;
|
||||||
|
background: transparent;
|
||||||
|
color: var(--color-accent, #22d3ee);
|
||||||
|
font-size: 0.8rem;
|
||||||
|
font-weight: 600;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: background 150ms, color 150ms;
|
||||||
|
}
|
||||||
|
|
||||||
|
.viewAsBtn:hover {
|
||||||
|
background: var(--color-accent, #22d3ee);
|
||||||
|
color: var(--color-bg, #0f0f1a);
|
||||||
|
}
|
||||||
|
|
||||||
|
.viewAsBtn:disabled {
|
||||||
|
opacity: 0.4;
|
||||||
|
cursor: not-allowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
.loading,
|
||||||
|
.error,
|
||||||
|
.empty {
|
||||||
|
text-align: center;
|
||||||
|
padding: 3rem 1rem;
|
||||||
|
color: var(--text-secondary, #828291);
|
||||||
|
}
|
||||||
|
|
||||||
|
.error {
|
||||||
|
color: #ef4444;
|
||||||
|
}
|
||||||
|
|
||||||
|
@media (max-width: 600px) {
|
||||||
|
.table th:nth-child(2),
|
||||||
|
.table td:nth-child(2) {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
}
|
||||||
96
frontend/src/pages/AdminUsers.tsx
Normal file
96
frontend/src/pages/AdminUsers.tsx
Normal file
|
|
@ -0,0 +1,96 @@
|
||||||
|
import { useEffect, useState } from "react";
|
||||||
|
import { useAuth } from "../context/AuthContext";
|
||||||
|
import { fetchUsers, type UserListItem } from "../api";
|
||||||
|
import { useDocumentTitle } from "../hooks/useDocumentTitle";
|
||||||
|
import styles from "./AdminUsers.module.css";
|
||||||
|
|
||||||
|
export default function AdminUsers() {
|
||||||
|
useDocumentTitle("Users — Admin");
|
||||||
|
const { token, startImpersonation, user: currentUser } = useAuth();
|
||||||
|
const [users, setUsers] = useState<UserListItem[]>([]);
|
||||||
|
const [loading, setLoading] = useState(true);
|
||||||
|
const [error, setError] = useState<string | null>(null);
|
||||||
|
const [impersonating, setImpersonating] = useState<string | null>(null);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (!token) return;
|
||||||
|
setLoading(true);
|
||||||
|
fetchUsers(token)
|
||||||
|
.then(setUsers)
|
||||||
|
.catch((e) => setError(e.message || "Failed to load users"))
|
||||||
|
.finally(() => setLoading(false));
|
||||||
|
}, [token]);
|
||||||
|
|
||||||
|
async function handleViewAs(userId: string) {
|
||||||
|
setImpersonating(userId);
|
||||||
|
try {
|
||||||
|
await startImpersonation(userId);
|
||||||
|
// Navigation will happen via auth context update
|
||||||
|
} catch (e: any) {
|
||||||
|
setError(e.message || "Failed to start impersonation");
|
||||||
|
setImpersonating(null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (loading) {
|
||||||
|
return (
|
||||||
|
<div className={styles.page}>
|
||||||
|
<h1 className={styles.title}>Users</h1>
|
||||||
|
<p className={styles.loading}>Loading users…</p>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (error) {
|
||||||
|
return (
|
||||||
|
<div className={styles.page}>
|
||||||
|
<h1 className={styles.title}>Users</h1>
|
||||||
|
<p className={styles.error}>{error}</p>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className={styles.page}>
|
||||||
|
<h1 className={styles.title}>Users</h1>
|
||||||
|
{users.length === 0 ? (
|
||||||
|
<p className={styles.empty}>No users found.</p>
|
||||||
|
) : (
|
||||||
|
<table className={styles.table}>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Name</th>
|
||||||
|
<th>Email</th>
|
||||||
|
<th>Role</th>
|
||||||
|
<th>Actions</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{users.map((u) => (
|
||||||
|
<tr key={u.id}>
|
||||||
|
<td>{u.display_name}</td>
|
||||||
|
<td>{u.email}</td>
|
||||||
|
<td>
|
||||||
|
<span className={styles.roleBadge} data-role={u.role}>
|
||||||
|
{u.role}
|
||||||
|
</span>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
{u.role === "creator" && u.id !== currentUser?.id && (
|
||||||
|
<button
|
||||||
|
className={styles.viewAsBtn}
|
||||||
|
disabled={impersonating === u.id}
|
||||||
|
onClick={() => handleViewAs(u.id)}
|
||||||
|
>
|
||||||
|
{impersonating === u.id ? "Switching…" : "View As"}
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
))}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
@ -1 +1 @@
|
||||||
{"root":["./src/App.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/api/admin-pipeline.ts","./src/api/admin-techniques.ts","./src/api/auth.ts","./src/api/client.ts","./src/api/consent.ts","./src/api/creator-dashboard.ts","./src/api/creators.ts","./src/api/index.ts","./src/api/reports.ts","./src/api/search.ts","./src/api/stats.ts","./src/api/techniques.ts","./src/api/topics.ts","./src/api/videos.ts","./src/components/AdminDropdown.tsx","./src/components/AppFooter.tsx","./src/components/CategoryIcons.tsx","./src/components/CopyLinkButton.tsx","./src/components/CreatorAvatar.tsx","./src/components/PlayerControls.tsx","./src/components/ProtectedRoute.tsx","./src/components/ReportIssueModal.tsx","./src/components/SearchAutocomplete.tsx","./src/components/SocialIcons.tsx","./src/components/SortDropdown.tsx","./src/components/TableOfContents.tsx","./src/components/TagList.tsx","./src/components/ToggleSwitch.tsx","./src/components/TranscriptSidebar.tsx","./src/components/VideoPlayer.tsx","./src/context/AuthContext.tsx","./src/hooks/useCountUp.ts","./src/hooks/useDocumentTitle.ts","./src/hooks/useMediaSync.ts","./src/hooks/useSortPreference.ts","./src/pages/About.tsx","./src/pages/AdminPipeline.tsx","./src/pages/AdminReports.tsx","./src/pages/AdminTechniquePages.tsx","./src/pages/ConsentDashboard.tsx","./src/pages/CreatorDashboard.tsx","./src/pages/CreatorDetail.tsx","./src/pages/CreatorSettings.tsx","./src/pages/CreatorsBrowse.tsx","./src/pages/Home.tsx","./src/pages/Login.tsx","./src/pages/Register.tsx","./src/pages/SearchResults.tsx","./src/pages/SubTopicPage.tsx","./src/pages/TechniquePage.tsx","./src/pages/TopicsBrowse.tsx","./src/pages/WatchPage.tsx","./src/utils/catSlug.ts","./src/utils/citations.tsx"],"version":"5.6.3"}
|
{"root":["./src/App.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/api/admin-pipeline.ts","./src/api/admin-techniques.ts","./src/api/auth.ts","./src/api/client.ts","./src/api/consent.ts","./src/api/creator-dashboard.ts","./src/api/creators.ts","./src/api/index.ts","./src/api/reports.ts","./src/api/search.ts","./src/api/stats.ts","./src/api/techniques.ts","./src/api/topics.ts","./src/api/videos.ts","./src/components/AdminDropdown.tsx","./src/components/AppFooter.tsx","./src/components/CategoryIcons.tsx","./src/components/CopyLinkButton.tsx","./src/components/CreatorAvatar.tsx","./src/components/ImpersonationBanner.tsx","./src/components/PlayerControls.tsx","./src/components/ProtectedRoute.tsx","./src/components/ReportIssueModal.tsx","./src/components/SearchAutocomplete.tsx","./src/components/SocialIcons.tsx","./src/components/SortDropdown.tsx","./src/components/TableOfContents.tsx","./src/components/TagList.tsx","./src/components/ToggleSwitch.tsx","./src/components/TranscriptSidebar.tsx","./src/components/VideoPlayer.tsx","./src/context/AuthContext.tsx","./src/hooks/useCountUp.ts","./src/hooks/useDocumentTitle.ts","./src/hooks/useMediaSync.ts","./src/hooks/useSortPreference.ts","./src/pages/About.tsx","./src/pages/AdminPipeline.tsx","./src/pages/AdminReports.tsx","./src/pages/AdminTechniquePages.tsx","./src/pages/AdminUsers.tsx","./src/pages/ConsentDashboard.tsx","./src/pages/CreatorDashboard.tsx","./src/pages/CreatorDetail.tsx","./src/pages/CreatorSettings.tsx","./src/pages/CreatorsBrowse.tsx","./src/pages/Home.tsx","./src/pages/Login.tsx","./src/pages/Register.tsx","./src/pages/SearchResults.tsx","./src/pages/SubTopicPage.tsx","./src/pages/TechniquePage.tsx","./src/pages/TopicsBrowse.tsx","./src/pages/WatchPage.tsx","./src/utils/catSlug.ts","./src/utils/citations.tsx"],"version":"5.6.3"}
|
||||||
Loading…
Add table
Reference in a new issue