chrysopedia/backend/pipeline/qdrant_client.py
jlightner 4b0914b12b fix: restore complete project tree from ub01 canonical state
Auto-mode commit 7aa33cd accidentally deleted 78 files (14,814 lines) during M005
execution. Subsequent commits rebuilt some frontend files but backend/, alembic/,
tests/, whisper/, docker configs, and prompts were never restored in this repo.

This commit restores the full project tree by syncing from ub01's working directory,
which has all M001-M007 features running in production containers.

Restored: backend/ (config, models, routers, database, redis, search_service, worker),
alembic/ (6 migrations), docker/ (Dockerfiles, nginx, compose), prompts/ (4 stages),
tests/, whisper/, README.md, .env.example, chrysopedia-spec.md
2026-03-31 02:10:41 +00:00

184 lines
6.3 KiB
Python

"""Qdrant vector database manager for collection lifecycle and point upserts.
Handles collection creation (idempotent) and batch upserts for technique pages
and key moments. Connection failures are non-blocking — the pipeline continues
without search indexing.
"""
from __future__ import annotations
import logging
import uuid
from qdrant_client import QdrantClient
from qdrant_client.http import exceptions as qdrant_exceptions
from qdrant_client.models import Distance, PointStruct, VectorParams
from config import Settings
logger = logging.getLogger(__name__)
class QdrantManager:
    """Manages a Qdrant collection for Chrysopedia technique-page and key-moment vectors.

    ``ensure_collection`` and ``upsert_points`` catch every exception raised
    by the underlying client calls and log it as a warning instead of
    re-raising, so a Qdrant outage does not propagate to the caller.

    Point IDs are derived deterministically from the entity type and its
    identifier (see ``_point_id``), so indexing the same page or moment
    again overwrites its existing point rather than adding a duplicate.
    """

    def __init__(self, settings: Settings) -> None:
        """Store *settings* and build a client for ``settings.qdrant_url``.

        The collection name is taken from ``settings.qdrant_collection``.
        """
        self.settings = settings
        self._client = QdrantClient(url=settings.qdrant_url)
        self._collection = settings.qdrant_collection

    # ── Collection management ────────────────────────────────────────────
    def ensure_collection(self) -> None:
        """Create the collection if it does not already exist.

        Uses cosine distance and the configured embedding dimensions.
        Any client error is logged as a warning and swallowed.
        """
        try:
            if self._client.collection_exists(self._collection):
                logger.info("Qdrant collection '%s' already exists.", self._collection)
                return
            self._client.create_collection(
                collection_name=self._collection,
                vectors_config=VectorParams(
                    size=self.settings.embedding_dimensions,
                    distance=Distance.COSINE,
                ),
            )
            logger.info(
                "Created Qdrant collection '%s' (dim=%d, cosine).",
                self._collection,
                self.settings.embedding_dimensions,
            )
        except qdrant_exceptions.UnexpectedResponse as exc:
            logger.warning(
                "Qdrant error during ensure_collection (%s). Skipping.",
                exc,
            )
        except Exception as exc:
            # Deliberate best-effort: anything else (e.g. a transport error)
            # is logged with its type and the caller carries on.
            logger.warning(
                "Qdrant connection failed during ensure_collection (%s: %s). Skipping.",
                type(exc).__name__,
                exc,
            )

    # ── Low-level upsert ─────────────────────────────────────────────────
    def upsert_points(self, points: list[PointStruct]) -> None:
        """Upsert a batch of pre-built PointStruct objects.

        An empty batch is a no-op (no client call, no log line). Any client
        error is logged as a warning together with the number of skipped
        points and is not re-raised.
        """
        if not points:
            return
        try:
            self._client.upsert(
                collection_name=self._collection,
                points=points,
            )
            logger.info(
                "Upserted %d points to Qdrant collection '%s'.",
                len(points),
                self._collection,
            )
        except qdrant_exceptions.UnexpectedResponse as exc:
            logger.warning(
                "Qdrant upsert failed (%s). %d points skipped.",
                exc,
                len(points),
            )
        except Exception as exc:
            # Deliberate best-effort: see ensure_collection.
            logger.warning(
                "Qdrant upsert connection error (%s: %s). %d points skipped.",
                type(exc).__name__,
                exc,
                len(points),
            )

    # ── Point identity ───────────────────────────────────────────────────
    @staticmethod
    def _point_id(kind: str, entity_id: object) -> str:
        """Return a stable UUID string for the point of *kind* / *entity_id*.

        The ID is a name-based UUID (version 5), so the same inputs always
        yield the same ID. *kind* is part of the hashed name so that a
        technique page and a key moment sharing an identifier value still
        get distinct points.
        """
        return str(uuid.uuid5(uuid.NAMESPACE_URL, f"{kind}:{entity_id}"))

    # ── High-level upserts ───────────────────────────────────────────────
    def upsert_technique_pages(
        self,
        pages: list[dict],
        vectors: list[list[float]],
    ) -> None:
        """Build and upsert PointStructs for technique pages.

        Each page dict must contain:
            page_id, creator_id, title, topic_category
        and may contain:
            topic_tags (stored as ``[]`` when missing or falsy),
            summary (stored as ``""`` when missing or falsy).

        If the two lists differ in length, a warning is logged and nothing
        is upserted.

        NOTE: points written before IDs became deterministic carry random
        IDs; they are not replaced by this method and need a one-off cleanup.

        Parameters
        ----------
        pages:
            Metadata dicts, one per technique page.
        vectors:
            Corresponding embedding vectors (same order as pages).

        Raises
        ------
        KeyError
            If a page dict lacks one of the required keys.
        """
        if len(pages) != len(vectors):
            logger.warning(
                "Technique-page count (%d) != vector count (%d). Skipping upsert.",
                len(pages),
                len(vectors),
            )
            return
        points = [
            PointStruct(
                # Derived from page_id so a re-index overwrites the old point.
                id=self._point_id("technique_page", page["page_id"]),
                vector=vector,
                payload={
                    "type": "technique_page",
                    "page_id": page["page_id"],
                    "creator_id": page["creator_id"],
                    "title": page["title"],
                    "topic_category": page["topic_category"],
                    "topic_tags": page.get("topic_tags") or [],
                    "summary": page.get("summary") or "",
                },
            )
            for page, vector in zip(pages, vectors)
        ]
        self.upsert_points(points)

    def upsert_key_moments(
        self,
        moments: list[dict],
        vectors: list[list[float]],
    ) -> None:
        """Build and upsert PointStructs for key moments.

        Each moment dict must contain:
            moment_id, source_video_id, title, start_time, end_time, content_type

        If the two lists differ in length, a warning is logged and nothing
        is upserted.

        NOTE: points written before IDs became deterministic carry random
        IDs; they are not replaced by this method and need a one-off cleanup.

        Parameters
        ----------
        moments:
            Metadata dicts, one per key moment.
        vectors:
            Corresponding embedding vectors (same order as moments).

        Raises
        ------
        KeyError
            If a moment dict lacks one of the required keys.
        """
        if len(moments) != len(vectors):
            logger.warning(
                "Key-moment count (%d) != vector count (%d). Skipping upsert.",
                len(moments),
                len(vectors),
            )
            return
        points = [
            PointStruct(
                # Derived from moment_id so a re-index overwrites the old point.
                id=self._point_id("key_moment", moment["moment_id"]),
                vector=vector,
                payload={
                    "type": "key_moment",
                    "moment_id": moment["moment_id"],
                    "source_video_id": moment["source_video_id"],
                    "title": moment["title"],
                    "start_time": moment["start_time"],
                    "end_time": moment["end_time"],
                    "content_type": moment["content_type"],
                },
            )
            for moment, vector in zip(moments, vectors)
        ]
        self.upsert_points(points)