Auto-mode commit 7aa33cd accidentally deleted 78 files (14,814 lines) during M005
execution. Subsequent commits rebuilt some frontend files but backend/, alembic/,
tests/, whisper/, docker configs, and prompts were never restored in this repo.
This commit restores the full project tree by syncing from ub01's working directory,
which has all M001-M007 features running in production containers.
Restored: backend/ (config, models, routers, database, redis, search_service, worker),
alembic/ (6 migrations), docker/ (Dockerfiles, nginx, compose), prompts/ (4 stages),
tests/, whisper/, README.md, .env.example, chrysopedia-spec.md
184 lines
6.3 KiB
Python
184 lines
6.3 KiB
Python
"""Qdrant vector database manager for collection lifecycle and point upserts.
|
|
|
|
Handles collection creation (idempotent) and batch upserts for technique pages
|
|
and key moments. Connection failures are non-blocking — the pipeline continues
|
|
without search indexing.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import uuid
|
|
|
|
from qdrant_client import QdrantClient
|
|
from qdrant_client.http import exceptions as qdrant_exceptions
|
|
from qdrant_client.models import Distance, PointStruct, VectorParams
|
|
|
|
from config import Settings
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class QdrantManager:
    """Manages a Qdrant collection for Chrysopedia technique-page and key-moment vectors.

    Calls to the Qdrant client in :meth:`ensure_collection` and
    :meth:`upsert_points` are wrapped in ``try``/``except``: failures are
    logged at WARNING level and swallowed, so callers keep running without
    search indexing.

    Point IDs are derived deterministically from the entity type and its
    identifier (see :meth:`_point_id`), so upserting the same technique page
    or key moment again targets the same point ID instead of creating a new
    point on every run.
    """

    def __init__(self, settings: Settings) -> None:
        """Store *settings* and build the Qdrant client.

        Reads ``settings.qdrant_url`` for the client URL and
        ``settings.qdrant_collection`` for the collection name.
        ``settings.embedding_dimensions`` is read later by
        :meth:`ensure_collection`.
        """
        self.settings = settings
        self._client = QdrantClient(url=settings.qdrant_url)
        self._collection = settings.qdrant_collection

    # ── Collection management ────────────────────────────────────────────

    def ensure_collection(self) -> None:
        """Create the collection if it does not already exist.

        Uses cosine distance and the configured embedding dimensions.
        Any exception raised by the client is logged as a warning and
        suppressed; the method always returns ``None``.
        """
        try:
            if self._client.collection_exists(self._collection):
                logger.info("Qdrant collection '%s' already exists.", self._collection)
                return

            self._client.create_collection(
                collection_name=self._collection,
                vectors_config=VectorParams(
                    size=self.settings.embedding_dimensions,
                    distance=Distance.COSINE,
                ),
            )
            logger.info(
                "Created Qdrant collection '%s' (dim=%d, cosine).",
                self._collection,
                self.settings.embedding_dimensions,
            )
        except qdrant_exceptions.UnexpectedResponse as exc:
            logger.warning(
                "Qdrant error during ensure_collection (%s). Skipping.",
                exc,
            )
        except Exception as exc:
            # Deliberate best-effort: anything else (e.g. connection errors)
            # must not stop the caller, so it is logged and dropped.
            logger.warning(
                "Qdrant connection failed during ensure_collection (%s: %s). Skipping.",
                type(exc).__name__,
                exc,
            )

    # ── Low-level upsert ─────────────────────────────────────────────────

    def upsert_points(self, points: list[PointStruct]) -> None:
        """Upsert a batch of pre-built PointStruct objects.

        An empty batch is a no-op (no client call, no log line). Any
        exception raised by the client is logged as a warning and
        suppressed; the method always returns ``None``.
        """
        if not points:
            return
        try:
            self._client.upsert(
                collection_name=self._collection,
                points=points,
            )
            logger.info(
                "Upserted %d points to Qdrant collection '%s'.",
                len(points),
                self._collection,
            )
        except qdrant_exceptions.UnexpectedResponse as exc:
            logger.warning(
                "Qdrant upsert failed (%s). %d points skipped.",
                exc,
                len(points),
            )
        except Exception as exc:
            # Deliberate best-effort: see ensure_collection.
            logger.warning(
                "Qdrant upsert connection error (%s: %s). %d points skipped.",
                type(exc).__name__,
                exc,
                len(points),
            )

    # ── High-level upserts ───────────────────────────────────────────────

    @staticmethod
    def _point_id(kind: str, key: object) -> str:
        """Return a stable UUID string for the entity *key* of type *kind*.

        The ID is a version-5 UUID of ``"<kind>:<key>"``, so the same entity
        always maps to the same point ID, while entities of different kinds
        that share an identifier do not collide. *key* is formatted with
        ``str()``, so a ``UUID`` object and its string form yield the same ID.
        """
        return str(uuid.uuid5(uuid.NAMESPACE_URL, f"{kind}:{key}"))

    def upsert_technique_pages(
        self,
        pages: list[dict],
        vectors: list[list[float]],
    ) -> None:
        """Build and upsert PointStructs for technique pages.

        Each page dict must contain:
            page_id, creator_id, title, topic_category, topic_tags, summary

        ``topic_tags`` and ``summary`` may be missing or falsy; they are
        stored as ``[]`` and ``""`` respectively. The other keys are
        required and raise ``KeyError`` when absent.

        Point IDs are derived from ``page_id``, so re-indexing a page
        replaces its existing point. Points written earlier under random
        IDs are not cleaned up by this method.

        Parameters
        ----------
        pages:
            Metadata dicts, one per technique page.
        vectors:
            Corresponding embedding vectors (same order as pages).
        """
        if len(pages) != len(vectors):
            logger.warning(
                "Technique-page count (%d) != vector count (%d). Skipping upsert.",
                len(pages),
                len(vectors),
            )
            return

        points = [
            PointStruct(
                id=self._point_id("technique_page", page["page_id"]),
                vector=vector,
                payload={
                    "type": "technique_page",
                    "page_id": page["page_id"],
                    "creator_id": page["creator_id"],
                    "title": page["title"],
                    "topic_category": page["topic_category"],
                    "topic_tags": page.get("topic_tags") or [],
                    "summary": page.get("summary") or "",
                },
            )
            for page, vector in zip(pages, vectors)
        ]

        self.upsert_points(points)

    def upsert_key_moments(
        self,
        moments: list[dict],
        vectors: list[list[float]],
    ) -> None:
        """Build and upsert PointStructs for key moments.

        Each moment dict must contain:
            moment_id, source_video_id, title, start_time, end_time, content_type

        All of these keys are required and raise ``KeyError`` when absent.

        Point IDs are derived from ``moment_id``, so re-indexing a moment
        replaces its existing point. Points written earlier under random
        IDs are not cleaned up by this method.

        Parameters
        ----------
        moments:
            Metadata dicts, one per key moment.
        vectors:
            Corresponding embedding vectors (same order as moments).
        """
        if len(moments) != len(vectors):
            logger.warning(
                "Key-moment count (%d) != vector count (%d). Skipping upsert.",
                len(moments),
                len(vectors),
            )
            return

        points = [
            PointStruct(
                id=self._point_id("key_moment", moment["moment_id"]),
                vector=vector,
                payload={
                    "type": "key_moment",
                    "moment_id": moment["moment_id"],
                    "source_video_id": moment["source_video_id"],
                    "title": moment["title"],
                    "start_time": moment["start_time"],
                    "end_time": moment["end_time"],
                    "content_type": moment["content_type"],
                },
            )
            for moment, vector in zip(moments, vectors)
        ]

        self.upsert_points(points)
|