chrysopedia/backend/tests/test_public_api.py
jlightner 4b0914b12b fix: restore complete project tree from ub01 canonical state
Auto-mode commit 7aa33cd accidentally deleted 78 files (14,814 lines) during M005
execution. Subsequent commits rebuilt some frontend files but backend/, alembic/,
tests/, whisper/, docker configs, and prompts were never restored in this repo.

This commit restores the full project tree by syncing from ub01's working directory,
which has all M001-M007 features running in production containers.

Restored: backend/ (config, models, routers, database, redis, search_service, worker),
alembic/ (6 migrations), docker/ (Dockerfiles, nginx, compose), prompts/ (4 stages),
tests/, whisper/, README.md, .env.example, chrysopedia-spec.md
2026-03-31 02:10:41 +00:00

526 lines
18 KiB
Python

"""Integration tests for the public S05 API endpoints:
techniques, topics, and enhanced creators.
Tests run against a real PostgreSQL test database via httpx.AsyncClient.
"""
from __future__ import annotations
import uuid
import pytest
import pytest_asyncio
from httpx import AsyncClient
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from models import (
ContentType,
Creator,
KeyMoment,
KeyMomentContentType,
ProcessingStatus,
RelatedTechniqueLink,
RelationshipType,
SourceVideo,
TechniquePage,
)
# Route paths of the public API endpoints requested by the tests in this module.
TECHNIQUES_URL = "/api/v1/techniques"
TOPICS_URL = "/api/v1/topics"
CREATORS_URL = "/api/v1/creators"
# ── Seed helpers ─────────────────────────────────────────────────────────────
async def _seed_full_data(db_engine) -> dict:
    """Populate the test database with a small, fully linked fixture set.

    Inserts two creators, one source video per creator, three technique
    pages (two for the first creator, one for the second), three key
    moments, and one related-technique link from the first page to the
    third.

    Returns:
        A dict of stringified IDs plus names, slugs, and titles that the
        tests use in their assertions.
    """
    make_session = async_sessionmaker(
        db_engine, class_=AsyncSession, expire_on_commit=False
    )

    async with make_session() as db:
        # Creators are flushed first; the rows below reference their ids.
        alpha = Creator(
            name="Alpha Creator",
            slug="alpha-creator",
            genres=["Bass music", "Dubstep"],
            folder_name="AlphaCreator",
        )
        beta = Creator(
            name="Beta Producer",
            slug="beta-producer",
            genres=["House", "Techno"],
            folder_name="BetaProducer",
        )
        db.add_all([alpha, beta])
        await db.flush()

        # One tutorial video per creator, both marked as extracted.
        bass_video = SourceVideo(
            creator_id=alpha.id,
            filename="bass-tutorial.mp4",
            file_path="AlphaCreator/bass-tutorial.mp4",
            duration_seconds=600,
            content_type=ContentType.tutorial,
            processing_status=ProcessingStatus.extracted,
        )
        mixing_video = SourceVideo(
            creator_id=beta.id,
            filename="mixing-masterclass.mp4",
            file_path="BetaProducer/mixing-masterclass.mp4",
            duration_seconds=1200,
            content_type=ContentType.tutorial,
            processing_status=ProcessingStatus.extracted,
        )
        db.add_all([bass_video, mixing_video])
        await db.flush()

        # Technique pages: two owned by the first creator, one by the second.
        reese_page = TechniquePage(
            creator_id=alpha.id,
            title="Reese Bass Design",
            slug="reese-bass-design",
            topic_category="Sound design",
            topic_tags=["bass", "textures"],
            summary="Classic reese bass creation",
            body_sections={"intro": "Getting started with reese bass"},
        )
        granular_page = TechniquePage(
            creator_id=beta.id,
            title="Granular Pad Textures",
            slug="granular-pad-textures",
            topic_category="Synthesis",
            topic_tags=["granular", "pads"],
            summary="Creating evolving pad textures",
        )
        fm_page = TechniquePage(
            creator_id=alpha.id,
            title="FM Bass Layering",
            slug="fm-bass-layering",
            topic_category="Synthesis",
            topic_tags=["fm", "bass"],
            summary="FM synthesis for bass layers",
        )
        db.add_all([reese_page, granular_page, fm_page])
        await db.flush()

        # Key moments: two on the reese page, one on the granular page.
        # Columns: video, page, title, summary, start, end, content type.
        moment_specs = [
            (
                bass_video,
                reese_page,
                "Oscillator setup",
                "Setting up the initial oscillator",
                10.0,
                60.0,
                KeyMomentContentType.technique,
            ),
            (
                bass_video,
                reese_page,
                "Distortion chain",
                "Adding distortion to the reese",
                60.0,
                120.0,
                KeyMomentContentType.technique,
            ),
            (
                mixing_video,
                granular_page,
                "Granular engine parameters",
                "Configuring the granular engine",
                20.0,
                80.0,
                KeyMomentContentType.settings,
            ),
        ]
        db.add_all(
            [
                KeyMoment(
                    source_video_id=video.id,
                    technique_page_id=page.id,
                    title=title,
                    summary=summary,
                    start_time=start,
                    end_time=end,
                    content_type=kind,
                )
                for video, page, title, summary, start, end, kind in moment_specs
            ]
        )
        await db.flush()

        # Related technique link: reese page → FM page (same_creator_adjacent).
        db.add(
            RelatedTechniqueLink(
                source_page_id=reese_page.id,
                target_page_id=fm_page.id,
                relationship=RelationshipType.same_creator_adjacent,
            )
        )
        await db.commit()

        return {
            "creator1_id": str(alpha.id),
            "creator1_name": alpha.name,
            "creator1_slug": alpha.slug,
            "creator2_id": str(beta.id),
            "creator2_name": beta.name,
            "creator2_slug": beta.slug,
            "video1_id": str(bass_video.id),
            "video2_id": str(mixing_video.id),
            "tp1_slug": reese_page.slug,
            "tp1_title": reese_page.title,
            "tp2_slug": granular_page.slug,
            "tp3_slug": fm_page.slug,
            "tp3_title": fm_page.title,
        }
# ── Technique Tests ──────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_list_techniques(client, db_engine):
    """GET /techniques lists every seeded technique page with a matching total."""
    seeded = await _seed_full_data(db_engine)

    response = await client.get(TECHNIQUES_URL)
    assert response.status_code == 200

    payload = response.json()
    assert payload["total"] == 3
    assert len(payload["items"]) == 3

    # All three seeded slugs must appear in the listing.
    returned_slugs = {entry["slug"] for entry in payload["items"]}
    for slug_key in ("tp1_slug", "tp2_slug", "tp3_slug"):
        assert seeded[slug_key] in returned_slugs
@pytest.mark.asyncio
async def test_list_techniques_with_category_filter(client, db_engine):
    """GET /techniques?category=Synthesis only returns pages in that category."""
    await _seed_full_data(db_engine)

    query = {"category": "Synthesis"}
    response = await client.get(TECHNIQUES_URL, params=query)
    assert response.status_code == 200

    payload = response.json()
    # Two of the three seeded pages are in the Synthesis category.
    assert payload["total"] == 2
    assert all(
        entry["topic_category"] == "Synthesis" for entry in payload["items"]
    )
@pytest.mark.asyncio
async def test_get_technique_detail(client, db_engine):
    """GET /techniques/{slug} returns the page with key moments, creator info, and related links."""
    seeded = await _seed_full_data(db_engine)

    detail_url = f"{TECHNIQUES_URL}/{seeded['tp1_slug']}"
    response = await client.get(detail_url)
    assert response.status_code == 200

    detail = response.json()
    assert detail["title"] == seeded["tp1_title"]
    assert detail["slug"] == seeded["tp1_slug"]
    assert detail["topic_category"] == "Sound design"

    # The first seeded page carries exactly two key moments.
    moments = detail["key_moments"]
    assert len(moments) == 2
    moment_titles = {moment["title"] for moment in moments}
    assert "Oscillator setup" in moment_titles
    assert "Distortion chain" in moment_titles

    # The owning creator is embedded in the response.
    creator_info = detail["creator_info"]
    assert creator_info is not None
    assert creator_info["name"] == seeded["creator1_name"]
    assert creator_info["slug"] == seeded["creator1_slug"]

    # The seeded link from the first page to the third must be reported.
    related = detail["related_links"]
    assert len(related) >= 1
    target_slugs = {entry["target_slug"] for entry in related}
    assert seeded["tp3_slug"] in target_slugs
@pytest.mark.asyncio
async def test_get_technique_invalid_slug_returns_404(client, db_engine):
    """GET /techniques/{slug} with an unknown slug answers 404 with a "not found" detail."""
    await _seed_full_data(db_engine)

    missing_url = f"{TECHNIQUES_URL}/nonexistent-slug-xyz"
    response = await client.get(missing_url)

    assert response.status_code == 404
    detail_message = response.json()["detail"]
    assert "not found" in detail_message.lower()
# ── Topics Tests ─────────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_list_topics_hierarchy(client, db_engine):
    """GET /topics returns the category hierarchy with counts derived from the seed data."""
    await _seed_full_data(db_engine)

    response = await client.get(TOPICS_URL)
    assert response.status_code == 200

    categories = response.json()
    # Six categories are expected from canonical_tags.yaml.
    assert len(categories) == 6
    names = {category["name"] for category in categories}
    assert "Sound design" in names
    assert "Synthesis" in names
    assert "Mixing" in names

    # Sound design → "bass": tagged on the first and third seeded pages,
    # both of which belong to the same creator.
    sound_design = next(
        category for category in categories if category["name"] == "Sound design"
    )
    bass = next(
        (sub for sub in sound_design["sub_topics"] if sub["name"] == "bass"),
        None,
    )
    assert bass is not None
    assert bass["technique_count"] == 2
    assert bass["creator_count"] == 1

    # Synthesis → "granular": tagged on a single seeded page.
    synthesis = next(
        category for category in categories if category["name"] == "Synthesis"
    )
    granular = next(
        (sub for sub in synthesis["sub_topics"] if sub["name"] == "granular"),
        None,
    )
    assert granular is not None
    assert granular["technique_count"] == 1
    assert granular["creator_count"] == 1
@pytest.mark.asyncio
async def test_topics_with_no_technique_pages(client, db_engine):
    """GET /topics without any seeded data still lists all categories, with zero counts."""
    # Deliberately no seeding: the request runs against the clean database.
    response = await client.get(TOPICS_URL)
    assert response.status_code == 200

    categories = response.json()
    assert len(categories) == 6

    # Every sub-topic of every category must report zero for both counts.
    all_sub_topics = [
        sub for category in categories for sub in category["sub_topics"]
    ]
    for sub in all_sub_topics:
        assert sub["technique_count"] == 0
        assert sub["creator_count"] == 0
# ── Creator Tests ────────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_list_creators_random_sort(client, db_engine):
    """GET /creators?sort=random returns every creator, in whatever order."""
    seeded = await _seed_full_data(db_engine)

    response = await client.get(CREATORS_URL, params={"sort": "random"})
    assert response.status_code == 200

    creators = response.json()
    assert len(creators) == 2

    # Order is unspecified, so compare membership rather than position.
    returned_names = {creator["name"] for creator in creators}
    assert seeded["creator1_name"] in returned_names
    assert seeded["creator2_name"] in returned_names

    # Both count fields must be present on every entry.
    for creator in creators:
        for field in ("technique_count", "video_count"):
            assert field in creator
@pytest.mark.asyncio
async def test_list_creators_alpha_sort(client, db_engine):
    """GET /creators?sort=alpha returns creators in alphabetical order."""
    # The seed's return value is not needed here: the expected names are
    # spelled out below so the alphabetical expectation is explicit.
    await _seed_full_data(db_engine)

    resp = await client.get(CREATORS_URL, params={"sort": "alpha"})
    assert resp.status_code == 200

    data = resp.json()
    assert len(data) == 2
    # "Alpha Creator" < "Beta Producer" alphabetically
    assert data[0]["name"] == "Alpha Creator"
    assert data[1]["name"] == "Beta Producer"
@pytest.mark.asyncio
async def test_list_creators_genre_filter(client, db_engine):
    """GET /creators?genre=Bass+music returns only the creator with that genre."""
    seeded = await _seed_full_data(db_engine)

    genre_query = {"genre": "Bass music"}
    response = await client.get(CREATORS_URL, params=genre_query)
    assert response.status_code == 200

    creators = response.json()
    # Only the first seeded creator lists "Bass music" among its genres.
    assert len(creators) == 1
    only_match = creators[0]
    assert only_match["name"] == seeded["creator1_name"]
    assert only_match["slug"] == seeded["creator1_slug"]
@pytest.mark.asyncio
async def test_get_creator_detail(client, db_engine):
    """GET /creators/{slug} returns the creator's detail, including video_count."""
    seeded = await _seed_full_data(db_engine)

    detail_url = f"{CREATORS_URL}/{seeded['creator1_slug']}"
    response = await client.get(detail_url)
    assert response.status_code == 200

    creator = response.json()
    assert creator["name"] == seeded["creator1_name"]
    assert creator["slug"] == seeded["creator1_slug"]
    # Exactly one video was seeded for the first creator.
    assert creator["video_count"] == 1
@pytest.mark.asyncio
async def test_get_creator_invalid_slug_returns_404(client, db_engine):
    """GET /creators/{slug} with an unknown slug answers 404."""
    await _seed_full_data(db_engine)

    missing_url = f"{CREATORS_URL}/nonexistent-creator-xyz"
    response = await client.get(missing_url)

    assert response.status_code == 404
@pytest.mark.asyncio
async def test_creators_with_counts(client, db_engine):
    """GET /creators returns correct technique_count and video_count."""
    await _seed_full_data(db_engine)

    # Alphabetical sort makes the positions below deterministic.
    resp = await client.get(CREATORS_URL, params={"sort": "alpha"})
    assert resp.status_code == 200

    data = resp.json()
    # Guard the positional indexing so a short response fails with a clear
    # assertion instead of an IndexError.
    assert len(data) == 2

    # Alpha Creator: 2 technique pages, 1 video
    alpha = data[0]
    assert alpha["name"] == "Alpha Creator"
    assert alpha["technique_count"] == 2
    assert alpha["video_count"] == 1

    # Beta Producer: 1 technique page, 1 video
    beta = data[1]
    assert beta["name"] == "Beta Producer"
    assert beta["technique_count"] == 1
    assert beta["video_count"] == 1
@pytest.mark.asyncio
async def test_creators_empty_list(client, db_engine):
    """GET /creators against an unseeded database returns an empty list."""
    # Deliberately no seeding.
    response = await client.get(CREATORS_URL)

    assert response.status_code == 200
    assert response.json() == []
# ── Version Tests ────────────────────────────────────────────────────────────
async def _insert_version(
    db_engine,
    technique_page_id: str | uuid.UUID,
    version_number: int,
    content_snapshot: dict,
    pipeline_metadata: dict | None = None,
) -> None:
    """Insert a TechniquePageVersion row directly for testing.

    Args:
        db_engine: Engine used to build a short-lived async session.
        technique_page_id: ID of the owning technique page. A string is
            parsed into a ``uuid.UUID``; any other value is passed through
            unchanged.
        version_number: Version number stored on the new row.
        content_snapshot: Snapshot payload stored on the new row.
        pipeline_metadata: Optional metadata stored on the new row.
    """
    from models import TechniquePageVersion

    # API responses carry the page ID as a string, so convert it here.
    page_id = (
        uuid.UUID(technique_page_id)
        if isinstance(technique_page_id, str)
        else technique_page_id
    )

    session_factory = async_sessionmaker(
        db_engine, class_=AsyncSession, expire_on_commit=False
    )
    async with session_factory() as session:
        session.add(
            TechniquePageVersion(
                technique_page_id=page_id,
                version_number=version_number,
                content_snapshot=content_snapshot,
                pipeline_metadata=pipeline_metadata,
            )
        )
        await session.commit()
@pytest.mark.asyncio
async def test_version_list_empty(client, db_engine):
    """GET /techniques/{slug}/versions yields no items for a page without versions."""
    seeded = await _seed_full_data(db_engine)

    versions_url = f"{TECHNIQUES_URL}/{seeded['tp1_slug']}/versions"
    response = await client.get(versions_url)
    assert response.status_code == 200

    listing = response.json()
    assert listing["items"] == []
    assert listing["total"] == 0
@pytest.mark.asyncio
async def test_version_list_with_versions(client, db_engine):
    """GET /techniques/{slug}/versions returns versions after inserting them."""
    seed = await _seed_full_data(db_engine)

    # Get the technique page ID by fetching the detail. Check the status
    # first so a failed set-up request is reported as such rather than as a
    # KeyError on "id".
    detail_resp = await client.get(f"{TECHNIQUES_URL}/{seed['tp1_slug']}")
    assert detail_resp.status_code == 200
    page_id = detail_resp.json()["id"]

    # Insert two versions
    snapshot1 = {"title": "Old Reese Bass v1", "summary": "First draft"}
    snapshot2 = {"title": "Old Reese Bass v2", "summary": "Second draft"}
    await _insert_version(db_engine, page_id, 1, snapshot1, {"model": "gpt-4o"})
    await _insert_version(db_engine, page_id, 2, snapshot2, {"model": "gpt-4o-mini"})

    resp = await client.get(f"{TECHNIQUES_URL}/{seed['tp1_slug']}/versions")
    assert resp.status_code == 200

    data = resp.json()
    assert data["total"] == 2
    assert len(data["items"]) == 2

    # Ordered by version_number DESC
    newest, oldest = data["items"]
    assert newest["version_number"] == 2
    assert oldest["version_number"] == 1
    assert newest["pipeline_metadata"]["model"] == "gpt-4o-mini"
    assert oldest["pipeline_metadata"]["model"] == "gpt-4o"
@pytest.mark.asyncio
async def test_version_detail_returns_content_snapshot(client, db_engine):
    """GET /techniques/{slug}/versions/{version_number} returns full snapshot."""
    seed = await _seed_full_data(db_engine)

    # Resolve the page ID through the detail endpoint. Check the status first
    # so a failed set-up request is not masked by a KeyError on "id".
    detail_resp = await client.get(f"{TECHNIQUES_URL}/{seed['tp1_slug']}")
    assert detail_resp.status_code == 200
    page_id = detail_resp.json()["id"]

    snapshot = {
        "title": "Old Title",
        "summary": "Old summary",
        "body_sections": {"intro": "Old intro"},
    }
    metadata = {"model": "gpt-4o", "prompt_hash": "abc123"}
    await _insert_version(db_engine, page_id, 1, snapshot, metadata)

    resp = await client.get(f"{TECHNIQUES_URL}/{seed['tp1_slug']}/versions/1")
    assert resp.status_code == 200

    # The stored snapshot and metadata must come back unchanged.
    data = resp.json()
    assert data["version_number"] == 1
    assert data["content_snapshot"] == snapshot
    assert data["pipeline_metadata"] == metadata
    assert "created_at" in data
@pytest.mark.asyncio
async def test_version_detail_404_for_nonexistent_version(client, db_engine):
    """GET /techniques/{slug}/versions/999 answers 404 with a "not found" detail."""
    seeded = await _seed_full_data(db_engine)

    missing_version_url = f"{TECHNIQUES_URL}/{seeded['tp1_slug']}/versions/999"
    response = await client.get(missing_version_url)

    assert response.status_code == 404
    detail_message = response.json()["detail"]
    assert "not found" in detail_message.lower()
@pytest.mark.asyncio
async def test_versions_404_for_nonexistent_slug(client, db_engine):
    """GET /techniques/{slug}/versions for an unknown slug answers 404 with a "not found" detail."""
    await _seed_full_data(db_engine)

    missing_url = f"{TECHNIQUES_URL}/nonexistent-slug-xyz/versions"
    response = await client.get(missing_url)

    assert response.status_code == 404
    detail_message = response.json()["detail"]
    assert "not found" in detail_message.lower()
@pytest.mark.asyncio
async def test_technique_detail_includes_version_count(client, db_engine):
    """GET /techniques/{slug} reports version_count, which grows when a version is inserted."""
    seeded = await _seed_full_data(db_engine)
    detail_url = f"{TECHNIQUES_URL}/{seeded['tp1_slug']}"

    # Freshly seeded page: no versions yet.
    before = await client.get(detail_url)
    assert before.status_code == 200
    before_detail = before.json()
    assert before_detail["version_count"] == 0

    # Add a single version, then fetch the detail again.
    await _insert_version(
        db_engine, before_detail["id"], 1, {"title": "Snapshot"}
    )
    after = await client.get(detail_url)
    assert after.status_code == 200
    assert after.json()["version_count"] == 1