test: Added 6 integration tests proving ingestion, creator auto-detecti…
- "backend/tests/conftest.py" - "backend/tests/test_ingest.py" - "backend/tests/fixtures/sample_transcript.json" - "backend/pytest.ini" - "backend/requirements.txt" - "backend/models.py" GSD-Task: S02/T02
This commit is contained in:
parent
88170a41f6
commit
a9de7f97ea
7 changed files with 296 additions and 1 deletions
|
|
@ -96,7 +96,12 @@ def _uuid_pk() -> Mapped[uuid.UUID]:
|
||||||
|
|
||||||
|
|
||||||
def _now() -> datetime:
|
def _now() -> datetime:
|
||||||
return datetime.now(timezone.utc)
|
"""Return current UTC time as a naive datetime (no tzinfo).
|
||||||
|
|
||||||
|
PostgreSQL TIMESTAMP WITHOUT TIME ZONE columns require naive datetimes.
|
||||||
|
asyncpg rejects timezone-aware datetimes for such columns.
|
||||||
|
"""
|
||||||
|
return datetime.now(timezone.utc).replace(tzinfo=None)
|
||||||
|
|
||||||
|
|
||||||
# ── Models ───────────────────────────────────────────────────────────────────
|
# ── Models ───────────────────────────────────────────────────────────────────
|
||||||
|
|
|
||||||
3
backend/pytest.ini
Normal file
3
backend/pytest.ini
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
[pytest]
|
||||||
|
asyncio_mode = auto
|
||||||
|
testpaths = tests
|
||||||
|
|
@ -10,3 +10,6 @@ redis>=5.0,<6.0
|
||||||
python-dotenv>=1.0,<2.0
|
python-dotenv>=1.0,<2.0
|
||||||
python-multipart>=0.0.9,<1.0
|
python-multipart>=0.0.9,<1.0
|
||||||
httpx>=0.27.0,<1.0
|
httpx>=0.27.0,<1.0
|
||||||
|
# Test dependencies
|
||||||
|
pytest>=8.0,<10.0
|
||||||
|
pytest-asyncio>=0.24,<1.0
|
||||||
|
|
|
||||||
0
backend/tests/__init__.py
Normal file
0
backend/tests/__init__.py
Normal file
93
backend/tests/conftest.py
Normal file
93
backend/tests/conftest.py
Normal file
|
|
@ -0,0 +1,93 @@
|
||||||
|
"""Shared fixtures for Chrysopedia integration tests.
|
||||||
|
|
||||||
|
Provides:
|
||||||
|
- Async SQLAlchemy engine/session against a real PostgreSQL test database
|
||||||
|
- httpx.AsyncClient wired to the FastAPI app with dependency overrides
|
||||||
|
- Sample transcript fixture path and temporary storage directory
|
||||||
|
|
||||||
|
Key design choice: function-scoped engine with NullPool avoids asyncpg
|
||||||
|
"another operation in progress" errors caused by session-scoped connection
|
||||||
|
reuse between the ASGI test client and verification queries.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import pytest_asyncio
|
||||||
|
from httpx import ASGITransport, AsyncClient
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
||||||
|
from sqlalchemy.pool import NullPool
|
||||||
|
|
||||||
|
# Ensure backend/ is on sys.path so "from models import ..." works
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, str(pathlib.Path(__file__).resolve().parent.parent))
|
||||||
|
|
||||||
|
from database import Base, get_session # noqa: E402
|
||||||
|
from main import app # noqa: E402
|
||||||
|
|
||||||
|
TEST_DATABASE_URL = os.getenv(
|
||||||
|
"TEST_DATABASE_URL",
|
||||||
|
"postgresql+asyncpg://chrysopedia:changeme@localhost:5433/chrysopedia_test",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest_asyncio.fixture()
|
||||||
|
async def db_engine():
|
||||||
|
"""Create a per-test async engine (NullPool) and create/drop all tables."""
|
||||||
|
engine = create_async_engine(TEST_DATABASE_URL, echo=False, poolclass=NullPool)
|
||||||
|
|
||||||
|
# Create all tables fresh for each test
|
||||||
|
async with engine.begin() as conn:
|
||||||
|
await conn.run_sync(Base.metadata.drop_all)
|
||||||
|
await conn.run_sync(Base.metadata.create_all)
|
||||||
|
|
||||||
|
yield engine
|
||||||
|
|
||||||
|
# Drop all tables after test
|
||||||
|
async with engine.begin() as conn:
|
||||||
|
await conn.run_sync(Base.metadata.drop_all)
|
||||||
|
|
||||||
|
await engine.dispose()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest_asyncio.fixture()
|
||||||
|
async def client(db_engine, tmp_path):
|
||||||
|
"""Async HTTP test client wired to FastAPI with dependency overrides."""
|
||||||
|
session_factory = async_sessionmaker(
|
||||||
|
db_engine, class_=AsyncSession, expire_on_commit=False
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _override_get_session():
|
||||||
|
async with session_factory() as session:
|
||||||
|
yield session
|
||||||
|
|
||||||
|
# Override DB session dependency
|
||||||
|
app.dependency_overrides[get_session] = _override_get_session
|
||||||
|
|
||||||
|
# Override transcript_storage_path via environment variable
|
||||||
|
os.environ["TRANSCRIPT_STORAGE_PATH"] = str(tmp_path)
|
||||||
|
# Clear the lru_cache so Settings picks up the new env var
|
||||||
|
from config import get_settings
|
||||||
|
get_settings.cache_clear()
|
||||||
|
|
||||||
|
transport = ASGITransport(app=app)
|
||||||
|
async with AsyncClient(transport=transport, base_url="http://testserver") as ac:
|
||||||
|
yield ac
|
||||||
|
|
||||||
|
# Teardown: clean overrides and restore settings cache
|
||||||
|
app.dependency_overrides.clear()
|
||||||
|
os.environ.pop("TRANSCRIPT_STORAGE_PATH", None)
|
||||||
|
get_settings.cache_clear()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def sample_transcript_path() -> pathlib.Path:
|
||||||
|
"""Path to the sample 5-segment transcript JSON fixture."""
|
||||||
|
return pathlib.Path(__file__).parent / "fixtures" / "sample_transcript.json"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def tmp_transcript_dir(tmp_path) -> pathlib.Path:
|
||||||
|
"""Temporary directory for transcript storage during tests."""
|
||||||
|
return tmp_path
|
||||||
12
backend/tests/fixtures/sample_transcript.json
vendored
Normal file
12
backend/tests/fixtures/sample_transcript.json
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
{
|
||||||
|
"source_file": "mixing-basics-ep1.mp4",
|
||||||
|
"creator_folder": "Skope",
|
||||||
|
"duration_seconds": 1234,
|
||||||
|
"segments": [
|
||||||
|
{"start": 0.0, "end": 5.2, "text": "Welcome to mixing basics episode one."},
|
||||||
|
{"start": 5.2, "end": 12.8, "text": "Today we are going to talk about gain staging."},
|
||||||
|
{"start": 12.8, "end": 20.1, "text": "First thing you want to do is set your levels."},
|
||||||
|
{"start": 20.1, "end": 28.5, "text": "Make sure nothing is clipping on the master bus."},
|
||||||
|
{"start": 28.5, "end": 35.0, "text": "That wraps up this quick overview of gain staging."}
|
||||||
|
]
|
||||||
|
}
|
||||||
179
backend/tests/test_ingest.py
Normal file
179
backend/tests/test_ingest.py
Normal file
|
|
@ -0,0 +1,179 @@
|
||||||
|
"""Integration tests for the transcript ingest endpoint.
|
||||||
|
|
||||||
|
Tests run against a real PostgreSQL database via httpx.AsyncClient
|
||||||
|
on the FastAPI ASGI app. Each test gets a clean database state via
|
||||||
|
TRUNCATE in the client fixture (conftest.py).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from httpx import AsyncClient
|
||||||
|
from sqlalchemy import func, select, text
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||||
|
|
||||||
|
from models import Creator, SourceVideo, TranscriptSegment
|
||||||
|
|
||||||
|
|
||||||
|
# ── Helpers ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
INGEST_URL = "/api/v1/ingest"
|
||||||
|
|
||||||
|
|
||||||
|
def _upload_file(path: pathlib.Path):
|
||||||
|
"""Return a dict suitable for httpx multipart file upload."""
|
||||||
|
return {"file": (path.name, path.read_bytes(), "application/json")}
|
||||||
|
|
||||||
|
|
||||||
|
async def _query_db(db_engine, stmt):
|
||||||
|
"""Run a read query in its own session to avoid connection contention."""
|
||||||
|
session_factory = async_sessionmaker(
|
||||||
|
db_engine, class_=AsyncSession, expire_on_commit=False
|
||||||
|
)
|
||||||
|
async with session_factory() as session:
|
||||||
|
result = await session.execute(stmt)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
async def _count_rows(db_engine, model):
|
||||||
|
"""Count rows in a table via a fresh session."""
|
||||||
|
result = await _query_db(db_engine, select(func.count(model.id)))
|
||||||
|
return result.scalar_one()
|
||||||
|
|
||||||
|
|
||||||
|
# ── Happy-path tests ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
async def test_ingest_creates_creator_and_video(client, sample_transcript_path, db_engine):
|
||||||
|
"""POST a valid transcript → 200 with creator, video, and 5 segments created."""
|
||||||
|
resp = await client.post(INGEST_URL, files=_upload_file(sample_transcript_path))
|
||||||
|
assert resp.status_code == 200, f"Expected 200, got {resp.status_code}: {resp.text}"
|
||||||
|
|
||||||
|
data = resp.json()
|
||||||
|
assert "video_id" in data
|
||||||
|
assert "creator_id" in data
|
||||||
|
assert data["segments_stored"] == 5
|
||||||
|
assert data["creator_name"] == "Skope"
|
||||||
|
assert data["is_reupload"] is False
|
||||||
|
|
||||||
|
# Verify DB state via a fresh session
|
||||||
|
session_factory = async_sessionmaker(db_engine, class_=AsyncSession, expire_on_commit=False)
|
||||||
|
async with session_factory() as session:
|
||||||
|
# Creator exists with correct folder_name and slug
|
||||||
|
result = await session.execute(
|
||||||
|
select(Creator).where(Creator.folder_name == "Skope")
|
||||||
|
)
|
||||||
|
creator = result.scalar_one()
|
||||||
|
assert creator.slug == "skope"
|
||||||
|
assert creator.name == "Skope"
|
||||||
|
|
||||||
|
# SourceVideo exists with correct status
|
||||||
|
result = await session.execute(
|
||||||
|
select(SourceVideo).where(SourceVideo.creator_id == creator.id)
|
||||||
|
)
|
||||||
|
video = result.scalar_one()
|
||||||
|
assert video.processing_status.value == "transcribed"
|
||||||
|
assert video.filename == "mixing-basics-ep1.mp4"
|
||||||
|
|
||||||
|
# 5 TranscriptSegment rows with sequential indices
|
||||||
|
result = await session.execute(
|
||||||
|
select(TranscriptSegment)
|
||||||
|
.where(TranscriptSegment.source_video_id == video.id)
|
||||||
|
.order_by(TranscriptSegment.segment_index)
|
||||||
|
)
|
||||||
|
segments = result.scalars().all()
|
||||||
|
assert len(segments) == 5
|
||||||
|
assert [s.segment_index for s in segments] == [0, 1, 2, 3, 4]
|
||||||
|
|
||||||
|
|
||||||
|
async def test_ingest_reuses_existing_creator(client, sample_transcript_path, db_engine):
|
||||||
|
"""If a Creator with the same folder_name already exists, reuse it."""
|
||||||
|
session_factory = async_sessionmaker(db_engine, class_=AsyncSession, expire_on_commit=False)
|
||||||
|
|
||||||
|
# Pre-create a Creator with folder_name='Skope' in a separate session
|
||||||
|
async with session_factory() as session:
|
||||||
|
existing = Creator(name="Skope", slug="skope", folder_name="Skope")
|
||||||
|
session.add(existing)
|
||||||
|
await session.commit()
|
||||||
|
await session.refresh(existing)
|
||||||
|
existing_id = existing.id
|
||||||
|
|
||||||
|
# POST transcript — should reuse the creator
|
||||||
|
resp = await client.post(INGEST_URL, files=_upload_file(sample_transcript_path))
|
||||||
|
assert resp.status_code == 200
|
||||||
|
data = resp.json()
|
||||||
|
assert data["creator_id"] == str(existing_id)
|
||||||
|
|
||||||
|
# Verify only 1 Creator row in DB
|
||||||
|
count = await _count_rows(db_engine, Creator)
|
||||||
|
assert count == 1, f"Expected 1 creator, got {count}"
|
||||||
|
|
||||||
|
|
||||||
|
async def test_ingest_idempotent_reupload(client, sample_transcript_path, db_engine):
|
||||||
|
"""Uploading the same transcript twice is idempotent: same video, no duplicate segments."""
|
||||||
|
# First upload
|
||||||
|
resp1 = await client.post(INGEST_URL, files=_upload_file(sample_transcript_path))
|
||||||
|
assert resp1.status_code == 200
|
||||||
|
data1 = resp1.json()
|
||||||
|
assert data1["is_reupload"] is False
|
||||||
|
video_id = data1["video_id"]
|
||||||
|
|
||||||
|
# Second upload (same file)
|
||||||
|
resp2 = await client.post(INGEST_URL, files=_upload_file(sample_transcript_path))
|
||||||
|
assert resp2.status_code == 200
|
||||||
|
data2 = resp2.json()
|
||||||
|
assert data2["is_reupload"] is True
|
||||||
|
assert data2["video_id"] == video_id
|
||||||
|
|
||||||
|
# Verify DB: still only 1 SourceVideo and 5 segments (not 10)
|
||||||
|
video_count = await _count_rows(db_engine, SourceVideo)
|
||||||
|
assert video_count == 1, f"Expected 1 video, got {video_count}"
|
||||||
|
|
||||||
|
seg_count = await _count_rows(db_engine, TranscriptSegment)
|
||||||
|
assert seg_count == 5, f"Expected 5 segments, got {seg_count}"
|
||||||
|
|
||||||
|
|
||||||
|
async def test_ingest_saves_json_to_disk(client, sample_transcript_path, tmp_path):
|
||||||
|
"""Ingested transcript raw JSON is persisted to the filesystem."""
|
||||||
|
resp = await client.post(INGEST_URL, files=_upload_file(sample_transcript_path))
|
||||||
|
assert resp.status_code == 200
|
||||||
|
|
||||||
|
# The ingest endpoint saves to {transcript_storage_path}/{creator_folder}/{source_file}.json
|
||||||
|
expected_path = tmp_path / "Skope" / "mixing-basics-ep1.mp4.json"
|
||||||
|
assert expected_path.exists(), f"Expected file at {expected_path}"
|
||||||
|
|
||||||
|
# Verify the saved JSON is valid and matches the source
|
||||||
|
saved = json.loads(expected_path.read_text())
|
||||||
|
source = json.loads(sample_transcript_path.read_text())
|
||||||
|
assert saved == source
|
||||||
|
|
||||||
|
|
||||||
|
# ── Error tests ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
async def test_ingest_rejects_invalid_json(client, tmp_path):
|
||||||
|
"""Uploading a non-JSON file returns 422."""
|
||||||
|
bad_file = tmp_path / "bad.json"
|
||||||
|
bad_file.write_text("this is not valid json {{{")
|
||||||
|
|
||||||
|
resp = await client.post(
|
||||||
|
INGEST_URL,
|
||||||
|
files={"file": ("bad.json", bad_file.read_bytes(), "application/json")},
|
||||||
|
)
|
||||||
|
assert resp.status_code == 422, f"Expected 422, got {resp.status_code}: {resp.text}"
|
||||||
|
assert "JSON parse error" in resp.json()["detail"]
|
||||||
|
|
||||||
|
|
||||||
|
async def test_ingest_rejects_missing_fields(client, tmp_path):
|
||||||
|
"""Uploading JSON without required fields returns 422."""
|
||||||
|
incomplete = tmp_path / "incomplete.json"
|
||||||
|
# Missing creator_folder and segments
|
||||||
|
incomplete.write_text(json.dumps({"source_file": "test.mp4", "duration_seconds": 100}))
|
||||||
|
|
||||||
|
resp = await client.post(
|
||||||
|
INGEST_URL,
|
||||||
|
files={"file": ("incomplete.json", incomplete.read_bytes(), "application/json")},
|
||||||
|
)
|
||||||
|
assert resp.status_code == 422, f"Expected 422, got {resp.status_code}: {resp.text}"
|
||||||
|
assert "Missing required keys" in resp.json()["detail"]
|
||||||
Loading…
Add table
Reference in a new issue