test: Added 6 integration tests proving ingestion, creator auto-detecti…
- "backend/tests/conftest.py" - "backend/tests/test_ingest.py" - "backend/tests/fixtures/sample_transcript.json" - "backend/pytest.ini" - "backend/requirements.txt" - "backend/models.py" GSD-Task: S02/T02
This commit is contained in:
parent
88170a41f6
commit
a9de7f97ea
7 changed files with 296 additions and 1 deletions
|
|
@ -96,7 +96,12 @@ def _uuid_pk() -> Mapped[uuid.UUID]:
|
|||
|
||||
|
||||
def _now() -> datetime:
|
||||
return datetime.now(timezone.utc)
|
||||
"""Return current UTC time as a naive datetime (no tzinfo).
|
||||
|
||||
PostgreSQL TIMESTAMP WITHOUT TIME ZONE columns require naive datetimes.
|
||||
asyncpg rejects timezone-aware datetimes for such columns.
|
||||
"""
|
||||
return datetime.now(timezone.utc).replace(tzinfo=None)
|
||||
|
||||
|
||||
# ── Models ───────────────────────────────────────────────────────────────────
|
||||
|
|
|
|||
3
backend/pytest.ini
Normal file
3
backend/pytest.ini
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
[pytest]
|
||||
asyncio_mode = auto
|
||||
testpaths = tests
|
||||
|
|
@ -10,3 +10,6 @@ redis>=5.0,<6.0
|
|||
python-dotenv>=1.0,<2.0
|
||||
python-multipart>=0.0.9,<1.0
|
||||
httpx>=0.27.0,<1.0
|
||||
# Test dependencies
|
||||
pytest>=8.0,<10.0
|
||||
pytest-asyncio>=0.24,<1.0
|
||||
|
|
|
|||
0
backend/tests/__init__.py
Normal file
0
backend/tests/__init__.py
Normal file
93
backend/tests/conftest.py
Normal file
93
backend/tests/conftest.py
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
"""Shared fixtures for Chrysopedia integration tests.
|
||||
|
||||
Provides:
|
||||
- Async SQLAlchemy engine/session against a real PostgreSQL test database
|
||||
- httpx.AsyncClient wired to the FastAPI app with dependency overrides
|
||||
- Sample transcript fixture path and temporary storage directory
|
||||
|
||||
Key design choice: function-scoped engine with NullPool avoids asyncpg
|
||||
"another operation in progress" errors caused by session-scoped connection
|
||||
reuse between the ASGI test client and verification queries.
|
||||
"""
|
||||
|
||||
import os
|
||||
import pathlib
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
||||
from sqlalchemy.pool import NullPool
|
||||
|
||||
# Ensure backend/ is on sys.path so "from models import ..." works
|
||||
import sys
|
||||
sys.path.insert(0, str(pathlib.Path(__file__).resolve().parent.parent))
|
||||
|
||||
from database import Base, get_session # noqa: E402
|
||||
from main import app # noqa: E402
|
||||
|
||||
TEST_DATABASE_URL = os.getenv(
|
||||
"TEST_DATABASE_URL",
|
||||
"postgresql+asyncpg://chrysopedia:changeme@localhost:5433/chrysopedia_test",
|
||||
)
|
||||
|
||||
|
||||
@pytest_asyncio.fixture()
|
||||
async def db_engine():
|
||||
"""Create a per-test async engine (NullPool) and create/drop all tables."""
|
||||
engine = create_async_engine(TEST_DATABASE_URL, echo=False, poolclass=NullPool)
|
||||
|
||||
# Create all tables fresh for each test
|
||||
async with engine.begin() as conn:
|
||||
await conn.run_sync(Base.metadata.drop_all)
|
||||
await conn.run_sync(Base.metadata.create_all)
|
||||
|
||||
yield engine
|
||||
|
||||
# Drop all tables after test
|
||||
async with engine.begin() as conn:
|
||||
await conn.run_sync(Base.metadata.drop_all)
|
||||
|
||||
await engine.dispose()
|
||||
|
||||
|
||||
@pytest_asyncio.fixture()
|
||||
async def client(db_engine, tmp_path):
|
||||
"""Async HTTP test client wired to FastAPI with dependency overrides."""
|
||||
session_factory = async_sessionmaker(
|
||||
db_engine, class_=AsyncSession, expire_on_commit=False
|
||||
)
|
||||
|
||||
async def _override_get_session():
|
||||
async with session_factory() as session:
|
||||
yield session
|
||||
|
||||
# Override DB session dependency
|
||||
app.dependency_overrides[get_session] = _override_get_session
|
||||
|
||||
# Override transcript_storage_path via environment variable
|
||||
os.environ["TRANSCRIPT_STORAGE_PATH"] = str(tmp_path)
|
||||
# Clear the lru_cache so Settings picks up the new env var
|
||||
from config import get_settings
|
||||
get_settings.cache_clear()
|
||||
|
||||
transport = ASGITransport(app=app)
|
||||
async with AsyncClient(transport=transport, base_url="http://testserver") as ac:
|
||||
yield ac
|
||||
|
||||
# Teardown: clean overrides and restore settings cache
|
||||
app.dependency_overrides.clear()
|
||||
os.environ.pop("TRANSCRIPT_STORAGE_PATH", None)
|
||||
get_settings.cache_clear()
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def sample_transcript_path() -> pathlib.Path:
|
||||
"""Path to the sample 5-segment transcript JSON fixture."""
|
||||
return pathlib.Path(__file__).parent / "fixtures" / "sample_transcript.json"
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def tmp_transcript_dir(tmp_path) -> pathlib.Path:
|
||||
"""Temporary directory for transcript storage during tests."""
|
||||
return tmp_path
|
||||
12
backend/tests/fixtures/sample_transcript.json
vendored
Normal file
12
backend/tests/fixtures/sample_transcript.json
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
{
|
||||
"source_file": "mixing-basics-ep1.mp4",
|
||||
"creator_folder": "Skope",
|
||||
"duration_seconds": 1234,
|
||||
"segments": [
|
||||
{"start": 0.0, "end": 5.2, "text": "Welcome to mixing basics episode one."},
|
||||
{"start": 5.2, "end": 12.8, "text": "Today we are going to talk about gain staging."},
|
||||
{"start": 12.8, "end": 20.1, "text": "First thing you want to do is set your levels."},
|
||||
{"start": 20.1, "end": 28.5, "text": "Make sure nothing is clipping on the master bus."},
|
||||
{"start": 28.5, "end": 35.0, "text": "That wraps up this quick overview of gain staging."}
|
||||
]
|
||||
}
|
||||
179
backend/tests/test_ingest.py
Normal file
179
backend/tests/test_ingest.py
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
"""Integration tests for the transcript ingest endpoint.
|
||||
|
||||
Tests run against a real PostgreSQL database via httpx.AsyncClient
|
||||
on the FastAPI ASGI app. Each test gets a clean database state via
|
||||
TRUNCATE in the client fixture (conftest.py).
|
||||
"""
|
||||
|
||||
import json
|
||||
import pathlib
|
||||
|
||||
import pytest
|
||||
from httpx import AsyncClient
|
||||
from sqlalchemy import func, select, text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
from models import Creator, SourceVideo, TranscriptSegment
|
||||
|
||||
|
||||
# ── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
INGEST_URL = "/api/v1/ingest"
|
||||
|
||||
|
||||
def _upload_file(path: pathlib.Path):
|
||||
"""Return a dict suitable for httpx multipart file upload."""
|
||||
return {"file": (path.name, path.read_bytes(), "application/json")}
|
||||
|
||||
|
||||
async def _query_db(db_engine, stmt):
|
||||
"""Run a read query in its own session to avoid connection contention."""
|
||||
session_factory = async_sessionmaker(
|
||||
db_engine, class_=AsyncSession, expire_on_commit=False
|
||||
)
|
||||
async with session_factory() as session:
|
||||
result = await session.execute(stmt)
|
||||
return result
|
||||
|
||||
|
||||
async def _count_rows(db_engine, model):
|
||||
"""Count rows in a table via a fresh session."""
|
||||
result = await _query_db(db_engine, select(func.count(model.id)))
|
||||
return result.scalar_one()
|
||||
|
||||
|
||||
# ── Happy-path tests ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_ingest_creates_creator_and_video(client, sample_transcript_path, db_engine):
|
||||
"""POST a valid transcript → 200 with creator, video, and 5 segments created."""
|
||||
resp = await client.post(INGEST_URL, files=_upload_file(sample_transcript_path))
|
||||
assert resp.status_code == 200, f"Expected 200, got {resp.status_code}: {resp.text}"
|
||||
|
||||
data = resp.json()
|
||||
assert "video_id" in data
|
||||
assert "creator_id" in data
|
||||
assert data["segments_stored"] == 5
|
||||
assert data["creator_name"] == "Skope"
|
||||
assert data["is_reupload"] is False
|
||||
|
||||
# Verify DB state via a fresh session
|
||||
session_factory = async_sessionmaker(db_engine, class_=AsyncSession, expire_on_commit=False)
|
||||
async with session_factory() as session:
|
||||
# Creator exists with correct folder_name and slug
|
||||
result = await session.execute(
|
||||
select(Creator).where(Creator.folder_name == "Skope")
|
||||
)
|
||||
creator = result.scalar_one()
|
||||
assert creator.slug == "skope"
|
||||
assert creator.name == "Skope"
|
||||
|
||||
# SourceVideo exists with correct status
|
||||
result = await session.execute(
|
||||
select(SourceVideo).where(SourceVideo.creator_id == creator.id)
|
||||
)
|
||||
video = result.scalar_one()
|
||||
assert video.processing_status.value == "transcribed"
|
||||
assert video.filename == "mixing-basics-ep1.mp4"
|
||||
|
||||
# 5 TranscriptSegment rows with sequential indices
|
||||
result = await session.execute(
|
||||
select(TranscriptSegment)
|
||||
.where(TranscriptSegment.source_video_id == video.id)
|
||||
.order_by(TranscriptSegment.segment_index)
|
||||
)
|
||||
segments = result.scalars().all()
|
||||
assert len(segments) == 5
|
||||
assert [s.segment_index for s in segments] == [0, 1, 2, 3, 4]
|
||||
|
||||
|
||||
async def test_ingest_reuses_existing_creator(client, sample_transcript_path, db_engine):
|
||||
"""If a Creator with the same folder_name already exists, reuse it."""
|
||||
session_factory = async_sessionmaker(db_engine, class_=AsyncSession, expire_on_commit=False)
|
||||
|
||||
# Pre-create a Creator with folder_name='Skope' in a separate session
|
||||
async with session_factory() as session:
|
||||
existing = Creator(name="Skope", slug="skope", folder_name="Skope")
|
||||
session.add(existing)
|
||||
await session.commit()
|
||||
await session.refresh(existing)
|
||||
existing_id = existing.id
|
||||
|
||||
# POST transcript — should reuse the creator
|
||||
resp = await client.post(INGEST_URL, files=_upload_file(sample_transcript_path))
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert data["creator_id"] == str(existing_id)
|
||||
|
||||
# Verify only 1 Creator row in DB
|
||||
count = await _count_rows(db_engine, Creator)
|
||||
assert count == 1, f"Expected 1 creator, got {count}"
|
||||
|
||||
|
||||
async def test_ingest_idempotent_reupload(client, sample_transcript_path, db_engine):
|
||||
"""Uploading the same transcript twice is idempotent: same video, no duplicate segments."""
|
||||
# First upload
|
||||
resp1 = await client.post(INGEST_URL, files=_upload_file(sample_transcript_path))
|
||||
assert resp1.status_code == 200
|
||||
data1 = resp1.json()
|
||||
assert data1["is_reupload"] is False
|
||||
video_id = data1["video_id"]
|
||||
|
||||
# Second upload (same file)
|
||||
resp2 = await client.post(INGEST_URL, files=_upload_file(sample_transcript_path))
|
||||
assert resp2.status_code == 200
|
||||
data2 = resp2.json()
|
||||
assert data2["is_reupload"] is True
|
||||
assert data2["video_id"] == video_id
|
||||
|
||||
# Verify DB: still only 1 SourceVideo and 5 segments (not 10)
|
||||
video_count = await _count_rows(db_engine, SourceVideo)
|
||||
assert video_count == 1, f"Expected 1 video, got {video_count}"
|
||||
|
||||
seg_count = await _count_rows(db_engine, TranscriptSegment)
|
||||
assert seg_count == 5, f"Expected 5 segments, got {seg_count}"
|
||||
|
||||
|
||||
async def test_ingest_saves_json_to_disk(client, sample_transcript_path, tmp_path):
|
||||
"""Ingested transcript raw JSON is persisted to the filesystem."""
|
||||
resp = await client.post(INGEST_URL, files=_upload_file(sample_transcript_path))
|
||||
assert resp.status_code == 200
|
||||
|
||||
# The ingest endpoint saves to {transcript_storage_path}/{creator_folder}/{source_file}.json
|
||||
expected_path = tmp_path / "Skope" / "mixing-basics-ep1.mp4.json"
|
||||
assert expected_path.exists(), f"Expected file at {expected_path}"
|
||||
|
||||
# Verify the saved JSON is valid and matches the source
|
||||
saved = json.loads(expected_path.read_text())
|
||||
source = json.loads(sample_transcript_path.read_text())
|
||||
assert saved == source
|
||||
|
||||
|
||||
# ── Error tests ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def test_ingest_rejects_invalid_json(client, tmp_path):
|
||||
"""Uploading a non-JSON file returns 422."""
|
||||
bad_file = tmp_path / "bad.json"
|
||||
bad_file.write_text("this is not valid json {{{")
|
||||
|
||||
resp = await client.post(
|
||||
INGEST_URL,
|
||||
files={"file": ("bad.json", bad_file.read_bytes(), "application/json")},
|
||||
)
|
||||
assert resp.status_code == 422, f"Expected 422, got {resp.status_code}: {resp.text}"
|
||||
assert "JSON parse error" in resp.json()["detail"]
|
||||
|
||||
|
||||
async def test_ingest_rejects_missing_fields(client, tmp_path):
|
||||
"""Uploading JSON without required fields returns 422."""
|
||||
incomplete = tmp_path / "incomplete.json"
|
||||
# Missing creator_folder and segments
|
||||
incomplete.write_text(json.dumps({"source_file": "test.mp4", "duration_seconds": 100}))
|
||||
|
||||
resp = await client.post(
|
||||
INGEST_URL,
|
||||
files={"file": ("incomplete.json", incomplete.read_bytes(), "application/json")},
|
||||
)
|
||||
assert resp.status_code == 422, f"Expected 422, got {resp.status_code}: {resp.text}"
|
||||
assert "Missing required keys" in resp.json()["detail"]
|
||||
Loading…
Add table
Reference in a new issue