chrysopedia/backend/pipeline/test_compose_pipeline.py
jlightner 7070ef3f51 test: Added 12 unit tests covering compose prompt construction, branchi…
- "backend/pipeline/test_compose_pipeline.py"

GSD-Task: S04/T02
2026-04-03 01:33:16 +00:00

360 lines
14 KiB
Python

"""Unit tests for compose pipeline logic in stage5_synthesis.
Covers:
- _build_compose_user_prompt(): XML structure, offset indices, empty existing, page JSON
- Compose-or-create branching: compose triggered vs create fallback
- body_sections_format='v2' on persisted pages
- TechniquePageVideo insertion via pg_insert with on_conflict_do_nothing
"""
from __future__ import annotations
import json
import uuid
from collections import namedtuple
from unittest.mock import MagicMock, patch
import pytest
# ── Lightweight mock objects ─────────────────────────────────────────────────
class _MockContentType:
"""Mimics ContentType enum with .value."""
def __init__(self, value: str) -> None:
self.value = value
# Immutable record used in place of the key-moment objects the pipeline
# functions receive; only the attributes listed here are available.
MockKeyMoment = namedtuple(
    "MockKeyMoment",
    (
        "id",
        "title",
        "summary",
        "content_type",
        "start_time",
        "end_time",
        "plugins",
        "raw_transcript",
        "technique_page_id",
        "source_video_id",
    ),
)
def _moment(
    title: str = "Test Moment",
    summary: str = "A moment.",
    content_type: str = "technique_demo",
    start_time: float = 0.0,
    end_time: float = 10.0,
    plugins: list[str] | None = None,
    raw_transcript: str | None = "Some transcript text",
    technique_page_id: uuid.UUID | None = None,
    source_video_id: uuid.UUID | None = None,
) -> MockKeyMoment:
    """Build a ``MockKeyMoment`` with a fresh random id.

    ``content_type`` is wrapped in ``_MockContentType`` so the result exposes
    ``content_type.value``. A missing/empty ``plugins`` becomes ``[]`` and a
    missing/empty ``raw_transcript`` becomes ``""``.
    """
    field_values = {
        "id": uuid.uuid4(),
        "title": title,
        "summary": summary,
        "content_type": _MockContentType(content_type),
        "start_time": start_time,
        "end_time": end_time,
        # Falsy inputs (None or empty) are normalised to empty containers.
        "plugins": plugins or [],
        "raw_transcript": raw_transcript or "",
        "technique_page_id": technique_page_id,
        "source_video_id": source_video_id,
    }
    return MockKeyMoment(**field_values)
class _MockSourceQuality:
"""Mimics source_quality enum with .value."""
def __init__(self, value: str = "high") -> None:
self.value = value
class MockTechniquePage:
    """Lightweight stand-in for the ORM TechniquePage.

    Every instance gets a fresh random ``id``. Arguments left as ``None`` are
    filled with sample data (one "Overview" body section, one plugin, a random
    ``creator_id``); ``source_quality`` is wrapped in ``_MockSourceQuality`` so
    it exposes ``.value``.

    Explicitly passed empty lists are kept as-is, so a test can model a page
    with no body sections or no plugins.
    """

    def __init__(
        self,
        title: str = "Reverb Techniques",
        slug: str = "reverb-techniques",
        topic_category: str = "Sound Design",
        summary: str = "A page about reverb.",
        body_sections: list | None = None,
        signal_chains: list | None = None,
        plugins: list[str] | None = None,
        source_quality: str = "high",
        creator_id: uuid.UUID | None = None,
        body_sections_format: str | None = None,
    ) -> None:
        self.id = uuid.uuid4()
        self.title = title
        self.slug = slug
        self.topic_category = topic_category
        self.summary = summary
        # Use `is None` rather than `or`: an explicit [] must not be replaced
        # by the sample defaults.
        self.body_sections = (
            [{"heading": "Overview", "content": "Intro text."}]
            if body_sections is None
            else body_sections
        )
        self.signal_chains = [] if signal_chains is None else signal_chains
        self.plugins = ["Valhalla VintageVerb"] if plugins is None else plugins
        self.source_quality = _MockSourceQuality(source_quality)
        self.creator_id = uuid.uuid4() if creator_id is None else creator_id
        self.body_sections_format = body_sections_format
def _cls_info(tags: list[str] | None = None) -> dict:
return {"topic_category": "Sound Design", "topic_tags": tags or ["reverb", "delay"]}
# ── Import the function under test ───────────────────────────────────────────
# We need to patch modules before importing stages in some tests.
# For _build_compose_user_prompt we can import directly since it's a pure function
# that only depends on _build_moments_text.
@pytest.fixture
def build_compose_prompt():
    """Provide ``pipeline.stages._build_compose_user_prompt`` to a test.

    The import happens inside the fixture, so ``pipeline.stages`` is only
    imported when a test actually requests this fixture.
    """
    from pipeline import stages as _stages

    return _stages._build_compose_user_prompt
# ── Tests for _build_compose_user_prompt ─────────────────────────────────────
class TestBuildComposeUserPrompt:
    """Checks the XML layout and moment-index offsets of the compose user prompt."""

    @staticmethod
    def _new_moments_slice(prompt: str) -> str:
        """Return the prompt text from ``<new_moments>`` up to its closing tag.

        The opening tag is included in the slice; the closing tag is not.
        """
        opening = prompt.index("<new_moments>")
        closing = prompt.index("</new_moments>")
        return prompt[opening:closing]

    def test_compose_prompt_xml_structure(self, build_compose_prompt):
        """Every required XML tag pair and the creator name appear in the output."""
        page = MockTechniquePage()
        existing = [_moment(title="Existing 1")]
        incoming = [(_moment(title="New 1"), _cls_info())]

        prompt = build_compose_prompt(page, existing, incoming, "COPYCATT")

        for tag in ("existing_page", "existing_moments", "new_moments", "creator"):
            assert f"<{tag}>" in prompt
            assert f"</{tag}>" in prompt
        assert "COPYCATT" in prompt

    def test_compose_prompt_offset_indices(self, build_compose_prompt):
        """With 3 existing + 2 new moments, new moments should use [3] and [4]."""
        page = MockTechniquePage()
        existing = [_moment(title=f"Existing {n}") for n in range(3)]
        incoming = [(_moment(title=f"New {n}"), _cls_info()) for n in range(2)]

        prompt = build_compose_prompt(page, existing, incoming, "COPYCATT")
        section = self._new_moments_slice(prompt)

        # New moments continue numbering after the existing ones...
        for expected in ("[3]", "[4]"):
            assert expected in section
        # ...so the indices reserved for existing moments must not reappear.
        for unexpected in ("[0]", "[1]", "[2]"):
            assert unexpected not in section

    def test_compose_prompt_empty_existing_moments(self, build_compose_prompt):
        """0 existing moments → new moments start at [0]."""
        page = MockTechniquePage()
        existing = []
        incoming = [
            (_moment(title="New A"), _cls_info()),
            (_moment(title="New B"), _cls_info()),
        ]

        prompt = build_compose_prompt(page, existing, incoming, "COPYCATT")
        section = self._new_moments_slice(prompt)

        for expected in ("[0]", "[1]"):
            assert expected in section

    def test_compose_prompt_page_json(self, build_compose_prompt):
        """The text between the <existing_page> tags parses as the page's JSON."""
        page = MockTechniquePage(title="My Page", slug="my-page", topic_category="Mixing")

        prompt = build_compose_prompt(page, [], [(_moment(), _cls_info())], "Creator")

        open_tag = "<existing_page>"
        body_start = prompt.index(open_tag) + len(open_tag)
        body_end = prompt.index("</existing_page>")
        payload = json.loads(prompt[body_start:body_end].strip())

        expected_values = {
            "title": "My Page",
            "slug": "my-page",
            "topic_category": "Mixing",
        }
        for key, value in expected_values.items():
            assert payload[key] == value
        for key in ("summary", "body_sections"):
            assert key in payload

    def test_compose_prompt_new_moment_content(self, build_compose_prompt):
        """New moments section includes title, summary, time range, and tags."""
        page = MockTechniquePage()
        moment = _moment(
            title="Sidechain Pump",
            summary="How to create a sidechain pump",
            start_time=30.0,
            end_time=45.5,
            plugins=["FabFilter Pro-C 2"],
        )
        incoming = [(moment, _cls_info(tags=["compression", "sidechain"]))]

        prompt = build_compose_prompt(page, [], incoming, "Creator")
        section = self._new_moments_slice(prompt)

        expected_fragments = (
            "Sidechain Pump",
            "How to create a sidechain pump",
            "30.0s",
            "45.5s",
            "FabFilter Pro-C 2",
            "compression",
            "sidechain",
        )
        for fragment in expected_fragments:
            assert fragment in section
# ── Tests for compose-or-create branching ────────────────────────────────────
class TestComposeOrCreateBranching:
    """Tests for the compose-or-create detection and branching in stage5_synthesis.

    Full integration-level mocking of stage5_synthesis is fragile (many DB queries).
    Instead, we verify:
    1. The code structure has correct branching (compose_target check → two paths)
    2. _compose_into_existing calls the LLM with compose prompt and returns parsed result
    """

    @staticmethod
    def _stages_source() -> str:
        """Return the text of ``stages.py`` that sits next to this test module.

        The path is derived from ``__file__`` instead of the current working
        directory, so the source-inspection tests behave the same no matter
        where pytest is launched from. The encoding is explicit so the read
        does not depend on the platform's default locale.
        """
        from pathlib import Path

        stages_path = Path(__file__).resolve().with_name("stages.py")
        return stages_path.read_text(encoding="utf-8")

    def test_compose_branch_exists_in_source(self):
        """Verify stage5 has compose detection → _compose_into_existing call path."""
        src = self._stages_source()
        # The compose detection block
        assert "compose_matches = session.execute(" in src
        assert "compose_target = compose_matches[0] if compose_matches else None" in src
        # The compose branch calls _compose_into_existing
        assert "if compose_target is not None:" in src
        assert "_compose_into_existing(" in src
        # The create branch calls _synthesize_chunk
        assert "elif len(moment_group) <= chunk_size:" in src

    def test_create_branch_when_no_compose_target(self):
        """Verify the create branch follows the compose branch and calls _synthesize_chunk."""
        src = self._stages_source()
        # Find the compose branch and the create branch — they're mutually exclusive
        compose_branch_idx = src.index("if compose_target is not None:")
        create_branch_idx = src.index("elif len(moment_group) <= chunk_size:")
        # The create branch must come after the compose branch (same if/elif chain)
        assert create_branch_idx > compose_branch_idx
        # _synthesize_chunk should appear within the first 500 characters of
        # the create branch.
        create_block = src[create_branch_idx:create_branch_idx + 500]
        assert "_synthesize_chunk(" in create_block

    # patch decorators are applied bottom-up, so the mock arguments arrive in
    # the reverse of the order in which the decorators are written.
    @patch("pipeline.stages._safe_parse_llm_response")
    @patch("pipeline.stages._make_llm_callback", return_value=lambda **kw: None)
    @patch("pipeline.stages.estimate_max_tokens", return_value=4000)
    @patch("pipeline.stages._load_prompt", return_value="compose system prompt")
    def test_compose_into_existing_calls_llm(
        self, mock_load_prompt, mock_estimate, mock_callback, mock_parse,
    ):
        """_compose_into_existing calls LLM with compose prompt and returns parsed result."""
        from pipeline.schemas import SynthesisResult, SynthesizedPage
        from pipeline.stages import _compose_into_existing

        mock_llm = MagicMock()
        mock_llm.complete.return_value = "raw response"
        synth_page = SynthesizedPage(
            title="Merged Page", slug="merged-page", topic_category="Sound Design",
            summary="Merged", body_sections=[], signal_chains=[], plugins=[],
            source_quality="high", moment_indices=[0, 1],
        )
        # The parser is patched, so the result is whatever we hand back here.
        mock_parse.return_value = SynthesisResult(pages=[synth_page])

        page = MockTechniquePage()
        existing_moments = [_moment(title="Old Moment")]
        new_moments = [(_moment(title="New Moment"), _cls_info())]

        result = _compose_into_existing(
            page, existing_moments, new_moments,
            "Sound Design", "COPYCATT", "system prompt",
            mock_llm, None, "text", 8000, str(uuid.uuid4()), None,
        )

        # LLM was called
        mock_llm.complete.assert_called_once()
        # The compose prompt template was loaded
        mock_load_prompt.assert_called_once()
        call_args = mock_load_prompt.call_args
        assert "stage5_compose" in call_args[0][0]
        # Result has the expected page
        assert len(result.pages) == 1
        assert result.pages[0].title == "Merged Page"
# ── Tests for body_sections_format and TechniquePageVideo ────────────────────
class TestBodySectionsFormatAndTracking:
    """Tests for body_sections_format='v2' and TechniquePageVideo insertion.

    These are source-inspection tests: they read ``stages.py`` as text and
    assert that specific statements are present.
    """

    @staticmethod
    def _stages_source() -> str:
        """Return the text of ``stages.py`` that sits next to this test module.

        The path is derived from ``__file__`` instead of the current working
        directory, so the tests behave the same no matter where pytest is
        launched from. The encoding is explicit so the read does not depend on
        the platform's default locale.
        """
        from pathlib import Path

        stages_path = Path(__file__).resolve().with_name("stages.py")
        return stages_path.read_text(encoding="utf-8")

    def test_body_sections_format_v2_set_on_page(self):
        """Verify the persist section sets body_sections_format='v2' on pages."""
        stages_src = self._stages_source()
        # The line `page.body_sections_format = "v2"` must appear in stages.py
        assert 'page.body_sections_format = "v2"' in stages_src, (
            "body_sections_format = 'v2' assignment not found in stages.py"
        )

    def test_technique_page_video_pg_insert(self):
        """Verify TechniquePageVideo insertion uses pg_insert with on_conflict_do_nothing."""
        stages_src = self._stages_source()
        assert "pg_insert(TechniquePageVideo.__table__)" in stages_src, (
            "pg_insert(TechniquePageVideo.__table__) not found in stages.py"
        )
        assert "on_conflict_do_nothing()" in stages_src, (
            "on_conflict_do_nothing() not found in stages.py"
        )

    def test_technique_page_video_values(self):
        """Verify TechniquePageVideo INSERT includes technique_page_id and source_video_id."""
        stages_src = self._stages_source()
        # Inspect the 200 characters starting at the pg_insert call.
        idx = stages_src.index("pg_insert(TechniquePageVideo.__table__)")
        block = stages_src[idx:idx + 200]
        assert "technique_page_id" in block
        assert "source_video_id" in block
# ── Tests for category case-insensitivity ────────────────────────────────────
class TestCategoryCaseInsensitive:
    """Verify the compose detection query uses func.lower for category matching."""

    def test_compose_detection_uses_func_lower(self):
        """The compose detection query must use func.lower on both sides."""
        from pathlib import Path

        # Locate stages.py relative to this test module rather than the
        # working directory, so the test does not depend on where pytest is
        # launched from; read it with an explicit encoding for the same reason.
        stages_path = Path(__file__).resolve().with_name("stages.py")
        stages_src = stages_path.read_text(encoding="utf-8")

        # Find the compose detection block — need enough window to capture the full query
        idx = stages_src.index("Compose-or-create detection")
        block = stages_src[idx:idx + 600]
        assert "func.lower(TechniquePage.topic_category)" in block
        assert "func.lower(category)" in block