"""Unit tests for compose pipeline logic in stage5_synthesis.
Covers:
- _build_compose_user_prompt(): XML structure, offset indices, empty existing, page JSON
- Compose-or-create branching: compose triggered vs create fallback
- body_sections_format='v2' on persisted pages
- TechniquePageVideo insertion via pg_insert with on_conflict_do_nothing
"""
from __future__ import annotations
import json
import uuid
from collections import namedtuple
from unittest.mock import MagicMock, patch
import pytest
# ── Lightweight mock objects ─────────────────────────────────────────────────
class _MockContentType:
    """Stand-in for the ContentType enum that exposes only ``.value``."""

    value: str

    def __init__(self, value: str) -> None:
        # Stored verbatim; nothing is validated against real enum members.
        self.value = value
# Tuple-based stand-in for a key-moment record. Field order is significant
# for positional construction, so it is kept exactly as declared here.
MockKeyMoment = namedtuple(
    "MockKeyMoment",
    (
        "id",
        "title",
        "summary",
        "content_type",
        "start_time",
        "end_time",
        "plugins",
        "raw_transcript",
        "technique_page_id",
        "source_video_id",
    ),
)
def _moment(
    title: str = "Test Moment",
    summary: str = "A moment.",
    content_type: str = "technique_demo",
    start_time: float = 0.0,
    end_time: float = 10.0,
    plugins: list[str] | None = None,
    raw_transcript: str | None = "Some transcript text",
    technique_page_id: uuid.UUID | None = None,
    source_video_id: uuid.UUID | None = None,
) -> MockKeyMoment:
    """Build a MockKeyMoment with a fresh random id and overridable defaults.

    ``content_type`` is wrapped in ``_MockContentType`` so callers can read
    ``.value`` from it. A falsy ``plugins`` becomes ``[]`` and a falsy
    ``raw_transcript`` becomes ``""``.
    """
    fields = {
        "id": uuid.uuid4(),
        "title": title,
        "summary": summary,
        "content_type": _MockContentType(content_type),
        "start_time": start_time,
        "end_time": end_time,
        "plugins": plugins if plugins else [],
        "raw_transcript": raw_transcript if raw_transcript else "",
        "technique_page_id": technique_page_id,
        "source_video_id": source_video_id,
    }
    return MockKeyMoment(**fields)
class _MockSourceQuality:
    """Stand-in for the source_quality enum that exposes only ``.value``."""

    value: str

    def __init__(self, value: str = "high") -> None:
        # Defaults to "high" when no quality is supplied.
        self.value = value
class MockTechniquePage:
    """Minimal substitute for the ORM TechniquePage used by these tests.

    Each instance gets a random ``id``. Falsy ``body_sections``,
    ``signal_chains``, ``plugins`` and ``creator_id`` arguments are replaced
    by the defaults shown in ``__init__``; ``source_quality`` is wrapped in
    ``_MockSourceQuality`` so that ``.value`` is available.
    """

    def __init__(
        self,
        title: str = "Reverb Techniques",
        slug: str = "reverb-techniques",
        topic_category: str = "Sound Design",
        summary: str = "A page about reverb.",
        body_sections: list | None = None,
        signal_chains: list | None = None,
        plugins: list[str] | None = None,
        source_quality: str = "high",
        creator_id: uuid.UUID | None = None,
        body_sections_format: str | None = None,
    ) -> None:
        self.id = uuid.uuid4()

        # Plain pass-through attributes.
        self.title = title
        self.slug = slug
        self.topic_category = topic_category
        self.summary = summary

        # Collections: any falsy argument (None or empty) falls back to a default.
        if body_sections:
            self.body_sections = body_sections
        else:
            self.body_sections = [{"heading": "Overview", "content": "Intro text."}]
        self.signal_chains = signal_chains if signal_chains else []
        self.plugins = plugins if plugins else ["Valhalla VintageVerb"]

        self.source_quality = _MockSourceQuality(source_quality)
        self.creator_id = creator_id if creator_id else uuid.uuid4()
        self.body_sections_format = body_sections_format
def _cls_info(tags: list[str] | None = None) -> dict:
    """Return a classification dict for the "Sound Design" category.

    ``tags`` is used as ``topic_tags``; a falsy value (None or an empty
    list) falls back to ``["reverb", "delay"]``.
    """
    topic_tags = tags if tags else ["reverb", "delay"]
    return dict(topic_category="Sound Design", topic_tags=topic_tags)
# ── Import the function under test ───────────────────────────────────────────
# We need to patch modules before importing stages in some tests.
# For _build_compose_user_prompt we can import directly since it's a pure function
# that only depends on _build_moments_text.
@pytest.fixture
def build_compose_prompt():
    """Provide ``pipeline.stages._build_compose_user_prompt`` to a test.

    The import happens when the fixture runs rather than at module import,
    so collecting this file does not itself import ``pipeline.stages``.
    """
    from pipeline.stages import _build_compose_user_prompt as compose_prompt_builder

    return compose_prompt_builder
# ── Tests for _build_compose_user_prompt ─────────────────────────────────────
class TestBuildComposeUserPrompt:
    """Tests for _build_compose_user_prompt XML structure and offset math.

    NOTE(review): the XML tag literals in this class had all been reduced to
    empty strings, which made the structure assertions vacuous
    (``"" in result`` is always true) and every section slice empty
    (``result.index("")`` is always 0). The tag names in ``_TAG_NAMES`` are
    reconstructed from the four inputs passed to the prompt builder —
    confirm them against ``pipeline.stages._build_compose_user_prompt``.
    """

    # One entry per section the prompt is expected to contain; each must
    # appear as an opening ``<name>`` and a closing ``</name>`` marker.
    # TODO confirm these names against the prompt builder.
    _TAG_NAMES = ("existing_page", "existing_moments", "new_moments", "creator")

    @staticmethod
    def _between(text: str, tag: str) -> str:
        """Return the text enclosed by ``<tag>`` and ``</tag>`` in *text*.

        Raises:
            ValueError: if either marker is absent (propagated from
                ``str.index``), which fails the calling test.
        """
        opener = f"<{tag}>"
        start = text.index(opener) + len(opener)
        # Search for the closer only after the opener so the slice is never inverted.
        end = text.index(f"</{tag}>", start)
        return text[start:end]

    def test_compose_prompt_xml_structure(self, build_compose_prompt):
        """Verify output contains all required XML tags."""
        page = MockTechniquePage()
        existing = [_moment(title="Existing 1")]
        new = [(_moment(title="New 1"), _cls_info())]

        result = build_compose_prompt(page, existing, new, "COPYCATT")

        for tag in self._TAG_NAMES:
            assert f"<{tag}>" in result
            assert f"</{tag}>" in result
        assert "COPYCATT" in result

    def test_compose_prompt_offset_indices(self, build_compose_prompt):
        """With 3 existing + 2 new moments, new moments should use [3] and [4]."""
        page = MockTechniquePage()
        existing = [_moment(title=f"Existing {i}") for i in range(3)]
        new = [(_moment(title=f"New {i}"), _cls_info()) for i in range(2)]

        result = build_compose_prompt(page, existing, new, "COPYCATT")
        new_section = self._between(result, "new_moments")

        # New moments continue numbering after the existing ones.
        assert "[3]" in new_section
        assert "[4]" in new_section
        # Indices belonging to existing moments must not leak into the new section.
        assert "[0]" not in new_section
        assert "[1]" not in new_section
        assert "[2]" not in new_section

    def test_compose_prompt_empty_existing_moments(self, build_compose_prompt):
        """0 existing moments → new moments start at [0]."""
        page = MockTechniquePage()
        existing = []
        new = [
            (_moment(title="New A"), _cls_info()),
            (_moment(title="New B"), _cls_info()),
        ]

        result = build_compose_prompt(page, existing, new, "COPYCATT")
        new_section = self._between(result, "new_moments")

        assert "[0]" in new_section
        assert "[1]" in new_section

    def test_compose_prompt_page_json(self, build_compose_prompt):
        """Existing page should be serialized as JSON within <existing_page> tags."""
        page = MockTechniquePage(title="My Page", slug="my-page", topic_category="Mixing")

        result = build_compose_prompt(page, [], [(_moment(), _cls_info())], "Creator")
        page_dict = json.loads(self._between(result, "existing_page").strip())

        assert page_dict["title"] == "My Page"
        assert page_dict["slug"] == "my-page"
        assert page_dict["topic_category"] == "Mixing"
        assert "summary" in page_dict
        assert "body_sections" in page_dict

    def test_compose_prompt_new_moment_content(self, build_compose_prompt):
        """New moments section includes title, summary, time range, and tags."""
        page = MockTechniquePage()
        m = _moment(
            title="Sidechain Pump",
            summary="How to create a sidechain pump",
            start_time=30.0,
            end_time=45.5,
            plugins=["FabFilter Pro-C 2"],
        )
        new = [(m, _cls_info(tags=["compression", "sidechain"]))]

        result = build_compose_prompt(page, [], new, "Creator")
        new_section = self._between(result, "new_moments")

        assert "Sidechain Pump" in new_section
        assert "How to create a sidechain pump" in new_section
        assert "30.0s" in new_section
        assert "45.5s" in new_section
        assert "FabFilter Pro-C 2" in new_section
        assert "compression" in new_section
        assert "sidechain" in new_section
# ── Tests for compose-or-create branching ────────────────────────────────────
class TestComposeOrCreateBranching:
    """Tests for the compose-or-create detection and branching in stage5_synthesis.

    Full integration-level mocking of stage5_synthesis is fragile (many DB queries).
    Instead, we verify:
    1. The code structure has correct branching (compose_target check → two paths)
    2. _compose_into_existing calls the LLM with compose prompt and returns parsed result
    """

    @staticmethod
    def _stages_source() -> str:
        """Return the text of stages.py for the source-inspection tests.

        The path is relative, so it resolves against the current working
        directory (presumably the repository root — verify against the test
        runner configuration). The encoding is pinned to UTF-8 so decoding
        does not depend on the platform's locale default.
        """
        from pathlib import Path

        return Path("backend/pipeline/stages.py").read_text(encoding="utf-8")

    def test_compose_branch_exists_in_source(self):
        """Verify stage5 has compose detection → _compose_into_existing call path."""
        src = self._stages_source()

        # The compose detection block
        assert "compose_matches = session.execute(" in src
        assert "compose_target = compose_matches[0] if compose_matches else None" in src

        # The compose branch calls _compose_into_existing
        assert "if compose_target is not None:" in src
        assert "_compose_into_existing(" in src

        # The create branch calls _synthesize_chunk
        assert "elif len(moment_group) <= chunk_size:" in src

    def test_create_branch_when_no_compose_target(self):
        """Verify the else/elif branches call _synthesize_chunk, not _compose_into_existing."""
        src = self._stages_source()

        # Find the compose branch and the create branch — they're mutually exclusive
        compose_branch_idx = src.index("if compose_target is not None:")
        create_branch_idx = src.index("elif len(moment_group) <= chunk_size:")

        # The create branch must come after the compose branch (same if/elif chain)
        assert create_branch_idx > compose_branch_idx

        # _synthesize_chunk should appear in the create branch, not compose.
        # Only the first 500 characters of the branch are inspected.
        create_block = src[create_branch_idx:create_branch_idx + 500]
        assert "_synthesize_chunk(" in create_block

    @patch("pipeline.stages._safe_parse_llm_response")
    @patch("pipeline.stages._make_llm_callback", return_value=lambda **kw: None)
    @patch("pipeline.stages.estimate_max_tokens", return_value=4000)
    @patch("pipeline.stages._load_prompt", return_value="compose system prompt")
    def test_compose_into_existing_calls_llm(
        self, mock_load_prompt, mock_estimate, mock_callback, mock_parse,
    ):
        """_compose_into_existing calls LLM with compose prompt and returns parsed result."""
        # Mock parameters arrive bottom-up relative to the @patch decorators.
        from pipeline.schemas import SynthesisResult, SynthesizedPage
        from pipeline.stages import _compose_into_existing

        mock_llm = MagicMock()
        mock_llm.complete.return_value = "raw response"

        synth_page = SynthesizedPage(
            title="Merged Page", slug="merged-page", topic_category="Sound Design",
            summary="Merged", body_sections=[], signal_chains=[], plugins=[],
            source_quality="high", moment_indices=[0, 1],
        )
        mock_parse.return_value = SynthesisResult(pages=[synth_page])

        page = MockTechniquePage()
        existing_moments = [_moment(title="Old Moment")]
        new_moments = [(_moment(title="New Moment"), _cls_info())]

        result = _compose_into_existing(
            page, existing_moments, new_moments,
            "Sound Design", "COPYCATT", "system prompt",
            mock_llm, None, "text", 8000, str(uuid.uuid4()), None,
        )

        # LLM was called
        mock_llm.complete.assert_called_once()

        # The compose prompt template was loaded
        mock_load_prompt.assert_called_once()
        call_args = mock_load_prompt.call_args
        assert "stage5_compose" in call_args[0][0]

        # Result has the expected page
        assert len(result.pages) == 1
        assert result.pages[0].title == "Merged Page"
# ── Tests for body_sections_format and TechniquePageVideo ────────────────────
class TestBodySectionsFormatAndTracking:
    """Tests for body_sections_format='v2' and TechniquePageVideo insertion.

    These are source-inspection tests: they read stages.py as text and check
    that specific statements are present, without importing the module.
    """

    @staticmethod
    def _stages_source() -> str:
        """Return the text of stages.py, decoded as UTF-8.

        The path is relative, so it resolves against the current working
        directory. Pinning the encoding keeps the read independent of the
        platform's locale default.
        """
        from pathlib import Path

        return Path("backend/pipeline/stages.py").read_text(encoding="utf-8")

    def test_body_sections_format_v2_set_on_page(self):
        """Verify the persist section sets body_sections_format='v2' on pages."""
        stages_src = self._stages_source()

        # The line `page.body_sections_format = "v2"` must appear in the persist block
        assert 'page.body_sections_format = "v2"' in stages_src, (
            "body_sections_format = 'v2' assignment not found in stages.py"
        )

    def test_technique_page_video_pg_insert(self):
        """Verify TechniquePageVideo insertion uses pg_insert with on_conflict_do_nothing."""
        stages_src = self._stages_source()

        assert "pg_insert(TechniquePageVideo.__table__)" in stages_src, (
            "pg_insert(TechniquePageVideo.__table__) not found in stages.py"
        )
        assert "on_conflict_do_nothing()" in stages_src, (
            "on_conflict_do_nothing() not found in stages.py"
        )

    def test_technique_page_video_values(self):
        """Verify TechniquePageVideo INSERT includes technique_page_id and source_video_id."""
        stages_src = self._stages_source()

        # Inspect the 200 characters starting at the pg_insert call.
        idx = stages_src.index("pg_insert(TechniquePageVideo.__table__)")
        block = stages_src[idx:idx + 200]
        assert "technique_page_id" in block
        assert "source_video_id" in block
# ── Tests for category case-insensitivity ────────────────────────────────────
class TestCategoryCaseInsensitive:
    """Verify the compose detection query uses func.lower for category matching."""

    def test_compose_detection_uses_func_lower(self):
        """The compose detection query must use func.lower on both sides."""
        from pathlib import Path

        # Encoding is pinned so the read does not depend on the locale default.
        stages_src = Path("backend/pipeline/stages.py").read_text(encoding="utf-8")

        # Find the compose detection block — need enough window to capture the full query
        idx = stages_src.index("Compose-or-create detection")
        block = stages_src[idx:idx + 600]
        assert "func.lower(TechniquePage.topic_category)" in block
        assert "func.lower(category)" in block