"""Unit tests for compose pipeline logic in stage5_synthesis. Covers: - _build_compose_user_prompt(): XML structure, offset indices, empty existing, page JSON - Compose-or-create branching: compose triggered vs create fallback - body_sections_format='v2' on persisted pages - TechniquePageVideo insertion via pg_insert with on_conflict_do_nothing """ from __future__ import annotations import json import uuid from collections import namedtuple from unittest.mock import MagicMock, patch import pytest # ── Lightweight mock objects ───────────────────────────────────────────────── class _MockContentType: """Mimics ContentType enum with .value.""" def __init__(self, value: str) -> None: self.value = value MockKeyMoment = namedtuple("MockKeyMoment", [ "id", "title", "summary", "content_type", "start_time", "end_time", "plugins", "raw_transcript", "technique_page_id", "source_video_id", ]) def _moment( title: str = "Test Moment", summary: str = "A moment.", content_type: str = "technique_demo", start_time: float = 0.0, end_time: float = 10.0, plugins: list[str] | None = None, raw_transcript: str | None = "Some transcript text", technique_page_id: uuid.UUID | None = None, source_video_id: uuid.UUID | None = None, ) -> MockKeyMoment: return MockKeyMoment( id=uuid.uuid4(), title=title, summary=summary, content_type=_MockContentType(content_type), start_time=start_time, end_time=end_time, plugins=plugins or [], raw_transcript=raw_transcript or "", technique_page_id=technique_page_id, source_video_id=source_video_id, ) class _MockSourceQuality: """Mimics source_quality enum with .value.""" def __init__(self, value: str = "high") -> None: self.value = value class MockTechniquePage: """Lightweight stand-in for the ORM TechniquePage.""" def __init__( self, title: str = "Reverb Techniques", slug: str = "reverb-techniques", topic_category: str = "Sound Design", summary: str = "A page about reverb.", body_sections: list | None = None, signal_chains: list | None = None, plugins: list[str] | None = None, source_quality: str = "high", creator_id: uuid.UUID | None = None, body_sections_format: str | None = None, ): self.id = uuid.uuid4() self.title = title self.slug = slug self.topic_category = topic_category self.summary = summary self.body_sections = body_sections or [{"heading": "Overview", "content": "Intro text."}] self.signal_chains = signal_chains or [] self.plugins = plugins or ["Valhalla VintageVerb"] self.source_quality = _MockSourceQuality(source_quality) self.creator_id = creator_id or uuid.uuid4() self.body_sections_format = body_sections_format def _cls_info(tags: list[str] | None = None) -> dict: return {"topic_category": "Sound Design", "topic_tags": tags or ["reverb", "delay"]} # ── Import the function under test ─────────────────────────────────────────── # We need to patch modules before importing stages in some tests. # For _build_compose_user_prompt we can import directly since it's a pure function # that only depends on _build_moments_text. @pytest.fixture def build_compose_prompt(): """Import _build_compose_user_prompt from stages.""" from pipeline.stages import _build_compose_user_prompt return _build_compose_user_prompt # ── Tests for _build_compose_user_prompt ───────────────────────────────────── class TestBuildComposeUserPrompt: """Tests for _build_compose_user_prompt XML structure and offset math.""" def test_compose_prompt_xml_structure(self, build_compose_prompt): """Verify output contains all required XML tags.""" page = MockTechniquePage() existing = [_moment(title="Existing 1")] new = [(_moment(title="New 1"), _cls_info())] result = build_compose_prompt(page, existing, new, "COPYCATT") assert "" in result assert "" in result assert "" in result assert "" in result assert "" in result assert "" in result assert "" in result assert "" in result assert "COPYCATT" in result def test_compose_prompt_offset_indices(self, build_compose_prompt): """With 3 existing + 2 new moments, new moments should use [3] and [4].""" page = MockTechniquePage() existing = [ _moment(title=f"Existing {i}") for i in range(3) ] new = [ (_moment(title=f"New {i}"), _cls_info()) for i in range(2) ] result = build_compose_prompt(page, existing, new, "COPYCATT") # New moments section should have [3] and [4] new_section_start = result.index("") new_section_end = result.index("") new_section = result[new_section_start:new_section_end] assert "[3]" in new_section assert "[4]" in new_section # Should NOT have [0], [1], [2] in the new section assert "[0]" not in new_section assert "[1]" not in new_section assert "[2]" not in new_section def test_compose_prompt_empty_existing_moments(self, build_compose_prompt): """0 existing moments → new moments start at [0].""" page = MockTechniquePage() existing = [] new = [ (_moment(title="New A"), _cls_info()), (_moment(title="New B"), _cls_info()), ] result = build_compose_prompt(page, existing, new, "COPYCATT") new_section_start = result.index("") new_section_end = result.index("") new_section = result[new_section_start:new_section_end] assert "[0]" in new_section assert "[1]" in new_section def test_compose_prompt_page_json(self, build_compose_prompt): """Existing page should be serialized as JSON within tags.""" page = MockTechniquePage(title="My Page", slug="my-page", topic_category="Mixing") result = build_compose_prompt(page, [], [(_moment(), _cls_info())], "Creator") page_section_start = result.index("") + len("") page_section_end = result.index("") page_json_str = result[page_section_start:page_section_end].strip() page_dict = json.loads(page_json_str) assert page_dict["title"] == "My Page" assert page_dict["slug"] == "my-page" assert page_dict["topic_category"] == "Mixing" assert "summary" in page_dict assert "body_sections" in page_dict def test_compose_prompt_new_moment_content(self, build_compose_prompt): """New moments section includes title, summary, time range, and tags.""" page = MockTechniquePage() m = _moment(title="Sidechain Pump", summary="How to create a sidechain pump", start_time=30.0, end_time=45.5, plugins=["FabFilter Pro-C 2"]) new = [(m, _cls_info(tags=["compression", "sidechain"]))] result = build_compose_prompt(page, [], new, "Creator") new_section_start = result.index("") new_section_end = result.index("") new_section = result[new_section_start:new_section_end] assert "Sidechain Pump" in new_section assert "How to create a sidechain pump" in new_section assert "30.0s" in new_section assert "45.5s" in new_section assert "FabFilter Pro-C 2" in new_section assert "compression" in new_section assert "sidechain" in new_section # ── Tests for compose-or-create branching ──────────────────────────────────── class TestComposeOrCreateBranching: """Tests for the compose-or-create detection and branching in stage5_synthesis. Full integration-level mocking of stage5_synthesis is fragile (many DB queries). Instead, we verify: 1. The code structure has correct branching (compose_target check → two paths) 2. _compose_into_existing calls the LLM with compose prompt and returns parsed result """ def test_compose_branch_exists_in_source(self): """Verify stage5 has compose detection → _compose_into_existing call path.""" from pathlib import Path src = Path("backend/pipeline/stages.py").read_text() # The compose detection block assert "compose_matches = session.execute(" in src assert "compose_target = compose_matches[0] if compose_matches else None" in src # The compose branch calls _compose_into_existing assert "if compose_target is not None:" in src assert "_compose_into_existing(" in src # The create branch calls _synthesize_chunk assert "elif len(moment_group) <= chunk_size:" in src def test_create_branch_when_no_compose_target(self): """Verify the else/elif branches call _synthesize_chunk, not _compose_into_existing.""" from pathlib import Path src = Path("backend/pipeline/stages.py").read_text() # Find the compose branch and the create branch — they're mutually exclusive compose_branch_idx = src.index("if compose_target is not None:") create_branch_idx = src.index("elif len(moment_group) <= chunk_size:") # The create branch must come after the compose branch (same if/elif chain) assert create_branch_idx > compose_branch_idx # _synthesize_chunk should appear in the create branch, not compose create_block = src[create_branch_idx:create_branch_idx + 500] assert "_synthesize_chunk(" in create_block @patch("pipeline.stages._safe_parse_llm_response") @patch("pipeline.stages._make_llm_callback", return_value=lambda **kw: None) @patch("pipeline.stages.estimate_max_tokens", return_value=4000) @patch("pipeline.stages._load_prompt", return_value="compose system prompt") def test_compose_into_existing_calls_llm( self, mock_load_prompt, mock_estimate, mock_callback, mock_parse, ): """_compose_into_existing calls LLM with compose prompt and returns parsed result.""" from pipeline.schemas import SynthesisResult, SynthesizedPage from pipeline.stages import _compose_into_existing mock_llm = MagicMock() mock_llm.complete.return_value = "raw response" synth_page = SynthesizedPage( title="Merged Page", slug="merged-page", topic_category="Sound Design", summary="Merged", body_sections=[], signal_chains=[], plugins=[], source_quality="high", moment_indices=[0, 1], ) mock_parse.return_value = SynthesisResult(pages=[synth_page]) page = MockTechniquePage() existing_moments = [_moment(title="Old Moment")] new_moments = [(_moment(title="New Moment"), _cls_info())] result = _compose_into_existing( page, existing_moments, new_moments, "Sound Design", "COPYCATT", "system prompt", mock_llm, None, "text", 8000, str(uuid.uuid4()), None, ) # LLM was called mock_llm.complete.assert_called_once() # The compose prompt template was loaded mock_load_prompt.assert_called_once() call_args = mock_load_prompt.call_args assert "stage5_compose" in call_args[0][0] # Result has the expected page assert len(result.pages) == 1 assert result.pages[0].title == "Merged Page" # ── Tests for body_sections_format and TechniquePageVideo ──────────────────── class TestBodySectionsFormatAndTracking: """Tests for body_sections_format='v2' and TechniquePageVideo insertion.""" def test_body_sections_format_v2_set_on_page(self): """Verify the persist section sets body_sections_format='v2' on pages.""" # Read stages.py source and verify the assignment exists from pathlib import Path stages_src = Path("backend/pipeline/stages.py").read_text() # The line `page.body_sections_format = "v2"` must appear in the persist block assert 'page.body_sections_format = "v2"' in stages_src, ( "body_sections_format = 'v2' assignment not found in stages.py" ) def test_technique_page_video_pg_insert(self): """Verify TechniquePageVideo insertion uses pg_insert with on_conflict_do_nothing.""" from pathlib import Path stages_src = Path("backend/pipeline/stages.py").read_text() assert "pg_insert(TechniquePageVideo.__table__)" in stages_src, ( "pg_insert(TechniquePageVideo.__table__) not found in stages.py" ) assert "on_conflict_do_nothing()" in stages_src, ( "on_conflict_do_nothing() not found in stages.py" ) def test_technique_page_video_values(self): """Verify TechniquePageVideo INSERT includes technique_page_id and source_video_id.""" from pathlib import Path stages_src = Path("backend/pipeline/stages.py").read_text() # Find the pg_insert block idx = stages_src.index("pg_insert(TechniquePageVideo.__table__)") block = stages_src[idx:idx + 200] assert "technique_page_id" in block assert "source_video_id" in block # ── Tests for category case-insensitivity ──────────────────────────────────── class TestCategoryCaseInsensitive: """Verify the compose detection query uses func.lower for category matching.""" def test_compose_detection_uses_func_lower(self): """The compose detection query must use func.lower on both sides.""" from pathlib import Path stages_src = Path("backend/pipeline/stages.py").read_text() # Find the compose detection block — need enough window to capture the full query idx = stages_src.index("Compose-or-create detection") block = stages_src[idx:idx + 600] assert "func.lower(TechniquePage.topic_category)" in block assert "func.lower(category)" in block