"""Tests for compose-mode prompt building and validation.

Covers prompt construction, citation re-indexing math, category filtering,
and edge cases — no LLM calls required.
"""

from __future__ import annotations

import json

import pytest

from pipeline.citation_utils import validate_citations
from pipeline.schemas import BodySection, BodySubSection, SynthesizedPage
from pipeline.test_harness import (
    MockKeyMoment,
    _MockContentType,
    build_compose_prompt,
    build_moments_text,
)

# ── Prompt block tags ────────────────────────────────────────────────────────

# NOTE(review): in the reviewed copy of this file every tag literal was the
# empty string. ``str.split("")`` raises ``ValueError: empty separator`` and
# ``"" in prompt`` is always true, so the block-extraction tests could not run
# and the tag assertions verified nothing. The names below are reconstructed
# from build_compose_prompt's keyword names and from the "new_moments block"
# wording used in these tests — confirm them against
# pipeline.test_harness.build_compose_prompt before relying on this suite.
#
# Tags are stored as bare names and the angle-bracket delimiters are built at
# runtime, so the literals survive tools that strip markup-looking text.
_PAGE_TAG = "existing_page"
_EXISTING_MOMENTS_TAG = "existing_moments"
_NEW_MOMENTS_TAG = "new_moments"
_BLOCK_TAGS = (_PAGE_TAG, _EXISTING_MOMENTS_TAG, _NEW_MOMENTS_TAG)


# ── Fixtures / helpers ───────────────────────────────────────────────────────


def _extract_block(prompt: str, tag: str) -> str:
    """Return the text between the opening and closing ``tag`` in ``prompt``.

    Fails with an explicit assertion message when either delimiter is missing,
    instead of surfacing as an ``IndexError`` from the split chain.
    """
    opening = f"<{tag}>"
    closing = f"</{tag}>"
    assert opening in prompt, f"prompt has no opening {tag} tag"
    assert closing in prompt, f"prompt has no closing {tag} tag"
    return prompt.split(opening)[1].split(closing)[0]


def _moment(
    title: str = "Test Moment",
    summary: str = "A moment.",
    content_type: str = "technique_demo",
    start_time: float = 0.0,
    end_time: float = 10.0,
    plugins: list[str] | None = None,
    raw_transcript: str | None = "Some transcript text",
) -> MockKeyMoment:
    """Build a MockKeyMoment with test defaults.

    A falsy ``plugins`` becomes ``[]`` and a falsy ``raw_transcript`` becomes
    ``""``; ``content_type`` is wrapped in ``_MockContentType``.
    """
    return MockKeyMoment(
        title=title,
        summary=summary,
        content_type=_MockContentType(content_type),
        start_time=start_time,
        end_time=end_time,
        plugins=plugins or [],
        raw_transcript=raw_transcript or "",
    )


def _cls_info(
    category: str = "Sound Design",
    tags: list[str] | None = None,
) -> dict:
    """Build a classification-info dict with ``topic_category`` and ``topic_tags``.

    A falsy ``tags`` falls back to ``["reverb", "delay"]``.
    """
    return {
        "topic_category": category,
        "topic_tags": tags or ["reverb", "delay"],
    }


def _make_page(
    title: str = "Reverb Techniques",
    slug: str = "reverb-techniques",
    category: str = "Sound Design",
    sections: list[BodySection] | None = None,
    moment_indices: list[int] | None = None,
) -> dict:
    """Build a SynthesizedPage dict (as it would appear in harness output).

    When ``sections`` is None a single "Overview" section with one subsection
    is used, citing indices [0], [1] and [2]. A falsy ``moment_indices`` falls
    back to ``[0, 1, 2]``. The model is round-tripped through its JSON dump so
    the result is a plain dict rather than a model instance.
    """
    if sections is None:
        sections = [
            BodySection(
                heading="Overview",
                content="Reverb is essential [0]. Basics of space [1].",
                subsections=[
                    BodySubSection(
                        heading="Room Types",
                        content="Rooms vary in character [2].",
                    )
                ],
            )
        ]
    page = SynthesizedPage(
        title=title,
        slug=slug,
        topic_category=category,
        summary="A page about reverb.",
        body_sections=sections,
        moment_indices=moment_indices or [0, 1, 2],
    )
    return json.loads(page.model_dump_json())


# ── TestBuildComposePrompt ──────────────────────────────────────────────────


class TestBuildComposePrompt:
    """Verify prompt construction for compose mode."""

    def test_prompt_contains_xml_tags(self):
        """Existing page + 3 old + 2 new → prompt has all required XML tags."""
        existing_moments = [(_moment(title=f"Old {i}"), _cls_info()) for i in range(3)]
        new_moments = [(_moment(title=f"New {i}"), _cls_info()) for i in range(2)]
        page = _make_page(moment_indices=[0, 1, 2])

        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing_moments,
            new_moments=new_moments,
            creator_name="DJ Test",
        )

        # NOTE(review): the original test had eight tag assertions, i.e. four
        # open/close pairs. Only the three block tags could be reconstructed;
        # add the fourth pair here once confirmed against the harness.
        for tag in _BLOCK_TAGS:
            assert f"<{tag}>" in prompt
            assert f"</{tag}>" in prompt

    def test_old_moments_indexed_0_to_n(self):
        """3 old moments are indexed [0], [1], [2]."""
        existing_moments = [(_moment(title=f"Old {i}"), _cls_info()) for i in range(3)]
        new_moments = [(_moment(title=f"New {i}"), _cls_info()) for i in range(2)]
        page = _make_page()

        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing_moments,
            new_moments=new_moments,
            creator_name="DJ Test",
        )

        # Old moments section uses [0], [1], [2]
        existing_block = _extract_block(prompt, _EXISTING_MOMENTS_TAG)
        assert "[0] Title:" in existing_block
        assert "[1] Title:" in existing_block
        assert "[2] Title:" in existing_block

    def test_new_moments_indexed_n_to_n_plus_m(self):
        """2 new moments after 3 old → indexed [3] and [4]."""
        existing_moments = [(_moment(title=f"Old {i}"), _cls_info()) for i in range(3)]
        new_moments = [(_moment(title=f"New {i}"), _cls_info()) for i in range(2)]
        page = _make_page()

        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing_moments,
            new_moments=new_moments,
            creator_name="DJ Test",
        )

        new_block = _extract_block(prompt, _NEW_MOMENTS_TAG)
        assert "[3] Title:" in new_block
        assert "[4] Title:" in new_block
        # Should NOT contain [0]-[2] in new moments block
        assert "[0] Title:" not in new_block

    def test_creator_name_in_prompt(self):
        """The creator name passed in appears somewhere in the prompt."""
        page = _make_page()
        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=[(_moment(), _cls_info())],
            new_moments=[(_moment(), _cls_info())],
            creator_name="Keota",
        )
        assert "Keota" in prompt

    def test_existing_page_json_valid(self):
        """Existing page JSON in the prompt is valid and parseable."""
        page = _make_page()
        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=[(_moment(), _cls_info())],
            new_moments=[(_moment(), _cls_info())],
            creator_name="Test",
        )

        page_block = _extract_block(prompt, _PAGE_TAG).strip()
        parsed = json.loads(page_block)
        assert parsed["title"] == "Reverb Techniques"
        assert parsed["slug"] == "reverb-techniques"

    def test_moment_format_matches_build_moments_text(self):
        """Existing moments format matches build_moments_text output."""
        moments = [
            (_moment(title="Delay Basics", plugins=["Valhalla"]), _cls_info(tags=["delay"])),
        ]
        page = _make_page()

        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=moments,
            new_moments=[(_moment(), _cls_info())],
            creator_name="Test",
        )

        # build_moments_text produces the same format for existing moments
        expected_text, _ = build_moments_text(moments, "Sound Design")
        existing_block = _extract_block(prompt, _EXISTING_MOMENTS_TAG).strip()
        assert expected_text.strip() == existing_block


# ── TestCitationReindexing ──────────────────────────────────────────────────


class TestCitationReindexing:
    """Verify citation index math for compose mode."""

    def test_5_old_3_new_valid_range(self):
        """5 old + 3 new → valid range is [0]-[7], moment_count=8."""
        # Build content that references all 8 indices
        sections = [
            BodySection(
                heading="Section",
                content="Refs [0] [1] [2] [3] [4] [5] [6] [7].",
            )
        ]

        result = validate_citations(sections, moment_count=8)
        assert result["valid"] is True
        assert result["total_citations"] == 8
        assert result["invalid_indices"] == []

    def test_accepts_citations_in_valid_range(self):
        """validate_citations with moment_count=8 accepts [0]-[7]."""
        sections = [
            BodySection(
                heading="S1",
                content="See [0] and [3] and [7].",
                subsections=[
                    BodySubSection(heading="Sub", content="Also [1] [2] [4] [5] [6].")
                ],
            )
        ]

        result = validate_citations(sections, moment_count=8)
        assert result["valid"] is True
        assert result["invalid_indices"] == []

    def test_rejects_out_of_range_citation(self):
        """validate_citations with moment_count=8 rejects [8]."""
        sections = [
            BodySection(
                heading="S1",
                content="Bad ref [8] and valid [0].",
            )
        ]

        result = validate_citations(sections, moment_count=8)
        assert result["valid"] is False
        assert 8 in result["invalid_indices"]

    def test_compose_offset_arithmetic(self):
        """Verify the offset math: N existing → new moments start at [N]."""
        n_existing = 5
        n_new = 3
        existing = [(_moment(title=f"E{i}"), _cls_info()) for i in range(n_existing)]
        new = [(_moment(title=f"N{i}"), _cls_info()) for i in range(n_new)]
        page = _make_page()

        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing,
            new_moments=new,
            creator_name="Test",
        )

        new_block = _extract_block(prompt, _NEW_MOMENTS_TAG)
        # First new moment should be [5], last should be [7]
        assert "[5] Title:" in new_block
        assert "[6] Title:" in new_block
        assert "[7] Title:" in new_block
        assert "[4] Title:" not in new_block  # last old moment, not in new block


# ── TestCategoryFiltering ───────────────────────────────────────────────────


class TestCategoryFiltering:
    """Verify that compose filters moments by category to match existing page."""

    def test_only_matching_category_moments_used(self):
        """Moments from category B are excluded when composing a category A page."""
        page = _make_page(category="Sound Design")
        existing = [(_moment(title="E0"), _cls_info(category="Sound Design"))]

        # Mix of matching and non-matching new moments
        new_sound = [(_moment(title="New SD"), _cls_info(category="Sound Design"))]
        # Built only to show what the caller leaves out; never passed in.
        new_mixing = [(_moment(title="New Mix"), _cls_info(category="Mixing"))]

        # build_compose_prompt doesn't filter by category — that's run_compose's job.
        # But we can verify the prompt only contains what we pass in.
        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing,
            new_moments=new_sound,  # Only Sound Design moments
            creator_name="Test",
        )

        new_block = _extract_block(prompt, _NEW_MOMENTS_TAG)
        assert "New SD" in new_block
        assert "New Mix" not in new_block

    def test_category_from_page_used_in_moments_text(self):
        """The page's topic_category is used in the moment formatting."""
        page = _make_page(category="Mixing")
        existing = [(_moment(), _cls_info(category="Mixing"))]
        new = [(_moment(), _cls_info(category="Mixing"))]

        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing,
            new_moments=new,
            creator_name="Test",
        )

        # The category in the formatted moments comes from the page's topic_category
        assert "Category: Mixing" in prompt


# ── TestEdgeCases ──────────────────────────────────────────────────────────


class TestEdgeCases:
    """Edge cases for compose prompt construction."""

    def test_empty_new_moments(self):
        """Empty new moments → prompt still valid with empty new_moments block."""
        page = _make_page()
        existing = [(_moment(), _cls_info())]

        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing,
            new_moments=[],
            creator_name="Test",
        )

        assert f"<{_NEW_MOMENTS_TAG}>" in prompt
        assert f"</{_NEW_MOMENTS_TAG}>" in prompt
        # Existing moments still present
        assert "[0] Title:" in prompt

    def test_single_new_moment_at_offset_n(self):
        """Single new moment after 2 existing → indexed [2]."""
        existing = [(_moment(title=f"E{i}"), _cls_info()) for i in range(2)]
        new = [(_moment(title="Single New"), _cls_info())]
        page = _make_page()

        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing,
            new_moments=new,
            creator_name="Test",
        )

        new_block = _extract_block(prompt, _NEW_MOMENTS_TAG)
        assert "[2] Title: Single New" in new_block

    def test_existing_page_no_subsections(self):
        """Page with sections but no subsections → handled correctly."""
        sections = [
            BodySection(heading="Flat Section", content="Content [0]."),
        ]
        page = _make_page(sections=sections, moment_indices=[0])
        existing = [(_moment(), _cls_info())]
        new = [(_moment(title="New One"), _cls_info())]

        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing,
            new_moments=new,
            creator_name="Test",
        )

        page_block = _extract_block(prompt, _PAGE_TAG).strip()
        parsed = json.loads(page_block)
        assert len(parsed["body_sections"]) == 1
        assert parsed["body_sections"][0]["subsections"] == []

    def test_large_offset_indices(self):
        """10 existing + 5 new → new moments indexed [10]-[14]."""
        existing = [(_moment(title=f"E{i}"), _cls_info()) for i in range(10)]
        new = [(_moment(title=f"N{i}"), _cls_info()) for i in range(5)]
        page = _make_page()

        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing,
            new_moments=new,
            creator_name="Test",
        )

        new_block = _extract_block(prompt, _NEW_MOMENTS_TAG)
        assert "[10] Title:" in new_block
        assert "[14] Title:" in new_block
        assert "[9] Title:" not in new_block  # last existing, not in new block