chrysopedia/backend/pipeline/test_harness_compose.py
jlightner 5cd7db8938 test: 16 unit tests covering compose prompt XML structure, citation off…
- "backend/pipeline/test_harness_compose.py"
- ".gsd/milestones/M014/slices/S02/tasks/T03-SUMMARY.md"

GSD-Task: S02/T03
2026-04-03 01:08:41 +00:00

389 lines
14 KiB
Python

"""Tests for compose-mode prompt building and validation.
Covers prompt construction, citation re-indexing math, category filtering,
and edge cases — no LLM calls required.
"""
from __future__ import annotations
import json
import pytest
from pipeline.citation_utils import validate_citations
from pipeline.schemas import BodySection, BodySubSection, SynthesizedPage
from pipeline.test_harness import (
MockKeyMoment,
_MockContentType,
build_compose_prompt,
build_moments_text,
)
# ── Fixtures / helpers ───────────────────────────────────────────────────────
def _moment(
    title: str = "Test Moment",
    summary: str = "A moment.",
    content_type: str = "technique_demo",
    start_time: float = 0.0,
    end_time: float = 10.0,
    plugins: list[str] | None = None,
    raw_transcript: str | None = "Some transcript text",
) -> MockKeyMoment:
    """Create a ``MockKeyMoment`` populated with overridable test defaults.

    ``content_type`` is wrapped in ``_MockContentType`` before being handed to
    the mock. A falsy ``plugins`` value (``None`` or an empty list) becomes a
    fresh empty list, and a falsy ``raw_transcript`` (``None`` or ``""``)
    becomes the empty string.
    """
    moment_fields = {
        "title": title,
        "summary": summary,
        "content_type": _MockContentType(content_type),
        "start_time": start_time,
        "end_time": end_time,
        "plugins": plugins if plugins else [],
        "raw_transcript": raw_transcript if raw_transcript else "",
    }
    return MockKeyMoment(**moment_fields)
def _cls_info(
    category: str = "Sound Design",
    tags: list[str] | None = None,
) -> dict:
    """Build the classification dict that is paired with a moment in these tests.

    Args:
        category: Value stored under ``"topic_category"``.
        tags: Value stored under ``"topic_tags"``. Only ``None`` selects the
            default ``["reverb", "delay"]``; an explicit empty list is kept,
            so a caller can describe a moment that has no tags.

    Returns:
        A dict with the keys ``"topic_category"`` and ``"topic_tags"``.
    """
    # Compare against None instead of relying on truthiness: ``tags or [...]``
    # would silently swap an intentionally empty tag list for the defaults.
    if tags is None:
        tags = ["reverb", "delay"]
    return {
        "topic_category": category,
        "topic_tags": tags,
    }
def _make_page(
    title: str = "Reverb Techniques",
    slug: str = "reverb-techniques",
    category: str = "Sound Design",
    sections: list[BodySection] | None = None,
    moment_indices: list[int] | None = None,
) -> dict:
    """Build a SynthesizedPage dict (as it would appear in harness output).

    Args:
        title: Page title.
        slug: Page slug.
        category: Stored as the page's ``topic_category``.
        sections: Body sections. When ``None``, a single "Overview" section
            citing ``[0]`` and ``[1]`` with one "Room Types" subsection citing
            ``[2]`` is used.
        moment_indices: Moment indices for the page. Only ``None`` selects the
            default ``[0, 1, 2]``; an explicit empty list is passed through.

    Returns:
        The page serialised with ``model_dump_json`` and parsed back with
        ``json.loads``, i.e. a plain JSON-compatible dict.
    """
    if sections is None:
        sections = [
            BodySection(
                heading="Overview",
                content="Reverb is essential [0]. Basics of space [1].",
                subsections=[
                    BodySubSection(
                        heading="Room Types",
                        content="Rooms vary in character [2].",
                    )
                ],
            )
        ]
    # Mirror the ``sections`` handling above: a truthiness test
    # (``moment_indices or [...]``) would discard an explicit empty list.
    if moment_indices is None:
        moment_indices = [0, 1, 2]
    page = SynthesizedPage(
        title=title,
        slug=slug,
        topic_category=category,
        summary="A page about reverb.",
        body_sections=sections,
        moment_indices=moment_indices,
    )
    # Round-trip through JSON so the result holds only plain dicts/lists.
    return json.loads(page.model_dump_json())
# ── TestBuildComposePrompt ──────────────────────────────────────────────────
class TestBuildComposePrompt:
    """Verify prompt construction for compose mode."""

    @staticmethod
    def _three_old_two_new_prompt(page: dict) -> str:
        """Build a compose prompt from 3 "Old i" moments and 2 "New i" moments."""
        existing_moments = [(_moment(title=f"Old {i}"), _cls_info()) for i in range(3)]
        new_moments = [(_moment(title=f"New {i}"), _cls_info()) for i in range(2)]
        return build_compose_prompt(
            existing_page=page,
            existing_moments=existing_moments,
            new_moments=new_moments,
            creator_name="DJ Test",
        )

    def test_prompt_contains_xml_tags(self):
        """Existing page + 3 old + 2 new → prompt has all required XML tags."""
        prompt = self._three_old_two_new_prompt(_make_page(moment_indices=[0, 1, 2]))
        for tag in ("existing_page", "existing_moments", "new_moments", "creator"):
            assert f"<{tag}>" in prompt
            assert f"</{tag}>" in prompt

    def test_old_moments_indexed_0_to_n(self):
        """3 old moments are indexed [0], [1], [2]."""
        prompt = self._three_old_two_new_prompt(_make_page())
        # Old moments section uses [0], [1], [2]
        existing_block = prompt.split("<existing_moments>")[1].split("</existing_moments>")[0]
        for index in range(3):
            assert f"[{index}] Title:" in existing_block

    def test_new_moments_indexed_n_to_n_plus_m(self):
        """2 new moments after 3 old → indexed [3] and [4]."""
        prompt = self._three_old_two_new_prompt(_make_page())
        new_block = prompt.split("<new_moments>")[1].split("</new_moments>")[0]
        for index in (3, 4):
            assert f"[{index}] Title:" in new_block
        # None of the old indices [0]-[2] may leak into the new moments block;
        # check every one of them, not just [0].
        for index in range(3):
            assert f"[{index}] Title:" not in new_block

    def test_creator_name_in_prompt(self):
        """The creator name is emitted inside a <creator> element."""
        prompt = build_compose_prompt(
            existing_page=_make_page(),
            existing_moments=[(_moment(), _cls_info())],
            new_moments=[(_moment(), _cls_info())],
            creator_name="Keota",
        )
        assert "<creator>Keota</creator>" in prompt

    def test_existing_page_json_valid(self):
        """Existing page JSON in the prompt is valid and parseable."""
        prompt = build_compose_prompt(
            existing_page=_make_page(),
            existing_moments=[(_moment(), _cls_info())],
            new_moments=[(_moment(), _cls_info())],
            creator_name="Test",
        )
        page_block = prompt.split("<existing_page>")[1].split("</existing_page>")[0].strip()
        parsed = json.loads(page_block)
        assert parsed["title"] == "Reverb Techniques"
        assert parsed["slug"] == "reverb-techniques"

    def test_moment_format_matches_build_moments_text(self):
        """Existing moments format matches build_moments_text output."""
        moments = [
            (_moment(title="Delay Basics", plugins=["Valhalla"]), _cls_info(tags=["delay"])),
        ]
        prompt = build_compose_prompt(
            existing_page=_make_page(),
            existing_moments=moments,
            new_moments=[(_moment(), _cls_info())],
            creator_name="Test",
        )
        # build_moments_text produces the same format for existing moments
        expected_text, _ = build_moments_text(moments, "Sound Design")
        existing_block = prompt.split("<existing_moments>")[1].split("</existing_moments>")[0].strip()
        assert expected_text.strip() == existing_block
# ── TestCitationReindexing ──────────────────────────────────────────────────
class TestCitationReindexing:
    """Verify citation index math for compose mode."""

    def test_5_old_3_new_valid_range(self):
        """5 old + 3 new → valid range is [0]-[7], moment_count=8."""
        # A single section whose content cites every one of the 8 indices.
        all_refs = BodySection(
            heading="Section",
            content="Refs [0] [1] [2] [3] [4] [5] [6] [7].",
        )
        report = validate_citations([all_refs], moment_count=8)
        assert report["valid"] is True
        assert report["total_citations"] == 8
        assert report["invalid_indices"] == []

    def test_accepts_citations_in_valid_range(self):
        """validate_citations with moment_count=8 accepts [0]-[7]."""
        nested = BodySubSection(heading="Sub", content="Also [1] [2] [4] [5] [6].")
        parent = BodySection(
            heading="S1",
            content="See [0] and [3] and [7].",
            subsections=[nested],
        )
        report = validate_citations([parent], moment_count=8)
        assert report["valid"] is True
        assert report["invalid_indices"] == []

    def test_rejects_out_of_range_citation(self):
        """validate_citations with moment_count=8 rejects [8]."""
        offending = BodySection(
            heading="S1",
            content="Bad ref [8] and valid [0].",
        )
        report = validate_citations([offending], moment_count=8)
        assert report["valid"] is False
        assert 8 in report["invalid_indices"]

    def test_compose_offset_arithmetic(self):
        """Verify the offset math: N existing → new moments start at [N]."""
        n_existing = 5
        n_new = 3
        existing = [(_moment(title=f"E{i}"), _cls_info()) for i in range(n_existing)]
        new = [(_moment(title=f"N{i}"), _cls_info()) for i in range(n_new)]
        prompt = build_compose_prompt(
            existing_page=_make_page(),
            existing_moments=existing,
            new_moments=new,
            creator_name="Test",
        )
        after_open_tag = prompt.split("<new_moments>")[1]
        new_block = after_open_tag.split("</new_moments>")[0]
        # New moments occupy [5], [6] and [7] ...
        for index in range(n_existing, n_existing + n_new):
            assert f"[{index}] Title:" in new_block
        # ... while [4], the last old moment, stays out of the new block.
        assert f"[{n_existing - 1}] Title:" not in new_block
# ── TestCategoryFiltering ───────────────────────────────────────────────────
class TestCategoryFiltering:
    """Verify that compose filters moments by category to match existing page."""

    def test_only_matching_category_moments_used(self):
        """Moments from category B are excluded when composing a category A page."""
        page_category = "Sound Design"
        page = _make_page(category=page_category)
        existing = [(_moment(title="E0"), _cls_info(category=page_category))]
        # Mix of matching and non-matching new moments
        new_sound = [(_moment(title="New SD"), _cls_info(category="Sound Design"))]
        new_mixing = [(_moment(title="New Mix"), _cls_info(category="Mixing"))]
        # build_compose_prompt doesn't filter by category — that's run_compose's job.
        # Apply a stand-in for that caller-side filter so the non-matching
        # moment is genuinely part of the scenario instead of an unused local.
        # NOTE(review): equality on "topic_category" is an assumption about how
        # run_compose filters — confirm against its implementation.
        matching_new = [
            (moment, cls)
            for moment, cls in new_sound + new_mixing
            if cls["topic_category"] == page_category
        ]
        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing,
            new_moments=matching_new,  # Only Sound Design moments survive the filter
            creator_name="Test",
        )
        new_block = prompt.split("<new_moments>")[1].split("</new_moments>")[0]
        assert "New SD" in new_block
        assert "New Mix" not in new_block

    def test_category_from_page_used_in_moments_text(self):
        """The page's topic_category is used in the moment formatting."""
        page = _make_page(category="Mixing")
        existing = [(_moment(), _cls_info(category="Mixing"))]
        new = [(_moment(), _cls_info(category="Mixing"))]
        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing,
            new_moments=new,
            creator_name="Test",
        )
        # The category in the formatted moments comes from the page's topic_category
        assert "Category: Mixing" in prompt
# ── TestEdgeCases ──────────────────────────────────────────────────────────
class TestEdgeCases:
    """Edge cases for compose prompt construction."""

    def test_empty_new_moments(self):
        """Empty new moments → prompt still valid with empty new_moments block."""
        prompt = build_compose_prompt(
            existing_page=_make_page(),
            existing_moments=[(_moment(), _cls_info())],
            new_moments=[],
            creator_name="Test",
        )
        for tag in ("<new_moments>", "</new_moments>"):
            assert tag in prompt
        # Existing moments still present
        assert "[0] Title:" in prompt

    def test_single_new_moment_at_offset_n(self):
        """Single new moment after 2 existing → indexed [2]."""
        prior = [(_moment(title=f"E{i}"), _cls_info()) for i in range(2)]
        added = [(_moment(title="Single New"), _cls_info())]
        prompt = build_compose_prompt(
            existing_page=_make_page(),
            existing_moments=prior,
            new_moments=added,
            creator_name="Test",
        )
        after_open_tag = prompt.split("<new_moments>")[1]
        new_block = after_open_tag.split("</new_moments>")[0]
        assert "[2] Title: Single New" in new_block

    def test_existing_page_no_subsections(self):
        """Page with sections but no subsections → handled correctly."""
        flat_sections = [
            BodySection(heading="Flat Section", content="Content [0]."),
        ]
        prompt = build_compose_prompt(
            existing_page=_make_page(sections=flat_sections, moment_indices=[0]),
            existing_moments=[(_moment(), _cls_info())],
            new_moments=[(_moment(title="New One"), _cls_info())],
            creator_name="Test",
        )
        after_open_tag = prompt.split("<existing_page>")[1]
        page_json = after_open_tag.split("</existing_page>")[0].strip()
        body_sections = json.loads(page_json)["body_sections"]
        assert len(body_sections) == 1
        assert body_sections[0]["subsections"] == []

    def test_large_offset_indices(self):
        """10 existing + 5 new → new moments indexed [10]-[14]."""
        prior = [(_moment(title=f"E{i}"), _cls_info()) for i in range(10)]
        added = [(_moment(title=f"N{i}"), _cls_info()) for i in range(5)]
        prompt = build_compose_prompt(
            existing_page=_make_page(),
            existing_moments=prior,
            new_moments=added,
            creator_name="Test",
        )
        after_open_tag = prompt.split("<new_moments>")[1]
        new_block = after_open_tag.split("</new_moments>")[0]
        assert "[10] Title:" in new_block
        assert "[14] Title:" in new_block
        # [9] is the last existing moment and must not appear in the new block.
        assert "[9] Title:" not in new_block