- "backend/pipeline/test_harness_compose.py" - ".gsd/milestones/M014/slices/S02/tasks/T03-SUMMARY.md" GSD-Task: S02/T03
389 lines
14 KiB
Python
389 lines
14 KiB
Python
"""Tests for compose-mode prompt building and validation.
|
|
|
|
Covers prompt construction, citation re-indexing math, category filtering,
|
|
and edge cases — no LLM calls required.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
|
|
import pytest
|
|
|
|
from pipeline.citation_utils import validate_citations
|
|
from pipeline.schemas import BodySection, BodySubSection, SynthesizedPage
|
|
from pipeline.test_harness import (
|
|
MockKeyMoment,
|
|
_MockContentType,
|
|
build_compose_prompt,
|
|
build_moments_text,
|
|
)
|
|
|
|
|
|
# ── Fixtures / helpers ───────────────────────────────────────────────────────
|
|
|
|
|
|
def _moment(
    title: str = "Test Moment",
    summary: str = "A moment.",
    content_type: str = "technique_demo",
    start_time: float = 0.0,
    end_time: float = 10.0,
    plugins: list[str] | None = None,
    raw_transcript: str | None = "Some transcript text",
) -> MockKeyMoment:
    """Create a ``MockKeyMoment`` for tests, defaulting any field not supplied.

    ``content_type`` is given as a plain string and wrapped in
    ``_MockContentType`` before being handed to the mock. A falsy ``plugins``
    value is replaced by an empty list, and a falsy ``raw_transcript`` by an
    empty string.
    """
    wrapped_type = _MockContentType(content_type)
    plugin_names = plugins or []
    transcript = raw_transcript or ""
    return MockKeyMoment(
        title=title,
        summary=summary,
        content_type=wrapped_type,
        start_time=start_time,
        end_time=end_time,
        plugins=plugin_names,
        raw_transcript=transcript,
    )
|
|
|
|
|
|
def _cls_info(
|
|
category: str = "Sound Design",
|
|
tags: list[str] | None = None,
|
|
) -> dict:
|
|
return {
|
|
"topic_category": category,
|
|
"topic_tags": tags or ["reverb", "delay"],
|
|
}
|
|
|
|
|
|
def _make_page(
    title: str = "Reverb Techniques",
    slug: str = "reverb-techniques",
    category: str = "Sound Design",
    sections: list[BodySection] | None = None,
    moment_indices: list[int] | None = None,
) -> dict:
    """Build a SynthesizedPage dict (as it would appear in harness output).

    Args:
        title: Page title.
        slug: Page slug.
        category: Passed to ``SynthesizedPage`` as ``topic_category``.
        sections: Body sections. ``None`` selects a single "Overview" section
            citing ``[0]`` and ``[1]`` with one "Room Types" subsection citing
            ``[2]``.
        moment_indices: Passed through to ``SynthesizedPage``. ``None`` selects
            ``[0, 1, 2]``; an explicitly passed empty list is kept as-is.

    Returns:
        The page as a plain dict, obtained by round-tripping the
        ``SynthesizedPage`` through ``model_dump_json()`` and ``json.loads``.
    """
    if sections is None:
        sections = [
            BodySection(
                heading="Overview",
                content="Reverb is essential [0]. Basics of space [1].",
                subsections=[
                    BodySubSection(
                        heading="Room Types",
                        content="Rooms vary in character [2].",
                    )
                ],
            )
        ]
    # Same None check as for ``sections``: ``moment_indices or [...]`` would
    # silently turn an explicit ``[]`` into the three default indices.
    if moment_indices is None:
        moment_indices = [0, 1, 2]
    page = SynthesizedPage(
        title=title,
        slug=slug,
        topic_category=category,
        summary="A page about reverb.",
        body_sections=sections,
        moment_indices=moment_indices,
    )
    return json.loads(page.model_dump_json())
|
|
|
|
|
|
# ── TestBuildComposePrompt ──────────────────────────────────────────────────
|
|
|
|
|
|
class TestBuildComposePrompt:
    """Verify prompt construction for compose mode."""

    def test_prompt_contains_xml_tags(self):
        """Existing page + 3 old + 2 new → prompt has all required XML tags."""
        existing_moments = [(_moment(title=f"Old {i}"), _cls_info()) for i in range(3)]
        new_moments = [(_moment(title=f"New {i}"), _cls_info()) for i in range(2)]
        page = _make_page(moment_indices=[0, 1, 2])

        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing_moments,
            new_moments=new_moments,
            creator_name="DJ Test",
        )

        # Every section must appear with both its opening and closing tag.
        for tag in ("existing_page", "existing_moments", "new_moments", "creator"):
            assert f"<{tag}>" in prompt
            assert f"</{tag}>" in prompt

    def test_old_moments_indexed_0_to_n(self):
        """3 old moments are indexed [0], [1], [2]."""
        existing_moments = [(_moment(title=f"Old {i}"), _cls_info()) for i in range(3)]
        new_moments = [(_moment(title=f"New {i}"), _cls_info()) for i in range(2)]
        page = _make_page()

        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing_moments,
            new_moments=new_moments,
            creator_name="DJ Test",
        )

        # Old moments section uses [0], [1], [2]
        existing_block = prompt.split("<existing_moments>")[1].split("</existing_moments>")[0]
        for index in range(3):
            assert f"[{index}] Title:" in existing_block

    def test_new_moments_indexed_n_to_n_plus_m(self):
        """2 new moments after 3 old → indexed [3] and [4]."""
        existing_moments = [(_moment(title=f"Old {i}"), _cls_info()) for i in range(3)]
        new_moments = [(_moment(title=f"New {i}"), _cls_info()) for i in range(2)]
        page = _make_page()

        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing_moments,
            new_moments=new_moments,
            creator_name="DJ Test",
        )

        new_block = prompt.split("<new_moments>")[1].split("</new_moments>")[0]
        assert "[3] Title:" in new_block
        assert "[4] Title:" in new_block
        # None of the existing-moment indices [0]-[2] may appear in the new
        # moments block; check all three, not just the first.
        for old_index in range(3):
            assert f"[{old_index}] Title:" not in new_block

    def test_creator_name_in_prompt(self):
        """The creator name is emitted inside a <creator> element."""
        page = _make_page()
        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=[(_moment(), _cls_info())],
            new_moments=[(_moment(), _cls_info())],
            creator_name="Keota",
        )
        assert "<creator>Keota</creator>" in prompt

    def test_existing_page_json_valid(self):
        """Existing page JSON in the prompt is valid and parseable."""
        page = _make_page()
        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=[(_moment(), _cls_info())],
            new_moments=[(_moment(), _cls_info())],
            creator_name="Test",
        )
        page_block = prompt.split("<existing_page>")[1].split("</existing_page>")[0].strip()
        parsed = json.loads(page_block)
        assert parsed["title"] == "Reverb Techniques"
        assert parsed["slug"] == "reverb-techniques"

    def test_moment_format_matches_build_moments_text(self):
        """Existing moments format matches build_moments_text output."""
        moments = [
            (_moment(title="Delay Basics", plugins=["Valhalla"]), _cls_info(tags=["delay"])),
        ]
        page = _make_page()

        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=moments,
            new_moments=[(_moment(), _cls_info())],
            creator_name="Test",
        )

        # build_moments_text produces the same format for existing moments
        expected_text, _ = build_moments_text(moments, "Sound Design")
        existing_block = prompt.split("<existing_moments>")[1].split("</existing_moments>")[0].strip()
        assert expected_text.strip() == existing_block
|
|
|
|
|
|
# ── TestCitationReindexing ──────────────────────────────────────────────────
|
|
|
|
|
|
class TestCitationReindexing:
    """Check the citation index arithmetic that compose mode relies on."""

    def test_5_old_3_new_valid_range(self):
        """5 old + 3 new → valid range is [0]-[7], moment_count=8."""
        # A single section that cites every index of the combined range.
        all_refs = BodySection(
            heading="Section",
            content="Refs [0] [1] [2] [3] [4] [5] [6] [7].",
        )
        report = validate_citations([all_refs], moment_count=8)
        assert report["valid"] is True
        assert report["total_citations"] == 8
        assert report["invalid_indices"] == []

    def test_accepts_citations_in_valid_range(self):
        """validate_citations with moment_count=8 accepts [0]-[7]."""
        # Citations are spread over the section body and a subsection.
        nested = BodySubSection(heading="Sub", content="Also [1] [2] [4] [5] [6].")
        section = BodySection(
            heading="S1",
            content="See [0] and [3] and [7].",
            subsections=[nested],
        )
        report = validate_citations([section], moment_count=8)
        assert report["valid"] is True
        assert report["invalid_indices"] == []

    def test_rejects_out_of_range_citation(self):
        """validate_citations with moment_count=8 rejects [8]."""
        section = BodySection(
            heading="S1",
            content="Bad ref [8] and valid [0].",
        )
        report = validate_citations([section], moment_count=8)
        assert report["valid"] is False
        assert 8 in report["invalid_indices"]

    def test_compose_offset_arithmetic(self):
        """Verify the offset math: N existing → new moments start at [N]."""
        n_existing, n_new = 5, 3
        old_pairs = [(_moment(title=f"E{i}"), _cls_info()) for i in range(n_existing)]
        new_pairs = [(_moment(title=f"N{i}"), _cls_info()) for i in range(n_new)]

        prompt = build_compose_prompt(
            existing_page=_make_page(),
            existing_moments=old_pairs,
            new_moments=new_pairs,
            creator_name="Test",
        )

        new_block = prompt.split("<new_moments>")[1].split("</new_moments>")[0]
        # New moments occupy [5]..[7], directly after the existing ones.
        for index in range(n_existing, n_existing + n_new):
            assert f"[{index}] Title:" in new_block
        # [4] is the last old moment and must not show up in the new block.
        assert f"[{n_existing - 1}] Title:" not in new_block
|
|
|
|
|
|
# ── TestCategoryFiltering ───────────────────────────────────────────────────
|
|
|
|
|
|
class TestCategoryFiltering:
    """Verify that compose filters moments by category to match existing page."""

    def test_only_matching_category_moments_used(self):
        """Moments from category B are excluded when composing a category A page."""
        page = _make_page(category="Sound Design")
        existing = [(_moment(title="E0"), _cls_info(category="Sound Design"))]

        # Mix of matching and non-matching new moments
        new_sound = [(_moment(title="New SD"), _cls_info(category="Sound Design"))]
        new_mixing = [(_moment(title="New Mix"), _cls_info(category="Mixing"))]

        # build_compose_prompt doesn't filter by category — that's run_compose's job.
        # Filter the mixed pool here, standing in for the caller, so the
        # non-matching moment is actually part of the scenario being tested.
        matching = [
            pair
            for pair in new_sound + new_mixing
            if pair[1]["topic_category"] == "Sound Design"
        ]

        # Then verify the prompt only contains what we pass in.
        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing,
            new_moments=matching,  # Only Sound Design moments
            creator_name="Test",
        )

        new_block = prompt.split("<new_moments>")[1].split("</new_moments>")[0]
        assert "New SD" in new_block
        assert "New Mix" not in new_block

    def test_category_from_page_used_in_moments_text(self):
        """The page's topic_category is used in the moment formatting."""
        page = _make_page(category="Mixing")
        existing = [(_moment(), _cls_info(category="Mixing"))]
        new = [(_moment(), _cls_info(category="Mixing"))]

        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=existing,
            new_moments=new,
            creator_name="Test",
        )

        # The category in the formatted moments comes from the page's topic_category
        assert "Category: Mixing" in prompt
|
|
|
|
|
|
# ── TestEdgeCases ──────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestEdgeCases:
    """Boundary scenarios for building the compose prompt."""

    def test_empty_new_moments(self):
        """Empty new moments → prompt still valid with empty new_moments block."""
        old_pairs = [(_moment(), _cls_info())]

        prompt = build_compose_prompt(
            existing_page=_make_page(),
            existing_moments=old_pairs,
            new_moments=[],
            creator_name="Test",
        )

        # The new-moments element is still emitted, open and close.
        assert "<new_moments>" in prompt
        assert "</new_moments>" in prompt
        # Existing moments still present
        assert "[0] Title:" in prompt

    def test_single_new_moment_at_offset_n(self):
        """Single new moment after 2 existing → indexed [2]."""
        old_pairs = [(_moment(title=f"E{i}"), _cls_info()) for i in range(2)]
        new_pairs = [(_moment(title="Single New"), _cls_info())]

        prompt = build_compose_prompt(
            existing_page=_make_page(),
            existing_moments=old_pairs,
            new_moments=new_pairs,
            creator_name="Test",
        )

        after_open = prompt.split("<new_moments>")[1]
        new_block = after_open.split("</new_moments>")[0]
        assert "[2] Title: Single New" in new_block

    def test_existing_page_no_subsections(self):
        """Page with sections but no subsections → handled correctly."""
        flat = BodySection(heading="Flat Section", content="Content [0].")
        page = _make_page(sections=[flat], moment_indices=[0])
        old_pairs = [(_moment(), _cls_info())]
        new_pairs = [(_moment(title="New One"), _cls_info())]

        prompt = build_compose_prompt(
            existing_page=page,
            existing_moments=old_pairs,
            new_moments=new_pairs,
            creator_name="Test",
        )

        after_open = prompt.split("<existing_page>")[1]
        page_json = after_open.split("</existing_page>")[0].strip()
        parsed = json.loads(page_json)
        body = parsed["body_sections"]
        assert len(body) == 1
        assert body[0]["subsections"] == []

    def test_large_offset_indices(self):
        """10 existing + 5 new → new moments indexed [10]-[14]."""
        old_pairs = [(_moment(title=f"E{i}"), _cls_info()) for i in range(10)]
        new_pairs = [(_moment(title=f"N{i}"), _cls_info()) for i in range(5)]

        prompt = build_compose_prompt(
            existing_page=_make_page(),
            existing_moments=old_pairs,
            new_moments=new_pairs,
            creator_name="Test",
        )

        after_open = prompt.split("<new_moments>")[1]
        new_block = after_open.split("</new_moments>")[0]
        assert "[10] Title:" in new_block
        assert "[14] Title:" in new_block
        # [9] is the last existing moment, so it must not be in the new block.
        assert "[9] Title:" not in new_block
|