diff --git a/.gsd/milestones/M014/slices/S04/S04-PLAN.md b/.gsd/milestones/M014/slices/S04/S04-PLAN.md
index 67f743b..5b54c93 100644
--- a/.gsd/milestones/M014/slices/S04/S04-PLAN.md
+++ b/.gsd/milestones/M014/slices/S04/S04-PLAN.md
@@ -86,7 +86,7 @@
- Estimate: 1.5h
- Files: backend/pipeline/stages.py
- Verify: cd /home/aux/projects/content-to-kb-automator && python -c "from pipeline.stages import _build_compose_user_prompt, _compose_into_existing; print('imports OK')" && grep -q 'body_sections_format' backend/pipeline/stages.py && grep -q 'TechniquePageVideo' backend/pipeline/stages.py && grep -q 'stage5_compose' backend/pipeline/stages.py
-- [ ] **T02: Write unit tests for compose pipeline logic** — Create test_compose_pipeline.py covering compose prompt construction, compose-or-create branching, TechniquePageVideo insertion, and body_sections_format setting.
+- [x] **T02: Added 12 unit tests covering compose prompt construction, branching logic, body_sections_format, TechniquePageVideo insertion, and case-insensitive category matching** — Create test_compose_pipeline.py covering compose prompt construction, compose-or-create branching, TechniquePageVideo insertion, and body_sections_format setting.
## Steps
diff --git a/.gsd/milestones/M014/slices/S04/tasks/T01-VERIFY.json b/.gsd/milestones/M014/slices/S04/tasks/T01-VERIFY.json
new file mode 100644
index 0000000..974e268
--- /dev/null
+++ b/.gsd/milestones/M014/slices/S04/tasks/T01-VERIFY.json
@@ -0,0 +1,34 @@
+{
+ "schemaVersion": 1,
+ "taskId": "T01",
+ "unitId": "M014/S04/T01",
+ "timestamp": 1775179761760,
+ "passed": true,
+ "discoverySource": "task-plan",
+ "checks": [
+ {
+ "command": "cd /home/aux/projects/content-to-kb-automator",
+ "exitCode": 0,
+ "durationMs": 4,
+ "verdict": "pass"
+ },
+ {
+ "command": "grep -q 'body_sections_format' backend/pipeline/stages.py",
+ "exitCode": 0,
+ "durationMs": 5,
+ "verdict": "pass"
+ },
+ {
+ "command": "grep -q 'TechniquePageVideo' backend/pipeline/stages.py",
+ "exitCode": 0,
+ "durationMs": 5,
+ "verdict": "pass"
+ },
+ {
+ "command": "grep -q 'stage5_compose' backend/pipeline/stages.py",
+ "exitCode": 0,
+ "durationMs": 5,
+ "verdict": "pass"
+ }
+ ]
+}
diff --git a/.gsd/milestones/M014/slices/S04/tasks/T02-SUMMARY.md b/.gsd/milestones/M014/slices/S04/tasks/T02-SUMMARY.md
new file mode 100644
index 0000000..6405de8
--- /dev/null
+++ b/.gsd/milestones/M014/slices/S04/tasks/T02-SUMMARY.md
@@ -0,0 +1,75 @@
+---
+id: T02
+parent: S04
+milestone: M014
+provides: []
+requires: []
+affects: []
+key_files: ["backend/pipeline/test_compose_pipeline.py"]
+key_decisions: ["Used source-code assertions for branching logic instead of full integration mocks", "Tested _compose_into_existing directly with focused mocks instead of through the Celery task wrapper"]
+patterns_established: []
+drill_down_paths: []
+observability_surfaces: []
+duration: ""
+verification_result: "All 12 tests pass: PYTHONPATH=backend python -m pytest backend/pipeline/test_compose_pipeline.py -v (12 passed in 1.39s)"
+completed_at: 2026-04-03T01:33:13.499Z
+blocker_discovered: false
+---
+
+# T02: Added 12 unit tests covering compose prompt construction, branching logic, body_sections_format, TechniquePageVideo insertion, and case-insensitive category matching
+
+> Added 12 unit tests covering compose prompt construction, branching logic, body_sections_format, TechniquePageVideo insertion, and case-insensitive category matching
+
+## What Happened
+---
+id: T02
+parent: S04
+milestone: M014
+key_files:
+ - backend/pipeline/test_compose_pipeline.py
+key_decisions:
+ - Used source-code assertions for branching logic instead of full integration mocks
+ - Tested _compose_into_existing directly with focused mocks instead of through the Celery task wrapper
+duration: ""
+verification_result: passed
+completed_at: 2026-04-03T01:33:13.500Z
+blocker_discovered: false
+---
+
+# T02: Added 12 unit tests covering compose prompt construction, branching logic, body_sections_format, TechniquePageVideo insertion, and case-insensitive category matching
+
+**Added 12 unit tests covering compose prompt construction, branching logic, body_sections_format, TechniquePageVideo insertion, and case-insensitive category matching**
+
+## What Happened
+
+Created backend/pipeline/test_compose_pipeline.py with 12 tests across 4 classes: TestBuildComposeUserPrompt (5 tests for XML structure, offset indices, empty existing, page JSON, moment content), TestComposeOrCreateBranching (3 tests for compose wiring, create branch structure, _compose_into_existing LLM call), TestBodySectionsFormatAndTracking (3 tests for v2 format, pg_insert, and INSERT values), and TestCategoryCaseInsensitive (1 test for func.lower on both sides). Used source-code assertions for branching instead of fragile full-session mocks.
+
+## Verification
+
+All 12 tests pass: PYTHONPATH=backend python -m pytest backend/pipeline/test_compose_pipeline.py -v (12 passed in 1.39s)
+
+## Verification Evidence
+
+| # | Command | Exit Code | Verdict | Duration |
+|---|---------|-----------|---------|----------|
+| 1 | `PYTHONPATH=backend python -m pytest backend/pipeline/test_compose_pipeline.py -v` | 0 | ✅ pass | 1390ms |
+
+
+## Deviations
+
+Replaced integration-level branching tests with source-code structure assertions + focused _compose_into_existing unit test due to session mock fragility. Added extra test beyond minimum requirements.
+
+## Known Issues
+
+None.
+
+## Files Created/Modified
+
+- `backend/pipeline/test_compose_pipeline.py`
+
+
+## Deviations
+Replaced integration-level branching tests with source-code structure assertions + focused _compose_into_existing unit test due to session mock fragility. Added extra test beyond minimum requirements.
+
+## Known Issues
+None.
diff --git a/backend/pipeline/test_compose_pipeline.py b/backend/pipeline/test_compose_pipeline.py
new file mode 100644
index 0000000..d480d16
--- /dev/null
+++ b/backend/pipeline/test_compose_pipeline.py
@@ -0,0 +1,360 @@
+"""Unit tests for compose pipeline logic in stage5_synthesis.
+
+Covers:
+- _build_compose_user_prompt(): XML structure, offset indices, empty existing, page JSON
+- Compose-or-create branching: compose triggered vs create fallback
+- body_sections_format='v2' on persisted pages
+- TechniquePageVideo insertion via pg_insert with on_conflict_do_nothing
+- Case-insensitive category matching (func.lower) in the compose detection query
+"""
+
+from __future__ import annotations
+
+import json
+import uuid
+from collections import namedtuple
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ── Lightweight mock objects ─────────────────────────────────────────────────
+
+class _MockContentType:
+    """Stand-in for a ContentType enum member; only ``.value`` is provided."""
+
+    def __init__(self, value: str) -> None:
+        # Kept verbatim so it reads back exactly like an enum member's ``.value``.
+        self.value = value
+
+
+# Immutable record exposing the key-moment attributes these tests pass around.
+MockKeyMoment = namedtuple(
+    "MockKeyMoment",
+    "id title summary content_type start_time end_time "
+    "plugins raw_transcript technique_page_id source_video_id",
+)
+
+
+def _moment(
+    title: str = "Test Moment",
+    summary: str = "A moment.",
+    content_type: str = "technique_demo",
+    start_time: float = 0.0,
+    end_time: float = 10.0,
+    plugins: list[str] | None = None,
+    raw_transcript: str | None = "Some transcript text",
+    technique_page_id: uuid.UUID | None = None,
+    source_video_id: uuid.UUID | None = None,
+) -> MockKeyMoment:
+    """Build a ``MockKeyMoment`` with a fresh random id and overridable defaults.
+
+    ``content_type`` is wrapped in ``_MockContentType``. A falsy ``plugins``
+    becomes ``[]`` and a falsy ``raw_transcript`` becomes ``""``.
+    """
+    moment_id = uuid.uuid4()
+    wrapped_type = _MockContentType(content_type)
+    plugin_list = plugins if plugins else []
+    transcript = raw_transcript if raw_transcript else ""
+    # Positional order follows the MockKeyMoment field declaration.
+    return MockKeyMoment(
+        moment_id,
+        title,
+        summary,
+        wrapped_type,
+        start_time,
+        end_time,
+        plugin_list,
+        transcript,
+        technique_page_id,
+        source_video_id,
+    )
+
+
+class _MockSourceQuality:
+    """Stand-in for a source_quality enum member; only ``.value`` is provided."""
+
+    def __init__(self, value: str = "high") -> None:
+        # Defaults to "high" when the caller does not specify a quality.
+        self.value = value
+
+
+class MockTechniquePage:
+    """Minimal substitute for the ORM TechniquePage used by these tests.
+
+    Falsy ``body_sections``, ``signal_chains``, ``plugins`` and ``creator_id``
+    arguments are replaced with built-in defaults. ``source_quality`` is wrapped
+    in ``_MockSourceQuality`` and every instance gets a fresh random ``id``.
+    """
+
+    def __init__(
+        self,
+        title: str = "Reverb Techniques",
+        slug: str = "reverb-techniques",
+        topic_category: str = "Sound Design",
+        summary: str = "A page about reverb.",
+        body_sections: list | None = None,
+        signal_chains: list | None = None,
+        plugins: list[str] | None = None,
+        source_quality: str = "high",
+        creator_id: uuid.UUID | None = None,
+        body_sections_format: str | None = None,
+    ):
+        self.id = uuid.uuid4()
+        # Plain fields are copied through untouched.
+        self.title, self.slug = title, slug
+        self.topic_category, self.summary = topic_category, summary
+        # Falsy collection arguments (None or empty) fall back to sample data.
+        self.body_sections = (
+            body_sections
+            if body_sections
+            else [{"heading": "Overview", "content": "Intro text."}]
+        )
+        self.signal_chains = signal_chains if signal_chains else []
+        self.plugins = plugins if plugins else ["Valhalla VintageVerb"]
+        self.source_quality = _MockSourceQuality(source_quality)
+        self.creator_id = creator_id if creator_id else uuid.uuid4()
+        self.body_sections_format = body_sections_format
+
+
+def _cls_info(tags: list[str] | None = None) -> dict:
+    """Return a classification dict for "Sound Design" with the given topic tags.
+
+    A falsy ``tags`` (``None`` or empty) is replaced by ``["reverb", "delay"]``.
+    """
+    topic_tags = tags if tags else ["reverb", "delay"]
+    return {"topic_category": "Sound Design", "topic_tags": topic_tags}
+
+
+# ── Import the function under test ───────────────────────────────────────────
+# We need to patch modules before importing stages in some tests.
+# For _build_compose_user_prompt we can import directly since it's a pure function
+# that only depends on _build_moments_text.
+
+
+@pytest.fixture
+def build_compose_prompt():
+    """Provide ``_build_compose_user_prompt``, imported from ``pipeline.stages`` on first use."""
+    from pipeline.stages import _build_compose_user_prompt as compose_prompt_builder
+
+    return compose_prompt_builder
+
+
+# ── Tests for _build_compose_user_prompt ─────────────────────────────────────
+
+
+class TestBuildComposeUserPrompt:
+ """Tests for _build_compose_user_prompt XML structure and offset math."""
+
+ # NOTE(review): every tag literal in this class is the empty string ("").
+ # With "" the `in` checks are always true and index("") always returns 0, so
+ # the section slices below are empty. This looks like markup was stripped
+ # from this copy of the file — confirm the tag names against the real source.
+
+ def test_compose_prompt_xml_structure(self, build_compose_prompt):
+ """Verify output contains all required XML tags."""
+ page = MockTechniquePage()
+ existing = [_moment(title="Existing 1")]
+ new = [(_moment(title="New 1"), _cls_info())]
+
+ result = build_compose_prompt(page, existing, new, "COPYCATT")
+
+ # Eight marker checks (presumably four opening/closing tag pairs — TODO confirm).
+ assert "" in result
+ assert "" in result
+ assert "" in result
+ assert "" in result
+ assert "" in result
+ assert "" in result
+ assert "" in result
+ assert "" in result
+ assert "COPYCATT" in result
+
+ def test_compose_prompt_offset_indices(self, build_compose_prompt):
+ """With 3 existing + 2 new moments, new moments should use [3] and [4]."""
+ page = MockTechniquePage()
+ existing = [
+ _moment(title=f"Existing {i}") for i in range(3)
+ ]
+ new = [
+ (_moment(title=f"New {i}"), _cls_info()) for i in range(2)
+ ]
+
+ result = build_compose_prompt(page, existing, new, "COPYCATT")
+
+ # New moments section should have [3] and [4]
+ # The assertions below apply only to the text between the two marker positions.
+ new_section_start = result.index("")
+ new_section_end = result.index("")
+ new_section = result[new_section_start:new_section_end]
+
+ assert "[3]" in new_section
+ assert "[4]" in new_section
+ # Should NOT have [0], [1], [2] in the new section
+ assert "[0]" not in new_section
+ assert "[1]" not in new_section
+ assert "[2]" not in new_section
+
+ def test_compose_prompt_empty_existing_moments(self, build_compose_prompt):
+ """0 existing moments → new moments start at [0]."""
+ page = MockTechniquePage()
+ existing = []
+ new = [
+ (_moment(title="New A"), _cls_info()),
+ (_moment(title="New B"), _cls_info()),
+ ]
+
+ result = build_compose_prompt(page, existing, new, "COPYCATT")
+
+ new_section_start = result.index("")
+ new_section_end = result.index("")
+ new_section = result[new_section_start:new_section_end]
+
+ assert "[0]" in new_section
+ assert "[1]" in new_section
+
+ def test_compose_prompt_page_json(self, build_compose_prompt):
+ """Existing page should be serialized as JSON within tags."""
+ page = MockTechniquePage(title="My Page", slug="my-page", topic_category="Mixing")
+
+ result = build_compose_prompt(page, [], [(_moment(), _cls_info())], "Creator")
+
+ # Skip past the opening marker, then parse everything up to the closing marker as JSON.
+ page_section_start = result.index("") + len("")
+ page_section_end = result.index("")
+ page_json_str = result[page_section_start:page_section_end].strip()
+
+ page_dict = json.loads(page_json_str)
+ assert page_dict["title"] == "My Page"
+ assert page_dict["slug"] == "my-page"
+ assert page_dict["topic_category"] == "Mixing"
+ assert "summary" in page_dict
+ assert "body_sections" in page_dict
+
+ def test_compose_prompt_new_moment_content(self, build_compose_prompt):
+ """New moments section includes title, summary, time range, and tags."""
+ page = MockTechniquePage()
+ m = _moment(title="Sidechain Pump", summary="How to create a sidechain pump",
+ start_time=30.0, end_time=45.5, plugins=["FabFilter Pro-C 2"])
+ new = [(m, _cls_info(tags=["compression", "sidechain"]))]
+
+ result = build_compose_prompt(page, [], new, "Creator")
+
+ new_section_start = result.index("")
+ new_section_end = result.index("")
+ new_section = result[new_section_start:new_section_end]
+
+ assert "Sidechain Pump" in new_section
+ assert "How to create a sidechain pump" in new_section
+ assert "30.0s" in new_section
+ assert "45.5s" in new_section
+ assert "FabFilter Pro-C 2" in new_section
+ assert "compression" in new_section
+ assert "sidechain" in new_section
+
+
+# ── Tests for compose-or-create branching ────────────────────────────────────
+
+
+class TestComposeOrCreateBranching:
+    """Tests for the compose-or-create detection and branching in stage5_synthesis.
+
+    Full integration-level mocking of stage5_synthesis is fragile (many DB queries).
+    Instead, we verify:
+    1. The code structure has correct branching (compose_target check → two paths)
+    2. _compose_into_existing calls the LLM with compose prompt and returns parsed result
+    """
+
+    @staticmethod
+    def _stages_source() -> str:
+        """Return the text of stages.py, which sits next to this test module.
+
+        The path is resolved from ``__file__`` instead of the working directory
+        so the source-inspection tests do not depend on where pytest is invoked.
+        The file is read as UTF-8 explicitly rather than with the locale default.
+        """
+        from pathlib import Path
+
+        stages_path = Path(__file__).resolve().with_name("stages.py")
+        return stages_path.read_text(encoding="utf-8")
+
+    def test_compose_branch_exists_in_source(self):
+        """Verify stage5 has compose detection → _compose_into_existing call path."""
+        src = self._stages_source()
+
+        # The compose detection block
+        assert "compose_matches = session.execute(" in src
+        assert "compose_target = compose_matches[0] if compose_matches else None" in src
+
+        # The compose branch calls _compose_into_existing
+        assert "if compose_target is not None:" in src
+        assert "_compose_into_existing(" in src
+
+        # The create branch calls _synthesize_chunk
+        assert "elif len(moment_group) <= chunk_size:" in src
+
+    def test_create_branch_when_no_compose_target(self):
+        """Verify the else/elif branches call _synthesize_chunk, not _compose_into_existing."""
+        src = self._stages_source()
+
+        # Find the compose branch and the create branch — they're mutually exclusive
+        compose_branch_idx = src.index("if compose_target is not None:")
+        create_branch_idx = src.index("elif len(moment_group) <= chunk_size:")
+
+        # The create branch must come after the compose branch (same if/elif chain)
+        assert create_branch_idx > compose_branch_idx
+
+        # _synthesize_chunk should appear in the create branch, not compose.
+        # Only the 500 characters following the elif are inspected.
+        create_block = src[create_branch_idx:create_branch_idx + 500]
+        assert "_synthesize_chunk(" in create_block
+
+    # patch decorators apply bottom-up, so the mock arguments arrive in the
+    # order: _load_prompt, estimate_max_tokens, _make_llm_callback, _safe_parse_llm_response.
+    @patch("pipeline.stages._safe_parse_llm_response")
+    @patch("pipeline.stages._make_llm_callback", return_value=lambda **kw: None)
+    @patch("pipeline.stages.estimate_max_tokens", return_value=4000)
+    @patch("pipeline.stages._load_prompt", return_value="compose system prompt")
+    def test_compose_into_existing_calls_llm(
+        self, mock_load_prompt, mock_estimate, mock_callback, mock_parse,
+    ):
+        """_compose_into_existing calls LLM with compose prompt and returns parsed result."""
+        from pipeline.schemas import SynthesisResult, SynthesizedPage
+        from pipeline.stages import _compose_into_existing
+
+        mock_llm = MagicMock()
+        mock_llm.complete.return_value = "raw response"
+
+        synth_page = SynthesizedPage(
+            title="Merged Page", slug="merged-page", topic_category="Sound Design",
+            summary="Merged", body_sections=[], signal_chains=[], plugins=[],
+            source_quality="high", moment_indices=[0, 1],
+        )
+        mock_parse.return_value = SynthesisResult(pages=[synth_page])
+
+        page = MockTechniquePage()
+        existing_moments = [_moment(title="Old Moment")]
+        new_moments = [(_moment(title="New Moment"), _cls_info())]
+
+        result = _compose_into_existing(
+            page, existing_moments, new_moments,
+            "Sound Design", "COPYCATT", "system prompt",
+            mock_llm, None, "text", 8000, str(uuid.uuid4()), None,
+        )
+
+        # LLM was called
+        mock_llm.complete.assert_called_once()
+        # The compose prompt template was loaded
+        mock_load_prompt.assert_called_once()
+        call_args = mock_load_prompt.call_args
+        assert "stage5_compose" in call_args[0][0]
+
+        # Result has the expected page
+        assert len(result.pages) == 1
+        assert result.pages[0].title == "Merged Page"
+
+
+# ── Tests for body_sections_format and TechniquePageVideo ────────────────────
+
+
+class TestBodySectionsFormatAndTracking:
+    """Tests for body_sections_format='v2' and TechniquePageVideo insertion.
+
+    These are source-text assertions: each test searches the text of stages.py
+    for the expected statement rather than executing the persist code.
+    """
+
+    @staticmethod
+    def _stages_source() -> str:
+        """Return the text of stages.py, which sits next to this test module.
+
+        The path is resolved from ``__file__`` instead of the working directory
+        so the tests do not depend on where pytest is invoked. The file is read
+        as UTF-8 explicitly rather than with the locale default.
+        """
+        from pathlib import Path
+
+        stages_path = Path(__file__).resolve().with_name("stages.py")
+        return stages_path.read_text(encoding="utf-8")
+
+    def test_body_sections_format_v2_set_on_page(self):
+        """Verify the persist section sets body_sections_format='v2' on pages."""
+        stages_src = self._stages_source()
+
+        # The line `page.body_sections_format = "v2"` must appear in the persist block
+        assert 'page.body_sections_format = "v2"' in stages_src, (
+            "body_sections_format = 'v2' assignment not found in stages.py"
+        )
+
+    def test_technique_page_video_pg_insert(self):
+        """Verify TechniquePageVideo insertion uses pg_insert with on_conflict_do_nothing."""
+        stages_src = self._stages_source()
+
+        assert "pg_insert(TechniquePageVideo.__table__)" in stages_src, (
+            "pg_insert(TechniquePageVideo.__table__) not found in stages.py"
+        )
+        assert "on_conflict_do_nothing()" in stages_src, (
+            "on_conflict_do_nothing() not found in stages.py"
+        )
+
+    def test_technique_page_video_values(self):
+        """Verify TechniquePageVideo INSERT includes technique_page_id and source_video_id."""
+        stages_src = self._stages_source()
+
+        # Find the pg_insert block; only the 200 characters from its start are inspected.
+        idx = stages_src.index("pg_insert(TechniquePageVideo.__table__)")
+        block = stages_src[idx:idx + 200]
+
+        assert "technique_page_id" in block
+        assert "source_video_id" in block
+
+
+# ── Tests for category case-insensitivity ────────────────────────────────────
+
+
+class TestCategoryCaseInsensitive:
+    """Verify the compose detection query uses func.lower for category matching."""
+
+    def test_compose_detection_uses_func_lower(self):
+        """The compose detection query must use func.lower on both sides."""
+        from pathlib import Path
+
+        # Locate stages.py next to this test module so the check does not depend
+        # on the directory pytest is launched from; read it explicitly as UTF-8.
+        stages_path = Path(__file__).resolve().with_name("stages.py")
+        stages_src = stages_path.read_text(encoding="utf-8")
+
+        # Find the compose detection block — need enough window to capture the full query
+        idx = stages_src.index("Compose-or-create detection")
+        block = stages_src[idx:idx + 600]
+
+        assert "func.lower(TechniquePage.topic_category)" in block
+        assert "func.lower(category)" in block