"""Unit tests for compose pipeline logic in stage5_synthesis.

Covers:
- _build_compose_user_prompt(): XML structure, offset indices, empty existing, page JSON
- Compose-or-create branching: compose triggered vs create fallback
- body_sections_format='v2' on persisted pages
- TechniquePageVideo insertion via pg_insert with on_conflict_do_nothing
- Case-insensitive category matching (func.lower) in the compose detection query
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import uuid
|
|
from collections import namedtuple
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
|
|
# ── Lightweight mock objects ─────────────────────────────────────────────────
|
|
|
|
class _MockContentType:
    """Minimal stand-in for a ContentType enum member.

    Only the ``.value`` attribute is provided; it holds the string given
    to the constructor.
    """

    def __init__(self, value: str) -> None:
        # Keep the raw string so it can be read back as ``obj.value``.
        self.value = value
|
|
|
|
|
|
# Immutable record standing in for a key-moment row; the field names are
# given as one whitespace-separated string, in constructor order.
MockKeyMoment = namedtuple(
    "MockKeyMoment",
    "id title summary content_type start_time end_time "
    "plugins raw_transcript technique_page_id source_video_id",
)
|
|
|
|
|
|
def _moment(
    title: str = "Test Moment",
    summary: str = "A moment.",
    content_type: str = "technique_demo",
    start_time: float = 0.0,
    end_time: float = 10.0,
    plugins: list[str] | None = None,
    raw_transcript: str | None = "Some transcript text",
    technique_page_id: uuid.UUID | None = None,
    source_video_id: uuid.UUID | None = None,
) -> MockKeyMoment:
    """Build a MockKeyMoment with a fresh random id and test-friendly defaults.

    ``content_type`` is wrapped in ``_MockContentType`` so it exposes
    ``.value``. A falsy ``plugins`` becomes ``[]`` and a falsy
    ``raw_transcript`` becomes ``""``; all other arguments are stored as given.
    """
    # Normalise the two optional fields up front.
    plugin_list = plugins or []
    transcript_text = raw_transcript or ""

    fields = {
        "id": uuid.uuid4(),
        "title": title,
        "summary": summary,
        "content_type": _MockContentType(content_type),
        "start_time": start_time,
        "end_time": end_time,
        "plugins": plugin_list,
        "raw_transcript": transcript_text,
        "technique_page_id": technique_page_id,
        "source_video_id": source_video_id,
    }
    return MockKeyMoment(**fields)
|
|
|
|
|
|
class _MockSourceQuality:
    """Minimal stand-in for a source_quality enum member.

    Exposes a single ``.value`` attribute, which defaults to ``"high"``.
    """

    def __init__(self, value: str = "high") -> None:
        # Keep the raw string so it can be read back as ``obj.value``.
        self.value = value
|
|
|
|
|
|
class MockTechniquePage:
    """In-memory substitute for the ORM TechniquePage used by these tests.

    Every instance gets a random ``id``. Falsy ``body_sections``, ``plugins``
    and ``signal_chains`` arguments (``None`` or empty) are replaced by the
    sample values below, and a falsy ``creator_id`` is replaced by a random
    UUID. ``source_quality`` is wrapped in ``_MockSourceQuality`` so it
    exposes ``.value``.
    """

    def __init__(
        self,
        title: str = "Reverb Techniques",
        slug: str = "reverb-techniques",
        topic_category: str = "Sound Design",
        summary: str = "A page about reverb.",
        body_sections: list | None = None,
        signal_chains: list | None = None,
        plugins: list[str] | None = None,
        source_quality: str = "high",
        creator_id: uuid.UUID | None = None,
        body_sections_format: str | None = None,
    ):
        self.id = uuid.uuid4()

        # Plain scalar fields are stored unchanged.
        self.title = title
        self.slug = slug
        self.topic_category = topic_category
        self.summary = summary

        # Container fields fall back to sample content when nothing usable
        # was supplied.
        if body_sections:
            self.body_sections = body_sections
        else:
            self.body_sections = [{"heading": "Overview", "content": "Intro text."}]
        self.signal_chains = signal_chains if signal_chains else []
        self.plugins = plugins if plugins else ["Valhalla VintageVerb"]

        self.source_quality = _MockSourceQuality(source_quality)
        self.creator_id = creator_id if creator_id else uuid.uuid4()
        self.body_sections_format = body_sections_format
|
|
|
|
|
|
def _cls_info(tags: list[str] | None = None) -> dict:
    """Return a classification-info dict for a moment.

    ``topic_category`` is always ``"Sound Design"``. ``topic_tags`` is
    ``tags`` when it is non-empty, otherwise a new ``["reverb", "delay"]``
    list.
    """
    topic_tags = tags if tags else ["reverb", "delay"]
    return dict(topic_category="Sound Design", topic_tags=topic_tags)
|
|
|
|
|
|
# ── Import the function under test ───────────────────────────────────────────
# We need to patch modules before importing stages in some tests.
# For _build_compose_user_prompt we can import directly, since it is a pure
# function that only depends on _build_moments_text.
|
|
|
|
|
|
@pytest.fixture
def build_compose_prompt():
    """Provide ``pipeline.stages._build_compose_user_prompt`` to a test.

    The import is performed when the fixture runs rather than when this
    module is imported.
    """
    from pipeline.stages import _build_compose_user_prompt as prompt_builder

    return prompt_builder
|
|
|
|
|
|
# ── Tests for _build_compose_user_prompt ─────────────────────────────────────
|
|
|
|
|
|
class TestBuildComposeUserPrompt:
    """Tests for _build_compose_user_prompt XML structure and offset math.

    Each test receives the function under test through the
    ``build_compose_prompt`` fixture and calls it with a page, a list of
    existing moments, a list of ``(moment, classification-info)`` pairs for
    the new moments, and a creator string.
    """

    @staticmethod
    def _new_moments_section(prompt: str) -> str:
        """Return the prompt text from ``<new_moments>`` up to ``</new_moments>``.

        The opening tag is included and the closing tag is excluded.
        Raises ValueError if either tag is missing.
        """
        start = prompt.index("<new_moments>")
        end = prompt.index("</new_moments>")
        return prompt[start:end]

    def test_compose_prompt_xml_structure(self, build_compose_prompt):
        """Verify output contains all required XML tags."""
        page = MockTechniquePage()
        existing = [_moment(title="Existing 1")]
        new = [(_moment(title="New 1"), _cls_info())]

        result = build_compose_prompt(page, existing, new, "COPYCATT")

        for tag in ("existing_page", "existing_moments", "new_moments", "creator"):
            assert f"<{tag}>" in result
            assert f"</{tag}>" in result
        assert "COPYCATT" in result

    def test_compose_prompt_offset_indices(self, build_compose_prompt):
        """With 3 existing + 2 new moments, new moments should use [3] and [4]."""
        page = MockTechniquePage()
        existing = [_moment(title=f"Existing {i}") for i in range(3)]
        new = [(_moment(title=f"New {i}"), _cls_info()) for i in range(2)]

        result = build_compose_prompt(page, existing, new, "COPYCATT")
        new_section = self._new_moments_section(result)

        # New moments continue numbering after the existing ones.
        assert "[3]" in new_section
        assert "[4]" in new_section
        # Indices belonging to existing moments must not be reused.
        assert "[0]" not in new_section
        assert "[1]" not in new_section
        assert "[2]" not in new_section

    def test_compose_prompt_empty_existing_moments(self, build_compose_prompt):
        """0 existing moments → new moments start at [0]."""
        page = MockTechniquePage()
        existing = []
        new = [
            (_moment(title="New A"), _cls_info()),
            (_moment(title="New B"), _cls_info()),
        ]

        result = build_compose_prompt(page, existing, new, "COPYCATT")
        new_section = self._new_moments_section(result)

        assert "[0]" in new_section
        assert "[1]" in new_section

    def test_compose_prompt_page_json(self, build_compose_prompt):
        """Existing page should be serialized as JSON within <existing_page> tags."""
        page = MockTechniquePage(title="My Page", slug="my-page", topic_category="Mixing")

        result = build_compose_prompt(page, [], [(_moment(), _cls_info())], "Creator")

        # Take only the text between the tags so it can be parsed as JSON.
        page_section_start = result.index("<existing_page>") + len("<existing_page>")
        page_section_end = result.index("</existing_page>")
        page_json_str = result[page_section_start:page_section_end].strip()

        page_dict = json.loads(page_json_str)
        assert page_dict["title"] == "My Page"
        assert page_dict["slug"] == "my-page"
        assert page_dict["topic_category"] == "Mixing"
        assert "summary" in page_dict
        assert "body_sections" in page_dict

    def test_compose_prompt_new_moment_content(self, build_compose_prompt):
        """New moments section includes title, summary, time range, and tags."""
        page = MockTechniquePage()
        m = _moment(
            title="Sidechain Pump",
            summary="How to create a sidechain pump",
            start_time=30.0,
            end_time=45.5,
            plugins=["FabFilter Pro-C 2"],
        )
        # Neither tag occurs in the title, summary or plugin name, so each
        # tag assertion below can only be satisfied by the rendered tags.
        new = [(m, _cls_info(tags=["compression", "ducking"]))]

        result = build_compose_prompt(page, [], new, "Creator")
        new_section = self._new_moments_section(result)

        assert "Sidechain Pump" in new_section
        assert "How to create a sidechain pump" in new_section
        assert "30.0s" in new_section
        assert "45.5s" in new_section
        assert "FabFilter Pro-C 2" in new_section
        assert "compression" in new_section
        assert "ducking" in new_section
|
|
|
|
|
|
# ── Tests for compose-or-create branching ────────────────────────────────────
|
|
|
|
|
|
class TestComposeOrCreateBranching:
    """Tests for the compose-or-create detection and branching in stage5_synthesis.

    Full integration-level mocking of stage5_synthesis is fragile (many DB queries).
    Instead, we verify:
    1. The code structure has correct branching (compose_target check → two paths)
    2. _compose_into_existing calls the LLM with compose prompt and returns parsed result
    """

    @staticmethod
    def _read_stages_source() -> str:
        """Return the text of backend/pipeline/stages.py.

        The path is relative to the current working directory. The file is
        decoded explicitly as UTF-8 so the result does not depend on the
        platform's locale encoding.
        """
        from pathlib import Path

        return Path("backend/pipeline/stages.py").read_text(encoding="utf-8")

    def test_compose_branch_exists_in_source(self):
        """Verify stage5 has compose detection → _compose_into_existing call path."""
        src = self._read_stages_source()

        # The compose detection block
        assert "compose_matches = session.execute(" in src
        assert "compose_target = compose_matches[0] if compose_matches else None" in src

        # The compose branch calls _compose_into_existing
        assert "if compose_target is not None:" in src
        assert "_compose_into_existing(" in src

        # The create branch is the elif that follows
        assert "elif len(moment_group) <= chunk_size:" in src

    def test_create_branch_when_no_compose_target(self):
        """Verify the create branch follows the compose branch and calls _synthesize_chunk."""
        src = self._read_stages_source()

        # Find the compose branch and the create branch — they're mutually exclusive
        compose_branch_idx = src.index("if compose_target is not None:")
        create_branch_idx = src.index("elif len(moment_group) <= chunk_size:")

        # The create branch must come after the compose branch (same if/elif chain)
        assert create_branch_idx > compose_branch_idx

        # _synthesize_chunk should appear within 500 characters of the
        # start of the create branch.
        create_block = src[create_branch_idx:create_branch_idx + 500]
        assert "_synthesize_chunk(" in create_block

    # Decorators apply bottom-up, so the mocks arrive in the order
    # _load_prompt, estimate_max_tokens, _make_llm_callback,
    # _safe_parse_llm_response.
    @patch("pipeline.stages._safe_parse_llm_response")
    @patch("pipeline.stages._make_llm_callback", return_value=lambda **kw: None)
    @patch("pipeline.stages.estimate_max_tokens", return_value=4000)
    @patch("pipeline.stages._load_prompt", return_value="compose system prompt")
    def test_compose_into_existing_calls_llm(
        self, mock_load_prompt, mock_estimate, mock_callback, mock_parse,
    ):
        """_compose_into_existing calls LLM with compose prompt and returns parsed result."""
        from pipeline.schemas import SynthesisResult, SynthesizedPage
        from pipeline.stages import _compose_into_existing

        mock_llm = MagicMock()
        mock_llm.complete.return_value = "raw response"

        # The parser is mocked, so this is the object the call should return.
        synth_page = SynthesizedPage(
            title="Merged Page", slug="merged-page", topic_category="Sound Design",
            summary="Merged", body_sections=[], signal_chains=[], plugins=[],
            source_quality="high", moment_indices=[0, 1],
        )
        mock_parse.return_value = SynthesisResult(pages=[synth_page])

        page = MockTechniquePage()
        existing_moments = [_moment(title="Old Moment")]
        new_moments = [(_moment(title="New Moment"), _cls_info())]

        result = _compose_into_existing(
            page, existing_moments, new_moments,
            "Sound Design", "COPYCATT", "system prompt",
            mock_llm, None, "text", 8000, str(uuid.uuid4()), None,
        )

        # LLM was called
        mock_llm.complete.assert_called_once()
        # The compose prompt template was loaded
        mock_load_prompt.assert_called_once()
        call_args = mock_load_prompt.call_args
        assert "stage5_compose" in call_args[0][0]

        # Result has the expected page
        assert len(result.pages) == 1
        assert result.pages[0].title == "Merged Page"
|
|
|
|
|
|
# ── Tests for body_sections_format and TechniquePageVideo ────────────────────
|
|
|
|
|
|
class TestBodySectionsFormatAndTracking:
    """Tests for body_sections_format='v2' and TechniquePageVideo insertion.

    These are source-level checks: each test searches the text of
    backend/pipeline/stages.py for the expected statements.
    """

    @staticmethod
    def _read_stages_source() -> str:
        """Return the text of backend/pipeline/stages.py.

        The path is relative to the current working directory. The file is
        decoded explicitly as UTF-8 so the result does not depend on the
        platform's locale encoding.
        """
        from pathlib import Path

        return Path("backend/pipeline/stages.py").read_text(encoding="utf-8")

    def test_body_sections_format_v2_set_on_page(self):
        """Verify stages.py contains the body_sections_format='v2' assignment."""
        stages_src = self._read_stages_source()

        # The line `page.body_sections_format = "v2"` must appear in the source
        assert 'page.body_sections_format = "v2"' in stages_src, (
            "body_sections_format = 'v2' assignment not found in stages.py"
        )

    def test_technique_page_video_pg_insert(self):
        """Verify TechniquePageVideo insertion uses pg_insert with on_conflict_do_nothing."""
        stages_src = self._read_stages_source()

        assert "pg_insert(TechniquePageVideo.__table__)" in stages_src, (
            "pg_insert(TechniquePageVideo.__table__) not found in stages.py"
        )
        assert "on_conflict_do_nothing()" in stages_src, (
            "on_conflict_do_nothing() not found in stages.py"
        )

    def test_technique_page_video_values(self):
        """Verify TechniquePageVideo INSERT includes technique_page_id and source_video_id."""
        stages_src = self._read_stages_source()

        # Inspect the 200 characters starting at the pg_insert call.
        idx = stages_src.index("pg_insert(TechniquePageVideo.__table__)")
        block = stages_src[idx:idx + 200]

        assert "technique_page_id" in block
        assert "source_video_id" in block
|
|
|
|
|
|
# ── Tests for category case-insensitivity ────────────────────────────────────
|
|
|
|
|
|
class TestCategoryCaseInsensitive:
    """Verify the compose detection query uses func.lower for category matching."""

    def test_compose_detection_uses_func_lower(self):
        """The compose detection query must use func.lower on both sides."""
        from pathlib import Path

        # Decode explicitly as UTF-8 so the read does not depend on the
        # platform's locale encoding.
        stages_src = Path("backend/pipeline/stages.py").read_text(encoding="utf-8")

        # Find the compose detection block — need enough window to capture the full query
        idx = stages_src.index("Compose-or-create detection")
        block = stages_src[idx:idx + 600]

        assert "func.lower(TechniquePage.topic_category)" in block
        assert "func.lower(category)" in block
|