diff --git a/backend/pipeline/test_harness.py b/backend/pipeline/test_harness.py index 074628f..976fc54 100644 --- a/backend/pipeline/test_harness.py +++ b/backend/pipeline/test_harness.py @@ -33,6 +33,7 @@ from typing import NamedTuple from pydantic import ValidationError from config import get_settings +from pipeline.citation_utils import validate_citations from pipeline.llm_client import LLMClient, estimate_max_tokens from pipeline.schemas import SynthesisResult @@ -280,17 +281,33 @@ def run_synthesis( _log("SYNTH", f" Parsed: {len(result.pages)} pages synthesized") total_words = 0 for page in result.pages: - sections = page.body_sections or {} - word_count = sum(len(str(v).split()) for v in sections.values()) + sections = page.body_sections or [] + section_count = len(sections) + subsection_count = sum(len(s.subsections) for s in sections) + word_count = sum( + len(s.content.split()) + sum(len(sub.content.split()) for sub in s.subsections) + for s in sections + ) total_words += word_count _log( "PAGE", f" '{page.title}' ({page.slug}): " - f"{len(sections)} sections, {word_count} words, " + f"{section_count} sections ({subsection_count} subsections), " + f"{word_count} words, " f"{len(page.moment_indices)} moments linked, " f"quality={page.source_quality}", ) + # Citation coverage reporting + cit = validate_citations(sections, len(page.moment_indices)) + _log( + "CITE", + f" Citations: {cit['total_citations']}/{len(page.moment_indices)} moments cited " + f"({cit['coverage_pct']}% coverage)" + + (f", invalid indices: {cit['invalid_indices']}" if cit['invalid_indices'] else "") + + (f", uncited: {cit['uncited_moments']}" if cit['uncited_moments'] else ""), + ) + all_pages.append(page.model_dump()) # Summary @@ -454,7 +471,11 @@ def main() -> int: "metadata": { "page_count": len(pages), "total_words": sum( - sum(len(str(v).split()) for v in p.get("body_sections", {}).values()) + sum( + len(s.get("content", "").split()) + + sum(len(sub.get("content", "").split()) for sub in s.get("subsections", [])) + for s in p.get("body_sections", []) + ) for p in pages ), "elapsed_seconds": round(time.monotonic() - overall_start, 1), diff --git a/backend/pipeline/test_harness_v2_format.py b/backend/pipeline/test_harness_v2_format.py new file mode 100644 index 0000000..6713752 --- /dev/null +++ b/backend/pipeline/test_harness_v2_format.py @@ -0,0 +1,213 @@ +"""Tests for test_harness compatibility with v2 body_sections format. + +Validates that word-counting and citation integration work correctly +with the list[BodySection] structure (v2) instead of the old dict format. +""" + +from __future__ import annotations + +import pytest + +from pipeline.citation_utils import validate_citations +from pipeline.schemas import BodySection, BodySubSection, SynthesizedPage, SynthesisResult + + +# ── Helpers ────────────────────────────────────────────────────────────────── + + +def _make_page( + body_sections: list[BodySection], + moment_indices: list[int] | None = None, + title: str = "Test Page", + slug: str = "test-page", +) -> SynthesizedPage: + return SynthesizedPage( + title=title, + slug=slug, + topic_category="Testing", + summary="A test page.", + body_sections=body_sections, + moment_indices=moment_indices or [], + ) + + +def _count_words_v2(sections: list[BodySection]) -> int: + """Replicate the word-counting logic from the updated test_harness.""" + return sum( + len(s.content.split()) + sum(len(sub.content.split()) for sub in s.subsections) + for s in sections + ) + + +def _count_words_metadata(pages_dicts: list[dict]) -> int: + """Replicate the metadata total_words logic (operates on dicts after model_dump).""" + return sum( + sum( + len(s.get("content", "").split()) + + sum(len(sub.get("content", "").split()) for sub in s.get("subsections", [])) + for s in p.get("body_sections", []) + ) + for p in pages_dicts + ) + + +# ── Word counting tests ───────────────────────────────────────────────────── + + +class TestWordCounting: + def test_flat_sections_no_subsections(self): + sections = [ + BodySection(heading="Intro", content="one two three"), + BodySection(heading="Details", content="four five"), + ] + assert _count_words_v2(sections) == 5 + + def test_sections_with_subsections(self): + sections = [ + BodySection( + heading="Main", + content="alpha beta", # 2 words + subsections=[ + BodySubSection(heading="Sub A", content="gamma delta epsilon"), # 3 words + BodySubSection(heading="Sub B", content="zeta"), # 1 word + ], + ), + ] + assert _count_words_v2(sections) == 6 + + def test_empty_sections_list(self): + assert _count_words_v2([]) == 0 + + def test_section_with_empty_content(self): + sections = [ + BodySection(heading="Empty", content=""), + ] + # "".split() returns [], len([]) == 0 + assert _count_words_v2(sections) == 0 + + def test_metadata_word_count_matches(self): + """Metadata total_words (from model_dump dicts) matches Pydantic object counting.""" + sections = [ + BodySection( + heading="H2", + content="one two three", + subsections=[ + BodySubSection(heading="H3", content="four five six seven"), + ], + ), + BodySection(heading="Another", content="eight nine"), + ] + page = _make_page(sections, moment_indices=[0, 1]) + pages_dicts = [page.model_dump()] + + assert _count_words_v2(sections) == 9 + assert _count_words_metadata(pages_dicts) == 9 + + +# ── Section/subsection counting ───────────────────────────────────────────── + + +class TestSectionCounting: + def test_section_and_subsection_counts(self): + sections = [ + BodySection(heading="A", content="text", subsections=[ + BodySubSection(heading="A.1", content="sub text"), + ]), + BodySection(heading="B", content="more text"), + BodySection(heading="C", content="even more", subsections=[ + BodySubSection(heading="C.1", content="sub1"), + BodySubSection(heading="C.2", content="sub2"), + ]), + ] + section_count = len(sections) + subsection_count = sum(len(s.subsections) for s in sections) + assert section_count == 3 + assert subsection_count == 3 + + +# ── Citation integration ───────────────────────────────────────────────────── + + +class TestCitationIntegration: + def test_full_coverage(self): + sections = [ + BodySection(heading="Intro", content="First point [0]. Second point [1]."), + BodySection(heading="Details", content="More on [0] and [2]."), + ] + result = validate_citations(sections, moment_count=3) + assert result["valid"] is True + assert result["coverage_pct"] == 100.0 + assert result["invalid_indices"] == [] + assert result["uncited_moments"] == [] + + def test_partial_coverage(self): + sections = [ + BodySection(heading="Intro", content="Only cites [0]."), + ] + result = validate_citations(sections, moment_count=3) + assert result["valid"] is False + assert result["coverage_pct"] == pytest.approx(33.3, abs=0.1) + assert result["uncited_moments"] == [1, 2] + + def test_invalid_index(self): + sections = [ + BodySection(heading="Bad", content="Cites [0] and [99]."), + ] + result = validate_citations(sections, moment_count=2) + assert result["invalid_indices"] == [99] + + def test_citations_in_subsections(self): + sections = [ + BodySection( + heading="Main", + content="See [0].", + subsections=[ + BodySubSection(heading="Sub", content="Also [1] and [2]."), + ], + ), + ] + result = validate_citations(sections, moment_count=3) + assert result["valid"] is True + assert result["total_citations"] == 3 + + def test_multi_citation_markers(self): + sections = [ + BodySection(heading="X", content="Both sources agree [0,1]."), + ] + result = validate_citations(sections, moment_count=2) + assert result["valid"] is True + assert result["total_citations"] == 2 + + def test_no_sections(self): + result = validate_citations([], moment_count=0) + assert result["valid"] is True + assert result["coverage_pct"] == 0.0 + + +# ── End-to-end: SynthesisResult with v2 body_sections ─────────────────────── + + +class TestSynthesisResultV2: + def test_round_trip_model_dump(self): + """SynthesisResult with v2 body_sections round-trips through model_dump/validate.""" + sections = [ + BodySection( + heading="Overview", + content="This technique [0] is fundamental.", + subsections=[ + BodySubSection(heading="Key Concept", content="Detail [1]."), + ], + ), + ] + page = _make_page(sections, moment_indices=[0, 1]) + result = SynthesisResult(pages=[page]) + + dumped = result.model_dump() + restored = SynthesisResult.model_validate(dumped) + + assert len(restored.pages) == 1 + restored_page = restored.pages[0] + assert len(restored_page.body_sections) == 1 + assert restored_page.body_sections[0].heading == "Overview" + assert len(restored_page.body_sections[0].subsections) == 1 + assert restored_page.body_sections_format == "v2"