From 44197f550ca4329c4d278c10ca13e12f21ff7c0b Mon Sep 17 00:00:00 2001 From: jlightner Date: Fri, 3 Apr 2026 00:54:27 +0000 Subject: [PATCH] =?UTF-8?q?test:=20Updated=20test=5Fharness.py=20word-coun?= =?UTF-8?q?t/section-count=20logic=20for=20list[B=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - "backend/pipeline/test_harness.py" - "backend/pipeline/test_harness_v2_format.py" GSD-Task: S01/T03 --- .gsd/milestones/M014/slices/S01/S01-PLAN.md | 2 +- .../M014/slices/S01/tasks/T02-VERIFY.json | 16 ++ .../M014/slices/S01/tasks/T03-SUMMARY.md | 78 +++++++ backend/pipeline/test_harness.py | 29 ++- backend/pipeline/test_harness_v2_format.py | 213 ++++++++++++++++++ 5 files changed, 333 insertions(+), 5 deletions(-) create mode 100644 .gsd/milestones/M014/slices/S01/tasks/T02-VERIFY.json create mode 100644 .gsd/milestones/M014/slices/S01/tasks/T03-SUMMARY.md create mode 100644 backend/pipeline/test_harness_v2_format.py diff --git a/.gsd/milestones/M014/slices/S01/S01-PLAN.md b/.gsd/milestones/M014/slices/S01/S01-PLAN.md index d4b62df..47ff4f3 100644 --- a/.gsd/milestones/M014/slices/S01/S01-PLAN.md +++ b/.gsd/milestones/M014/slices/S01/S01-PLAN.md @@ -58,7 +58,7 @@ - Estimate: 45m - Files: prompts/stage5_synthesis.txt - Verify: cd /home/aux/projects/content-to-kb-automator && python -c "import json; t=open('prompts/stage5_synthesis.txt').read(); assert 'body_sections_format' in t; assert 'Citation rules' in t or 'citation rules' in t.lower(); assert '\"subsections\"' in t; print('Prompt v5 structure OK')" -- [ ] **T03: Update test harness for new body_sections structure and add citation reporting** — Update the test harness word-count and section-count logic to walk the new list-of-objects body_sections structure instead of dict.values(). Add citation coverage reporting using the citation_utils from T01. This closes the slice — the harness can parse and report on v2 synthesis output. +- [x] **T03: Updated test_harness.py word-count/section-count logic for list[BodySection] v2 format and added per-page citation coverage reporting with 13 new tests passing** — Update the test harness word-count and section-count logic to walk the new list-of-objects body_sections structure instead of dict.values(). Add citation coverage reporting using the citation_utils from T01. This closes the slice — the harness can parse and report on v2 synthesis output. ## Steps diff --git a/.gsd/milestones/M014/slices/S01/tasks/T02-VERIFY.json b/.gsd/milestones/M014/slices/S01/tasks/T02-VERIFY.json new file mode 100644 index 0000000..8ea63e3 --- /dev/null +++ b/.gsd/milestones/M014/slices/S01/tasks/T02-VERIFY.json @@ -0,0 +1,16 @@ +{ + "schemaVersion": 1, + "taskId": "T02", + "unitId": "M014/S01/T02", + "timestamp": 1775177568135, + "passed": true, + "discoverySource": "task-plan", + "checks": [ + { + "command": "cd /home/aux/projects/content-to-kb-automator", + "exitCode": 0, + "durationMs": 6, + "verdict": "pass" + } + ] +} diff --git a/.gsd/milestones/M014/slices/S01/tasks/T03-SUMMARY.md b/.gsd/milestones/M014/slices/S01/tasks/T03-SUMMARY.md new file mode 100644 index 0000000..3a767f9 --- /dev/null +++ b/.gsd/milestones/M014/slices/S01/tasks/T03-SUMMARY.md @@ -0,0 +1,78 @@ +--- +id: T03 +parent: S01 +milestone: M014 +provides: [] +requires: [] +affects: [] +key_files: ["backend/pipeline/test_harness.py", "backend/pipeline/test_harness_v2_format.py"] +key_decisions: ["Word count includes both section.content and subsection.content; heading text excluded", "Citation report uses CITE log tag for grep-friendly structured output"] +patterns_established: [] +drill_down_paths: [] +observability_surfaces: [] +duration: "" +verification_result: "Ran both test suites — 28 tests total, all passing: test_harness_v2_format.py (13 passed), test_citation_utils.py (15 passed)." +completed_at: 2026-04-03T00:54:24.346Z +blocker_discovered: false +--- + +# T03: Updated test_harness.py word-count/section-count logic for list[BodySection] v2 format and added per-page citation coverage reporting with 13 new tests passing + +> Updated test_harness.py word-count/section-count logic for list[BodySection] v2 format and added per-page citation coverage reporting with 13 new tests passing + +## What Happened +--- +id: T03 +parent: S01 +milestone: M014 +key_files: + - backend/pipeline/test_harness.py + - backend/pipeline/test_harness_v2_format.py +key_decisions: + - Word count includes both section.content and subsection.content; heading text excluded + - Citation report uses CITE log tag for grep-friendly structured output +duration: "" +verification_result: passed +completed_at: 2026-04-03T00:54:24.347Z +blocker_discovered: false +--- + +# T03: Updated test_harness.py word-count/section-count logic for list[BodySection] v2 format and added per-page citation coverage reporting with 13 new tests passing + +**Updated test_harness.py word-count/section-count logic for list[BodySection] v2 format and added per-page citation coverage reporting with 13 new tests passing** + +## What Happened + +Two locations in test_harness.py treated body_sections as a dict. Updated both to walk BodySection objects: per-page summary now counts words from section.content + subsection.content, reports H2/subsection counts separately, and logs citation coverage via validate_citations(). Metadata total_words uses the same structure on serialized dicts. Created test_harness_v2_format.py with 13 tests covering word counting, section counting, citation integration, and SynthesisResult v2 round-trip. + +## Verification + +Ran both test suites — 28 tests total, all passing: test_harness_v2_format.py (13 passed), test_citation_utils.py (15 passed). + +## Verification Evidence + +| # | Command | Exit Code | Verdict | Duration | +|---|---------|-----------|---------|----------| +| 1 | `python -m pytest backend/pipeline/test_harness_v2_format.py -v` | 0 | ✅ pass | 80ms | +| 2 | `python -m pytest backend/pipeline/test_citation_utils.py -v` | 0 | ✅ pass | 80ms | + + +## Deviations + +None. + +## Known Issues + +None. + +## Files Created/Modified + +- `backend/pipeline/test_harness.py` +- `backend/pipeline/test_harness_v2_format.py` + + +## Deviations +None. + +## Known Issues +None. diff --git a/backend/pipeline/test_harness.py b/backend/pipeline/test_harness.py index 074628f..976fc54 100644 --- a/backend/pipeline/test_harness.py +++ b/backend/pipeline/test_harness.py @@ -33,6 +33,7 @@ from typing import NamedTuple from pydantic import ValidationError from config import get_settings +from pipeline.citation_utils import validate_citations from pipeline.llm_client import LLMClient, estimate_max_tokens from pipeline.schemas import SynthesisResult @@ -280,17 +281,33 @@ def run_synthesis( _log("SYNTH", f" Parsed: {len(result.pages)} pages synthesized") total_words = 0 for page in result.pages: - sections = page.body_sections or {} - word_count = sum(len(str(v).split()) for v in sections.values()) + sections = page.body_sections or [] + section_count = len(sections) + subsection_count = sum(len(s.subsections) for s in sections) + word_count = sum( + len(s.content.split()) + sum(len(sub.content.split()) for sub in s.subsections) + for s in sections + ) total_words += word_count _log( "PAGE", f" '{page.title}' ({page.slug}): " - f"{len(sections)} sections, {word_count} words, " + f"{section_count} sections ({subsection_count} subsections), " + f"{word_count} words, " f"{len(page.moment_indices)} moments linked, " f"quality={page.source_quality}", ) + # Citation coverage reporting + cit = validate_citations(sections, len(page.moment_indices)) + _log( + "CITE", + f" Citations: {cit['total_citations']}/{len(page.moment_indices)} moments cited " + f"({cit['coverage_pct']}% coverage)" + + (f", invalid indices: {cit['invalid_indices']}" if cit['invalid_indices'] else "") + + (f", uncited: {cit['uncited_moments']}" if cit['uncited_moments'] else ""), + ) + all_pages.append(page.model_dump()) # Summary @@ -454,7 +471,11 @@ def main() -> int: "metadata": { "page_count": len(pages), "total_words": sum( - sum(len(str(v).split()) for v in p.get("body_sections", {}).values()) + sum( + len(s.get("content", "").split()) + + sum(len(sub.get("content", "").split()) for sub in s.get("subsections", [])) + for s in p.get("body_sections", []) + ) for p in pages ), "elapsed_seconds": round(time.monotonic() - overall_start, 1), diff --git a/backend/pipeline/test_harness_v2_format.py b/backend/pipeline/test_harness_v2_format.py new file mode 100644 index 0000000..6713752 --- /dev/null +++ b/backend/pipeline/test_harness_v2_format.py @@ -0,0 +1,213 @@ +"""Tests for test_harness compatibility with v2 body_sections format. + +Validates that word-counting and citation integration work correctly +with the list[BodySection] structure (v2) instead of the old dict format. +""" + +from __future__ import annotations + +import pytest + +from pipeline.citation_utils import validate_citations +from pipeline.schemas import BodySection, BodySubSection, SynthesizedPage, SynthesisResult + + +# ── Helpers ────────────────────────────────────────────────────────────────── + + +def _make_page( + body_sections: list[BodySection], + moment_indices: list[int] | None = None, + title: str = "Test Page", + slug: str = "test-page", +) -> SynthesizedPage: + return SynthesizedPage( + title=title, + slug=slug, + topic_category="Testing", + summary="A test page.", + body_sections=body_sections, + moment_indices=moment_indices or [], + ) + + +def _count_words_v2(sections: list[BodySection]) -> int: + """Replicate the word-counting logic from the updated test_harness.""" + return sum( + len(s.content.split()) + sum(len(sub.content.split()) for sub in s.subsections) + for s in sections + ) + + +def _count_words_metadata(pages_dicts: list[dict]) -> int: + """Replicate the metadata total_words logic (operates on dicts after model_dump).""" + return sum( + sum( + len(s.get("content", "").split()) + + sum(len(sub.get("content", "").split()) for sub in s.get("subsections", [])) + for s in p.get("body_sections", []) + ) + for p in pages_dicts + ) + + +# ── Word counting tests ───────────────────────────────────────────────────── + + +class TestWordCounting: + def test_flat_sections_no_subsections(self): + sections = [ + BodySection(heading="Intro", content="one two three"), + BodySection(heading="Details", content="four five"), + ] + assert _count_words_v2(sections) == 5 + + def test_sections_with_subsections(self): + sections = [ + BodySection( + heading="Main", + content="alpha beta", # 2 words + subsections=[ + BodySubSection(heading="Sub A", content="gamma delta epsilon"), # 3 words + BodySubSection(heading="Sub B", content="zeta"), # 1 word + ], + ), + ] + assert _count_words_v2(sections) == 6 + + def test_empty_sections_list(self): + assert _count_words_v2([]) == 0 + + def test_section_with_empty_content(self): + sections = [ + BodySection(heading="Empty", content=""), + ] + # "".split() returns [], len([]) == 0 + assert _count_words_v2(sections) == 0 + + def test_metadata_word_count_matches(self): + """Metadata total_words (from model_dump dicts) matches Pydantic object counting.""" + sections = [ + BodySection( + heading="H2", + content="one two three", + subsections=[ + BodySubSection(heading="H3", content="four five six seven"), + ], + ), + BodySection(heading="Another", content="eight nine"), + ] + page = _make_page(sections, moment_indices=[0, 1]) + pages_dicts = [page.model_dump()] + + assert _count_words_v2(sections) == 9 + assert _count_words_metadata(pages_dicts) == 9 + + +# ── Section/subsection counting ───────────────────────────────────────────── + + +class TestSectionCounting: + def test_section_and_subsection_counts(self): + sections = [ + BodySection(heading="A", content="text", subsections=[ + BodySubSection(heading="A.1", content="sub text"), + ]), + BodySection(heading="B", content="more text"), + BodySection(heading="C", content="even more", subsections=[ + BodySubSection(heading="C.1", content="sub1"), + BodySubSection(heading="C.2", content="sub2"), + ]), + ] + section_count = len(sections) + subsection_count = sum(len(s.subsections) for s in sections) + assert section_count == 3 + assert subsection_count == 3 + + +# ── Citation integration ───────────────────────────────────────────────────── + + +class TestCitationIntegration: + def test_full_coverage(self): + sections = [ + BodySection(heading="Intro", content="First point [0]. Second point [1]."), + BodySection(heading="Details", content="More on [0] and [2]."), + ] + result = validate_citations(sections, moment_count=3) + assert result["valid"] is True + assert result["coverage_pct"] == 100.0 + assert result["invalid_indices"] == [] + assert result["uncited_moments"] == [] + + def test_partial_coverage(self): + sections = [ + BodySection(heading="Intro", content="Only cites [0]."), + ] + result = validate_citations(sections, moment_count=3) + assert result["valid"] is False + assert result["coverage_pct"] == pytest.approx(33.3, abs=0.1) + assert result["uncited_moments"] == [1, 2] + + def test_invalid_index(self): + sections = [ + BodySection(heading="Bad", content="Cites [0] and [99]."), + ] + result = validate_citations(sections, moment_count=2) + assert result["invalid_indices"] == [99] + + def test_citations_in_subsections(self): + sections = [ + BodySection( + heading="Main", + content="See [0].", + subsections=[ + BodySubSection(heading="Sub", content="Also [1] and [2]."), + ], + ), + ] + result = validate_citations(sections, moment_count=3) + assert result["valid"] is True + assert result["total_citations"] == 3 + + def test_multi_citation_markers(self): + sections = [ + BodySection(heading="X", content="Both sources agree [0,1]."), + ] + result = validate_citations(sections, moment_count=2) + assert result["valid"] is True + assert result["total_citations"] == 2 + + def test_no_sections(self): + result = validate_citations([], moment_count=0) + assert result["valid"] is True + assert result["coverage_pct"] == 0.0 + + +# ── End-to-end: SynthesisResult with v2 body_sections ─────────────────────── + + +class TestSynthesisResultV2: + def test_round_trip_model_dump(self): + """SynthesisResult with v2 body_sections round-trips through model_dump/validate.""" + sections = [ + BodySection( + heading="Overview", + content="This technique [0] is fundamental.", + subsections=[ + BodySubSection(heading="Key Concept", content="Detail [1]."), + ], + ), + ] + page = _make_page(sections, moment_indices=[0, 1]) + result = SynthesisResult(pages=[page]) + + dumped = result.model_dump() + restored = SynthesisResult.model_validate(dumped) + + assert len(restored.pages) == 1 + restored_page = restored.pages[0] + assert len(restored_page.body_sections) == 1 + assert restored_page.body_sections[0].heading == "Overview" + assert len(restored_page.body_sections[0].subsections) == 1 + assert restored_page.body_sections_format == "v2"