diff --git a/.gsd/milestones/M014/M014-ROADMAP.md b/.gsd/milestones/M014/M014-ROADMAP.md index 0ce660c..14a13dc 100644 --- a/.gsd/milestones/M014/M014-ROADMAP.md +++ b/.gsd/milestones/M014/M014-ROADMAP.md @@ -9,7 +9,7 @@ Restructure technique pages to be broader (per-creator+category across videos), | S01 | Synthesis Prompt v5 — Nested Sections + Citations | high | — | ✅ | Run test harness with new prompt → output has list-of-objects body_sections with H2/H3 nesting, citation markers on key claims, broader page scope. | | S02 | Composition Prompt + Test Harness Compose Mode | high | S01 | ✅ | Run test harness --compose mode with existing page + new moments → merged output with deduplication, new sections, updated citations. | | S03 | Data Model + Migration | low | — | ✅ | Alembic migration runs clean. API response includes body_sections_format and source_videos fields. | -| S04 | Pipeline Compose-or-Create Logic | high | S01, S02, S03 | ⬜ | Process two COPYCATT videos. Second video's moments composed into existing page. technique_page_videos has both video IDs. | +| S04 | Pipeline Compose-or-Create Logic | high | S01, S02, S03 | ✅ | Process two COPYCATT videos. Second video's moments composed into existing page. technique_page_videos has both video IDs. | | S05 | Frontend — Nested Rendering, TOC, Citations | medium | S03 | ⬜ | Format-2 page renders with TOC, nested sections, clickable citations. Format-1 pages unchanged. | | S06 | Admin UI — Multi-Source Pipeline Management | medium | S03, S04 | ⬜ | Admin view for multi-source page shows source dropdown, composition history, per-video chunking inspection. | | S07 | Search — Per-Section Embeddings + Deep Linking | medium | S04, S05 | ⬜ | Search 'LFO grain position' → section-level result → click → navigates to page#section and scrolls. 
| diff --git a/.gsd/milestones/M014/slices/S04/S04-SUMMARY.md b/.gsd/milestones/M014/slices/S04/S04-SUMMARY.md new file mode 100644 index 0000000..691f746 --- /dev/null +++ b/.gsd/milestones/M014/slices/S04/S04-SUMMARY.md @@ -0,0 +1,93 @@ +--- +id: S04 +parent: M014 +milestone: M014 +provides: + - Compose-or-create branching in stage5_synthesis + - body_sections_format='v2' on all technique pages + - TechniquePageVideo join table populated for every page+video combination +requires: + - slice: S01 + provides: Synthesis prompt v5 with nested sections + citations format + - slice: S02 + provides: Compose prompt + test harness validation + - slice: S03 + provides: Data model with body_sections_format column and TechniquePageVideo table +affects: + - S06 + - S07 +key_files: + - backend/pipeline/stages.py + - backend/pipeline/test_compose_pipeline.py +key_decisions: + - Compose detection queries all matching pages and warns on multiple matches, uses first + - pg_insert with on_conflict_do_nothing for idempotent TechniquePageVideo inserts + - Source-code assertions for branching logic tests instead of fragile full-session mocks +patterns_established: + - Compose-or-create branching: query existing pages by creator_id + LOWER(category), compose into first match, fall through to standard synthesis otherwise + - XML-tagged compose prompt with offset-indexed moments: existing [0]-[N-1], new [N]-[N+M-1] +observability_surfaces: + - INFO log when compose path triggered: 'Stage 5: Composing into existing page ...' 
+ - WARNING log when multiple pages match creator+category +drill_down_paths: + - .gsd/milestones/M014/slices/S04/tasks/T01-SUMMARY.md + - .gsd/milestones/M014/slices/S04/tasks/T02-SUMMARY.md +duration: "" +verification_result: passed +completed_at: 2026-04-03T01:34:24.402Z +blocker_discovered: false +--- + +# S04: Pipeline Compose-or-Create Logic + +**Stage 5 now detects existing technique pages by creator+category and branches to a compose path that merges new video content into them, with body_sections_format='v2' and TechniquePageVideo tracking on all pages.** + +## What Happened + +Two tasks delivered the compose-or-create pipeline logic and its test suite. + +T01 added `_build_compose_user_prompt()` and `_compose_into_existing()` helper functions to stages.py, then wired compose detection into the stage5_synthesis per-category loop. The detection query uses `creator_id + LOWER(topic_category)` for case-insensitive matching. When an existing page matches, the compose path loads its linked moments, builds an XML-tagged prompt with offset-indexed moment references, and calls the LLM with the stage5_compose.txt system prompt. If no match, the existing chunked synthesis path runs unchanged. All pages now get `body_sections_format='v2'` set unconditionally, and a `TechniquePageVideo` row is inserted via `pg_insert` with `on_conflict_do_nothing` for idempotency. + +T02 created 12 unit tests across 4 test classes: prompt construction (5 tests for XML structure, offset indices, empty existing moments, page JSON serialization, moment content), branching logic (3 tests using source-code assertions + focused mocks), format/tracking (3 tests for v2 flag, pg_insert usage, INSERT values), and case sensitivity (1 test verifying func.lower on both sides of the category comparison). All 12 pass. + +## Verification + +All slice-level verification checks pass: +1. 
`PYTHONPATH=backend python -c "from pipeline.stages import _build_compose_user_prompt, _compose_into_existing; print('imports OK')"` → exit 0 +2. `grep -q 'body_sections_format' backend/pipeline/stages.py` → exit 0 +3. `grep -q 'TechniquePageVideo' backend/pipeline/stages.py` → exit 0 +4. `grep -q 'stage5_compose' backend/pipeline/stages.py` → exit 0 +5. `PYTHONPATH=backend python -m pytest backend/pipeline/test_compose_pipeline.py -v` → 12 passed in 1.50s + +## Requirements Advanced + +- R012 — Stage 5 now composes new video content into existing technique pages instead of overwriting, fulfilling the incremental update aspect of R012 + +## Requirements Validated + +None. + +## New Requirements Surfaced + +None. + +## Requirements Invalidated or Re-scoped + +None. + +## Deviations + +Used `default=str` in `json.dumps()` for page serialization in `_build_compose_user_prompt()` to handle UUID/datetime fields — not in plan but necessary for robustness. T02 replaced integration-level branching tests with source-code structure assertions + focused unit tests due to session mock fragility. + +## Known Limitations + +None. + +## Follow-ups + +None. 
+ +## Files Created/Modified + +- `backend/pipeline/stages.py` — Added _build_compose_user_prompt(), _compose_into_existing(), compose-or-create branching in stage5_synthesis, body_sections_format='v2' setting, TechniquePageVideo insertion +- `backend/pipeline/test_compose_pipeline.py` — New file: 12 unit tests covering compose prompt construction, branching logic, format tracking, and case-insensitive matching diff --git a/.gsd/milestones/M014/slices/S04/S04-UAT.md b/.gsd/milestones/M014/slices/S04/S04-UAT.md new file mode 100644 index 0000000..371e6d1 --- /dev/null +++ b/.gsd/milestones/M014/slices/S04/S04-UAT.md @@ -0,0 +1,67 @@ +# S04: Pipeline Compose-or-Create Logic — UAT + +**Milestone:** M014 +**Written:** 2026-04-03T01:34:24.402Z + +## UAT: Pipeline Compose-or-Create Logic + +### Preconditions +- PostgreSQL running with current schema (body_sections_format column on technique_pages, technique_page_videos table) +- Stage 5 compose prompt file exists at expected path (stage5_compose.txt) +- At least one creator with processed video data in the database + +### Test 1: Fresh synthesis (no existing page) +**Steps:** +1. Process a video for a creator+category combination that has no existing technique page +2. Check the resulting technique_pages row + +**Expected:** +- Standard synthesis path runs (no compose log message) +- `body_sections_format = 'v2'` on the created page +- `technique_page_videos` row exists linking the page to the source video + +### Test 2: Compose into existing page +**Steps:** +1. Process a second video by the same creator with moments in the same topic category as Test 1 +2. Check API logs for compose detection +3. Query technique_pages for that creator+category + +**Expected:** +- Log message: `Stage 5: Composing into existing page '
tags. Create TableOfContents component that takes v2 sections and renders nested anchor list. Parse [N] and [N,M] citation markers in content strings and replace with links to #km-{momentId}. Add all CSS (TOC styles, subsection styles, citation link superscript). Update snapshotToOverlay to pass through list-format body_sections.
+ - Estimate: 2h
+ - Files: frontend/src/api/public-client.ts, frontend/src/pages/TechniquePage.tsx, frontend/src/components/TableOfContents.tsx, frontend/src/utils/citations.tsx, frontend/src/App.css
+ - Verify: cd frontend && npm run build
+- [ ] **T02: Deploy to ub01 and verify v1 backward compatibility** — Push changes to ub01, rebuild the web container, and verify that existing v1 technique pages render correctly with no regressions. Since no v2 pages exist in production yet (S04 hasn't populated any), v2 rendering is verified structurally via the TypeScript build. This task confirms the live deployment works.
+ - Estimate: 30m
+ - Files: frontend/src/pages/TechniquePage.tsx
+ - Verify: ssh ub01 'cd /vmPool/r/repos/xpltdco/chrysopedia && git pull && docker compose build chrysopedia-web-8096 && docker compose up -d chrysopedia-web-8096' && sleep 5 && curl -s -o /dev/null -w '%{http_code}' http://ub01:8096/
diff --git a/.gsd/milestones/M014/slices/S05/S05-RESEARCH.md b/.gsd/milestones/M014/slices/S05/S05-RESEARCH.md
new file mode 100644
index 0000000..d1f6670
--- /dev/null
+++ b/.gsd/milestones/M014/slices/S05/S05-RESEARCH.md
@@ -0,0 +1,97 @@
+# S05 Research: Frontend — Nested Rendering, TOC, Citations
+
+## Summary
+
+Straightforward frontend work. The v2 data model is well-defined (S01/S03), the existing TechniquePage component has clear seams for modification, and no new libraries are needed. The main work is: detect format version → render v2 sections with H2/H3 nesting → add a TOC → make `[N]` citation markers clickable links to key moments.
+
+## Recommendation
+
+Light research — known patterns, established codebase. Three tasks: (1) update TypeScript types + build format-aware rendering, (2) add TOC component, (3) wire citation markers to key moment anchors. All in existing files plus one new component.
+
+## Implementation Landscape
+
+### Current State
+
+**TechniquePage.tsx** (~350 lines) fetches technique by slug, supports version switching, and renders body_sections with `Object.entries()`, treating it as a `Record<string, unknown>`.
+
+3. **TOC component** (new, e.g. `components/TableOfContents.tsx`):
+ - Takes v2 sections array, generates anchor IDs from headings (slugify)
+ - Renders nested list: H2 entries with indented H3 sub-entries
+ - Each item is an `<a href="#slug">` link that smooth-scrolls
+ - Add `id` attributes to rendered H2/H3 elements for anchor targets
+ - Position: inside `.technique-columns__main`, above the prose, or as a sticky sidebar element
+
+4. **Citation rendering**:
+ - Parse `[N]` and `[N,M]` patterns in content strings
+ - Replace with `<a href="#km-{momentId}" class="citation-link">[N]</a>`
+ - Need the key_moments array to map index → moment ID
+ - Superscript styling for citation markers
+
+5. **CSS additions** (`App.css`):
+ - `.technique-toc` — TOC container, possibly sticky
+ - `.technique-toc__item`, `.technique-toc__sub-item` — nested list styles
+ - `.technique-prose__subsection` — H3 sections
+ - `.citation-link` — superscript, accent-colored, hover underline
+ - Section anchor scroll offset (account for sticky nav)
+
+### Slug Generation for Section IDs
+
+Need a simple `toSlug()` utility: lowercase, replace spaces/special chars with hyphens, strip duplicates. No existing utility in the codebase. Can be a 3-line function inline or in `utils/`.
+
+### Version Overlay Consideration
+
+The `snapshotToOverlay()` function extracts `body_sections` from historical versions. It currently casts to `Record tags. Create TableOfContents component that takes v2 sections and renders nested anchor list. Parse [N] and [N,M] citation markers in content strings and replace with links to #km-{momentId}. Add all CSS (TOC styles, subsection styles, citation link superscript). Update snapshotToOverlay to pass through list-format body_sections.
+
+## Inputs
+
+- `frontend/src/api/public-client.ts` — current TechniquePageDetail interface missing body_sections_format and source_videos
+- `frontend/src/pages/TechniquePage.tsx` — current v1-only body_sections renderer and snapshotToOverlay function
+- `frontend/src/App.css` — existing technique-prose styles to extend
+
+## Expected Output
+
+- `frontend/src/api/public-client.ts` — updated with BodySectionV2, BodySubSectionV2 interfaces, widened body_sections type, body_sections_format field, source_videos field
+- `frontend/src/pages/TechniquePage.tsx` — format-aware renderer (v1 dict vs v2 list), citation rendering in prose, TOC integration, updated snapshotToOverlay
+- `frontend/src/components/TableOfContents.tsx` — new TOC component with nested anchor links and slug-based IDs
+- `frontend/src/utils/citations.tsx` — parseCitations function that converts [N] and [N,M] markers to React elements with anchor links
+- `frontend/src/App.css` — new styles for .technique-toc, .technique-prose__subsection, .citation-link, section scroll-margin-top
+
+## Verification
+
+cd frontend && npm run build
diff --git a/.gsd/milestones/M014/slices/S05/tasks/T01-SUMMARY.md b/.gsd/milestones/M014/slices/S05/tasks/T01-SUMMARY.md
new file mode 100644
index 0000000..c7b4fa4
--- /dev/null
+++ b/.gsd/milestones/M014/slices/S05/tasks/T01-SUMMARY.md
@@ -0,0 +1,84 @@
+---
+id: T01
+parent: S05
+milestone: M014
+provides: []
+requires: []
+affects: []
+key_files: ["frontend/src/api/public-client.ts", "frontend/src/pages/TechniquePage.tsx", "frontend/src/components/TableOfContents.tsx", "frontend/src/utils/citations.tsx", "frontend/src/App.css"]
+key_decisions: ["Subsection IDs use compound slugs (sectionSlug--subSlug) to avoid collisions", "TOC uses CSS counters for numbered references", "Invalid citation indices render as plain text"]
+patterns_established: []
+drill_down_paths: []
+observability_surfaces: []
+duration: ""
+verification_result: "Build verification: `cd frontend && npm run build` exits 0, producing 56 modules with no TypeScript or Vite errors."
+completed_at: 2026-04-03T01:42:45.503Z
+blocker_discovered: false
+---
+
+# T01: Added format-aware v2 body_sections rendering with nested TOC, citation anchor links, and subsection styles while preserving v1 dict rendering unchanged
+
+> Added format-aware v2 body_sections rendering with nested TOC, citation anchor links, and subsection styles while preserving v1 dict rendering unchanged
+
+## What Happened
+---
+id: T01
+parent: S05
+milestone: M014
+key_files:
+ - frontend/src/api/public-client.ts
+ - frontend/src/pages/TechniquePage.tsx
+ - frontend/src/components/TableOfContents.tsx
+ - frontend/src/utils/citations.tsx
+ - frontend/src/App.css
+key_decisions:
+ - Subsection IDs use compound slugs (sectionSlug--subSlug) to avoid collisions
+ - TOC uses CSS counters for numbered references
+ - Invalid citation indices render as plain text
+duration: ""
+verification_result: passed
+completed_at: 2026-04-03T01:42:45.504Z
+blocker_discovered: false
+---
+
+# T01: Added format-aware v2 body_sections rendering with nested TOC, citation anchor links, and subsection styles while preserving v1 dict rendering unchanged
+
+**Added format-aware v2 body_sections rendering with nested TOC, citation anchor links, and subsection styles while preserving v1 dict rendering unchanged**
+
+## What Happened
+
+Updated TechniquePageDetail TypeScript types with body_sections_format, source_videos, and BodySectionV2/BodySubSectionV2 interfaces. Built format-aware rendering in TechniquePage.tsx: v2 array format renders TableOfContents + H2 sections with slugified IDs + H3 subsections, all with citation parsing. V1 dict rendering unchanged. Created TableOfContents component with CSS-counter-numbered nested anchor links. Created parseCitations utility that converts [N] and [N,M] markers to superscript anchor links targeting key moment IDs. Updated snapshotToOverlay for v2 format passthrough. Added CSS for TOC card, subsection borders, citation superscripts, and scroll-margin-top on anchored sections.
+
+## Verification
+
+Build verification: `cd frontend && npm run build` exits 0, producing 56 modules with no TypeScript or Vite errors.
+
+## Verification Evidence
+
+| # | Command | Exit Code | Verdict | Duration |
+|---|---------|-----------|---------|----------|
+| 1 | `cd frontend && npm run build` | 0 | ✅ pass | 3500ms |
+
+
+## Deviations
+
+None.
+
+## Known Issues
+
+None.
+
+## Files Created/Modified
+
+- `frontend/src/api/public-client.ts`
+- `frontend/src/pages/TechniquePage.tsx`
+- `frontend/src/components/TableOfContents.tsx`
+- `frontend/src/utils/citations.tsx`
+- `frontend/src/App.css`
+
+
+## Deviations
+None.
+
+## Known Issues
+None.
diff --git a/.gsd/milestones/M014/slices/S05/tasks/T02-PLAN.md b/.gsd/milestones/M014/slices/S05/tasks/T02-PLAN.md
new file mode 100644
index 0000000..aa41c94
--- /dev/null
+++ b/.gsd/milestones/M014/slices/S05/tasks/T02-PLAN.md
@@ -0,0 +1,25 @@
+---
+estimated_steps: 1
+estimated_files: 1
+skills_used: []
+---
+
+# T02: Deploy to ub01 and verify v1 backward compatibility
+
+Push changes to ub01, rebuild the web container, and verify that existing v1 technique pages render correctly with no regressions. Since no v2 pages exist in production yet (S04 hasn't populated any), v2 rendering is verified structurally via the TypeScript build. This task confirms the live deployment works.
+
+## Inputs
+
+- `frontend/src/api/public-client.ts` — updated types from T01
+- `frontend/src/pages/TechniquePage.tsx` — updated renderer from T01
+- `frontend/src/components/TableOfContents.tsx` — new component from T01
+- `frontend/src/utils/citations.tsx` — new utility from T01
+- `frontend/src/App.css` — updated styles from T01
+
+## Expected Output
+
+- `frontend/src/pages/TechniquePage.tsx` — verified working in production (no changes, just deployment verification)
+
+## Verification
+
+ssh ub01 'cd /vmPool/r/repos/xpltdco/chrysopedia && git pull && docker compose build chrysopedia-web-8096 && docker compose up -d chrysopedia-web-8096' && sleep 5 && curl -s -o /dev/null -w '%{http_code}' http://ub01:8096/
diff --git a/frontend/src/App.css b/frontend/src/App.css
index 4d334df..dbc1c7d 100644
--- a/frontend/src/App.css
+++ b/frontend/src/App.css
@@ -1968,6 +1968,129 @@ a.app-footer__repo:hover {
line-height: 1.5;
}
+/* ── Table of Contents ────────────────────────────────────────────────────── */
+
+.technique-toc {
+ background: var(--color-bg-surface);
+ border: 1px solid var(--color-border);
+ border-radius: 0.5rem;
+ padding: 1rem 1.25rem;
+ margin-bottom: 1.5rem;
+}
+
+.technique-toc__title {
+ font-size: 0.8125rem;
+ font-weight: 600;
+ text-transform: uppercase;
+ letter-spacing: 0.04em;
+ color: var(--color-text-secondary);
+ margin-bottom: 0.75rem;
+}
+
+.technique-toc__list {
+ list-style: none;
+ padding: 0;
+ margin: 0;
+ counter-reset: toc-section;
+}
+
+.technique-toc__item {
+ counter-increment: toc-section;
+ margin-bottom: 0.25rem;
+}
+
+.technique-toc__link {
+ color: var(--color-accent);
+ text-decoration: none;
+ font-size: 0.875rem;
+ line-height: 1.6;
+}
+
+.technique-toc__link::before {
+ content: counter(toc-section) ". ";
+ color: var(--color-text-muted);
+}
+
+.technique-toc__link:hover {
+ text-decoration: underline;
+}
+
+.technique-toc__sublist {
+ list-style: none;
+ padding-left: 1.25rem;
+ margin: 0.125rem 0 0.25rem;
+ counter-reset: toc-sub;
+}
+
+.technique-toc__subitem {
+ counter-increment: toc-sub;
+}
+
+.technique-toc__sublink {
+ color: var(--color-text-secondary);
+ text-decoration: none;
+ font-size: 0.8125rem;
+ line-height: 1.6;
+}
+
+.technique-toc__sublink::before {
+ content: counter(toc-section) "." counter(toc-sub) " ";
+ color: var(--color-text-muted);
+}
+
+.technique-toc__sublink:hover {
+ color: var(--color-accent);
+ text-decoration: underline;
+}
+
+/* ── V2 subsections ───────────────────────────────────────────────────────── */
+
+.technique-prose__subsection {
+ margin-left: 0.75rem;
+ margin-bottom: 1rem;
+ padding-left: 0.75rem;
+ border-left: 2px solid var(--color-border);
+}
+
+.technique-prose__subsection h3 {
+ font-size: 1.0625rem;
+ font-weight: 600;
+ margin-bottom: 0.375rem;
+ color: var(--color-text-primary);
+}
+
+.technique-prose__subsection p {
+ font-size: 0.9375rem;
+ color: var(--color-text-primary);
+ line-height: 1.7;
+}
+
+/* ── Citation links ───────────────────────────────────────────────────────── */
+
+.citation-group {
+ font-size: 0.75em;
+ line-height: 1;
+ vertical-align: super;
+}
+
+.citation-link {
+ color: var(--color-accent);
+ text-decoration: none;
+ font-weight: 600;
+ cursor: pointer;
+}
+
+.citation-link:hover {
+ text-decoration: underline;
+}
+
+/* ── Scroll margin for section anchors ────────────────────────────────────── */
+
+.technique-prose__section[id],
+.technique-prose__subsection[id] {
+ scroll-margin-top: 5rem;
+}
+
/* ── Key moments list ─────────────────────────────────────────────────────── */
.technique-moments {
diff --git a/frontend/src/api/public-client.ts b/frontend/src/api/public-client.ts
index 43ef801..c5bf25c 100644
--- a/frontend/src/api/public-client.ts
+++ b/frontend/src/api/public-client.ts
@@ -56,6 +56,24 @@ export interface RelatedLinkItem {
reason: string;
}
+export interface BodySubSectionV2 {
+ heading: string;
+ content: string;
+}
+
+export interface BodySectionV2 {
+ heading: string;
+ content: string;
+ subsections: BodySubSectionV2[];
+}
+
+export interface SourceVideoSummary {
+ id: string;
+ filename: string;
+ content_type: string;
+ added_at: string | null;
+}
+
export interface TechniquePageDetail {
id: string;
title: string;
@@ -63,7 +81,8 @@ export interface TechniquePageDetail {
topic_category: string;
topic_tags: string[] | null;
summary: string | null;
- body_sections: Record {content as string} {String(content as string)} {parseCitations(section.content, technique.key_moments)} {parseCitations(sub.content, technique.key_moments)} {content as string} {String(content as string)}{sectionTitle}
- {typeof content === "string" ? (
-
- {JSON.stringify(content, null, 2)}
-
- ) : (
- {section.heading}
+ {section.content && (
+ {sub.heading}
+ {sub.content && (
+ {sectionTitle}
+ {typeof content === "string" ? (
+
+ {JSON.stringify(content, null, 2)}
+
+ ) : (
+