From 415f4541058151259d11b0a344b8550e9dd76006 Mon Sep 17 00:00:00 2001 From: jlightner Date: Fri, 3 Apr 2026 09:43:37 +0000 Subject: [PATCH] =?UTF-8?q?chore:=20remove=202,367=20lines=20of=20dead=20c?= =?UTF-8?q?ode=20=E2=80=94=20orphaned=20CSS,=20unused=20imports,=20stale?= =?UTF-8?q?=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Deleted files: - generate_stage5_variants.py (874 lines) — superseded by pipeline.quality toolkit - PROJECT_CONTEXT.md (461 lines) — stale, .gsd/PROJECT.md is the living doc - CHRYSOPEDIA-ASSESSMENT.md (654 lines) — M011 triage artifact, all findings actioned CSS cleanup (364 lines): - 20 orphaned block groups from deleted review queue/old components - Duplicate .btn base rule, .btn--warning, @keyframes stagePulse Python imports: - routers/pipeline.py: uuid, literal_column, over, text - tests/test_pipeline.py: 9 unused imports (PropertyMock, create_engine, etc.) Build verified: tsc --noEmit clean, npm run build clean (59 modules, 0 warnings). 
--- CHRYSOPEDIA-ASSESSMENT.md | 654 ------------------------ PROJECT_CONTEXT.md | 461 ----------------- backend/routers/pipeline.py | 3 - backend/tests/test_pipeline.py | 14 +- frontend/src/App.css | 364 -------------- generate_stage5_variants.py | 874 --------------------------------- 6 files changed, 3 insertions(+), 2367 deletions(-) delete mode 100644 CHRYSOPEDIA-ASSESSMENT.md delete mode 100644 PROJECT_CONTEXT.md delete mode 100644 generate_stage5_variants.py diff --git a/CHRYSOPEDIA-ASSESSMENT.md b/CHRYSOPEDIA-ASSESSMENT.md deleted file mode 100644 index bc85a35..0000000 --- a/CHRYSOPEDIA-ASSESSMENT.md +++ /dev/null @@ -1,654 +0,0 @@ -# UI/UX Assessment: Chrysopedia -## Assessment Date: March 31, 2026 -## Assessed By: Claude (UI/UX Assessor Agent) -## Scope: Full public-facing application — Home, Topics, Creators, Technique detail, Search, About -## Method: Live browser navigation with desktop (1280×800) and mobile (390×844) viewports - ---- - -## 1. Product Context Summary - -Chrysopedia is a **structured knowledgebase for electronic music production techniques**, built by extracting and distilling long-form video tutorials into searchable, categorized text content. The core value proposition — "Skip the 4-hour videos, find the insight you need in seconds" — is communicated clearly on the homepage. Content is organized along three axes: **Topics** (hierarchical categories like Sound Design > Bass), **Creators** (individual producers whose tutorials have been processed), and **Techniques** (the atomic content units combining narrative text, key moments with timestamps, signal chains, and related items). The product targets the niche intersection of electronic music producers who learn from YouTube tutorials but want faster access to specific techniques. Currently at v0.8.0 with 5 creators, ~175 techniques, and 8 top-level topic categories spanning the full production pipeline from Workflow through Mastering. - ---- - -## 2. 
Journey Inventory - -### Journey 1: "Find a technique I know I want" -| Attribute | Detail | -|-----------|--------| -| **Entry point** | Homepage search bar (auto-focused on load) | -| **Steps** | 1. Type keyword → 2. See autocomplete (5 results) → 3. Click result OR "See all results" → 4. Read technique page | -| **Step count** | 2–3 clicks | -| **Friction** | Low — autocomplete is responsive, results show type badges (TECHNIQUE) and creator names | -| **Drop-off risk** | Low — search is the primary CTA and works well | -| **Unhappy path** | No "no results" state was tested; unclear what happens with misspellings or vague queries | - -### Journey 2: "Browse by topic — I know the category" -| Attribute | Detail | -|-----------|--------| -| **Entry point** | Nav → Topics, or homepage Topics card | -| **Steps** | 1. Click Topics → 2. Scan categories → 3. Click sub-topic → 4. See techniques grouped by creator → 5. Click technique | -| **Step count** | 3–4 clicks | -| **Friction** | Medium — Topics page is dense; all categories are expanded by default showing all sub-topics simultaneously. Filter input exists but may go unnoticed | -| **Drop-off risk** | Medium — wall-of-text feeling; no visual differentiation between categories beyond icons | -| **Decision points** | Choosing between categories requires understanding the taxonomy (e.g., "Sound Design" vs "Synthesis" vs "Mixing" — distinctions unclear for beginners) | - -### Journey 3: "Explore by creator — I follow this producer" -| Attribute | Detail | -|-----------|--------| -| **Entry point** | Nav → Creators | -| **Steps** | 1. Click Creators → 2. See creator list with genre filters → 3. Click creator → 4. See technique list → 5. 
Click technique | -| **Step count** | 3–4 clicks | -| **Friction** | Low — clean layout, genre pill filters, sort options (Random/A-Z/Views) | -| **Drop-off risk** | Low for users who know a creator name; higher for discovery since only 5 creators exist currently | -| **Discoverability** | Good — breadcrumbs present on creator detail page ("← Creators") | - -### Journey 4: "I'm a beginner — where do I start?" -| Attribute | Detail | -|-----------|--------| -| **Entry point** | Homepage | -| **Steps** | 1. Read homepage → 2. ??? | -| **Step count** | Unknown — **no guided beginner path exists** | -| **Friction** | **High** — homepage assumes the user knows what to search for. "Start Exploring" button links to Topics, which presents an expert-level taxonomy. No "Start Here" guide, no skill-level filtering, no curated learning paths | -| **Drop-off risk** | **Critical** — this is the primary gap for the stated goal of serving beginners | -| **Discoverability** | The 3-step "how it works" cards on the homepage explain the *platform*, not the *content*. A beginner doesn't know what "FM synthesis" or "gain staging" means | - -### Journey 5: "Discover something interesting (leisure browsing)" -| Attribute | Detail | -|-----------|--------| -| **Entry point** | Homepage → Featured Technique or Recently Added | -| **Steps** | 1. Scroll homepage → 2. See featured technique → 3. Click through → 4. Read → 5. Follow "Related Techniques" | -| **Step count** | 2–3 clicks | -| **Friction** | Medium — Featured Technique section is nice but only shows one. Recently Added shows 4. No "surprise me" or "random technique" feature. 
Related Techniques at bottom of detail pages create a browsable chain but require scrolling past the full article | -| **Drop-off risk** | Medium — limited discovery surfaces mean the browse loop runs dry quickly | - -### Journey 6: "Find the exact video moment a technique comes from" -| Attribute | Detail | -|-----------|--------| -| **Entry point** | Technique detail page → Key Moments sidebar | -| **Steps** | 1. Navigate to technique → 2. Read Key Moments with timestamps → 3. ??? | -| **Step count** | 2 steps, then dead end | -| **Friction** | **High** — timestamps are shown (e.g., "18:07 – 18:26") but there is **no link to the source video**. The user can see the video title but cannot click through to YouTube at the timestamp | -| **Drop-off risk** | High — this is a core promise unfulfilled. The source attribution is present but not actionable | - ---- - -## 3. Heuristic Scorecard - -| # | Dimension | Rating | Justification | -|---|-----------|--------|---------------| -| 3.1 | **First Impression & Orientation** | 3 — Adequate | The landing page clearly communicates *what* Chrysopedia does ("Production Knowledge, Distilled") and the 3-step value prop is effective. However, the dark theme with cyan accents, while aesthetically consistent, creates a somewhat intimidating "developer tool" feeling rather than an inviting creative learning environment. The search bar is auto-focused (good), but there is no guidance for users who don't know what to search for. | -| 3.2 | **Navigation & Wayfinding** | 4 — Good | Three-item top nav (Home / Topics / Creators) is clean and understandable. Breadcrumbs appear on topic drill-downs and creator detail pages. Active nav state is visually indicated. The "← Back" links on technique pages are helpful. However, there's no global search in the nav bar (only on homepage), and no way to get back to search results after clicking into a technique. 
| -| 3.3 | **Layout, Visual Hierarchy & Typography** | 3 — Adequate | Consistent dark theme with clear card-based layout. Technique cards show category badges, tags, and preview text effectively. However: heading hierarchy is broken (two H1 elements per page — the nav logo and the page title); body text contrast is borderline (rgb(139,139,154) on the dark background is ~4.2:1, just at the WCAG AA threshold); the technique detail page splits into a content column and Key Moments sidebar without clear visual separation. | -| 3.4 | **Interaction Design & Feedback** | 3 — Adequate | Search autocomplete works well with immediate feedback. Tag pills and category badges are clickable and lead to relevant filtered views. However: no loading states or skeleton screens observed; page transitions are instant (SSG) which is good; hover states on cards are subtle (border color change) and could be more pronounced; the "Hide sub-topics ▲" toggles on the Topics page work but the animation is absent — the content simply appears/disappears. | -| 3.5 | **Form Design & Data Entry** | 3 — Adequate | Only two input fields exist: homepage search and topics filter. Both function correctly. Search has a clear placeholder, a visible search button, a clear (×) button, and autocomplete. However, the search input on the results page loses focus after submission, and there's no keyboard shortcut (e.g., "/" to focus search). | -| 3.6 | **Error Handling & Recovery** | 2 — Deficient | No error states were observed or testable on the public site. The "Report issue with this page" link at the bottom of technique pages is good but could be more prominent. Empty states were not encountered, but the Counterpoint sub-topic under Music Theory shows "0 techniques · 0 creators" with no helpful message. No 404 page was tested. | -| 3.7 | **Content Quality & Microcopy** | 4 — Good | Technique descriptions are detailed, well-structured, and genuinely educational. 
The writing is clear and uses appropriate domain terminology without being inaccessible. Key Moments provide excellent summaries with timestamps. Tag taxonomy is consistent. However: button labels could be more specific ("Start Exploring" → what exactly?); the "Admin ▾" dropdown in the nav appears for all users and adds confusion. | -| 3.8 | **Accessibility (WCAG Baseline)** | 2 — Deficient | Multiple issues: (1) No skip-to-content link; (2) Duplicate H1 elements on every page (nav H1 + page H1); (3) Heading hierarchy skips (H1 → H3 on homepage, skipping H2 for the value prop cards); (4) Body/subtitle text color contrast borderline at ~4.2:1; (5) `lang="en"` is correctly set; (6) No images on the site (good — no alt text issues); (7) Keyboard navigation untested but tab order appears logical; (8) Focus indicators not visually checked but browser defaults should apply. | -| 3.9 | **Responsive Design & Device Adaptation** | 4 — Good | Mobile layout adapts well: nav items wrap horizontally (not hamburger menu), cards stack vertically, search input and button stack correctly. Topics page columns become single-column on mobile. Technique detail page stacks the Key Moments sidebar below the content on mobile. However: the nav wrapping means all items (Home, Topics, Creators, Admin ▾) are visible but cramped on mobile; a hamburger menu would be cleaner at mobile widths. | -| 3.10 | **Performance Perception** | 5 — Excellent | Pages load near-instantly (static site generation). No layout shifts observed. Search autocomplete appears with no perceptible delay. Route transitions are fast. This is a strength — the site *feels* snappy. | -| 3.11 | **Trust & Credibility** | 3 — Adequate | The site looks polished and intentional. Open-source on GitHub adds transparency. Creator attribution is consistent. Version number in footer (v0.8.0) signals active development but also "not finished." The "Admin ▾" dropdown in the nav for all visitors undermines professional polish. 
No broken links observed. | -| 3.12 | **Emotional Design & Polish** | 2 — Deficient | This is the largest opportunity area. The site is functionally competent but emotionally flat. There are zero moments of delight, no micro-interactions, no personality. The dark theme is utilitarian, not atmospheric. For a music production audience — people who are creative, aesthetic-minded, and accustomed to visually rich tools like Ableton, Serum, and YouTube — the current design feels like a developer's side project rather than a creative tool. No sound, no visualization, no interactivity beyond search. The "Featured Technique" section could be a moment of delight but is styled identically to every other card. | - -**Overall Heuristic Average: 3.2 / 5 — Adequate with notable gaps** - ---- - -## 4. Detailed Findings - -### Critical & High Priority - ---- - -### F01 — No Guided Entry Point for Beginners - -**Severity:** Critical -**Dimension:** 3.1 — First Impression & Orientation -**Journey affected:** Journey 4 (beginner orientation) -**Location:** Homepage - -**Observation:** -The homepage assumes users know what to search for. The "Start Exploring" button links to the Topics page, which presents an expert-level taxonomy (Workflow, Music Theory, Sound Design, Synthesis, Arrangement, Mixing, Mastering). A beginner who doesn't know what "gain staging" or "FM synthesis" means has no entry point. There are no skill-level filters, no "Start Here" guide, no curated learning paths. - -**Impact:** -Beginners — a primary target audience per the project goal — will bounce. They cannot form a search query because they don't have the vocabulary. The Topics taxonomy makes sense to producers who already know the production pipeline, but is meaningless to someone who just downloaded their first DAW. 
- -**Recommendation:** -Add a **"New to Production?"** section on the homepage with 3-5 curated starting paths: -- "Making Your First Beat" → curated techniques about drums, basic arrangement -- "Understanding Sound Design" → fundamental synthesis techniques -- "Getting a Clean Mix" → EQ and compression basics - -Additionally, add **skill-level tags** to techniques (Beginner / Intermediate / Advanced) so the Topics and Search views can be filtered by experience level. - -**Effort estimate:** Medium -**Dependencies:** Requires content curation and potentially new metadata on technique records - ---- - -### F02 — Key Moments Timestamps Are Not Linked to Source Videos - -**Severity:** High -**Dimension:** 3.4 — Interaction Design & Feedback -**Journey affected:** Journey 6 (find the video moment) -**Location:** Technique detail page — Key Moments sidebar - -**Observation:** -Key Moments display the source video title and timestamps (e.g., "1:12 – 1:31") but these are not clickable links to the source YouTube video. The video title is truncated and not linked. The user can see *which* video the technique came from but cannot watch the specific segment. - -**Impact:** -The platform's value proposition is "skip the 4-hour videos." But users who want to *see* the technique demonstrated — rather than just read about it — are dead-ended. This undermines the "distilled from creator tutorials" narrative: if you can't get back to the source, the distillation feels like a one-way door. - -**Recommendation:** -Make each Key Moment's video title a link to the YouTube video at the specific timestamp (using `?t=` parameter). Format: `https://youtube.com/watch?v={id}&t={start_seconds}`. Optionally embed a small video player preview. At minimum, the video title should be a clickable link. 
- -**Effort estimate:** Small (if video URLs and timestamps are in the data) -**Dependencies:** Requires source video URLs in the technique data model - ---- - -### F03 — "Admin ▾" Dropdown Visible to All Users - -**Severity:** High -**Dimension:** 3.11 — Trust & Credibility -**Journey affected:** All journeys -**Location:** Global navigation bar - -**Observation:** -The top-right navigation includes an "Admin ▾" dropdown button visible to all visitors, not just authenticated administrators. This exposes the internal tooling surface to public users. - -**Impact:** -For regular users, this creates confusion ("Am I supposed to be an admin?") and undermines the professional polish of the product. It signals "this is a development prototype" rather than a finished product. If the dropdown contains functional admin actions, it's also a potential security concern. - -**Recommendation:** -Hide the Admin dropdown behind authentication. If the site has no auth system, use a URL parameter or local storage flag (`?admin=true` or `localStorage.chrysopedia_admin`) to enable the admin UI only when needed. - -**Effort estimate:** Small -**Dependencies:** None - ---- - -### F04 — Emotional Flatness — No Moments of Delight - -**Severity:** High -**Dimension:** 3.12 — Emotional Design & Polish -**Journey affected:** All journeys, especially Journey 5 (leisure browsing) -**Location:** Site-wide - -**Observation:** -The site is functional but emotionally inert. There are no micro-interactions (hover animations, transition effects, visual feedback), no personality in the design, no music-production-related visual metaphors or motifs. The dark theme is uniform dark navy (#0f0f14) with cyan (#22d3ee) accents — utilitarian, not atmospheric. Card hover states are a barely perceptible border color change. Page transitions are instant with no visual continuity. The "Featured Technique" section looks identical to every other content card. 
- -**Impact:** -The stated goal is to make the site "more fun or enticing to use." Electronic music producers are an aesthetic-driven audience accustomed to rich, visual tools. The current design creates no desire to return, no browsing enjoyment, no sense of discovery or play. It's a reference database that reads like one, when it could read like a creative exploration tool. - -**Recommendation:** -Implement a progressive approach: -1. **Phase 1 (Quick wins):** Add hover animations on cards (subtle scale transform + shadow), smooth scroll behavior, staggered card entrance animations on page load, a pulsing/breathing effect on the featured technique card -2. **Phase 2 (Atmospheric):** Add a subtle waveform or frequency visualization to the header area, use gradient backgrounds that shift by topic category (warm for drums, cool for synthesis), add audio waveform-inspired decorative elements -3. **Phase 3 (Interactive):** Add a "Random Technique" button with a slot-machine animation, add a "Technique of the Day" with a unique visual treatment, consider playable audio snippets on technique pages where relevant - -**Effort estimate:** Medium (Phase 1), Large (Phase 2-3) -**Dependencies:** F01 (beginner paths would benefit from the same visual treatment) - ---- - -### F05 — Topics Page Information Overload - -**Severity:** High -**Dimension:** 3.2 — Navigation & Wayfinding -**Journey affected:** Journey 2 (browse by topic) -**Location:** /topics - -**Observation:** -The Topics page loads with all 8 categories fully expanded, displaying every sub-topic simultaneously. This creates a wall of ~50+ line items across 2 columns, requiring significant scrolling to see all categories. The "Filter topics..." input at the top could help, but it's easy to miss and its behavior is unclear (does it filter categories? sub-topics? both?). The "Hide sub-topics ▲" toggles exist but default to open. 
- -**Impact:** -Users seeking a specific sub-topic must scan a large, undifferentiated list. The information density overwhelms the browsing intent. Beginners especially will be intimidated by the sheer number of technical terms presented simultaneously. - -**Recommendation:** -1. **Default to collapsed categories** — show only the 8 top-level categories with their icons, descriptions, and technique/sub-topic counts. Let users expand the ones they're interested in. -2. **Add visual differentiation** — use distinct colors or gradient accents per category (synth = purple, drums = orange, etc.) to create visual anchors for scanning. -3. **Make the filter input more prominent** — add a descriptive label ("Filter by topic name"), increase its width, and show a live count of matching results. - -**Effort estimate:** Small (collapse default), Medium (visual differentiation) -**Dependencies:** None - ---- - -### Medium Priority - ---- - -### F06 — No Search in Global Navigation - -**Severity:** Medium -**Dimension:** 3.2 — Navigation & Wayfinding -**Journey affected:** Journey 1 (find a known technique) -**Location:** Navigation bar (all pages except homepage) - -**Observation:** -The search bar only appears on the homepage hero section and the search results page. When a user is on the Topics, Creators, or Technique detail pages, there is no way to search without navigating back to the homepage. - -**Impact:** -Users who want to search from any page must click "Home" first, breaking their flow. This is a common pattern in knowledgebase sites but creates unnecessary friction, especially for the core use case of "find answers fast." - -**Recommendation:** -Add a compact search input (or a search icon that expands into an input) in the navigation bar across all pages. Alternatively, support a keyboard shortcut (Cmd+K or "/") to open a global search modal. 
- -**Effort estimate:** Medium -**Dependencies:** None - ---- - -### F07 — Heading Hierarchy and Semantic HTML Issues - -**Severity:** Medium -**Dimension:** 3.8 — Accessibility -**Journey affected:** Screen reader users on all pages -**Location:** All pages - -**Observation:** -Every page contains two H1 elements (the "Chrysopedia" logo in the nav and the page title). The homepage skips from H1 directly to H3 for the "How it works" cards, omitting H2 entirely. On the technique detail page, the heading structure is H1 → H2 (technique title) → H3 (section headings), which is correct, but the nav H1 still duplicates. - -**Impact:** -Screen reader users rely on heading hierarchy for navigation. Multiple H1 elements and skipped heading levels create a confusing document outline. This is a WCAG 2.1 Level A violation (1.3.1 Info and Relationships). - -**Recommendation:** -1. Change the nav "Chrysopedia" to a styled `<span>` or `<div>` instead of H1 -2. Fix homepage heading hierarchy: the "Production Knowledge, Distilled" should be the single H1, the value prop card titles should be H2 -3. Audit all pages for heading level consistency - -**Effort estimate:** Small -**Dependencies:** None - ---- - -### F08 — Missing Skip-to-Content Link - -**Severity:** Medium -**Dimension:** 3.8 — Accessibility -**Journey affected:** Keyboard and screen reader users on all pages -**Location:** All pages - -**Observation:** -There is no skip-to-content link that allows keyboard users to bypass the navigation and jump directly to the main content area. - -**Impact:** -Keyboard users must tab through all navigation items on every page load before reaching the content. This is particularly tedious on pages with many links (Topics has 58 links, Creator detail has 56). - -**Recommendation:** -Add a visually hidden skip link as the first focusable element: `<a href="#main-content" class="skip-link">Skip to content</a>` with CSS that shows it only on focus. - -**Effort estimate:** Trivial -**Dependencies:** None - ---- - -### F09 — Subtitle/Body Text Contrast on Dark Theme - -**Severity:** Medium -**Dimension:** 3.8 — Accessibility -**Journey affected:** All journeys -**Location:** All pages — subtitle text, card descriptions, and metadata - -**Observation:** -Secondary text uses color `rgb(139, 139, 154)` (#8B8B9A) on the dark background. Against the page background of approximately #0f0f14, this yields a contrast ratio of approximately 4.2:1 — just barely meeting WCAG AA for normal text (4.5:1 required) and likely failing under some calculations. The text appears dim and difficult to read, particularly on the homepage description and technique card previews. - -**Impact:** -Users in non-ideal lighting conditions, those with low vision, or those on monitors with lower contrast ratios may struggle to read secondary content — which includes technique descriptions, tag labels, and metadata that are essential for navigation decisions.
- -**Recommendation:** -Increase the secondary text color to at least `rgb(160, 160, 180)` (#A0A0B4) for a ~5.5:1 ratio, or better yet `rgb(180, 180, 195)` (#B4B4C3) for ~6.5:1. The current aesthetic can be preserved while improving readability. - -**Effort estimate:** Trivial -**Dependencies:** None - ---- - -### F10 — No "Surprise Me" or Random Discovery Feature - -**Severity:** Medium -**Dimension:** 3.12 — Emotional Design & Polish -**Journey affected:** Journey 5 (leisure browsing) -**Location:** Homepage, potentially global - -**Observation:** -For a knowledgebase with 175+ techniques, there is no serendipitous discovery mechanism. The "Featured Technique" shows one article. "Recently Added" shows four. The Creators page has a "Random" sort, but there's no global "show me something interesting" feature. - -**Impact:** -The stated goal includes "high value leisure paths." Currently, the only leisure path is linear browsing through topic hierarchies or scrolling through a creator's technique list. There's no mechanism to surprise, delight, or introduce users to techniques they wouldn't have searched for. - -**Recommendation:** -1. Add a **"Random Technique" button** on the homepage (floating or in the header) that navigates to a randomly selected technique -2. Add a **"Technique of the Day"** feature that highlights a different technique each day with unique visual treatment -3. On technique detail pages, add a **"Show me another like this"** button that uses tag similarity to suggest an unexpected related technique -4. 
Consider a **"Discovery Mode"** that shows a carousel or tinder-style card interface for casual browsing - -**Effort estimate:** Small (random button), Medium (technique of the day), Large (discovery mode) -**Dependencies:** None - ---- - -### F11 — Creator Stats Line Poorly Formatted - -**Severity:** Medium -**Dimension:** 3.3 — Layout, Visual Hierarchy & Typography -**Journey affected:** Journey 3 (explore by creator) -**Location:** Creator detail page (e.g., /creators/copycatt) - -**Observation:** -Below the creator name, the stats are displayed as a single unpunctuated string: "12 videos Sound design: 13 Workflow: 10 Mixing: 10 Synthesis: 5 Music Theory: 5 Sound Design: 3 Arrangement: 3". This is hard to scan and parse visually. It appears as a run-on sentence rather than structured data. - -**Impact:** -Users cannot quickly understand a creator's content distribution across topics. The visual weight of this line is lost because it reads as a paragraph rather than structured metadata. - -**Recommendation:** -Format the stats as visual pills or a small bar chart. For example: -- Use topic-colored badges: `[Sound Design: 13] [Workflow: 10] [Mixing: 10]` -- Or a horizontal bar chart showing relative content volume per category -- Separate "12 videos" from the topic breakdown with a visual divider - -**Effort estimate:** Small -**Dependencies:** None - ---- - -### F12 — Mobile Nav Should Use Hamburger Menu - -**Severity:** Medium -**Dimension:** 3.9 — Responsive Design & Device Adaptation -**Journey affected:** All mobile journeys -**Location:** Navigation bar on mobile viewport - -**Observation:** -On mobile (390px wide), the navigation items (Home, Topics, Creators, Admin ▾) display inline and wrap to a second row. While functional, this consumes vertical space and looks cramped. The "Admin ▾" button is particularly problematic on mobile — it adds a non-user-facing element to already constrained space. - -**Impact:** -Mobile users see a cluttered header. 
The nav items are small and close together, potentially causing mis-taps. This is a polish issue rather than a functional blocker, but it affects first impressions on mobile. - -**Recommendation:** -Implement a hamburger menu for viewports below 768px. The expanded menu should display items with generous touch targets (minimum 44×44px). This also makes it easy to hide the Admin option on mobile. - -**Effort estimate:** Medium -**Dependencies:** F03 (Admin visibility) - ---- - -### Low Priority / Enhancements - ---- - -### F13 — No Page-Specific Titles - -**Severity:** Low -**Dimension:** 3.8 — Accessibility / SEO -**Journey affected:** All journeys -**Location:** Browser tab title on all pages - -**Observation:** -Every page has the same title: "Chrysopedia". The Topics page, Creators page, individual technique pages, and About page all show the same browser tab title. This makes it impossible to distinguish tabs and hurts SEO. - -**Impact:** -Users with multiple Chrysopedia tabs cannot differentiate them. Search engines cannot distinguish pages from the title alone. Browser history shows identical entries. - -**Recommendation:** -Use descriptive page titles: "Bass — Sound Design — Chrysopedia", "COPYCATT — Chrysopedia", "FM Bass Foundation by COPYCATT — Chrysopedia", etc. - -**Effort estimate:** Trivial -**Dependencies:** None - ---- - -### F14 — Empty Topic Sub-categories Show No Guidance - -**Severity:** Low -**Dimension:** 3.6 — Error Handling & Recovery -**Journey affected:** Journey 2 (browse by topic) -**Location:** Topics page — e.g., Counterpoint (0 techniques · 0 creators), Physical Modeling (0 techniques · 0 creators) - -**Observation:** -Some sub-topics have zero techniques and zero creators but are still listed without any context. Users who click into these will find nothing. - -**Impact:** -Minor — these are likely placeholders for future content. But they set expectations that cannot be met and clutter the topic list. 
- -**Recommendation:** -Either hide empty sub-topics by default (with a "Show all including empty" toggle), or add a subtle "Coming soon" badge. If clicked, the empty topic page should say "No techniques yet — check back soon" rather than an empty list. - -**Effort estimate:** Trivial -**Dependencies:** None - ---- - -### F15 — "Start Exploring" Button Label is Vague - -**Severity:** Low -**Dimension:** 3.7 — Content Quality & Microcopy -**Journey affected:** Journey 2, Journey 4 -**Location:** Homepage, About page - -**Observation:** -The primary CTA below the value proposition cards says "Start Exploring" but links to the Topics page. The label doesn't tell the user what they'll find when they click. - -**Impact:** -Minor — users will click and see Topics. But a more specific label could set better expectations and serve different user types. - -**Recommendation:** -Consider context-aware CTAs: -- "Browse All Topics" (descriptive) -- "Find Your First Technique" (beginner-oriented) -- Or split into two CTAs: "Browse Topics" / "Search for Something Specific" - -**Effort estimate:** Trivial -**Dependencies:** F01 (if beginner path is added, this button could link there) - ---- - -### F16 — Tag Overflow on Technique Cards - -**Severity:** Low -**Dimension:** 3.3 — Layout, Visual Hierarchy & Typography -**Journey affected:** Journey 2, Journey 3 -**Location:** Technique cards on search results, topic pages, and creator pages - -**Observation:** -Some technique cards (e.g., "Drum Synthesis Fundamentals by COPYCATT") display 8+ tag pills, causing them to wrap to multiple lines and pushing the preview text far down the card. The tag density varies significantly between techniques, creating uneven card heights in the grid. - -**Impact:** -Visual inconsistency makes scanning harder. Cards with many tags feel cluttered compared to cards with 2-3 tags. - -**Recommendation:** -Limit visible tags to 3-4, with a "+N more" indicator that reveals the full list on hover or click. 
Alternatively, show only the primary category tag on cards and reveal full tags on the detail page. - -**Effort estimate:** Small -**Dependencies:** None - ---- - -## 5. Strategic Summary & Development Handoff - -### Overall Assessment - -Chrysopedia is a **solid content platform with excellent fundamentals** — the content quality is genuinely high, the information architecture is well-structured, search works well, and performance is excellent. The site successfully delivers on its core promise of distilling long-form tutorials into searchable, structured knowledge. - -However, the site currently optimizes for **expert users who already know what they're looking for** while neglecting the discovery and delight dimensions that would make it compelling for beginners and casual browsers. The aesthetic is competent but emotionally flat — it reads as a database interface rather than a creative learning environment. For an audience of music producers who are accustomed to rich, visual, and interactive tools, the current experience is unlikely to generate the kind of engagement and return visits that grow a community. - -**The single most impactful area for improvement is adding guided entry points for beginners and discovery mechanisms for casual browsers.** The content is the product's greatest asset; the primary gap is surfacing that content to users who don't yet know how to find it. - ---- - -### Top 5 Priorities - -#### Priority 1: Beginner Onboarding & Learning Paths (F01) - -**Objective:** Give users who don't know music production terminology a clear starting point. -**Success criteria:** A first-time visitor with zero production knowledge can find and read their first relevant technique within 60 seconds. - -##### Subtasks: -1. Define 3-5 beginner learning paths with curated technique selections (e.g., "Your First Beat", "Understanding Synthesis", "Mixing Basics") -2. Create a "New to Production?" 
section on the homepage above the fold (or as a prominent secondary CTA alongside search) -3. Add skill-level metadata (beginner/intermediate/advanced) to technique records -4. Build a learning path page template that sequences techniques in pedagogical order -5. Add skill-level filter pills to Topics and Search views - -##### Acceptance criteria: -- [ ] Homepage has a visible "New to Production?" section or CTA -- [ ] At least 3 curated learning paths are accessible from the homepage -- [ ] Each path sequences 5-10 techniques in learning order -- [ ] Skill-level filters appear on Topics and Search result pages - -##### Context for implementing agent: -The site appears to be built with a static site generator (SSG). Learning paths could be implemented as a new content type in the data model. Technique cards already support metadata tags — skill level could be an additional categorical tag. The homepage layout uses a grid of cards, so a "New to Production?" section could follow the same component pattern. - ---- - -#### Priority 2: Source Video Deep Links on Key Moments (F02) - -**Objective:** Make Key Moments actionable by linking timestamps to source YouTube videos. -**Success criteria:** Every Key Moment timestamp is a clickable link that opens the source video at the correct time. - -##### Subtasks: -1. Audit the data model to confirm source video URLs and timestamps are available -2. Add YouTube link construction (video_id + timestamp → `?t=` parameter) -3. Render each Key Moment video title as a link to YouTube at the start timestamp -4. Add a small YouTube icon or "Watch" affordance next to the timestamp -5. 
Consider adding an embedded YouTube player that auto-seeks to the relevant timestamp - -##### Acceptance criteria: -- [ ] Every Key Moment timestamp links to the source YouTube video at the correct time -- [ ] Links open in a new tab -- [ ] Video title is the link text, styled as a clickable element - -##### Context for implementing agent: -Key Moments currently show a video title (e.g., "Sound Design - Everything In 2 Hours Speedr...") and timestamps. The video URL likely exists in the data pipeline since these are sourced from YouTube. The implementation is a template change on the technique detail page, specifically in the Key Moments sidebar component. - ---- - -#### Priority 3: Visual Delight & Atmosphere (F04) - -**Objective:** Transform the site from a utilitarian database into an engaging creative learning environment. -**Success criteria:** Users describe the site as "fun to browse" and spend more time per session. - -##### Subtasks: -1. Add card hover animations (scale: 1.02, box-shadow increase, 200ms ease transition) -2. Add staggered entrance animations for card grids on page load (fade-in-up, 50ms delay between cards) -3. Redesign the "Featured Technique" section with a larger, visually distinct treatment (gradient border, glow effect, or full-bleed layout) -4. Add smooth scroll behavior globally -5. Implement subtle category-colored accents (topic category → color mapping for borders, badges, and highlights) -6. Add a "Random Technique" button with a dice icon in the nav or hero area -7. 
Consider a waveform or frequency visualization motif in the header/hero background - -##### Acceptance criteria: -- [ ] Cards animate on hover with smooth transitions -- [ ] Card grids use staggered entrance animations -- [ ] Featured Technique has a visually distinct, premium treatment -- [ ] "Random Technique" button exists and functions -- [ ] At least 3 distinct category colors are used across the site - -##### Context for implementing agent: -The site uses a dark theme with CSS custom properties. Card components use border styling that can be enhanced with transitions. Animations can be implemented with CSS `@keyframes` and `animation-delay` calculated from card index. The category color system could use a CSS custom property map (`--cat-synthesis: #8b5cf6; --cat-mixing: #3b82f6;` etc.). - ---- - -#### Priority 4: Collapse Topics & Improve Navigation (F05, F06) - -**Objective:** Reduce Topics page overwhelm and add global search. -**Success criteria:** Topics page loads in an unintimidating collapsed state; search is accessible from every page. - -##### Subtasks: -1. Change Topics page to default-collapsed categories (click to expand) -2. Add expand/collapse animation (slide-down, ~200ms) -3. Add a search input to the global navigation bar (compact style, visible on all pages) -4. Support Cmd+K keyboard shortcut to focus the search bar -5. Style each topic category card with a distinct accent color for visual differentiation -6. Make the topic filter input more prominent with a label and live count - -##### Acceptance criteria: -- [ ] Topics page loads with all categories collapsed -- [ ] Clicking a category smoothly expands its sub-topics -- [ ] Search input is available in the nav bar on all pages -- [ ] Cmd+K or "/" keyboard shortcut focuses the search input - -##### Context for implementing agent: -The Topics page currently renders all categories with their sub-topics expanded. 
The toggle mechanism exists ("Hide sub-topics ▲") so the infrastructure is there — it just needs the default state flipped. The global search could be a simplified version of the homepage search component, placed in the nav bar with responsive sizing. - ---- - -#### Priority 5: Accessibility Fixes (F07, F08, F09, F13) - -**Objective:** Bring the site to WCAG 2.1 Level AA compliance on core accessibility metrics. -**Success criteria:** All automated accessibility checks pass (axe-core, Lighthouse accessibility). - -##### Subtasks: -1. Fix heading hierarchy: single H1 per page, sequential heading levels -2. Add skip-to-content link -3. Increase secondary text contrast ratio to ≥ 4.5:1 -4. Add page-specific `<title>` elements -5. Hide Admin dropdown from public users -6. Run axe-core audit and fix any additional findings - -##### Acceptance criteria: -- [ ] Each page has exactly one H1 element -- [ ] Heading levels are sequential (no skips) -- [ ] Skip-to-content link is present and functional -- [ ] All text meets WCAG AA contrast ratios (4.5:1 for normal text, 3:1 for large text) -- [ ] Browser tab titles are page-specific -- [ ] Lighthouse Accessibility score ≥ 90 - -##### Context for implementing agent: -The nav component uses an H1 for the "Chrysopedia" logo — change to a styled `<span>` or `<div>`. Page titles need to be set in the `<head>` per-route. The skip link is a single `<a>` element with a CSS `.sr-only` class that becomes visible on focus. Text contrast fix is a single CSS custom property change. - ---- - -### Quick Wins (Trivial/Small Effort) - -1. **F08 — Add skip-to-content link** (Trivial) — one HTML element + 5 lines CSS -2. **F13 — Add page-specific titles** (Trivial) — template change per route -3. **F09 — Increase text contrast** (Trivial) — CSS color value change -4. **F15 — Improve "Start Exploring" label** (Trivial) — text change -5. **F14 — Handle empty sub-topics** (Trivial) — conditional display -6.
**F03 — Hide Admin dropdown from public** (Small) — conditional rendering -7. **F07 — Fix heading hierarchy** (Small) — semantic HTML changes -8. **F11 — Format creator stats** (Small) — component redesign -9. **F16 — Limit visible tags on cards** (Small) — component change - ---- - -### Systemic Patterns - -1. **Expert-first design bias.** The site consistently assumes users already know what they want. Search assumes vocabulary. Topics assumes taxonomy knowledge. There is no scaffolding for beginners. This is the single most impactful systemic issue. - -2. **Content-rich, experience-poor.** The content quality is genuinely excellent — detailed, educational, well-structured. But the presentation treats every piece of content identically. There's no visual hierarchy between "featured" and "ordinary" content, no emotional differentiation between categories, no design system that celebrates the content. - -3. **Missing feedback loops.** Users consume content but have no way to signal what was useful, save favorites, track what they've read, or build a personal learning path. The experience is purely consumptive with no personalization or progression signals. - -4. **Accessibility as an afterthought.** Multiple WCAG issues (heading hierarchy, contrast, skip links, page titles) suggest accessibility was not part of the development process. These are individually small fixes but collectively indicate a need for an accessibility-first development practice going forward. 
- ---- - -## Appendix A: Pages Traversed - -| Page | URL | Key Observations | -|------|-----|------------------| -| Homepage | / | Clear value prop, search works well, lacks beginner guidance | -| Topics | /topics | Dense, all expanded by default, good taxonomy but overwhelming | -| Creators | /creators | Clean, good genre filters, "Random" sort is nice | -| Creator Detail | /creators/copycatt | Good back navigation, stats line needs formatting | -| Technique Detail | /techniques/fm-bass-foundation-copycatt | Two-column layout, Key Moments excellent but not linked | -| Technique Detail (dense) | /techniques/drum-synthesis-fundamentals-copycatt | 28 key moments, content-heavy, good related techniques section | -| Search Results | /search?q=bass | Clean results, 20 results for "bass", good metadata on cards | -| Topic Drill-down | /topics/sound-design/bass | Good breadcrumbs, grouped by creator, clean layout | -| About | /about | Clear explanation, nice pipeline visualization | -| Homepage (mobile) | / (390px) | Layout adapts but nav is cramped | -| Topics (mobile) | /topics (390px) | Single column works well | - -## Appendix B: Technical Notes - -- **Framework:** Static Site Generator (pages load instantly, no client-side routing observed) -- **Version:** v0.8.0 (Build 2026-03-31) -- **Content volume:** ~175 techniques, 5 creators, 8 topic categories, ~50 sub-topics -- **Accessibility issues found:** Duplicate H1, heading level skips, missing skip link, borderline contrast on secondary text, no page-specific titles -- **Performance:** Excellent — no measurable lag on any interaction -- **No images found** on the site (all content is text-based) -- **Footer links:** GitHub (source code), About page — good transparency diff --git a/PROJECT_CONTEXT.md b/PROJECT_CONTEXT.md deleted file mode 100644 index aa1335c..0000000 --- a/PROJECT_CONTEXT.md +++ /dev/null @@ -1,461 +0,0 @@ -# Chrysopedia — Project Context Document -> Auto-generated: 2026-04-01 | Assessed Stage: 
**Integration/Stabilization** | Root: `/home/aux/projects/content-to-kb-automator` - -## Overview - -Chrysopedia is a **self-hosted knowledge extraction and retrieval system for electronic music production content**. It takes raw video files (tutorials, livestreams, track breakdowns) from 50+ electronic music producers, transcribes them via Whisper, runs them through a multi-stage LLM pipeline to extract structured knowledge, and serves the results through a search-first web UI designed for mid-session retrieval — a producer Alt+Tabs from their DAW, searches for a technique, absorbs the answer, and gets back to work in under 30 seconds. - -**Audience:** Electronic music producers, primarily one power user (the project owner) with a personal library of 100-500 video files. Single-admin tool, not multi-tenant. - -**Project type:** Full-stack web application with an LLM-powered data pipeline. Monorepo with backend (Python/FastAPI), frontend (React/TypeScript), Whisper transcription script, Docker Compose deployment, and prompt engineering toolkit. - -**Evidence for purpose:** Extensive 37-page spec (`chrysopedia-spec.md`), README with architecture diagrams, detailed PROJECT.md in GSD artifacts, 23 decisions logged, 32 requirements tracked (28 validated, 1 active, 4 out-of-scope). Etymology: *chrysopoeia* (alchemical transmutation) + *encyclopedia*. - -**Canonical development directory:** This is **not** the active development location. Per `CLAUDE.md`, all future development happens on `ub01` at `/vmPool/r/repos/xpltdco/chrysopedia`. This directory was the initial workspace. GitHub: `github.com/xpltdco/chrysopedia` (private, xpltdco org). 
- ---- - -## Architecture & Stack - -### Technology Stack - -| Layer | Technology | Version/Notes | -|-------|-----------|---------------| -| **Backend** | Python 3.12, FastAPI, SQLAlchemy (async), Pydantic Settings | API + business logic | -| **Task Queue** | Celery + Redis (broker + result backend) | Sync tasks, concurrency=1 | -| **Database** | PostgreSQL 16 (asyncpg driver) | Primary data store | -| **Vector DB** | Qdrant v1.13.2 | Semantic search embeddings | -| **Embeddings** | Ollama (nomic-embed-text, 768-dim) | Local CPU inference | -| **LLM** | OpenAI-compatible API (DGX Sparks Qwen primary, Ollama fallback) | Per-stage model routing (chat vs thinking) | -| **Frontend** | React 18.3, TypeScript 5.6, Vite 6, React Router 6.28 | Zero UI libraries — all custom CSS | -| **Web Server** | nginx 1.27 (Alpine) | SPA routing + API proxy | -| **Containerization** | Docker Compose | 8 services, dedicated bridge network | -| **Deployment** | ub01 (on-premises server) | Bind mounts to `/vmPool/r/services/chrysopedia_*` | -| **Reverse Proxy** | nginx on nuc01 (separate machine) | Routes `chrysopedia.xpltd.co` → ub01:8096 | - -### System Architecture - -``` -Desktop (GPU workstation — hal0022) - └── whisper/transcribe.py → JSON transcripts → SCP/rsync to /watch folder - -Docker Compose on ub01 (8 services on 172.32.0.0/24): - ┌─────────────┐ ┌───────┐ ┌────────┐ ┌────────┐ - │ PostgreSQL │ │ Redis │ │ Qdrant │ │ Ollama │ - │ :5433→5432 │ │ broker│ │ vector │ │ embed │ - └──────┬───────┘ └───┬───┘ └───┬────┘ └───┬────┘ - └──────┬───────┴──────────┴────────────┘ - │ - ┌─────────────┼────────────────────────────────┐ - │ FastAPI API │ Celery Worker │ Watcher │ - │ REST + admin │ LLM pipeline │ /watch→POST │ - └──────────────┴─────────────────┴──────────────┘ - │ - ┌─────────────┴──────┐ - │ nginx (React SPA) │ - │ :8096→80 │ - └────────────────────┘ -``` - -**Data flow:** Video → Whisper transcript JSON → Watcher POSTs to `/api/v1/ingest` → Celery pipeline (4 LLM stages: 
segment → extract → classify → synthesize) → KeyMoments + TechniquePages in PostgreSQL → Embeddings in Qdrant → Search-first web UI. - -**External integrations:** -- OpenWebUI at `chat.forgetyour.name` (DGX Sparks Qwen models for LLM inference) -- AdGuard DNS on ub01 for internal domain resolution -- nginx on nuc01 for external HTTPS termination (via Certbot) - -### Data Model - -11 entities across 11 tables: - -| Entity | Purpose | Key Fields | -|--------|---------|------------| -| **Creator** | Artists/producers | name, slug, genres[], folder_name, hidden | -| **SourceVideo** | Processed video files | filename, content_hash (dedup), processing_status, classification_data (JSONB) | -| **TranscriptSegment** | Whisper output rows | start_time, end_time, text, segment_index, topic_label | -| **KeyMoment** | LLM-extracted insights | title, summary, start_time, end_time, content_type, plugins[] | -| **TechniquePage** | Synthesized knowledge (primary output) | title, slug, topic_category, topic_tags[], body_sections (JSONB), signal_chains (JSONB), plugins[] | -| **TechniquePageVersion** | Pre-overwrite snapshots | content_snapshot (JSONB), pipeline_metadata (JSONB), version_number | -| **RelatedTechniqueLink** | Cross-references | source→target, relationship type | -| **Tag** | Topic taxonomy | name, category, aliases[] | -| **ContentReport** | User-reported issues | report_type, status, admin_notes | -| **PipelineRun** | Pipeline execution record | video_id, run_number, trigger, status, total_tokens | -| **PipelineEvent** | Per-stage execution log | stage, event_type, token counts, payload (JSONB), debug I/O columns | - -**Relationships:** Creator → SourceVideo → TranscriptSegment, KeyMoment; Creator → TechniquePage → KeyMoment, TechniquePageVersion, RelatedTechniqueLink; SourceVideo → PipelineRun → PipelineEvent. - -**Migrations:** 11 Alembic migrations (001 through 011), covering initial schema through pipeline runs and classification cache additions. 
- ---- - -## Project Structure - -``` -chrysopedia/ -├── backend/ # FastAPI application (10,209 LOC Python) -│ ├── main.py # App entry, middleware, router mounting -│ ├── config.py # Pydantic Settings (all env vars) -│ ├── database.py # Async engine + session factory -│ ├── models.py # 11 SQLAlchemy ORM models -│ ├── schemas.py # Pydantic request/response schemas (422 lines) -│ ├── worker.py # Celery app config -│ ├── watcher.py # Folder monitor → auto-ingest service -│ ├── search_service.py # Async semantic + keyword search (603 lines) -│ ├── redis_client.py # Redis client for feature flags -│ ├── routers/ # 9 API router modules -│ │ ├── health.py, ingest.py, search.py, techniques.py -│ │ ├── creators.py, topics.py, videos.py -│ │ ├── pipeline.py (admin), reports.py -│ ├── pipeline/ # LLM pipeline core (2,908 LOC) -│ │ ├── stages.py # 4 LLM stages + orchestrator (2,102 lines — largest file) -│ │ ├── llm_client.py # OpenAI-compatible sync client with fallback -│ │ ├── embedding_client.py # Sync embedding client for Celery -│ │ ├── qdrant_client.py # Qdrant upsert + collection management -│ │ ├── schemas.py # Pipeline data schemas -│ │ └── quality/ # Prompt optimization toolkit (2,507 LOC) -│ │ ├── fitness.py # LLM fitness test suite (9 tests) -│ │ ├── scorer.py # 5-dimension LLM-as-judge scoring -│ │ ├── optimizer.py # Automated prompt A/B optimization -│ │ ├── variant_generator.py # LLM-powered prompt mutation -│ │ └── voice_dial.py # Voice preservation dial -│ └── tests/ # Integration tests (2,754 LOC, 65 tests) -├── frontend/ # React SPA (9,975 LOC TypeScript + CSS) -│ └── src/ -│ ├── pages/ # 10 page components -│ ├── components/ # 9 shared components -│ ├── hooks/ # 2 custom hooks -│ ├── api/ # Typed API client -│ └── App.css # 4,871 lines — all styles (no CSS framework) -├── whisper/ # Desktop transcription scripts -├── prompts/ # 3 active prompt templates + 100 stage5 variants -├── alembic/ # 11 database migrations -├── config/ # canonical_tags.yaml 
(7-category topic taxonomy) -├── docker/ # Dockerfile.api, Dockerfile.web, nginx.conf -├── docker-compose.yml # 8-service stack definition -├── generate_stage5_variants.py # Stage 5 prompt variant generator (874 lines — one-off tool) -├── .gsd/ # GSD project management artifacts -│ ├── PROJECT.md, REQUIREMENTS.md, DECISIONS.md, KNOWLEDGE.md -│ └── milestones/ # 13 completed milestone artifacts -└── .env.example # Environment variable template -``` - -**Entry points:** -- `backend/main.py` → FastAPI app (`uvicorn main:app`) -- `backend/worker.py` → Celery worker (`celery -A worker worker`) -- `backend/watcher.py` → Folder watcher service (`python watcher.py`) -- `frontend/src/main.tsx` → React app (Vite dev server or nginx-served build) -- `whisper/transcribe.py` → Desktop transcription CLI -- `backend/pipeline/quality/__main__.py` → Prompt quality toolkit CLI - ---- - -## Configuration & Environment - -### Environment Variables - -| Variable | Purpose | Default | -|----------|---------|---------| -| `POSTGRES_USER` | Database user | `chrysopedia` | -| `POSTGRES_PASSWORD` | Database password | `changeme` | -| `POSTGRES_DB` | Database name | `chrysopedia` | -| `DATABASE_URL` | Full async connection string | Composed from above | -| `REDIS_URL` | Redis broker URL | `redis://chrysopedia-redis:6379/0` | -| `LLM_API_URL` | Primary LLM endpoint | OpenWebUI on DGX | -| `LLM_API_KEY` | LLM authentication | Required | -| `LLM_MODEL` | Default LLM model name | `fyn-llm-agent-chat` | -| `LLM_FALLBACK_URL` / `_MODEL` | Fallback LLM endpoint | Same as primary | -| `LLM_STAGE{2-5}_MODEL` | Per-stage model override | chat for 2/4, think for 3/5 | -| `LLM_STAGE{2-5}_MODALITY` | chat or thinking per stage | See above | -| `LLM_MAX_TOKENS` | LLM response token limit | `32768` | -| `LLM_TEMPERATURE` | LLM temperature | `0.0` (deterministic) | -| `SYNTHESIS_CHUNK_SIZE` | Max moments per synthesis call | `30` | -| `EMBEDDING_API_URL` | Ollama embedding endpoint | Container-internal | -| 
`EMBEDDING_MODEL` | Embedding model name | `nomic-embed-text` | -| `EMBEDDING_DIMENSIONS` | Vector dimensionality | `768` | -| `QDRANT_URL` | Qdrant endpoint | Container-internal | -| `QDRANT_COLLECTION` | Qdrant collection name | `chrysopedia` | -| `APP_ENV` | Environment name | `development` | -| `APP_LOG_LEVEL` | Log level | `info` | -| `APP_SECRET_KEY` | Application secret | `changeme-generate-a-real-secret` | -| `CORS_ORIGINS` | Allowed CORS origins | `["*"]` | -| `REVIEW_MODE` | Require admin review of moments | `true` | -| `DEBUG_MODE` | Capture full LLM I/O in events | `false` | -| `TRANSCRIPT_STORAGE_PATH` | Transcript file storage | `/data/transcripts` | -| `VIDEO_METADATA_PATH` | Video metadata storage | `/data/video_meta` | -| `PROMPTS_PATH` | Prompt template directory | `./prompts` | -| `GIT_COMMIT_SHA` | Build-time commit hash | `unknown` | -| `WATCH_FOLDER` | Watcher monitored directory | `/watch` | -| `WATCHER_API_URL` | Ingest endpoint for watcher | Container-internal | -| `WATCHER_STABILITY_SECONDS` | File stability wait time | `2` | -| `WATCHER_POLL_INTERVAL` | Filesystem poll interval | `5` | -| `GIT_COMMIT_SHA` (build arg) | Passed at Docker build time for footer | `dev` | -| `VITE_GIT_COMMIT` (build arg) | Frontend build-time constant | `dev` | - -### Environments - -- **Production:** Docker Compose on ub01, `.env` file with real credentials -- **Local dev:** Backend runs locally with `docker compose up -d chrysopedia-db chrysopedia-redis`, `.env` in backend/ -- **Test:** Uses real PostgreSQL (test database), configured in `backend/tests/conftest.py` -- No staging environment exists. - -### Secrets Management - -Environment variables via `.env` file (gitignored). No vault, KMS, or sealed secrets. The `.env.example` contains placeholders. `backend/.env` exists locally (not tracked in git) and contains a real API key — this is expected for local dev but the key should be rotated if this directory is ever shared. 
- ---- - -## Development Workflow - -### Getting Started - -```bash -# 1. Clone the repo -git clone git@github.com:xpltdco/chrysopedia.git -cd chrysopedia - -# 2. Configure environment -cp .env.example .env -# Edit .env with real LLM_API_KEY and POSTGRES_PASSWORD - -# 3. Start infrastructure -docker compose up -d - -# 4. Run migrations -docker exec chrysopedia-api alembic upgrade head - -# 5. Pull embedding model (first time) -docker exec chrysopedia-ollama ollama pull nomic-embed-text - -# 6. Verify -curl http://localhost:8096/health -``` - -**For local backend development (outside Docker):** -```bash -python -m venv .venv && source .venv/bin/activate -pip install -r backend/requirements.txt -docker compose up -d chrysopedia-db chrysopedia-redis # just infra -alembic upgrade head -cd backend && uvicorn main:app --reload --host 0.0.0.0 --port 8001 # 8001 to avoid kerf-engine conflict on 8000 -``` - -**For frontend development:** -```bash -cd frontend && npm ci && npm run dev -``` - -### Key Commands - -| Task | Command | -|------|---------| -| Start full stack | `docker compose up -d` | -| Rebuild after code changes | `docker compose build && docker compose up -d` | -| Run migrations | `docker exec chrysopedia-api alembic upgrade head` | -| Create migration | `alembic revision --autogenerate -m "description"` | -| View API logs | `docker logs -f chrysopedia-api` | -| View worker logs | `docker logs -f chrysopedia-worker` | -| Run tests | `cd backend && pytest` | -| Frontend dev server | `cd frontend && npm run dev` | -| Frontend build | `cd frontend && npm run build` | -| Prompt quality CLI | `cd backend && python -m pipeline.quality` | -| Deploy to ub01 | `ssh ub01; cd /vmPool/r/repos/xpltdco/chrysopedia; git pull && docker compose build && docker compose up -d` | - -### CI/CD Pipeline - -**None.** No `.github/workflows/`, no CI config files. Deployment is manual: `git pull && docker compose build && docker compose up -d` on ub01. 
[inferred — high confidence based on absence of any CI configuration] - -### Code Conventions - -- **Python:** No linter config (no ruff, black, flake8 config files found). Code follows PEP 8 by convention. Type hints used throughout (Python 3.12 features like `X | None`). -- **TypeScript:** No ESLint config. TypeScript strict mode via tsconfig. Zero-dependency UI (no UI libraries, no Tailwind). -- **CSS:** Single monolithic `App.css` (4,871 lines). 77 CSS custom properties for theming. Dark theme with cyan accent (`#22d3ee`). -- **Naming:** Slugified URLs, snake_case Python, camelCase TypeScript. SQLAlchemy models use `Mapped` annotations. Pydantic schemas use `model_config = {"from_attributes": True}`. -- **No pre-commit hooks, no `.editorconfig`, no formatter configs.** - ---- - -## Current State Assessment - -**Stage: Integration/Stabilization** — All 13 milestones complete. 28 of 32 requirements validated. 171 commits over 3 days (March 29–April 1, 2026) by a single contributor. The system is deployed and running. However, it was built rapidly by AI agents (GSD workflow), the pipeline is running inline (not via Celery chain as originally designed per recent commit `29f6e74`), and there are no CI/CD guardrails. The codebase is functional but hasn't been through the hardening that comes from sustained multi-user operation. - -### Recent Activity - -- **171 commits** from 2026-03-29 to 2026-04-01 (3 days of intense development) -- **Single contributor:** jlightner -- **Last commit:** `29f6e74` — "pipeline: run stages inline instead of Celery chain dispatch" -- **Most recent work:** Stage 5 prompt optimization (100 variant prompts generated), inline pipeline execution, prompt quality toolkit (M013) - -### Active Branches - -Only `main` exists. All development has been on a single branch. No feature branches, no release branches. 
- -### What's Working - -- Full 6-stage pipeline (transcription → ingestion → LLM extraction → review → synthesis → search) -- Docker Compose deployment with 8 services, healthchecks on all containers -- Search (semantic via Qdrant + keyword fallback with multi-token AND matching) -- Admin review queue with approve/edit/reject workflows -- Pipeline admin dashboard with event logs, token usage, retrigger controls -- 10-page React SPA with responsive design, topic taxonomy, creator browse, technique detail -- Folder watcher for auto-ingestion of new transcripts -- Article versioning with pipeline metadata snapshots -- 65 integration tests covering all major API paths -- Prompt quality toolkit (fitness tests, scoring, automated optimization) - -### What's In Progress - -- **Stage 5 prompt optimization:** 100 variant prompts generated (`prompts/stage5_variants/`), active A/B testing with the quality toolkit. The most recent commits are all prompt refinement. -- **Inline pipeline execution:** The latest commit switches from Celery chain dispatch to inline stage execution, suggesting the Celery chaining had issues. -- **`generate_stage5_variants.py`** (874 lines) is a one-off script at project root — should likely be absorbed into the quality toolkit or removed. - -### Technical Debt Inventory - -**Zero TODOs/FIXMEs/HACKs in source code.** All annotations found were in `node_modules/` (third-party). This is notable — either debt was addressed as it arose, or code annotations weren't used as a practice. - -**Implicit debt captured in KNOWLEDGE.md:** -- QdrantManager uses random UUIDs for point IDs, causing duplicates on re-index (noted as deferred fix — use deterministic UUIDs) -- LLM-generated topic categories have inconsistent casing (deferred) -- Stage 4 classification data stored in Redis with 24h TTL instead of DB columns (expedient but fragile) - -**Structural debt:** -- `frontend/src/App.css` — 4,871-line monolithic stylesheet. 
No CSS modules, no component-scoped styles. -- `backend/pipeline/stages.py` — 2,102 lines. All 4 LLM stages + orchestrator in one file. -- `generate_stage5_variants.py` — 874-line one-off script at project root. -- `prompts/stage5_variants/.v016.txt.swp` — vim swap file committed (harmless but untidy). -- No authentication on any endpoint (admin or public). Single-admin tool by design, but the admin endpoints are exposed to anyone on the network. -- CORS allows all origins (`"*"`). - -### Test Coverage - -- **Framework:** pytest + pytest-asyncio -- **Test count:** 65 tests across 4 files (ingest: 6, pipeline: 11, public API: 26, search: 22) -- **Test LOC:** 2,754 (27% of backend source LOC) -- **Approach:** Integration tests against real PostgreSQL with NullPool. Mock LLM responses via fixtures. httpx.AsyncClient with ASGI transport for API tests. -- **Missing:** No frontend tests. No unit tests for pipeline stages in isolation. No load/performance tests. No test for the watcher service. No test for the quality toolkit. -- **No CI:** Tests are run manually (`cd backend && pytest`). - -### Documentation Status - -- **README.md:** Comprehensive (19KB) — architecture diagrams, quick start, full API reference, environment variables, deployment instructions. High quality. -- **chrysopedia-spec.md:** Detailed 37-page product specification. Thorough and thoughtful. -- **CLAUDE.md:** Development reference with deployment info and quick commands. -- **GSD artifacts:** 13 milestone summaries, 23 decisions, 32 requirements, extensive KNOWLEDGE.md with 30+ lessons learned. Unusually thorough project history. -- **prompts/README.md:** Exists (not inspected in detail). -- **whisper/README.md:** Exists for transcription docs. -- **Missing:** No API documentation generation (no OpenAPI spec export, though FastAPI auto-generates one at `/docs`). No architecture decision records beyond GSD decisions. No runbook for operations/debugging. 
- ---- - -## Red Flags & Observations - -### Security - -1. **No authentication on any endpoint.** Admin endpoints (pipeline control, review queue, debug mode toggle) are accessible to anyone who can reach the server. Acceptable for a single-user tool on a private network, but risky if the port is ever exposed. -2. **CORS allows all origins** (`cors_origins: ["*"]`). No restriction on which domains can call the API. -3. **`backend/.env` contains a real API key** (`sk-dcdd...`). Not tracked in git (correctly gitignored), but present on disk. Standard for local dev. -4. **`APP_SECRET_KEY` defaults to `changeme-generate-a-real-secret`** in config.py. If the .env doesn't override this, it's a predictable secret (though it's unclear if anything actually uses it — no session/JWT middleware found). - -### Architectural Concerns - -5. **Monolithic CSS file** (4,871 lines). Any style change requires searching through a single massive file. No component isolation. -6. **stages.py god file** (2,102 lines). Four LLM stages + orchestrator + helpers all in one module. Each stage is a complex function with JSON parsing, error recovery, and DB writes. -7. **Pipeline switched from Celery chains to inline execution** (latest commit). This suggests Celery task chaining had reliability issues. Inline execution means the API request thread runs all LLM stages synchronously — a single pipeline run could take 10+ minutes blocking a worker. -8. **Qdrant duplicate points on re-index** (documented in KNOWLEDGE.md, unfixed). Random UUIDs mean every re-embed creates duplicates instead of upserts. -9. **No retry/backoff on LLM API calls** beyond the primary→fallback pattern. If both endpoints are down, the pipeline fails immediately. - -### Fragile Areas - -10. **Classification data in Redis with 24h TTL.** If Redis restarts between stage 4 and stage 5, classification data is lost and stage 5 fails or produces degraded output. -11. 
**Frontend has zero type-safe API layer.** The `public-client.ts` uses `fetch()` directly. No generated types from the backend schema. API contract drift is possible. -12. **Single-branch development.** All 171 commits on `main`. No protection against broken deploys. - -### Inconsistencies - -13. **FastAPI version in `app = FastAPI(version="0.1.0")` vs `package.json` version `"0.8.0"`.** No single source of truth for the project version. - ---- - -## Trajectory & Opportunities - -### Where It's Heading - -The most recent work is **prompt quality optimization** — generating 100 stage 5 variants and building automated A/B testing infrastructure. The project owner is clearly focused on improving the LLM output quality now that the infrastructure is stable. - -The inline pipeline execution change suggests the next phase may involve **processing real video content at scale** and encountering reliability issues with the current architecture. - -### Partially Built / Stubbed Features - -- **Content reports** — Model and API exist (`ContentReport`, `/api/v1/reports`), admin reports page exists, but unclear if actively used. -- **View counts** — `view_count` field on Creator and TechniquePage models, but no increment logic found. Fields default to 0. -- **Creator hidden flag** — `hidden` boolean on Creator model (migration 009), but no admin UI to toggle it. -- **Genre filtering on Creators page** — Spec mentions it, UI has it, but genre data depends on pipeline classification which may not populate genres consistently. - -### Capability Gaps - -- **No authentication/authorization.** Adding a simple API key or basic auth for admin endpoints would be a quick security win. -- **No WebSocket/SSE for pipeline progress.** The admin UI polls for pipeline status. Real-time updates would improve the pipeline monitoring experience. -- **No full-text search index.** Keyword search uses `ILIKE` which doesn't scale. PostgreSQL `tsvector`/GIN index would be significantly faster. 
-- **No backup strategy documented.** PostgreSQL data and Qdrant vectors are on bind mounts but no backup cron or strategy is mentioned. -- **No content analytics.** No view tracking, no search query logging, no usage metrics beyond pipeline token counts. - -### Low-Hanging Fruit - -1. **Fix Qdrant duplicate points** — Switch to deterministic UUIDs based on content hash. Small change, big data quality impact. -2. **Add basic auth to admin endpoints** — A single API key middleware for `/admin/*` and `/review/*` routes. -3. **Split `stages.py`** — Extract each stage into its own module. The file is already structured with clear stage boundaries. -4. **Normalize topic category casing** — `.lower()` or `.title()` in stage 4 output. One-line fix for data consistency. -5. **Delete `generate_stage5_variants.py`** from project root (or move into quality toolkit). -6. **Add a `Makefile`** with common commands (build, test, deploy, migrate) to replace the manual command documentation. - -### Logical Next Features - -Based on the trajectory and spec: -1. **Batch processing pipeline** — Process the full video library (100-500 files). Will stress-test pipeline reliability. -2. **Content analytics** — View tracking, popular searches, usage patterns. -3. **Improved search** — Full-text search index, search result ranking improvements, faceted filtering. -4. **Multi-user support** — Authentication, user-specific bookmarks/notes on techniques. -5. **Video timestamp deep links** — If videos are accessible on the network, link directly to the timestamp in a player. 
- ---- - -## Key Files Reference - -| File | Purpose | -|------|---------| -| `chrysopedia-spec.md` | Full product specification (37 pages) — read first for product understanding | -| `README.md` | Architecture, setup, API reference, deployment guide | -| `CLAUDE.md` | Development context and canonical directory warning | -| `backend/main.py` | FastAPI app entry point, middleware, router mounting | -| `backend/config.py` | All environment variables with defaults (Pydantic Settings) | -| `backend/models.py` | All 11 SQLAlchemy ORM models — the data model source of truth | -| `backend/schemas.py` | Pydantic request/response schemas | -| `backend/pipeline/stages.py` | LLM pipeline — all 4 stages and orchestrator (the most complex file) | -| `backend/pipeline/llm_client.py` | LLM API client with primary/fallback and thinking mode support | -| `backend/search_service.py` | Semantic + keyword search implementation | -| `backend/watcher.py` | Transcript folder watcher service | -| `frontend/src/App.tsx` | React app root with routing | -| `frontend/src/App.css` | All styles (4,871 lines) | -| `frontend/src/api/public-client.ts` | Typed API client | -| `config/canonical_tags.yaml` | 7-category topic taxonomy definition | -| `docker-compose.yml` | Full 8-service stack definition | -| `.env.example` | Environment variable template | -| `.gsd/PROJECT.md` | Living project state document with milestone history | -| `.gsd/KNOWLEDGE.md` | Lessons learned and patterns (30+ entries) — invaluable for newcomers | -| `.gsd/DECISIONS.md` | 23 architectural decisions with rationale | -| `.gsd/REQUIREMENTS.md` | 32 requirements with validation status | - ---- - -## Uncertainties & Open Questions - -1. **Is the pipeline actually processing real content?** The system is deployed, but it's unclear how many videos have been processed through the pipeline. The test fixtures use sample data, and the prompt optimization work suggests the pipeline output quality isn't yet satisfactory. 
[inferred — medium confidence] - -2. **Why did Celery chain dispatch get replaced with inline execution?** The latest commit (`29f6e74`) switches to inline, but no commit message explains the issue. Was it a Celery reliability problem, a debugging convenience, or a permanent architectural change? [unknown — needs project owner input] - -3. **Is the domain `chrysopedia.xpltd.co` actually configured?** M003 mentions domain + DNS setup, KNOWLEDGE.md documents the XPLTD domain flow, but the nginx config uses `server_name _` (catch-all). [inferred — likely configured on nuc01's nginx, not in this codebase] - -4. **What's the actual LLM infrastructure?** References to "DGX Sparks Qwen" and "FYN" suggest a private GPU cluster. The API endpoint is `chat.forgetyour.name` which appears to be an OpenWebUI instance. The relationship between these systems and their reliability characteristics would matter for pipeline scaling. [low confidence — outside codebase] - -5. **Are there plans for multi-user access?** The spec says "single-admin tool" but the architecture (separate frontend, API, PostgreSQL) could support multiple users. No authentication means this is purely a trust-boundary question. [inferred — currently single-user by design] - -6. **What is the `CHRYSOPEDIA-ASSESSMENT.md` (42KB)?** Not read in detail — appears to be a UI/UX assessment that fed into M011 decisions. 
[low confidence on contents] diff --git a/backend/routers/pipeline.py b/backend/routers/pipeline.py index c9d62e5..727ba01 100644 --- a/backend/routers/pipeline.py +++ b/backend/routers/pipeline.py @@ -14,7 +14,6 @@ Admin: import asyncio import json import logging -import uuid from datetime import datetime, timezone from typing import Annotated @@ -169,7 +168,6 @@ async def list_pipeline_videos( stmt = stmt.where(SourceVideo.creator_id == creator_id) # Total count before pagination - from sqlalchemy import literal_column count_result = await db.execute( select(func.count()).select_from(stmt.subquery()) ) @@ -945,7 +943,6 @@ async def get_stale_pages( """ import hashlib from pathlib import Path as _Path - from sqlalchemy import over, text from models import TechniquePage, TechniquePageVersion settings = get_settings() diff --git a/backend/tests/test_pipeline.py b/backend/tests/test_pipeline.py index d5b7119..f944ad9 100644 --- a/backend/tests/test_pipeline.py +++ b/backend/tests/test_pipeline.py @@ -14,16 +14,14 @@ import json import os import pathlib import uuid -from unittest.mock import MagicMock, patch, PropertyMock +from unittest.mock import MagicMock, patch import openai import pytest -from sqlalchemy import create_engine, select -from sqlalchemy.orm import Session, sessionmaker -from sqlalchemy.pool import NullPool +from sqlalchemy import select +from sqlalchemy.orm import sessionmaker from models import ( - Creator, KeyMoment, KeyMomentContentType, ProcessingStatus, @@ -31,12 +29,6 @@ from models import ( TechniquePage, TranscriptSegment, ) -from pipeline.schemas import ( - ClassificationResult, - ExtractionResult, - SegmentationResult, - SynthesisResult, -) from tests.fixtures.mock_llm_responses import ( STAGE2_SEGMENTATION_RESPONSE, diff --git a/frontend/src/App.css b/frontend/src/App.css index f1ef26d..eda2f1d 100644 --- a/frontend/src/App.css +++ b/frontend/src/App.css @@ -265,17 +265,6 @@ a.app-footer__repo:hover { /* ── Queue header 
─────────────────────────────────────────────────────────── */ -.queue-header { - display: flex; - align-items: center; - justify-content: space-between; - margin-bottom: 1rem; -} - -.queue-header h2 { - font-size: 1.25rem; - font-weight: 700; -} /* ── Stats bar ────────────────────────────────────────────────────────────── */ @@ -285,36 +274,6 @@ a.app-footer__repo:hover { margin-bottom: 1rem; } -.stats-card { - flex: 1; - display: flex; - flex-direction: column; - align-items: center; - padding: 0.75rem; - border-radius: 0.5rem; - background: var(--color-bg-surface); - border: 1px solid var(--color-border); - box-shadow: 0 1px 3px var(--color-shadow); -} - -.stats-card__count { - font-size: 1.5rem; - font-weight: 700; - line-height: 1; -} - -.stats-card__label { - font-size: 0.75rem; - text-transform: uppercase; - letter-spacing: 0.04em; - color: var(--color-text-secondary); - margin-top: 0.25rem; -} - -.stats-card--pending .stats-card__count { color: var(--color-badge-pending-text); } -.stats-card--approved .stats-card__count { color: var(--color-badge-approved-text); } -.stats-card--edited .stats-card__count { color: var(--color-badge-edited-text); } -.stats-card--rejected .stats-card__count { color: var(--color-badge-rejected-text); } /* ── Filter tabs ──────────────────────────────────────────────────────────── */ @@ -371,11 +330,6 @@ a.app-footer__repo:hover { /* ── Queue cards ──────────────────────────────────────────────────────────── */ -.queue-list { - display: flex; - flex-direction: column; - gap: 0.5rem; -} .queue-card { display: block; @@ -558,19 +512,6 @@ a.app-footer__repo:hover { /* ── Pagination ───────────────────────────────────────────────────────────── */ -.pagination { - display: flex; - align-items: center; - justify-content: center; - gap: 1rem; - margin-top: 1.25rem; - padding: 0.75rem 0; -} - -.pagination__info { - font-size: 0.8125rem; - color: var(--color-text-secondary); -} /* ── Detail page 
──────────────────────────────────────────────────────────── */ @@ -586,78 +527,9 @@ a.app-footer__repo:hover { color: #c084fc; } -.detail-header { - display: flex; - align-items: center; - gap: 0.75rem; - margin-bottom: 1rem; -} - -.detail-header h2 { - font-size: 1.25rem; - font-weight: 700; -} - -.detail-card { - display: grid; - grid-template-columns: 1fr 1fr; - gap: 1rem; -} - -.detail-field { - display: flex; - flex-direction: column; - gap: 0.125rem; -} - -.detail-field label { - font-size: 0.6875rem; - font-weight: 600; - text-transform: uppercase; - letter-spacing: 0.04em; - color: var(--color-text-muted); -} - -.detail-field span, -.detail-field p { - font-size: 0.875rem; - color: var(--color-text-primary); -} - -.detail-field--full { - grid-column: 1 / -1; -} - -.detail-transcript { - background: var(--color-bg-transcript); - padding: 0.75rem; - border-radius: 0.375rem; - font-size: 0.8125rem; - line-height: 1.6; - white-space: pre-wrap; - max-height: 20rem; - overflow-y: auto; -} /* ── Action bar ───────────────────────────────────────────────────────────── */ -.action-bar { - display: flex; - flex-wrap: wrap; - gap: 0.5rem; - margin-top: 1rem; -} - -.action-error { - background: var(--color-error-bg); - border: 1px solid var(--color-error-border); - border-radius: 0.375rem; - padding: 0.5rem 0.75rem; - color: var(--color-badge-rejected-text); - font-size: 0.8125rem; - margin-top: 0.75rem; - margin-bottom: 0.75rem; -} /* ── Edit form ────────────────────────────────────────────────────────────── */ @@ -671,85 +543,9 @@ a.app-footer__repo:hover { margin-bottom: 0.75rem; } -.edit-field { - margin-bottom: 0.75rem; -} - -.edit-field label { - display: block; - font-size: 0.75rem; - font-weight: 600; - color: var(--color-text-secondary); - margin-bottom: 0.25rem; -} - -.edit-field input, -.edit-field textarea, -.edit-field select { - width: 100%; - padding: 0.5rem 0.75rem; - border: 1px solid var(--color-border); - border-radius: 0.375rem; - font-size: 
0.875rem; - font-family: inherit; - line-height: 1.5; - color: var(--color-text-primary); - background: var(--color-bg-input); - transition: border-color 0.15s; -} - -.edit-field input:focus, -.edit-field textarea:focus, -.edit-field select:focus { - outline: none; - border-color: var(--color-accent); - box-shadow: 0 0 0 2px var(--color-accent-focus); -} - -.edit-actions { - display: flex; - gap: 0.5rem; - margin-top: 1rem; -} /* ── Dialogs (modal overlays) ─────────────────────────────────────────────── */ -.dialog-overlay { - position: fixed; - inset: 0; - background: var(--color-overlay); - display: flex; - align-items: center; - justify-content: center; - z-index: 100; -} - -.dialog { - background: var(--color-bg-surface); - border-radius: 0.75rem; - padding: 1.5rem; - width: 90%; - max-width: 28rem; - box-shadow: 0 10px 40px var(--color-shadow-heavy); -} - -.dialog h3 { - font-size: 1.125rem; - font-weight: 600; - margin-bottom: 0.5rem; -} - -.dialog__hint { - font-size: 0.8125rem; - color: var(--color-text-secondary); - margin-bottom: 1rem; -} - -.dialog__actions { - display: flex; - gap: 0.5rem; - margin-top: 1rem; -} /* ── Loading / empty states ───────────────────────────────────────────────── */ @@ -925,29 +721,6 @@ a.app-footer__repo:hover { gap: 0.5rem; } - .stats-card { - flex-direction: row; - justify-content: space-between; - } - - .detail-card { - grid-template-columns: 1fr; - } - - .queue-header { - flex-direction: column; - align-items: flex-start; - gap: 0.5rem; - } - - .action-bar { - flex-direction: column; - } - - .action-bar .btn { - width: 100%; - justify-content: center; - } .app-header { flex-direction: column; @@ -1778,11 +1551,6 @@ a.app-footer__repo:hover { font-style: italic; } -.pill-list { - display: flex; - flex-wrap: wrap; - gap: 0.375rem; -} .badge--category { background: var(--color-badge-category-bg); @@ -1903,7 +1671,6 @@ a.app-footer__repo:hover { } - .technique-header__title-row { display: flex; align-items: flex-start; @@ 
-2382,15 +2149,6 @@ a.app-footer__repo:hover { /* ── Plugins ──────────────────────────────────────────────────────────────── */ -.technique-plugins { - margin-bottom: 2rem; -} - -.technique-plugins h2 { - font-size: 1.25rem; - font-weight: 700; - margin-bottom: 0.5rem; -} /* ── Related techniques ───────────────────────────────────────────────────── */ @@ -2416,51 +2174,6 @@ a.app-footer__repo:hover { } } -.related-card { - background: var(--color-surface-card); - border: 1px solid var(--color-border); - border-radius: 0.5rem; - padding: 0.875rem 1rem; - display: flex; - flex-direction: column; - gap: 0.25rem; -} - -.related-card__title { - color: var(--color-link-accent); - text-decoration: none; - font-weight: 600; - font-size: 0.9375rem; -} - -.related-card__title:hover { - text-decoration: underline; -} - -.related-card__creator { - font-size: 0.8125rem; - color: var(--color-text-muted); -} - -.related-card__badge { - display: inline-block; - align-self: flex-start; - font-size: 0.6875rem; - font-weight: 600; - text-transform: uppercase; - letter-spacing: 0.03em; - padding: 0.125rem 0.5rem; - border-radius: 999px; - background: var(--color-badge-bg, rgba(255, 255, 255, 0.08)); - color: var(--color-badge-text, var(--color-text-muted)); -} - -.related-card__reason { - font-size: 0.75rem; - font-style: italic; - color: var(--color-text-muted); - margin: 0; -} /* ══════════════════════════════════════════════════════════════════════════════ CREATORS BROWSE @@ -2904,49 +2617,6 @@ a.app-footer__repo:hover { gap: 0.5rem; } -.creator-technique-card { - display: flex; - flex-direction: column; - gap: 0.25rem; - padding: 0.875rem 1rem; - background: var(--color-bg-surface); - border: 1px solid var(--color-border); - border-radius: 0.5rem; - text-decoration: none; - color: inherit; - box-shadow: 0 1px 3px var(--color-shadow); - transition: border-color 0.15s, box-shadow 0.15s, transform 0.2s; - will-change: transform; -} - -.creator-technique-card:hover { - border-color: 
var(--color-accent-hover); - box-shadow: 0 2px 8px var(--color-accent-subtle); - transform: scale(1.02); -} - -.creator-technique-card__title { - font-size: 0.9375rem; - font-weight: 600; -} - -.creator-technique-card__meta { - display: flex; - align-items: center; - gap: 0.5rem; - flex-wrap: wrap; -} - -.creator-technique-card__tags { - display: inline-flex; - gap: 0.25rem; -} - -.creator-technique-card__summary { - font-size: 0.8125rem; - color: var(--color-text-secondary); - line-height: 1.4; -} /* ── Creator Featured Technique ─────────────────────────────────────── */ @@ -3502,13 +3172,7 @@ a.app-footer__repo:hover { } /* Technique cards: smaller padding */ - .creator-technique-card { - padding: 0.625rem 0.75rem; - } - .creator-technique-card__title { - font-size: 0.875rem; - } /* Edit form: full width, stacked link rows */ .creator-edit-form { @@ -3676,22 +3340,9 @@ a.app-footer__repo:hover { justify-content: flex-end; } -.report-issue-btn { - margin-top: 0.5rem; - align-self: flex-start; -} /* ── Buttons ────────────────────────────────────────────────────────────── */ -.btn { - padding: 0.5rem 1rem; - border-radius: 6px; - font-size: 0.85rem; - cursor: pointer; - border: 1px solid var(--color-border); - transition: background 0.15s, border-color 0.15s; -} - .btn--small { padding: 0.3rem 0.7rem; font-size: 0.8rem; @@ -3733,16 +3384,6 @@ a.app-footer__repo:hover { opacity: 0.85; } -.btn--warning { - background: var(--color-warning, #f0ad4e); - color: #1a1a1a; - border-color: var(--color-warning, #f0ad4e); -} - -.btn--warning:hover:not(:disabled) { - opacity: 0.85; -} - .btn--random { background: var(--color-bg-input); color: var(--color-text-primary); @@ -4613,11 +4254,6 @@ a.app-footer__repo:hover { color: #f44336; } -@keyframes stagePulse { - 0%, 100% { opacity: 1; } - 50% { opacity: 0.4; } -} - /* Connector chevron between dots */ .stage-timeline__step + .stage-timeline__step::before { content: "›"; diff --git a/generate_stage5_variants.py 
b/generate_stage5_variants.py deleted file mode 100644 index 293ce8b..0000000 --- a/generate_stage5_variants.py +++ /dev/null @@ -1,874 +0,0 @@ -"""Generate 100 stage 5 synthesis prompt variants. - -Each prompt is a complete, plug-and-play replacement for stage5_synthesis.txt. -The JSON output format, field rules, and input format are invariant across all -variants — only the instructional philosophy sections vary. - -Dimensions of variation: - 1. Role/persona framing - 2. Page purpose framing - 3. Voice preservation strategy - 4. Section structure philosophy - 5. Detail handling approach - 6. Teaching rhythm - 7. Reader model - 8. Summary style - 9. Synthesis philosophy - 10. Extra emphasis / experimental modifier - -Run: python generate_stage5_variants.py -Output: prompts/stage5_variants/v001.txt through v100.txt -""" -from pathlib import Path -import hashlib - -OUTPUT_DIR = Path(__file__).parent / "prompts" / "stage5_variants" - -# ═══════════════════════════════════════════════════════════════════════════════ -# INVARIANT SECTIONS — preserved across all variants for parser compatibility -# ═══════════════════════════════════════════════════════════════════════════════ - -SIGNAL_CHAINS_SECTION = """## Signal chains - -When the source moments describe a signal routing chain (oscillator → effects → processing → bus), represent it as a structured signal chain object. Signal chains are only included when the creator explicitly walks through routing — do not infer chains from casual plugin mentions. 
- -Format signal chain steps to include the role of each stage, not just the plugin name: -- Good: ["Noise osc (Vital)", "Transient Shaper (Kilohearts, attack +6dB)", "EQ (Pro-Q 3, shelf -3dB @ 12kHz)", "Send → Trash 2 (tape algo, 35% wet)"] -- Bad: ["Vital", "Kilohearts", "EQ", "Trash 2"]""" - -SOURCE_QUALITY_SECTION = """## Source quality assessment - -Assess source_quality based on the nature of the input moments: -- **structured**: Moments come from a planned tutorial with clear instructional flow. Most details are explicitly taught. -- **mixed**: Some moments are well-structured, others are scattered or conversational. Common for track breakdowns. -- **unstructured**: Moments are extracted from livestreams, Q&A sessions, or very informal content. Insights were scattered across a long session.""" - -INPUT_FORMAT_SECTION = """## Input format - -The creator name is provided in a <creator> tag. Key moments are provided inside <moments> tags as a JSON array, enriched with classification metadata (topic_category, topic_tags). All moments are from the same creator and related topic area. ALWAYS use the creator name from the <creator> tag in titles, slugs, and prose — never invent or guess a creator name from transcript content.""" - -OUTPUT_FORMAT_SECTION = """## Output format - -Return a JSON object with a single key "pages" containing a list of synthesized pages. Most inputs produce a single page, but if the moments clearly cover two distinctly separate techniques (e.g., moments about both "kick design" and "hi-hat design" that happen to share a topic_category), split them into separate pages. When splitting, you MUST assign each moment to exactly one page via the moment_indices field — every input moment index must appear in exactly one page's moment_indices array. 
- -```json -{ - "pages": [ - { - "title": "Snare Design by ExampleCreator", - "slug": "snare-design-examplecreator", - "topic_category": "Sound design", - "topic_tags": ["drums", "snare", "layering", "saturation", "transient shaping"], - "summary": "ExampleCreator builds snares as three independent layers — transient click, tonal body, and noise tail — with each shaped by a transient shaper before any bus processing. The signature crunch comes from parallel soft-clip saturation with a pre-delay that preserves the clean transient. In dense mixes, he uses HP sidechaining on the snare bus to maintain punch without competing with sub content.", - "body_sections": { - "Layer construction": "ExampleCreator builds snares as three independent layers, each shaped before they are summed. The transient click is a short noise burst (2-5ms decay) — he uses Vital's noise oscillator for this, sometimes with a bandpass around 2-4kHz to control the character. The tonal body is a pitched sine or triangle wave around 180-220Hz, tuned to complement the key of the track. The tail is filtered white noise with a fast exponential decay.\\n\\nThe critical insight: he shapes each layer's transient independently before any bus processing. He uses Kilohearts Transient Shaper (attack +4 to +6dB, sustain -6 to -8dB) rather than compression for this, because \\"compression adds sustain as a side effect while a transient shaper gives you direct independent control of both.\\"", - "Saturation and the crunch character": "The signature ExampleCreator snare crunch comes from parallel saturation — not inline. He routes the summed snare to a send with Trash 2 using the tape algorithm at 30-40% wet. 
The key detail: he puts a pre-delay of approximately 5ms on the saturation send, which lets the clean transient click through untouched while only the body and tail pick up harmonic content.\\n\\nHe explicitly warns against saturating the transient directly — says it \\"smears the snap into mush\\" and you lose the precision that makes the snare cut through.", - "Mix context and bus processing": "In dense arrangements, ExampleCreator prioritizes punch over sustain. On the snare bus compressor, he uses a high-pass sidechain filter (around 200-300Hz) so low-end energy from the body layer does not trigger gain reduction. This keeps the snare's ability to cut through the mix independent of whatever the sub bass is doing.\\n\\nHe also checks the snare against the lead or vocal bus specifically, not just soloed — because the 2-4kHz presence range is where both elements compete, and he would rather notch the snare's body slightly than lose vocal clarity." - }, - "signal_chains": [ - { - "name": "Snare layer processing", - "steps": [ - "Noise osc (Vital) → Transient Shaper (Kilohearts, attack +6dB, sustain -8dB) → EQ (Pro-Q 3, shelf -3dB @ 12kHz)", - "Dry path → snare bus", - "Send → Pre-delay (5ms) → Trash 2 (tape algorithm, 35% wet) → snare bus" - ] - } - ], - "plugins": ["Vital", "Kilohearts Transient Shaper", "FabFilter Pro-Q 3", "iZotope Trash 2"], - "source_quality": "structured", - "moment_indices": [0, 1, 2, 3, 4] - } - ] -} -```""" - -FIELD_RULES_SECTION = """## Field rules - -- **title**: The technique or concept name followed by "by {name from <creator> tag}" — concise and search-friendly. Examples: "Snare Design by Break", "Bass Resampling Workflow by KOAN Sound", "Mid-Side EQ for Width by Mr. Bill". Use title case. -- **slug**: URL-safe, lowercase, hyphenated version of the title including creator name. Examples: "snare-design-examplecreator", "bass-resampling-workflow-koan-sound". -- **topic_tags**: Merge and deduplicate from input moment tags. 
Add any clearly relevant tags the moments missed. Keep tags specific — "sidechain compression" not "audio processing". -- **summary**: 2-4 sentences. The most important insight first, then the method, then the distinguishing detail. A reader should get the core idea from the summary alone. -- **body_sections**: Dict of section_name → prose content. Section names derived from content (never generic). Each section 2-5 substantive paragraphs. -- **plugins**: List of string plugin names. Plain strings only — never objects. Include only plugins the creator mentioned or demonstrated. Use standard/common plugin names. -- **moment_indices**: Zero-indexed list referencing which input moments this page covers. Every input moment must appear in exactly one page's moment_indices.""" - -INVARIANT_TAIL = f""" -{SIGNAL_CHAINS_SECTION} - -{SOURCE_QUALITY_SECTION} - -{INPUT_FORMAT_SECTION} - -{OUTPUT_FORMAT_SECTION} - -{FIELD_RULES_SECTION}""" - - -# ═══════════════════════════════════════════════════════════════════════════════ -# DIMENSION 1: ROLE / PERSONA FRAMING -# ═══════════════════════════════════════════════════════════════════════════════ - -ROLE_FRAMINGS = [ - # 0 — Studio colleague - """You are a music producer and technical writer who just spent hours studying this creator's content. Your job is to distill what you learned into a technique page that captures both the knowledge and the creator's way of teaching it. Write like you're explaining to a fellow producer what you picked up — direct, specific, and with genuine respect for the creator's craft.""", - - # 1 — Embedded documentarian - """You are a technical documentarian embedded in this creator's studio. You've watched them work, heard their explanations, caught their asides and warnings. Now you're writing the reference page that captures everything a producer needs to apply these techniques. 
Your documentation style: precise but alive — the creator's personality should come through in how you present their methods.""", - - # 2 — Knowledge architect - """You are a knowledge architect for a music production encyclopedia. Your specialty: transforming scattered teaching moments into structured, authoritative reference pages that producers consult mid-session. You combine the rigor of technical documentation with the warmth of a mentor's explanation. Every page you write earns its existence by being faster and more useful than watching the source content.""", - - # 3 — Producer's notebook - """You are writing the definitive session notes for this creator's techniques. Think of it as the notebook a serious student would keep after attending a masterclass — except your notes are organized, thorough, and capture the creator's exact words when they said something worth remembering. A producer should be able to open these notes mid-session and immediately find what they need.""", - - # 4 — Translator of expertise - """You are an expert at translating tacit production knowledge into written form. Creators demonstrate techniques through action and explanation — your job is to capture both what they do and why they do it, preserving the specificity that makes their approach unique. You write pages that make a reader feel like they're getting a private lesson from the creator.""", - - # 5 — Technical mentor - """You are a senior music production educator writing reference material derived from studying a specific creator's methods. You understand that the value isn't in generic production advice — it's in this creator's particular approach, their specific settings, their reasoning, and their personality. Your pages teach the creator's way, not just the technique in the abstract.""", - - # 6 — Craft journalist - """You are a music production journalist who specializes in technique deep-dives. 
You interview creators by studying their content meticulously, then write articles that capture their methodology with the precision of technical writing and the readability of great journalism. Your articles are the kind producers bookmark and return to.""", - - # 7 — Applied researcher - """You are a researcher cataloging production techniques for a knowledge base that producers use as their go-to reference. Your approach: extract the creator's methodology with scientific precision, but present it with the directness and personality that makes it stick. Every claim you make is grounded in what the creator actually said or demonstrated.""", -] - -# ═══════════════════════════════════════════════════════════════════════════════ -# DIMENSION 2: PAGE PURPOSE FRAMING -# ═══════════════════════════════════════════════════════════════════════════════ - -PAGE_PURPOSE_FRAMINGS = [ - # 0 — Reference card - """## What you are creating - -A Chrysopedia technique page is a focused reference document that a music producer consults mid-session. The reader is Alt+Tabbing from their DAW — they need the key insight fast, with enough depth to apply it correctly. This page must earn its existence by being more efficient than re-watching the video. - -The page has two parts: -1. **Study guide prose** — rich paragraphs organized by sub-aspect of the technique. Reads like notes from an expert mentor, not a textbook. Each section should be self-contained enough that a producer can jump to the relevant section and get value immediately. -2. **Key moments index** — a compact list of source moments with descriptive titles for quick scanning.""", - - # 1 — Distilled knowledge - """## What you are creating - -A Chrysopedia technique page distills a creator's teaching into its most useful form. It is not a summary of a video — it is the knowledge from that video, reorganized for immediate application. 
A producer reading this page should absorb the core technique in under 2 minutes, with deeper detail available for those who want it. - -The page contains: -1. **Study guide prose** — substantive paragraphs covering each sub-aspect of the technique. This reads like a knowledgeable colleague explaining what they learned, not a generic article. Every paragraph should contain at least one specific, actionable detail. -2. **Key moments index** — reference list of the source moments for readers who want to trace information back to the original content.""", - - # 2 — Private lesson - """## What you are creating - -A Chrysopedia technique page captures a creator's methodology so completely that reading it feels like getting a private lesson. The difference between this and a wiki article: personality, specificity, and the creator's reasoning behind their choices. - -Two sections work together: -1. **Study guide prose** — detailed paragraphs organized by sub-aspects of the technique. Written in the creator's teaching voice — their emphasis, their warnings, their specific numbers. This is where the value lives. -2. **Key moments index** — compact reference list of the individual source moments that contributed to this page, with descriptive titles for scanning.""", - - # 3 — Technique blueprint - """## What you are creating - -A Chrysopedia technique page is a blueprint for applying a specific creator's approach. It's what you'd pin above your monitor if you wanted to work the way they work. Not generic production advice — this creator's specific method, with their exact settings, their reasoning, and their opinionated takes. - -Structure: -1. **Study guide prose** — organized by sub-aspect of the technique. Each section teaches one distinct facet with enough depth to actually apply it. Reads like expert notes, not a textbook. -2. 
**Key moments index** — quick-reference list of the source moments with descriptive titles.""", - - # 4 — Knowledge accelerator - """## What you are creating - -A Chrysopedia technique page accelerates learning. A creator might spend 20 minutes explaining a technique across a video — your page captures that same knowledge in a format that takes 2 minutes to read and immediately apply. The page doesn't replace the video; it makes the video's knowledge accessible at the speed of reading. - -Two complementary sections: -1. **Study guide prose** — rich, detailed paragraphs grouped by sub-aspect. Each section builds understanding of one facet of the technique. The creator's voice and specific details are preserved — this is what makes the page worth reading over a generic tutorial. -2. **Key moments index** — a compact list linking back to the individual source moments for deeper exploration.""", - - # 5 — Applied wisdom - """## What you are creating - -A Chrysopedia technique page captures applied wisdom — not just what a creator does, but why they do it, when they do it differently, and what they warn against. This is the page a producer reads when they want to understand someone's approach, not just replicate their settings. - -The page includes: -1. **Study guide prose** — substantive paragraphs covering each dimension of the technique. Written to transfer understanding, not just information. The creator's personality, opinions, and reasoning are essential — they're what make this page valuable over a settings list. -2. 
**Key moments index** — compact reference list of source moments with scannable titles.""", -] - -# ═══════════════════════════════════════════════════════════════════════════════ -# DIMENSION 3: VOICE PRESERVATION STRATEGY -# ═══════════════════════════════════════════════════════════════════════════════ - -VOICE_STRATEGIES = [ - # 0 — Aggressive direct quoting - """## Voice and tone - -**Capturing the creator's voice is the highest priority after factual accuracy.** - -Write as if you're relaying what this creator taught, in a way that preserves who they are as a teacher: - -- **Direct and confident** — state what the creator does, never hedge with "appears to" or "seems like" -- **Quote aggressively** — when the creator uses a vivid metaphor, gives a blunt warning, states a strong opinion, or coins a memorable phrase, quote them verbatim with quotation marks. These quotes are often the most valuable part of the page. A page without direct quotes has failed. Examples: 'He warns against saturating transients directly — says it "smears the snap into mush."' or '"Every bus you add is another place you'll put a compressor that doesn't need to be there."' -- **Preserve their vocabulary** — if the creator says "punchy," "muddy," "surgical," "fat," "tight," use those exact words. Don't substitute clinical synonyms. -- **Specific always** — concrete values (frequencies, ratios, ms, dB, plugin names, settings) over vague descriptions. "Uses compression" is never acceptable when specifics exist in the source. -- **Technical but natural** — use production terminology as the creator does, explaining only when their explanation adds value""", - - # 1 — Woven personality - """## Voice and tone - -The creator's personality should be woven into the fabric of every section, not bolted on as occasional quotes. - -- **Direct and authoritative** — write what the creator does as established fact. 
No hedging, no "appears to," no "it seems like" -- **The creator's words as anchor points** — identify the 3-5 most memorable things the creator said (strong opinions, vivid metaphors, blunt warnings) and quote them directly with quotation marks at the moments they matter most. These quotes should feel like the heartbeat of the page. -- **Their language, your structure** — adopt the creator's vocabulary and emphasis patterns throughout the prose. If they're emphatic about something, your prose should convey that emphasis. If they're casual, let the prose breathe. -- **Never genericize** — if the source says "crank it to about 40 percent," write that, not "increase the drive." Their phrasing carries information that a paraphrase loses. -- **Specificity is non-negotiable** — always include concrete values (Hz, ms, dB, ratios, plugin names, specific settings) when the source provides them""", - - # 2 — Teaching voice capture - """## Voice and tone - -Your goal: make the reader feel like the creator is explaining this to them directly. - -- **Confident, not academic** — state what the creator does definitively. No "the creator appears to prefer" — just "he uses" / "she sets" -- **Capture teaching moments verbatim** — when the creator explains WHY they do something, or warns against a mistake, or describes what something sounds like — quote their exact words. These are the moments where voice matters most. Use quotation marks for direct quotes. -- **Preserve their technical dialect** — every creator has a vocabulary. Some say "crunch," others say "saturation character." Some say "tight," others say "controlled." Use THEIR word, not the textbook word. -- **Emphasize what they emphasize** — if the creator spent 30 seconds on a specific setting, it matters. If they mentioned something in passing, it's secondary. The page should mirror the creator's sense of what's important. 
-- **Specifics before adjectives** — "EQ shelf at -3dB around 12kHz" not "a gentle high-frequency rolloff." Always include the actual values from the source material.""", - - # 3 — Personality through structure - """## Voice and tone - -The creator's personality should shape not just the words but the priorities and structure of the page. - -- **Write what the creator would want you to remember** — if they repeated something, lingered on it, or said it with conviction, that's the core of the page. Build around those moments. -- **Direct quotes for impact moments** — quote the creator when they say something that can't be paraphrased without losing meaning. Strong opinions, vivid descriptions, warnings, and "aha" explanations deserve their exact words in quotation marks. -- **Adopt their frame of reference** — if the creator thinks in terms of "energy" and "movement," use those concepts. If they think in terms of "surgical precision" and "control," use those. Don't impose a different conceptual framework. -- **Confident and direct** — never hedge. "He sets the attack to 4ms" not "he appears to prefer an attack around 4ms" -- **Specifics are the substance** — every section should contain concrete values: frequencies, time values, percentages, ratios, plugin names, specific settings. Vague descriptions waste the reader's time.""", - - # 4 — Selective voice - """## Voice and tone - -Write with precision and personality — capture the creator's approach without performing their personality. The voice should serve clarity. - -- **Direct and definitive** — what the creator does is stated as fact. No hedging words, no "perhaps," no "seems to." -- **Strategic quoting** — quote the creator directly (with quotation marks) at 2-4 key moments per section where their exact words carry meaning that paraphrase would lose: warnings, colorful metaphors, strong opinions, memorable one-liners. Don't over-quote — the quotes should hit harder by being selective. 
-- **Their vocabulary, always** — use the creator's specific terminology. If they say "smear," you say "smear." If they say "glue," you say "glue." These words encode production knowledge that synonyms don't carry. -- **Specifics are mandatory** — Hz values, ms values, dB settings, percentage values, plugin names, algorithm choices. If the source contains a specific number, it appears in the page. "Adjust to taste" is never acceptable. -- **Accessible to all levels** — use production terminology naturally but explain non-obvious concepts when the creator's own explanation illuminates them""", - - # 5 — Conversational authority - """## Voice and tone - -Write like you're a knowledgeable producer who deeply studied this creator's work and is now passing on what you learned. Conversational authority — you know this stuff because you learned it from someone who knows it better. - -- **No hesitation** — state techniques, settings, and approaches as established method. "He does X" not "he tends to do X" -- **The creator in their own words** — pull direct quotes (in quotation marks) for any moment where the creator's phrasing is more vivid, more precise, or more memorable than a paraphrase would be. Aim for at least 2-3 direct quotes per page. These are the lines a reader will remember. -- **Specific and grounded** — every technical claim needs concrete backing from the source: plugin names, settings, frequencies, ratios, time values, routing decisions. Abstract advice is worthless. -- **Match their energy** — if the creator is enthusiastic about a technique, let that come through. If they're cautionary, convey the gravity. If they're playful, allow some lightness. The tone should match the teaching moment. -- **Efficiency** — say it once, say it well. Don't pad paragraphs. 
Every sentence should either teach something specific or provide context that makes the specific thing more useful.""", -] - -# ═══════════════════════════════════════════════════════════════════════════════ -# DIMENSION 4: SECTION STRUCTURE PHILOSOPHY -# ═══════════════════════════════════════════════════════════════════════════════ - -SECTION_STRUCTURES = [ - # 0 — Content-derived workflow order - """## Body sections structure - -Derive section names from the actual content — never use generic names like "Overview," "Step-by-Step Process," "Key Settings," or "Tips and Variations." - -Each section covers one distinct sub-aspect of the technique, organized in the order a producer would naturally work through these decisions: - -Good section names: "Layer construction" / "Saturation and the crunch character" / "Mix context and bus processing" / "Oscillator setup and FM routing" -Bad section names: "Overview" / "Introduction" / "Key Settings" / "Tips" / "Conclusion" - -Each section: 2-5 substantive paragraphs. Paragraphs flow logically within each section, building a complete picture of that sub-aspect. A section with 1-2 sentences is too thin — merge it or expand it.""", - - # 1 — Decision-point organization - """## Body sections structure - -Organize sections around the decisions the creator makes, not around a linear procedure. - -Name each section for the specific decision or sub-problem it addresses. If the creator's technique involves choosing oscillator settings, then processing, then mixing — those aren't "Step 1, Step 2, Step 3." They're specific named concerns like "Oscillator architecture" / "Saturation chain design" / "Bus treatment in dense mixes." - -Never use generic names: "Overview," "Process," "Settings," "Tips," "Summary" — these signal lazy organization. - -Each section: 2-5 paragraphs of substantive content. Every paragraph should contain at least one specific detail (setting, value, plugin, or rationale). 
Merge thin sections; split bloated ones.""", - - # 2 — Concept-first organization - """## Body sections structure - -Organize by concept, not by sequence. Each section should teach one complete idea — the what, the why, and the how — so a reader can jump to any section and get full value. - -Section names should tell the reader exactly what they'll learn: -- Good: "Parallel saturation for crunch without smear" / "Frequency-specific ducking" / "The resampling loop" -- Bad: "Overview" / "Step-by-Step" / "Key Settings" / "Tips and Tricks" / "Conclusion" - -Descriptive section names are a feature, not decoration. A producer scanning the page should know from the section names alone whether this page has what they need. - -Each section: 2-5 substantive paragraphs. No filler, no padding. Every paragraph earns its place with specific information.""", - - # 3 — Problem-solution sections - """## Body sections structure - -Frame sections around the problems the creator is solving, not the tools they're using. - -Good section names describe what the creator achieves or addresses: -- "Getting the snare to cut without competing with vocals" / "Adding organic movement to static patches" / "Preserving transient punch through the bus chain" - -Bad section names describe generic categories: -- "Overview" / "EQ Settings" / "Compression" / "Tips" / "Final Thoughts" - -The test: could this section name appear on any technique page? If yes, it's too generic. Each name should be specific to THIS technique. - -Each section: 2-5 paragraphs of substantive prose. Flow logically within each section. Merge thin topics; never leave a section that's just 1-2 sentences.""", - - # 4 — Layered depth sections - """## Body sections structure - -Name sections for the distinct layers of the technique — each section adds depth to the reader's understanding. 
- -Think of the sections as answering different questions about the technique: -- "How the layers are built" (the construction) -- "Where the character comes from" (the signature element) -- "How it sits in a mix" (the context) - -Never use generic section names: "Overview" / "Process" / "Settings" / "Tips" / "Summary" — these are the enemy of good technique pages. - -Section names should be specific enough that a producer can scan them and immediately know if this page covers what they need. "Sidechain routing for low-end clarity" tells you something. "Processing" tells you nothing. - -Each section: 2-5 meaty paragraphs. Every paragraph must contain concrete information — no filler sentences like "this is an important aspect of the technique." """, - - # 5 — Action-oriented sections - """## Body sections structure - -Name each section for what the producer will understand or be able to do after reading it. - -Section names should be active and specific to the content: -- "Building the transient layer" / "Dialing in the parallel saturation" / "Checking mono compatibility on the sub bus" -- NOT: "Overview" / "Step 1" / "Settings" / "Tips" / "Advanced Techniques" - -The golden rule: if the section name could work for any technique page, it's too generic. Rename it with specifics from THIS creator's approach. - -Each section should be independently valuable — a producer who reads only that section should learn something concrete and applicable. 2-5 paragraphs per section, with specific values, settings, and rationale in every paragraph.""", -] - -# ═══════════════════════════════════════════════════════════════════════════════ -# DIMENSION 5: DETAIL HANDLING -# ═══════════════════════════════════════════════════════════════════════════════ - -DETAIL_APPROACHES = [ - # 0 — Specifics-first - """## Plugin and detail rules - -**Specifics first, always.** When the creator gives a value, that value leads. 
Don't bury "4kHz" at the end of a sentence that starts with three adjectives. The number IS the information. - -Include plugin names, settings, and parameters when the creator was teaching that setting — explaining why they chose it, what it does, or how to configure it. If a plugin is merely visible or mentioned in passing, include it in the plugins list but not the body prose. - -A page full of specific values (frequencies, ratios, ms, dB, percentages, plugin names, algorithm choices) reads like expertise. A page full of adjectives ("nice," "subtle," "aggressive") reads like guessing. Always choose the specific value over the descriptive adjective.""", - - # 1 — Context-wrapped specifics - """## Plugin and detail rules - -Every specific value needs its context — why this number, what problem it solves, when you'd change it. - -Don't just list "attack +6dB" — explain that the creator uses +6dB attack on the transient shaper because they want the initial click to punch through without relying on compression (which adds sustain as a side effect). The value and the reasoning form a unit. - -Include plugin names and settings only when the creator was teaching that setting — spending time on why they chose it. A plugin merely visible in their session belongs in the plugins list, not the prose. - -Never use vague fill: "experiment with settings," "adjust to taste," "use your ears." If the creator gave a specific value, use it. If they gave a range, state the range.""", - - # 2 — Progressive disclosure - """## Plugin and detail rules - -Present details in progressive layers: concept → specific implementation → edge cases and context. - -First paragraph of a section: what the creator does and why (the concept). Second paragraph: the specific values, plugin names, settings, and routing (the implementation). Third paragraph if warranted: when they do it differently, what to watch out for (the nuance). 
- -Include specific plugin names, settings, and parameters when the creator taught that setting intentionally — not when a plugin was merely visible. This distinction is critical: a page explaining demonstrated plugins reads like education; a page listing every visible plugin reads like a gear list. - -Concrete values (Hz, ms, dB, %, ratios, plugin names) are mandatory whenever the source provides them. Never substitute vague descriptions for available specifics.""", - - # 3 — Interleaved detail and rationale - """## Plugin and detail rules - -Weave specifics and reasoning together in every paragraph. Don't separate "what" from "why" — they belong in the same breath. - -Good: "He uses Kilohearts Transient Shaper at +6dB attack rather than compression, because compression adds sustain as a side effect." -Bad: "He shapes the transient using a transient shaper. The attack is set to +6dB." - -Include plugin names, settings, and parameters only for plugins the creator was actively teaching — explaining why they chose it, what it does, how to configure it. Casually visible plugins go in the plugins list, not the prose. - -If the source contains a number, the page contains that number. No rounding to vague terms, no replacing "12%" with "a small amount." Specificity is the currency of credibility.""", - - # 4 — Show-don't-tell specifics - """## Plugin and detail rules - -Show the technique through its specifics. Don't tell the reader the snare "cuts through" — show them the 4kHz click layer, the -6dB blend ratio, the transient shaper at +6dB attack. - -Every technical claim in the body must be supported by at least one concrete detail from the source material: a frequency value, a time value, a ratio, a plugin name, a specific setting, a routing decision. - -Include plugin names and settings only when the creator demonstrated and explained that setting. Plugins merely visible in the background go in the plugins list but stay out of the prose narrative. 
- -The test for detail sufficiency: could a producer read this section and actually do what the creator did? If the answer requires guessing at any setting, more detail is needed.""", -] - -# ═══════════════════════════════════════════════════════════════════════════════ -# DIMENSION 6: TEACHING RHYTHM -# ═══════════════════════════════════════════════════════════════════════════════ - -TEACHING_RHYTHMS = [ - # 0 — Key insight first - """## Synthesis approach - -Lead with the most valuable insight. The first section of the page should contain the core technique — the thing that makes this creator's approach distinctive. Supporting detail and context follow. - -Within each section, front-load the "aha" moment. The first sentence or two should deliver the key idea; subsequent paragraphs provide the mechanics and rationale. A producer who reads only the first paragraph of each section should still learn something meaningful. - -Merge related moments — if the creator discusses the same concept at different points, synthesize them into one coherent treatment. Resolve redundancy. Note contradictions with context (e.g., "In dense mixes, he pulls it back; in sparse arrangements, he leaves room for the tail"). - -Build a logical flow across sections: typically sound source → processing → mixing context, but follow whatever order serves this specific technique best.""", - - # 1 — Build understanding progressively - """## Synthesis approach - -Build understanding layer by layer. Each section should add depth to what came before, so a reader who makes it through the whole page has deep understanding, while a reader who stops early still got the fundamentals. - -First section: the core concept and the creator's primary approach. Middle sections: specific implementation details, tools, settings, and reasoning. Final section: context, edge cases, or the creator's broader philosophy about this technique. 
- -Within sections, start with what the creator does, then explain why they do it, then provide the specific settings. This rhythm — method → reasoning → specifics — mirrors how good teaching works. - -Merge moments that cover the same ground. Organize by conceptual flow, not by the order the creator happened to discuss things. The page should feel structured even if the source content wasn't.""", - - # 2 — Each section stands alone - """## Synthesis approach - -Write each section to be independently valuable. A producer who jumps directly to "Parallel saturation chain" should get full value from that section without having read what came before. - -This means each section needs: -- What the creator does (the technique) -- Why they do it (the reasoning) -- How specifically (the values, settings, tools) - -Avoid forward or backward references between sections when possible. If context from another section is needed, include a brief restatement rather than saying "as mentioned above." - -Organize sections in the order a producer would naturally encounter these decisions. Merge moments that address the same sub-topic. Note contradictions with their context.""", - - # 3 — Problem-driven rhythm - """## Synthesis approach - -Anchor each section in the problem the creator is solving. Before the technique, before the settings, the reader should understand what sonic goal the creator is pursuing. - -Rhythm within each section: Problem → the creator's solution → specific implementation → why this works (or what to watch for). - -Example: "In dense arrangements, the snare body competes with the sub bass for attention. ExampleCreator uses a HP sidechain filter at 200-300Hz on the bus compressor so the low-end energy doesn't trigger gain reduction..." - -Merge moments that address the same problem. Build sections in the order of a natural production workflow. 
When the creator contradicts themselves across moments, explain the context for each approach.""", - - # 4 — Mirror teaching emphasis - """## Synthesis approach - -Match the creator's sense of what matters. If they spent 2 minutes on a specific setting, that setting gets prominent treatment. If they mentioned something in passing, it's a supporting detail, not a section heading. - -The creator's emphasis IS your structural guide. What they lingered on becomes a section. What they rushed through becomes a sentence within a larger section. What they warned against gets its own call-out. - -Merge related moments into coherent sections. Organize by logical workflow order (typically: sound source → processing → mix context), not by timestamp order. Resolve redundancy — say it once, thoroughly. Note contradictions with the context for each approach. - -Within sections, lead with the most distinctive aspect of the creator's approach — the thing that makes their method different from the generic technique.""", -] - -# ═══════════════════════════════════════════════════════════════════════════════ -# DIMENSION 7: READER MODEL -# ═══════════════════════════════════════════════════════════════════════════════ - -READER_MODELS = [ - # 0 — Mid-session producer - """## Reader context - -Your reader is a producer mid-session. They Alt+Tabbed from their DAW to find this specific technique. They want the key insight in seconds, with enough depth to apply it correctly, and then they want to get back to work. Respect their time: be direct, be specific, don't pad. - -Use production terminology naturally — your reader knows what a transient shaper does. But when the creator's explanation of a concept adds unique value (their specific reasoning or approach), include that explanation.""", - - # 1 — Deliberate student - """## Reader context - -Your reader is deliberately studying this technique. 
They're not in a rush — they want to deeply understand this creator's approach so they can incorporate it into their own workflow. They'll read the whole page. - -This means depth matters: explain the reasoning behind decisions, capture the creator's philosophy, and provide the specific settings they'll need. But don't waste their time with filler or repetition — density of useful information per sentence is the goal.""", - - # 2 — Any skill level - """## Reader context - -Your reader could be a beginner or an expert. Write so both get value: use production terminology naturally (don't over-explain fundamentals), but when the creator explains a non-obvious concept in an illuminating way, include that explanation — it helps beginners and often contains nuance that experts appreciate too. - -The page should be immediately scannable (clear section names, specific details prominent) for the expert who knows what they're looking for, and readable end-to-end for the learner who wants the full picture.""", - - # 3 — Producer comparing approaches - """## Reader context - -Your reader often knows the general technique — what they want is THIS creator's specific approach. They're reading to understand what's distinctive about this method: what choices this creator makes that others might not, what values they favor, what they explicitly warn against. - -Foreground what's specific to this creator's approach. Generic production advice that any tutorial would give is low-value filler. The creator's particular settings, their reasoning, their strong opinions — that's why someone reads this page instead of a generic article.""", - - # 4 — Quick-reference user - """## Reader context - -Your reader uses technique pages as quick reference — they come back to the same page multiple times as they work. 
The page needs to be scannable on revisit: clear section names that help them find the right part, specific values they can grab without re-reading full paragraphs, and a logical organization that matches their mental model of the workflow. - -First read should be engaging and educational. Second and third reads should be efficient — the information architecture supports both.""", -] - -# ═══════════════════════════════════════════════════════════════════════════════ -# DIMENSION 8: SUMMARY STYLE -# ═══════════════════════════════════════════════════════════════════════════════ - -SUMMARY_STYLES = [ - # 0 — Lead with the method - """## Summary requirements - -The summary (2-4 sentences) should lead with the most distinctive aspect of the creator's approach, then the method, then the key distinguishing detail. A reader should get the core technique from the summary alone. - -Example quality: "ExampleCreator builds snares as three independent layers — transient click, tonal body, and noise tail — with each shaped by a transient shaper before any bus processing. The signature crunch comes from parallel soft-clip saturation with a pre-delay that preserves the clean transient." - -Bad: "ExampleCreator discusses various techniques for snare production including layering and saturation." (Too vague — worthless as a summary.)""", - - # 1 — Lead with the insight - """## Summary requirements - -The summary (2-4 sentences) should open with the most surprising or valuable insight — the thing that would make a producer stop scrolling and read the full page. Then provide enough context to understand the approach. - -The summary is the page's elevator pitch. It should answer: "Why should I read this?" with a specific, concrete answer, not a vague topic description. 
Include at least one specific detail (a setting, a technique, a routing decision) in the summary.""", - - # 2 — Lead with the problem - """## Summary requirements - -The summary (2-4 sentences) should open with the problem or goal the creator is addressing, then describe their specific approach. This gives the reader an immediate "do I need this?" signal. - -Good: "To get snares that cut through dense arrangements without competing with the sub, ExampleCreator builds them as three independent layers..." Bad: "This page covers ExampleCreator's snare design techniques." - -Include at least one specific technical detail in the summary — a setting, a value, a plugin name. Vague summaries waste everyone's time.""", - - # 3 — Dense technical summary - """## Summary requirements - -Pack the summary (2-4 sentences) with the maximum amount of useful technical information. Every sentence should contain a concrete detail — a specific approach, a setting, a plugin, a value. The summary should be useful on its own as a quick reference. - -Think of it as: if a producer could only read the summary and nothing else, what would give them the most value? Prioritize the creator's specific method over generic descriptions of the topic.""", -] - -# ═══════════════════════════════════════════════════════════════════════════════ -# DIMENSION 9: SYNTHESIS PHILOSOPHY EXTRAS -# ═══════════════════════════════════════════════════════════════════════════════ - -SYNTHESIS_EXTRAS = [ - # 0 — None (no extra modifier) - "", - - # 1 — Anti-filler emphasis - """## Absolute prohibitions - -- Never write "adjust to taste" or "experiment with settings" — if the creator gave a value, use it. If they gave a range, state the range. -- Never use filler phrases: "it's worth noting," "interestingly," "it should be mentioned," "it's important to remember" — just state the thing. 
-- Never open a section with a topic sentence that restates the section name: if the section is called "Parallel saturation chain," don't start with "The parallel saturation chain is an important part of..." -- Never close a section with a vague restatement: "This technique is very useful for achieving good results." -- Every sentence must contain information that a producer could act on or learn from.""", - - # 2 — Contradiction and nuance emphasis - """## Handling nuance - -When the source moments reveal nuance or context-dependent choices, treat that as high-value content: - -- If the creator uses different settings for different contexts (dense vs. sparse arrangements, different genres, different stages of a mix), document BOTH with their context. This is gold — it shows the reader when to adapt. -- If the creator contradicts themselves, don't smooth it over. Note both positions with the context: "For punchy drums, he pushes the drive to 60%; for more subtle glue, he backs it off to 25-30%." -- If the creator warns against a common mistake, give that warning prominent placement. Warnings are often the most remembered and useful parts of a technique page. -- If the creator states a strong opinion, preserve it with attribution. Opinions from experienced producers are more valuable than neutral descriptions.""", - - # 3 — Engagement emphasis - """## Writing engagement - -The page must be enticing to read, not just technically accurate: - -- Open sections with something specific and concrete — a technique, a value, a surprising choice. Never open with a general statement about the topic's importance. -- Vary sentence length and rhythm. A long technical sentence followed by a short punchy one. Monotone paragraph structure is the enemy of engagement. -- Let the creator's personality drive the energy. If they're enthusiastic, that enthusiasm should be palpable. If they're precise and methodical, the prose should reflect that controlled energy. 
-- End sections with something memorable — a key takeaway, a direct quote, a warning. Not a limp summary sentence.""", - - # 4 — Cross-reference awareness - """## Cross-referencing within the page - -When one section's technique depends on or connects to another section's concept, make the connection explicit but brief: - -- "The pre-delay on the saturation send (covered in the previous section) is what makes this parallel approach work — without it, the transient gets smeared." -- Don't repeat details extensively across sections — reference them and move on. - -The page should feel like a cohesive piece of writing, not a collection of independent sections. Transitions between sections should be natural, even though a reader might skip to any section.""", - - # 5 — Confidence calibration - """## Confidence and attribution - -Distinguish between what the creator explicitly teaches and what you're inferring: - -- When the creator states something directly, write it as fact: "He sets the attack to 4ms." -- When you're connecting dots between moments that the creator didn't explicitly connect, use lighter framing: "This likely serves the same goal as his approach to the transient layer — keeping each element independent before the bus." -- Never invent connections, techniques, or settings that aren't in the source material. -- Never soften the creator's own stated opinions. If they're blunt, be blunt. "He says OTT on snares is 'always a mistake'" — don't smooth this to "he suggests being careful with OTT on snares." """, - - # 6 — Efficiency mandate - """## Efficiency - -Every sentence on this page must earn its place. The page exists so a producer doesn't have to watch a full video — if the page wastes their time, it has failed. - -- No preamble. No "in this section we will explore." Start with the content. -- No restating what was just said in different words. -- No padding paragraphs to reach some imagined length requirement. 
Three paragraphs of dense, specific information beat five paragraphs of padded prose. -- If something can be said in one sentence, don't use two. -- The entire page should be consumable in under 2 minutes of focused reading. If it's longer, you've included filler.""", -] - -# ═══════════════════════════════════════════════════════════════════════════════ -# COMBINATION STRATEGY -# ═══════════════════════════════════════════════════════════════════════════════ - -def build_prompt( - role_idx: int, - purpose_idx: int, - voice_idx: int, - structure_idx: int, - detail_idx: int, - rhythm_idx: int, - reader_idx: int, - summary_idx: int, - extra_idx: int, -) -> str: - """Assemble a complete prompt from dimension indices.""" - parts = [ - ROLE_FRAMINGS[role_idx % len(ROLE_FRAMINGS)], - "", - PAGE_PURPOSE_FRAMINGS[purpose_idx % len(PAGE_PURPOSE_FRAMINGS)], - "", - VOICE_STRATEGIES[voice_idx % len(VOICE_STRATEGIES)], - "", - SECTION_STRUCTURES[structure_idx % len(SECTION_STRUCTURES)], - "", - DETAIL_APPROACHES[detail_idx % len(DETAIL_APPROACHES)], - "", - TEACHING_RHYTHMS[rhythm_idx % len(TEACHING_RHYTHMS)], - "", - READER_MODELS[reader_idx % len(READER_MODELS)], - "", - SUMMARY_STYLES[summary_idx % len(SUMMARY_STYLES)], - ] - - extra = SYNTHESIS_EXTRAS[extra_idx % len(SYNTHESIS_EXTRAS)] - if extra: - parts.extend(["", extra]) - - parts.append(INVARIANT_TAIL) - - return "\n".join(parts) - - -def generate_combinations() -> list[tuple[str, dict]]: - """Generate 100 diverse combinations covering the design space. 
- - Strategy: - - First 8: single-dimension sweeps (hold everything at 0, vary one dim) - - Next 30: systematic coverage of high-impact dimensions - - Next 30: diagonal walks and cross-pollination - - Final 32: hash-based diverse sampling for maximum coverage - """ - combos = [] - seen_hashes = set() - - def add(r, p, v, s, d, t, rd, sm, e, label=""): - dims = { - "role": r % len(ROLE_FRAMINGS), - "purpose": p % len(PAGE_PURPOSE_FRAMINGS), - "voice": v % len(VOICE_STRATEGIES), - "structure": s % len(SECTION_STRUCTURES), - "detail": d % len(DETAIL_APPROACHES), - "rhythm": t % len(TEACHING_RHYTHMS), - "reader": rd % len(READER_MODELS), - "summary": sm % len(SUMMARY_STYLES), - "extra": e % len(SYNTHESIS_EXTRAS), - } - key = tuple(sorted(dims.items())) - h = hash(key) - if h in seen_hashes: - return False - seen_hashes.add(h) - - prompt = build_prompt( - dims["role"], dims["purpose"], dims["voice"], dims["structure"], - dims["detail"], dims["rhythm"], dims["reader"], dims["summary"], - dims["extra"], - ) - meta = {**dims, "label": label} - combos.append((prompt, meta)) - return True - - # ── Group 1: Curated strong combinations (highest priority) ──────── - strong = [ - # "Private lesson" feel: mentor + lesson framing + teaching voice + problem sections - (5, 2, 2, 3, 1, 3, 1, 1, 3, "curated-private-lesson"), - # "Quick reference" feel: knowledge architect + reference card + selective voice + action sections - (2, 0, 4, 5, 0, 0, 4, 3, 6, "curated-quick-reference"), - # "Deep study" feel: applied researcher + distilled knowledge + woven personality + concept sections - (7, 1, 1, 2, 2, 1, 1, 0, 2, "curated-deep-study"), - # "Workshop notes" feel: notebook + technique blueprint + aggressive quoting + workflow sections - (3, 3, 0, 0, 3, 4, 0, 0, 1, "curated-workshop-notes"), - # "Journalist profile" feel: craft journalist + applied wisdom + conversational + problem-driven - (6, 5, 5, 3, 4, 3, 3, 1, 3, "curated-journalist-profile"), - # "Efficient expert" feel: translator + 
accelerator + selective voice + layered depth + efficiency - (4, 4, 4, 4, 0, 0, 0, 3, 6, "curated-efficient-expert"), - # "Personality forward" feel: documentarian + private lesson + aggressive quoting + emphasis mirror - (1, 2, 0, 1, 3, 4, 2, 1, 5, "curated-personality-forward"), - # "Beginner friendly" feel: mentor + distilled + teaching capture + progressive + any-level - (5, 1, 2, 2, 2, 1, 2, 2, 4, "curated-beginner-friendly"), - # "Expert density" feel: researcher + blueprint + selective + decision-point + specifics-first - (7, 3, 4, 1, 0, 2, 3, 3, 1, "curated-expert-density"), - # "Minimal filler" feel: colleague + accelerator + woven + action + interleaved + mid-session - (0, 4, 1, 5, 3, 0, 0, 0, 6, "curated-minimal-filler"), - # "Maximum voice" feel: documentarian + private lesson + aggressive + content-derived + context - (1, 2, 0, 0, 1, 4, 2, 1, 5, "curated-maximum-voice"), - # "Scannable" feel: architect + reference + selective + layered + specifics-first + quick-ref - (2, 0, 4, 4, 0, 2, 4, 3, 1, "curated-scannable"), - # "Narrative" feel: journalist + applied wisdom + woven + concept + progressive + student - (6, 5, 1, 2, 2, 1, 1, 1, 3, "curated-narrative"), - # "Practical" feel: colleague + distilled + conversational + workflow + show-don't-tell + mid-session - (0, 1, 5, 0, 4, 0, 0, 0, 6, "curated-practical"), - # "Authoritative" feel: researcher + blueprint + personality-through-structure + decision + dense - (7, 3, 3, 1, 0, 2, 3, 3, 5, "curated-authoritative"), - # "Masterclass recap" feel: documentarian + distilled + conversational + problem + context-wrapped + problem-driven + any-level - (1, 1, 5, 3, 1, 3, 2, 1, 3, "curated-masterclass-recap"), - # "Studio diary" feel: colleague + private lesson + aggressive quoting + content-derived + show-don't-tell + mirror emphasis - (0, 2, 0, 0, 4, 4, 0, 0, 1, "curated-studio-diary"), - # "Terse wisdom" feel: translator + accelerator + selective + action + specifics-first + key-insight-first + mid-session + 
dense + efficiency - (4, 4, 4, 5, 0, 0, 0, 3, 6, "curated-terse-wisdom"), - # "Mentor monologue" feel: mentor + applied wisdom + teaching capture + concept + progressive + student + lead-with-insight + engagement - (5, 5, 2, 2, 1, 1, 1, 1, 3, "curated-mentor-monologue"), - # "Producer's cheat sheet" feel: architect + reference card + selective voice + decision-point + specifics-first + standalone + quick-ref + dense + anti-filler - (2, 0, 4, 1, 0, 2, 4, 3, 1, "curated-cheat-sheet"), - ] - for combo in strong: - add(*combo) - - # ── Group 2: Baseline + single-dimension sweeps ────────────────────── - add(0, 0, 0, 0, 0, 0, 0, 0, 0, "baseline-all-defaults") - - for i in range(1, len(ROLE_FRAMINGS)): - add(i, 0, 0, 0, 0, 0, 0, 0, 0, f"sweep-role-{i}") - for i in range(1, len(PAGE_PURPOSE_FRAMINGS)): - add(0, i, 0, 0, 0, 0, 0, 0, 0, f"sweep-purpose-{i}") - for i in range(1, len(VOICE_STRATEGIES)): - add(0, 0, i, 0, 0, 0, 0, 0, 0, f"sweep-voice-{i}") - for i in range(1, len(SECTION_STRUCTURES)): - add(0, 0, 0, i, 0, 0, 0, 0, 0, f"sweep-structure-{i}") - for i in range(1, len(DETAIL_APPROACHES)): - add(0, 0, 0, 0, i, 0, 0, 0, 0, f"sweep-detail-{i}") - for i in range(1, len(TEACHING_RHYTHMS)): - add(0, 0, 0, 0, 0, i, 0, 0, 0, f"sweep-rhythm-{i}") - for i in range(1, len(READER_MODELS)): - add(0, 0, 0, 0, 0, 0, i, 0, 0, f"sweep-reader-{i}") - for i in range(1, len(SUMMARY_STYLES)): - add(0, 0, 0, 0, 0, 0, 0, i, 0, f"sweep-summary-{i}") - for i in range(1, len(SYNTHESIS_EXTRAS)): - add(0, 0, 0, 0, 0, 0, 0, 0, i, f"sweep-extra-{i}") - - # ── Group 3: Diagonal walks (vary all dims simultaneously) ─────────── - for i in range(15): - add(i, i+1, i+2, i+3, i+4, i, i+1, i+2, i+3, f"diagonal-{i}") - - # ── Group 4: High-impact cross-pollinations ────────────────────────── - # Voice × Structure (the two most impactful content dimensions) - for v in range(len(VOICE_STRATEGIES)): - for s in range(len(SECTION_STRUCTURES)): - if v == 0 and s == 0: - continue # skip baseline dup - add(0, 
0, v, s, 0, 0, 0, 0, 0, f"cross-voice{v}-struct{s}") - - # Voice × Detail - for v in range(len(VOICE_STRATEGIES)): - for d in range(len(DETAIL_APPROACHES)): - if v == 0 and d == 0: - continue - add(1, 1, v, 0, d, 0, 1, 0, 0, f"cross-voice{v}-detail{d}") - - # Role × Extra (persona + experimental modifier) - for r in range(len(ROLE_FRAMINGS)): - for e in [1, 3, 6]: # anti-filler, engagement, efficiency - add(r, 0, 0, 0, 0, 0, 0, 0, e, f"cross-role{r}-extra{e}") - - # ── Group 5: Hash-based diverse fill to reach 100 ──────────────────── - seed = 42 - attempts = 0 - while len(combos) < 100 and attempts < 500: - h = hashlib.md5(f"chrysopedia-{seed}-{attempts}".encode()).hexdigest() - digits = [int(h[i:i+2], 16) for i in range(0, 18, 2)] - add( - digits[0], digits[1], digits[2], digits[3], digits[4], - digits[5], digits[6], digits[7], digits[8], - f"diverse-{seed+attempts}", - ) - attempts += 1 - - return combos[:100] - - -def main(): - OUTPUT_DIR.mkdir(parents=True, exist_ok=True) - - combos = generate_combinations() - print(f"Generated {len(combos)} unique prompt variants") - - manifest = [] - for i, (prompt, meta) in enumerate(combos, 1): - filename = f"v{i:03d}.txt" - filepath = OUTPUT_DIR / filename - filepath.write_text(prompt, encoding="utf-8") - - manifest.append({ - "file": filename, - "label": meta["label"], - "chars": len(prompt), - "role": meta["role"], - "purpose": meta["purpose"], - "voice": meta["voice"], - "structure": meta["structure"], - "detail": meta["detail"], - "rhythm": meta["rhythm"], - "reader": meta["reader"], - "summary": meta["summary"], - "extra": meta["extra"], - }) - - # Write manifest - import json - manifest_path = OUTPUT_DIR / "manifest.json" - manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8") - - # Print summary - print(f"\nOutput directory: {OUTPUT_DIR}") - print(f"Prompt files: v001.txt — v{len(combos):03d}.txt") - print(f"Manifest: {manifest_path}") - print(f"\nSize range: {min(m['chars'] for m in 
manifest):,} — {max(m['chars'] for m in manifest):,} chars") - print(f"Mean size: {sum(m['chars'] for m in manifest) // len(manifest):,} chars") - - # Dimension coverage - for dim in ["role", "purpose", "voice", "structure", "detail", "rhythm", "reader", "summary", "extra"]: - vals = set(m[dim] for m in manifest) - print(f" {dim}: {len(vals)} unique values used") - - # Label groups - labels = [m["label"] for m in manifest] - groups = {} - for l in labels: - prefix = l.split("-")[0] if "-" in l else l - groups[prefix] = groups.get(prefix, 0) + 1 - print(f"\nLabel groups: {dict(sorted(groups.items()))}") - - -if __name__ == "__main__": - main()