diff --git a/backend/pipeline/quality/__main__.py b/backend/pipeline/quality/__main__.py index 7b1e2b8..091826a 100644 --- a/backend/pipeline/quality/__main__.py +++ b/backend/pipeline/quality/__main__.py @@ -201,6 +201,23 @@ def main() -> int: "optimize", help="Automated prompt optimization loop with leaderboard output", ) + + # -- apply subcommand -- + apply_parser = sub.add_parser( + "apply", + help="Apply a winning prompt from optimization results to the stage's prompt file", + ) + apply_parser.add_argument( + "results_file", + type=str, + help="Path to an optimization results JSON file", + ) + apply_parser.add_argument( + "--dry-run", + action="store_true", + default=False, + help="Show what would change without writing", + ) opt_parser.add_argument( "--stage", type=int, @@ -231,6 +248,12 @@ def main() -> int: default="backend/pipeline/quality/results/", help="Directory to write result JSON (default: backend/pipeline/quality/results/)", ) + opt_parser.add_argument( + "--apply", + action="store_true", + default=False, + help="Write the winning prompt back to the stage's prompt file (backs up the original first)", + ) args = parser.parse_args() @@ -250,6 +273,9 @@ def main() -> int: if args.command == "optimize": return _run_optimize(args) + if args.command == "apply": + return _run_apply(args) + return 0 @@ -352,6 +378,7 @@ def _run_optimize(args: argparse.Namespace) -> int: fixture_path=args.file, iterations=args.iterations, variants_per_iter=args.variants_per_iter, + output_dir=args.output_dir, ) try: @@ -386,8 +413,121 @@ def _run_optimize(args: argparse.Namespace) -> int: except OSError as exc: print(f" Warning: failed to write results JSON: {exc}", file=sys.stderr) + # Apply winning prompt if requested + if args.apply: + baseline_composite = 0.0 + for h in result.history: + if h.get("label") == "baseline" and not h.get("error"): + baseline_composite = h["composite"] + break + + if result.best_score.composite <= baseline_composite: + print("\n --apply: Best 
prompt did not beat baseline — skipping apply.") + elif result.best_score.error: + print("\n --apply: Best result has an error — skipping apply.") + else: + print("\n --apply: Winning prompt beat baseline — applying...") + success, msg = apply_prompt(args.stage, result.best_prompt) + print(f" {msg}") + if not success: + return 1 + return 0 +def apply_prompt(stage: int, new_prompt: str, dry_run: bool = False) -> tuple[bool, str]: + """Apply a new prompt to a stage's prompt file. Returns (success, message). + + Returns early without writing for an unsupported stage, a missing prompt file, a prompt identical to the current one after stripping whitespace, or dry_run=True; otherwise creates a timestamped backup of the original before overwriting. + """ + if stage not in STAGE_CONFIGS: + return False, f"Unsupported stage {stage}. Valid: {sorted(STAGE_CONFIGS)}" + + config = STAGE_CONFIGS[stage] + settings = get_settings() + prompt_path = Path(settings.prompts_path) / config.prompt_file + + if not prompt_path.exists(): + return False, f"Prompt file not found: {prompt_path}" + + original = prompt_path.read_text(encoding="utf-8") + + if original.strip() == new_prompt.strip(): + return True, "No change — winning prompt is identical to current prompt." 
+ + # Show diff summary (line counts use the stripped text; char counts use the raw text) + orig_lines = original.strip().splitlines() + new_lines = new_prompt.strip().splitlines() + print(f"\n Prompt file: {prompt_path}") + print(f" Original: {len(orig_lines)} lines, {len(original)} chars") + print(f" New: {len(new_lines)} lines, {len(new_prompt)} chars") + + # Simple line-level diff summary: count "+"/"-" lines, skipping the "+++"/"---" file headers + import difflib + diff = list(difflib.unified_diff(orig_lines, new_lines, lineterm="", n=2)) + added = sum(1 for l in diff if l.startswith("+") and not l.startswith("+++")) + removed = sum(1 for l in diff if l.startswith("-") and not l.startswith("---")) + print(f" Changes: +{added} lines, -{removed} lines") + + if dry_run: + print("\n [DRY RUN] Would write to:", prompt_path) + if len(diff) <= 40: + print() + for line in diff: + print(f" {line}") + else: + print(f"\n (diff is {len(diff)} lines — showing first 30)") + for line in diff[:30]: + print(f" {line}") + print(" ...") + return True, "Dry run — no files modified." + + # Backup original next to the prompt file, named with a UTC timestamp (second resolution) + timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S") + backup_path = prompt_path.with_suffix(f".{timestamp}.bak") + backup_path.write_text(original, encoding="utf-8") + print(f" Backup: {backup_path}") + + # Write new prompt exactly as given (not stripped) + prompt_path.write_text(new_prompt, encoding="utf-8") + print(f" ✓ Written: {prompt_path}") + + return True, f"Prompt applied. 
Backup at {backup_path}" + + +def _run_apply(args: argparse.Namespace) -> int: + """Execute the apply subcommand — read a results JSON and apply the winning prompt. Returns 0 on success, 1 when the file is missing or unreadable, lacks config.stage or best_prompt, or apply_prompt reports failure.""" + results_path = Path(args.results_file) + if not results_path.exists(): + print(f"Error: results file not found: {args.results_file}", file=sys.stderr) + return 1 + + try: + data = json.loads(results_path.read_text(encoding="utf-8")) + except (OSError, ValueError) as exc: # ValueError covers JSONDecodeError and UnicodeDecodeError + print(f"Error: could not read results JSON {args.results_file}: {exc}", file=sys.stderr) + return 1 + + stage = (data.get("config") or {}).get("stage") + best_prompt = data.get("best_prompt", "") + best_scores = data.get("best_scores") or {} # tolerate an explicit null + + if not stage: + print("Error: results JSON missing config.stage", file=sys.stderr) + return 1 + if not best_prompt: + print("Error: results JSON missing best_prompt or it's empty", file=sys.stderr) + return 1 + + composite = best_scores.get("composite") or 0.0 # a null/missing score must not break the :.3f format below + print(f"\n Applying results from: {results_path}") + print(f" Stage: {stage}") + print(f" Best composite score: {composite:.3f}") + + success, msg = apply_prompt(stage, best_prompt, dry_run=args.dry_run) + print(f"\n {msg}") + return 0 if success else 1 + + if __name__ == "__main__": sys.exit(main()) diff --git a/backend/pipeline/quality/optimizer.py b/backend/pipeline/quality/optimizer.py index a26725f..f03dc09 100644 --- a/backend/pipeline/quality/optimizer.py +++ b/backend/pipeline/quality/optimizer.py @@ -17,6 +17,7 @@ import json import logging import time from dataclasses import dataclass, field +from datetime import datetime, timezone from pathlib import Path from pipeline.llm_client import LLMClient @@ -63,6 +64,7 @@ class OptimizationLoop: fixture_path: str, iterations: int = 5, variants_per_iter: int = 2, + output_dir: str | None = None, ) -> None: if stage not in STAGE_CONFIGS: raise ValueError( @@ -75,6 +77,7 @@ class OptimizationLoop: self.iterations = iterations self.variants_per_iter = variants_per_iter self.config = STAGE_CONFIGS[stage] 
+ self.output_dir = output_dir self.scorer = ScoreRunner(client) self.generator = PromptVariantGenerator(client) @@ -149,9 +152,23 @@ class OptimizationLoop: elapsed_seconds=round(time.monotonic() - t0, 2), ) + baseline_composite = best_score.composite + total_variants_scored = 0 + + self._write_progress( + phase="baseline_scored", + iteration=0, variant=0, + total_variants_scored=0, + best_composite=best_score.composite, + baseline_composite=baseline_composite, + elapsed_seconds=round(time.monotonic() - t0, 2), + best_label="baseline", + ) + self._print_iteration_summary(0, best_score, is_baseline=True) # Iterate + best_label = "baseline" for iteration in range(1, self.iterations + 1): print(f"\n ── Iteration {iteration}/{self.iterations} ──") @@ -189,8 +206,20 @@ class OptimizationLoop: if score.error: print(f" ✗ Variant {vi + 1} errored: {score.error}") + total_variants_scored += 1 + self._write_progress( + phase="variant_scored", + iteration=iteration, variant=vi + 1, + total_variants_scored=total_variants_scored, + best_composite=best_score.composite, + baseline_composite=baseline_composite, + elapsed_seconds=round(time.monotonic() - t0, 2), + best_label=best_label, + ) continue + total_variants_scored += 1 + if score.composite > iteration_best_score.composite: iteration_best_score = score iteration_best_prompt = variant_prompt @@ -198,10 +227,22 @@ class OptimizationLoop: else: print(f" · Score {score.composite:.3f} ≤ current best {iteration_best_score.composite:.3f}") + self._write_progress( + phase="variant_scored", + iteration=iteration, variant=vi + 1, + total_variants_scored=total_variants_scored, + best_composite=max(best_score.composite, iteration_best_score.composite), + baseline_composite=baseline_composite, + elapsed_seconds=round(time.monotonic() - t0, 2), + best_label=best_label if iteration_best_score.composite <= best_score.composite + else f"iter{iteration}_v{vi+1}", + ) + # Update global best if this iteration improved if 
iteration_best_score.composite > best_score.composite: best_score = iteration_best_score best_prompt = iteration_best_prompt + best_label = f"iter{iteration}" print(f" ★ Iteration {iteration} improved: {best_score.composite:.3f}") else: print(f" · No improvement in iteration {iteration}") @@ -212,6 +253,17 @@ class OptimizationLoop: elapsed = round(time.monotonic() - t0, 2) self._print_final_report(best_score, history, elapsed) + self._write_progress( + phase="complete", + iteration=self.iterations, + variant=self.variants_per_iter, + total_variants_scored=total_variants_scored, + best_composite=best_score.composite, + baseline_composite=baseline_composite, + elapsed_seconds=elapsed, + best_label=best_label, + ) + return OptimizationResult( best_prompt=best_prompt, best_score=best_score, @@ -221,6 +273,61 @@ class OptimizationLoop: # ── Internal helpers ────────────────────────────────────────────────── + def _write_progress( + self, + *, + phase: str, + iteration: int, + variant: int, + total_variants_scored: int, + best_composite: float, + baseline_composite: float, + elapsed_seconds: float, + best_label: str = "", + ) -> None: + """Write progress_stage<N>.json to the output directory for external monitoring (no-op when output_dir is unset). + + The payload is written to a temp file and then moved over the target with Path.replace, so readers do not see a half-written file. 
+ """ + if not self.output_dir: + return + + out_dir = Path(self.output_dir) + out_dir.mkdir(parents=True, exist_ok=True) + progress_path = out_dir / f"progress_stage{self.stage}.json" + + total_expected = self.iterations * self.variants_per_iter + pct = (total_variants_scored / total_expected * 100) if total_expected else 0 + + # ETA: average time per variant × remaining (0 until the first variant has been scored) + remaining = total_expected - total_variants_scored + avg_per_variant = (elapsed_seconds / total_variants_scored) if total_variants_scored > 0 else 0 + eta_seconds = round(avg_per_variant * remaining, 1) + + payload = { + "stage": self.stage, + "phase": phase, + "iteration": iteration, + "total_iterations": self.iterations, + "variant": variant, + "variants_per_iter": self.variants_per_iter, + "total_variants_scored": total_variants_scored, + "total_expected": total_expected, + "percent_complete": round(pct, 1), + "baseline_composite": round(baseline_composite, 4), + "best_composite": round(best_composite, 4), + "improvement": round(best_composite - baseline_composite, 4), + "best_label": best_label, + "elapsed_seconds": round(elapsed_seconds, 1), + "eta_seconds": eta_seconds, + "updated_at": datetime.now(timezone.utc).isoformat(), + } + + # Write to a temp file, then replace(): unlike rename(), it overwrites an existing target on Windows too + tmp_path = progress_path.with_suffix(".tmp") + tmp_path.write_text(json.dumps(payload, indent=2), encoding="utf-8") + tmp_path.replace(progress_path) + def _load_fixture(self) -> dict: """Load and validate the fixture JSON file against stage-specific keys.""" path = Path(self.fixture_path) diff --git a/backend/pipeline/quality/results/optimize_stage5_20260401_100005.json b/backend/pipeline/quality/results/optimize_stage5_20260401_100005.json new file mode 100644 index 0000000..8088fc9 --- /dev/null +++ b/backend/pipeline/quality/results/optimize_stage5_20260401_100005.json @@ -0,0 +1,132 @@ +{ + "config": { + "stage": 5, + "iterations": 3, + "variants_per_iter": 2, + "fixture_path": 
"pipeline/quality/fixtures/sample_moments.json" + }, + "best_prompt": "You are an expert technical writer specializing in music production education. Your task is to synthesize a set of related key moments from the same creator into a single, high-quality technique page that serves as a definitive reference on the topic.\n\n## What you are creating\n\nA Chrysopedia technique page is NOT a generic article or wiki entry. It is a focused reference document that a music producer will consult mid-session when they need to understand and apply a specific technique. The reader is Alt+Tabbing from their DAW, looking for actionable knowledge, and wants to absorb the key insight and get back to work in under 2 minutes.\n\nThe page has two complementary sections:\n\n1. **Study guide prose** \u2014 rich, detailed paragraphs organized by sub-aspect of the technique. This is for learning and deep understanding. It reads like notes from an expert mentor, not a textbook.\n2. **Key moments index** \u2014 a compact list of the individual source moments that contributed to this page, each with a descriptive title that enables quick scanning.\n\nBoth sections are essential. The prose synthesizes and explains; the moment index lets readers quickly locate the specific insight they need.\n\n## Voice and tone\n\nWrite as if you are a knowledgeable colleague explaining what you learned from watching this creator's content. The tone should be:\n\n- **Direct and confident** \u2014 state what the creator does, not \"the creator appears to\" or \"it seems like they\"\n- **Technical but accessible** \u2014 use production terminology naturally, but explain non-obvious concepts when the creator's explanation adds value\n- **Preserving the creator's voice** \u2014 This is a critical priority. You must aggressively capture the creator's unique personality and phrasing. 
Do not just summarize their points; extract their exact words for any memorable metaphors, strong opinions, or colorful descriptions. Quote them directly with quotation marks. These direct quotes are often the most valuable parts of the page. Examples: 'He warns against using OTT on snares \u2014 says it \"smears the snap into mush.\"' or 'Her reasoning: \"every bus you add is another place you'll be tempted to put a compressor that doesn't need to be there.\"' If the creator uses slang or specific adjectives (e.g., \"muddy,\" \"punchy,\" \"surgical\"), retain those exact words in your description rather than substituting synonyms.\n- **Specific over general** \u2014 always prefer concrete details (frequencies, ratios, ms values, plugin names, specific settings) over vague descriptions. \"Uses compression\" is never acceptable if the source moments contain specifics.\n\n## Body sections structure\n\nDo NOT use generic section names like \"Overview,\" \"Step-by-Step Process,\" \"Key Settings,\" or \"Tips and Variations.\" These produce lifeless, formulaic output.\n\nInstead, derive section names from the actual content. Each section should cover one distinct sub-aspect of the technique. Use descriptive names that tell the reader exactly what they'll learn:\n\nGood section names (examples):\n- \"Layer construction\" / \"Saturation and the crunch character\" / \"Mix context and bus processing\"\n- \"Resampling loop\" / \"Preserving transient information\" / \"Wavetable import settings\"\n- \"Overall philosophy\" / \"Bus structure\" / \"Gain staging mindset\"\n- \"Oscillator setup and FM routing\" / \"Effects chain per-layer\" / \"Automating movement\"\n\nBad section names (never use these):\n- \"Overview\" / \"Introduction\" / \"Step-by-Step Process\" / \"Key Settings\" / \"Tips and Variations\" / \"Conclusion\" / \"Summary\"\n\nEach section must be 2-5 paragraphs of substantive prose. 
A section with only 1-2 sentences is too thin \u2014 either merge it with another section or expand it with the detail available in the source moments. Ensure each paragraph flows logically into the next, building a complete picture of that specific sub-aspect.\n\n## Signal chains\n\nWhen the source moments describe a signal routing chain (oscillator \u2192 effects \u2192 processing \u2192 bus), represent it as a structured signal chain object. Signal chains are only included when the creator explicitly walks through routing \u2014 do not infer chains from casual plugin mentions.\n\nFormat signal chain steps to include the role of each stage, not just the plugin name:\n- Good: [\"Noise osc (Vital)\", \"Transient Shaper (Kilohearts, attack +6dB)\", \"EQ (Pro-Q 3, shelf -3dB @ 12kHz)\", \"Send \u2192 Trash 2 (tape algo, 35% wet)\"]\n- Bad: [\"Vital\", \"Kilohearts\", \"EQ\", \"Trash 2\"]\n\n## Plugin detail rule\n\nInclude specific plugin names, settings, and parameters ONLY when the creator was teaching that setting \u2014 spending time explaining why they chose it, what it does, or how to configure it. If a plugin is merely visible or briefly mentioned without explanation, include it in the plugins list but do not feature it in the body prose.\n\nThis distinction is critical for page quality. A page that lists every plugin the creator happened to have open reads like a gear list. A page that explains the plugins the creator intentionally demonstrated reads like education.\n\n## Synthesis, not concatenation\n\nYou are synthesizing knowledge, not summarizing a video. 
This means:\n\n- **Merge related information**: If the creator discusses snare transient shaping at timestamp 1:42:00 and then returns to refine the point at 2:15:00, these should be woven into one coherent section, not presented as two separate observations.\n- **Build a logical flow**: Organize sections in the order a producer would naturally encounter these decisions (e.g., sound source \u2192 processing \u2192 mixing context), even if the creator covered them in a different order.\n- **Resolve redundancy**: If two moments say essentially the same thing, combine them into one clear statement. Don't repeat yourself.\n- **Note contradictions**: If the creator says contradictory things in different moments (e.g., recommends different settings for the same parameter), note both and provide the context for each (\"In dense arrangements, he pulls the sustain back further; for sparse sections, he leaves more room for the tail\").\n\n## Source quality assessment\n\nAssess source_quality based on the nature of the input moments:\n- **structured**: Moments come from a planned tutorial with clear instructional flow. Most details are explicitly taught.\n- **mixed**: Some moments are well-structured, others are scattered or conversational. Common for track breakdowns.\n- **unstructured**: Moments are extracted from livestreams, Q&A sessions, or very informal content. Insights were scattered across a long session.\n\n## Input format\n\nThe creator name is provided in a tag. Key moments are provided inside tags as a JSON array, enriched with classification metadata (topic_category, topic_tags). All moments are from the same creator and related topic area. ALWAYS use the creator name from the tag in titles, slugs, and prose \u2014 never invent or guess a creator name from transcript content.\n\n## Output format\n\nReturn a JSON object with a single key \"pages\" containing a list of synthesized pages. 
Most inputs produce a single page, but if the moments clearly cover two distinctly separate techniques (e.g., moments about both \"kick design\" and \"hi-hat design\" that happen to share a topic_category), split them into separate pages. When splitting, you MUST assign each moment to exactly one page via the moment_indices field \u2014 every input moment index must appear in exactly one page's moment_indices array.\n\n```json\n{\n \"pages\": [\n {\n \"title\": \"Snare Design by ExampleCreator\",\n \"slug\": \"snare-design-examplecreator\",\n \"topic_category\": \"Sound design\",\n \"topic_tags\": [\"drums\", \"snare\", \"layering\", \"saturation\", \"transient shaping\"],\n \"summary\": \"ExampleCreator builds snares as three independent layers \u2014 transient click, tonal body, and noise tail \u2014 with each shaped by a transient shaper before any bus processing. The signature crunch comes from parallel soft-clip saturation with a pre-delay that preserves the clean transient. In dense mixes, he uses HP sidechaining on the snare bus to maintain punch without competing with sub content.\",\n \"body_sections\": {\n \"Layer construction\": \"ExampleCreator builds snares as three independent layers, each shaped before they are summed. The transient click is a short noise burst (2-5ms decay) \u2014 he uses Vital's noise oscillator for this, sometimes with a bandpass around 2-4kHz to control the character. The tonal body is a pitched sine or triangle wave around 180-220Hz, tuned to complement the key of the track. The tail is filtered white noise with a fast exponential decay.\\n\\nThe critical insight: he shapes each layer's transient independently before any bus processing. 
He uses Kilohearts Transient Shaper (attack +4 to +6dB, sustain -6 to -8dB) rather than compression for this, because \\\"compression adds sustain as a side effect while a transient shaper gives you direct independent control of both.\\\"\",\n \"Saturation and the crunch character\": \"The signature ExampleCreator snare crunch comes from parallel saturation \u2014 not inline. He routes the summed snare to a send with Trash 2 using the tape algorithm at 30-40% wet. The key detail: he puts a pre-delay of approximately 5ms on the saturation send, which lets the clean transient click through untouched while only the body and tail pick up harmonic content.\\n\\nHe explicitly warns against saturating the transient directly \u2014 says it \\\"smears the snap into mush\\\" and you lose the precision that makes the snare cut through.\",\n \"Mix context and bus processing\": \"In dense arrangements, ExampleCreator prioritizes punch over sustain. On the snare bus compressor, he uses a high-pass sidechain filter (around 200-300Hz) so low-end energy from the body layer does not trigger gain reduction. 
This keeps the snare's ability to cut through the mix independent of whatever the sub bass is doing.\\n\\nHe also checks the snare against the lead or vocal bus specifically, not just soloed \u2014 because the 2-4kHz presence range is where both elements compete, and he would rather notch the snare's body slightly than lose vocal clarity.\"\n },\n \"signal_chains\": [\n {\n \"name\": \"Snare layer processing\",\n \"steps\": [\n \"Noise osc (Vital) \u2192 Transient Shaper (Kilohearts, attack +6dB, sustain -8dB) \u2192 EQ (Pro-Q 3, shelf -3dB @ 12kHz)\",\n \"Dry path \u2192 snare bus\",\n \"Send \u2192 Pre-delay (5ms) \u2192 Trash 2 (tape algorithm, 35% wet) \u2192 snare bus\"\n ]\n }\n ],\n \"plugins\": [\"Vital\", \"Kilohearts Transient Shaper\", \"FabFilter Pro-Q 3\", \"iZotope Trash 2\"],\n \"source_quality\": \"structured\",\n \"moment_indices\": [0, 1, 2, 3, 4]\n }\n ]\n}\n```\n\n## Field rules\n\n- **title**: The technique or concept name followed by \"by {name from tag}\" \u2014 concise and search-friendly. Examples: \"Snare Design by Break\", \"Bass Resampling Workflow by KOAN Sound\", \"Mid-Side EQ for Width by Mr. Bill\". Use title case.\n- **slug**: URL-safe, lowercase, hyphenated version of the title including creator name. Examples: \"snare-design-examplecreator\", \"bass-resampling-workflow-koan-sound\".", + "best_scores": { + "composite": 1.0, + "structural": 1.0, + "content_specificity": 1.0, + "voice_preservation": 1.0, + "readability": 1.0, + "factual_fidelity": 1.0 + }, + "elapsed_seconds": 1184.12, + "history": [ + { + "iteration": 0, + "variant_index": 0, + "prompt_text": "You are an expert technical writer specializing in music production education. 
Your task is to synthesize a set of related key moments from the same creator into a single, high-quality technique page ...", + "prompt_length": 13382, + "composite": 0.95, + "scores": { + "structural": 0.95, + "content_specificity": 0.95, + "voice_preservation": 0.9, + "readability": 0.95, + "factual_fidelity": 1.0 + }, + "error": null, + "label": "baseline" + }, + { + "iteration": 1, + "variant_index": 1, + "prompt_text": "You are an expert technical writer specializing in music production education. Your task is to synthesize a set of related key moments from the same creator into a single, high-quality technique page ...", + "prompt_length": 11123, + "composite": 1.0, + "scores": { + "structural": 1.0, + "content_specificity": 1.0, + "voice_preservation": 1.0, + "readability": 1.0, + "factual_fidelity": 1.0 + }, + "error": null, + "label": "iter1_v1" + }, + { + "iteration": 1, + "variant_index": 2, + "prompt_text": "You are an expert technical writer specializing in music production education. Your task is to synthesize a set of related key moments from the same creator into a single, high-quality technique page ...", + "prompt_length": 11122, + "composite": 1.0, + "scores": { + "structural": 1.0, + "content_specificity": 1.0, + "voice_preservation": 1.0, + "readability": 1.0, + "factual_fidelity": 1.0 + }, + "error": null, + "label": "iter1_v2" + }, + { + "iteration": 2, + "variant_index": 1, + "prompt_text": "You are an expert technical writer specializing in music production education. 
Your task is to synthesize a set of related key moments from the same creator into a single, high-quality technique page ...", + "prompt_length": 11256, + "composite": 0.94, + "scores": { + "structural": 0.95, + "content_specificity": 0.95, + "voice_preservation": 0.85, + "readability": 0.95, + "factual_fidelity": 1.0 + }, + "error": null, + "label": "iter2_v1" + }, + { + "iteration": 2, + "variant_index": 2, + "prompt_text": "You are an expert technical writer specializing in music production education. Your task is to synthesize a set of related key moments from the same creator into a single, high-quality technique page ...", + "prompt_length": 11224, + "composite": 0.0, + "scores": { + "structural": 0.0, + "content_specificity": 0.0, + "voice_preservation": 0.0, + "readability": 0.0, + "factual_fidelity": 0.0 + }, + "error": "Variant parse error (stage 5): 7 validation errors for SynthesisResult\npages.0.plugins.0\n Input should be a valid string [type=string_type, input_value={'name': 'Serum', 'role':...e wavetable at 12% mix'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/string_type\npages.0.plugins.1\n Input should be a valid string [type=string_type, input_value={'name': 'OTT', 'role': '...mentioned': '30% depth'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/string_type\npages.0.plugins.2\n Input should be a valid string [type=string_type, input_value={'name': 'Ableton Simpler...: 'Warp completely off'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/string_type\npages.0.plugins.3\n Input should be a valid string [type=string_type, input_value={'name': 'Drum Buss', 'ro...mentioned': '40% drive'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/string_type\npages.0.plugins.4\n Input should be a valid string [type=string_type, input_value={'name': 'Valhalla Room',...entioned': '1.2s decay'}, 
input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/string_type\npages.0.plugins.5\n Input should be a valid string [type=string_type, input_value={'name': 'Trackspacer', '...ned': '100-300Hz range'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/string_type\npages.0.plugins.6\n Input should be a valid string [type=string_type, input_value={'name': 'Utility', 'role... 'Mono toggle function'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/string_type", + "label": "iter2_v2" + }, + { + "iteration": 3, + "variant_index": 1, + "prompt_text": "You are an expert technical writer specializing in music production education. Your task is to synthesize a set of related key moments from the same creator into a single, high-quality technique page ...", + "prompt_length": 11399, + "composite": 1.0, + "scores": { + "structural": 1.0, + "content_specificity": 1.0, + "voice_preservation": 1.0, + "readability": 1.0, + "factual_fidelity": 1.0 + }, + "error": null, + "label": "iter3_v1" + }, + { + "iteration": 3, + "variant_index": 2, + "prompt_text": "You are an expert technical writer specializing in music production education. 
Your task is to synthesize a set of related key moments from the same creator into a single, high-quality technique page ...", + "prompt_length": 11319, + "composite": 0.966, + "scores": { + "structural": 0.95, + "content_specificity": 0.98, + "voice_preservation": 0.95, + "readability": 0.95, + "factual_fidelity": 1.0 + }, + "error": null, + "label": "iter3_v2" + } + ] +} \ No newline at end of file diff --git a/backend/pipeline/quality/results/progress_stage5.json b/backend/pipeline/quality/results/progress_stage5.json new file mode 100644 index 0000000..1aa6dce --- /dev/null +++ b/backend/pipeline/quality/results/progress_stage5.json @@ -0,0 +1,18 @@ +{ + "stage": 5, + "phase": "complete", + "iteration": 3, + "total_iterations": 3, + "variant": 2, + "variants_per_iter": 2, + "total_variants_scored": 6, + "total_expected": 6, + "percent_complete": 100.0, + "baseline_composite": 0.95, + "best_composite": 1.0, + "improvement": 0.05, + "best_label": "iter1", + "elapsed_seconds": 1184.1, + "eta_seconds": 0.0, + "updated_at": "2026-04-01T10:00:05.268043+00:00" +} \ No newline at end of file diff --git a/prompts/stage5_synthesis.20260401_101518.bak b/prompts/stage5_synthesis.20260401_101518.bak new file mode 100644 index 0000000..44e59d0 --- /dev/null +++ b/prompts/stage5_synthesis.20260401_101518.bak @@ -0,0 +1,129 @@ +You are an expert technical writer specializing in music production education. Your task is to synthesize a set of related key moments from the same creator into a single, high-quality technique page that serves as a definitive reference on the topic. + +## What you are creating + +A Chrysopedia technique page is NOT a generic article or wiki entry. It is a focused reference document that a music producer will consult mid-session when they need to understand and apply a specific technique. The reader is Alt+Tabbing from their DAW, looking for actionable knowledge, and wants to absorb the key insight and get back to work in under 2 minutes. 
+ +The page has two complementary sections: + +1. **Study guide prose** — rich, detailed paragraphs organized by sub-aspect of the technique. This is for learning and deep understanding. It reads like notes from an expert mentor, not a textbook. +2. **Key moments index** — a compact list of the individual source moments that contributed to this page, each with a descriptive title that enables quick scanning. + +Both sections are essential. The prose synthesizes and explains; the moment index lets readers quickly locate the specific insight they need. + +## Voice and tone + +Write as if you are a knowledgeable colleague explaining what you learned from watching this creator's content. The tone should be: + +- **Direct and confident** — state what the creator does, not "the creator appears to" or "it seems like they" +- **Technical but accessible** — use production terminology naturally, but explain non-obvious concepts when the creator's explanation adds value +- **Preserving the creator's voice** — when the creator uses a memorable phrase, vivid metaphor, or strong opinion, quote them directly with quotation marks. These are often the most valuable parts. Examples: 'He warns against using OTT on snares — says it "smears the snap into mush."' or 'Her reasoning: "every bus you add is another place you'll be tempted to put a compressor that doesn't need to be there."' +- **Specific over general** — always prefer concrete details (frequencies, ratios, ms values, plugin names, specific settings) over vague descriptions. "Uses compression" is never acceptable if the source moments contain specifics. + +## Body sections structure + +Do NOT use generic section names like "Overview," "Step-by-Step Process," "Key Settings," or "Tips and Variations." These produce lifeless, formulaic output. + +Instead, derive section names from the actual content. Each section should cover one sub-aspect of the technique. 
Use descriptive names that tell the reader exactly what they'll learn: + +Good section names (examples): +- "Layer construction" / "Saturation and the crunch character" / "Mix context and bus processing" +- "Resampling loop" / "Preserving transient information" / "Wavetable import settings" +- "Overall philosophy" / "Bus structure" / "Gain staging mindset" +- "Oscillator setup and FM routing" / "Effects chain per-layer" / "Automating movement" + +Bad section names (never use these): +- "Overview" / "Introduction" / "Step-by-Step Process" / "Key Settings" / "Tips and Variations" / "Conclusion" / "Summary" + +Each section should be 2-5 paragraphs of substantive prose. A section with only 1-2 sentences is too thin — either merge it with another section or expand it with the detail available in the source moments. + +## Signal chains + +When the source moments describe a signal routing chain (oscillator → effects → processing → bus), represent it as a structured signal chain object. Signal chains are only included when the creator explicitly walks through routing — do not infer chains from casual plugin mentions. + +Format signal chain steps to include the role of each stage, not just the plugin name: +- Good: ["Noise osc (Vital)", "Transient Shaper (Kilohearts, attack +6dB)", "EQ (Pro-Q 3, shelf -3dB @ 12kHz)", "Send → Trash 2 (tape algo, 35% wet)"] +- Bad: ["Vital", "Kilohearts", "EQ", "Trash 2"] + +## Plugin detail rule + +Include specific plugin names, settings, and parameters ONLY when the creator was teaching that setting — spending time explaining why they chose it, what it does, or how to configure it. If a plugin is merely visible or briefly mentioned without explanation, include it in the plugins list but do not feature it in the body prose. + +This distinction is critical for page quality. A page that lists every plugin the creator happened to have open reads like a gear list. 
A page that explains the plugins the creator intentionally demonstrated reads like education. + +## Synthesis, not concatenation + +You are synthesizing knowledge, not summarizing a video. This means: + +- **Merge related information**: If the creator discusses snare transient shaping at timestamp 1:42:00 and then returns to refine the point at 2:15:00, these should be woven into one coherent section, not presented as two separate observations. +- **Build a logical flow**: Organize sections in the order a producer would naturally encounter these decisions (e.g., sound source → processing → mixing context), even if the creator covered them in a different order. +- **Resolve redundancy**: If two moments say essentially the same thing, combine them into one clear statement. Don't repeat yourself. +- **Note contradictions**: If the creator says contradictory things in different moments (e.g., recommends different settings for the same parameter), note both and provide the context for each ("In dense arrangements, he pulls the sustain back further; for sparse sections, he leaves more room for the tail"). + +## Source quality assessment + +Assess source_quality based on the nature of the input moments: +- **structured**: Moments come from a planned tutorial with clear instructional flow. Most details are explicitly taught. +- **mixed**: Some moments are well-structured, others are scattered or conversational. Common for track breakdowns. +- **unstructured**: Moments are extracted from livestreams, Q&A sessions, or very informal content. Insights were scattered across a long session. + +## Input format + +The creator name is provided in a <creator> tag. Key moments are provided inside <moments> tags as a JSON array, enriched with classification metadata (topic_category, topic_tags). All moments are from the same creator and related topic area. ALWAYS use the creator name from the <creator> tag in titles, slugs, and prose — never invent or guess a creator name from transcript content. 
+ +## Output format + +Return a JSON object with a single key "pages" containing a list of synthesized pages. Most inputs produce a single page, but if the moments clearly cover two distinctly separate techniques (e.g., moments about both "kick design" and "hi-hat design" that happen to share a topic_category), split them into separate pages. When splitting, you MUST assign each moment to exactly one page via the moment_indices field — every input moment index must appear in exactly one page's moment_indices array. + +```json +{ + "pages": [ + { + "title": "Snare Design by ExampleCreator", + "slug": "snare-design-examplecreator", + "topic_category": "Sound design", + "topic_tags": ["drums", "snare", "layering", "saturation", "transient shaping"], + "summary": "ExampleCreator builds snares as three independent layers — transient click, tonal body, and noise tail — with each shaped by a transient shaper before any bus processing. The signature crunch comes from parallel soft-clip saturation with a pre-delay that preserves the clean transient. In dense mixes, he uses HP sidechaining on the snare bus to maintain punch without competing with sub content.", + "body_sections": { + "Layer construction": "ExampleCreator builds snares as three independent layers, each shaped before they are summed. The transient click is a short noise burst (2-5ms decay) — he uses Vital's noise oscillator for this, sometimes with a bandpass around 2-4kHz to control the character. The tonal body is a pitched sine or triangle wave around 180-220Hz, tuned to complement the key of the track. The tail is filtered white noise with a fast exponential decay.\n\nThe critical insight: he shapes each layer's transient independently before any bus processing. 
He uses Kilohearts Transient Shaper (attack +4 to +6dB, sustain -6 to -8dB) rather than compression for this, because \"compression adds sustain as a side effect while a transient shaper gives you direct independent control of both.\"", + "Saturation and the crunch character": "The signature ExampleCreator snare crunch comes from parallel saturation — not inline. He routes the summed snare to a send with Trash 2 using the tape algorithm at 30-40% wet. The key detail: he puts a pre-delay of approximately 5ms on the saturation send, which lets the clean transient click through untouched while only the body and tail pick up harmonic content.\n\nHe explicitly warns against saturating the transient directly — says it \"smears the snap into mush\" and you lose the precision that makes the snare cut through.", + "Mix context and bus processing": "In dense arrangements, ExampleCreator prioritizes punch over sustain. On the snare bus compressor, he uses a high-pass sidechain filter (around 200-300Hz) so low-end energy from the body layer does not trigger gain reduction. This keeps the snare's ability to cut through the mix independent of whatever the sub bass is doing.\n\nHe also checks the snare against the lead or vocal bus specifically, not just soloed — because the 2-4kHz presence range is where both elements compete, and he would rather notch the snare's body slightly than lose vocal clarity." 
+ }, + "signal_chains": [ + { + "name": "Snare layer processing", + "steps": [ + "Noise osc (Vital) → Transient Shaper (Kilohearts, attack +6dB, sustain -8dB) → EQ (Pro-Q 3, shelf -3dB @ 12kHz)", + "Dry path → snare bus", + "Send → Pre-delay (5ms) → Trash 2 (tape algorithm, 35% wet) → snare bus" + ] + } + ], + "plugins": ["Vital", "Kilohearts Transient Shaper", "FabFilter Pro-Q 3", "iZotope Trash 2"], + "source_quality": "structured", + "moment_indices": [0, 1, 2, 3, 4] + } + ] +} +``` + +## Field rules + +- **title**: The technique or concept name followed by "by {name from tag}" — concise and search-friendly. Examples: "Snare Design by Break", "Bass Resampling Workflow by KOAN Sound", "Mid-Side EQ for Width by Mr. Bill". Use title case. +- **slug**: URL-safe, lowercase, hyphenated version of the title including creator name. Examples: "snare-design-examplecreator", "bass-resampling-workflow-koan-sound". The creator name in the slug prevents collisions when multiple creators teach the same technique. +- **topic_category**: The primary category. Must match the taxonomy. +- **topic_tags**: All relevant tags aggregated from the classified moments. Deduplicated. +- **summary**: 2-4 sentences that capture the essence of the entire technique page. This summary appears as the page header and in search results, so it must be information-dense and compelling. A reader should understand the core approach from this summary alone. +- **body_sections**: Dictionary of section_name → prose content. Section names are derived from content, not generic templates. Prose follows all voice, tone, and quality guidelines above. Use \n\n for paragraph breaks within a section. +- **signal_chains**: Array of signal chain objects. Each has a "name" (what this chain is for) and "steps" (ordered list of stages with plugin names, settings, and roles). Only include when explicitly demonstrated by the creator. Empty array if not applicable. 
+- **plugins**: Deduplicated array of all plugins, instruments, and specific tools mentioned across the moments. Use "<Manufacturer> <Plugin Name>" format consistently (e.g., "FabFilter Pro-Q 3" not "Pro-Q", "Xfer Serum" not just "Serum", "Valhalla VintageVerb" not "Valhalla reverb", "Kilohearts Disperser" not "Disperser"). Always include the manufacturer name for disambiguation. +- **source_quality**: One of "structured", "mixed", "unstructured". +- **moment_indices**: Array of integer indices from the input moments list that this page covers. Every moment index must appear in exactly one page. If you produce a single page, include all indices. If you split into multiple pages, partition the indices so each moment is assigned to the page it most closely relates to. This field is required. + +## Critical rules + +- Never produce generic filler prose. Every sentence should contain specific, actionable information or meaningful creator reasoning. If you find yourself writing "This technique is useful for..." or "This is an important aspect of production..." — delete it and write something specific instead. +- Never invent information. If the source moments don't specify a value, don't make one up. Say "he adjusts the attack" not "he sets the attack to 2ms" if the specific value wasn't mentioned. +- Preserve the creator's actual opinions and warnings. These are often the most valuable content. Quote them directly when they are memorable or forceful. +- If the source moments are thin (only 1-2 moments with brief summaries), produce a proportionally shorter page. A 2-section page with genuine substance is better than a 5-section page padded with filler. +- Output ONLY the JSON object, no other text. 
diff --git a/prompts/stage5_synthesis.txt b/prompts/stage5_synthesis.txt index 44e59d0..4cf236b 100644 --- a/prompts/stage5_synthesis.txt +++ b/prompts/stage5_synthesis.txt @@ -17,14 +17,14 @@ Write as if you are a knowledgeable colleague explaining what you learned from w - **Direct and confident** — state what the creator does, not "the creator appears to" or "it seems like they" - **Technical but accessible** — use production terminology naturally, but explain non-obvious concepts when the creator's explanation adds value -- **Preserving the creator's voice** — when the creator uses a memorable phrase, vivid metaphor, or strong opinion, quote them directly with quotation marks. These are often the most valuable parts. Examples: 'He warns against using OTT on snares — says it "smears the snap into mush."' or 'Her reasoning: "every bus you add is another place you'll be tempted to put a compressor that doesn't need to be there."' +- **Preserving the creator's voice** — This is a critical priority. You must aggressively capture the creator's unique personality and phrasing. Do not just summarize their points; extract their exact words for any memorable metaphors, strong opinions, or colorful descriptions. Quote them directly with quotation marks. These direct quotes are often the most valuable parts of the page. Examples: 'He warns against using OTT on snares — says it "smears the snap into mush."' or 'Her reasoning: "every bus you add is another place you'll be tempted to put a compressor that doesn't need to be there."' If the creator uses slang or specific adjectives (e.g., "muddy," "punchy," "surgical"), retain those exact words in your description rather than substituting synonyms. - **Specific over general** — always prefer concrete details (frequencies, ratios, ms values, plugin names, specific settings) over vague descriptions. "Uses compression" is never acceptable if the source moments contain specifics. 
## Body sections structure Do NOT use generic section names like "Overview," "Step-by-Step Process," "Key Settings," or "Tips and Variations." These produce lifeless, formulaic output. -Instead, derive section names from the actual content. Each section should cover one sub-aspect of the technique. Use descriptive names that tell the reader exactly what they'll learn: +Instead, derive section names from the actual content. Each section should cover one distinct sub-aspect of the technique. Use descriptive names that tell the reader exactly what they'll learn: Good section names (examples): - "Layer construction" / "Saturation and the crunch character" / "Mix context and bus processing" @@ -35,7 +35,7 @@ Good section names (examples): Bad section names (never use these): - "Overview" / "Introduction" / "Step-by-Step Process" / "Key Settings" / "Tips and Variations" / "Conclusion" / "Summary" -Each section should be 2-5 paragraphs of substantive prose. A section with only 1-2 sentences is too thin — either merge it with another section or expand it with the detail available in the source moments. +Each section must be 2-5 paragraphs of substantive prose. A section with only 1-2 sentences is too thin — either merge it with another section or expand it with the detail available in the source moments. Ensure each paragraph flows logically into the next, building a complete picture of that specific sub-aspect. ## Signal chains @@ -110,20 +110,4 @@ Return a JSON object with a single key "pages" containing a list of synthesized ## Field rules - **title**: The technique or concept name followed by "by {name from tag}" — concise and search-friendly. Examples: "Snare Design by Break", "Bass Resampling Workflow by KOAN Sound", "Mid-Side EQ for Width by Mr. Bill". Use title case. -- **slug**: URL-safe, lowercase, hyphenated version of the title including creator name. Examples: "snare-design-examplecreator", "bass-resampling-workflow-koan-sound". 
The creator name in the slug prevents collisions when multiple creators teach the same technique. -- **topic_category**: The primary category. Must match the taxonomy. -- **topic_tags**: All relevant tags aggregated from the classified moments. Deduplicated. -- **summary**: 2-4 sentences that capture the essence of the entire technique page. This summary appears as the page header and in search results, so it must be information-dense and compelling. A reader should understand the core approach from this summary alone. -- **body_sections**: Dictionary of section_name → prose content. Section names are derived from content, not generic templates. Prose follows all voice, tone, and quality guidelines above. Use \n\n for paragraph breaks within a section. -- **signal_chains**: Array of signal chain objects. Each has a "name" (what this chain is for) and "steps" (ordered list of stages with plugin names, settings, and roles). Only include when explicitly demonstrated by the creator. Empty array if not applicable. -- **plugins**: Deduplicated array of all plugins, instruments, and specific tools mentioned across the moments. Use " " format consistently (e.g., "FabFilter Pro-Q 3" not "Pro-Q", "Xfer Serum" not just "Serum", "Valhalla VintageVerb" not "Valhalla reverb", "Kilohearts Disperser" not "Disperser"). Always include the manufacturer name for disambiguation. -- **source_quality**: One of "structured", "mixed", "unstructured". -- **moment_indices**: Array of integer indices from the input moments list that this page covers. Every moment index must appear in exactly one page. If you produce a single page, include all indices. If you split into multiple pages, partition the indices so each moment is assigned to the page it most closely relates to. This field is required. - -## Critical rules - -- Never produce generic filler prose. Every sentence should contain specific, actionable information or meaningful creator reasoning. 
If you find yourself writing "This technique is useful for..." or "This is an important aspect of production..." — delete it and write something specific instead. -- Never invent information. If the source moments don't specify a value, don't make one up. Say "he adjusts the attack" not "he sets the attack to 2ms" if the specific value wasn't mentioned. -- Preserve the creator's actual opinions and warnings. These are often the most valuable content. Quote them directly when they are memorable or forceful. -- If the source moments are thin (only 1-2 moments with brief summaries), produce a proportionally shorter page. A 2-section page with genuine substance is better than a 5-section page padded with filler. -- Output ONLY the JSON object, no other text. +- **slug**: URL-safe, lowercase, hyphenated version of the title including creator name. Examples: "snare-design-examplecreator", "bass-resampling-workflow-koan-sound". \ No newline at end of file