feat: Added VoiceDial class with 3-band prompt modification and ScoreRu…

- "backend/pipeline/quality/voice_dial.py"
- "backend/pipeline/quality/scorer.py"
- "backend/pipeline/quality/__main__.py"

GSD-Task: S02/T02
2026-04-01 08:57:07 +00:00 · 2026-04-01 08:57:07 +00:00 · 0086573af5
commit 0086573af5
parent 91cae921a4
3 changed files with 218 additions and 8 deletions
--- a/backend/pipeline/quality/__main__.py
+++ b/backend/pipeline/quality/__main__.py
@@ -94,10 +94,28 @@ def _run_score(args: argparse.Namespace) -> int:
        print("No moments found in input file", file=sys.stderr)
        return 1

-    # -- Build page stub from moments for scoring --
-    # When --voice-level is set, T02 will re-synthesize. For now, build a
-    # minimal page representation from the moments so the scorer has
-    # something to evaluate.
+    settings = get_settings()
+    client = LLMClient(settings)
+    runner = ScoreRunner(client)
+
+    # -- Voice-level mode: re-synthesize then score --
+    if args.voice_level is not None:
+        voice_level = args.voice_level
+        if not (0.0 <= voice_level <= 1.0):
+            print("--voice-level must be between 0.0 and 1.0", file=sys.stderr)
+            return 1
+
+        print(f"\nRe-synthesizing + scoring for '{creator_name}' ({len(moments)} moments, voice_level={voice_level})...")
+        result = runner.synthesize_and_score(moments, creator_name, voice_level)
+
+        if result.error:
+            runner.print_report(result)
+            return 1
+
+        runner.print_report(result)
+        return 0
+
+    # -- Standard mode: build page stub from moments, score directly --
    page_json = {
        "title": f"{creator_name} — Technique Page",
        "creator_name": creator_name,
@@ -111,10 +129,6 @@ def _run_score(args: argparse.Namespace) -> int:
        ],
    }

-    settings = get_settings()
-    client = LLMClient(settings)
-    runner = ScoreRunner(client)
-
    print(f"\nScoring page for '{creator_name}' ({len(moments)} moments)...")

    result = runner.score_page(page_json, moments)
--- a/backend/pipeline/quality/scorer.py
+++ b/backend/pipeline/quality/scorer.py
@@ -13,6 +13,7 @@ from __future__ import annotations

 import json
 import logging
+import sys
 import time
 from dataclasses import dataclass, field

@@ -20,6 +21,7 @@ import openai
 from pydantic import BaseModel

 from pipeline.llm_client import LLMClient
+from pipeline.quality.voice_dial import VoiceDial

 logger = logging.getLogger(__name__)

@@ -213,6 +215,109 @@ class ScoreRunner:
            elapsed_seconds=elapsed,
        )

+    def synthesize_and_score(
+        self,
+        moments: list[dict],
+        creator_name: str,
+        voice_level: float,
+    ) -> ScoreResult:
+        """Re-synthesize from source moments with a voice-dialed prompt, then score.
+
+        Loads the stage 5 synthesis prompt from disk, applies the VoiceDial
+        modifier at the given voice_level, calls the LLM to produce a
+        SynthesisResult, then scores the first page.
+
+        Parameters
+        ----------
+        moments:
+            Source key moments (dicts with summary, transcript_excerpt, etc.)
+        creator_name:
+            Creator name to inject into the synthesis prompt.
+        voice_level:
+            Float 0.0–1.0 controlling voice preservation intensity.
+
+        Returns
+        -------
+        ScoreResult with per-dimension scores after voice-dialed re-synthesis.
+        """
+        from pipeline.schemas import SynthesisResult
+        from pipeline.stages import _get_stage_config, _load_prompt
+
+        # Load and modify the stage 5 system prompt
+        try:
+            base_prompt = _load_prompt("stage5_synthesis.txt")
+        except FileNotFoundError as exc:
+            return ScoreResult(error=f"Prompt file not found: {exc}")
+
+        dial = VoiceDial(base_prompt)
+        modified_prompt = dial.modify(voice_level)
+        band = dial.band_name(voice_level)
+
+        # Build user prompt in the same format as _synthesize_chunk
+        moments_json = json.dumps(moments, indent=2)
+        user_prompt = f"<creator>{creator_name}</creator>\n<moments>\n{moments_json}\n</moments>"
+
+        model_override, modality = _get_stage_config(5)
+
+        print(f"  Re-synthesizing at voice_level={voice_level} (band={band})...")
+
+        t0 = time.monotonic()
+        try:
+            raw = self.client.complete(
+                system_prompt=modified_prompt,
+                user_prompt=user_prompt,
+                response_model=SynthesisResult,
+                modality=modality,
+                model_override=model_override,
+            )
+            elapsed_synth = round(time.monotonic() - t0, 2)
+        except (openai.APIConnectionError, openai.APITimeoutError) as exc:
+            elapsed_synth = round(time.monotonic() - t0, 2)
+            url = self.client.settings.llm_api_url
+            fallback = self.client.settings.llm_fallback_url
+            return ScoreResult(
+                elapsed_seconds=elapsed_synth,
+                error=(
+                    f"Cannot reach LLM endpoint at {url} (fallback {fallback}). "
+                    f"Error: {exc}"
+                ),
+            )
+
+        # Parse synthesis response
+        raw_text = str(raw).strip()
+        try:
+            synthesis = self.client.parse_response(raw_text, SynthesisResult)
+        except (json.JSONDecodeError, ValueError, Exception) as exc:
+            logger.error("Malformed synthesis response: %.300s", raw_text)
+            return ScoreResult(
+                elapsed_seconds=elapsed_synth,
+                error=f"Malformed synthesis response: {exc}. Raw excerpt: {raw_text[:200]}",
+            )
+
+        if not synthesis.pages:
+            return ScoreResult(
+                elapsed_seconds=elapsed_synth,
+                error="Synthesis returned no pages.",
+            )
+
+        # Score the first page
+        page = synthesis.pages[0]
+        page_json = {
+            "title": page.title,
+            "creator_name": creator_name,
+            "summary": page.summary,
+            "body_sections": [
+                {"heading": heading, "content": content}
+                for heading, content in page.body_sections.items()
+            ],
+        }
+
+        print(f"  Synthesis complete ({elapsed_synth}s). Scoring...")
+        result = self.score_page(page_json, moments)
+        # Include synthesis time in total
+        result.elapsed_seconds = round(result.elapsed_seconds + elapsed_synth, 2)
+        return result
+
    def print_report(self, result: ScoreResult) -> None:
        """Print a formatted scoring report to stdout."""
        print("\n" + "=" * 60)
--- a/backend/pipeline/quality/voice_dial.py
+++ b/backend/pipeline/quality/voice_dial.py
@@ -0,0 +1,91 @@
+"""Voice preservation dial — modifies Stage 5 synthesis prompt by intensity band.
+
+Three bands control how much of the creator's original voice is preserved:
+  - Low  (0.0–0.33): Clinical, encyclopedic tone — suppress direct quotes
+  - Mid  (0.34–0.66): Base prompt unchanged (already ~0.6 voice preservation)
+  - High (0.67–1.0): Maximum voice — prioritize exact words, strong opinions
+"""
+from __future__ import annotations
+
+
+# ── Band modifier text ────────────────────────────────────────────────────────
+
+_LOW_BAND_MODIFIER = """
+
+## Voice Suppression Override
+
+IMPORTANT — override the voice/tone guidelines above. For this synthesis:
+
+- Do NOT include any direct quotes from the creator. Rephrase all insights in neutral third-person encyclopedic style.
+- Do NOT attribute opinions or preferences to the creator by name (avoid "he recommends", "she prefers").
+- Remove all personality markers, humor, strong opinions, and conversational tone.
+- Write as a reference manual: factual, impersonal, technically precise.
+- Replace phrases like "he warns against" with neutral statements like "this approach is generally avoided because."
+- Suppress colloquialisms and informal language entirely.
+"""
+
+_HIGH_BAND_MODIFIER = """
+
+## Maximum Voice Preservation Override
+
+IMPORTANT — amplify the voice/tone guidelines above. For this synthesis:
+
+- Maximize the use of direct quotes from the transcript. Every memorable phrase, vivid metaphor, or strong opinion should be quoted verbatim with quotation marks.
+- Attribute all insights, preferences, and techniques to the creator by name — use their name frequently.
+- Preserve personality, humor, strong opinions, and conversational tone. If the creator is emphatic, the prose should feel emphatic.
+- Prioritize the creator's exact words over paraphrase. When a transcript excerpt contains a usable phrase, quote it rather than summarizing it.
+- Include warnings, caveats, and opinionated asides in the creator's own voice.
+- The resulting page should feel like the creator is speaking directly to the reader through the text.
+"""
+
+
+# ── VoiceDial class ───────────────────────────────────────────────────────────
+
+class VoiceDial:
+    """Modifies a Stage 5 synthesis prompt based on a voice_level parameter.
+
+    Parameters
+    ----------
+    base_prompt:
+        The original stage5_synthesis.txt system prompt content.
+    """
+
+    # Band boundaries
+    LOW_UPPER = 0.33
+    HIGH_LOWER = 0.67
+
+    def __init__(self, base_prompt: str) -> None:
+        self.base_prompt = base_prompt
+
+    def modify(self, voice_level: float) -> str:
+        """Return the system prompt modified for the given voice_level.
+
+        Parameters
+        ----------
+        voice_level:
+            Float 0.0–1.0. Values outside this range are clamped.
+
+        Returns
+        -------
+        str
+            Modified system prompt with band-appropriate instructions appended.
+        """
+        voice_level = max(0.0, min(1.0, voice_level))
+
+        if voice_level <= self.LOW_UPPER:
+            return self.base_prompt + _LOW_BAND_MODIFIER
+        elif voice_level >= self.HIGH_LOWER:
+            return self.base_prompt + _HIGH_BAND_MODIFIER
+        else:
+            # Mid band — base prompt is already moderate voice preservation
+            return self.base_prompt
+
+    @staticmethod
+    def band_name(voice_level: float) -> str:
+        """Return the human-readable band name for a voice_level value."""
+        voice_level = max(0.0, min(1.0, voice_level))
+        if voice_level <= VoiceDial.LOW_UPPER:
+            return "low"
+        elif voice_level >= VoiceDial.HIGH_LOWER:
+            return "high"
+        return "mid"