feat: Added VoiceDial class with 3-band prompt modification and ScoreRu…

- "backend/pipeline/quality/voice_dial.py"
- "backend/pipeline/quality/scorer.py"
- "backend/pipeline/quality/__main__.py"

GSD-Task: S02/T02
This commit is contained in:
jlightner 2026-04-01 08:57:07 +00:00
parent 91cae921a4
commit 0086573af5
3 changed files with 218 additions and 8 deletions

View file

@ -94,10 +94,28 @@ def _run_score(args: argparse.Namespace) -> int:
print("No moments found in input file", file=sys.stderr) print("No moments found in input file", file=sys.stderr)
return 1 return 1
# -- Build page stub from moments for scoring -- settings = get_settings()
# When --voice-level is set, T02 will re-synthesize. For now, build a client = LLMClient(settings)
# minimal page representation from the moments so the scorer has runner = ScoreRunner(client)
# something to evaluate.
# -- Voice-level mode: re-synthesize then score --
if args.voice_level is not None:
voice_level = args.voice_level
if not (0.0 <= voice_level <= 1.0):
print("--voice-level must be between 0.0 and 1.0", file=sys.stderr)
return 1
print(f"\nRe-synthesizing + scoring for '{creator_name}' ({len(moments)} moments, voice_level={voice_level})...")
result = runner.synthesize_and_score(moments, creator_name, voice_level)
if result.error:
runner.print_report(result)
return 1
runner.print_report(result)
return 0
# -- Standard mode: build page stub from moments, score directly --
page_json = { page_json = {
"title": f"{creator_name} — Technique Page", "title": f"{creator_name} — Technique Page",
"creator_name": creator_name, "creator_name": creator_name,
@ -111,10 +129,6 @@ def _run_score(args: argparse.Namespace) -> int:
], ],
} }
settings = get_settings()
client = LLMClient(settings)
runner = ScoreRunner(client)
print(f"\nScoring page for '{creator_name}' ({len(moments)} moments)...") print(f"\nScoring page for '{creator_name}' ({len(moments)} moments)...")
result = runner.score_page(page_json, moments) result = runner.score_page(page_json, moments)

View file

@ -13,6 +13,7 @@ from __future__ import annotations
import json import json
import logging import logging
import sys
import time import time
from dataclasses import dataclass, field from dataclasses import dataclass, field
@ -20,6 +21,7 @@ import openai
from pydantic import BaseModel from pydantic import BaseModel
from pipeline.llm_client import LLMClient from pipeline.llm_client import LLMClient
from pipeline.quality.voice_dial import VoiceDial
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -213,6 +215,109 @@ class ScoreRunner:
elapsed_seconds=elapsed, elapsed_seconds=elapsed,
) )
def synthesize_and_score(
    self,
    moments: list[dict],
    creator_name: str,
    voice_level: float,
) -> ScoreResult:
    """Re-synthesize from source moments with a voice-dialed prompt, then score.

    Loads the stage 5 synthesis prompt from disk, applies the VoiceDial
    modifier at the given voice_level, calls the LLM to produce a
    SynthesisResult, then scores the first page only (any further pages in
    the synthesis are ignored).

    Failures are reported through ``ScoreResult.error`` rather than raised:
    a missing prompt file, an unreachable LLM endpoint, an unparseable
    synthesis response, or a synthesis that contains no pages.

    Parameters
    ----------
    moments:
        Source key moments (dicts with summary, transcript_excerpt, etc.)
    creator_name:
        Creator name to inject into the synthesis prompt.
    voice_level:
        Float 0.0 to 1.0 controlling voice preservation intensity.

    Returns
    -------
    ScoreResult
        Per-dimension scores after voice-dialed re-synthesis. On success,
        ``elapsed_seconds`` covers synthesis plus scoring time.
    """
    # Kept function-local as in the original (presumably to avoid an import
    # cycle with the pipeline stages module; confirm before hoisting).
    from pipeline.schemas import SynthesisResult
    from pipeline.stages import _get_stage_config, _load_prompt

    # Load and modify the stage 5 system prompt
    try:
        base_prompt = _load_prompt("stage5_synthesis.txt")
    except FileNotFoundError as exc:
        return ScoreResult(error=f"Prompt file not found: {exc}")

    dial = VoiceDial(base_prompt)
    modified_prompt = dial.modify(voice_level)
    band = dial.band_name(voice_level)

    # Build user prompt in the same format as _synthesize_chunk
    moments_json = json.dumps(moments, indent=2)
    user_prompt = f"<creator>{creator_name}</creator>\n<moments>\n{moments_json}\n</moments>"

    model_override, modality = _get_stage_config(5)

    print(f" Re-synthesizing at voice_level={voice_level} (band={band})...")

    t0 = time.monotonic()
    try:
        raw = self.client.complete(
            system_prompt=modified_prompt,
            user_prompt=user_prompt,
            response_model=SynthesisResult,
            modality=modality,
            model_override=model_override,
        )
        elapsed_synth = round(time.monotonic() - t0, 2)
    except (openai.APIConnectionError, openai.APITimeoutError) as exc:
        elapsed_synth = round(time.monotonic() - t0, 2)
        url = self.client.settings.llm_api_url
        fallback = self.client.settings.llm_fallback_url
        return ScoreResult(
            elapsed_seconds=elapsed_synth,
            error=(
                f"Cannot reach LLM endpoint at {url} (fallback {fallback}). "
                f"Error: {exc}"
            ),
        )

    # Parse synthesis response
    raw_text = str(raw).strip()
    try:
        synthesis = self.client.parse_response(raw_text, SynthesisResult)
    except Exception as exc:
        # Deliberately broad: the original listed JSONDecodeError and
        # ValueError next to Exception, which already subsumes both. Any
        # parse failure is turned into an error result instead of a crash.
        logger.error("Malformed synthesis response: %.300s", raw_text)
        return ScoreResult(
            elapsed_seconds=elapsed_synth,
            error=f"Malformed synthesis response: {exc}. Raw excerpt: {raw_text[:200]}",
        )

    if not synthesis.pages:
        return ScoreResult(
            elapsed_seconds=elapsed_synth,
            error="Synthesis returned no pages.",
        )

    # Score the first page
    page = synthesis.pages[0]
    page_json = {
        "title": page.title,
        "creator_name": creator_name,
        "summary": page.summary,
        # body_sections is iterated with .items(), i.e. treated as a
        # heading -> content mapping.
        "body_sections": [
            {"heading": heading, "content": content}
            for heading, content in page.body_sections.items()
        ],
    }

    print(f" Synthesis complete ({elapsed_synth}s). Scoring...")
    result = self.score_page(page_json, moments)

    # Include synthesis time in total
    result.elapsed_seconds = round(result.elapsed_seconds + elapsed_synth, 2)
    return result
def print_report(self, result: ScoreResult) -> None: def print_report(self, result: ScoreResult) -> None:
"""Print a formatted scoring report to stdout.""" """Print a formatted scoring report to stdout."""
print("\n" + "=" * 60) print("\n" + "=" * 60)

View file

@ -0,0 +1,91 @@
"""Voice preservation dial — modifies Stage 5 synthesis prompt by intensity band.
Three bands control how much of the creator's original voice is preserved:
- Low (0.0–0.33): Clinical, encyclopedic tone — suppress direct quotes
- Mid (0.34–0.66): Base prompt unchanged (already ~0.6 voice preservation)
- High (0.67–1.0): Maximum voice — prioritize exact words, strong opinions
"""
from __future__ import annotations
# ── Band modifier text ────────────────────────────────────────────────────────
# Prompt text appended to the base prompt by VoiceDial.modify() when the
# voice level falls in the low band (voice_level <= VoiceDial.LOW_UPPER).
# This is runtime prompt content sent to the LLM, not documentation.
# NOTE(review): a separator after "IMPORTANT" may have been dropped in this
# text; confirm against the intended prompt wording.
_LOW_BAND_MODIFIER = """
## Voice Suppression Override
IMPORTANT override the voice/tone guidelines above. For this synthesis:
- Do NOT include any direct quotes from the creator. Rephrase all insights in neutral third-person encyclopedic style.
- Do NOT attribute opinions or preferences to the creator by name (avoid "he recommends", "she prefers").
- Remove all personality markers, humor, strong opinions, and conversational tone.
- Write as a reference manual: factual, impersonal, technically precise.
- Replace phrases like "he warns against" with neutral statements like "this approach is generally avoided because."
- Suppress colloquialisms and informal language entirely.
"""
# Prompt text appended to the base prompt by VoiceDial.modify() when the
# voice level falls in the high band (voice_level >= VoiceDial.HIGH_LOWER).
# NOTE(review): separators after "IMPORTANT" and "by name" may have been
# dropped in this text; confirm against the intended prompt wording.
_HIGH_BAND_MODIFIER = """
## Maximum Voice Preservation Override
IMPORTANT amplify the voice/tone guidelines above. For this synthesis:
- Maximize the use of direct quotes from the transcript. Every memorable phrase, vivid metaphor, or strong opinion should be quoted verbatim with quotation marks.
- Attribute all insights, preferences, and techniques to the creator by name use their name frequently.
- Preserve personality, humor, strong opinions, and conversational tone. If the creator is emphatic, the prose should feel emphatic.
- Prioritize the creator's exact words over paraphrase. When a transcript excerpt contains a usable phrase, quote it rather than summarizing it.
- Include warnings, caveats, and opinionated asides in the creator's own voice.
- The resulting page should feel like the creator is speaking directly to the reader through the text.
"""
# ── VoiceDial class ───────────────────────────────────────────────────────────
class VoiceDial:
    """Modifies a Stage 5 synthesis prompt based on a voice_level parameter.

    The level is clamped to [0.0, 1.0] and mapped to one of three bands:
    "low" (<= LOW_UPPER), "high" (>= HIGH_LOWER), or "mid" (everything in
    between). Only the low and high bands append extra instructions; the
    mid band returns the base prompt unchanged.

    Parameters
    ----------
    base_prompt:
        The original stage5_synthesis.txt system prompt content.
    """

    # Band boundaries (both inclusive). Subclasses may override them;
    # modify() and band_name() resolve them through the same classifier so
    # the reported band always matches the modifier that gets applied.
    LOW_UPPER = 0.33
    HIGH_LOWER = 0.67

    def __init__(self, base_prompt: str) -> None:
        self.base_prompt = base_prompt

    @staticmethod
    def _classify(voice_level: float, low_upper: float, high_lower: float) -> str:
        """Clamp voice_level to [0.0, 1.0] and return "low", "mid" or "high"."""
        clamped = max(0.0, min(1.0, voice_level))
        if clamped <= low_upper:
            return "low"
        if clamped >= high_lower:
            return "high"
        return "mid"

    def modify(self, voice_level: float) -> str:
        """Return the system prompt modified for the given voice_level.

        Parameters
        ----------
        voice_level:
            Float 0.0 to 1.0. Values outside this range are clamped.

        Returns
        -------
        str
            Modified system prompt with band-appropriate instructions appended.
        """
        # Thresholds are read through self so instance- or subclass-level
        # overrides are honoured, as in the original implementation.
        band = self._classify(voice_level, self.LOW_UPPER, self.HIGH_LOWER)
        if band == "low":
            return self.base_prompt + _LOW_BAND_MODIFIER
        if band == "high":
            return self.base_prompt + _HIGH_BAND_MODIFIER
        # Mid band — base prompt is already moderate voice preservation
        return self.base_prompt

    @classmethod
    def band_name(cls, voice_level: float) -> str:
        """Return the human-readable band name for a voice_level value.

        Callable on the class or on an instance. Thresholds come from the
        class it is called on, so a subclass with different boundaries
        reports the same band that its modify() applies.
        """
        return cls._classify(voice_level, cls.LOW_UPPER, cls.HIGH_LOWER)