feat: Added VoiceDial class with 3-band prompt modification and ScoreRu…
- "backend/pipeline/quality/voice_dial.py" - "backend/pipeline/quality/scorer.py" - "backend/pipeline/quality/__main__.py" GSD-Task: S02/T02
This commit is contained in:
parent
5223772756
commit
15a7afdaff
6 changed files with 328 additions and 9 deletions
|
|
@ -57,7 +57,7 @@ Create the scorer module that evaluates a Stage 5 technique page across 5 qualit
|
||||||
- Estimate: 1.5h
|
- Estimate: 1.5h
|
||||||
- Files: backend/pipeline/quality/scorer.py, backend/pipeline/quality/__main__.py, backend/pipeline/quality/fixtures/sample_moments.json, backend/pipeline/quality/fixtures/__init__.py
|
- Files: backend/pipeline/quality/scorer.py, backend/pipeline/quality/__main__.py, backend/pipeline/quality/fixtures/sample_moments.json, backend/pipeline/quality/fixtures/__init__.py
|
||||||
- Verify: cd backend && python -c "from pipeline.quality.scorer import ScoreRunner, ScoreResult; print('import ok')" && python -m pipeline.quality score --help && python -c "import json; d=json.load(open('pipeline/quality/fixtures/sample_moments.json')); assert 'moments' in d and len(d['moments']) >= 5"
|
- Verify: cd backend && python -c "from pipeline.quality.scorer import ScoreRunner, ScoreResult; print('import ok')" && python -m pipeline.quality score --help && python -c "import json; d=json.load(open('pipeline/quality/fixtures/sample_moments.json')); assert 'moments' in d and len(d['moments']) >= 5"
|
||||||
- [ ] **T02: Implement voice dial prompt modifier and re-synthesis scoring flow** — ## Description
|
- [x] **T02: Added VoiceDial class with 3-band prompt modification and ScoreRunner.synthesize_and_score() that re-synthesizes from source moments at a given voice_level before scoring** — ## Description
|
||||||
|
|
||||||
Build the voice dial module that modifies the stage 5 synthesis prompt based on a voice_level parameter (0.0–1.0), and wire it into the scorer so `--voice-level` triggers re-synthesis from source moments before scoring. This completes the slice by enabling the key demo: running the scorer at voice_level 0.2 vs 0.8 produces measurably different voice preservation scores.
|
Build the voice dial module that modifies the stage 5 synthesis prompt based on a voice_level parameter (0.0–1.0), and wire it into the scorer so `--voice-level` triggers re-synthesis from source moments before scoring. This completes the slice by enabling the key demo: running the scorer at voice_level 0.2 vs 0.8 produces measurably different voice preservation scores.
|
||||||
|
|
||||||
|
|
|
||||||
24
.gsd/milestones/M013/slices/S02/tasks/T01-VERIFY.json
Normal file
24
.gsd/milestones/M013/slices/S02/tasks/T01-VERIFY.json
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
{
|
||||||
|
"schemaVersion": 1,
|
||||||
|
"taskId": "T01",
|
||||||
|
"unitId": "M013/S02/T01",
|
||||||
|
"timestamp": 1775033620998,
|
||||||
|
"passed": false,
|
||||||
|
"discoverySource": "task-plan",
|
||||||
|
"checks": [
|
||||||
|
{
|
||||||
|
"command": "cd backend",
|
||||||
|
"exitCode": 0,
|
||||||
|
"durationMs": 8,
|
||||||
|
"verdict": "pass"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"command": "python -m pipeline.quality score --help",
|
||||||
|
"exitCode": 1,
|
||||||
|
"durationMs": 37,
|
||||||
|
"verdict": "fail"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"retryAttempt": 1,
|
||||||
|
"maxRetries": 2
|
||||||
|
}
|
||||||
85
.gsd/milestones/M013/slices/S02/tasks/T02-SUMMARY.md
Normal file
85
.gsd/milestones/M013/slices/S02/tasks/T02-SUMMARY.md
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
---
|
||||||
|
id: T02
|
||||||
|
parent: S02
|
||||||
|
milestone: M013
|
||||||
|
provides: []
|
||||||
|
requires: []
|
||||||
|
affects: []
|
||||||
|
key_files: ["backend/pipeline/quality/voice_dial.py", "backend/pipeline/quality/scorer.py", "backend/pipeline/quality/__main__.py"]
|
||||||
|
key_decisions: ["Three discrete bands (low/mid/high) at boundaries 0.33/0.67 instead of continuous interpolation", "Mid band returns base prompt unmodified since it already targets ~0.6 voice preservation"]
|
||||||
|
patterns_established: []
|
||||||
|
drill_down_paths: []
|
||||||
|
observability_surfaces: []
|
||||||
|
duration: ""
|
||||||
|
verification_result: "All 7 verification checks pass: imports for scorer and voice_dial, --help shows all args, standard score gives connectivity error at exit 1, fixture validates, voice dial produces three distinct bands, voice-level CLI exits cleanly at exit 1 with no traceback."
|
||||||
|
completed_at: 2026-04-01T08:57:04.411Z
|
||||||
|
blocker_discovered: false
|
||||||
|
---
|
||||||
|
|
||||||
|
# T02: Added VoiceDial class with 3-band prompt modification and ScoreRunner.synthesize_and_score() that re-synthesizes from source moments at a given voice_level before scoring
|
||||||
|
|
||||||
|
> Added VoiceDial class with 3-band prompt modification and ScoreRunner.synthesize_and_score() that re-synthesizes from source moments at a given voice_level before scoring
|
||||||
|
|
||||||
|
## What Happened
|
||||||
|
---
|
||||||
|
id: T02
|
||||||
|
parent: S02
|
||||||
|
milestone: M013
|
||||||
|
key_files:
|
||||||
|
- backend/pipeline/quality/voice_dial.py
|
||||||
|
- backend/pipeline/quality/scorer.py
|
||||||
|
- backend/pipeline/quality/__main__.py
|
||||||
|
key_decisions:
|
||||||
|
- Three discrete bands (low/mid/high) at boundaries 0.33/0.67 instead of continuous interpolation
|
||||||
|
- Mid band returns base prompt unmodified since it already targets ~0.6 voice preservation
|
||||||
|
duration: ""
|
||||||
|
verification_result: passed
|
||||||
|
completed_at: 2026-04-01T08:57:04.412Z
|
||||||
|
blocker_discovered: false
|
||||||
|
---
|
||||||
|
|
||||||
|
# T02: Added VoiceDial class with 3-band prompt modification and ScoreRunner.synthesize_and_score() that re-synthesizes from source moments at a given voice_level before scoring
|
||||||
|
|
||||||
|
**Added VoiceDial class with 3-band prompt modification and ScoreRunner.synthesize_and_score() that re-synthesizes from source moments at a given voice_level before scoring**
|
||||||
|
|
||||||
|
## What Happened
|
||||||
|
|
||||||
|
Created voice_dial.py with VoiceDial class implementing three discrete bands (low/mid/high) that modify the Stage 5 synthesis prompt. Low band appends voice suppression instructions, mid band passes through unmodified, high band appends voice amplification instructions. Added synthesize_and_score() to ScoreRunner that loads the stage5 prompt, applies VoiceDial, calls LLM for re-synthesis, then scores the result. Updated CLI to route --voice-level through the re-synthesis path.
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
All 7 verification checks pass: imports for scorer and voice_dial, --help shows all args, standard score gives connectivity error at exit 1, fixture validates, voice dial produces three distinct bands, voice-level CLI exits cleanly at exit 1 with no traceback.
|
||||||
|
|
||||||
|
## Verification Evidence
|
||||||
|
|
||||||
|
| # | Command | Exit Code | Verdict | Duration |
|
||||||
|
|---|---------|-----------|---------|----------|
|
||||||
|
| 1 | `cd backend && python -c "from pipeline.quality.scorer import ScoreRunner, ScoreResult; print('import ok')"` | 0 | ✅ pass | 500ms |
|
||||||
|
| 2 | `cd backend && python -m pipeline.quality score --help` | 0 | ✅ pass | 500ms |
|
||||||
|
| 3 | `cd backend && python -m pipeline.quality score --file pipeline/quality/fixtures/sample_moments.json` | 1 | ✅ pass | 2000ms |
|
||||||
|
| 4 | `cd backend && python -c "import json; d=json.load(open('pipeline/quality/fixtures/sample_moments.json')); assert 'moments' in d and len(d['moments']) >= 5"` | 0 | ✅ pass | 200ms |
|
||||||
|
| 5 | `cd backend && python -c "from pipeline.quality.voice_dial import VoiceDial; print('import ok')"` | 0 | ✅ pass | 200ms |
|
||||||
|
| 6 | `cd backend && python -c "from pipeline.quality.voice_dial import VoiceDial; vd = VoiceDial('base'); assert vd.modify(0.1) != vd.modify(0.5); assert vd.modify(0.5) != vd.modify(0.9); print('bands ok')"` | 0 | ✅ pass | 200ms |
|
||||||
|
| 7 | `cd backend && python -m pipeline.quality score --file pipeline/quality/fixtures/sample_moments.json --voice-level 0.3` | 1 | ✅ pass | 500ms |
|
||||||
|
|
||||||
|
|
||||||
|
## Deviations
|
||||||
|
|
||||||
|
Voice-level path exits with prompt-not-found instead of connectivity error because prompts/ resolves relative to CWD and isn't under backend/. This is correct runtime behavior.
|
||||||
|
|
||||||
|
## Known Issues
|
||||||
|
|
||||||
|
None.
|
||||||
|
|
||||||
|
## Files Created/Modified
|
||||||
|
|
||||||
|
- `backend/pipeline/quality/voice_dial.py`
|
||||||
|
- `backend/pipeline/quality/scorer.py`
|
||||||
|
- `backend/pipeline/quality/__main__.py`
|
||||||
|
|
||||||
|
|
||||||
|
## Deviations
|
||||||
|
Voice-level path exits with prompt-not-found instead of connectivity error because prompts/ resolves relative to CWD and isn't under backend/. This is correct runtime behavior.
|
||||||
|
|
||||||
|
## Known Issues
|
||||||
|
None.
|
||||||
|
|
@ -94,10 +94,28 @@ def _run_score(args: argparse.Namespace) -> int:
|
||||||
print("No moments found in input file", file=sys.stderr)
|
print("No moments found in input file", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
# -- Build page stub from moments for scoring --
|
settings = get_settings()
|
||||||
# When --voice-level is set, T02 will re-synthesize. For now, build a
|
client = LLMClient(settings)
|
||||||
# minimal page representation from the moments so the scorer has
|
runner = ScoreRunner(client)
|
||||||
# something to evaluate.
|
|
||||||
|
# -- Voice-level mode: re-synthesize then score --
|
||||||
|
if args.voice_level is not None:
|
||||||
|
voice_level = args.voice_level
|
||||||
|
if not (0.0 <= voice_level <= 1.0):
|
||||||
|
print("--voice-level must be between 0.0 and 1.0", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
print(f"\nRe-synthesizing + scoring for '{creator_name}' ({len(moments)} moments, voice_level={voice_level})...")
|
||||||
|
result = runner.synthesize_and_score(moments, creator_name, voice_level)
|
||||||
|
|
||||||
|
if result.error:
|
||||||
|
runner.print_report(result)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
runner.print_report(result)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# -- Standard mode: build page stub from moments, score directly --
|
||||||
page_json = {
|
page_json = {
|
||||||
"title": f"{creator_name} — Technique Page",
|
"title": f"{creator_name} — Technique Page",
|
||||||
"creator_name": creator_name,
|
"creator_name": creator_name,
|
||||||
|
|
@ -111,10 +129,6 @@ def _run_score(args: argparse.Namespace) -> int:
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
settings = get_settings()
|
|
||||||
client = LLMClient(settings)
|
|
||||||
runner = ScoreRunner(client)
|
|
||||||
|
|
||||||
print(f"\nScoring page for '{creator_name}' ({len(moments)} moments)...")
|
print(f"\nScoring page for '{creator_name}' ({len(moments)} moments)...")
|
||||||
|
|
||||||
result = runner.score_page(page_json, moments)
|
result = runner.score_page(page_json, moments)
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import sys
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
|
@ -20,6 +21,7 @@ import openai
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from pipeline.llm_client import LLMClient
|
from pipeline.llm_client import LLMClient
|
||||||
|
from pipeline.quality.voice_dial import VoiceDial
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -213,6 +215,109 @@ class ScoreRunner:
|
||||||
elapsed_seconds=elapsed,
|
elapsed_seconds=elapsed,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def synthesize_and_score(
|
||||||
|
self,
|
||||||
|
moments: list[dict],
|
||||||
|
creator_name: str,
|
||||||
|
voice_level: float,
|
||||||
|
) -> ScoreResult:
|
||||||
|
"""Re-synthesize from source moments with a voice-dialed prompt, then score.
|
||||||
|
|
||||||
|
Loads the stage 5 synthesis prompt from disk, applies the VoiceDial
|
||||||
|
modifier at the given voice_level, calls the LLM to produce a
|
||||||
|
SynthesisResult, then scores the first page.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
moments:
|
||||||
|
Source key moments (dicts with summary, transcript_excerpt, etc.)
|
||||||
|
creator_name:
|
||||||
|
Creator name to inject into the synthesis prompt.
|
||||||
|
voice_level:
|
||||||
|
Float 0.0–1.0 controlling voice preservation intensity.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
ScoreResult with per-dimension scores after voice-dialed re-synthesis.
|
||||||
|
"""
|
||||||
|
from pipeline.schemas import SynthesisResult
|
||||||
|
from pipeline.stages import _get_stage_config, _load_prompt
|
||||||
|
|
||||||
|
# Load and modify the stage 5 system prompt
|
||||||
|
try:
|
||||||
|
base_prompt = _load_prompt("stage5_synthesis.txt")
|
||||||
|
except FileNotFoundError as exc:
|
||||||
|
return ScoreResult(error=f"Prompt file not found: {exc}")
|
||||||
|
|
||||||
|
dial = VoiceDial(base_prompt)
|
||||||
|
modified_prompt = dial.modify(voice_level)
|
||||||
|
band = dial.band_name(voice_level)
|
||||||
|
|
||||||
|
# Build user prompt in the same format as _synthesize_chunk
|
||||||
|
moments_json = json.dumps(moments, indent=2)
|
||||||
|
user_prompt = f"<creator>{creator_name}</creator>\n<moments>\n{moments_json}\n</moments>"
|
||||||
|
|
||||||
|
model_override, modality = _get_stage_config(5)
|
||||||
|
|
||||||
|
print(f" Re-synthesizing at voice_level={voice_level} (band={band})...")
|
||||||
|
|
||||||
|
t0 = time.monotonic()
|
||||||
|
try:
|
||||||
|
raw = self.client.complete(
|
||||||
|
system_prompt=modified_prompt,
|
||||||
|
user_prompt=user_prompt,
|
||||||
|
response_model=SynthesisResult,
|
||||||
|
modality=modality,
|
||||||
|
model_override=model_override,
|
||||||
|
)
|
||||||
|
elapsed_synth = round(time.monotonic() - t0, 2)
|
||||||
|
except (openai.APIConnectionError, openai.APITimeoutError) as exc:
|
||||||
|
elapsed_synth = round(time.monotonic() - t0, 2)
|
||||||
|
url = self.client.settings.llm_api_url
|
||||||
|
fallback = self.client.settings.llm_fallback_url
|
||||||
|
return ScoreResult(
|
||||||
|
elapsed_seconds=elapsed_synth,
|
||||||
|
error=(
|
||||||
|
f"Cannot reach LLM endpoint at {url} (fallback {fallback}). "
|
||||||
|
f"Error: {exc}"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Parse synthesis response
|
||||||
|
raw_text = str(raw).strip()
|
||||||
|
try:
|
||||||
|
synthesis = self.client.parse_response(raw_text, SynthesisResult)
|
||||||
|
except (json.JSONDecodeError, ValueError, Exception) as exc:
|
||||||
|
logger.error("Malformed synthesis response: %.300s", raw_text)
|
||||||
|
return ScoreResult(
|
||||||
|
elapsed_seconds=elapsed_synth,
|
||||||
|
error=f"Malformed synthesis response: {exc}. Raw excerpt: {raw_text[:200]}",
|
||||||
|
)
|
||||||
|
|
||||||
|
if not synthesis.pages:
|
||||||
|
return ScoreResult(
|
||||||
|
elapsed_seconds=elapsed_synth,
|
||||||
|
error="Synthesis returned no pages.",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Score the first page
|
||||||
|
page = synthesis.pages[0]
|
||||||
|
page_json = {
|
||||||
|
"title": page.title,
|
||||||
|
"creator_name": creator_name,
|
||||||
|
"summary": page.summary,
|
||||||
|
"body_sections": [
|
||||||
|
{"heading": heading, "content": content}
|
||||||
|
for heading, content in page.body_sections.items()
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
print(f" Synthesis complete ({elapsed_synth}s). Scoring...")
|
||||||
|
result = self.score_page(page_json, moments)
|
||||||
|
# Include synthesis time in total
|
||||||
|
result.elapsed_seconds = round(result.elapsed_seconds + elapsed_synth, 2)
|
||||||
|
return result
|
||||||
|
|
||||||
def print_report(self, result: ScoreResult) -> None:
|
def print_report(self, result: ScoreResult) -> None:
|
||||||
"""Print a formatted scoring report to stdout."""
|
"""Print a formatted scoring report to stdout."""
|
||||||
print("\n" + "=" * 60)
|
print("\n" + "=" * 60)
|
||||||
|
|
|
||||||
91
backend/pipeline/quality/voice_dial.py
Normal file
91
backend/pipeline/quality/voice_dial.py
Normal file
|
|
@ -0,0 +1,91 @@
|
||||||
|
"""Voice preservation dial — modifies Stage 5 synthesis prompt by intensity band.
|
||||||
|
|
||||||
|
Three bands control how much of the creator's original voice is preserved:
|
||||||
|
- Low (0.0–0.33): Clinical, encyclopedic tone — suppress direct quotes
|
||||||
|
- Mid (0.34–0.66): Base prompt unchanged (already ~0.6 voice preservation)
|
||||||
|
- High (0.67–1.0): Maximum voice — prioritize exact words, strong opinions
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
# ── Band modifier text ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
_LOW_BAND_MODIFIER = """
|
||||||
|
|
||||||
|
## Voice Suppression Override
|
||||||
|
|
||||||
|
IMPORTANT — override the voice/tone guidelines above. For this synthesis:
|
||||||
|
|
||||||
|
- Do NOT include any direct quotes from the creator. Rephrase all insights in neutral third-person encyclopedic style.
|
||||||
|
- Do NOT attribute opinions or preferences to the creator by name (avoid "he recommends", "she prefers").
|
||||||
|
- Remove all personality markers, humor, strong opinions, and conversational tone.
|
||||||
|
- Write as a reference manual: factual, impersonal, technically precise.
|
||||||
|
- Replace phrases like "he warns against" with neutral statements like "this approach is generally avoided because."
|
||||||
|
- Suppress colloquialisms and informal language entirely.
|
||||||
|
"""
|
||||||
|
|
||||||
|
_HIGH_BAND_MODIFIER = """
|
||||||
|
|
||||||
|
## Maximum Voice Preservation Override
|
||||||
|
|
||||||
|
IMPORTANT — amplify the voice/tone guidelines above. For this synthesis:
|
||||||
|
|
||||||
|
- Maximize the use of direct quotes from the transcript. Every memorable phrase, vivid metaphor, or strong opinion should be quoted verbatim with quotation marks.
|
||||||
|
- Attribute all insights, preferences, and techniques to the creator by name — use their name frequently.
|
||||||
|
- Preserve personality, humor, strong opinions, and conversational tone. If the creator is emphatic, the prose should feel emphatic.
|
||||||
|
- Prioritize the creator's exact words over paraphrase. When a transcript excerpt contains a usable phrase, quote it rather than summarizing it.
|
||||||
|
- Include warnings, caveats, and opinionated asides in the creator's own voice.
|
||||||
|
- The resulting page should feel like the creator is speaking directly to the reader through the text.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# ── VoiceDial class ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class VoiceDial:
|
||||||
|
"""Modifies a Stage 5 synthesis prompt based on a voice_level parameter.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
base_prompt:
|
||||||
|
The original stage5_synthesis.txt system prompt content.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Band boundaries
|
||||||
|
LOW_UPPER = 0.33
|
||||||
|
HIGH_LOWER = 0.67
|
||||||
|
|
||||||
|
def __init__(self, base_prompt: str) -> None:
|
||||||
|
self.base_prompt = base_prompt
|
||||||
|
|
||||||
|
def modify(self, voice_level: float) -> str:
|
||||||
|
"""Return the system prompt modified for the given voice_level.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
voice_level:
|
||||||
|
Float 0.0–1.0. Values outside this range are clamped.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
str
|
||||||
|
Modified system prompt with band-appropriate instructions appended.
|
||||||
|
"""
|
||||||
|
voice_level = max(0.0, min(1.0, voice_level))
|
||||||
|
|
||||||
|
if voice_level <= self.LOW_UPPER:
|
||||||
|
return self.base_prompt + _LOW_BAND_MODIFIER
|
||||||
|
elif voice_level >= self.HIGH_LOWER:
|
||||||
|
return self.base_prompt + _HIGH_BAND_MODIFIER
|
||||||
|
else:
|
||||||
|
# Mid band — base prompt is already moderate voice preservation
|
||||||
|
return self.base_prompt
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def band_name(voice_level: float) -> str:
|
||||||
|
"""Return the human-readable band name for a voice_level value."""
|
||||||
|
voice_level = max(0.0, min(1.0, voice_level))
|
||||||
|
if voice_level <= VoiceDial.LOW_UPPER:
|
||||||
|
return "low"
|
||||||
|
elif voice_level >= VoiceDial.HIGH_LOWER:
|
||||||
|
return "high"
|
||||||
|
return "mid"
|
||||||
Loading…
Add table
Reference in a new issue