r"""ASS (Advanced SubStation Alpha) caption generator for shorts.

Converts word-level timings from Whisper transcripts into ASS subtitle
files with word-by-word karaoke highlighting. Each word gets its own
Dialogue line with {\k} tags that control highlight duration.

Pure functions — no DB access, no Celery dependency.
"""

from __future__ import annotations

import logging
from pathlib import Path
from typing import Any

logger = logging.getLogger(__name__)

# ── Default style configuration ──────────────────────────────────────────────

# Values are substituted verbatim into the "Style:" line of the ASS header.
# ASS colours are written as &HAABBGGRR (alpha first, then blue/green/red).
DEFAULT_STYLE: dict[str, Any] = {
    "font_name": "Arial",
    "font_size": 48,
    "primary_colour": "&H00FFFFFF",  # white (BGR + alpha)
    "secondary_colour": "&H0000FFFF",  # yellow highlight
    "outline_colour": "&H00000000",  # black outline
    "back_colour": "&H80000000",  # semi-transparent black shadow
    "bold": -1,  # bold
    "outline": 3,
    "shadow": 1,
    "alignment": 2,  # bottom-center
    "margin_v": 60,  # 60px from bottom (~15% on 1920h)
}


def _format_ass_time(seconds: float) -> str:
    """Convert seconds to ASS timestamp format: H:MM:SS.cc (centiseconds).

    The value is rounded to whole centiseconds *before* being split into
    fields, so a carry propagates into minutes/hours instead of producing
    an invalid ``SS`` field of ``60.00``. Negative inputs clamp to zero.

    >>> _format_ass_time(65.5)
    '0:01:05.50'
    >>> _format_ass_time(0.0)
    '0:00:00.00'
    >>> _format_ass_time(59.999)
    '0:01:00.00'
    """
    total_cs = max(0, round(seconds * 100))
    total_seconds, cs = divmod(total_cs, 100)
    total_minutes, s = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h}:{m:02d}:{s:02d}.{cs:02d}"


def _build_ass_header(style_config: dict[str, Any] | None = None) -> str:
    """Build ASS file header with script info and style definition.

    ``style_config`` entries override ``DEFAULT_STYLE``; ``None`` or an
    empty dict yields the defaults. The returned string ends with the
    ``[Events]`` format line and a trailing newline.
    """
    cfg = {**DEFAULT_STYLE, **(style_config or {})}

    header = (
        "[Script Info]\n"
        "Title: Chrysopedia Auto-Captions\n"
        "ScriptType: v4.00+\n"
        "PlayResX: 1080\n"
        "PlayResY: 1920\n"
        "WrapStyle: 0\n"
        "ScaledBorderAndShadow: yes\n"
        "\n"
        "[V4+ Styles]\n"
        "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, "
        "OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, "
        "ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, "
        "Alignment, MarginL, MarginR, MarginV, Encoding\n"
        f"Style: Default,{cfg['font_name']},{cfg['font_size']},"
        f"{cfg['primary_colour']},{cfg['secondary_colour']},"
        f"{cfg['outline_colour']},{cfg['back_colour']},"
        f"{cfg['bold']},0,0,0,"
        f"100,100,0,0,1,{cfg['outline']},{cfg['shadow']},"
        f"{cfg['alignment']},20,20,{cfg['margin_v']},1\n"
        "\n"
        "[Events]\n"
        "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"
    )
    return header


def generate_ass_captions(
    word_timings: list[dict[str, Any]],
    clip_start: float,
    style_config: dict[str, Any] | None = None,
) -> str:
    """Generate ASS subtitle content from word-level timings.

    Each word is emitted as a separate Dialogue line with karaoke timing
    (``{\\k}``) so that words highlight one-by-one. All word timestamps
    are offset by ``-clip_start`` to make them clip-relative (i.e. the
    first frame of the clip is t=0).

    Parameters
    ----------
    word_timings : list[dict]
        Word-timing dicts with ``word``, ``start``, ``end`` keys.
        ``start`` and ``end`` are absolute times in seconds. Entries whose
        ``word`` is missing, ``None``, or whitespace-only are skipped.
        A missing ``start`` defaults to 0.0 and a missing ``end`` defaults
        to ``start``.
    clip_start : float
        Absolute start time of the clip in seconds. Subtracted from all
        word timestamps.
    style_config : dict | None
        Override style parameters (merged onto DEFAULT_STYLE).

    Returns
    -------
    str — Full ASS file content. Empty dialogue section if no timings.
    """
    header = _build_ass_header(style_config)

    if not word_timings:
        logger.debug("No word timings provided — returning header-only ASS")
        return header

    lines: list[str] = [header]

    for w in word_timings:
        raw_word = w.get("word")
        if raw_word is None:
            continue
        # Collapse every whitespace run (including embedded newlines) to a
        # single space: an ASS event must occupy exactly one physical line,
        # so a raw line break inside the text would corrupt the file.
        word_text = " ".join(str(raw_word).split())
        if not word_text:
            continue

        abs_start = float(w.get("start", 0.0))
        abs_end = float(w.get("end", abs_start))

        # Make clip-relative; words that begin before the clip clamp to 0,
        # and the end is never allowed to precede the start.
        rel_start = max(0.0, abs_start - clip_start)
        rel_end = max(rel_start, abs_end - clip_start)

        # Karaoke duration in centiseconds (at least 1 so the tag is valid).
        k_duration = max(1, round((rel_end - rel_start) * 100))

        start_ts = _format_ass_time(rel_start)
        end_ts = _format_ass_time(rel_end)

        # Dialogue line with karaoke tag
        lines.append(
            f"Dialogue: 0,{start_ts},{end_ts},Default,,0,0,0,,"
            f"{{\\k{k_duration}}}{word_text}"
        )

    return "\n".join(lines) + "\n"


def write_ass_file(ass_content: str, output_path: str | Path) -> Path:
    """Write ASS content to disk as UTF-8.

    Creates parent directories if needed. ``output_path`` may be a string
    or a ``Path``; it is coerced to ``Path``. Returns the output path.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(ass_content, encoding="utf-8")
    if logger.isEnabledFor(logging.DEBUG):
        # Report the encoded size: len(str) counts characters, not bytes,
        # and captions routinely contain non-ASCII text.
        logger.debug(
            "Wrote ASS captions to %s (%d bytes)",
            output_path,
            len(ass_content.encode("utf-8")),
        )
    return output_path