chrysopedia/backend/pipeline/card_renderer.py
jlightner fa493e2640 feat: Built ffmpeg-based card renderer with concat demuxer pipeline and…
- "backend/pipeline/card_renderer.py"
- "backend/pipeline/shorts_generator.py"
- "backend/pipeline/stages.py"
- "backend/models.py"
- "alembic/versions/028_add_shorts_template.py"
- "backend/pipeline/test_card_renderer.py"

GSD-Task: S04/T02
2026-04-04 11:17:38 +00:00

298 lines
9 KiB
Python

"""FFmpeg-based intro/outro card video generation and segment concatenation.
Generates solid-color card clips with centered text using ffmpeg lavfi
(color + drawtext filters). Provides concat demuxer logic to assemble
intro + main clip + outro into a final short.
Pure functions — no DB access, no Celery dependency.
"""
from __future__ import annotations
import logging
import subprocess
import tempfile
from pathlib import Path
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Passed as ``timeout=`` to every ffmpeg ``subprocess.run`` call in this module.
FFMPEG_TIMEOUT_SECS = 120
# Default template values
# Default for the ``accent_color`` parameters and the "accent_color" config key.
DEFAULT_ACCENT_COLOR = "#22d3ee"
# Default for the ``font_family`` parameters and the "font_family" config key.
DEFAULT_FONT_FAMILY = "Inter"
# Fallbacks for the "intro_duration" / "outro_duration" config keys.
DEFAULT_INTRO_DURATION = 2.0
DEFAULT_OUTRO_DURATION = 2.0
def _escape_drawtext_text(text: str) -> str:
    """Return *text* quoted for a drawtext ``text=`` value inside a filtergraph.

    The value is unescaped three times before it is drawn, so it is escaped
    three times here, innermost layer first:

    1. drawtext text expansion (default mode): ``\\`` escapes the next
       character and ``%`` starts an expansion.
    2. Filter option parsing: ``\\``, ``'`` and ``:`` are special.
    3. Filtergraph parsing: the value is wrapped in single quotes, where only
       ``'`` itself needs care and is written as ``'\\''``.
    """
    expanded = text.replace("\\", "\\\\").replace("%", "\\%")
    option_value = (
        expanded.replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace(":", "\\:")
    )
    return "'" + option_value.replace("'", "'\\''") + "'"


def render_card(
    text: str,
    duration_secs: float,
    width: int,
    height: int,
    accent_color: str = DEFAULT_ACCENT_COLOR,
    font_family: str = DEFAULT_FONT_FAMILY,
) -> list[str]:
    """Build ffmpeg command args that generate a card mp4 from lavfi inputs.

    The card is a solid black background with centered white text and a thin
    accent-color bar at 65% of the height. A silent stereo audio track is
    added so the card can be concatenated with segments that carry audio.

    Args:
        text: Display text (e.g., creator name or "Thanks for watching").
        duration_secs: Card duration in seconds.
        width: Output width in pixels.
        height: Output height in pixels.
        accent_color: Hex color for the underline bar.
        font_family: Font family for drawtext (must be available on system).

    Returns:
        List of ffmpeg command arguments (without the output path — caller
        appends).

    Raises:
        ValueError: If the duration or either dimension is not positive.
    """
    if duration_secs <= 0:
        raise ValueError(f"duration_secs must be positive, got {duration_secs}")
    if width <= 0 or height <= 0:
        raise ValueError(f"dimensions must be positive, got {width}x{height}")

    # Font size scales with height — ~5% of output height, never below 24px.
    font_size = max(24, int(height * 0.05))

    # Accent bar: thin horizontal line at ~65% down, inset 20% on each side.
    bar_y = int(height * 0.65)
    bar_height = max(2, int(height * 0.004))
    bar_margin = int(width * 0.2)

    quoted_text = _escape_drawtext_text(text)

    # Filtergraph:
    #   1. color source for the black background
    #   2. drawtext for the centered title
    #   3. drawbox for the accent underline bar
    filtergraph = (
        f"color=c=black:s={width}x{height}:d={duration_secs}:r=30,"
        f"drawtext=text={quoted_text}"
        f":fontcolor=white:fontsize={font_size}"
        f":fontfile='':font='{font_family}'"
        f":x=(w-text_w)/2:y=(h-text_h)/2-{font_size},"
        f"drawbox=x={bar_margin}:y={bar_y}"
        f":w={width - 2 * bar_margin}:h={bar_height}"
        f":color='{accent_color}'@0.8:t=fill"
    )

    # ffmpeg applies each option to the NEXT file on the command line, so both
    # inputs are declared first; everything after the last ``-i`` then applies
    # to the output path the caller appends.
    return [
        "ffmpeg",
        "-y",
        # Input 0: generated video card.
        "-f", "lavfi",
        "-i", filtergraph,
        # Input 1: silent audio track so concat with audio segments works.
        "-f", "lavfi",
        "-i", f"anullsrc=r=44100:cl=stereo:d={duration_secs}",
        # Output options.
        "-c:v", "libx264",
        "-preset", "fast",
        "-crf", "23",
        "-pix_fmt", "yuv420p",
        "-c:a", "aac",
        "-b:a", "128k",
        "-t", str(duration_secs),
        "-shortest",
        "-movflags", "+faststart",
    ]
def render_card_to_file(
    text: str,
    duration_secs: float,
    width: int,
    height: int,
    output_path: Path,
    accent_color: str = DEFAULT_ACCENT_COLOR,
    font_family: str = DEFAULT_FONT_FAMILY,
) -> Path:
    """Generate a card mp4 file via ffmpeg.

    Builds the command with ``render_card``, appends the destination path and
    runs it with a timeout of ``FFMPEG_TIMEOUT_SECS`` seconds.

    Args:
        text: Display text for the card.
        duration_secs: Card duration in seconds.
        width: Output width in pixels.
        height: Output height in pixels.
        output_path: Destination mp4 file.
        accent_color: Hex color for accent elements.
        font_family: Font family for text.

    Returns:
        The output_path on success.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits non-zero.
        subprocess.TimeoutExpired: If ffmpeg exceeds timeout.
    """
    cmd = render_card(
        text=text,
        duration_secs=duration_secs,
        width=width,
        height=height,
        accent_color=accent_color,
        font_family=font_family,
    )
    cmd.append(str(output_path))

    # Separator before the path keeps it from being glued onto the height.
    logger.info(
        "Rendering card: text=%r duration=%.1fs size=%dx%d → %s",
        text, duration_secs, width, height, output_path,
    )

    result = subprocess.run(
        cmd,
        capture_output=True,
        timeout=FFMPEG_TIMEOUT_SECS,
    )
    if result.returncode != 0:
        # Only the tail of stderr is logged; the full output travels on the
        # raised exception.
        stderr_text = result.stderr.decode("utf-8", errors="replace")[-2000:]
        logger.error("Card render failed (rc=%d): %s", result.returncode, stderr_text)
        raise subprocess.CalledProcessError(
            result.returncode, cmd, output=result.stdout, stderr=result.stderr,
        )

    logger.info("Card rendered: %s (%d bytes)", output_path, output_path.stat().st_size)
    return output_path
def build_concat_list(segments: list[Path], list_path: Path) -> Path:
    """Write an ffmpeg concat demuxer list file.

    Each segment becomes one ``file '<absolute path>'`` line. A single quote
    inside a path is written as ``'\\''`` (close quote, escaped quote, reopen
    quote) so the directive stays well-formed.

    Args:
        segments: Ordered list of segment mp4 paths.
        list_path: Where to write the concat list.

    Returns:
        The list_path.
    """
    lines = []
    for seg in segments:
        quoted = str(seg.resolve()).replace("'", "'\\''")
        lines.append(f"file '{quoted}'")
    list_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
    return list_path
def concat_segments(segments: list[Path], output_path: Path) -> Path:
    """Concatenate mp4 segments using ffmpeg concat demuxer.

    All segments must share the same codec settings (libx264/aac, same
    resolution). Uses ``-c copy`` for fast stream-copy concatenation.

    Args:
        segments: Ordered list of segment mp4 paths.
        output_path: Destination mp4 file.

    Returns:
        The output_path on success.

    Raises:
        ValueError: If segments list is empty.
        subprocess.CalledProcessError: If ffmpeg exits non-zero.
        subprocess.TimeoutExpired: If ffmpeg exceeds timeout.
    """
    if not segments:
        raise ValueError("segments list cannot be empty")

    # Write the concat list to a temp file. It is written as UTF-8 regardless
    # of locale, and single quotes in paths are escaped as '\'' so each
    # ``file`` directive stays well-formed.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, prefix="concat_",
        encoding="utf-8",
    ) as f:
        for seg in segments:
            quoted = str(seg.resolve()).replace("'", "'\\''")
            f.write(f"file '{quoted}'\n")
        list_path = Path(f.name)

    try:
        cmd = [
            "ffmpeg",
            "-y",
            "-f", "concat",
            # The list holds absolute paths, which the demuxer rejects
            # unless safe mode is off.
            "-safe", "0",
            "-i", str(list_path),
            "-c", "copy",
            "-movflags", "+faststart",
            str(output_path),
        ]
        logger.info(
            "Concatenating %d segments → %s",
            len(segments), output_path,
        )
        result = subprocess.run(
            cmd,
            capture_output=True,
            timeout=FFMPEG_TIMEOUT_SECS,
        )
        if result.returncode != 0:
            stderr_text = result.stderr.decode("utf-8", errors="replace")[-2000:]
            logger.error(
                "Concat failed (rc=%d): %s", result.returncode, stderr_text,
            )
            raise subprocess.CalledProcessError(
                result.returncode, cmd, output=result.stdout, stderr=result.stderr,
            )
        logger.info(
            "Concatenated %d segments: %s (%d bytes)",
            len(segments), output_path, output_path.stat().st_size,
        )
        return output_path
    finally:
        # Best-effort cleanup of the temp list file: a failure here must not
        # mask the real outcome, but it is recorded for debugging.
        try:
            list_path.unlink()
        except OSError as exc:
            logger.debug("Could not remove concat list %s: %s", list_path, exc)
def parse_template_config(
    shorts_template: dict | None,
) -> dict:
    """Parse a creator's shorts_template JSONB into normalized config.

    Expected schema::

        {
            "show_intro": true,
            "intro_text": "Creator Name Presents",
            "intro_duration": 2.0,
            "show_outro": true,
            "outro_text": "Thanks for watching!",
            "outro_duration": 2.0,
            "accent_color": "#22d3ee",
            "font_family": "Inter"
        }

    Missing fields get defaults, and so do fields explicitly set to ``null``
    (otherwise a null text would render as the literal string "None" and a
    null duration would raise ``TypeError``). Returns a dict with all keys
    guaranteed.

    Args:
        shorts_template: The raw template mapping, or ``None``/empty.

    Returns:
        A new dict with every key from the schema above, each coerced to
        bool, str or float as appropriate.
    """
    # (key, coercion, default) — order defines the key order of the result.
    fields = (
        ("show_intro", bool, False),
        ("intro_text", str, ""),
        ("intro_duration", float, DEFAULT_INTRO_DURATION),
        ("show_outro", bool, False),
        ("outro_text", str, ""),
        ("outro_duration", float, DEFAULT_OUTRO_DURATION),
        ("accent_color", str, DEFAULT_ACCENT_COLOR),
        ("font_family", str, DEFAULT_FONT_FAMILY),
    )
    config = {key: default for key, _coerce, default in fields}
    if not shorts_template:
        return config

    for key, coerce, _default in fields:
        value = shorts_template.get(key)
        # None covers both "key absent" and "key present but null".
        if value is not None:
            config[key] = coerce(value)
    return config