mirror of
https://github.com/xpltdco/media-rip.git
synced 2026-04-03 10:54:00 -06:00
Full-featured self-hosted yt-dlp web frontend:
- Python 3.12+ / FastAPI backend with async SQLite, SSE transport, session isolation
- Vue 3 / TypeScript / Pinia frontend with real-time progress, theme picker
- 3 built-in themes (cyberpunk/dark/light) + drop-in custom theme system
- Admin auth (bcrypt), purge system, cookie upload, file serving
- Docker multi-stage build, GitHub Actions CI/CD
- 179 backend tests, 29 frontend tests (208 total)
Slices: S01 (Foundation), S02 (SSE+Sessions), S03 (Frontend),
S04 (Admin+Auth), S05 (Themes), S06 (Docker+CI)
330 lines
11 KiB
Python
330 lines
11 KiB
Python
"""Download service — yt-dlp wrapper with sync-to-async progress bridging.
|
|
|
|
Wraps synchronous yt-dlp operations in a :class:`~concurrent.futures.ThreadPoolExecutor`
|
|
and bridges progress events to the async world via :class:`~app.core.sse_broker.SSEBroker`.
|
|
Each download job gets a **fresh** ``YoutubeDL`` instance — they are never shared across
|
|
threads (yt-dlp has mutable internal state: cookies, temp files, logger).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import os
|
|
import uuid
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from datetime import datetime, timezone
|
|
|
|
import yt_dlp
|
|
|
|
from app.core.config import AppConfig
|
|
from app.core.database import (
|
|
create_job,
|
|
get_job,
|
|
update_job_progress,
|
|
update_job_status,
|
|
)
|
|
from app.core.sse_broker import SSEBroker
|
|
from app.models.job import (
|
|
FormatInfo,
|
|
Job,
|
|
JobCreate,
|
|
JobStatus,
|
|
ProgressEvent,
|
|
)
|
|
from app.services.output_template import resolve_template
|
|
|
|
logger = logging.getLogger("mediarip.download")
|
|
|
|
|
|
class DownloadService:
    """Manages yt-dlp downloads with async-compatible progress reporting.

    Downloads run in a :class:`~concurrent.futures.ThreadPoolExecutor`.
    Progress is bridged to the event loop two ways: DB writes via
    :func:`asyncio.run_coroutine_threadsafe`, and SSE pushes via the broker
    (in-memory, safe to call from worker threads — see ``_broker.publish``
    usage throughout).

    Parameters
    ----------
    config:
        Application configuration (download paths, concurrency, templates).
    db:
        Async SQLite connection (aiosqlite).
    broker:
        SSE event broker for real-time progress push.
    loop:
        The asyncio event loop. Captured once at construction — must not be
        called from inside a worker thread.
    """

    def __init__(
        self,
        config: AppConfig,
        db,  # aiosqlite.Connection
        broker: SSEBroker,
        loop: asyncio.AbstractEventLoop,
    ) -> None:
        self._config = config
        self._db = db
        self._broker = broker
        self._loop = loop
        self._executor = ThreadPoolExecutor(
            max_workers=config.downloads.max_concurrent,
            thread_name_prefix="ytdl",
        )
        # Per-job throttle state for DB writes (only used inside worker threads)
        self._last_db_percent: dict[str, float] = {}
        # Job ids flagged by cancel(). Written from the event loop, read from
        # worker threads; set.add/.discard/membership are atomic under the
        # GIL, so no lock is needed. NOTE(review): cancelling a job that has
        # already finished leaves its id here permanently — a tiny, bounded
        # leak accepted to keep queued-but-not-started jobs cancellable.
        self._cancelled: set[str] = set()

    # ------------------------------------------------------------------
    # Public async interface
    # ------------------------------------------------------------------

    async def enqueue(self, job_create: JobCreate, session_id: str) -> Job:
        """Create a job and submit it for background download.

        Returns the ``Job`` immediately with status ``queued``.
        """
        job_id = str(uuid.uuid4())
        template = resolve_template(
            job_create.url,
            job_create.output_template,
            self._config,
        )

        now = datetime.now(timezone.utc).isoformat()
        job = Job(
            id=job_id,
            session_id=session_id,
            url=job_create.url,
            status=JobStatus.queued,
            format_id=job_create.format_id,
            quality=job_create.quality,
            output_template=template,
            created_at=now,
        )

        await create_job(self._db, job)
        logger.info("Job %s created for URL: %s", job_id, job_create.url)

        # Build yt-dlp options
        output_dir = self._config.downloads.output_dir
        os.makedirs(output_dir, exist_ok=True)

        opts: dict = {
            "outtmpl": os.path.join(output_dir, template),
            "quiet": True,
            "no_warnings": True,
            "noprogress": True,
        }
        # An explicit format id takes precedence over a quality preset.
        if job_create.format_id:
            opts["format"] = job_create.format_id
        elif job_create.quality:
            opts["format"] = job_create.quality

        # Fire-and-forget: _run_download never raises (it reports failures
        # through the DB and SSE), so the returned future is deliberately
        # not awaited or stored.
        self._loop.run_in_executor(
            self._executor,
            self._run_download,
            job_id,
            job_create.url,
            opts,
            session_id,
        )
        return job

    async def get_formats(self, url: str) -> list[FormatInfo]:
        """Extract available formats for *url* without downloading.

        Runs yt-dlp ``extract_info`` in the thread pool. Returns an empty
        list when extraction fails.
        """
        info = await self._loop.run_in_executor(
            self._executor,
            self._extract_info,
            url,
        )
        if not info:
            return []

        result = [
            FormatInfo(
                format_id=f.get("format_id", "unknown"),
                ext=f.get("ext", "unknown"),
                resolution=f.get("resolution"),
                codec=f.get("vcodec"),
                filesize=f.get("filesize"),  # may be None — that's fine
                format_note=f.get("format_note"),
                vcodec=f.get("vcodec"),
                acodec=f.get("acodec"),
            )
            for f in info.get("formats") or []
        ]

        # Sort: best resolution first (descending by height, fallback 0)
        result.sort(
            key=lambda fi: _parse_resolution_height(fi.resolution),
            reverse=True,
        )
        return result

    async def cancel(self, job_id: str) -> None:
        """Cancel a job: mark it failed in the DB and abort its download.

        The id is flagged in ``self._cancelled`` so the worker's progress
        hook raises at its next callback, aborting the transfer (yt-dlp has
        no direct abort API), and so the worker never overwrites the status
        written here — previously a finishing download would clobber the
        cancelled job's DB status with ``completed``.

        NOTE(review): a job that already completed is still overwritten to
        ``failed`` here, as before — check status first if that matters.
        """
        # Flag BEFORE the DB write so the worker cannot race past us into
        # its own terminal status update.
        self._cancelled.add(job_id)
        await update_job_status(
            self._db, job_id, JobStatus.failed.value, "Cancelled by user"
        )
        logger.info("Job %s cancelled by user", job_id)

    def shutdown(self) -> None:
        """Shut down the thread pool (non-blocking)."""
        self._executor.shutdown(wait=False)
        logger.info("Download executor shut down")

    # ------------------------------------------------------------------
    # Private — runs in worker threads
    # ------------------------------------------------------------------

    def _set_status_threadsafe(
        self, job_id: str, status: str, error: str | None = None
    ) -> None:
        """Write a job status from a worker thread; blocks until committed."""
        if error is None:
            coro = update_job_status(self._db, job_id, status)
        else:
            coro = update_job_status(self._db, job_id, status, error)
        asyncio.run_coroutine_threadsafe(coro, self._loop).result(timeout=10)

    def _publish_update(
        self,
        session_id: str,
        job_id: str,
        status: str,
        percent: float,
        error_message: str | None = None,
    ) -> None:
        """Push a ``job_update`` SSE event for a status transition.

        ``error_message`` is included in the payload only when given,
        matching the original per-status payload shapes.
        """
        data: dict = {
            "job_id": job_id,
            "status": status,
            "percent": percent,
            "speed": None,
            "eta": None,
            "filename": None,
        }
        if error_message is not None:
            data["error_message"] = error_message
        self._broker.publish(session_id, {"event": "job_update", "data": data})

    def _run_download(
        self,
        job_id: str,
        url: str,
        opts: dict,
        session_id: str,
    ) -> None:
        """Execute yt-dlp download in a worker thread.

        Creates a fresh ``YoutubeDL`` instance (never shared — yt-dlp has
        mutable internal state) and bridges progress events to the async
        event loop. Cancellation is honoured by raising from the progress
        hook, which aborts the transfer at the next progress callback.
        """
        logger.info("Job %s starting download: %s", job_id, url)
        self._last_db_percent[job_id] = -1.0

        def progress_hook(d: dict) -> None:
            # Raising out of a progress hook propagates (possibly wrapped by
            # yt-dlp) through ydl.download(); the outer handler recognises
            # cancellation via self._cancelled, not the exception type.
            if job_id in self._cancelled:
                raise RuntimeError("Cancelled by user")
            try:
                event = ProgressEvent.from_yt_dlp(job_id, d)

                # Always publish to SSE broker (cheap, in-memory)
                self._broker.publish(session_id, event)

                # Throttle DB writes: ≥1% change or status change
                last_pct = self._last_db_percent.get(job_id, -1.0)
                status_changed = d.get("status") in ("finished", "error")
                pct_changed = abs(event.percent - last_pct) >= 1.0

                if pct_changed or status_changed:
                    self._last_db_percent[job_id] = event.percent
                    logger.debug(
                        "Job %s DB write: percent=%.1f status=%s",
                        job_id, event.percent, event.status,
                    )
                    future = asyncio.run_coroutine_threadsafe(
                        update_job_progress(
                            self._db,
                            job_id,
                            event.percent,
                            event.speed,
                            event.eta,
                            event.filename,
                        ),
                        self._loop,
                    )
                    # Block worker thread until DB write completes
                    future.result(timeout=10)
            except Exception:
                logger.exception("Job %s progress hook error", job_id)

        opts["progress_hooks"] = [progress_hook]

        try:
            # Mark as downloading and notify SSE
            self._set_status_threadsafe(job_id, JobStatus.downloading.value)
            self._publish_update(session_id, job_id, "downloading", 0)

            # Fresh YoutubeDL instance — never shared
            with yt_dlp.YoutubeDL(opts) as ydl:
                ydl.download([url])

            if job_id in self._cancelled:
                # cancel() already wrote the terminal DB status; don't
                # clobber it with "completed".
                logger.info("Job %s finished but was cancelled", job_id)
                self._publish_update(
                    session_id, job_id, "failed", 0,
                    error_message="Cancelled by user",
                )
                return

            # Mark as completed and notify SSE
            self._set_status_threadsafe(job_id, JobStatus.completed.value)
            self._publish_update(session_id, job_id, "completed", 100)
            logger.info("Job %s completed", job_id)

        except Exception as e:
            if job_id in self._cancelled:
                # Aborted via the progress hook (or failed post-cancel);
                # cancel() already recorded "Cancelled by user" in the DB.
                logger.info("Job %s aborted: cancelled by user", job_id)
                self._publish_update(
                    session_id, job_id, "failed", 0,
                    error_message="Cancelled by user",
                )
                return

            logger.error("Job %s failed: %s", job_id, e, exc_info=True)
            try:
                self._set_status_threadsafe(
                    job_id, JobStatus.failed.value, str(e)
                )
                self._publish_update(
                    session_id, job_id, "failed", 0, error_message=str(e)
                )
            except Exception:
                logger.exception(
                    "Job %s failed to update status after error", job_id
                )

        finally:
            self._last_db_percent.pop(job_id, None)
            self._cancelled.discard(job_id)

    def _extract_info(self, url: str) -> dict | None:
        """Run yt-dlp extract_info synchronously (called from thread pool).

        Returns None on any extraction failure (logged, never raised).
        """
        opts = {
            "quiet": True,
            "no_warnings": True,
            "skip_download": True,
        }
        try:
            with yt_dlp.YoutubeDL(opts) as ydl:
                return ydl.extract_info(url, download=False)
        except Exception:
            logger.exception("Format extraction failed for %s", url)
            return None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _parse_resolution_height(resolution: str | None) -> int:
|
|
"""Extract numeric height from a resolution string like '1080p' or '1920x1080'.
|
|
|
|
Returns 0 for unparseable values so they sort last.
|
|
"""
|
|
if not resolution:
|
|
return 0
|
|
resolution = resolution.lower().strip()
|
|
# Handle "1080p" style
|
|
if resolution.endswith("p"):
|
|
try:
|
|
return int(resolution[:-1])
|
|
except ValueError:
|
|
pass
|
|
# Handle "1920x1080" style
|
|
if "x" in resolution:
|
|
try:
|
|
return int(resolution.split("x")[-1])
|
|
except ValueError:
|
|
pass
|
|
# Handle bare number
|
|
try:
|
|
return int(resolution)
|
|
except ValueError:
|
|
return 0
|