Compare commits

...

6 commits

Author SHA1 Message Date
jlightner
70910d516e fix: detect CIFS/NFS via /proc/mounts before opening DB
Instead of trying WAL mode and recovering after failure, proactively
detect network filesystems by parsing /proc/mounts and skip WAL
entirely. This avoids the stale WAL/SHM files that made recovery
impossible on CIFS mounts.
2026-04-01 05:53:40 +00:00
jlightner
6682438163 fix: probe WAL with test write + close/reopen fallback for CIFS
The PRAGMA journal_mode=WAL returns 'wal' even on CIFS mounts where
WAL shared-memory actually fails. Now we do a concrete test write after
setting WAL mode; if it fails, we close the connection, remove stale
WAL/SHM files, reopen in DELETE mode.
2026-04-01 05:13:06 +00:00
jlightner
3205c101c3 fix: graceful WAL mode fallback for CIFS/network filesystems
When the data directory is on a CIFS/SMB mount (or other filesystem
lacking mmap shared-memory support), SQLite WAL mode fails with
'locking protocol' or 'readonly database' errors. The init_db function
now detects this and falls back to DELETE journal mode automatically.
2026-04-01 05:04:45 +00:00
jlightner
23143b4e11 Merge fix/archive-org-audio-detection: correct audio-only detection for archive.org 2026-04-01 04:21:27 +00:00
jlightner
d518304331 fix: detect video from URL extension when yt-dlp extract_flat strips codec info
archive.org and other direct-file hosts return metadata without vcodec
when using extract_flat mode. The UI was incorrectly labeling these as
'Audio Only'. Now we check the URL path extension and yt-dlp's reported
ext against known video containers as a fallback before marking a source
as audio-only.

Fixes incorrect audio-only detection for archive.org video URLs.
2026-04-01 04:21:19 +00:00
xpltd
44e24e9393 README: add Docker image location + pull/run instructions
- Added ghcr.io/xpltdco/media-rip:latest prominently in Quickstart
- Added curl one-liner to grab docker-compose.yml
- Added docker run alternative for users who don't want compose
- Updated features: 9 built-in themes (was 3)
2026-03-22 17:15:21 -05:00
3 changed files with 142 additions and 12 deletions

View file

@ -11,7 +11,7 @@ A self-hostable yt-dlp web frontend. Paste a URL, pick quality, download — wit
- **Real-time progress** — Server-Sent Events stream download progress to the browser instantly.
- **Session isolation** — Each browser gets its own download queue. No cross-talk.
- **Playlist support** — Collapsible parent/child jobs with per-video status tracking.
- **Three built-in themes** — Cyberpunk (default), Dark, Light. Switch in the header.
- **9 built-in themes** — 5 dark (Cyberpunk, Dark, Midnight, Hacker, Neon) + 4 light (Light, Paper, Arctic, Solarized). Admin picks the pair, visitors toggle dark/light.
- **Custom themes** — Drop a CSS file into `/themes` volume. No rebuild needed.
- **Admin panel** — Session management, storage info, manual purge, error logs. Protected by bcrypt auth.
- **Cookie auth** — Upload cookies.txt per session for paywalled/private content.
@ -21,8 +21,32 @@ A self-hostable yt-dlp web frontend. Paste a URL, pick quality, download — wit
## Quickstart
The Docker image is published to GitHub Container Registry:
```
ghcr.io/xpltdco/media-rip:latest
```
Pull and run with Docker Compose (recommended):
```bash
docker compose up
# Download the compose file
curl -O https://raw.githubusercontent.com/xpltdco/media-rip/master/docker-compose.yml
# Start the container
docker compose up -d
```
Or pull and run directly:
```bash
docker run -d \
--name mediarip \
-p 8080:8000 \
-v ./downloads:/downloads \
-v mediarip-data:/data \
--restart unless-stopped \
ghcr.io/xpltdco/media-rip:latest
```
Open [http://localhost:8080](http://localhost:8080) and paste a URL. On first run, you'll set an admin password.

View file

@ -1,8 +1,10 @@
"""SQLite database layer with WAL mode and async CRUD operations.
"""SQLite database layer with async CRUD operations.
Uses aiosqlite for async access. ``init_db`` sets critical PRAGMAs
(busy_timeout, WAL, synchronous) *before* creating any tables so that
concurrent download workers never hit ``SQLITE_BUSY``.
(busy_timeout, journal_mode, synchronous) *before* creating any tables so
that concurrent download workers never hit ``SQLITE_BUSY``. WAL mode is
preferred on local filesystems; DELETE mode is used automatically when a
network filesystem (CIFS, NFS) is detected.
"""
from __future__ import annotations
@ -90,19 +92,31 @@ async def init_db(db_path: str) -> aiosqlite.Connection:
PRAGMA order matters:
1. ``busy_timeout`` prevents immediate ``SQLITE_BUSY`` on lock contention
2. ``journal_mode=WAL`` enables concurrent readers + single writer
3. ``synchronous=NORMAL`` safe durability level for WAL mode
2. ``journal_mode`` WAL for local filesystems, DELETE for network mounts
(CIFS/NFS lack the shared-memory primitives WAL requires)
3. ``synchronous=NORMAL`` safe durability level
Returns the ready-to-use connection.
"""
# Detect network filesystem *before* opening the DB so we never attempt
# WAL on CIFS/NFS (which creates broken SHM files that persist).
use_wal = not _is_network_filesystem(db_path)
db = await aiosqlite.connect(db_path)
db.row_factory = aiosqlite.Row
# --- PRAGMAs (before any DDL) ---
await db.execute("PRAGMA busy_timeout = 5000")
result = await db.execute("PRAGMA journal_mode = WAL")
row = await result.fetchone()
journal_mode = row[0] if row else "unknown"
if use_wal:
journal_mode = await _try_journal_mode(db, "wal")
else:
logger.info(
"Network filesystem detected for %s — using DELETE journal mode",
db_path,
)
journal_mode = await _try_journal_mode(db, "delete")
logger.info("journal_mode set to %s", journal_mode)
await db.execute("PRAGMA synchronous = NORMAL")
@ -115,6 +129,54 @@ async def init_db(db_path: str) -> aiosqlite.Connection:
return db
def _is_network_filesystem(db_path: str, mounts_path: str = "/proc/mounts") -> bool:
    """Return True if *db_path* resides on a network filesystem (CIFS, NFS, etc.).

    Reads the mount table at *mounts_path* (``/proc/mounts`` on Linux) and
    picks the entry whose mountpoint is the longest path-component prefix of
    the database directory; the filesystem type of that entry decides the
    result.

    Args:
        db_path: Path to the SQLite database file (need not exist yet).
        mounts_path: Mount table to parse. Defaults to ``/proc/mounts``.

    Returns:
        True when the best-matching mount has a known network filesystem
        type. False otherwise, including when the mount table cannot be read
        (non-Linux hosts) or no mountpoint matches.
    """
    import os
    import re

    network_fs_types = {"cifs", "nfs", "nfs4", "smb", "smbfs", "9p", "fuse.sshfs"}

    try:
        # realpath (not abspath): the mount table lists canonical paths, so a
        # data directory reached through a symlink must be resolved first.
        db_dir = os.path.dirname(os.path.realpath(db_path))
        with open(mounts_path, "r", encoding="utf-8", errors="replace") as f:
            mount_lines = f.readlines()
    except (OSError, ValueError):
        # No readable mount table (e.g. non-Linux) or an unusable path:
        # detection is best-effort, so report "not a network filesystem".
        return False

    best_match = ""
    best_fstype = ""
    for line in mount_lines:
        parts = line.split()
        if len(parts) < 3:
            continue
        # Whitespace and backslashes inside a mountpoint are stored as
        # three-digit octal escapes (a space is written as \040).
        mountpoint = re.sub(
            r"\\([0-7]{3})",
            lambda m: chr(int(m.group(1), 8)),
            parts[1],
        )
        fstype = parts[2]

        # Match on whole path components: a bare startswith() would treat
        # /data2 as living under a mount at /data.
        prefix = mountpoint.rstrip("/") + "/"
        if db_dir != mountpoint and not db_dir.startswith(prefix):
            continue

        # ">=" lets a later entry for the same mountpoint win, because a
        # later mount shadows whatever was mounted there before.
        if len(mountpoint) >= len(best_match):
            best_match = mountpoint
            best_fstype = fstype

    is_net = best_fstype in network_fs_types
    if is_net:
        logger.info(
            "Detected %s filesystem at %s for database %s",
            best_fstype, best_match, db_path,
        )
    return is_net
async def _try_journal_mode(
    db: aiosqlite.Connection, mode: str,
) -> str:
    """Set the connection's journal mode and report the mode SQLite returns.

    Args:
        db: Open aiosqlite connection to run the PRAGMA on.
        mode: Journal mode name such as ``"wal"`` or ``"delete"``. The value
            is interpolated into the SQL text, so pass trusted constants
            only.

    Returns:
        The lower-cased mode string returned by the PRAGMA, ``"unknown"``
        when the PRAGMA yields no row, or ``"error"`` when the statement
        raises (the failure is logged as a warning, never propagated).
    """
    try:
        cursor = await db.execute(f"PRAGMA journal_mode = {mode}")
        try:
            row = await cursor.fetchone()
        finally:
            # Close explicitly so the PRAGMA statement is finalized now,
            # before later PRAGMAs/DDL run, instead of whenever the cursor
            # happens to be garbage-collected.
            await cursor.close()
        return (row[0] if row else "unknown").lower()
    except Exception as exc:
        # Best-effort: the caller only logs the resulting mode, so a failed
        # PRAGMA must not abort database initialisation.
        logger.warning("PRAGMA journal_mode=%s failed: %s", mode, exc)
        return "error"
# ---------------------------------------------------------------------------
# CRUD helpers
# ---------------------------------------------------------------------------

View file

@ -561,6 +561,31 @@ class DownloadService:
url_lower = url.lower()
return any(domain in url_lower for domain in audio_domains)
@staticmethod
def _url_or_ext_implies_video(url: str, ext: str | None) -> bool:
"""Return True if the URL path or reported extension is a known video container.
This acts as a fallback when yt-dlp's extract_flat mode strips codec
metadata (common for archive.org, direct-file URLs, etc.), which would
otherwise cause the UI to wrongly label the source as "audio only".
"""
video_extensions = {
"mp4", "mkv", "webm", "avi", "mov", "flv", "wmv", "mpg",
"mpeg", "m4v", "ts", "3gp", "ogv",
}
# Check the extension reported by yt-dlp
if ext and ext.lower() in video_extensions:
return True
# Check the URL path for a video file extension
from urllib.parse import urlparse
path = urlparse(url).path.lower()
# Strip any trailing slashes / query residue
path = path.rstrip("/")
for vext in video_extensions:
if path.endswith(f".{vext}"):
return True
return False
@staticmethod
def _get_auth_hint(url: str) -> str | None:
"""Return a user-facing hint for sites that commonly need auth."""
@ -645,13 +670,27 @@ class DownloadService:
"url": e.get("url") or e.get("webpage_url", ""),
"duration": e.get("duration"),
})
# Domain-based detection may miss video playlists on generic
# hosting sites (e.g. archive.org). If any entry URL looks like
# a video file, override domain_audio for the whole playlist.
playlist_audio = domain_audio
if playlist_audio:
for e_check in entries:
entry_url = e_check.get("url", "")
if self._url_or_ext_implies_video(entry_url, None):
playlist_audio = False
break
if not playlist_audio and not domain_audio:
# Also check the top-level URL itself
if self._url_or_ext_implies_video(url, info.get("ext")):
playlist_audio = False
result = {
"type": "playlist",
"title": info.get("title", "Playlist"),
"count": len(entries),
"entries": entries,
"is_audio_only": domain_audio,
"default_ext": self._guess_ext_from_url(url, domain_audio),
"is_audio_only": playlist_audio,
"default_ext": self._guess_ext_from_url(url, playlist_audio),
}
if unavailable_count > 0:
result["unavailable_count"] = unavailable_count
@ -659,6 +698,11 @@ class DownloadService:
else:
# Single video/track
has_video = bool(info.get("vcodec") and info["vcodec"] != "none")
# extract_flat mode often strips codec info, so also check the
# URL extension and the reported ext — if either is a known video
# container we should NOT mark it as audio-only.
if not has_video:
has_video = self._url_or_ext_implies_video(url, info.get("ext"))
is_audio_only = domain_audio or not has_video
# Detect likely file extension
ext = info.get("ext")