From 7b82ad0325c71ff4ca853a993c9714d963c37adc Mon Sep 17 00:00:00 2001 From: xpltd Date: Thu, 19 Mar 2026 09:56:10 -0500 Subject: [PATCH] Docker self-hosting: fix persistence, add data_dir config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Critical fix: - Dockerfile env var was MEDIARIP__DATABASE__PATH (ignored) — now MEDIARIP__SERVER__DB_PATH DB was landing at /app/mediarip.db (lost on restart) instead of /data/mediarip.db Persistence model: - /downloads → media files (bind mount recommended) - /data → SQLite DB, session cookies, error logs (named volume) - /themes → custom CSS themes (read-only bind mount) - /app/config.yaml → optional YAML config (read-only bind mount) Other changes: - Add server.data_dir config field (default: /data) for explicit session storage - Cookie storage uses data_dir instead of fragile path math from output_dir parent - Lifespan creates data_dir on startup - .dockerignore excludes tests, dev DB, egg-info - docker-compose.yml: inline admin/purge config examples - docker-compose.example.yml: parameterized with env vars - .env.example: session mode, clearer docs - README: Docker volumes table, admin setup docs, full config reference - PROJECT.md: reflects completed v1.0 state - REQUIREMENTS.md: all 26 requirements validated --- .dockerignore | 9 +++++ .env.example | 13 +++++-- Dockerfile | 3 +- README.md | 54 +++++++++++++++++++++----- backend/app/core/config.py | 1 + backend/app/main.py | 4 ++ backend/app/routers/cookies.py | 16 ++++---- backend/tests/conftest.py | 7 +++- backend/tests/test_download_service.py | 5 ++- backend/tests/test_file_serving.py | 2 +- docker-compose.example.yml | 30 +++++++------- docker-compose.yml | 16 ++++++-- 12 files changed, 115 insertions(+), 45 deletions(-) diff --git a/.dockerignore b/.dockerignore index 6f7a4cb..f99deb2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -20,6 +20,10 @@ coverage/ tmp/ .env .env.* +!.env.example +backend/tests/ 
+backend/mediarip.db* +backend/media_rip.egg-info/ # Ignore git .git/ @@ -31,3 +35,8 @@ tmp/ *.code-workspace *.swp *.swo + +# Ignore docs / meta +LICENSE +README.md +CHANGELOG.md diff --git a/.env.example b/.env.example index 9fb2f1c..44d750c 100644 --- a/.env.example +++ b/.env.example @@ -3,13 +3,20 @@ # Copy this file to .env and fill in your values. # Used with docker-compose.example.yml (secure deployment with Caddy). -# Your domain name (for Caddy auto-TLS) +# ── Required for Caddy auto-TLS ── DOMAIN=media.example.com -# Admin credentials +# ── Admin credentials ── # Username for the admin panel ADMIN_USERNAME=admin # Bcrypt password hash — generate with: -# python -c "import bcrypt; print(bcrypt.hashpw(b'YOUR_PASSWORD', bcrypt.gensalt()).decode())" +# docker run --rm python:3.12-slim sh -c \ +# "pip install -q bcrypt && python -c \"import bcrypt; print(bcrypt.hashpw(b'YOUR_PASSWORD', bcrypt.gensalt()).decode())\"" ADMIN_PASSWORD_HASH= + +# ── Session mode (optional) ── +# isolated = each browser has its own queue (default) +# shared = all users see all downloads +# open = no session tracking +SESSION_MODE=isolated diff --git a/Dockerfile b/Dockerfile index d041640..9c85935 100644 --- a/Dockerfile +++ b/Dockerfile @@ -55,7 +55,8 @@ USER mediarip # Environment defaults ENV MEDIARIP__DOWNLOADS__OUTPUT_DIR=/downloads \ - MEDIARIP__DATABASE__PATH=/data/mediarip.db \ + MEDIARIP__SERVER__DB_PATH=/data/mediarip.db \ + MEDIARIP__SERVER__DATA_DIR=/data \ PYTHONUNBUFFERED=1 EXPOSE 8000 diff --git a/README.md b/README.md index 8a7037e..bcc8f76 100644 --- a/README.md +++ b/README.md @@ -3,16 +3,20 @@ A self-hostable yt-dlp web frontend. Paste a URL, pick quality, download — with session isolation, real-time progress, and a cyberpunk default theme. ![License](https://img.shields.io/badge/license-MIT-blue) +![Docker](https://img.shields.io/badge/docker-ghcr.io%2Fxpltdco%2Fmedia--rip-blue) ## Features - **Paste & download** — Any URL yt-dlp supports. Format picker with live quality extraction. 
- **Real-time progress** — Server-Sent Events stream download progress to the browser instantly. - **Session isolation** — Each browser gets its own download queue. No cross-talk. +- **Playlist support** — Collapsible parent/child jobs with per-video status tracking. - **Three built-in themes** — Cyberpunk (default), Dark, Light. Switch in the header. - **Custom themes** — Drop a CSS file into `/themes` volume. No rebuild needed. -- **Admin panel** — Session management, storage info, manual purge. Protected by HTTP Basic + bcrypt. -- **Zero telemetry** — No outbound requests. Your downloads are your business. +- **Admin panel** — Session management, storage info, manual purge, error logs. Protected by bcrypt auth. +- **Cookie auth** — Upload cookies.txt per session for paywalled/private content. +- **Auto-purge** — Configurable scheduled cleanup of old downloads and logs. +- **Zero telemetry** — No outbound requests. No CDN, no fonts, no analytics. CSP enforced. - **Mobile-friendly** — Responsive layout with bottom tabs on small screens. ## Quickstart @@ -25,6 +29,17 @@ Open [http://localhost:8080](http://localhost:8080) and paste a URL. Downloads are saved to `./downloads/`. +## Docker Volumes + +| Mount | Purpose | Persists | +|-------|---------|----------| +| `/downloads` | Downloaded media files | ✅ Bind mount recommended | +| `/data` | SQLite database, session cookies, error logs | ✅ Named volume recommended | +| `/themes` | Custom theme CSS overrides (optional) | Read-only bind mount | +| `/app/config.yaml` | YAML config file (optional) | Read-only bind mount | + +**Important:** The `/data` volume contains the database (download history, admin state, error logs) and session cookie files. Use a named volume or bind mount to persist across container restarts. + ## Configuration All settings have sensible defaults. Override via environment variables or `config.yaml`: @@ -32,6 +47,8 @@ All settings have sensible defaults. 
Override via environment variables or `conf | Variable | Default | Description | |----------|---------|-------------| | `MEDIARIP__SERVER__PORT` | `8000` | Internal server port | +| `MEDIARIP__SERVER__DB_PATH` | `/data/mediarip.db` | SQLite database path | +| `MEDIARIP__SERVER__DATA_DIR` | `/data` | Persistent data directory | | `MEDIARIP__DOWNLOADS__OUTPUT_DIR` | `/downloads` | Where files are saved | | `MEDIARIP__DOWNLOADS__MAX_CONCURRENT` | `3` | Maximum parallel downloads | | `MEDIARIP__SESSION__MODE` | `isolated` | `isolated`, `shared`, or `open` | @@ -41,6 +58,7 @@ All settings have sensible defaults. Override via environment variables or `conf | `MEDIARIP__ADMIN__PASSWORD_HASH` | _(empty)_ | Bcrypt hash of admin password | | `MEDIARIP__PURGE__ENABLED` | `false` | Enable auto-purge of old downloads | | `MEDIARIP__PURGE__MAX_AGE_HOURS` | `168` | Delete downloads older than this | +| `MEDIARIP__PURGE__CRON` | `0 3 * * *` | Purge schedule (cron syntax) | | `MEDIARIP__THEMES_DIR` | `/themes` | Custom themes directory | ### Session Modes @@ -49,6 +67,25 @@ All settings have sensible defaults. Override via environment variables or `conf - **shared**: All sessions see all downloads. Good for household/team use. - **open**: No session tracking at all. +### Admin Panel + +Enable the admin panel to manage sessions, view storage, trigger manual purge, and review error logs: + +```yaml +# docker-compose.yml environment section +MEDIARIP__ADMIN__ENABLED: "true" +MEDIARIP__ADMIN__USERNAME: "admin" +MEDIARIP__ADMIN__PASSWORD_HASH: "$$2b$$12$$..." # escape each $ as $$; see below +``` + +Generate a bcrypt password hash: +```bash +docker run --rm python:3.12-slim sh -c \ + "pip install -q bcrypt && python -c \"import bcrypt; print(bcrypt.hashpw(b'YOUR_PASSWORD', bcrypt.gensalt()).decode())\"" +``` + +Admin state (login, settings changes) persists in the SQLite database at `/data/mediarip.db`. + ## Custom Themes 1. 
Create a folder in your themes volume: `./themes/my-theme/` @@ -70,7 +107,7 @@ See the built-in themes in `frontend/src/themes/` for fully commented examples. ## Secure Deployment -For production with TLS: +For production with TLS, use the included Caddy reverse proxy: ```bash cp docker-compose.example.yml docker-compose.yml @@ -79,12 +116,7 @@ cp .env.example .env docker compose up -d ``` -This uses Caddy as a reverse proxy with automatic Let's Encrypt TLS. - -Generate an admin password hash: -```bash -python -c "import bcrypt; print(bcrypt.hashpw(b'YOUR_PASSWORD', bcrypt.gensalt()).decode())" -``` +Caddy automatically provisions Let's Encrypt TLS certificates for your domain. ## Development @@ -95,7 +127,7 @@ cd backend python -m venv .venv .venv/bin/pip install -r requirements.txt .venv/bin/pip install pytest pytest-asyncio pytest-anyio httpx ruff -.venv/bin/python -m pytest tests/ -v +.venv/bin/python -m pytest tests/ -v -m "not integration" ``` ### Frontend @@ -120,6 +152,7 @@ npm run build # Production build | `/api/formats` | GET | Extract available formats for a URL | | `/api/events` | GET | SSE stream for real-time progress | | `/api/cookies` | POST | Upload cookies.txt for authenticated downloads | +| `/api/cookies` | DELETE | Remove cookies.txt for current session | | `/api/themes` | GET | List available custom themes | | `/api/admin/*` | GET/POST | Admin endpoints (requires auth) | @@ -130,6 +163,7 @@ npm run build # Production build - **Transport**: Server-Sent Events for real-time progress - **Database**: SQLite with WAL mode - **Styling**: CSS custom properties (no Tailwind, no component library) +- **Container**: Multi-stage build, non-root user, amd64 + arm64 ## License diff --git a/backend/app/core/config.py b/backend/app/core/config.py index f1061f0..5d017d0 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -39,6 +39,7 @@ class ServerConfig(BaseModel): port: int = 8000 log_level: str = "info" db_path: str = "mediarip.db" 
+ data_dir: str = "/data" class DownloadsConfig(BaseModel): diff --git a/backend/app/main.py b/backend/app/main.py index a063c68..f74e35e 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -57,6 +57,10 @@ async def lifespan(app: FastAPI): ) # --- Database --- + # Ensure data directory exists for DB and session state + data_dir = Path(config.server.data_dir) + data_dir.mkdir(parents=True, exist_ok=True) + db = await init_db(config.server.db_path) logger.info("Database initialised at %s", config.server.db_path) diff --git a/backend/app/routers/cookies.py b/backend/app/routers/cookies.py index f6f16b3..38b8b8e 100644 --- a/backend/app/routers/cookies.py +++ b/backend/app/routers/cookies.py @@ -13,12 +13,10 @@ logger = logging.getLogger("mediarip.cookies") router = APIRouter(tags=["cookies"]) -COOKIES_DIR = "data/sessions" - -def _cookie_path(output_base: str, session_id: str) -> Path: +def _cookie_path(data_dir: str, session_id: str) -> Path: """Return the cookies.txt path for a session.""" - return Path(output_base).parent / COOKIES_DIR / session_id / "cookies.txt" + return Path(data_dir) / "sessions" / session_id / "cookies.txt" @router.post("/cookies") @@ -29,7 +27,7 @@ async def upload_cookies( ) -> dict: """Upload a Netscape-format cookies.txt for the current session. - File is stored at data/sessions/{session_id}/cookies.txt. + File is stored at {data_dir}/sessions/{session_id}/cookies.txt. CRLF line endings are normalized to LF. 
""" content = await file.read() @@ -38,7 +36,7 @@ async def upload_cookies( text = content.decode("utf-8", errors="replace").replace("\r\n", "\n") config = request.app.state.config - cookie_file = _cookie_path(config.downloads.output_dir, session_id) + cookie_file = _cookie_path(config.server.data_dir, session_id) cookie_file.parent.mkdir(parents=True, exist_ok=True) cookie_file.write_text(text, encoding="utf-8") @@ -54,7 +52,7 @@ async def delete_cookies( ) -> dict: """Delete the cookies.txt for the current session.""" config = request.app.state.config - cookie_file = _cookie_path(config.downloads.output_dir, session_id) + cookie_file = _cookie_path(config.server.data_dir, session_id) if cookie_file.is_file(): cookie_file.unlink() @@ -64,12 +62,12 @@ async def delete_cookies( return {"status": "not_found"} -def get_cookie_path_for_session(output_dir: str, session_id: str) -> str | None: +def get_cookie_path_for_session(data_dir: str, session_id: str) -> str | None: """Return the cookies.txt path if it exists for a session, else None. Called by DownloadService to pass cookiefile to yt-dlp. 
""" - path = _cookie_path(output_dir, session_id) + path = _cookie_path(data_dir, session_id) if path.is_file(): return str(path) return None diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 831713a..bfff066 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -26,7 +26,10 @@ def test_config(tmp_path: Path) -> AppConfig: """Return an AppConfig with downloads.output_dir pointing at a temp dir.""" dl_dir = tmp_path / "downloads" dl_dir.mkdir() - return AppConfig(downloads={"output_dir": str(dl_dir)}) + return AppConfig( + server={"data_dir": str(tmp_path / "data")}, + downloads={"output_dir": str(dl_dir)}, + ) @pytest_asyncio.fixture() @@ -75,7 +78,7 @@ async def client(tmp_path: Path): # Build config pointing at temp resources config = AppConfig( - server={"db_path": db_path}, + server={"db_path": db_path, "data_dir": str(tmp_path / "data")}, downloads={"output_dir": str(dl_dir)}, ) diff --git a/backend/tests/test_download_service.py b/backend/tests/test_download_service.py index 1c83a78..1d5284d 100644 --- a/backend/tests/test_download_service.py +++ b/backend/tests/test_download_service.py @@ -33,7 +33,10 @@ async def download_env(tmp_path): dl_dir.mkdir() db_path = str(tmp_path / "test.db") - config = AppConfig(downloads={"output_dir": str(dl_dir)}) + config = AppConfig( + server={"data_dir": str(tmp_path / "data")}, + downloads={"output_dir": str(dl_dir)}, + ) db = await init_db(db_path) loop = asyncio.get_running_loop() broker = SSEBroker(loop) diff --git a/backend/tests/test_file_serving.py b/backend/tests/test_file_serving.py index 0797d39..1249332 100644 --- a/backend/tests/test_file_serving.py +++ b/backend/tests/test_file_serving.py @@ -24,7 +24,7 @@ async def file_client(tmp_path): dl_dir.mkdir() config = AppConfig( - server={"db_path": db_path}, + server={"db_path": db_path, "data_dir": str(tmp_path / "data")}, downloads={"output_dir": str(dl_dir)}, ) diff --git a/docker-compose.example.yml 
b/docker-compose.example.yml index 33117d7..8f73842 100644 --- a/docker-compose.example.yml +++ b/docker-compose.example.yml @@ -1,12 +1,10 @@ -# media.rip() — Docker Compose example with Caddy reverse proxy +# media.rip() — Docker Compose with Caddy reverse proxy (recommended for production) # -# This is the recommended deployment configuration. # Caddy automatically provisions TLS certificates via Let's Encrypt. # -# Usage: -# 1. Replace YOUR_DOMAIN with your actual domain -# 2. Set a strong admin password hash (see below) -# 3. Run: docker compose up -d +# Setup: +# 1. Copy .env.example to .env and fill in your values +# 2. Run: docker compose -f docker-compose.example.yml up -d # # Generate a bcrypt password hash: # docker run --rm python:3.12-slim python -c \ @@ -18,19 +16,21 @@ services: container_name: media-rip restart: unless-stopped volumes: - - downloads:/downloads - - data:/data - # Optional: custom themes - # - ./themes:/themes:ro - # Optional: config file - # - ./config.yaml:/app/config.yaml:ro + - downloads:/downloads # Downloaded media files + - data:/data # Database, sessions, error logs + # Optional: + # - ./themes:/themes:ro # Custom theme CSS overrides + # - ./config.yaml:/app/config.yaml:ro # YAML config file environment: - # Admin panel (optional — remove to disable) + # Admin panel MEDIARIP__ADMIN__ENABLED: "true" - MEDIARIP__ADMIN__USERNAME: "admin" + MEDIARIP__ADMIN__USERNAME: "${ADMIN_USERNAME:-admin}" MEDIARIP__ADMIN__PASSWORD_HASH: "${ADMIN_PASSWORD_HASH}" # Session mode: isolated (default), shared, or open - # MEDIARIP__SESSION__MODE: "isolated" + MEDIARIP__SESSION__MODE: "${SESSION_MODE:-isolated}" + # Auto-purge (optional) + # MEDIARIP__PURGE__ENABLED: "true" + # MEDIARIP__PURGE__MAX_AGE_HOURS: "168" expose: - "8000" healthcheck: diff --git a/docker-compose.yml b/docker-compose.yml index 7885de5..f3882a0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,6 +5,7 @@ # # The app will be available at http://localhost:8080 # 
Downloads are persisted in ./downloads/ +# Database + session state persisted in the mediarip-data volume. services: mediarip: @@ -13,11 +14,20 @@ services: ports: - "8080:8000" volumes: - - ./downloads:/downloads # Downloaded files - - ./themes:/themes # Custom themes (optional) - - mediarip-data:/data # Database + internal state + - ./downloads:/downloads # Downloaded media files (browsable) + - mediarip-data:/data # Database, sessions, error logs + # Optional: + # - ./themes:/themes:ro # Custom theme CSS overrides + # - ./config.yaml:/app/config.yaml:ro # YAML config file environment: - MEDIARIP__SESSION__MODE=isolated + # Admin panel (disabled by default; write each $ in the hash as $$): + # - MEDIARIP__ADMIN__ENABLED=true + # - MEDIARIP__ADMIN__USERNAME=admin + # - MEDIARIP__ADMIN__PASSWORD_HASH=$$2b$$12$$...your.bcrypt.hash... + # Auto-purge (disabled by default): + # - MEDIARIP__PURGE__ENABLED=true + # - MEDIARIP__PURGE__MAX_AGE_HOURS=168 restart: unless-stopped healthcheck: test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/health')"]