feat: Pipeline runs — per-execution tracking with run-scoped events

Data model:
- New pipeline_runs table (id, video_id, run_number, trigger, status,
  started_at, finished_at, error_stage, total_tokens)
- pipeline_events gains run_id FK (nullable for backward compat)
- Alembic migration 010_add_pipeline_runs

Backend:
- run_pipeline() creates a PipelineRun, threads run_id through all stages
- _emit_event() and _make_llm_callback() accept and store run_id
- Stage 6 (final) calls _finish_run() to mark complete with token totals
- mark_pipeline_error marks run as error
- Revoke marks running runs as cancelled
- Trigger endpoints pass trigger type (manual, clean_reprocess)
- New GET /admin/pipeline/runs/{video_id} — lists runs with event counts
- GET /admin/pipeline/events supports ?run_id= filter

Frontend:
- Expanded video detail now shows RunList instead of flat EventLog
- Each run is a collapsible card showing: run number, trigger type,
  status badge, timestamps, token count, event count
- Latest run auto-expands, older runs collapsed
- Legacy events (pre-run-tracking) shown as separate collapsible section
- Run cards color-coded: cyan border for running, red for error,
  gray for cancelled
- EventLog accepts optional runId prop to scope events to a single run
This commit is contained in:
jlightner 2026-03-31 17:13:41 +00:00
parent cd3b57a156
commit c2db9aa011
7 changed files with 605 additions and 50 deletions

View file

@ -0,0 +1,54 @@
"""Add pipeline_runs table and run_id FK on pipeline_events.
Each pipeline trigger creates a run. Events are scoped to runs
for clean per-execution audit trails.
Revision ID: 010_add_pipeline_runs
Revises: 009_add_creator_hidden_flag
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID
revision = "010_add_pipeline_runs"
down_revision = "009_add_creator_hidden_flag"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create pipeline_runs and link pipeline_events to runs via run_id."""
    # Enum types: SQLAlchemy emits CREATE TYPE for sa.Enum columns on
    # PostgreSQL as part of op.create_table below.
    pipeline_run_trigger = sa.Enum(
        "manual", "clean_reprocess", "auto_ingest", "bulk",
        name="pipeline_run_trigger",
    )
    pipeline_run_status = sa.Enum(
        "running", "complete", "error", "cancelled",
        name="pipeline_run_status",
    )
    op.create_table(
        "pipeline_runs",
        sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
        sa.Column("video_id", UUID(as_uuid=True), sa.ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False, index=True),
        sa.Column("run_number", sa.Integer, nullable=False),  # per-video, 1-indexed
        sa.Column("trigger", pipeline_run_trigger, nullable=False),
        sa.Column("status", pipeline_run_status, nullable=False, server_default="running"),
        sa.Column("started_at", sa.DateTime, nullable=False, server_default=sa.text("now()")),
        sa.Column("finished_at", sa.DateTime, nullable=True),
        sa.Column("error_stage", sa.String(50), nullable=True),
        sa.Column("total_tokens", sa.Integer, nullable=False, server_default="0"),
    )
    # Add run_id to pipeline_events (nullable for backward compat).
    # NOTE: unlike op.create_table, op.add_column does NOT act on
    # Column(index=True) — the index must be created explicitly.
    op.add_column(
        "pipeline_events",
        sa.Column("run_id", UUID(as_uuid=True), sa.ForeignKey("pipeline_runs.id", ondelete="SET NULL"), nullable=True),
    )
    op.create_index("ix_pipeline_events_run_id", "pipeline_events", ["run_id"])
def downgrade() -> None:
    """Reverse the upgrade: unlink events, drop the runs table and enum types."""
    # Order matters: drop the FK column on pipeline_events first, then the
    # table it references. Any index on run_id is removed with the column.
    op.drop_column("pipeline_events", "run_id")
    op.drop_table("pipeline_runs")
    # drop_table does not remove PostgreSQL enum types — drop them explicitly.
    op.execute("DROP TYPE IF EXISTS pipeline_run_trigger")
    op.execute("DROP TYPE IF EXISTS pipeline_run_status")

View file

@ -361,6 +361,62 @@ class ContentReport(Base):
resolved_at: Mapped[datetime | None] = mapped_column(nullable=True) resolved_at: Mapped[datetime | None] = mapped_column(nullable=True)
# ── Pipeline Event ───────────────────────────────────────────────────────────
class PipelineRunStatus(str, enum.Enum):
    """Status of a pipeline run."""

    running = "running"      # initial state; stages in progress
    complete = "complete"    # finished (stage 6 is best-effort and still counts as complete)
    error = "error"          # a stage failed and the error callback marked the run
    cancelled = "cancelled"  # run was revoked while running
class PipelineRunTrigger(str, enum.Enum):
    """What initiated a pipeline run."""

    manual = "manual"                    # admin trigger endpoint
    clean_reprocess = "clean_reprocess"  # reprocess trigger endpoint
    auto_ingest = "auto_ingest"          # presumably set by automatic ingest — confirm caller
    bulk = "bulk"                        # presumably set by batch retriggers — confirm caller
class PipelineRun(Base):
    """A single execution of the pipeline for a video.

    Each trigger/retrigger creates a new run. Events are scoped to a run
    via run_id, giving a clean audit trail per execution.
    """

    __tablename__ = "pipeline_runs"

    id: Mapped[uuid.UUID] = _uuid_pk()
    # Owning video; runs are removed with the video (ondelete CASCADE).
    video_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False, index=True,
    )
    run_number: Mapped[int] = mapped_column(
        Integer, nullable=False, doc="Auto-increment per video, 1-indexed"
    )
    trigger: Mapped[PipelineRunTrigger] = mapped_column(
        Enum(PipelineRunTrigger, name="pipeline_run_trigger", create_constraint=True),
        nullable=False,
    )
    # New runs start as "running" — default set both Python-side and in the DB.
    status: Mapped[PipelineRunStatus] = mapped_column(
        Enum(PipelineRunStatus, name="pipeline_run_status", create_constraint=True),
        default=PipelineRunStatus.running,
        server_default="running",
    )
    started_at: Mapped[datetime] = mapped_column(
        default=_now, server_default=func.now()
    )
    # Populated when the run reaches a terminal status; NULL while running.
    finished_at: Mapped[datetime | None] = mapped_column(nullable=True)
    # Stage name recorded when status is "error".
    error_stage: Mapped[str | None] = mapped_column(String(50), nullable=True)
    # Aggregated token usage across this run's events.
    total_tokens: Mapped[int] = mapped_column(Integer, default=0, server_default="0")

    # relationships
    video: Mapped[SourceVideo] = sa_relationship()
    events: Mapped[list[PipelineEvent]] = sa_relationship(
        back_populates="run", foreign_keys="PipelineEvent.run_id"
    )
# ── Pipeline Event ─────────────────────────────────────────────────────────── # ── Pipeline Event ───────────────────────────────────────────────────────────
class PipelineEvent(Base): class PipelineEvent(Base):
@ -375,6 +431,9 @@ class PipelineEvent(Base):
video_id: Mapped[uuid.UUID] = mapped_column( video_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), nullable=False, index=True, UUID(as_uuid=True), nullable=False, index=True,
) )
run_id: Mapped[uuid.UUID | None] = mapped_column(
ForeignKey("pipeline_runs.id", ondelete="SET NULL"), nullable=True, index=True,
)
stage: Mapped[str] = mapped_column( stage: Mapped[str] = mapped_column(
String(50), nullable=False, doc="stage2_segmentation, stage3_extraction, etc." String(50), nullable=False, doc="stage2_segmentation, stage3_extraction, etc."
) )
@ -397,3 +456,8 @@ class PipelineEvent(Base):
system_prompt_text: Mapped[str | None] = mapped_column(Text, nullable=True) system_prompt_text: Mapped[str | None] = mapped_column(Text, nullable=True)
user_prompt_text: Mapped[str | None] = mapped_column(Text, nullable=True) user_prompt_text: Mapped[str | None] = mapped_column(Text, nullable=True)
response_text: Mapped[str | None] = mapped_column(Text, nullable=True) response_text: Mapped[str | None] = mapped_column(Text, nullable=True)
# relationships
run: Mapped[PipelineRun | None] = sa_relationship(
back_populates="events", foreign_keys=[run_id]
)

View file

@ -76,6 +76,7 @@ def _emit_event(
stage: str, stage: str,
event_type: str, event_type: str,
*, *,
run_id: str | None = None,
prompt_tokens: int | None = None, prompt_tokens: int | None = None,
completion_tokens: int | None = None, completion_tokens: int | None = None,
total_tokens: int | None = None, total_tokens: int | None = None,
@ -92,6 +93,7 @@ def _emit_event(
try: try:
event = PipelineEvent( event = PipelineEvent(
video_id=video_id, video_id=video_id,
run_id=run_id,
stage=stage, stage=stage,
event_type=event_type, event_type=event_type,
prompt_tokens=prompt_tokens, prompt_tokens=prompt_tokens,
@ -132,6 +134,7 @@ def _make_llm_callback(
stage: str, stage: str,
system_prompt: str | None = None, system_prompt: str | None = None,
user_prompt: str | None = None, user_prompt: str | None = None,
run_id: str | None = None,
): ):
"""Create an on_complete callback for LLMClient that emits llm_call events. """Create an on_complete callback for LLMClient that emits llm_call events.
@ -149,6 +152,7 @@ def _make_llm_callback(
video_id=video_id, video_id=video_id,
stage=stage, stage=stage,
event_type="llm_call", event_type="llm_call",
run_id=run_id,
model=model, model=model,
prompt_tokens=prompt_tokens, prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens, completion_tokens=completion_tokens,
@ -285,7 +289,7 @@ def _safe_parse_llm_response(
# ── Stage 2: Segmentation ─────────────────────────────────────────────────── # ── Stage 2: Segmentation ───────────────────────────────────────────────────
@celery_app.task(bind=True, max_retries=3, default_retry_delay=30) @celery_app.task(bind=True, max_retries=3, default_retry_delay=30)
def stage2_segmentation(self, video_id: str) -> str: def stage2_segmentation(self, video_id: str, run_id: str | None = None) -> str:
"""Analyze transcript segments and identify topic boundaries. """Analyze transcript segments and identify topic boundaries.
Loads all TranscriptSegment rows for the video, sends them to the LLM Loads all TranscriptSegment rows for the video, sends them to the LLM
@ -295,7 +299,7 @@ def stage2_segmentation(self, video_id: str) -> str:
""" """
start = time.monotonic() start = time.monotonic()
logger.info("Stage 2 (segmentation) starting for video_id=%s", video_id) logger.info("Stage 2 (segmentation) starting for video_id=%s", video_id)
_emit_event(video_id, "stage2_segmentation", "start") _emit_event(video_id, "stage2_segmentation", "start", run_id=run_id)
session = _get_sync_session() session = _get_sync_session()
try: try:
@ -331,7 +335,7 @@ def stage2_segmentation(self, video_id: str) -> str:
hard_limit = get_settings().llm_max_tokens_hard_limit hard_limit = get_settings().llm_max_tokens_hard_limit
max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage2_segmentation", hard_limit=hard_limit) max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage2_segmentation", hard_limit=hard_limit)
logger.info("Stage 2 using model=%s, modality=%s, max_tokens=%d", model_override or "default", modality, max_tokens) logger.info("Stage 2 using model=%s, modality=%s, max_tokens=%d", model_override or "default", modality, max_tokens)
raw = llm.complete(system_prompt, user_prompt, response_model=SegmentationResult, on_complete=_make_llm_callback(video_id, "stage2_segmentation", system_prompt=system_prompt, user_prompt=user_prompt), raw = llm.complete(system_prompt, user_prompt, response_model=SegmentationResult, on_complete=_make_llm_callback(video_id, "stage2_segmentation", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id),
modality=modality, model_override=model_override, max_tokens=max_tokens) modality=modality, model_override=model_override, max_tokens=max_tokens)
result = _safe_parse_llm_response(raw, SegmentationResult, llm, system_prompt, user_prompt, result = _safe_parse_llm_response(raw, SegmentationResult, llm, system_prompt, user_prompt,
modality=modality, model_override=model_override) modality=modality, model_override=model_override)
@ -345,7 +349,7 @@ def stage2_segmentation(self, video_id: str) -> str:
session.commit() session.commit()
elapsed = time.monotonic() - start elapsed = time.monotonic() - start
_emit_event(video_id, "stage2_segmentation", "complete") _emit_event(video_id, "stage2_segmentation", "complete", run_id=run_id)
logger.info( logger.info(
"Stage 2 (segmentation) completed for video_id=%s in %.1fs — %d topic groups found", "Stage 2 (segmentation) completed for video_id=%s in %.1fs — %d topic groups found",
video_id, elapsed, len(result.segments), video_id, elapsed, len(result.segments),
@ -356,7 +360,7 @@ def stage2_segmentation(self, video_id: str) -> str:
raise # Don't retry missing prompt files raise # Don't retry missing prompt files
except Exception as exc: except Exception as exc:
session.rollback() session.rollback()
_emit_event(video_id, "stage2_segmentation", "error", payload={"error": str(exc)}) _emit_event(video_id, "stage2_segmentation", "error", run_id=run_id, payload={"error": str(exc)})
logger.error("Stage 2 failed for video_id=%s: %s", video_id, exc) logger.error("Stage 2 failed for video_id=%s: %s", video_id, exc)
raise self.retry(exc=exc) raise self.retry(exc=exc)
finally: finally:
@ -366,7 +370,7 @@ def stage2_segmentation(self, video_id: str) -> str:
# ── Stage 3: Extraction ───────────────────────────────────────────────────── # ── Stage 3: Extraction ─────────────────────────────────────────────────────
@celery_app.task(bind=True, max_retries=3, default_retry_delay=30) @celery_app.task(bind=True, max_retries=3, default_retry_delay=30)
def stage3_extraction(self, video_id: str) -> str: def stage3_extraction(self, video_id: str, run_id: str | None = None) -> str:
"""Extract key moments from each topic segment group. """Extract key moments from each topic segment group.
Groups segments by topic_label, calls the LLM for each group to extract Groups segments by topic_label, calls the LLM for each group to extract
@ -376,7 +380,7 @@ def stage3_extraction(self, video_id: str) -> str:
""" """
start = time.monotonic() start = time.monotonic()
logger.info("Stage 3 (extraction) starting for video_id=%s", video_id) logger.info("Stage 3 (extraction) starting for video_id=%s", video_id)
_emit_event(video_id, "stage3_extraction", "start") _emit_event(video_id, "stage3_extraction", "start", run_id=run_id)
session = _get_sync_session() session = _get_sync_session()
try: try:
@ -423,7 +427,7 @@ def stage3_extraction(self, video_id: str) -> str:
) )
max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage3_extraction", hard_limit=hard_limit) max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage3_extraction", hard_limit=hard_limit)
raw = llm.complete(system_prompt, user_prompt, response_model=ExtractionResult, on_complete=_make_llm_callback(video_id, "stage3_extraction", system_prompt=system_prompt, user_prompt=user_prompt), raw = llm.complete(system_prompt, user_prompt, response_model=ExtractionResult, on_complete=_make_llm_callback(video_id, "stage3_extraction", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id),
modality=modality, model_override=model_override, max_tokens=max_tokens) modality=modality, model_override=model_override, max_tokens=max_tokens)
result = _safe_parse_llm_response(raw, ExtractionResult, llm, system_prompt, user_prompt, result = _safe_parse_llm_response(raw, ExtractionResult, llm, system_prompt, user_prompt,
modality=modality, model_override=model_override) modality=modality, model_override=model_override)
@ -451,7 +455,7 @@ def stage3_extraction(self, video_id: str) -> str:
session.commit() session.commit()
elapsed = time.monotonic() - start elapsed = time.monotonic() - start
_emit_event(video_id, "stage3_extraction", "complete") _emit_event(video_id, "stage3_extraction", "complete", run_id=run_id)
logger.info( logger.info(
"Stage 3 (extraction) completed for video_id=%s in %.1fs — %d moments created", "Stage 3 (extraction) completed for video_id=%s in %.1fs — %d moments created",
video_id, elapsed, total_moments, video_id, elapsed, total_moments,
@ -462,7 +466,7 @@ def stage3_extraction(self, video_id: str) -> str:
raise raise
except Exception as exc: except Exception as exc:
session.rollback() session.rollback()
_emit_event(video_id, "stage3_extraction", "error", payload={"error": str(exc)}) _emit_event(video_id, "stage3_extraction", "error", run_id=run_id, payload={"error": str(exc)})
logger.error("Stage 3 failed for video_id=%s: %s", video_id, exc) logger.error("Stage 3 failed for video_id=%s: %s", video_id, exc)
raise self.retry(exc=exc) raise self.retry(exc=exc)
finally: finally:
@ -472,7 +476,7 @@ def stage3_extraction(self, video_id: str) -> str:
# ── Stage 4: Classification ───────────────────────────────────────────────── # ── Stage 4: Classification ─────────────────────────────────────────────────
@celery_app.task(bind=True, max_retries=3, default_retry_delay=30) @celery_app.task(bind=True, max_retries=3, default_retry_delay=30)
def stage4_classification(self, video_id: str) -> str: def stage4_classification(self, video_id: str, run_id: str | None = None) -> str:
"""Classify key moments against the canonical tag taxonomy. """Classify key moments against the canonical tag taxonomy.
Loads all KeyMoment rows for the video, sends them to the LLM with the Loads all KeyMoment rows for the video, sends them to the LLM with the
@ -485,7 +489,7 @@ def stage4_classification(self, video_id: str) -> str:
""" """
start = time.monotonic() start = time.monotonic()
logger.info("Stage 4 (classification) starting for video_id=%s", video_id) logger.info("Stage 4 (classification) starting for video_id=%s", video_id)
_emit_event(video_id, "stage4_classification", "start") _emit_event(video_id, "stage4_classification", "start", run_id=run_id)
session = _get_sync_session() session = _get_sync_session()
try: try:
@ -532,7 +536,7 @@ def stage4_classification(self, video_id: str) -> str:
hard_limit = get_settings().llm_max_tokens_hard_limit hard_limit = get_settings().llm_max_tokens_hard_limit
max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage4_classification", hard_limit=hard_limit) max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage4_classification", hard_limit=hard_limit)
logger.info("Stage 4 using model=%s, modality=%s, max_tokens=%d", model_override or "default", modality, max_tokens) logger.info("Stage 4 using model=%s, modality=%s, max_tokens=%d", model_override or "default", modality, max_tokens)
raw = llm.complete(system_prompt, user_prompt, response_model=ClassificationResult, on_complete=_make_llm_callback(video_id, "stage4_classification", system_prompt=system_prompt, user_prompt=user_prompt), raw = llm.complete(system_prompt, user_prompt, response_model=ClassificationResult, on_complete=_make_llm_callback(video_id, "stage4_classification", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id),
modality=modality, model_override=model_override, max_tokens=max_tokens) modality=modality, model_override=model_override, max_tokens=max_tokens)
result = _safe_parse_llm_response(raw, ClassificationResult, llm, system_prompt, user_prompt, result = _safe_parse_llm_response(raw, ClassificationResult, llm, system_prompt, user_prompt,
modality=modality, model_override=model_override) modality=modality, model_override=model_override)
@ -564,7 +568,7 @@ def stage4_classification(self, video_id: str) -> str:
_store_classification_data(video_id, classification_data) _store_classification_data(video_id, classification_data)
elapsed = time.monotonic() - start elapsed = time.monotonic() - start
_emit_event(video_id, "stage4_classification", "complete") _emit_event(video_id, "stage4_classification", "complete", run_id=run_id)
logger.info( logger.info(
"Stage 4 (classification) completed for video_id=%s in %.1fs — %d moments classified", "Stage 4 (classification) completed for video_id=%s in %.1fs — %d moments classified",
video_id, elapsed, len(classification_data), video_id, elapsed, len(classification_data),
@ -575,7 +579,7 @@ def stage4_classification(self, video_id: str) -> str:
raise raise
except Exception as exc: except Exception as exc:
session.rollback() session.rollback()
_emit_event(video_id, "stage4_classification", "error", payload={"error": str(exc)}) _emit_event(video_id, "stage4_classification", "error", run_id=run_id, payload={"error": str(exc)})
logger.error("Stage 4 failed for video_id=%s: %s", video_id, exc) logger.error("Stage 4 failed for video_id=%s: %s", video_id, exc)
raise self.retry(exc=exc) raise self.retry(exc=exc)
finally: finally:
@ -694,7 +698,7 @@ def _capture_pipeline_metadata() -> dict:
# ── Stage 5: Synthesis ─────────────────────────────────────────────────────── # ── Stage 5: Synthesis ───────────────────────────────────────────────────────
@celery_app.task(bind=True, max_retries=3, default_retry_delay=30) @celery_app.task(bind=True, max_retries=3, default_retry_delay=30)
def stage5_synthesis(self, video_id: str) -> str: def stage5_synthesis(self, video_id: str, run_id: str | None = None) -> str:
"""Synthesize technique pages from classified key moments. """Synthesize technique pages from classified key moments.
Groups moments by (creator, topic_category), calls the LLM to synthesize Groups moments by (creator, topic_category), calls the LLM to synthesize
@ -707,7 +711,7 @@ def stage5_synthesis(self, video_id: str) -> str:
""" """
start = time.monotonic() start = time.monotonic()
logger.info("Stage 5 (synthesis) starting for video_id=%s", video_id) logger.info("Stage 5 (synthesis) starting for video_id=%s", video_id)
_emit_event(video_id, "stage5_synthesis", "start") _emit_event(video_id, "stage5_synthesis", "start", run_id=run_id)
settings = get_settings() settings = get_settings()
session = _get_sync_session() session = _get_sync_session()
@ -777,7 +781,7 @@ def stage5_synthesis(self, video_id: str) -> str:
user_prompt = f"<creator>{creator_name}</creator>\n<moments>\n{moments_text}\n</moments>" user_prompt = f"<creator>{creator_name}</creator>\n<moments>\n{moments_text}\n</moments>"
max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage5_synthesis", hard_limit=hard_limit) max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage5_synthesis", hard_limit=hard_limit)
raw = llm.complete(system_prompt, user_prompt, response_model=SynthesisResult, on_complete=_make_llm_callback(video_id, "stage5_synthesis", system_prompt=system_prompt, user_prompt=user_prompt), raw = llm.complete(system_prompt, user_prompt, response_model=SynthesisResult, on_complete=_make_llm_callback(video_id, "stage5_synthesis", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id),
modality=modality, model_override=model_override, max_tokens=max_tokens) modality=modality, model_override=model_override, max_tokens=max_tokens)
result = _safe_parse_llm_response(raw, SynthesisResult, llm, system_prompt, user_prompt, result = _safe_parse_llm_response(raw, SynthesisResult, llm, system_prompt, user_prompt,
modality=modality, model_override=model_override) modality=modality, model_override=model_override)
@ -885,7 +889,7 @@ def stage5_synthesis(self, video_id: str) -> str:
session.commit() session.commit()
elapsed = time.monotonic() - start elapsed = time.monotonic() - start
_emit_event(video_id, "stage5_synthesis", "complete") _emit_event(video_id, "stage5_synthesis", "complete", run_id=run_id)
logger.info( logger.info(
"Stage 5 (synthesis) completed for video_id=%s in %.1fs — %d pages created/updated", "Stage 5 (synthesis) completed for video_id=%s in %.1fs — %d pages created/updated",
video_id, elapsed, pages_created, video_id, elapsed, pages_created,
@ -896,7 +900,7 @@ def stage5_synthesis(self, video_id: str) -> str:
raise raise
except Exception as exc: except Exception as exc:
session.rollback() session.rollback()
_emit_event(video_id, "stage5_synthesis", "error", payload={"error": str(exc)}) _emit_event(video_id, "stage5_synthesis", "error", run_id=run_id, payload={"error": str(exc)})
logger.error("Stage 5 failed for video_id=%s: %s", video_id, exc) logger.error("Stage 5 failed for video_id=%s: %s", video_id, exc)
raise self.retry(exc=exc) raise self.retry(exc=exc)
finally: finally:
@ -906,7 +910,7 @@ def stage5_synthesis(self, video_id: str) -> str:
# ── Stage 6: Embed & Index ─────────────────────────────────────────────────── # ── Stage 6: Embed & Index ───────────────────────────────────────────────────
@celery_app.task(bind=True, max_retries=0) @celery_app.task(bind=True, max_retries=0)
def stage6_embed_and_index(self, video_id: str) -> str: def stage6_embed_and_index(self, video_id: str, run_id: str | None = None) -> str:
"""Generate embeddings for technique pages and key moments, then upsert to Qdrant. """Generate embeddings for technique pages and key moments, then upsert to Qdrant.
This is a non-blocking side-effect stage failures are logged but do not This is a non-blocking side-effect stage failures are logged but do not
@ -946,6 +950,8 @@ def stage6_embed_and_index(self, video_id: str) -> str:
if not moments and not pages: if not moments and not pages:
logger.info("Stage 6: No moments or pages for video_id=%s, skipping.", video_id) logger.info("Stage 6: No moments or pages for video_id=%s, skipping.", video_id)
if run_id:
_finish_run(run_id, "complete")
return video_id return video_id
embed_client = EmbeddingClient(settings) embed_client = EmbeddingClient(settings)
@ -1030,6 +1036,8 @@ def stage6_embed_and_index(self, video_id: str) -> str:
"%d pages, %d moments processed", "%d pages, %d moments processed",
video_id, elapsed, len(pages), len(moments), video_id, elapsed, len(pages), len(moments),
) )
if run_id:
_finish_run(run_id, "complete")
return video_id return video_id
except Exception as exc: except Exception as exc:
@ -1039,6 +1047,8 @@ def stage6_embed_and_index(self, video_id: str) -> str:
"Pipeline continues — embeddings can be regenerated later.", "Pipeline continues — embeddings can be regenerated later.",
video_id, exc, video_id, exc,
) )
if run_id:
_finish_run(run_id, "complete") # Run is still "complete" — stage6 is best-effort
return video_id return video_id
finally: finally:
session.close() session.close()
@ -1099,14 +1109,68 @@ def _load_prior_pages(video_id: str) -> list[str]:
# ── Orchestrator ───────────────────────────────────────────────────────────── # ── Orchestrator ─────────────────────────────────────────────────────────────
@celery_app.task @celery_app.task
def mark_pipeline_error(request, exc, traceback, video_id: str) -> None: def mark_pipeline_error(request, exc, traceback, video_id: str, run_id: str | None = None) -> None:
"""Error callback — marks video as errored when a pipeline stage fails.""" """Error callback — marks video as errored when a pipeline stage fails."""
logger.error("Pipeline failed for video_id=%s: %s", video_id, exc) logger.error("Pipeline failed for video_id=%s: %s", video_id, exc)
_set_error_status(video_id, "pipeline", exc) _set_error_status(video_id, "pipeline", exc)
if run_id:
_finish_run(run_id, "error", error_stage="pipeline")
def _create_run(video_id: str, trigger: str) -> str:
    """Create a PipelineRun for *video_id* and return its id as a string.

    Args:
        video_id: stringified UUID of the source video.
        trigger: one of the PipelineRunTrigger values ("manual", ...).

    NOTE(review): run_number = max + 1 is not race-safe — two concurrent
    triggers for the same video could claim the same number. Harmless for
    display, but worth a unique constraint if uniqueness ever matters.
    """
    from models import PipelineRun, PipelineRunTrigger

    session = _get_sync_session()
    try:
        # Next per-video run number, 1-indexed (coalesce -> 0 when no runs yet).
        # Uses the module-level `func`, consistent with _finish_run.
        max_num = session.execute(
            select(func.coalesce(func.max(PipelineRun.run_number), 0))
            .where(PipelineRun.video_id == video_id)
        ).scalar()
        run = PipelineRun(
            video_id=video_id,
            run_number=(max_num or 0) + 1,
            trigger=PipelineRunTrigger(trigger),
        )
        session.add(run)
        # Flush so the id is assigned now; capturing it before commit avoids
        # the post-commit expire/refresh SELECT the original code incurred.
        session.flush()
        run_id = str(run.id)
        session.commit()
        return run_id
    finally:
        session.close()
def _finish_run(run_id: str, status: str, error_stage: str | None = None) -> None:
    """Mark a PipelineRun finished: set status, finished_at, and token totals.

    Best-effort by design — any failure is logged and swallowed so that run
    bookkeeping can never break a pipeline stage.

    Args:
        run_id: stringified UUID of the PipelineRun.
        status: one of the PipelineRunStatus values ("complete", "error", ...).
        error_stage: stage name to record when status is "error".
    """
    from models import PipelineRun, PipelineRunStatus, _now

    session = _get_sync_session()
    try:
        run = session.execute(
            select(PipelineRun).where(PipelineRun.id == run_id)
        ).scalar_one_or_none()
        if run:
            run.status = PipelineRunStatus(status)
            run.finished_at = _now()
            if error_stage:
                run.error_stage = error_stage
            # Roll up token usage from this run's events; coalesce makes the
            # `or 0` that was here redundant.
            run.total_tokens = session.execute(
                select(func.coalesce(func.sum(PipelineEvent.total_tokens), 0))
                .where(PipelineEvent.run_id == run_id)
            ).scalar()
            session.commit()
    except Exception as exc:
        # Deliberate broad catch (best-effort); roll back so the session is
        # clean before close rather than discarding a dirty transaction.
        session.rollback()
        logger.warning("Failed to finish run %s: %s", run_id, exc)
    finally:
        session.close()
@celery_app.task @celery_app.task
def run_pipeline(video_id: str) -> str: def run_pipeline(video_id: str, trigger: str = "manual") -> str:
"""Orchestrate the full pipeline (stages 2-5) with resumability. """Orchestrate the full pipeline (stages 2-5) with resumability.
Checks the current processing_status of the video and chains only the Checks the current processing_status of the video and chains only the
@ -1139,33 +1203,35 @@ def run_pipeline(video_id: str) -> str:
# Snapshot prior technique pages before pipeline resets key_moments # Snapshot prior technique pages before pipeline resets key_moments
_snapshot_prior_pages(video_id) _snapshot_prior_pages(video_id)
# Create a pipeline run record
run_id = _create_run(video_id, trigger)
logger.info("run_pipeline: created run_id=%s for video_id=%s (trigger=%s)", run_id, video_id, trigger)
# Build the chain based on current status # Build the chain based on current status
stages = [] stages = []
if status in (ProcessingStatus.not_started, ProcessingStatus.queued): if status in (ProcessingStatus.not_started, ProcessingStatus.queued):
stages = [ stages = [
stage2_segmentation.s(video_id), stage2_segmentation.s(video_id, run_id=run_id),
stage3_extraction.s(), # receives video_id from previous stage3_extraction.s(run_id=run_id), # receives video_id from previous
stage4_classification.s(), stage4_classification.s(run_id=run_id),
stage5_synthesis.s(), stage5_synthesis.s(run_id=run_id),
stage6_embed_and_index.s(), stage6_embed_and_index.s(run_id=run_id),
] ]
elif status == ProcessingStatus.processing: elif status == ProcessingStatus.processing:
# Resuming a previously-started pipeline — re-run from stage 2
stages = [ stages = [
stage2_segmentation.s(video_id), stage2_segmentation.s(video_id, run_id=run_id),
stage3_extraction.s(), stage3_extraction.s(run_id=run_id),
stage4_classification.s(), stage4_classification.s(run_id=run_id),
stage5_synthesis.s(), stage5_synthesis.s(run_id=run_id),
stage6_embed_and_index.s(), stage6_embed_and_index.s(run_id=run_id),
] ]
elif status == ProcessingStatus.error: elif status == ProcessingStatus.error:
# Retrigger after error — re-run full pipeline
stages = [ stages = [
stage2_segmentation.s(video_id), stage2_segmentation.s(video_id, run_id=run_id),
stage3_extraction.s(), stage3_extraction.s(run_id=run_id),
stage4_classification.s(), stage4_classification.s(run_id=run_id),
stage5_synthesis.s(), stage5_synthesis.s(run_id=run_id),
stage6_embed_and_index.s(), stage6_embed_and_index.s(run_id=run_id),
] ]
elif status == ProcessingStatus.complete: elif status == ProcessingStatus.complete:
logger.info( logger.info(
@ -1187,11 +1253,11 @@ def run_pipeline(video_id: str) -> str:
session.close() session.close()
pipeline = celery_chain(*stages) pipeline = celery_chain(*stages)
error_cb = mark_pipeline_error.s(video_id) error_cb = mark_pipeline_error.s(video_id, run_id=run_id)
pipeline.apply_async(link_error=error_cb) pipeline.apply_async(link_error=error_cb)
logger.info( logger.info(
"run_pipeline: dispatched %d stages for video_id=%s", "run_pipeline: dispatched %d stages for video_id=%s (run_id=%s)",
len(stages), video_id, len(stages), video_id, run_id,
) )
return video_id return video_id

View file

@ -13,6 +13,7 @@ Admin:
import logging import logging
import uuid import uuid
from datetime import datetime, timezone
from typing import Annotated from typing import Annotated
from fastapi import APIRouter, Depends, HTTPException, Query from fastapi import APIRouter, Depends, HTTPException, Query
@ -21,7 +22,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from config import get_settings from config import get_settings
from database import get_session from database import get_session
from models import PipelineEvent, SourceVideo, Creator, KeyMoment, TranscriptSegment, ProcessingStatus from models import PipelineEvent, PipelineRun, PipelineRunStatus, SourceVideo, Creator, KeyMoment, TranscriptSegment, ProcessingStatus
from redis_client import get_redis from redis_client import get_redis
from schemas import DebugModeResponse, DebugModeUpdate, TokenStageSummary, TokenSummaryResponse from schemas import DebugModeResponse, DebugModeUpdate, TokenStageSummary, TokenSummaryResponse
@ -50,7 +51,7 @@ async def trigger_pipeline(
from pipeline.stages import run_pipeline from pipeline.stages import run_pipeline
try: try:
run_pipeline.delay(str(video.id)) run_pipeline.delay(str(video.id), trigger="manual")
logger.info("Pipeline manually triggered for video_id=%s", video_id) logger.info("Pipeline manually triggered for video_id=%s", video_id)
except Exception as exc: except Exception as exc:
logger.warning("Failed to dispatch pipeline for video_id=%s: %s", video_id, exc) logger.warning("Failed to dispatch pipeline for video_id=%s: %s", video_id, exc)
@ -88,6 +89,24 @@ async def list_pipeline_videos(
.subquery() .subquery()
) )
# Subquery for the most recent pipeline run per video
latest_run = (
select(
PipelineRun.video_id,
PipelineRun.id.label("run_id"),
PipelineRun.run_number,
PipelineRun.trigger.label("run_trigger"),
PipelineRun.status.label("run_status"),
PipelineRun.started_at.label("run_started_at"),
PipelineRun.finished_at.label("run_finished_at"),
PipelineRun.error_stage.label("run_error_stage"),
PipelineRun.total_tokens.label("run_total_tokens"),
)
.order_by(PipelineRun.video_id, PipelineRun.started_at.desc())
.distinct(PipelineRun.video_id)
.subquery()
)
# Subquery for the most recent stage start event per video (active stage indicator) # Subquery for the most recent stage start event per video (active stage indicator)
latest_stage = ( latest_stage = (
select( select(
@ -117,10 +136,19 @@ async def list_pipeline_videos(
latest_stage.c.active_stage, latest_stage.c.active_stage,
latest_stage.c.active_stage_status, latest_stage.c.active_stage_status,
latest_stage.c.stage_started_at, latest_stage.c.stage_started_at,
latest_run.c.run_id,
latest_run.c.run_number,
latest_run.c.run_trigger,
latest_run.c.run_status,
latest_run.c.run_started_at,
latest_run.c.run_finished_at,
latest_run.c.run_error_stage,
latest_run.c.run_total_tokens,
) )
.join(Creator, SourceVideo.creator_id == Creator.id) .join(Creator, SourceVideo.creator_id == Creator.id)
.outerjoin(event_counts, SourceVideo.id == event_counts.c.video_id) .outerjoin(event_counts, SourceVideo.id == event_counts.c.video_id)
.outerjoin(latest_stage, SourceVideo.id == latest_stage.c.video_id) .outerjoin(latest_stage, SourceVideo.id == latest_stage.c.video_id)
.outerjoin(latest_run, SourceVideo.id == latest_run.c.video_id)
.order_by(SourceVideo.updated_at.desc()) .order_by(SourceVideo.updated_at.desc())
) )
@ -143,6 +171,16 @@ async def list_pipeline_videos(
"active_stage": r.active_stage, "active_stage": r.active_stage,
"active_stage_status": r.active_stage_status, "active_stage_status": r.active_stage_status,
"stage_started_at": r.stage_started_at.isoformat() if r.stage_started_at else None, "stage_started_at": r.stage_started_at.isoformat() if r.stage_started_at else None,
"latest_run": {
"id": str(r.run_id),
"run_number": r.run_number,
"trigger": r.run_trigger.value if hasattr(r.run_trigger, 'value') else r.run_trigger,
"status": r.run_status.value if hasattr(r.run_status, 'value') else r.run_status,
"started_at": r.run_started_at.isoformat() if r.run_started_at else None,
"finished_at": r.run_finished_at.isoformat() if r.run_finished_at else None,
"error_stage": r.run_error_stage,
"total_tokens": r.run_total_tokens or 0,
} if r.run_id else None,
} }
for r in rows for r in rows
], ],
@ -216,7 +254,7 @@ async def clean_retrigger_pipeline(
# Now trigger the pipeline # Now trigger the pipeline
from pipeline.stages import run_pipeline from pipeline.stages import run_pipeline
try: try:
run_pipeline.delay(str(video.id)) run_pipeline.delay(str(video.id), trigger="clean_reprocess")
logger.info("Clean retrigger dispatched for video_id=%s", video_id) logger.info("Clean retrigger dispatched for video_id=%s", video_id)
except Exception as exc: except Exception as exc:
logger.warning("Failed to dispatch pipeline after cleanup for video_id=%s: %s", video_id, exc) logger.warning("Failed to dispatch pipeline after cleanup for video_id=%s: %s", video_id, exc)
@ -235,10 +273,14 @@ async def clean_retrigger_pipeline(
# ── Admin: Revoke ──────────────────────────────────────────────────────────── # ── Admin: Revoke ────────────────────────────────────────────────────────────
@router.post("/admin/pipeline/revoke/{video_id}") @router.post("/admin/pipeline/revoke/{video_id}")
async def revoke_pipeline(video_id: str): async def revoke_pipeline(
video_id: str,
db: AsyncSession = Depends(get_session),
):
"""Revoke/cancel active Celery tasks for a video. """Revoke/cancel active Celery tasks for a video.
Uses Celery's revoke with terminate=True to kill running tasks. Uses Celery's revoke with terminate=True to kill running tasks.
Also marks the latest running pipeline_run as cancelled.
This is best-effort the task may have already completed. This is best-effort the task may have already completed.
""" """
from worker import celery_app from worker import celery_app
@ -257,6 +299,18 @@ async def revoke_pipeline(video_id: str):
revoked_count += 1 revoked_count += 1
logger.info("Revoked task %s for video_id=%s", task["id"], video_id) logger.info("Revoked task %s for video_id=%s", task["id"], video_id)
# Mark any running pipeline_runs as cancelled
running_runs = await db.execute(
select(PipelineRun).where(
PipelineRun.video_id == video_id,
PipelineRun.status == PipelineRunStatus.running,
)
)
for run in running_runs.scalars().all():
run.status = PipelineRunStatus.cancelled
run.finished_at = datetime.now(timezone.utc).replace(tzinfo=None)
await db.commit()
return { return {
"status": "revoked" if revoked_count > 0 else "no_active_tasks", "status": "revoked" if revoked_count > 0 else "no_active_tasks",
"video_id": video_id, "video_id": video_id,
@ -317,6 +371,65 @@ async def recent_pipeline_activity(
} }
# ── Admin: Pipeline runs ─────────────────────────────────────────────────────
@router.get("/admin/pipeline/runs/{video_id}")
async def list_pipeline_runs(
    video_id: str,
    db: AsyncSession = Depends(get_session),
):
    """List all pipeline runs for a video, newest first.

    Each run is returned with its per-run event count. Events recorded
    before run tracking existed (run_id IS NULL) are not attached to any
    run; their total is reported separately as ``legacy_event_count``.
    """

    def _enum_value(v):
        # Enum members carry .value; fall back to str() for plain values.
        return v.value if hasattr(v, 'value') else str(v)

    # Per-run event tallies, outer-joined onto the runs below so runs
    # with zero events still appear.
    per_run_counts = (
        select(
            PipelineEvent.run_id,
            func.count().label("event_count"),
        )
        .where(PipelineEvent.run_id.isnot(None))
        .group_by(PipelineEvent.run_id)
        .subquery()
    )

    run_rows = (
        await db.execute(
            select(
                PipelineRun,
                per_run_counts.c.event_count,
            )
            .outerjoin(per_run_counts, PipelineRun.id == per_run_counts.c.run_id)
            .where(PipelineRun.video_id == video_id)
            .order_by(PipelineRun.started_at.desc())
        )
    ).all()

    # Count legacy events (pre-run-tracking, run_id IS NULL) for this video.
    legacy_count = (
        await db.execute(
            select(func.count())
            .select_from(PipelineEvent)
            .where(PipelineEvent.video_id == video_id, PipelineEvent.run_id.is_(None))
        )
    ).scalar() or 0

    items = [
        {
            "id": str(run.id),
            "run_number": run.run_number,
            "trigger": _enum_value(run.trigger),
            "status": _enum_value(run.status),
            "started_at": run.started_at.isoformat() if run.started_at else None,
            "finished_at": run.finished_at.isoformat() if run.finished_at else None,
            "error_stage": run.error_stage,
            "total_tokens": run.total_tokens or 0,
            "event_count": evt_count or 0,
        }
        for run, evt_count in run_rows
    ]

    return {
        "items": items,
        "legacy_event_count": legacy_count,
    }
# ── Admin: Event log ───────────────────────────────────────────────────────── # ── Admin: Event log ─────────────────────────────────────────────────────────
@router.get("/admin/pipeline/events/{video_id}") @router.get("/admin/pipeline/events/{video_id}")
@ -326,12 +439,15 @@ async def list_pipeline_events(
limit: Annotated[int, Query(ge=1, le=200)] = 100, limit: Annotated[int, Query(ge=1, le=200)] = 100,
stage: Annotated[str | None, Query(description="Filter by stage name")] = None, stage: Annotated[str | None, Query(description="Filter by stage name")] = None,
event_type: Annotated[str | None, Query(description="Filter by event type")] = None, event_type: Annotated[str | None, Query(description="Filter by event type")] = None,
run_id: Annotated[str | None, Query(description="Filter by pipeline run ID")] = None,
order: Annotated[str, Query(description="Sort order: asc or desc")] = "desc", order: Annotated[str, Query(description="Sort order: asc or desc")] = "desc",
db: AsyncSession = Depends(get_session), db: AsyncSession = Depends(get_session),
): ):
"""Get pipeline events for a video. Default: newest first (desc).""" """Get pipeline events for a video. Default: newest first (desc)."""
stmt = select(PipelineEvent).where(PipelineEvent.video_id == video_id) stmt = select(PipelineEvent).where(PipelineEvent.video_id == video_id)
if run_id:
stmt = stmt.where(PipelineEvent.run_id == run_id)
if stage: if stage:
stmt = stmt.where(PipelineEvent.stage == stage) stmt = stmt.where(PipelineEvent.stage == stage)
if event_type: if event_type:

View file

@ -3740,6 +3740,107 @@ a.app-footer__repo:hover {
flex-shrink: 0; flex-shrink: 0;
} }
/* ── Run List & Cards ─────────────────────────────────────────────────────── */

/* Vertical stack of per-run collapsible cards. */
.run-list {
  display: flex;
  flex-direction: column;
  gap: 0.5rem;
}

.run-card {
  border: 1px solid var(--color-border);
  border-radius: 8px;
  overflow: hidden; /* clip body so the rounded corners stay clean */
}

/* Status-colored borders: accent while running, red on error, gray when cancelled. */
.run-card--running {
  border-color: var(--color-accent);
}

.run-card--error {
  border-color: rgba(244, 67, 54, 0.4);
}

.run-card--cancelled {
  border-color: rgba(158, 158, 158, 0.3);
}

/* The whole header is a <button> that toggles expansion. */
.run-card__header {
  display: flex;
  align-items: center;
  gap: 0.5rem;
  width: 100%;
  padding: 0.6rem 1rem;
  background: var(--color-surface);
  border: none;
  color: var(--color-text);
  cursor: pointer;
  font-size: 0.85rem;
  text-align: left;
}

.run-card__header:hover {
  background: var(--color-bg-input);
}

/* Expand/collapse chevron. */
.run-card__arrow {
  flex-shrink: 0;
  width: 1rem;
  color: var(--color-text-muted);
}

.run-card__number {
  font-weight: 600;
  white-space: nowrap;
}

.run-card__trigger {
  color: var(--color-text-muted);
  font-size: 0.8rem;
  white-space: nowrap;
}

.run-card__time {
  color: var(--color-text-muted);
  font-size: 0.8rem;
  white-space: nowrap;
}

.run-card__duration {
  color: var(--color-text-muted);
  font-size: 0.75rem;
  white-space: nowrap;
}

/* margin-left: auto pushes the token count (and everything after it)
   to the right edge of the flex header row. */
.run-card__tokens {
  color: var(--color-text-muted);
  font-size: 0.8rem;
  white-space: nowrap;
  margin-left: auto;
}

.run-card__events {
  color: var(--color-text-muted);
  font-size: 0.8rem;
  white-space: nowrap;
}

.run-card__error-stage {
  color: #f44336;
  font-size: 0.8rem;
  white-space: nowrap;
}

/* Expanded area hosting the scoped EventLog. */
.run-card__body {
  border-top: 1px solid var(--color-border);
  padding: 0.5rem;
}

/* Dim the legacy (pre-run-tracking) card so it reads as secondary. */
.run-card--legacy .run-card__header {
  opacity: 0.7;
}
/* ── Worker Status Indicator ────────────────────────────────────────────── */ /* ── Worker Status Indicator ────────────────────────────────────────────── */
.worker-status { .worker-status {

View file

@ -413,6 +413,16 @@ export interface PipelineVideoItem {
active_stage: string | null; active_stage: string | null;
active_stage_status: string | null; active_stage_status: string | null;
stage_started_at: string | null; stage_started_at: string | null;
latest_run: {
id: string;
run_number: number;
trigger: string;
status: string;
started_at: string | null;
finished_at: string | null;
error_stage: string | null;
total_tokens: number;
} | null;
} }
export interface PipelineVideoListResponse { export interface PipelineVideoListResponse {
@ -502,15 +512,37 @@ export async function fetchRecentActivity(limit = 10): Promise<RecentActivityRes
return request<RecentActivityResponse>(`${BASE}/admin/pipeline/recent-activity?limit=${limit}`); return request<RecentActivityResponse>(`${BASE}/admin/pipeline/recent-activity?limit=${limit}`);
} }
/** One pipeline run as returned by GET /admin/pipeline/runs/{video_id}. */
export interface PipelineRunItem {
  id: string;
  run_number: number;
  /** Trigger type, e.g. "manual" | "clean_reprocess" | "auto_ingest" | "bulk". */
  trigger: string;
  /** Run status, e.g. "running" | "complete" | "error" | "cancelled". */
  status: string;
  started_at: string | null;
  finished_at: string | null;
  /** Stage name the run failed at, or null if it did not error. */
  error_stage: string | null;
  total_tokens: number;
  /** Number of pipeline events scoped to this run. */
  event_count: number;
}

export interface PipelineRunsResponse {
  /** Runs for the video, newest first. */
  items: PipelineRunItem[];
  /** Count of events recorded before run tracking (no run_id). */
  legacy_event_count: number;
}

/** Fetch all pipeline runs for a video, newest first. */
export async function fetchPipelineRuns(videoId: string): Promise<PipelineRunsResponse> {
  return request<PipelineRunsResponse>(`${BASE}/admin/pipeline/runs/${videoId}`);
}
export async function fetchPipelineEvents( export async function fetchPipelineEvents(
videoId: string, videoId: string,
params: { offset?: number; limit?: number; stage?: string; event_type?: string; order?: "asc" | "desc" } = {}, params: { offset?: number; limit?: number; stage?: string; event_type?: string; run_id?: string; order?: "asc" | "desc" } = {},
): Promise<PipelineEventListResponse> { ): Promise<PipelineEventListResponse> {
const qs = new URLSearchParams(); const qs = new URLSearchParams();
if (params.offset !== undefined) qs.set("offset", String(params.offset)); if (params.offset !== undefined) qs.set("offset", String(params.offset));
if (params.limit !== undefined) qs.set("limit", String(params.limit)); if (params.limit !== undefined) qs.set("limit", String(params.limit));
if (params.stage) qs.set("stage", params.stage); if (params.stage) qs.set("stage", params.stage);
if (params.event_type) qs.set("event_type", params.event_type); if (params.event_type) qs.set("event_type", params.event_type);
if (params.run_id) qs.set("run_id", params.run_id);
if (params.order) qs.set("order", params.order); if (params.order) qs.set("order", params.order);
const query = qs.toString(); const query = qs.toString();
return request<PipelineEventListResponse>( return request<PipelineEventListResponse>(

View file

@ -9,6 +9,7 @@ import { useDocumentTitle } from "../hooks/useDocumentTitle";
import { import {
fetchPipelineVideos, fetchPipelineVideos,
fetchPipelineEvents, fetchPipelineEvents,
fetchPipelineRuns,
fetchWorkerStatus, fetchWorkerStatus,
fetchDebugMode, fetchDebugMode,
setDebugMode, setDebugMode,
@ -19,6 +20,7 @@ import {
fetchRecentActivity, fetchRecentActivity,
type PipelineVideoItem, type PipelineVideoItem,
type PipelineEvent, type PipelineEvent,
type PipelineRunItem,
type WorkerStatusResponse, type WorkerStatusResponse,
type RecentActivityItem, type RecentActivityItem,
} from "../api/public-client"; } from "../api/public-client";
@ -215,7 +217,7 @@ function DebugPayloadViewer({ event }: { event: PipelineEvent }) {
// ── Event Log ──────────────────────────────────────────────────────────────── // ── Event Log ────────────────────────────────────────────────────────────────
function EventLog({ videoId, status }: { videoId: string; status: string }) { function EventLog({ videoId, status, runId }: { videoId: string; status: string; runId?: string }) {
const [events, setEvents] = useState<PipelineEvent[]>([]); const [events, setEvents] = useState<PipelineEvent[]>([]);
const [total, setTotal] = useState(0); const [total, setTotal] = useState(0);
const [loading, setLoading] = useState(true); const [loading, setLoading] = useState(true);
@ -233,6 +235,7 @@ function EventLog({ videoId, status }: { videoId: string; status: string }) {
offset, offset,
limit, limit,
order: viewMode === "head" ? "asc" : "desc", order: viewMode === "head" ? "asc" : "desc",
run_id: runId,
}); });
setEvents(res.items); setEvents(res.items);
setTotal(res.total); setTotal(res.total);
@ -477,6 +480,125 @@ function StatusFilter({
); );
} }
// ── Run List ─────────────────────────────────────────────────────────────────

// Human-readable labels for run trigger types (falls back to the raw value).
const TRIGGER_LABELS: Record<string, string> = {
  manual: "Manual",
  clean_reprocess: "Clean Reprocess",
  auto_ingest: "Auto Ingest",
  bulk: "Bulk",
};

// Maps run status to the existing pipeline badge modifier classes.
const RUN_STATUS_CLASS: Record<string, string> = {
  running: "pipeline-badge--active",
  complete: "pipeline-badge--success",
  error: "pipeline-badge--error",
  cancelled: "pipeline-badge--pending",
};
/**
 * Collapsible list of pipeline runs for one video.
 *
 * Fetches runs via fetchPipelineRuns, auto-expands the newest run on first
 * load, and shows legacy (pre-run-tracking) events in a separate dimmed
 * card. While the video is processing/queued, silently refreshes every 10s.
 */
function RunList({ videoId, videoStatus }: { videoId: string; videoStatus: string }) {
  const [runs, setRuns] = useState<PipelineRunItem[]>([]);
  const [legacyCount, setLegacyCount] = useState(0);
  const [loading, setLoading] = useState(true);
  const [expandedRunId, setExpandedRunId] = useState<string | null>(null);
  const [showLegacy, setShowLegacy] = useState(false);

  const load = useCallback(async (silent = false) => {
    if (!silent) setLoading(true);
    try {
      const res = await fetchPipelineRuns(videoId);
      setRuns(res.items);
      setLegacyCount(res.legacy_event_count);
      // Auto-expand the latest run on first load only. The functional
      // update (prev ?? firstRun.id) reads the current expansion state
      // without listing expandedRunId as a dependency — otherwise every
      // expand/collapse toggle would recreate `load` and re-trigger the
      // effects below, causing a full refetch with a loading flash.
      if (!silent && res.items.length > 0) {
        const firstRun = res.items[0];
        if (firstRun) setExpandedRunId((prev) => prev ?? firstRun.id);
      }
    } catch {
      // silently fail — the list simply keeps its previous contents
    } finally {
      if (!silent) setLoading(false);
    }
  }, [videoId]);

  useEffect(() => {
    void load();
  }, [load]);

  // Auto-refresh when video is processing
  useEffect(() => {
    if (videoStatus !== "processing" && videoStatus !== "queued") return;
    const id = setInterval(() => void load(true), 10_000);
    return () => clearInterval(id);
  }, [videoStatus, load]);

  if (loading) return <div className="loading">Loading runs</div>;

  if (runs.length === 0 && legacyCount === 0) {
    return <div className="pipeline-events__empty">No pipeline runs recorded.</div>;
  }

  return (
    <div className="run-list">
      {runs.map((run) => {
        const isExpanded = expandedRunId === run.id;
        return (
          <div key={run.id} className={`run-card run-card--${run.status}`}>
            <button
              className="run-card__header"
              onClick={() => setExpandedRunId(isExpanded ? null : run.id)}
              aria-expanded={isExpanded}
            >
              <span className="run-card__arrow">{isExpanded ? "▾" : "▸"}</span>
              <span className="run-card__number">Run #{run.run_number}</span>
              <span className="run-card__trigger">{TRIGGER_LABELS[run.trigger] ?? run.trigger}</span>
              <span className={`pipeline-badge ${RUN_STATUS_CLASS[run.status] ?? ""}`}>
                {run.status}
              </span>
              <span className="run-card__time">{formatDate(run.started_at)}</span>
              {run.finished_at && (
                <span className="run-card__duration" title={`Finished: ${formatDate(run.finished_at)}`}>
                  {formatDate(run.finished_at)}
                </span>
              )}
              <span className="run-card__tokens">{formatTokens(run.total_tokens)} tokens</span>
              <span className="run-card__events">{run.event_count} events</span>
              {run.error_stage && (
                <span className="run-card__error-stage">Failed at: {run.error_stage}</span>
              )}
            </button>
            {isExpanded && (
              <div className="run-card__body">
                {/* Scope the event log to this run via runId */}
                <EventLog videoId={videoId} status={run.status} runId={run.id} />
              </div>
            )}
          </div>
        );
      })}
      {legacyCount > 0 && (
        <div className="run-card run-card--legacy">
          <button
            className="run-card__header"
            onClick={() => setShowLegacy((v) => !v)}
            aria-expanded={showLegacy}
          >
            <span className="run-card__arrow">{showLegacy ? "▾" : "▸"}</span>
            <span className="run-card__number">Legacy</span>
            <span className="run-card__trigger">Pre-run tracking</span>
            <span className="run-card__events">{legacyCount} events</span>
          </button>
          {showLegacy && (
            <div className="run-card__body">
              {/* No runId prop: shows all events, including run_id IS NULL */}
              <EventLog videoId={videoId} status="complete" />
            </div>
          )}
        </div>
      )}
    </div>
  );
}
// ── Stage Timeline ─────────────────────────────────────────────────────────── // ── Stage Timeline ───────────────────────────────────────────────────────────
const PIPELINE_STAGES = [ const PIPELINE_STAGES = [
@ -1065,7 +1187,7 @@ export default function AdminPipeline() {
<span>Created: {formatDate(video.created_at)}</span> <span>Created: {formatDate(video.created_at)}</span>
<span>Updated: {formatDate(video.updated_at)}</span> <span>Updated: {formatDate(video.updated_at)}</span>
</div> </div>
<EventLog videoId={video.id} status={video.processing_status} /> <RunList videoId={video.id} videoStatus={video.processing_status} />
</div> </div>
)} )}
</div> </div>