From cd3b57a15678f5b882655382fa0107c009b161fd Mon Sep 17 00:00:00 2001 From: jlightner Date: Tue, 31 Mar 2026 16:32:25 +0000 Subject: [PATCH] fix: Clean retrigger preserves transcript_segments (pipeline input data) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Deleting transcript_segments left the pipeline with nothing to process — all stages would skip immediately. Segments come from the ingest step, not from pipeline stages 2-6. Only pipeline_events and key_moments (pipeline output) are deleted during clean reprocess. --- backend/routers/pipeline.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/backend/routers/pipeline.py b/backend/routers/pipeline.py index 393c8c5..b551262 100644 --- a/backend/routers/pipeline.py +++ b/backend/routers/pipeline.py @@ -189,10 +189,10 @@ async def clean_retrigger_pipeline( await db.execute( KeyMoment.__table__.delete().where(KeyMoment.source_video_id == video_id) ) - # Delete transcript segments - await db.execute( - TranscriptSegment.__table__.delete().where(TranscriptSegment.source_video_id == video_id) - ) + # Note: transcript_segments are NOT deleted — they are the pipeline's input + # data created during ingest, not pipeline output. Deleting them would leave + # the pipeline with nothing to process. + # Reset status video.processing_status = ProcessingStatus.not_started await db.commit() @@ -200,7 +200,6 @@ async def clean_retrigger_pipeline( deleted_counts = { "pipeline_events": "cleared", "key_moments": "cleared", - "transcript_segments": "cleared", } # Best-effort Qdrant cleanup (non-blocking)