fix: Clean retrigger preserves transcript_segments (pipeline input data)

Deleting transcript_segments left the pipeline with nothing to process —
all stages would skip immediately. Segments come from the ingest step,
not from pipeline stages 2-6. Only pipeline_events and key_moments
(pipeline output) are deleted during a clean reprocess.
This commit is contained in:
jlightner 2026-03-31 16:32:25 +00:00
parent 6f1c7dae00
commit cd3b57a156

View file

@@ -189,10 +189,10 @@ async def clean_retrigger_pipeline(
await db.execute( await db.execute(
KeyMoment.__table__.delete().where(KeyMoment.source_video_id == video_id) KeyMoment.__table__.delete().where(KeyMoment.source_video_id == video_id)
) )
# Delete transcript segments # Note: transcript_segments are NOT deleted — they are the pipeline's input
await db.execute( # data created during ingest, not pipeline output. Deleting them would leave
TranscriptSegment.__table__.delete().where(TranscriptSegment.source_video_id == video_id) # the pipeline with nothing to process.
)
# Reset status # Reset status
video.processing_status = ProcessingStatus.not_started video.processing_status = ProcessingStatus.not_started
await db.commit() await db.commit()
@@ -200,7 +200,6 @@ async def clean_retrigger_pipeline(
deleted_counts = { deleted_counts = {
"pipeline_events": "cleared", "pipeline_events": "cleared",
"key_moments": "cleared", "key_moments": "cleared",
"transcript_segments": "cleared",
} }
# Best-effort Qdrant cleanup (non-blocking) # Best-effort Qdrant cleanup (non-blocking)