"""Tests for the highlight scoring engine. Verifies heuristic scoring produces sensible orderings and handles edge cases gracefully. """ from __future__ import annotations import pytest from backend.pipeline.highlight_scorer import ( _content_type_weight, _duration_fitness, _plugin_richness, _source_quality_weight, _specificity_density, _transcript_energy, _video_type_weight, score_moment, ) # ── Fixture helpers ────────────────────────────────────────────────────────── def _ideal_moment() -> dict: """45s technique moment, 3 plugins, specific summary, structured source.""" return dict( start_time=10.0, end_time=55.0, # 45s duration content_type="technique", summary=( "Set the compressor threshold to -18 dB with a 4:1 ratio, " "then boost the high shelf at 12 kHz by 3.5 dB using FabFilter Pro-Q 3." ), plugins=["FabFilter Pro-Q 3", "SSL G-Bus Compressor", "Valhalla Room"], raw_transcript=( "The trick is to set the threshold low enough. Notice how " "the compressor grabs the transients. Because we want to preserve " "the dynamics, I always back off the ratio. The key is finding " "that sweet spot where it's controlling but not squashing." ), source_quality="structured", video_content_type="tutorial", ) def _mediocre_moment() -> dict: """90s settings moment, 1 plugin, decent summary, mixed source.""" return dict( start_time=120.0, end_time=210.0, # 90s duration content_type="settings", summary="Adjust the EQ settings for the vocal track to get a clearer sound.", plugins=["FabFilter Pro-Q 3"], raw_transcript=( "So here we're just going to adjust this. I think it sounds " "better when we cut some of the low end. Let me show you what " "I mean. Yeah, that's better." ), source_quality="mixed", video_content_type="breakdown", ) def _poor_moment() -> dict: """300s reasoning moment, 0 plugins, vague summary, unstructured source.""" return dict( start_time=0.0, end_time=300.0, # 300s duration → zero for duration_fitness content_type="reasoning", summary="General discussion about mixing philosophy and approach.", plugins=[], raw_transcript=( "I think mixing is really about taste. Everyone has their own " "approach. Some people like it loud, some people like it quiet. " "There's no right or wrong way to do it really." ), source_quality="unstructured", video_content_type="livestream", ) # ── Tests ──────────────────────────────────────────────────────────────────── class TestScoreMoment: def test_ideal_moment_scores_high(self): result = score_moment(**_ideal_moment()) assert result["score"] > 0.7, f"Ideal moment scored {result['score']}, expected > 0.7" def test_poor_moment_scores_low(self): result = score_moment(**_poor_moment()) assert result["score"] < 0.4, f"Poor moment scored {result['score']}, expected < 0.4" def test_ordering_is_sensible(self): ideal = score_moment(**_ideal_moment()) mediocre = score_moment(**_mediocre_moment()) poor = score_moment(**_poor_moment()) assert ideal["score"] > mediocre["score"] > poor["score"], ( f"Expected ideal ({ideal['score']:.3f}) > " f"mediocre ({mediocre['score']:.3f}) > " f"poor ({poor['score']:.3f})" ) def test_score_bounds(self): """All scores in [0.0, 1.0] for edge cases.""" edge_cases = [ dict(start_time=0, end_time=0, summary="", plugins=None, raw_transcript=None), dict(start_time=0, end_time=500, summary=None, plugins=[], raw_transcript=""), dict(start_time=0, end_time=45, summary="x" * 10000, plugins=["a"] * 100), dict(start_time=100, end_time=100), # zero duration ] for kwargs in edge_cases: result = score_moment(**kwargs) assert 0.0 <= result["score"] <= 1.0, f"Score {result['score']} out of bounds for {kwargs}" for dim, val in result["score_breakdown"].items(): assert 0.0 <= val <= 1.0, f"{dim}={val} out of bounds for {kwargs}" def test_missing_optional_fields(self): """None raw_transcript and None plugins don't crash.""" result = score_moment( start_time=10.0, end_time=55.0, content_type="technique", summary="A summary.", plugins=None, raw_transcript=None, source_quality=None, video_content_type=None, ) assert 0.0 <= result["score"] <= 1.0 assert result["duration_secs"] == 45.0 assert len(result["score_breakdown"]) == 7 def test_returns_duration_secs(self): result = score_moment(start_time=10.0, end_time=55.0) assert result["duration_secs"] == 45.0 def test_breakdown_has_seven_dimensions(self): result = score_moment(**_ideal_moment()) assert len(result["score_breakdown"]) == 7 expected_keys = { "duration_score", "content_density_score", "technique_relevance_score", "plugin_diversity_score", "engagement_proxy_score", "position_score", "uniqueness_score", } assert set(result["score_breakdown"].keys()) == expected_keys class TestDurationFitness: def test_bell_curve_peak(self): """45s scores higher than 10s, 10s scores higher than 400s.""" assert _duration_fitness(45) > _duration_fitness(10) assert _duration_fitness(10) > _duration_fitness(400) def test_sweet_spot(self): assert _duration_fitness(30) == 1.0 assert _duration_fitness(45) == 1.0 assert _duration_fitness(60) == 1.0 def test_zero_at_extremes(self): assert _duration_fitness(0) == 0.0 assert _duration_fitness(300) == 0.0 assert _duration_fitness(500) == 0.0 def test_negative_duration(self): assert _duration_fitness(-10) == 0.0 class TestContentTypeWeight: def test_technique_highest(self): assert _content_type_weight("technique") == 1.0 def test_reasoning_lowest_known(self): assert _content_type_weight("reasoning") == 0.4 def test_unknown_gets_default(self): assert _content_type_weight("unknown") == 0.5 assert _content_type_weight(None) == 0.5 class TestSpecificityDensity: def test_specific_summary_scores_high(self): summary = "Set threshold to -18 dB with 4:1 ratio, boost 12 kHz by 3.5 dB" score = _specificity_density(summary) assert score > 0.5 def test_vague_summary_scores_low(self): score = _specificity_density("General discussion about mixing philosophy.") assert score < 0.3 def test_empty_returns_zero(self): assert _specificity_density("") == 0.0 assert _specificity_density(None) == 0.0 class TestPluginRichness: def test_three_plugins_maxes_out(self): assert _plugin_richness(["a", "b", "c"]) == 1.0 def test_more_than_three_capped(self): assert _plugin_richness(["a", "b", "c", "d"]) == 1.0 def test_empty(self): assert _plugin_richness([]) == 0.0 assert _plugin_richness(None) == 0.0 class TestTranscriptEnergy: def test_teaching_phrases_score_high(self): transcript = ( "The trick is to notice how the compressor behaves. " "Because we want dynamics, I always set it gently. The key is balance." ) score = _transcript_energy(transcript) assert score > 0.5 def test_bland_transcript_scores_low(self): transcript = "And then we adjust this slider here. Okay that sounds fine." score = _transcript_energy(transcript) assert score < 0.3 def test_empty(self): assert _transcript_energy("") == 0.0 assert _transcript_energy(None) == 0.0 class TestSourceQualityWeight: def test_structured_highest(self): assert _source_quality_weight("structured") == 1.0 def test_none_default(self): assert _source_quality_weight(None) == 0.5 class TestVideoTypeWeight: def test_tutorial_highest(self): assert _video_type_weight("tutorial") == 1.0 def test_short_form_lowest(self): assert _video_type_weight("short_form") == 0.3 def test_none_default(self): assert _video_type_weight(None) == 0.5