promptlooper/backend/tests/test_scorer_keyword.py

193 lines
7.1 KiB
Python

"""Tests for the KeywordScorer."""
import asyncio
import pytest
from engine.scorers.keyword import KeywordScorer
class TestKeywordScorerInit:
    """Construction-time expectations for ``KeywordScorer``."""

    def test_with_required_present(self):
        """Only ``required_present`` supplied: ``required_absent`` is ``[]``."""
        built = KeywordScorer(required_present=["hello"])
        assert built.required_present == ["hello"]
        assert built.required_absent == []

    def test_with_required_absent(self):
        """Only ``required_absent`` supplied: ``required_present`` is ``[]``."""
        built = KeywordScorer(required_absent=["bad"])
        assert built.required_present == []
        assert built.required_absent == ["bad"]

    def test_with_both_lists(self):
        """Both keyword lists are exposed unchanged on the instance."""
        wanted = ["good"]
        unwanted = ["bad"]
        built = KeywordScorer(required_present=wanted, required_absent=unwanted)
        assert built.required_present == ["good"]
        assert built.required_absent == ["bad"]

    def test_empty_lists_raises(self):
        """Constructing with no arguments at all must raise ``ValueError``."""
        rejection = pytest.raises(ValueError, match="At least one of")
        with rejection:
            KeywordScorer()

    def test_both_none_raises(self):
        """Explicit ``None`` for both lists must raise ``ValueError``."""
        rejection = pytest.raises(ValueError, match="At least one of")
        with rejection:
            KeywordScorer(required_present=None, required_absent=None)

    def test_both_empty_raises(self):
        """Two empty lists must raise ``ValueError``."""
        rejection = pytest.raises(ValueError, match="At least one of")
        with rejection:
            KeywordScorer(required_present=[], required_absent=[])

    def test_name_property(self):
        """The scorer reports its name as ``"keyword"``."""
        reported_name = KeywordScorer(required_present=["test"]).name
        assert reported_name == "keyword"

    def test_is_base_scorer(self):
        """A ``KeywordScorer`` instance is also a ``BaseScorer`` instance."""
        from engine.scorers.base import BaseScorer

        built = KeywordScorer(required_present=["test"])
        assert isinstance(built, BaseScorer)

    def test_case_sensitive_default_false(self):
        """``case_sensitive`` is ``False`` when the argument is omitted."""
        flag = KeywordScorer(required_present=["test"]).case_sensitive
        assert flag is False

    def test_case_sensitive_explicit(self):
        """``case_sensitive=True`` is stored on the instance."""
        flag = KeywordScorer(required_present=["test"], case_sensitive=True).case_sensitive
        assert flag is True
class TestRequiredPresent:
    """Scores expected when only ``required_present`` keywords are configured."""

    def test_all_present_scores_1(self):
        """Every required keyword appears in the output -> 1.0."""
        checker = KeywordScorer(required_present=["hello", "world"])
        outcome = checker.score(None, "hello world", {})
        assert outcome == 1.0

    def test_none_present_scores_0(self):
        """No required keyword appears in the output -> 0.0."""
        checker = KeywordScorer(required_present=["hello", "world"])
        outcome = checker.score(None, "nothing here", {})
        assert outcome == 0.0

    def test_partial_present_scores_ratio(self):
        """One of two required keywords appears -> 0.5."""
        checker = KeywordScorer(required_present=["hello", "world"])
        outcome = checker.score(None, "hello there", {})
        assert outcome == 0.5

    def test_single_keyword_present(self):
        """A single required keyword that is found -> 1.0."""
        checker = KeywordScorer(required_present=["python"])
        outcome = checker.score(None, "I love python", {})
        assert outcome == 1.0

    def test_single_keyword_absent(self):
        """A single required keyword that is missing -> 0.0."""
        checker = KeywordScorer(required_present=["python"])
        outcome = checker.score(None, "I love java", {})
        assert outcome == 0.0

    def test_keyword_substring_match(self):
        """``"test"`` inside the word ``"testing"`` is expected to count as found."""
        checker = KeywordScorer(required_present=["test"])
        outcome = checker.score(None, "testing is important", {})
        assert outcome == 1.0

    def test_case_insensitive_by_default(self):
        """Mixed-case keywords are expected to match lower-case output by default."""
        checker = KeywordScorer(required_present=["Hello", "WORLD"])
        outcome = checker.score(None, "hello world", {})
        assert outcome == 1.0

    def test_case_sensitive_match(self):
        """With ``case_sensitive=True`` an exact-case occurrence -> 1.0."""
        checker = KeywordScorer(required_present=["Hello"], case_sensitive=True)
        outcome = checker.score(None, "Hello world", {})
        assert outcome == 1.0

    def test_case_sensitive_no_match(self):
        """With ``case_sensitive=True`` a differently-cased occurrence -> 0.0."""
        checker = KeywordScorer(required_present=["Hello"], case_sensitive=True)
        outcome = checker.score(None, "hello world", {})
        assert outcome == 0.0

    def test_three_of_four_present(self):
        """Three of four required keywords appear -> 0.75."""
        checker = KeywordScorer(required_present=["a", "b", "c", "d"])
        outcome = checker.score(None, "a b c", {})
        assert outcome == 0.75
class TestRequiredAbsent:
    """Scores expected when only ``required_absent`` keywords are configured."""

    def test_all_absent_scores_1(self):
        """Neither forbidden keyword occurs in the output -> 1.0."""
        guard = KeywordScorer(required_absent=["error", "fail"])
        verdict = guard.score(None, "success", {})
        assert verdict == 1.0

    def test_all_present_scores_0(self):
        """Both forbidden keywords occur in the output -> 0.0."""
        guard = KeywordScorer(required_absent=["error", "fail"])
        verdict = guard.score(None, "error and fail", {})
        assert verdict == 0.0

    def test_partial_absent_scores_ratio(self):
        """One of two forbidden keywords occurs -> 0.5."""
        guard = KeywordScorer(required_absent=["error", "fail"])
        verdict = guard.score(None, "error occurred", {})
        assert verdict == 0.5

    def test_case_insensitive_absent(self):
        """By default ``"ERROR"`` is expected to be detected in lower-case output."""
        guard = KeywordScorer(required_absent=["ERROR"])
        verdict = guard.score(None, "an error occurred", {})
        assert verdict == 0.0

    def test_case_sensitive_absent_not_found(self):
        """With ``case_sensitive=True``, lower-case ``"error"`` does not trip ``"ERROR"``."""
        guard = KeywordScorer(required_absent=["ERROR"], case_sensitive=True)
        verdict = guard.score(None, "an error occurred", {})
        assert verdict == 1.0
class TestCombinedPresenceAbsence:
    """Scores expected when both keyword lists are configured together."""

    def test_all_satisfied(self):
        """Both required words found and neither forbidden word found -> 1.0."""
        must_have = ["python", "code"]
        must_lack = ["error", "bug"]
        combo = KeywordScorer(required_present=must_have, required_absent=must_lack)
        assert combo.score(None, "python code is great", {}) == 1.0

    def test_none_satisfied(self):
        """No required word found and both forbidden words found -> 0.0."""
        must_have = ["python", "code"]
        must_lack = ["error", "bug"]
        combo = KeywordScorer(required_present=must_have, required_absent=must_lack)
        assert combo.score(None, "error and bug", {}) == 0.0

    def test_mixed_satisfaction(self):
        """Half of the four conditions hold -> 0.5."""
        must_have = ["python", "code"]
        must_lack = ["error", "bug"]
        combo = KeywordScorer(required_present=must_have, required_absent=must_lack)
        # Satisfied: "python" is present and "bug" is absent; the other two fail (2/4).
        assert combo.score(None, "python error", {}) == 0.5

    def test_present_satisfied_absent_not(self):
        """Both required words found but the forbidden word also found -> 2/3."""
        combo = KeywordScorer(required_present=["hello", "world"], required_absent=["bad"])
        observed = combo.score(None, "hello world bad", {})
        # 2/3 is not exactly representable as a float, so compare within a tolerance.
        target = 2 / 3
        assert abs(observed - target) < 1e-9

    def test_absent_satisfied_present_not(self):
        """Required word missing but forbidden word also missing -> 1/2."""
        combo = KeywordScorer(required_present=["hello"], required_absent=["bad"])
        observed = combo.score(None, "nothing here", {})
        assert observed == 0.5
class TestAsyncScore:
    """Checks the awaitable entry point ``score_async`` of ``KeywordScorer``."""

    def test_async_delegates_to_sync(self):
        """Awaiting ``score_async`` on a fully matching output yields 1.0."""
        scorer = KeywordScorer(required_present=["hello"])
        # Drive the awaitable on a dedicated loop instead of
        # asyncio.get_event_loop(): calling that with no running loop is
        # deprecated and fails on newer Python versions when no loop is set.
        # A private loop also leaves the process-wide loop state untouched
        # and is closed deterministically.
        loop = asyncio.new_event_loop()
        try:
            result = loop.run_until_complete(
                scorer.score_async(None, "hello world", {})
            )
        finally:
            loop.close()
        assert result == 1.0
class TestEdgeCases:
    """Boundary inputs: empty output, newlines, punctuation, ignored arguments."""

    def test_empty_output(self):
        """An empty output cannot contain a required keyword -> 0.0."""
        probe = KeywordScorer(required_present=["hello"])
        got = probe.score(None, "", {})
        assert got == 0.0

    def test_empty_output_with_absent(self):
        """An empty output contains no forbidden keyword -> 1.0."""
        probe = KeywordScorer(required_absent=["hello"])
        got = probe.score(None, "", {})
        assert got == 1.0

    def test_multiline_output(self):
        """Keywords on separate lines of the output are both expected to be found."""
        probe = KeywordScorer(required_present=["line1", "line2"])
        got = probe.score(None, "line1\nline2", {})
        assert got == 1.0

    def test_special_characters_in_keyword(self):
        """Keywords containing ``+`` and ``#`` are expected to match literally."""
        probe = KeywordScorer(required_present=["c++", "c#"])
        got = probe.score(None, "I know c++ and c#", {})
        assert got == 1.0

    def test_context_ignored(self):
        """A populated context dict does not change the score of a matching output."""
        probe = KeywordScorer(required_present=["test"])
        got = probe.score("input", "test output", {"key": "val"})
        assert got == 1.0

    def test_input_data_ignored(self):
        """A dict passed as input data does not change the score of a matching output."""
        probe = KeywordScorer(required_present=["test"])
        got = probe.score({"complex": "input"}, "test output", {})
        assert got == 1.0