193 lines
7.1 KiB
Python
193 lines
7.1 KiB
Python
"""Tests for the KeywordScorer."""
|
|
|
|
import asyncio
|
|
|
|
import pytest
|
|
|
|
from engine.scorers.keyword import KeywordScorer
|
|
|
|
|
|
class TestKeywordScorerInit:
    """Constructor checks for KeywordScorer: stored attributes and validation."""

    # Fragment of the error message every "no keywords supplied" case must match.
    _NO_KEYWORDS_PATTERN = "At least one of"

    def test_with_required_present(self):
        """Only ``required_present`` is given; ``required_absent`` must be ``[]``."""
        ks = KeywordScorer(required_present=["hello"])
        assert ks.required_present == ["hello"]
        assert ks.required_absent == []

    def test_with_required_absent(self):
        """Only ``required_absent`` is given; ``required_present`` must be ``[]``."""
        ks = KeywordScorer(required_absent=["bad"])
        assert ks.required_present == []
        assert ks.required_absent == ["bad"]

    def test_with_both_lists(self):
        """Both lists are given and both must be stored as passed."""
        ks = KeywordScorer(required_present=["good"], required_absent=["bad"])
        assert ks.required_present == ["good"]
        assert ks.required_absent == ["bad"]

    def test_empty_lists_raises(self):
        """Constructing with no arguments at all must raise ``ValueError``."""
        with pytest.raises(ValueError, match=self._NO_KEYWORDS_PATTERN):
            KeywordScorer()

    def test_both_none_raises(self):
        """Passing ``None`` explicitly for both lists must raise ``ValueError``."""
        with pytest.raises(ValueError, match=self._NO_KEYWORDS_PATTERN):
            KeywordScorer(required_present=None, required_absent=None)

    def test_both_empty_raises(self):
        """Passing two empty lists must raise ``ValueError``."""
        with pytest.raises(ValueError, match=self._NO_KEYWORDS_PATTERN):
            KeywordScorer(required_present=[], required_absent=[])

    def test_name_property(self):
        """The scorer's ``name`` must be the string ``"keyword"``."""
        ks = KeywordScorer(required_present=["test"])
        assert ks.name == "keyword"

    def test_is_base_scorer(self):
        """A KeywordScorer instance must be an instance of ``BaseScorer``."""
        from engine.scorers.base import BaseScorer

        ks = KeywordScorer(required_present=["test"])
        assert isinstance(ks, BaseScorer)

    def test_case_sensitive_default_false(self):
        """Without an explicit flag, ``case_sensitive`` must be exactly ``False``."""
        ks = KeywordScorer(required_present=["test"])
        assert ks.case_sensitive is False

    def test_case_sensitive_explicit(self):
        """``case_sensitive=True`` must be stored as exactly ``True``."""
        ks = KeywordScorer(required_present=["test"], case_sensitive=True)
        assert ks.case_sensitive is True
|
|
|
|
|
|
class TestRequiredPresent:
    """Expected scores when only ``required_present`` keywords are configured."""

    def test_all_present_scores_1(self):
        """Both keywords occur in the output: expected score 1.0."""
        ks = KeywordScorer(required_present=["hello", "world"])
        result = ks.score(None, "hello world", {})
        assert result == 1.0

    def test_none_present_scores_0(self):
        """Neither keyword occurs in the output: expected score 0.0."""
        ks = KeywordScorer(required_present=["hello", "world"])
        result = ks.score(None, "nothing here", {})
        assert result == 0.0

    def test_partial_present_scores_ratio(self):
        """One of two keywords occurs in the output: expected score 0.5."""
        ks = KeywordScorer(required_present=["hello", "world"])
        result = ks.score(None, "hello there", {})
        assert result == 0.5

    def test_single_keyword_present(self):
        """The single configured keyword occurs: expected score 1.0."""
        ks = KeywordScorer(required_present=["python"])
        result = ks.score(None, "I love python", {})
        assert result == 1.0

    def test_single_keyword_absent(self):
        """The single configured keyword does not occur: expected score 0.0."""
        ks = KeywordScorer(required_present=["python"])
        result = ks.score(None, "I love java", {})
        assert result == 0.0

    def test_keyword_substring_match(self):
        """``"test"`` is expected to count as present inside ``"testing"``."""
        ks = KeywordScorer(required_present=["test"])
        result = ks.score(None, "testing is important", {})
        assert result == 1.0

    def test_case_insensitive_by_default(self):
        """Mixed-case keywords are expected to match lower-case output by default."""
        ks = KeywordScorer(required_present=["Hello", "WORLD"])
        result = ks.score(None, "hello world", {})
        assert result == 1.0

    def test_case_sensitive_match(self):
        """With ``case_sensitive=True``, identical casing yields 1.0."""
        ks = KeywordScorer(required_present=["Hello"], case_sensitive=True)
        result = ks.score(None, "Hello world", {})
        assert result == 1.0

    def test_case_sensitive_no_match(self):
        """With ``case_sensitive=True``, differing casing yields 0.0."""
        ks = KeywordScorer(required_present=["Hello"], case_sensitive=True)
        result = ks.score(None, "hello world", {})
        assert result == 0.0

    def test_three_of_four_present(self):
        """Three of four keywords occur in the output: expected score 0.75."""
        ks = KeywordScorer(required_present=["a", "b", "c", "d"])
        result = ks.score(None, "a b c", {})
        assert result == 0.75
|
|
|
|
|
|
class TestRequiredAbsent:
    """Expected scores when only ``required_absent`` keywords are configured."""

    def test_all_absent_scores_1(self):
        """Neither forbidden keyword occurs in the output: expected score 1.0."""
        ks = KeywordScorer(required_absent=["error", "fail"])
        result = ks.score(None, "success", {})
        assert result == 1.0

    def test_all_present_scores_0(self):
        """Both forbidden keywords occur in the output: expected score 0.0."""
        ks = KeywordScorer(required_absent=["error", "fail"])
        result = ks.score(None, "error and fail", {})
        assert result == 0.0

    def test_partial_absent_scores_ratio(self):
        """One of two forbidden keywords occurs: expected score 0.5."""
        ks = KeywordScorer(required_absent=["error", "fail"])
        result = ks.score(None, "error occurred", {})
        assert result == 0.5

    def test_case_insensitive_absent(self):
        """Upper-case ``"ERROR"`` is expected to be detected in lower-case output."""
        ks = KeywordScorer(required_absent=["ERROR"])
        result = ks.score(None, "an error occurred", {})
        assert result == 0.0

    def test_case_sensitive_absent_not_found(self):
        """With ``case_sensitive=True``, ``"ERROR"`` must not match ``"error"``."""
        ks = KeywordScorer(required_absent=["ERROR"], case_sensitive=True)
        result = ks.score(None, "an error occurred", {})
        assert result == 1.0
|
|
|
|
|
|
class TestCombinedPresenceAbsence:
    """Expected scores when both keyword lists are configured on one scorer."""

    def test_all_satisfied(self):
        """Both wanted keywords occur and no forbidden one does: expected 1.0."""
        wanted = ["python", "code"]
        forbidden = ["error", "bug"]
        ks = KeywordScorer(required_present=wanted, required_absent=forbidden)
        assert ks.score(None, "python code is great", {}) == 1.0

    def test_none_satisfied(self):
        """No wanted keyword occurs and both forbidden ones do: expected 0.0."""
        wanted = ["python", "code"]
        forbidden = ["error", "bug"]
        ks = KeywordScorer(required_present=wanted, required_absent=forbidden)
        assert ks.score(None, "error and bug", {}) == 0.0

    def test_mixed_satisfaction(self):
        """Two of four requirements hold: expected 0.5.

        "python" is present and "bug" is not in the output (2 satisfied);
        "code" is missing and "error" occurs (2 unsatisfied).
        """
        wanted = ["python", "code"]
        forbidden = ["error", "bug"]
        ks = KeywordScorer(required_present=wanted, required_absent=forbidden)
        assert ks.score(None, "python error", {}) == 0.5

    def test_present_satisfied_absent_not(self):
        """Both wanted keywords occur but the forbidden one does too: expected 2/3."""
        wanted = ["hello", "world"]
        forbidden = ["bad"]
        ks = KeywordScorer(required_present=wanted, required_absent=forbidden)
        result = ks.score(None, "hello world bad", {})
        # 2/3 is not exactly representable, so compare with a tolerance.
        expected = 2 / 3
        assert abs(result - expected) < 1e-9

    def test_absent_satisfied_present_not(self):
        """The wanted keyword is missing, the forbidden one is absent: expected 1/2."""
        wanted = ["hello"]
        forbidden = ["bad"]
        ks = KeywordScorer(required_present=wanted, required_absent=forbidden)
        assert ks.score(None, "nothing here", {}) == 0.5
|
|
|
|
|
|
class TestAsyncScore:
    """Checks the value produced by the awaitable scoring entry point."""

    def test_async_delegates_to_sync(self):
        """``score_async`` must resolve to 1.0 when the required keyword is present.

        The test name indicates ``score_async`` is expected to delegate to
        ``score``; only the resulting value is asserted here.
        """
        scorer = KeywordScorer(required_present=["hello"])
        # Drive the awaitable on an explicitly created loop instead of
        # asyncio.get_event_loop(): relying on an implicitly created current
        # loop is deprecated and raises RuntimeError on Python 3.14+ when no
        # loop is set. A private loop also leaves the thread's current-loop
        # state untouched for other tests.
        loop = asyncio.new_event_loop()
        try:
            result = loop.run_until_complete(
                scorer.score_async(None, "hello world", {})
            )
        finally:
            # Always release the loop's resources, even if scoring raises.
            loop.close()
        assert result == 1.0
|
|
|
|
|
|
class TestEdgeCases:
    """Boundary inputs: empty output, multi-line text, symbols, unused arguments."""

    def test_empty_output(self):
        """A required keyword cannot be found in empty output: expected 0.0."""
        ks = KeywordScorer(required_present=["hello"])
        result = ks.score(None, "", {})
        assert result == 0.0

    def test_empty_output_with_absent(self):
        """A forbidden keyword is absent from empty output: expected 1.0."""
        ks = KeywordScorer(required_absent=["hello"])
        result = ks.score(None, "", {})
        assert result == 1.0

    def test_multiline_output(self):
        """Keywords on separate lines of the output are both expected to count."""
        ks = KeywordScorer(required_present=["line1", "line2"])
        result = ks.score(None, "line1\nline2", {})
        assert result == 1.0

    def test_special_characters_in_keyword(self):
        """Keywords containing ``+`` and ``#`` are expected to match literally."""
        ks = KeywordScorer(required_present=["c++", "c#"])
        result = ks.score(None, "I know c++ and c#", {})
        assert result == 1.0

    def test_context_ignored(self):
        """A string input and a non-empty context still give a score of 1.0."""
        ks = KeywordScorer(required_present=["test"])
        result = ks.score("input", "test output", {"key": "val"})
        assert result == 1.0

    def test_input_data_ignored(self):
        """A dict passed as input data still gives a score of 1.0."""
        ks = KeywordScorer(required_present=["test"])
        result = ks.score({"complex": "input"}, "test output", {})
        assert result == 1.0
|