Adds backend/engine/scorers/base.py, which defines the BaseScorer abstract class with an abstract name property, an abstract score() method, and a default score_async() implementation that falls back to score(). Updates the scorers __init__.py to export BaseScorer. Includes 9 tests covering instantiation guards, sync/async dispatch, context dict usage, and partial implementation rejection.
112 lines
3.2 KiB
Python
112 lines
3.2 KiB
Python
"""Tests for the BaseScorer abstract class."""
|
|
|
|
import asyncio
|
|
from typing import Any
|
|
|
|
import pytest
|
|
|
|
from engine.scorers.base import BaseScorer
|
|
|
|
|
|
class ConcreteScorer(BaseScorer):
    """Smallest possible BaseScorer subclass, used as a test fixture.

    Supplies both members the tests require of a concrete scorer
    (``name`` and ``score``) and leaves ``score_async`` untouched so the
    inherited default can be exercised.
    """

    @property
    def name(self) -> str:
        """Return the fixed identifier of this scorer."""
        return "concrete"

    def score(self, input_data: Any, output: str, context: dict) -> float:
        """Return a constant score; all arguments are ignored."""
        return 0.75
|
|
|
|
|
|
class AsyncOverrideScorer(BaseScorer):
    """Test fixture whose async path is distinct from its sync path.

    ``score`` and ``score_async`` return different constants so a test
    can tell which of the two was actually invoked.
    """

    @property
    def name(self) -> str:
        """Return the fixed identifier of this scorer."""
        return "async_override"

    def score(self, input_data: Any, output: str, context: dict) -> float:
        """Return the constant used for the synchronous path."""
        return 0.5

    async def score_async(
        self, input_data: Any, output: str, context: dict
    ) -> float:
        """Return the constant used for the asynchronous path."""
        return 0.99
|
|
|
|
|
|
class ContextAwareScorer(BaseScorer):
    """Test fixture whose score depends on the contents of ``context``."""

    @property
    def name(self) -> str:
        """Return the fixed identifier of this scorer."""
        return "context_aware"

    def score(self, input_data: Any, output: str, context: dict) -> float:
        """Return 1.0 when ``context`` is fully populated, otherwise 0.0.

        "Fully populated" means ``context["config"]`` and
        ``context["stages"]`` are both present and truthy, and
        ``context["input_data"]`` is present and not ``None``.
        The ``input_data`` and ``output`` arguments are not consulted.
        """
        # Missing keys fall back to empty/None values, which fail the check.
        settings = context.get("config", {})
        pipeline_stages = context.get("stages", [])
        reference = context.get("input_data")

        fully_populated = (
            bool(settings) and bool(pipeline_stages) and reference is not None
        )
        return 1.0 if fully_populated else 0.0
|
|
|
|
|
|
class TestBaseScorerInterface:
    """Contract tests for the BaseScorer abstract interface.

    Coroutines are driven with ``asyncio.run`` rather than
    ``asyncio.get_event_loop().run_until_complete``: calling
    ``get_event_loop`` with no running loop is deprecated and fails on
    recent Python versions, and ``asyncio.run`` gives every test a fresh
    loop that is closed afterwards.
    """

    def test_cannot_instantiate_abstract_class(self):
        """BaseScorer itself must refuse direct instantiation."""
        with pytest.raises(TypeError):
            BaseScorer()

    def test_concrete_scorer_has_name(self):
        """A concrete subclass exposes its ``name`` property."""
        scorer = ConcreteScorer()
        assert scorer.name == "concrete"

    def test_concrete_scorer_returns_float(self):
        """``score`` returns the subclass's float value."""
        scorer = ConcreteScorer()
        result = scorer.score("input", "output", {})
        assert isinstance(result, float)
        assert result == 0.75

    def test_score_async_defaults_to_sync(self):
        """Without an override, ``score_async`` yields the ``score`` result."""
        scorer = ConcreteScorer()
        result = asyncio.run(scorer.score_async("input", "output", {}))
        assert result == 0.75

    def test_score_async_override(self):
        """An overridden ``score_async`` is used instead of ``score``."""
        scorer = AsyncOverrideScorer()
        sync_result = scorer.score("input", "output", {})
        async_result = asyncio.run(scorer.score_async("input", "output", {}))
        assert sync_result == 0.5
        assert async_result == 0.99

    def test_context_dict_keys(self):
        """A fully populated context dict is visible to ``score``."""
        scorer = ContextAwareScorer()
        context = {
            "config": {"model": "gpt-4"},
            "stages": [{"output": "stage1 output"}],
            "input_data": "some input",
        }
        result = scorer.score("some input", "output", context)
        assert result == 1.0

    def test_context_dict_missing_keys(self):
        """An empty context dict is handled without raising."""
        scorer = ContextAwareScorer()
        result = scorer.score("input", "output", {})
        assert result == 0.0

    def test_isinstance_check(self):
        """Concrete scorers are recognised as BaseScorer instances."""
        scorer = ConcreteScorer()
        assert isinstance(scorer, BaseScorer)

    def test_partial_implementation_raises(self):
        """A class that only implements name but not score cannot be instantiated."""

        class PartialScorer(BaseScorer):
            @property
            def name(self) -> str:
                return "partial"

        with pytest.raises(TypeError):
            PartialScorer()
|