# promptlooper/backend/tests/test_scorer_base.py

"""Tests for the BaseScorer abstract class."""

import asyncio
from typing import Any

import pytest

from engine.scorers.base import BaseScorer


class ConcreteScorer(BaseScorer):
    """Smallest usable BaseScorer subclass: a fixed name and a fixed score."""

    def score(self, input_data: Any, output: str, context: dict) -> float:
        """Return the constant 0.75; every argument is ignored."""
        return 0.75

    @property
    def name(self) -> str:
        """Identifier reported by this scorer."""
        return "concrete"


class AsyncOverrideScorer(BaseScorer):
    """Scorer whose async path returns a different value than its sync path."""

    async def score_async(self, input_data: Any, output: str, context: dict) -> float:
        """Async variant: always 0.99, distinguishable from ``score``."""
        return 0.99

    def score(self, input_data: Any, output: str, context: dict) -> float:
        """Sync variant: always 0.5."""
        return 0.5

    @property
    def name(self) -> str:
        """Identifier reported by this scorer."""
        return "async_override"


class ContextAwareScorer(BaseScorer):
    """Scorer whose result depends only on what the context dict contains."""

    @property
    def name(self) -> str:
        """Identifier reported by this scorer."""
        return "context_aware"

    def score(self, input_data: Any, output: str, context: dict) -> float:
        """Return 1.0 when the context is fully populated, else 0.0.

        "Fully populated" means a truthy ``"config"``, a truthy ``"stages"``
        and an ``"input_data"`` entry that is not ``None``. The ``input_data``
        and ``output`` arguments themselves are not inspected.
        """
        settings = context.get("config", {})
        stage_records = context.get("stages", [])
        reference = context.get("input_data")

        # Guard clause: bail out as soon as any required piece is absent.
        if not settings or not stage_records or reference is None:
            return 0.0
        return 1.0


class TestBaseScorerInterface:
    """Contract tests for BaseScorer, exercised through the helper scorers above."""

    @staticmethod
    def _run(awaitable):
        """Drive *awaitable* to completion on a fresh event loop and return its result.

        ``asyncio.run`` creates and closes its own loop, so the tests do not
        depend on ``asyncio.get_event_loop()``, which is deprecated when no
        loop is running and fails on newer Python versions. The awaitable is
        wrapped in a coroutine because ``asyncio.run`` only accepts coroutines.
        """

        async def _runner():
            return await awaitable

        return asyncio.run(_runner())

    def test_cannot_instantiate_abstract_class(self):
        """Instantiating BaseScorer directly must raise TypeError."""
        with pytest.raises(TypeError):
            BaseScorer()

    def test_concrete_scorer_has_name(self):
        """A concrete subclass exposes its ``name`` property."""
        scorer = ConcreteScorer()
        assert scorer.name == "concrete"

    def test_concrete_scorer_returns_float(self):
        """``score`` returns the float the subclass produced."""
        scorer = ConcreteScorer()
        result = scorer.score("input", "output", {})
        assert isinstance(result, float)
        assert result == 0.75

    def test_score_async_defaults_to_sync(self):
        """Without an override, ``score_async`` yields the same value as ``score``."""
        scorer = ConcreteScorer()
        result = self._run(scorer.score_async("input", "output", {}))
        assert result == 0.75

    def test_score_async_override(self):
        """An overridden ``score_async`` is used and leaves ``score`` untouched."""
        scorer = AsyncOverrideScorer()
        sync_result = scorer.score("input", "output", {})
        async_result = self._run(scorer.score_async("input", "output", {}))
        assert sync_result == 0.5
        assert async_result == 0.99

    def test_context_dict_keys(self):
        """A context carrying config, stages and input_data scores 1.0."""
        scorer = ContextAwareScorer()
        context = {
            "config": {"model": "gpt-4"},
            "stages": [{"output": "stage1 output"}],
            "input_data": "some input",
        }
        result = scorer.score("some input", "output", context)
        assert result == 1.0

    def test_context_dict_missing_keys(self):
        """An empty context scores 0.0 rather than raising."""
        scorer = ContextAwareScorer()
        result = scorer.score("input", "output", {})
        assert result == 0.0

    def test_isinstance_check(self):
        """Concrete scorers are instances of BaseScorer."""
        scorer = ConcreteScorer()
        assert isinstance(scorer, BaseScorer)

    def test_partial_implementation_raises(self):
        """A class that only implements name but not score cannot be instantiated."""

        class PartialScorer(BaseScorer):
            @property
            def name(self) -> str:
                return "partial"

        with pytest.raises(TypeError):
            PartialScorer()