# promptlooper/backend/engine/scorers/base.py

"""Base scorer abstract class for PromptLooper scoring framework."""

from abc import ABC, abstractmethod
from typing import Any


class BaseScorer(ABC):
    """Common interface that every PromptLooper scorer implements.

    A scorer inspects an LLM output and rates it with a float between
    0.0 and 1.0. Concrete subclasses must supply both ``name`` and
    ``score``; ``score_async`` comes with a default that may be overridden.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Return the identifier that uniquely names this scorer."""

    @abstractmethod
    def score(self, input_data: Any, output: str, context: dict) -> float:
        """Rate a single LLM output.

        Args:
            input_data: The original input that was fed to the experiment.
            output: The text generated by the LLM, i.e. what is being rated.
            context: Supporting information for the evaluation. Documented
                keys are:

                - ``config``: the experiment configuration dict.
                - ``stages``: list of completed stage result dicts.
                - ``input_data``: the same value as the ``input_data``
                  argument, repeated for convenience.

                Reference data or other experiment-specific keys may be
                present as well.

        Returns:
            The score, a float in the inclusive range 0.0 to 1.0.
        """

    async def score_async(self, input_data: Any, output: str, context: dict) -> float:
        """Rate a single LLM output from async code.

        Unless overridden, this simply calls the synchronous ``score`` with
        the same arguments and returns its result. Scorers that need async
        I/O (e.g. LLM calls, HTTP requests) should override this method.
        """
        sync_result = self.score(input_data, output, context)
        return sync_result