# promptlooper/backend/engine/cache.py

"""Response cache layer for PromptLooper.

Caches LLM responses by a SHA-256 hash of the full configuration
(prompt + model + params + input_data) to avoid redundant API calls.
"""

import hashlib
import json
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any

from sqlalchemy import func, text
from sqlalchemy.orm import Session

from models import ResponseCache


@dataclass
class CachedResponse:
    """A cached LLM response retrieved from the database."""

    config_hash: str
    response: str
    model: str
    tokens_in: int | None
    tokens_out: int | None
    latency_ms: int | None
    created_at: datetime


@dataclass
class CacheStats:
    """Summary figures describing the response cache."""

    # Number of entries currently stored in the cache.
    total_entries: int
    # Fraction of lookups that were hits, in the range 0.0-1.0.
    hit_rate: float
    # Approximate size of the stored responses.
    storage_size_bytes: int


def compute_config_hash(
    prompt: str,
    model: str,
    params: dict[str, Any],
    input_data: Any = None,
) -> str:
    """Return the SHA-256 hex digest identifying one LLM call configuration.

    The four arguments are bundled into a single mapping and serialized to
    canonical JSON: keys sorted at every nesting level, non-ASCII characters
    escaped, and any value JSON cannot encode natively converted with
    ``str()``.  The digest of that text is the cache key, so two calls hash
    the same exactly when their canonical JSON is identical.

    Args:
        prompt: Prompt text.
        model: Model identifier.
        params: Call parameters; key order does not affect the result.
        input_data: Optional extra input included in the key.

    Returns:
        A 64-character lowercase hexadecimal string.
    """
    serialized = json.dumps(
        dict(prompt=prompt, model=model, params=params, input_data=input_data),
        sort_keys=True,
        ensure_ascii=True,
        default=str,
    )
    hasher = hashlib.sha256()
    hasher.update(serialized.encode("utf-8"))
    return hasher.hexdigest()


class ResponseCacheLayer:
    """Database-backed response cache.

    Works with both SQLite and PostgreSQL — the caller provides a
    SQLAlchemy session to every method; the layer itself holds no
    connection.  The only state kept on the instance is a pair of
    in-memory hit/miss counters used by :meth:`cache_stats`; they start at
    zero for each new instance and are not persisted.
    """

    def __init__(self) -> None:
        # Lookup counters for this instance only (not stored in the database).
        self._hits: int = 0
        self._misses: int = 0

    def get(self, db: Session, config_hash: str) -> CachedResponse | None:
        """Look up a cached response by config hash.

        Increments the instance's hit or miss counter as a side effect.

        Args:
            db: SQLAlchemy session used for the primary-key lookup.
            config_hash: Key of the entry to fetch.

        Returns:
            A detached ``CachedResponse`` copy of the row, or ``None`` on a
            cache miss.
        """
        row = db.get(ResponseCache, config_hash)
        if row is None:
            self._misses += 1
            return None

        self._hits += 1
        return CachedResponse(
            config_hash=row.config_hash,
            response=row.response,
            model=row.model,
            tokens_in=row.tokens_in,
            tokens_out=row.tokens_out,
            latency_ms=row.latency_ms,
            created_at=row.created_at,
        )

    def put(
        self,
        db: Session,
        config_hash: str,
        response: str,
        model: str,
        tokens_in: int | None = None,
        tokens_out: int | None = None,
        latency_ms: int | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """Store a response in the cache and commit the session.

        If the config_hash already exists, the entry is updated (upsert);
        otherwise a new row is added.

        Args:
            db: SQLAlchemy session; it is committed before returning.
            config_hash: Key under which the response is stored.
            response: Response text to cache.
            model: Model identifier stored with the response.
            tokens_in: Optional value for the row's ``tokens_in`` field.
            tokens_out: Optional value for the row's ``tokens_out`` field.
            latency_ms: Optional value for the row's ``latency_ms`` field.
            metadata: Accepted for interface compatibility but currently
                not persisted.

        Raises:
            Exception: Whatever ``db.commit()`` raises.  The session is
                rolled back before the error propagates, so the caller can
                keep using it.
        """
        # NOTE(review): `metadata` is ignored here -- confirm whether the
        # ResponseCache model has a column it should be written to.
        existing = db.get(ResponseCache, config_hash)
        if existing is not None:
            existing.response = response
            existing.model = model
            existing.tokens_in = tokens_in
            existing.tokens_out = tokens_out
            existing.latency_ms = latency_ms
        else:
            db.add(
                ResponseCache(
                    config_hash=config_hash,
                    response=response,
                    model=model,
                    tokens_in=tokens_in,
                    tokens_out=tokens_out,
                    latency_ms=latency_ms,
                )
            )

        try:
            db.commit()
        except Exception:
            # A failed commit leaves the session unusable until it is rolled
            # back; reset it here so the caller's session is not poisoned,
            # then let the original error propagate unchanged.
            db.rollback()
            raise

    def cache_stats(self, db: Session) -> CacheStats:
        """Return cache statistics: hit rate, total entries, storage size.

        ``hit_rate`` is computed from this instance's in-memory counters
        (0.0 when no lookups have been made); the other two figures are
        queried from the database.

        Args:
            db: SQLAlchemy session used for the aggregate queries.
        """
        total: int = db.query(func.count(ResponseCache.config_hash)).scalar() or 0

        total_lookups = self._hits + self._misses
        hit_rate = self._hits / total_lookups if total_lookups > 0 else 0.0

        # Approximate storage: sum of response text lengths.
        # For SQLite, length() returns character count; for Postgres, octet_length
        # would be more accurate, but length() works everywhere.
        raw_size = db.query(func.sum(func.length(ResponseCache.response))).scalar()
        # SUM over an empty table yields NULL/None; the result may also come
        # back as a non-int numeric on some backends, so normalise to int.
        size: int = int(raw_size) if raw_size is not None else 0

        return CacheStats(
            total_entries=total,
            hit_rate=hit_rate,
            storage_size_bytes=size,
        )