Full webhook system: CRUD endpoints (list/filter/get/create/update/delete), WebhookDelivery model for delivery audit trail, dispatch engine with 3-attempt retry and exponential backoff, Celery task integration with sync fallback, and webhook firing hooks in runner.py and sweep.py event paths.
301 lines
10 KiB
Python
301 lines
10 KiB
Python
"""Individual run execution for PromptLooper.
|
|
|
|
Executes a single Run by iterating through pipeline stages, rendering
|
|
prompts via Jinja2, checking/storing cache, calling the LLM adapter,
|
|
scoring results, and publishing progress events.
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import time
|
|
from datetime import datetime, timezone
|
|
from typing import Any
|
|
|
|
from jinja2 import BaseLoader, Environment
|
|
from sqlalchemy.orm import Session
|
|
|
|
from engine.adapters.base import AdapterResponse, BaseAdapter
|
|
from engine.cache import ResponseCacheLayer, compute_config_hash
|
|
from models import Run, RunStatus, Score, StageResult
|
|
|
|
from jinja2.sandbox import SandboxedEnvironment

logger = logging.getLogger(__name__)

# Jinja2 environment used to render prompt templates taken from run configs.
# - SandboxedEnvironment blocks access to unsafe attributes/callables from
#   inside a template; a plain Environment offers no such protection.
# - BaseLoader cannot resolve templates by name, so templates can only be
#   compiled from strings (no filesystem access via include/import).
# - autoescape stays off as before; presumably because the rendered text is a
#   plain prompt rather than HTML -- confirm if that ever changes.
_jinja_env = SandboxedEnvironment(loader=BaseLoader(), autoescape=False)
|
|
|
|
|
|
class EventBus:
|
|
"""Simple event publisher that uses Redis pub/sub when available,
|
|
or falls back to in-process callbacks for single-container mode."""
|
|
|
|
def __init__(self, redis_client: Any | None = None, channel: str = "promptlooper:events"):
|
|
self._redis = redis_client
|
|
self._channel = channel
|
|
self._listeners: list[Any] = []
|
|
|
|
def add_listener(self, callback: Any) -> None:
|
|
self._listeners.append(callback)
|
|
|
|
def publish(self, event: dict[str, Any]) -> None:
|
|
payload = json.dumps(event, default=str)
|
|
if self._redis is not None:
|
|
try:
|
|
self._redis.publish(self._channel, payload)
|
|
except Exception:
|
|
logger.warning("Failed to publish event to Redis", exc_info=True)
|
|
for listener in self._listeners:
|
|
try:
|
|
listener(event)
|
|
except Exception:
|
|
logger.warning("Event listener error", exc_info=True)
|
|
|
|
|
|
def render_prompt(template_str: str, context: dict[str, Any]) -> str:
    """Compile ``template_str`` with the module-level Jinja2 environment and render it.

    Args:
        template_str: Jinja2 template source.
        context: Variables exposed to the template; each key becomes a
            top-level template variable.

    Returns:
        The rendered text.
    """
    return _jinja_env.from_string(template_str).render(**context)
|
|
|
|
|
|
def _build_stage_context(
    stage_index: int,
    stage_results: list[dict[str, Any]],
    run_config: dict[str, Any],
    input_data: Any = None,
) -> dict[str, Any]:
    """Build the Jinja2 template context for a given stage.

    Available variables in templates:
      - input: the original input data (empty string when no input was given)
      - config: the run configuration
      - stages: list of previous stage outputs (dicts with 'output', 'model', etc.)
      - prev_output: the output text of the immediately preceding stage
        (empty string for the first stage or when that stage has no 'output')
      - stage_index: current 0-based stage index

    Args:
        stage_index: 0-based index of the stage about to be rendered.
        stage_results: Result dicts of the stages completed so far, in order.
        run_config: The run configuration dict.
        input_data: The run's input data, or ``None`` when absent.

    Returns:
        A dict with the keys listed above.
    """
    prev_output = stage_results[-1].get("output", "") if stage_results else ""

    return {
        # Only a missing input becomes "". Legitimate falsy inputs such as
        # 0, False or [] must reach the template unchanged.
        "input": "" if input_data is None else input_data,
        "config": run_config,
        "stages": stage_results,
        "prev_output": prev_output,
        "stage_index": stage_index,
    }
|
|
|
|
|
|
def _resolve_stages(config: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the stage definitions to execute for ``config``.

    Uses ``config["pipeline_stages"]`` when it is non-empty; otherwise builds
    a single stage from the top-level ``prompt`` / ``prompt_template``,
    ``model`` and ``params`` keys.
    """
    stages = config.get("pipeline_stages", [])
    if stages:
        return stages
    return [
        {
            "prompt_template": config.get("prompt", config.get("prompt_template", "")),
            "model": config.get("model", ""),
            "params": config.get("params", {}),
        }
    ]


def _dispatch_webhooks(event_type: str, payload: dict[str, Any]) -> None:
    """Hand ``payload`` to ``engine.tasks.fire_webhooks``; log instead of raising."""
    try:
        # Imported at call time, as the original call sites did.
        # NOTE(review): presumably to avoid an import cycle with engine.tasks.
        from engine.tasks import fire_webhooks

        fire_webhooks(event_type, payload)
    except Exception:
        logger.warning("Failed to dispatch %s webhooks", event_type, exc_info=True)


async def _record_scores(
    db: Session,
    run_pk: Any,
    scorers: list[Any],
    input_data: Any,
    final_output: str,
    scorer_context: dict[str, Any],
) -> None:
    """Run every scorer on the final output and add one ``Score`` row per success.

    A scorer that raises, lacks ``.name``, or returns a value that cannot be
    turned into a finite-comparable float is logged and skipped; it never
    fails the run.
    """
    for scorer in scorers:
        try:
            if asyncio.iscoroutinefunction(getattr(scorer, "score", None)):
                raw_score = await scorer.score(input_data, final_output, scorer_context)
            else:
                raw_score = scorer.score(input_data, final_output, scorer_context)

            score_value = float(raw_score)
            # NaN compares unequal to itself. Without this check the clamp
            # below would silently turn NaN into a perfect 1.0.
            if score_value != score_value:
                raise ValueError("scorer returned NaN")
            score_value = max(0.0, min(1.0, score_value))

            db.add(Score(run_id=run_pk, scorer_name=scorer.name, value=score_value))
        except Exception:
            logger.warning(
                "Scorer %s failed for run %s",
                # The scorer may be the thing missing ``.name``; do not let
                # the log call itself raise and fail the whole run.
                getattr(scorer, "name", type(scorer).__name__),
                run_pk,
                exc_info=True,
            )


def _persist_failure(db: Session, run: Run, run_pk: Any, duration_ms: int) -> None:
    """Mark ``run`` as failed and commit, recovering from an invalidated session.

    The first attempt commits as-is so stage results flushed before the
    error are kept. If that commit fails (for example because the original
    error was a failed flush and the session needs a rollback), the session
    is rolled back and the failed status is written again. A second failure
    is logged and swallowed so the caller can re-raise the original error.
    """

    def _stamp() -> None:
        run.status = RunStatus.failed
        run.completed_at = datetime.now(timezone.utc)
        run.duration_ms = duration_ms

    _stamp()
    try:
        db.commit()
        return
    except Exception:
        logger.warning(
            "Commit of failed status for run %s failed; retrying after rollback",
            run_pk,
            exc_info=True,
        )

    try:
        db.rollback()
        _stamp()
        db.commit()
    except Exception:
        logger.error("Could not persist failed status for run %s", run_pk, exc_info=True)


async def run_single(
    db: Session,
    run: Run,
    adapter: BaseAdapter,
    cache: ResponseCacheLayer,
    scorers: list[Any] | None = None,
    event_bus: EventBus | None = None,
) -> Run:
    """Execute a single Run through all its pipeline stages.

    For each stage the prompt template is rendered, the response cache is
    consulted, the adapter is called on a miss (and the result cached), and a
    ``StageResult`` row is added. After the last stage the scorers run on the
    final output, the run is marked completed, and ``run.completed`` is
    published and sent to webhooks. On any error the run is marked failed,
    ``run.failed`` is published and sent to webhooks, and the error is
    re-raised.

    Args:
        db: SQLAlchemy session.
        run: The Run ORM object (must already be persisted with config).
        adapter: LLM adapter to use for completions.
        cache: Response cache layer.
        scorers: Optional list of scorer instances (must have .name and .score()).
        event_bus: Optional event publisher for progress updates.

    Returns:
        The updated Run object.

    Raises:
        Exception: Whatever a stage, the cache, or the final commit raised,
            re-raised unchanged after the failure has been recorded.
    """
    scorers = scorers or []

    config = run.config or {}
    stages = _resolve_stages(config)
    input_data = config.get("input_data")

    # Mark run as running
    run.status = RunStatus.running
    run.started_at = datetime.now(timezone.utc)
    db.commit()

    # Snapshot the identifiers once so the failure path never has to read
    # them back from a session that may have been rolled back.
    run_pk = run.id
    run_id = str(run_pk)
    experiment_id = str(run.experiment_id)

    if event_bus:
        event_bus.publish({
            "type": "run.started",
            "run_id": run_id,
            "experiment_id": experiment_id,
        })

    total_tokens_in = 0
    total_tokens_out = 0
    completed_stages: list[dict[str, Any]] = []
    t_start = time.perf_counter()

    try:
        for stage_index, stage_def in enumerate(stages):
            # Stage-level settings win; run-level config is the fallback.
            prompt_template = stage_def.get("prompt_template", stage_def.get("prompt", ""))
            model = stage_def.get("model", config.get("model", ""))
            params = stage_def.get("params", config.get("params", {}))

            # Build context and render prompt
            template_ctx = _build_stage_context(
                stage_index, completed_stages, config, input_data
            )
            rendered_prompt = render_prompt(prompt_template, template_ctx)

            # Check cache
            config_hash = compute_config_hash(rendered_prompt, model, params, input_data)
            cached = cache.get(db, config_hash)

            if cached is not None:
                # Cache hit
                response_text = cached.response
                tokens_in = cached.tokens_in or 0
                tokens_out = cached.tokens_out or 0
                latency_ms = cached.latency_ms or 0
            else:
                # Cache miss: call the LLM
                adapter_resp: AdapterResponse = await adapter.complete(
                    rendered_prompt, model, params
                )
                response_text = adapter_resp.text
                # Same None-guard as the cache-hit branch, so an adapter that
                # omits usage data cannot break the running totals.
                tokens_in = adapter_resp.tokens_in or 0
                tokens_out = adapter_resp.tokens_out or 0
                latency_ms = int(adapter_resp.latency_ms or 0)

                # Store in cache
                cache.put(
                    db,
                    config_hash=config_hash,
                    response=response_text,
                    model=model,
                    tokens_in=tokens_in,
                    tokens_out=tokens_out,
                    latency_ms=latency_ms,
                )

            total_tokens_in += tokens_in
            total_tokens_out += tokens_out

            # Create StageResult record
            db.add(
                StageResult(
                    run_id=run_pk,
                    stage_index=stage_index,
                    prompt_sent=rendered_prompt,
                    response_raw=response_text,
                    model_used=model,
                    parameters=params,
                    tokens_in=tokens_in,
                    tokens_out=tokens_out,
                    latency_ms=latency_ms,
                )
            )
            db.flush()

            completed_stages.append({
                "output": response_text,
                "model": model,
                "tokens_in": tokens_in,
                "tokens_out": tokens_out,
                "latency_ms": latency_ms,
                "stage_index": stage_index,
            })

            if event_bus:
                event_bus.publish({
                    "type": "run.stage_completed",
                    "run_id": run_id,
                    "experiment_id": experiment_id,
                    "stage_index": stage_index,
                    "total_stages": len(stages),
                })

        # Run scorers on the final output
        final_output = completed_stages[-1]["output"] if completed_stages else ""
        scorer_context = {
            "config": config,
            "stages": completed_stages,
            "input_data": input_data,
        }
        await _record_scores(
            db, run_pk, scorers, input_data, final_output, scorer_context
        )

        # Update run status and timing
        duration_ms = int((time.perf_counter() - t_start) * 1000)
        run.status = RunStatus.completed
        run.completed_at = datetime.now(timezone.utc)
        run.duration_ms = duration_ms
        run.tokens_in = total_tokens_in
        run.tokens_out = total_tokens_out
        db.commit()

        completed_event = {
            "type": "run.completed",
            "run_id": run_id,
            "experiment_id": experiment_id,
            "duration_ms": duration_ms,
            "tokens_in": total_tokens_in,
            "tokens_out": total_tokens_out,
        }
        if event_bus:
            event_bus.publish(completed_event)

        _dispatch_webhooks("run.completed", completed_event)

    except Exception as exc:
        logger.error("Run %s failed: %s", run_pk, exc, exc_info=True)
        _persist_failure(db, run, run_pk, int((time.perf_counter() - t_start) * 1000))

        failed_event = {
            "type": "run.failed",
            "run_id": run_id,
            "experiment_id": experiment_id,
            "error": str(exc),
        }
        if event_bus:
            event_bus.publish(failed_event)

        _dispatch_webhooks("run.failed", failed_event)
        # Re-raise the original error; everything above is best-effort.
        raise

    return run
|