diff --git a/backend/pipeline/quality/__init__.py b/backend/pipeline/quality/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/pipeline/quality/__main__.py b/backend/pipeline/quality/__main__.py
new file mode 100644
index 0000000..7d6af95
--- /dev/null
+++ b/backend/pipeline/quality/__main__.py
@@ -0,0 +1,42 @@
+"""FYN-LLM fitness test suite.
+
+Run with: python -m pipeline.quality fitness
+"""
+from __future__ import annotations
+
+import argparse
+import sys
+
+from config import get_settings
+from pipeline.llm_client import LLMClient
+
+from .fitness import FitnessRunner
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(
+        prog="pipeline.quality",
+        description="FYN-LLM quality assurance toolkit",
+    )
+    sub = parser.add_subparsers(dest="command")
+
+    # -- fitness subcommand --
+    sub.add_parser("fitness", help="Run LLM fitness tests across four categories")
+
+    args = parser.parse_args()
+
+    if args.command is None:
+        parser.print_help()
+        return 1
+
+    if args.command == "fitness":
+        settings = get_settings()
+        client = LLMClient(settings)
+        runner = FitnessRunner(client)
+        return runner.run_all()
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/backend/pipeline/quality/fitness.py b/backend/pipeline/quality/fitness.py
new file mode 100644
index 0000000..af39e74
--- /dev/null
+++ b/backend/pipeline/quality/fitness.py
@@ -0,0 +1,489 @@
+"""FYN-LLM fitness test runner.
+
+Tests four categories:
+1. Mandelbrot reasoning — factual knowledge / reasoning depth
+2. JSON compliance — simple and nested structured output
+3. Instruction following — bullet count, keyword inclusion, casing
+4. Diverse prompt battery — summarization, classification, extraction
+"""
+from __future__ import annotations
+
+import json
+import logging
+import time
+from dataclasses import dataclass, field
+
+import openai
+from pydantic import BaseModel
+
+from pipeline.llm_client import LLMClient
+
+logger = logging.getLogger(__name__)
+
+
+# ── Result types ─────────────────────────────────────────────────────────────
+
+@dataclass
+class TestResult:
+    """Outcome of a single fitness test."""
+
+    name: str
+    passed: bool
+    elapsed_seconds: float
+    token_count: int | None = None
+    detail: str = ""
+
+
+@dataclass
+class CategoryReport:
+    """Results for one test category."""
+
+    category: str
+    results: list[TestResult] = field(default_factory=list)
+
+    @property
+    def all_passed(self) -> bool:
+        return all(r.passed for r in self.results)
+
+
+# ── Pydantic models for JSON compliance tests ────────────────────────────────
+
+class SimpleItem(BaseModel):
+    name: str
+    count: int
+
+
+class Address(BaseModel):
+    street: str
+    city: str
+    zip_code: str
+
+
+class PersonWithAddress(BaseModel):
+    name: str
+    age: int
+    address: Address
+
+
+# ── Runner ───────────────────────────────────────────────────────────────────
+
+class FitnessRunner:
+    """Runs all fitness tests against the configured LLM endpoint."""
+
+    def __init__(self, client: LLMClient) -> None:
+        self.client = client
+
+    # ── Public entry point ───────────────────────────────────────────────
+
+    def run_all(self) -> int:
+        """Run all fitness tests, print report, return exit code (0=pass, 1=fail)."""
+        categories: list[CategoryReport] = []
+
+        # Connectivity pre-check — fail fast with a clear message
+        try:
+            self._probe_connectivity()
+        except (openai.APIConnectionError, openai.APITimeoutError) as exc:
+            url = self.client.settings.llm_api_url
+            fallback = self.client.settings.llm_fallback_url
+            print(
+                f"\n✗ Cannot reach LLM endpoint at {url} (fallback {fallback})\n"
+                f"  Error: {exc}\n"
+            )
+            return 1
+
+        categories.append(self._run_mandelbrot())
+        categories.append(self._run_json_compliance())
+        categories.append(self._run_instruction_following())
+        categories.append(self._run_diverse_battery())
+
+        self._print_report(categories)
+
+        return 0 if all(c.all_passed for c in categories) else 1
+
+    # ── Connectivity probe ───────────────────────────────────────────────
+
+    def _probe_connectivity(self) -> None:
+        """Quick completion to verify the endpoint is reachable."""
+        self.client.complete(
+            system_prompt="You are a test probe.",
+            user_prompt="Respond with the single word: ok",
+        )
+
+    # ── Category 1: Mandelbrot reasoning ─────────────────────────────────
+
+    def _run_mandelbrot(self) -> CategoryReport:
+        cat = CategoryReport(category="Mandelbrot Reasoning")
+        cat.results.append(self._test_mandelbrot())
+        return cat
+
+    def _test_mandelbrot(self) -> TestResult:
+        name = "mandelbrot_area_knowledge"
+        t0 = time.monotonic()
+        try:
+            resp = self.client.complete(
+                system_prompt="You are a mathematics expert. Answer precisely and concisely.",
+                user_prompt=(
+                    "What is the approximate area of the Mandelbrot set? "
+                    "Include the numerical value and mention whether the exact area is known."
+                ),
+                modality="thinking",
+            )
+            elapsed = time.monotonic() - t0
+            text = resp.lower()
+            # Check for key concepts
+            has_area = any(kw in text for kw in ["1.506", "1.507", "1.50659"])
+            has_uncertainty = any(
+                kw in text
+                for kw in ["not exactly known", "not known exactly", "approximate", "estimated", "conjecture"]
+            )
+            passed = has_area and has_uncertainty
+            detail = "" if passed else f"Missing: area={has_area}, uncertainty={has_uncertainty}. Response: {resp[:200]}"
+            return TestResult(
+                name=name,
+                passed=passed,
+                elapsed_seconds=round(elapsed, 2),
+                token_count=resp.completion_tokens,
+                detail=detail,
+            )
+        except Exception as exc:
+            return TestResult(
+                name=name,
+                passed=False,
+                elapsed_seconds=round(time.monotonic() - t0, 2),
+                detail=f"Exception: {exc}",
+            )
+
+    # ── Category 2: JSON compliance ──────────────────────────────────────
+
+    def _run_json_compliance(self) -> CategoryReport:
+        cat = CategoryReport(category="JSON Compliance")
+        cat.results.append(self._test_json_simple())
+        cat.results.append(self._test_json_nested())
+        return cat
+
+    def _test_json_simple(self) -> TestResult:
+        name = "json_simple_object"
+        t0 = time.monotonic()
+        try:
+            resp = self.client.complete(
+                system_prompt="You are a JSON generator. Output ONLY valid JSON, nothing else.",
+                user_prompt=(
+                    'Generate a JSON object with exactly two keys: "name" (a string) '
+                    'and "count" (an integer). Example structure: {"name": "...", "count": N}'
+                ),
+                response_model=SimpleItem,
+                modality="chat",
+            )
+            elapsed = time.monotonic() - t0
+            return self._validate_json(name, resp, SimpleItem, elapsed)
+        except Exception as exc:
+            return TestResult(
+                name=name,
+                passed=False,
+                elapsed_seconds=round(time.monotonic() - t0, 2),
+                detail=f"Exception: {exc}",
+            )
+
+    def _test_json_nested(self) -> TestResult:
+        name = "json_nested_object"
+        t0 = time.monotonic()
+        try:
+            resp = self.client.complete(
+                system_prompt="You are a JSON generator. Output ONLY valid JSON, nothing else.",
+                user_prompt=(
+                    'Generate a JSON object with keys "name" (string), "age" (integer), '
+                    'and "address" (object with "street", "city", "zip_code" string fields).'
+                ),
+                response_model=PersonWithAddress,
+                modality="chat",
+            )
+            elapsed = time.monotonic() - t0
+            return self._validate_json(name, resp, PersonWithAddress, elapsed)
+        except Exception as exc:
+            return TestResult(
+                name=name,
+                passed=False,
+                elapsed_seconds=round(time.monotonic() - t0, 2),
+                detail=f"Exception: {exc}",
+            )
+
+    def _validate_json(
+        self,
+        name: str,
+        resp: str,
+        model: type[BaseModel],
+        elapsed: float,
+    ) -> TestResult:
+        """Parse response as JSON, validate against Pydantic model."""
+        text = str(resp).strip()
+        if not text:
+            return TestResult(
+                name=name, passed=False, elapsed_seconds=round(elapsed, 2),
+                token_count=getattr(resp, "completion_tokens", None),
+                detail="Empty response from LLM",
+            )
+        try:
+            parsed = json.loads(text)
+        except json.JSONDecodeError as exc:
+            return TestResult(
+                name=name, passed=False, elapsed_seconds=round(elapsed, 2),
+                token_count=getattr(resp, "completion_tokens", None),
+                detail=f"Invalid JSON: {exc}. Raw: {text[:200]}",
+            )
+        try:
+            model.model_validate(parsed)
+        except Exception as exc:
+            return TestResult(
+                name=name, passed=False, elapsed_seconds=round(elapsed, 2),
+                token_count=getattr(resp, "completion_tokens", None),
+                detail=f"Schema validation failed: {exc}",
+            )
+        return TestResult(
+            name=name, passed=True, elapsed_seconds=round(elapsed, 2),
+            token_count=getattr(resp, "completion_tokens", None),
+        )
+
+    # ── Category 3: Instruction following ────────────────────────────────
+
+    def _run_instruction_following(self) -> CategoryReport:
+        cat = CategoryReport(category="Instruction Following")
+        cat.results.append(self._test_bullet_count())
+        cat.results.append(self._test_keyword_inclusion())
+        cat.results.append(self._test_lowercase_only())
+        return cat
+
+    def _test_bullet_count(self) -> TestResult:
+        name = "instruction_bullet_count"
+        t0 = time.monotonic()
+        try:
+            resp = self.client.complete(
+                system_prompt="Follow instructions exactly.",
+                user_prompt="List exactly 3 benefits of exercise. Use bullet points starting with '- '.",
+            )
+            elapsed = time.monotonic() - t0
+            lines = [l.strip() for l in str(resp).strip().splitlines() if l.strip().startswith("- ")]
+            passed = len(lines) == 3
+            detail = "" if passed else f"Expected 3 bullets, got {len(lines)}: {str(resp)[:200]}"
+            return TestResult(
+                name=name, passed=passed, elapsed_seconds=round(elapsed, 2),
+                token_count=resp.completion_tokens,
+                detail=detail,
+            )
+        except Exception as exc:
+            return TestResult(
+                name=name, passed=False,
+                elapsed_seconds=round(time.monotonic() - t0, 2),
+                detail=f"Exception: {exc}",
+            )
+
+    def _test_keyword_inclusion(self) -> TestResult:
+        name = "instruction_keyword_inclusion"
+        t0 = time.monotonic()
+        try:
+            resp = self.client.complete(
+                system_prompt="Follow instructions exactly.",
+                user_prompt=(
+                    "Write one sentence about the weather. "
+                    'You MUST include the word "elephant" somewhere in your sentence.'
+                ),
+            )
+            elapsed = time.monotonic() - t0
+            passed = "elephant" in str(resp).lower()
+            detail = "" if passed else f"Missing keyword 'elephant'. Response: {str(resp)[:200]}"
+            return TestResult(
+                name=name, passed=passed, elapsed_seconds=round(elapsed, 2),
+                token_count=resp.completion_tokens,
+                detail=detail,
+            )
+        except Exception as exc:
+            return TestResult(
+                name=name, passed=False,
+                elapsed_seconds=round(time.monotonic() - t0, 2),
+                detail=f"Exception: {exc}",
+            )
+
+    def _test_lowercase_only(self) -> TestResult:
+        name = "instruction_lowercase_only"
+        t0 = time.monotonic()
+        try:
+            resp = self.client.complete(
+                system_prompt="Follow instructions exactly.",
+                user_prompt=(
+                    "Write a short sentence about the ocean. "
+                    "Use ONLY lowercase letters — no uppercase at all, not even at the start."
+                ),
+            )
+            elapsed = time.monotonic() - t0
+            text = str(resp).strip()
+            # Allow non-alpha chars (punctuation, spaces, numbers) but no uppercase letters
+            has_upper = any(c.isupper() for c in text)
+            passed = not has_upper and len(text) > 5
+            detail = "" if passed else f"Contains uppercase or too short. Response: {text[:200]}"
+            return TestResult(
+                name=name, passed=passed, elapsed_seconds=round(elapsed, 2),
+                token_count=resp.completion_tokens,
+                detail=detail,
+            )
+        except Exception as exc:
+            return TestResult(
+                name=name, passed=False,
+                elapsed_seconds=round(time.monotonic() - t0, 2),
+                detail=f"Exception: {exc}",
+            )
+
+    # ── Category 4: Diverse prompt battery ───────────────────────────────
+
+    def _run_diverse_battery(self) -> CategoryReport:
+        cat = CategoryReport(category="Diverse Prompt Battery")
+        cat.results.append(self._test_summarization())
+        cat.results.append(self._test_classification())
+        cat.results.append(self._test_extraction())
+        return cat
+
+    def _test_summarization(self) -> TestResult:
+        name = "battery_summarization"
+        paragraph = (
+            "The James Webb Space Telescope (JWST) is the largest optical telescope in space. "
+            "Launched in December 2021, it is designed to conduct infrared astronomy. Its high "
+            "resolution and sensitivity allow it to view objects too old and distant for the Hubble "
+            "Space Telescope. Among its goals are observing the first stars and the formation of "
+            "the first galaxies, and detailed atmospheric characterization of exoplanets."
+        )
+        t0 = time.monotonic()
+        try:
+            resp = self.client.complete(
+                system_prompt="You are a concise summarizer.",
+                user_prompt=f"Summarize the following in exactly 2 sentences:\n\n{paragraph}",
+            )
+            elapsed = time.monotonic() - t0
+            text = str(resp).strip()
+            # Rough sentence count: split on period followed by space or end
+            sentences = [s.strip() for s in text.replace("! ", ". ").split(". ") if s.strip()]
+            # Be generous: 1-3 sentences is acceptable
+            passed = 1 <= len(sentences) <= 3 and len(text) > 20
+            detail = "" if passed else f"Expected ~2 sentences, got {len(sentences)}. Response: {text[:200]}"
+            return TestResult(
+                name=name, passed=passed, elapsed_seconds=round(elapsed, 2),
+                token_count=resp.completion_tokens,
+                detail=detail,
+            )
+        except Exception as exc:
+            return TestResult(
+                name=name, passed=False,
+                elapsed_seconds=round(time.monotonic() - t0, 2),
+                detail=f"Exception: {exc}",
+            )
+
+    def _test_classification(self) -> TestResult:
+        name = "battery_classification"
+        categories = ["technology", "sports", "politics", "science", "entertainment"]
+        t0 = time.monotonic()
+        try:
+            resp = self.client.complete(
+                system_prompt=(
+                    "You are a text classifier. Respond with ONLY one word from the given categories."
+                ),
+                user_prompt=(
+                    f"Classify the following text into one of these categories: {', '.join(categories)}\n\n"
+                    "Text: \"NASA's Perseverance rover has discovered organic molecules on Mars, "
+                    "suggesting the planet may have once harbored microbial life.\"\n\n"
+                    "Category:"
+                ),
+            )
+            elapsed = time.monotonic() - t0
+            answer = str(resp).strip().lower().rstrip(".")
+            passed = answer in categories
+            detail = "" if passed else f"Response '{answer}' not in {categories}"
+            return TestResult(
+                name=name, passed=passed, elapsed_seconds=round(elapsed, 2),
+                token_count=resp.completion_tokens,
+                detail=detail,
+            )
+        except Exception as exc:
+            return TestResult(
+                name=name, passed=False,
+                elapsed_seconds=round(time.monotonic() - t0, 2),
+                detail=f"Exception: {exc}",
+            )
+
+    def _test_extraction(self) -> TestResult:
+        name = "battery_extraction"
+        t0 = time.monotonic()
+        try:
+            resp = self.client.complete(
+                system_prompt="You are a data extractor. Output ONLY valid JSON, nothing else.",
+                user_prompt=(
+                    "Extract the following fields as a JSON object: "
+                    '"event_name", "date", "location"\n\n'
+                    "Text: \"The annual Tech Summit 2026 will be held on March 15, 2026 "
+                    'in San Francisco, California."\n\n'
+                    "JSON:"
+                ),
+                response_model=BaseModel,  # triggers json mode
+                modality="chat",
+            )
+            elapsed = time.monotonic() - t0
+            text = str(resp).strip()
+            if not text:
+                return TestResult(
+                    name=name, passed=False, elapsed_seconds=round(elapsed, 2),
+                    token_count=getattr(resp, "completion_tokens", None),
+                    detail="Empty response from LLM",
+                )
+            try:
+                parsed = json.loads(text)
+            except json.JSONDecodeError as exc:
+                return TestResult(
+                    name=name, passed=False, elapsed_seconds=round(elapsed, 2),
+                    token_count=getattr(resp, "completion_tokens", None),
+                    detail=f"Invalid JSON: {exc}. Raw: {text[:200]}",
+                )
+            required_keys = {"event_name", "date", "location"}
+            present = set(parsed.keys()) & required_keys
+            passed = present == required_keys
+            detail = "" if passed else f"Missing keys: {required_keys - present}"
+            return TestResult(
+                name=name, passed=passed, elapsed_seconds=round(elapsed, 2),
+                token_count=getattr(resp, "completion_tokens", None),
+                detail=detail,
+            )
+        except Exception as exc:
+            return TestResult(
+                name=name, passed=False,
+                elapsed_seconds=round(time.monotonic() - t0, 2),
+                detail=f"Exception: {exc}",
+            )
+
+    # ── Report formatting ────────────────────────────────────────────────
+
+    def _print_report(self, categories: list[CategoryReport]) -> None:
+        """Print a formatted pass/fail report to stdout."""
+        total = 0
+        passed_count = 0
+
+        print("\n" + "=" * 60)
+        print("  FYN-LLM FITNESS REPORT")
+        print("=" * 60)
+
+        for cat in categories:
+            status = "✓ PASS" if cat.all_passed else "✗ FAIL"
+            print(f"\n  [{status}] {cat.category}")
+            for r in cat.results:
+                total += 1
+                icon = "✓" if r.passed else "✗"
+                tokens = f" ({r.token_count} tok)" if r.token_count else ""
+                print(f"    {icon} {r.name}  [{r.elapsed_seconds}s{tokens}]")
+                if r.detail:
+                    # Indent detail lines
+                    for line in r.detail.splitlines():
+                        print(f"      {line}")
+                if r.passed:
+                    passed_count += 1
+
+        print("\n" + "-" * 60)
+        print(f"  Total: {passed_count}/{total} passed")
+        if passed_count == total:
+            print("  Result: ✓ ALL PASS")
+        else:
+            print(f"  Result: ✗ {total - passed_count} FAILED")
+        print("=" * 60 + "\n")