"""Tests for the FormatScorer.""" import asyncio import json from typing import Any import pytest from engine.scorers.format import FormatScorer class TestFormatScorerInit: def test_valid_format_types(self): for fmt in ("json", "markdown", "length", "structure"): scorer = FormatScorer(format_type=fmt) assert scorer.format_type == fmt def test_invalid_format_type_raises(self): with pytest.raises(ValueError, match="Invalid format_type"): FormatScorer(format_type="xml") def test_name_property(self): scorer = FormatScorer() assert scorer.name == "format" def test_is_base_scorer(self): from engine.scorers.base import BaseScorer scorer = FormatScorer() assert isinstance(scorer, BaseScorer) class TestJsonFormat: def test_valid_json_object(self): scorer = FormatScorer(format_type="json") assert scorer.score(None, '{"key": "value"}', {}) == 1.0 def test_valid_json_array(self): scorer = FormatScorer(format_type="json") assert scorer.score(None, '[1, 2, 3]', {}) == 1.0 def test_valid_json_string(self): scorer = FormatScorer(format_type="json") assert scorer.score(None, '"hello"', {}) == 1.0 def test_valid_json_number(self): scorer = FormatScorer(format_type="json") assert scorer.score(None, '42', {}) == 1.0 def test_valid_json_with_whitespace(self): scorer = FormatScorer(format_type="json") assert scorer.score(None, ' {"key": "value"} ', {}) == 1.0 def test_invalid_json(self): scorer = FormatScorer(format_type="json") assert scorer.score(None, "not json at all", {}) == 0.0 def test_empty_string(self): scorer = FormatScorer(format_type="json") assert scorer.score(None, "", {}) == 0.0 def test_partial_json(self): scorer = FormatScorer(format_type="json") assert scorer.score(None, '{"key":', {}) == 0.0 class TestMarkdownFormat: def test_headers_only(self): scorer = FormatScorer(format_type="markdown") output = "# Title\n\nSome text here." assert scorer.score(None, output, {}) == 0.5 def test_lists_only_unordered(self): scorer = FormatScorer(format_type="markdown") output = "Some text\n- item one\n- item two" assert scorer.score(None, output, {}) == 0.5 def test_lists_only_ordered(self): scorer = FormatScorer(format_type="markdown") output = "Some text\n1. first\n2. second" assert scorer.score(None, output, {}) == 0.5 def test_both_headers_and_lists(self): scorer = FormatScorer(format_type="markdown") output = "# Title\n\n- item one\n- item two" assert scorer.score(None, output, {}) == 1.0 def test_no_markdown(self): scorer = FormatScorer(format_type="markdown") output = "Just plain text without any formatting." assert scorer.score(None, output, {}) == 0.0 def test_nested_header_levels(self): scorer = FormatScorer(format_type="markdown") output = "## Subtitle\n\nContent here" assert scorer.score(None, output, {}) == 0.5 def test_asterisk_list(self): scorer = FormatScorer(format_type="markdown") output = "Some text\n* item one\n* item two" assert scorer.score(None, output, {}) == 0.5 def test_ordered_list_with_parenthesis(self): scorer = FormatScorer(format_type="markdown") output = "Text\n1) first\n2) second" assert scorer.score(None, output, {}) == 0.5 class TestLengthFormat: def test_within_range(self): scorer = FormatScorer(format_type="length", min_tokens=5, max_tokens=20) output = "this is a ten word sentence for the test case" assert scorer.score(None, output, {}) == 1.0 def test_exact_min(self): scorer = FormatScorer(format_type="length", min_tokens=3, max_tokens=10) assert scorer.score(None, "one two three", {}) == 1.0 def test_exact_max(self): scorer = FormatScorer(format_type="length", min_tokens=1, max_tokens=3) assert scorer.score(None, "one two three", {}) == 1.0 def test_below_min(self): scorer = FormatScorer(format_type="length", min_tokens=10, max_tokens=20) output = "only five words here now" result = scorer.score(None, output, {}) assert 0.0 < result < 1.0 assert result == 5 / 10 # 0.5 def test_above_max(self): scorer = FormatScorer(format_type="length", min_tokens=1, max_tokens=5) output = "one two three four five six seven eight nine ten" result = scorer.score(None, output, {}) assert 0.0 <= result < 1.0 def test_no_bounds(self): scorer = FormatScorer(format_type="length") assert scorer.score(None, "any text", {}) == 1.0 def test_only_min(self): scorer = FormatScorer(format_type="length", min_tokens=3) assert scorer.score(None, "one two three four", {}) == 1.0 def test_only_max(self): scorer = FormatScorer(format_type="length", max_tokens=5) assert scorer.score(None, "one two", {}) == 1.0 def test_empty_output(self): scorer = FormatScorer(format_type="length", min_tokens=5) # empty string splits to [''], which has length 1 result = scorer.score(None, "", {}) assert result < 1.0 def test_zero_min(self): scorer = FormatScorer(format_type="length", min_tokens=0, max_tokens=10) assert scorer.score(None, "hello", {}) == 1.0 class TestStructureFormat: def test_valid_structure(self): schema = { "type": "object", "required": ["name", "age"], "properties": { "name": {"type": "string"}, "age": {"type": "integer"}, }, } scorer = FormatScorer(format_type="structure", json_schema=schema) output = json.dumps({"name": "Alice", "age": 30}) assert scorer.score(None, output, {}) == 1.0 def test_missing_required_field(self): schema = { "type": "object", "required": ["name", "age"], "properties": { "name": {"type": "string"}, "age": {"type": "integer"}, }, } scorer = FormatScorer(format_type="structure", json_schema=schema) output = json.dumps({"name": "Alice"}) assert scorer.score(None, output, {}) == 0.0 def test_wrong_type(self): schema = {"type": "array"} scorer = FormatScorer(format_type="structure", json_schema=schema) output = json.dumps({"key": "value"}) assert scorer.score(None, output, {}) == 0.0 def test_valid_array_structure(self): schema = {"type": "array"} scorer = FormatScorer(format_type="structure", json_schema=schema) output = json.dumps([1, 2, 3]) assert scorer.score(None, output, {}) == 1.0 def test_no_schema_returns_zero(self): scorer = FormatScorer(format_type="structure") assert scorer.score(None, '{"key": "value"}', {}) == 0.0 def test_invalid_json_for_structure(self): schema = {"type": "object"} scorer = FormatScorer(format_type="structure", json_schema=schema) assert scorer.score(None, "not json", {}) == 0.0 def test_complex_schema(self): schema = { "type": "object", "required": ["results"], "properties": { "results": { "type": "array", "items": {"type": "object"}, }, }, } scorer = FormatScorer(format_type="structure", json_schema=schema) output = json.dumps({"results": [{"id": 1}, {"id": 2}]}) assert scorer.score(None, output, {}) == 1.0 class TestAsyncScoring: def test_async_delegates_to_sync(self): scorer = FormatScorer(format_type="json") result = asyncio.get_event_loop().run_until_complete( scorer.score_async(None, '{"valid": true}', {}) ) assert result == 1.0