
Commit 395631e

Merge pull request #101 from asamal4/handle-no-toolcall
handle no tool call alternative
2 parents: 24f68c4 + e647043

2 files changed: 39 additions, 3 deletions


src/lightspeed_evaluation/core/metrics/custom/custom.py

Lines changed: 1 addition & 3 deletions
@@ -188,9 +188,7 @@ def _evaluate_tool_calls(
             return None, "No expected tool calls provided for tool evaluation"
 
         # Get actual tool calls from turn data (will be populated by API)
-        actual_tool_calls = getattr(turn_data, "tool_calls", [])
-        if not actual_tool_calls:
-            return 0.0, "No actual tool calls found in response"
+        actual_tool_calls = getattr(turn_data, "tool_calls", []) or []
 
         # Use the tool evaluation logic
        success, details = evaluate_tool_calls(
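The substance of the fix: the old code short-circuited with a score of 0.0 whenever the turn had no actual tool calls, so an expected alternative of "no tools" could never match. Coercing None to an empty list with "or []" lets the shared evaluation logic do the comparison instead. A minimal sketch of the coercion, using a hypothetical stand-in object rather than the real TurnData model:

    # Hypothetical stand-in for TurnData; only the tool_calls attribute matters here.
    class FakeTurnData:
        tool_calls = None  # the API may leave this as None when no tools were called

    turn_data = FakeTurnData()

    # Old code: getattr(...) returns None (the attribute exists, it is just None),
    # None is falsy, and the metric returned 0.0 immediately.
    # New code: "or []" coerces None to an empty list, so evaluation can proceed.
    actual_tool_calls = getattr(turn_data, "tool_calls", []) or []
    assert actual_tool_calls == []

Note that the getattr default of [] only applies when the attribute is missing entirely; the "or []" is what handles the attribute being present but None.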
New test file. Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
+"""Tests for custom metrics module."""
+
+from lightspeed_evaluation.core.metrics.custom.custom import CustomMetrics
+from lightspeed_evaluation.core.models import TurnData
+
+
+class TestCustomMetrics:
+    """Test CustomMetrics class."""
+
+    def test_evaluate_tool_calls_with_none_tool_calls(self, mocker):
+        """Test that None tool_calls is handled correctly."""
+        # Mock LLM manager
+        mock_llm_manager = mocker.Mock()
+        mock_llm_manager.get_model_name.return_value = "test-model"
+        mock_llm_manager.get_llm_params.return_value = {}
+
+        custom_metrics = CustomMetrics(mock_llm_manager)
+
+        # TurnData with tool_calls = None
+        turn_data = TurnData(
+            turn_id="test_turn",
+            query="hello",
+            tool_calls=None,
+            expected_tool_calls=[
+                [
+                    [{"tool_name": "some_tool", "arguments": {}}]
+                ],  # Primary: expects tool
+                [],  # Alternative: no tools (should match None -> [])
+            ],
+        )
+
+        # Should match the empty alternative without error
+        score, reason = custom_metrics._evaluate_tool_calls(
+            _conv_data=None, _turn_idx=0, turn_data=turn_data, is_conversation=False
+        )
+
+        assert score == 1.0
+        assert "Alternative 2 matched" in reason
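For readers unfamiliar with the mechanism the final assertion relies on: each entry in expected_tool_calls is one acceptable alternative, and the metric reports which alternative matched. The sketch below is a hypothetical illustration of that idea, not the real evaluate_tool_calls from lightspeed_evaluation, whose implementation is not shown in this diff:

    # Hypothetical sketch of alternative matching; names and logic here are
    # assumptions for illustration, not the library's actual implementation.
    def match_any_alternative(actual, alternatives):
        for i, expected in enumerate(alternatives, start=1):
            if actual == expected:  # the real matcher compares names and arguments
                return 1.0, f"Alternative {i} matched"
        return 0.0, "No alternative matched"

    # With tool_calls=None coerced to [], the empty alternative (number 2) matches:
    expected = [[[{"tool_name": "some_tool", "arguments": {}}]], []]
    score, reason = match_any_alternative([], expected)
    assert score == 1.0 and reason == "Alternative 2 matched"

One practical note: the mocker fixture used in the test is provided by the pytest-mock plugin, so running this suite assumes pytest with pytest-mock installed.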
