
Commit 395631e

Merge pull request #101 from asamal4/handle-no-toolcall
handle no tool call alternative
2 parents: 24f68c4 + e647043

2 files changed: 39 additions, 3 deletions


src/lightspeed_evaluation/core/metrics/custom/custom.py

Lines changed: 1 addition & 3 deletions
@@ -188,9 +188,7 @@ def _evaluate_tool_calls(
             return None, "No expected tool calls provided for tool evaluation"
 
         # Get actual tool calls from turn data (will be populated by API)
-        actual_tool_calls = getattr(turn_data, "tool_calls", [])
-        if not actual_tool_calls:
-            return 0.0, "No actual tool calls found in response"
+        actual_tool_calls = getattr(turn_data, "tool_calls", []) or []
 
         # Use the tool evaluation logic
        success, details = evaluate_tool_calls(
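The substance of the fix: the old code short-circuited with a score of 0.0 whenever the turn had no actual tool calls, so an expected alternative of "no tools" could never match. Coercing None to an empty list with "or []" lets the shared evaluation logic do the comparison instead. A minimal sketch of the coercion, using a hypothetical stand-in object rather than the real TurnData model:

    # Hypothetical stand-in for TurnData; only the tool_calls attribute matters here.
    class FakeTurnData:
        tool_calls = None  # the API may leave this as None when no tools were called

    turn_data = FakeTurnData()

    # Old code: getattr(...) returns None (the attribute exists, it is just None),
    # None is falsy, and the metric returned 0.0 immediately.
    # New code: "or []" coerces None to an empty list, so evaluation can proceed.
    actual_tool_calls = getattr(turn_data, "tool_calls", []) or []
    assert actual_tool_calls == []

Note that the getattr default of [] only applies when the attribute is missing entirely; the "or []" is what handles the attribute being present but None.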
New test file. Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
+"""Tests for custom metrics module."""
+
+from lightspeed_evaluation.core.metrics.custom.custom import CustomMetrics
+from lightspeed_evaluation.core.models import TurnData
+
+
+class TestCustomMetrics:
+    """Test CustomMetrics class."""
+
+    def test_evaluate_tool_calls_with_none_tool_calls(self, mocker):
+        """Test that None tool_calls is handled correctly."""
+        # Mock LLM manager
+        mock_llm_manager = mocker.Mock()
+        mock_llm_manager.get_model_name.return_value = "test-model"
+        mock_llm_manager.get_llm_params.return_value = {}
+
+        custom_metrics = CustomMetrics(mock_llm_manager)
+
+        # TurnData with tool_calls = None
+        turn_data = TurnData(
+            turn_id="test_turn",
+            query="hello",
+            tool_calls=None,
+            expected_tool_calls=[
+                [
+                    [{"tool_name": "some_tool", "arguments": {}}]
+                ],  # Primary: expects tool
+                [],  # Alternative: no tools (should match None -> [])
+            ],
+        )
+
+        # Should match the empty alternative without error
+        score, reason = custom_metrics._evaluate_tool_calls(
+            _conv_data=None, _turn_idx=0, turn_data=turn_data, is_conversation=False
+        )
+
+        assert score == 1.0
+        assert "Alternative 2 matched" in reason
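For readers unfamiliar with the mechanism the final assertion relies on: each entry in expected_tool_calls is one acceptable alternative, and the metric reports which alternative matched. The sketch below is a hypothetical illustration of that idea, not the real evaluate_tool_calls from lightspeed_evaluation, whose implementation is not shown in this diff:

    # Hypothetical sketch of alternative matching; names and logic here are
    # assumptions for illustration, not the library's actual implementation.
    def match_any_alternative(actual, alternatives):
        for i, expected in enumerate(alternatives, start=1):
            if actual == expected:  # the real matcher compares names and arguments
                return 1.0, f"Alternative {i} matched"
        return 0.0, "No alternative matched"

    # With tool_calls=None coerced to [], the empty alternative (number 2) matches:
    expected = [[[{"tool_name": "some_tool", "arguments": {}}]], []]
    score, reason = match_any_alternative([], expected)
    assert score == 1.0 and reason == "Alternative 2 matched"

One practical note: the mocker fixture used in the test is provided by the pytest-mock plugin, so running this suite assumes pytest with pytest-mock installed.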
