@@ -56,8 +56,8 @@ class TurnData(BaseModel):
5656 expected_response : Optional [str ] = Field (
5757 default = None , min_length = 1 , description = "Expected response for comparison"
5858 )
59- expected_tool_calls : Optional [list [list [dict [str , Any ]]]] = Field (
60- default = None , description = "Expected tool call sequences"
59+ expected_tool_calls : Optional [list [list [list [ dict [str , Any ] ]]]] = Field (
60+ default = None , description = "Expected tool call sequences (with alternatives) "
6161 )
6262 expected_intent : Optional [str ] = Field (
6363 default = None , min_length = 1 , description = "Expected intent for intent evaluation"
@@ -93,19 +93,145 @@ def validate_turn_metrics(cls, v: Optional[list[str]]) -> Optional[list[str]]:
@classmethod
def validate_expected_tool_calls(
    cls, v: Optional[Any]
) -> Optional[list[list[list[dict[str, Any]]]]]:
    """Normalise and validate ``expected_tool_calls`` when a value is given.

    Two input shapes are accepted:

    * Single set (legacy): ``[[{tool_name, arguments}, ...], ...]`` - wrapped
      into a one-element list of alternatives.
    * Multiple sets: ``[[[{tool_name, arguments}, ...], ...], [[...], ...]]``
      - passed through unchanged.

    Returns:
        ``None`` when ``v`` is ``None``; otherwise the validated data in
        multiple-sets form
        ``[[[{tool_name, arguments}, ...], ...], [[...], ...], ...]``.

    Raises:
        ValueError: If ``v`` is neither ``None`` nor a list, or if one of
            the delegated validation helpers rejects the data.
    """
    if v is None:
        return None

    if isinstance(v, list):
        # Backward compatibility: legacy single-set input is wrapped first.
        # To drop that support later, delete _ensure_multiple_sets_format()
        # and _is_single_set_format(), and pass ``v`` straight to
        # cls._validate_multiple_sets().
        normalised = cls._ensure_multiple_sets_format(v)
        return cls._validate_multiple_sets(normalised)

    raise ValueError("Expected tool calls must be a list")
123+
@classmethod
def _ensure_multiple_sets_format(cls, v: list) -> list[list[list[dict[str, Any]]]]:
    """Return ``v`` in multiple-sets form (backward-compatibility shim).

    Legacy single-set input such as ``[[tool1, tool2], [tool3]]`` is wrapped
    into ``[[[tool1, tool2], [tool3]]]``; input that is already in
    multiple-sets form, e.g. ``[[[tool1]], [[tool2]]]``, is returned as-is.
    The shape decision is delegated to ``cls._is_single_set_format``.
    """
    needs_wrapping = cls._is_single_set_format(v)
    return [v] if needs_wrapping else v
133+
@classmethod
def _validate_multiple_sets(
    cls, data: list[list[list[dict[str, Any]]]]
) -> list[list[list[dict[str, Any]]]]:
    """Validate data that is already in multiple-sets form.

    Steps, in order: reject empty sequences anywhere in ``data``, validate
    every alternative through ``_validate_tool_call_sequences``, then apply
    the empty-alternative constraints to the validated result.

    Returns:
        A new list holding the validated alternatives, in input order.
    """
    cls._reject_empty_sequences(data)

    checked = [cls._validate_tool_call_sequences(option) for option in data]

    cls._validate_empty_set_constraints(checked)
    return checked
151+
@classmethod
def _is_single_set_format(cls, v: list) -> bool:
    """Report whether ``v`` looks like the legacy single-set format.

    The decision is made from the first element (and, when that element is
    an empty list, from the other top-level elements):

    * empty ``v`` -> single set;
    * first element is a dict -> single set;
    * first element is a non-empty list -> single set when its first item is
      a dict, multiple sets otherwise;
    * first element is an empty list -> multiple sets only when ``v`` has more
      than one element and every element is an empty list;
    * anything else -> not single set.
    """
    if not v:
        return True

    head = v[0]
    if isinstance(head, dict):
        return True
    if not isinstance(head, list):
        return False

    if head:
        # A dict inside means sequences of tool calls; a list inside means
        # alternatives holding sequences.
        return isinstance(head[0], dict)

    # Ambiguous leading []: treat as multiple sets only when the whole input
    # is made of several empty lists.
    if len(v) <= 1:
        return True
    return any(not isinstance(item, list) or len(item) != 0 for item in v)
178+
@classmethod
def _reject_empty_sequences(cls, data: list[list[list[dict[str, Any]]]]) -> None:
    """Raise if any alternative in ``data`` contains an empty sequence.

    Args:
        data: Candidate multiple-sets data. It has not been structurally
            validated yet, so alternatives may be of any type.

    Raises:
        ValueError: If a list alternative contains an empty list as one of
            its sequences.
    """
    for alt_index, alternative in enumerate(data):
        # Non-list alternatives (e.g. an int or None) are not iterable and
        # would raise TypeError here. Skip them so the structural validation
        # that follows can report them with a ValueError instead.
        if not isinstance(alternative, list):
            continue
        for seq_index, sequence in enumerate(alternative):
            if isinstance(sequence, list) and not sequence:
                raise ValueError(
                    f"Empty sequence at position {seq_index} in alternative {alt_index} is invalid. "
                    "Use [] for no tools instead."
                )
189+
@classmethod
def _validate_empty_set_constraints(
    cls, result: list[list[list[dict[str, Any]]]]
) -> None:
    """Enforce placement rules for empty alternatives.

    An empty alternative may not be the only one, may not come first, and
    may appear at most once. An empty ``result`` is accepted without checks.

    Raises:
        ValueError: If any of the rules above is violated.
    """
    if not result:
        return

    total = len(result)
    if len(result[0]) == 0:
        # A leading empty alternative is wrong either way; only the message
        # depends on whether other alternatives follow it.
        if total == 1:
            raise ValueError(
                "Empty set cannot be the only alternative. "
                "Empty alternatives should represent fallback scenarios, not primary options."
            )
        raise ValueError(
            "Empty set cannot be the first alternative. "
            "Empty alternatives should come after primary options."
        )

    # More than one empty alternative adds nothing over a single fallback.
    empties = [option for option in result if len(option) == 0]
    if len(empties) > 1:
        raise ValueError(
            f"Found {len(empties)} empty alternatives. "
            "Multiple empty alternatives are redundant - use only one as fallback."
        )
217+
@classmethod
def _is_sequence_of_sequences(cls, seq: list) -> bool:
    """Return True when ``seq`` is non-empty and its first item is a list.

    Only the first item is inspected; an empty ``seq`` yields False.
    """
    if not seq:
        return False
    return isinstance(seq[0], list)
222+
223+ @classmethod
224+ def _validate_tool_call_sequences (cls , v : Any ) -> list [list [dict [str , Any ]]]:
225+ """Validate tool call sequences structure."""
101226 if not isinstance (v , list ):
102227 raise ValueError ("Expected tool calls must be a list of sequences" )
103228
104- result = []
229+ validated_sequences = []
105230 for i , sequence in enumerate (v ):
106231 if not isinstance (sequence , list ):
107232 raise ValueError (f"Sequence { i } must be a list" )
108233
234+ # Empty sequences are already rejected by _reject_empty_sequences
109235 tool_calls = []
110236 for j , tool_call in enumerate (sequence ):
111237 if not isinstance (tool_call , dict ):
@@ -131,8 +257,8 @@ def validate_expected_tool_calls(
131257 }
132258 tool_calls .append (validated_tool_call )
133259
134- result .append (tool_calls )
135- return result
260+ validated_sequences .append (tool_calls )
261+ return validated_sequences
136262
137263
138264class EvaluationData (BaseModel ):
0 commit comments