Skip to content

Commit 87abe8b

Browse files
committed
Implement the 'max_tool_calls' parameter for the Responses API
Test max_tool_calls with builtin and MCP tools. Update input prompt for more consistent tool calling. Resolve merge conflicts. Update integration test.
1 parent a6ddbae commit 87abe8b

File tree

10 files changed

+244
-1
lines changed

10 files changed

+244
-1
lines changed

client-sdks/stainless/openapi.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7440,6 +7440,11 @@ components:
74407440
type: string
74417441
description: >-
74427442
(Optional) System message inserted into the model's context
7443+
max_tool_calls:
7444+
type: integer
7445+
description: >-
7446+
(Optional) Max number of total calls to built-in tools that can be processed
7447+
in a response
74437448
input:
74447449
type: array
74457450
items:
@@ -7798,6 +7803,11 @@ components:
77987803
(Optional) Additional fields to include in the response.
77997804
max_infer_iters:
78007805
type: integer
7806+
max_tool_calls:
7807+
type: integer
7808+
description: >-
7809+
(Optional) Max number of total calls to built-in tools that can be processed
7810+
in a response.
78017811
additionalProperties: false
78027812
required:
78037813
- input
@@ -7879,6 +7889,11 @@ components:
78797889
type: string
78807890
description: >-
78817891
(Optional) System message inserted into the model's context
7892+
max_tool_calls:
7893+
type: integer
7894+
description: >-
7895+
(Optional) Max number of total calls to built-in tools that can be processed
7896+
in a response
78827897
additionalProperties: false
78837898
required:
78847899
- created_at

docs/static/llama-stack-spec.html

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8204,6 +8204,10 @@
82048204
"type": "string",
82058205
"description": "(Optional) System message inserted into the model's context"
82068206
},
8207+
"max_tool_calls": {
8208+
"type": "integer",
8209+
"description": "(Optional) Max number of total calls to built-in tools that can be processed in a response"
8210+
},
82078211
"input": {
82088212
"type": "array",
82098213
"items": {
@@ -8702,6 +8706,10 @@
87028706
},
87038707
"max_infer_iters": {
87048708
"type": "integer"
8709+
},
8710+
"max_tool_calls": {
8711+
"type": "integer",
8712+
"description": "(Optional) Max number of total calls to built-in tools that can be processed in a response."
87058713
}
87068714
},
87078715
"additionalProperties": false,
@@ -8790,6 +8798,10 @@
87908798
"instructions": {
87918799
"type": "string",
87928800
"description": "(Optional) System message inserted into the model's context"
8801+
},
8802+
"max_tool_calls": {
8803+
"type": "integer",
8804+
"description": "(Optional) Max number of total calls to built-in tools that can be processed in a response"
87938805
}
87948806
},
87958807
"additionalProperties": false,

docs/static/llama-stack-spec.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6227,6 +6227,11 @@ components:
62276227
type: string
62286228
description: >-
62296229
(Optional) System message inserted into the model's context
6230+
max_tool_calls:
6231+
type: integer
6232+
description: >-
6233+
(Optional) Max number of total calls to built-in tools that can be processed
6234+
in a response
62306235
input:
62316236
type: array
62326237
items:
@@ -6585,6 +6590,11 @@ components:
65856590
(Optional) Additional fields to include in the response.
65866591
max_infer_iters:
65876592
type: integer
6593+
max_tool_calls:
6594+
type: integer
6595+
description: >-
6596+
(Optional) Max number of total calls to built-in tools that can be processed
6597+
in a response.
65886598
additionalProperties: false
65896599
required:
65906600
- input
@@ -6666,6 +6676,11 @@ components:
66666676
type: string
66676677
description: >-
66686678
(Optional) System message inserted into the model's context
6679+
max_tool_calls:
6680+
type: integer
6681+
description: >-
6682+
(Optional) Max number of total calls to built-in tools that can be processed
6683+
in a response
66696684
additionalProperties: false
66706685
required:
66716686
- created_at

docs/static/stainless-llama-stack-spec.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7440,6 +7440,11 @@ components:
74407440
type: string
74417441
description: >-
74427442
(Optional) System message inserted into the model's context
7443+
max_tool_calls:
7444+
type: integer
7445+
description: >-
7446+
(Optional) Max number of total calls to built-in tools that can be processed
7447+
in a response
74437448
input:
74447449
type: array
74457450
items:
@@ -7798,6 +7803,11 @@ components:
77987803
(Optional) Additional fields to include in the response.
77997804
max_infer_iters:
78007805
type: integer
7806+
max_tool_calls:
7807+
type: integer
7808+
description: >-
7809+
(Optional) Max number of total calls to built-in tools that can be processed
7810+
in a response.
78017811
additionalProperties: false
78027812
required:
78037813
- input
@@ -7879,6 +7889,11 @@ components:
78797889
type: string
78807890
description: >-
78817891
(Optional) System message inserted into the model's context
7892+
max_tool_calls:
7893+
type: integer
7894+
description: >-
7895+
(Optional) Max number of total calls to built-in tools that can be processed
7896+
in a response
78827897
additionalProperties: false
78837898
required:
78847899
- created_at

src/llama_stack/apis/agents/agents.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,7 @@ async def create_openai_response(
750750
"List of guardrails to apply during response generation. Guardrails provide safety and content moderation."
751751
),
752752
] = None,
753+
max_tool_calls: int | None = None,
753754
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
754755
"""Create a model response.
755756
@@ -760,6 +761,7 @@ async def create_openai_response(
760761
:param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
761762
:param include: (Optional) Additional fields to include in the response.
762763
:param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
764+
:param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response.
763765
:returns: An OpenAIResponseObject.
764766
"""
765767
...

src/llama_stack/apis/agents/openai_responses.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -591,6 +591,7 @@ class OpenAIResponseObject(BaseModel):
591591
:param truncation: (Optional) Truncation strategy applied to the response
592592
:param usage: (Optional) Token usage information for the response
593593
:param instructions: (Optional) System message inserted into the model's context
594+
:param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response
594595
"""
595596

596597
created_at: int
@@ -612,6 +613,7 @@ class OpenAIResponseObject(BaseModel):
612613
truncation: str | None = None
613614
usage: OpenAIResponseUsage | None = None
614615
instructions: str | None = None
616+
max_tool_calls: int | None = None
615617

616618

617619
@json_schema_type

src/llama_stack/providers/inline/agents/meta_reference/agents.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ async def create_openai_response(
347347
include: list[str] | None = None,
348348
max_infer_iters: int | None = 10,
349349
guardrails: list[ResponseGuardrail] | None = None,
350+
max_tool_calls: int | None = None,
350351
) -> OpenAIResponseObject:
351352
assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
352353
result = await self.openai_responses_impl.create_openai_response(
@@ -364,6 +365,7 @@ async def create_openai_response(
364365
include,
365366
max_infer_iters,
366367
guardrails,
368+
max_tool_calls,
367369
)
368370
return result # type: ignore[no-any-return]
369371

src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ async def create_openai_response(
255255
include: list[str] | None = None,
256256
max_infer_iters: int | None = 10,
257257
guardrails: list[str | ResponseGuardrailSpec] | None = None,
258+
max_tool_calls: int | None = None,
258259
):
259260
stream = bool(stream)
260261
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
@@ -270,6 +271,11 @@ async def create_openai_response(
270271
if not conversation.startswith("conv_"):
271272
raise InvalidConversationIdError(conversation)
272273

274+
if max_tool_calls is not None and max_tool_calls < 1:
275+
raise ValueError(
276+
f"Invalid 'max_tool_calls': integer below minimum value. Expected a value >= 1, but got {max_tool_calls} instead."
277+
)
278+
273279
stream_gen = self._create_streaming_response(
274280
input=input,
275281
conversation=conversation,
@@ -282,6 +288,7 @@ async def create_openai_response(
282288
tools=tools,
283289
max_infer_iters=max_infer_iters,
284290
guardrail_ids=guardrail_ids,
291+
max_tool_calls=max_tool_calls,
285292
)
286293

287294
if stream:
@@ -331,6 +338,7 @@ async def _create_streaming_response(
331338
tools: list[OpenAIResponseInputTool] | None = None,
332339
max_infer_iters: int | None = 10,
333340
guardrail_ids: list[str] | None = None,
341+
max_tool_calls: int | None = None,
334342
) -> AsyncIterator[OpenAIResponseObjectStream]:
335343
# These should never be None when called from create_openai_response (which sets defaults)
336344
# but we assert here to help mypy understand the types
@@ -373,6 +381,7 @@ async def _create_streaming_response(
373381
safety_api=self.safety_api,
374382
guardrail_ids=guardrail_ids,
375383
instructions=instructions,
384+
max_tool_calls=max_tool_calls,
376385
)
377386

378387
# Stream the response

src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def __init__(
115115
safety_api,
116116
guardrail_ids: list[str] | None = None,
117117
prompt: OpenAIResponsePrompt | None = None,
118+
max_tool_calls: int | None = None,
118119
):
119120
self.inference_api = inference_api
120121
self.ctx = ctx
@@ -141,6 +142,8 @@ def __init__(
141142
self.violation_detected = False
142143
# system message that is inserted into the model's context
143144
self.instructions = instructions
145+
# max number of total calls to built-in tools that can be processed in a response
146+
self.max_tool_calls = max_tool_calls
144147

145148
async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
146149
"""Create a refusal response to replace streaming content."""
@@ -186,6 +189,7 @@ def _snapshot_response(
186189
usage=self.accumulated_usage,
187190
instructions=self.instructions,
188191
prompt=self.prompt,
192+
max_tool_calls=self.max_tool_calls,
189193
)
190194

191195
async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
@@ -893,7 +897,12 @@ async def _coordinate_tool_execution(
893897
) -> AsyncIterator[OpenAIResponseObjectStream]:
894898
"""Coordinate execution of both function and non-function tool calls."""
895899
# Execute non-function tool calls
896-
for tool_call in non_function_tool_calls:
900+
for idx, tool_call in enumerate(non_function_tool_calls):
901+
# Check if total calls to built-in and mcp tools exceeds max_tool_calls
902+
if self.max_tool_calls is not None and idx >= self.max_tool_calls:
903+
logger.info(f"Ignoring built-in and mcp tool call since {idx + 1} exceeds {self.max_tool_calls}.")
904+
break
905+
897906
# Find the item_id for this tool call
898907
matching_item_id = None
899908
for index, item_id in completion_result_data.tool_call_item_ids.items():

0 commit comments

Comments (0)