Commit 835e6c6

Implement the 'max_tool_calls' parameter for the Responses API
- Test max_tool_calls with builtin and mcp tools
- Update input prompt for more consistent tool calling
- Resolve merge conflicts
- Update integration test
- Handle review comments
1 parent 8f4c431 commit 835e6c6

File tree: 9 files changed, +240 −2 lines changed

(Diffs for eight of the nine changed files are expanded below; the remaining file is not expanded in this view.)

client-sdks/stainless/openapi.yml

Lines changed: 15 additions & 0 deletions

@@ -6882,6 +6882,11 @@ components:
           type: string
           description: >-
             (Optional) System message inserted into the model's context
+        max_tool_calls:
+          type: integer
+          description: >-
+            (Optional) Max number of total calls to built-in tools that can be processed
+            in a response
         input:
           type: array
           items:
@@ -7240,6 +7245,11 @@ components:
             (Optional) Additional fields to include in the response.
         max_infer_iters:
           type: integer
+        max_tool_calls:
+          type: integer
+          description: >-
+            (Optional) Max number of total calls to built-in tools that can be processed
+            in a response.
       additionalProperties: false
       required:
         - input
@@ -7321,6 +7331,11 @@ components:
           type: string
           description: >-
             (Optional) System message inserted into the model's context
+        max_tool_calls:
+          type: integer
+          description: >-
+            (Optional) Max number of total calls to built-in tools that can be processed
+            in a response
       additionalProperties: false
       required:
         - created_at

docs/static/llama-stack-spec.yaml

Lines changed: 15 additions & 0 deletions

@@ -6166,6 +6166,11 @@ components:
           type: string
           description: >-
             (Optional) System message inserted into the model's context
+        max_tool_calls:
+          type: integer
+          description: >-
+            (Optional) Max number of total calls to built-in tools that can be processed
+            in a response
         input:
           type: array
           items:
@@ -6524,6 +6529,11 @@ components:
             (Optional) Additional fields to include in the response.
         max_infer_iters:
           type: integer
+        max_tool_calls:
+          type: integer
+          description: >-
+            (Optional) Max number of total calls to built-in tools that can be processed
+            in a response.
       additionalProperties: false
       required:
         - input
@@ -6605,6 +6615,11 @@ components:
           type: string
           description: >-
             (Optional) System message inserted into the model's context
+        max_tool_calls:
+          type: integer
+          description: >-
+            (Optional) Max number of total calls to built-in tools that can be processed
+            in a response
       additionalProperties: false
       required:
         - created_at

docs/static/stainless-llama-stack-spec.yaml

Lines changed: 15 additions & 0 deletions

@@ -6882,6 +6882,11 @@ components:
           type: string
           description: >-
             (Optional) System message inserted into the model's context
+        max_tool_calls:
+          type: integer
+          description: >-
+            (Optional) Max number of total calls to built-in tools that can be processed
+            in a response
         input:
           type: array
           items:
@@ -7240,6 +7245,11 @@ components:
             (Optional) Additional fields to include in the response.
         max_infer_iters:
           type: integer
+        max_tool_calls:
+          type: integer
+          description: >-
+            (Optional) Max number of total calls to built-in tools that can be processed
+            in a response.
       additionalProperties: false
       required:
         - input
@@ -7321,6 +7331,11 @@ components:
           type: string
           description: >-
             (Optional) System message inserted into the model's context
+        max_tool_calls:
+          type: integer
+          description: >-
+            (Optional) Max number of total calls to built-in tools that can be processed
+            in a response
       additionalProperties: false
       required:
         - created_at
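
All three spec files above carry the same schema addition. For reference, a minimal request sketch exercising the new field — a hedged example, not part of this commit: the base URL, route, and model id below are assumptions about a local deployment, and the web_search tool is used only as a representative built-in tool.

```python
import requests

# Assumed local Llama Stack server and OpenAI-compatible Responses route;
# adjust both to your deployment. The model id is a placeholder.
resp = requests.post(
    "http://localhost:8321/v1/responses",
    json={
        "model": "llama3.2:3b",
        "input": "Find today's weather in Paris and in Rome.",
        "tools": [{"type": "web_search"}],  # built-in tool; its calls count toward the cap
        "max_tool_calls": 2,                # at most two built-in/MCP tool calls total
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json().get("max_tool_calls"))  # the cap is echoed back on the response object
```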

src/llama_stack/apis/agents/agents.py

Lines changed: 2 additions & 0 deletions

@@ -87,6 +87,7 @@ async def create_openai_response(
                 "List of guardrails to apply during response generation. Guardrails provide safety and content moderation."
             ),
         ] = None,
+        max_tool_calls: int | None = None,
     ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
         """Create a model response.

@@ -97,6 +98,7 @@ async def create_openai_response(
         :param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
         :param include: (Optional) Additional fields to include in the response.
         :param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
+        :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response.
         :returns: An OpenAIResponseObject.
         """
         ...
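
At the Python API level, the new keyword rides alongside the existing optional parameters. A usage sketch, with hypothetical wiring: `agents_api` stands for any implementation of this protocol, the model id is a placeholder, and the call must run inside an async context.

```python
# Inside an async context; agents_api is any implementation of the protocol above.
response = await agents_api.create_openai_response(
    input="Search for today's weather in Paris and in Rome.",
    model="llama3.2:3b",             # placeholder model id
    tools=[{"type": "web_search"}],  # built-in tool calls count toward the cap
    max_tool_calls=1,                # allow at most one built-in/MCP tool call
)
print(response.max_tool_calls)       # echoed back on OpenAIResponseObject
```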

src/llama_stack/apis/agents/openai_responses.py

Lines changed: 2 additions & 0 deletions

@@ -594,6 +594,7 @@ class OpenAIResponseObject(BaseModel):
     :param truncation: (Optional) Truncation strategy applied to the response
     :param usage: (Optional) Token usage information for the response
     :param instructions: (Optional) System message inserted into the model's context
+    :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response
     """

     created_at: int
@@ -615,6 +616,7 @@ class OpenAIResponseObject(BaseModel):
     truncation: str | None = None
     usage: OpenAIResponseUsage | None = None
     instructions: str | None = None
+    max_tool_calls: int | None = None


 @json_schema_type

src/llama_stack/providers/inline/agents/meta_reference/agents.py

Lines changed: 2 additions & 0 deletions

@@ -102,6 +102,7 @@ async def create_openai_response(
         include: list[str] | None = None,
         max_infer_iters: int | None = 10,
         guardrails: list[ResponseGuardrail] | None = None,
+        max_tool_calls: int | None = None,
     ) -> OpenAIResponseObject:
         assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
         result = await self.openai_responses_impl.create_openai_response(
@@ -119,6 +120,7 @@ async def create_openai_response(
             include,
             max_infer_iters,
             guardrails,
+            max_tool_calls,
         )
         return result  # type: ignore[no-any-return]

src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py

Lines changed: 7 additions & 0 deletions

@@ -255,6 +255,7 @@ async def create_openai_response(
         include: list[str] | None = None,
         max_infer_iters: int | None = 10,
         guardrails: list[str | ResponseGuardrailSpec] | None = None,
+        max_tool_calls: int | None = None,
     ):
         stream = bool(stream)
         text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
@@ -270,6 +271,9 @@ async def create_openai_response(
             if not conversation.startswith("conv_"):
                 raise InvalidConversationIdError(conversation)

+        if max_tool_calls is not None and max_tool_calls < 1:
+            raise ValueError(f"Invalid {max_tool_calls=}; should be >= 1")
+
         stream_gen = self._create_streaming_response(
             input=input,
             conversation=conversation,
@@ -282,6 +286,7 @@ async def create_openai_response(
             tools=tools,
             max_infer_iters=max_infer_iters,
             guardrail_ids=guardrail_ids,
+            max_tool_calls=max_tool_calls,
         )

         if stream:
@@ -331,6 +336,7 @@ async def _create_streaming_response(
         tools: list[OpenAIResponseInputTool] | None = None,
         max_infer_iters: int | None = 10,
         guardrail_ids: list[str] | None = None,
+        max_tool_calls: int | None = None,
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         # These should never be None when called from create_openai_response (which sets defaults)
         # but we assert here to help mypy understand the types
@@ -373,6 +379,7 @@ async def _create_streaming_response(
             safety_api=self.safety_api,
             guardrail_ids=guardrail_ids,
             instructions=instructions,
+            max_tool_calls=max_tool_calls,
         )

         # Stream the response
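
The new guard fails fast on non-positive values before any streaming state is set up. A sketch of the expected behavior as a test — hypothetical `impl` fixture and placeholder model id, not part of this commit; assumes pytest-asyncio is installed.

```python
import pytest


@pytest.mark.asyncio
async def test_max_tool_calls_must_be_positive(impl):
    # max_tool_calls=0 (or any value < 1) is rejected with a ValueError.
    with pytest.raises(ValueError, match="should be >= 1"):
        await impl.create_openai_response(
            input="hello",
            model="llama3.2:3b",  # placeholder model id
            max_tool_calls=0,
        )
```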

src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py

Lines changed: 16 additions & 2 deletions

@@ -115,6 +115,7 @@ def __init__(
         safety_api,
         guardrail_ids: list[str] | None = None,
         prompt: OpenAIResponsePrompt | None = None,
+        max_tool_calls: int | None = None,
     ):
         self.inference_api = inference_api
         self.ctx = ctx
@@ -126,6 +127,10 @@ def __init__(
         self.safety_api = safety_api
         self.guardrail_ids = guardrail_ids or []
         self.prompt = prompt
+        # System message that is inserted into the model's context
+        self.instructions = instructions
+        # Max number of total calls to built-in tools that can be processed in a response
+        self.max_tool_calls = max_tool_calls
         self.sequence_number = 0
         # Store MCP tool mapping that gets built during tool processing
         self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = (
@@ -139,8 +144,8 @@ def __init__(
         self.accumulated_usage: OpenAIResponseUsage | None = None
         # Track if we've sent a refusal response
         self.violation_detected = False
-        # system message that is inserted into the model's context
-        self.instructions = instructions
+        # Track total calls made to built-in tools
+        self.accumulated_builtin_tool_calls = 0

     async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
         """Create a refusal response to replace streaming content."""
@@ -186,6 +191,7 @@ def _snapshot_response(
             usage=self.accumulated_usage,
             instructions=self.instructions,
             prompt=self.prompt,
+            max_tool_calls=self.max_tool_calls,
         )

     async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
@@ -894,6 +900,11 @@ async def _coordinate_tool_execution(
         """Coordinate execution of both function and non-function tool calls."""
         # Execute non-function tool calls
         for tool_call in non_function_tool_calls:
+            # Check if total calls made to built-in and mcp tools exceed max_tool_calls
+            if self.max_tool_calls is not None and self.accumulated_builtin_tool_calls >= self.max_tool_calls:
+                logger.info(f"Ignoring built-in and mcp tool call since reached the limit of {self.max_tool_calls=}.")
+                break
+
             # Find the item_id for this tool call
             matching_item_id = None
             for index, item_id in completion_result_data.tool_call_item_ids.items():
@@ -974,6 +985,9 @@ async def _coordinate_tool_execution(
             if tool_response_message:
                 next_turn_messages.append(tool_response_message)

+            # Track number of calls made to built-in and mcp tools
+            self.accumulated_builtin_tool_calls += 1
+
         # Execute function tool calls (client-side)
         for tool_call in function_tool_calls:
             # Find the item_id for this tool call from our tracking dictionary
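
Stripped of the orchestration around it, the cap is a counter checked before each built-in/MCP dispatch; function (client-side) tool calls are deliberately left uncapped. A standalone sketch of that semantics, with illustrative names only:

```python
def run_builtin_calls(tool_calls: list[str], max_tool_calls: int | None = None) -> list[str]:
    """Illustrative reduction of the loop above: once the counter reaches
    the cap, remaining built-in/MCP calls are skipped rather than executed."""
    executed = 0
    results = []
    for call in tool_calls:
        if max_tool_calls is not None and executed >= max_tool_calls:
            break  # mirrors the diff: ignore the rest of the built-in calls
        results.append(f"ran {call}")  # stand-in for real tool dispatch
        executed += 1
    return results


assert run_builtin_calls(["search"] * 3, max_tool_calls=2) == ["ran search", "ran search"]
assert len(run_builtin_calls(["search"] * 3)) == 3  # no cap set: everything runs
```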
