
Commit 899c00c

Base implementation of non-streaming Responses API
1 parent b52ea28 commit 899c00c

5 files changed: +933 −13 lines

5 files changed

+933
-13
lines changed

src/app/endpoints/query.py

Lines changed: 45 additions & 11 deletions
@@ -214,26 +214,33 @@ async def get_topic_summary(
     )


-@router.post("/query", responses=query_response)
-@authorize(Action.QUERY)
-async def query_endpoint_handler( # pylint: disable=R0914
+async def query_endpoint_handler_base( # pylint: disable=R0914
     request: Request,
     query_request: QueryRequest,
     auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
-    mcp_headers: dict[str, dict[str, str]] = Depends(mcp_headers_dependency),
+    mcp_headers: dict[str, dict[str, str]],
+    retrieve_response_func: Any,
+    get_topic_summary_func: Any,
 ) -> QueryResponse:
     """
-    Handle request to the /query endpoint.
+    Base handler for query endpoints (shared by Agent API and Responses API).

-    Processes a POST request to the /query endpoint, forwarding the
-    user's query to a selected Llama Stack LLM or agent and
-    returning the generated response.
+    Processes a POST request to a query endpoint, forwarding the
+    user's query to a selected Llama Stack LLM and returning the generated response.

     Validates configuration and authentication, selects the appropriate model
     and provider, retrieves the LLM response, updates metrics, and optionally
     stores a transcript of the interaction. Handles connection errors to the
     Llama Stack service by returning an HTTP 500 error.

+    Args:
+        request: The FastAPI request object
+        query_request: The query request containing the user's question
+        auth: Authentication tuple from dependency
+        mcp_headers: MCP headers from dependency
+        retrieve_response_func: The retrieve_response function to use (Agent or Responses API)
+        get_topic_summary_func: The get_topic_summary function to use (Agent or Responses API)
+
     Returns:
         QueryResponse: Contains the conversation ID and the LLM-generated response.
     """
@@ -288,7 +295,7 @@ async def query_endpoint_handler( # pylint: disable=R0914
             ),
         )
         summary, conversation_id, referenced_documents, token_usage = (
-            await retrieve_response(
+            await retrieve_response_func(
                 client,
                 llama_stack_model_id,
                 query_request,
@@ -305,8 +312,8 @@ async def query_endpoint_handler( # pylint: disable=R0914
                 session.query(UserConversation).filter_by(id=conversation_id).first()
             )
             if not existing_conversation:
-                topic_summary = await get_topic_summary(
-                    query_request.query, client, model_id
+                topic_summary = await get_topic_summary_func(
+                    query_request.query, client, llama_stack_model_id
                 )
             # Convert RAG chunks to dictionary format once for reuse
             logger.info("Processing RAG chunks...")
@@ -416,6 +423,33 @@ async def query_endpoint_handler( # pylint: disable=R0914
         ) from e


+@router.post("/query", responses=query_response)
+@authorize(Action.QUERY)
+async def query_endpoint_handler(
+    request: Request,
+    query_request: QueryRequest,
+    auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
+    mcp_headers: dict[str, dict[str, str]] = Depends(mcp_headers_dependency),
+) -> QueryResponse:
+    """
+    Handle request to the /query endpoint using Agent API.
+
+    This is a wrapper around query_endpoint_handler_base that provides
+    the Agent API specific retrieve_response and get_topic_summary functions.
+
+    Returns:
+        QueryResponse: Contains the conversation ID and the LLM-generated response.
+    """
+    return await query_endpoint_handler_base(
+        request=request,
+        query_request=query_request,
+        auth=auth,
+        mcp_headers=mcp_headers,
+        retrieve_response_func=retrieve_response,
+        get_topic_summary_func=get_topic_summary,
+    )
+
+
 def select_model_and_provider_id(
     models: ModelListResponse, model_id: str | None, provider_id: str | None
 ) -> tuple[str, str, str]:
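
The Responses API side of this commit lives in the other changed files, which are not shown in this excerpt. Purely to illustrate how query_endpoint_handler_base is meant to be reused, here is a hedged sketch of what a second wrapper could look like if it were written alongside the Agent API wrapper above (so that router, QueryRequest, and the other names from query.py are in scope); the route path, the handler name, and both placeholder callbacks are assumptions for illustration, not code from the commit:

# Hypothetical sketch only. The Responses API wiring is in files outside this
# diff, so the endpoint path, handler name, and the two placeholder callbacks
# below are assumptions made for illustration, not identifiers from the commit.


async def retrieve_response_responses_api(  # placeholder, not from the commit
    client: Any,
    llama_stack_model_id: str,
    query_request: QueryRequest,
    *args: Any,
    **kwargs: Any,
) -> tuple[Any, str, Any, Any]:
    """Would call the non-streaming Responses API and return
    (summary, conversation_id, referenced_documents, token_usage)."""
    raise NotImplementedError


async def get_topic_summary_responses_api(  # placeholder, not from the commit
    question: str, client: Any, model_id: str
) -> str:
    """Would generate the topic summary through the Responses API."""
    raise NotImplementedError


@router.post("/v2/query")  # placeholder route; the real path is not shown here
@authorize(Action.QUERY)
async def query_v2_endpoint_handler(
    request: Request,
    query_request: QueryRequest,
    auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
    mcp_headers: dict[str, dict[str, str]] = Depends(mcp_headers_dependency),
) -> QueryResponse:
    """Reuse the shared base handler with Responses API callbacks."""
    return await query_endpoint_handler_base(
        request=request,
        query_request=query_request,
        auth=auth,
        mcp_headers=mcp_headers,
        retrieve_response_func=retrieve_response_responses_api,
        get_topic_summary_func=get_topic_summary_responses_api,
    )

The only difference from the Agent API wrapper is which pair of callbacks gets injected; validation, authentication, metrics, transcript storage, and error handling all stay in the shared base handler.
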
