@@ -214,26 +214,33 @@ async def get_topic_summary(
214214 )
215215
216216
217- @router.post("/query", responses=query_response)
218- @authorize(Action.QUERY)
219- async def query_endpoint_handler(  # pylint: disable=R0914
217+ async def query_endpoint_handler_base(  # pylint: disable=R0914
220218 request: Request,
221219 query_request: QueryRequest,
222220 auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
223- mcp_headers: dict[str, dict[str, str]] = Depends(mcp_headers_dependency),
221+ mcp_headers: dict[str, dict[str, str]],
222+ retrieve_response_func: Any,
223+ get_topic_summary_func: Any,
224224 ) -> QueryResponse:
225225 """
226- Handle request to the /query endpoint.
226+ Base handler for query endpoints (shared by Agent API and Responses API).
227227
228- Processes a POST request to the /query endpoint, forwarding the
229- user's query to a selected Llama Stack LLM or agent and
230- returning the generated response.
228+ Processes a POST request to a query endpoint, forwarding the
229+ user's query to a selected Llama Stack LLM and returning the generated response.
231230
232231 Validates configuration and authentication, selects the appropriate model
233232 and provider, retrieves the LLM response, updates metrics, and optionally
234233 stores a transcript of the interaction. Handles connection errors to the
235234 Llama Stack service by returning an HTTP 500 error.
236235
236+ Args:
237+ request: The FastAPI request object
238+ query_request: The query request containing the user's question
239+ auth: Authentication tuple from dependency
240+ mcp_headers: MCP headers from dependency
241+ retrieve_response_func: The retrieve_response function to use (Agent or Responses API)
242+ get_topic_summary_func: The get_topic_summary function to use (Agent or Responses API)
243+
237244 Returns:
238245 QueryResponse: Contains the conversation ID and the LLM-generated response.
239246 """
@@ -288,7 +295,7 @@ async def query_endpoint_handler( # pylint: disable=R0914
288295 ),
289296 )
290297 summary, conversation_id, referenced_documents, token_usage = (
291- await retrieve_response(
298+ await retrieve_response_func(
292299 client,
293300 llama_stack_model_id,
294301 query_request,
@@ -305,8 +312,8 @@ async def query_endpoint_handler( # pylint: disable=R0914
305312 session.query(UserConversation).filter_by(id=conversation_id).first()
306313 )
307314 if not existing_conversation:
308- topic_summary = await get_topic_summary(
309- query_request.query, client, model_id
315+ topic_summary = await get_topic_summary_func(
316+ query_request.query, client, llama_stack_model_id
310317 )
311318 # Convert RAG chunks to dictionary format once for reuse
312319 logger.info("Processing RAG chunks...")
@@ -416,6 +423,33 @@ async def query_endpoint_handler( # pylint: disable=R0914
416423 ) from e
417424
418425
426+ @router.post("/query", responses=query_response)
427+ @authorize(Action.QUERY)
428+ async def query_endpoint_handler(
429+ request: Request,
430+ query_request: QueryRequest,
431+ auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
432+ mcp_headers: dict[str, dict[str, str]] = Depends(mcp_headers_dependency),
433+ ) -> QueryResponse:
434+ """
435+ Handle request to the /query endpoint using Agent API.
436+
437+ This is a wrapper around query_endpoint_handler_base that provides
438+ the Agent API specific retrieve_response and get_topic_summary functions.
439+
440+ Returns:
441+ QueryResponse: Contains the conversation ID and the LLM-generated response.
442+ """
443+ return await query_endpoint_handler_base(
444+ request=request,
445+ query_request=query_request,
446+ auth=auth,
447+ mcp_headers=mcp_headers,
448+ retrieve_response_func=retrieve_response,
449+ get_topic_summary_func=get_topic_summary,
450+ )
451+
452+
419453 def select_model_and_provider_id(
420454 models: ModelListResponse, model_id: str | None, provider_id: str | None
421455 ) -> tuple[str, str, str]:
0 commit comments