Skip to content

Commit 576341d

Browse files
committed
Delay setup_model_metrics() until /metrics is called
This patch moves the setup_model_metrics() call from service startup to the first time the /metrics endpoint is called. This speeds up the lightspeed-stack service initialization and also makes lightspeed-stack more resilient regarding service initialization order, because it no longer requires llama-stack to be started first (setup_model_metrics() tries to connect to llama-stack and fetch the list of models from it). Signed-off-by: Lucas Alvares Gomes <[email protected]>
1 parent 423272b commit 576341d

File tree

5 files changed

+35
-8
lines changed

5 files changed

+35
-8
lines changed

src/app/endpoints/metrics.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,15 @@
77
CONTENT_TYPE_LATEST,
88
)
99

10+
from metrics.utils import setup_model_metrics
11+
1012
router = APIRouter(tags=["metrics"])
1113

1214

1315
@router.get("/metrics", response_class=PlainTextResponse)
14-
def metrics_endpoint_handler(_request: Request) -> PlainTextResponse:
16+
async def metrics_endpoint_handler(_request: Request) -> PlainTextResponse:
1517
"""Handle request to the /metrics endpoint."""
18+
# Setup the model metrics if not already done. This is a one-time setup
19+
# and will not be run again on subsequent calls to this endpoint
20+
await setup_model_metrics()
1621
return PlainTextResponse(generate_latest(), media_type=CONTENT_TYPE_LATEST)

src/app/main.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from configuration import configuration
1111
from log import get_logger
1212
import metrics
13-
from metrics.utils import setup_model_metrics
1413
from utils.common import register_mcp_servers_async
1514
import version
1615

@@ -81,6 +80,4 @@ async def startup_event() -> None:
8180
logger.info("Registering MCP servers")
8281
await register_mcp_servers_async(logger, configuration.configuration)
8382
get_logger("app.endpoints.handlers")
84-
logger.info("Setting up model metrics")
85-
await setup_model_metrics()
8683
logger.info("App startup complete")

src/metrics/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@
44
from client import LlamaStackClientHolder, AsyncLlamaStackClientHolder
55
from log import get_logger
66
import metrics
7+
from utils.common import run_once_async
78

89
logger = get_logger(__name__)
910

1011

12+
@run_once_async
1113
async def setup_model_metrics() -> None:
1214
"""Perform setup of all metrics related to LLM model and provider."""
15+
logger.info("Setting up model metrics")
1316
model_list = []
1417
if configuration.llama_stack_configuration.use_as_library_client:
1518
model_list = await AsyncLlamaStackClientHolder().get_client().models.list()
@@ -48,3 +51,4 @@ async def setup_model_metrics() -> None:
4851
model_name,
4952
default_model_value,
5053
)
54+
logger.info("Model metrics setup complete")

src/utils/common.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
"""Common utilities for the project."""
22

3-
from typing import Any, List, cast
3+
import asyncio
4+
from functools import wraps
45
from logging import Logger
6+
from typing import Any, List, cast, Callable
57

68
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
7-
89
from llama_stack.distribution.library_client import (
910
AsyncLlamaStackAsLibraryClient,
1011
)
@@ -103,3 +104,18 @@ def _register_mcp_toolgroups_sync(
103104

104105
client.toolgroups.register(**registration_params)
105106
logger.debug("MCP server %s registered successfully", mcp.name)
107+
108+
109+
def run_once_async(func: Callable) -> Callable:
    """Decorate an async function so it runs successfully only once.

    The first call schedules ``func`` as a task on the running event loop.
    Concurrent and later calls await that same task, so they all receive the
    same result without running ``func`` again.

    If the task fails or is cancelled, it is forgotten so that the next call
    starts a fresh attempt instead of re-raising the cached failure forever.
    Callers already waiting on the failed attempt still receive its exception.

    Args:
        func: The coroutine function to wrap. The arguments of the call that
            starts the task are the ones used; arguments of later calls are
            ignored while a task is cached.

    Returns:
        An async wrapper with the same metadata as ``func``.
    """
    task = None

    def _forget_if_failed(done: Any) -> None:
        """Drop the cached task when it ended in an error or cancellation."""
        nonlocal task
        if task is not done:
            return
        # Reading exception() on a cancelled task raises, so check that first.
        if done.cancelled() or done.exception() is not None:
            task = None

    @wraps(func)
    async def wrapper(*args: Any, **kwargs: Any) -> Any:
        nonlocal task
        if task is None:
            loop = asyncio.get_running_loop()
            task = loop.create_task(func(*args, **kwargs))
            # Registered before any waiter, so the cache is already cleared
            # by the time a failed caller resumes and possibly retries.
            task.add_done_callback(_forget_if_failed)
        # Shield the shared task: one caller being cancelled (for example a
        # client disconnecting) must not cancel the work for everyone else.
        return await asyncio.shield(task)

    return wrapper

tests/unit/app/endpoints/test_metrics.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,20 @@
33
from app.endpoints.metrics import metrics_endpoint_handler
44

55

6-
def test_metrics_endpoint():
6+
async def test_metrics_endpoint(mocker):
77
"""Test the metrics endpoint handler."""
8-
response = metrics_endpoint_handler(None)
8+
mock_setup_metrics = mocker.patch(
9+
"app.endpoints.metrics.setup_model_metrics", return_value=None
10+
)
11+
response = await metrics_endpoint_handler(None)
912
assert response is not None
1013
assert response.status_code == 200
1114
assert "text/plain" in response.headers["Content-Type"]
1215

1316
response_body = response.body.decode()
1417

18+
# Assert metrics were set up
19+
mock_setup_metrics.assert_called_once()
1520
# Check if the response contains Prometheus metrics format
1621
assert "# TYPE ls_rest_api_calls_total counter" in response_body
1722
assert "# TYPE ls_response_duration_seconds histogram" in response_body

0 commit comments

Comments
 (0)