
Commit 1acf3da

umagoradofuchs authored and committed
Delay setup_model_metrics() until /metrics is called
This patch moves setup_model_metrics() from the service startup to the first time the /metrics endpoint is called. This speeds up the lightspeed-stack service initialization and also makes lightspeed-stack more resilient to service initialization order, because it no longer requires llama-stack to be started first (setup_model_metrics() connects to llama-stack and fetches the list of models from it).

Signed-off-by: Lucas Alvares Gomes <[email protected]>

build the lightspeed-stack image from the local Containerfile instead of using the published one
fix tests
1 parent cafb3ef commit 1acf3da

File tree: 8 files changed (+42, -13 lines)

Containerfile

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ RUN pip3.12 install uv
 COPY ${LSC_SOURCE_DIR}/src ./src
 COPY ${LSC_SOURCE_DIR}/pyproject.toml ${LSC_SOURCE_DIR}/LICENSE ${LSC_SOURCE_DIR}/README.md ${LSC_SOURCE_DIR}/uv.lock ./
 
-RUN uv sync --locked --no-install-project --no-dev
+RUN uv sync --locked --no-dev
 
 
 # Final image without uv package manager

docker-compose.yaml

Lines changed: 4 additions & 2 deletions
@@ -14,7 +14,9 @@ services:
       - lightspeednet
 
   lightspeed-stack:
-    image: quay.io/lightspeed-core/lightspeed-stack:latest
+    build:
+      context: .
+      dockerfile: Containerfile
     container_name: lightspeed-stack
     ports:
       - "8080:8080"
@@ -29,4 +31,4 @@ services:
 
 networks:
   lightspeednet:
-    driver: bridge
+    driver: bridge

src/app/endpoints/metrics.py

Lines changed: 6 additions & 1 deletion
@@ -7,10 +7,15 @@
     CONTENT_TYPE_LATEST,
 )
 
+from metrics.utils import setup_model_metrics
+
 router = APIRouter(tags=["metrics"])
 
 
 @router.get("/metrics", response_class=PlainTextResponse)
-def metrics_endpoint_handler(_request: Request) -> PlainTextResponse:
+async def metrics_endpoint_handler(_request: Request) -> PlainTextResponse:
     """Handle request to the /metrics endpoint."""
+    # Setup the model metrics if not already done. This is a one-time setup
+    # and will not be run again on subsequent calls to this endpoint
+    await setup_model_metrics()
     return PlainTextResponse(generate_latest(), media_type=CONTENT_TYPE_LATEST)
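
For context, here is a minimal, self-contained sketch of the same lazy, first-request setup pattern outside the project. It uses a plain module-level flag instead of the run_once_async decorator introduced later in this commit; FastAPI and prometheus_client are assumed to be installed, and the setup function body is only a placeholder for the real work of querying llama-stack.

import asyncio

from fastapi import FastAPI, Request
from fastapi.responses import PlainTextResponse
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest

app = FastAPI()
_setup_done = False


async def setup_model_metrics() -> None:
    """Placeholder for the real setup, which fetches the model list from llama-stack."""
    await asyncio.sleep(0)  # stand-in for async I/O


@app.get("/metrics", response_class=PlainTextResponse)
async def metrics_endpoint_handler(_request: Request) -> PlainTextResponse:
    """Serve Prometheus metrics, running the one-time setup on the first call."""
    global _setup_done
    if not _setup_done:
        await setup_model_metrics()
        _setup_done = True
    return PlainTextResponse(generate_latest(), media_type=CONTENT_TYPE_LATEST)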

src/app/main.py

Lines changed: 0 additions & 3 deletions
@@ -10,7 +10,6 @@
 from configuration import configuration
 from log import get_logger
 import metrics
-from metrics.utils import setup_model_metrics
 from utils.common import register_mcp_servers_async
 import version
 
@@ -81,6 +80,4 @@ async def startup_event() -> None:
     logger.info("Registering MCP servers")
     await register_mcp_servers_async(logger, configuration.configuration)
     get_logger("app.endpoints.handlers")
-    logger.info("Setting up model metrics")
-    await setup_model_metrics()
     logger.info("App startup complete")

src/metrics/utils.py

Lines changed: 4 additions & 0 deletions
@@ -4,12 +4,15 @@
 from client import LlamaStackClientHolder, AsyncLlamaStackClientHolder
 from log import get_logger
 import metrics
+from utils.common import run_once_async
 
 logger = get_logger(__name__)
 
 
+@run_once_async
 async def setup_model_metrics() -> None:
     """Perform setup of all metrics related to LLM model and provider."""
+    logger.info("Setting up model metrics")
     model_list = []
     if configuration.llama_stack_configuration.use_as_library_client:
         model_list = await AsyncLlamaStackClientHolder().get_client().models.list()
@@ -48,3 +51,4 @@ async def setup_model_metrics() -> None:
             model_name,
             default_model_value,
         )
+    logger.info("Model metrics setup complete")

src/utils/common.py

Lines changed: 18 additions & 2 deletions
@@ -1,10 +1,11 @@
 """Common utilities for the project."""
 
-from typing import Any, List, cast
+import asyncio
+from functools import wraps
 from logging import Logger
+from typing import Any, List, cast, Callable
 
 from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-
 from llama_stack.distribution.library_client import (
     AsyncLlamaStackAsLibraryClient,
 )
@@ -103,3 +104,18 @@ def _register_mcp_toolgroups_sync(
 
         client.toolgroups.register(**registration_params)
         logger.debug("MCP server %s registered successfully", mcp.name)
+
+
+def run_once_async(func: Callable) -> Callable:
+    """Decorate an async function to run only once."""
+    task = None
+
+    @wraps(func)
+    async def wrapper(*args: Any, **kwargs: Any) -> Any:
+        nonlocal task
+        if task is None:
+            loop = asyncio.get_running_loop()
+            task = loop.create_task(func(*args, **kwargs))
+        return await task
+
+    return wrapper
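
To illustrate the decorator's semantics, here is a small, self-contained sketch: the decorator body is copied from the diff above, while expensive_setup and the call counting are illustrative stand-ins. The first caller creates the task; concurrent and later callers await the same cached task, so the wrapped coroutine body runs exactly once per process. Note that this also caches a failure, since the stored task is never reset after an exception.

import asyncio
from functools import wraps
from typing import Any, Callable


def run_once_async(func: Callable) -> Callable:
    """Decorate an async function to run only once (copied from the diff above)."""
    task = None

    @wraps(func)
    async def wrapper(*args: Any, **kwargs: Any) -> Any:
        nonlocal task
        if task is None:
            loop = asyncio.get_running_loop()
            task = loop.create_task(func(*args, **kwargs))
        return await task

    return wrapper


call_count = 0


@run_once_async
async def expensive_setup() -> str:
    """Illustrative stand-in for setup_model_metrics()."""
    global call_count
    call_count += 1
    await asyncio.sleep(0.1)  # simulate the call out to llama-stack
    return "ready"


async def main() -> None:
    # Three concurrent callers share the single underlying task...
    results = await asyncio.gather(
        expensive_setup(), expensive_setup(), expensive_setup()
    )
    assert results == ["ready", "ready", "ready"]
    # ...so the decorated body ran exactly once.
    assert call_count == 1


asyncio.run(main())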

tests/e2e/features/rest_api.feature

Lines changed: 2 additions & 2 deletions
@@ -10,7 +10,7 @@ Feature: REST API tests
 
   Scenario: Check if service report proper readiness state
     Given the system is in default state
-    When I access REST API endpoint "readiness" using HTTP GET method
+    When I access endpoint "readiness" using HTTP GET method
     Then The status code of the response is 200
     And The body of the response has the following schema
     """
@@ -28,7 +28,7 @@ Feature: REST API tests
 
   Scenario: Check if service report proper liveness state
     Given the system is in default state
-    When I access REST API endpoint "liveness" using HTTP GET method
+    When I access endpoint "liveness" using HTTP GET method
    Then The status code of the response is 200
     And The body of the response has the following schema
     """

tests/unit/app/endpoints/test_metrics.py

Lines changed: 7 additions & 2 deletions
@@ -3,15 +3,20 @@
 from app.endpoints.metrics import metrics_endpoint_handler
 
 
-def test_metrics_endpoint():
+async def test_metrics_endpoint(mocker):
     """Test the metrics endpoint handler."""
-    response = metrics_endpoint_handler(None)
+    mock_setup_metrics = mocker.patch(
+        "app.endpoints.metrics.setup_model_metrics", return_value=None
+    )
+    response = await metrics_endpoint_handler(None)
     assert response is not None
     assert response.status_code == 200
     assert "text/plain" in response.headers["Content-Type"]
 
     response_body = response.body.decode()
 
+    # Assert metrics were set up
+    mock_setup_metrics.assert_called_once()
     # Check if the response contains Prometheus metrics format
     assert "# TYPE ls_rest_api_calls_total counter" in response_body
     assert "# TYPE ls_response_duration_seconds histogram" in response_body
