
Commit 12cebf6

fix: add the stability level to the help message of the metric
1 parent e8973c9 commit 12cebf6

16 files changed: +47 −42
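
The same pattern is applied in every file below: the help text of each client_golang collector is wrapped so that it carries the component-base stability level as an "[ALPHA]" prefix (these collectors are registered through controller-runtime's registry, which does not add the prefix on its own). A minimal, self-contained sketch of the pattern, assuming only the two libraries the diff already imports; the "example" subsystem and metric name are hypothetical and used for illustration only:

// Sketch of the pattern this commit applies: bake the stability level into the
// Help string of a plain client_golang collector by hand.
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	compbasemetrics "k8s.io/component-base/metrics"
)

func main() {
	// compbasemetrics.ALPHA renders as "ALPHA", so the help text becomes
	// "[ALPHA] Count of example requests."
	help := fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Count of example requests.")

	exampleCounter := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: "example", // hypothetical subsystem, for illustration only
			Name:      "requests_total",
			// scrapes will show: # HELP example_requests_total [ALPHA] Count of example requests.
			Help: help,
		},
		[]string{},
	)
	_ = exampleCounter

	fmt.Println(help) // prints: [ALPHA] Count of example requests.
}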

pkg/bbr/metrics/metrics.go (+5 −3)
@@ -17,9 +17,11 @@ limitations under the License.
 package metrics
 
 import (
+	"fmt"
 	"sync"
 
 	"github.com/prometheus/client_golang/prometheus"
+	compbasemetrics "k8s.io/component-base/metrics"
 	"sigs.k8s.io/controller-runtime/pkg/metrics"
 )
 
@@ -30,23 +32,23 @@ var (
 		prometheus.CounterOpts{
 			Subsystem: component,
 			Name:      "success_total",
-			Help:      "Count of successes pulling model name from body and injecting it in the request headers.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Count of successes pulling model name from body and injecting it in the request headers."),
 		},
 		[]string{},
 	)
 	modelNotInBodyCounter = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
 			Subsystem: component,
 			Name:      "model_not_in_body_total",
-			Help:      "Count of times the model was not present in the request body.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Count of times the model was not present in the request body."),
 		},
 		[]string{},
 	)
 	modelNotParsedCounter = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
 			Subsystem: component,
 			Name:      "model_not_parsed_total",
-			Help:      "Count of times the model was in the request body but we could not parse it.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Count of times the model was in the request body but we could not parse it."),
 		},
 		[]string{},
 	)

pkg/epp/metrics/metrics.go (+20 −17)
@@ -18,12 +18,15 @@ package metrics
 
 import (
 	"context"
+	"fmt"
 	"sync"
 	"time"
 
 	"github.com/prometheus/client_golang/prometheus"
+	compbasemetrics "k8s.io/component-base/metrics"
 	"sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/controller-runtime/pkg/metrics"
+
 	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
 )
 
@@ -44,7 +47,7 @@ var (
 		prometheus.CounterOpts{
 			Subsystem: InferenceModelComponent,
 			Name:      "request_total",
-			Help:      "Counter of inference model requests broken out for each model and target model.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Counter of inference model requests broken out for each model and target model."),
 		},
 		[]string{"model_name", "target_model_name"},
 	)
@@ -53,7 +56,7 @@ var (
 		prometheus.CounterOpts{
 			Subsystem: InferenceModelComponent,
 			Name:      "request_error_total",
-			Help:      "Counter of inference model requests errors broken out for each model and target model.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Counter of inference model requests errors broken out for each model and target model."),
 		},
 		[]string{"model_name", "target_model_name", "error_code"},
 	)
@@ -62,7 +65,7 @@ var (
 		prometheus.HistogramOpts{
 			Subsystem: InferenceModelComponent,
 			Name:      "request_duration_seconds",
-			Help:      "Inference model response latency distribution in seconds for each model and target model.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Inference model response latency distribution in seconds for each model and target model."),
 			Buckets: []float64{
 				0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
 				4, 5, 6, 8, 10, 15, 20, 30, 45, 60, 120, 180, 240, 300, 360, 480, 600, 900, 1200, 1800, 2700, 3600,
@@ -75,7 +78,7 @@ var (
 		prometheus.HistogramOpts{
 			Subsystem: InferenceModelComponent,
 			Name:      "request_sizes",
-			Help:      "Inference model requests size distribution in bytes for each model and target model.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Inference model requests size distribution in bytes for each model and target model."),
 			// Use buckets ranging from 1000 bytes (1KB) to 10^9 bytes (1GB).
 			Buckets: []float64{
 				64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, // More fine-grained up to 64KB
@@ -90,7 +93,7 @@ var (
 		prometheus.HistogramOpts{
 			Subsystem: InferenceModelComponent,
 			Name:      "response_sizes",
-			Help:      "Inference model responses size distribution in bytes for each model and target model.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Inference model responses size distribution in bytes for each model and target model."),
 			// Most models have a response token < 8192 tokens. Each token, in average, has 4 characters.
 			// 8192 * 4 = 32768.
 			Buckets: []float64{1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32778, 65536},
@@ -102,7 +105,7 @@ var (
 		prometheus.HistogramOpts{
 			Subsystem: InferenceModelComponent,
 			Name:      "input_tokens",
-			Help:      "Inference model input token count distribution for requests in each model.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Inference model input token count distribution for requests in each model."),
 			// Most models have a input context window less than 1 million tokens.
 			Buckets: []float64{1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32778, 65536, 131072, 262144, 524288, 1048576},
 		},
@@ -113,7 +116,7 @@ var (
 		prometheus.HistogramOpts{
 			Subsystem: InferenceModelComponent,
 			Name:      "output_tokens",
-			Help:      "Inference model output token count distribution for requests in each model.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Inference model output token count distribution for requests in each model."),
 			// Most models generates output less than 8192 tokens.
 			Buckets: []float64{1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192},
 		},
@@ -124,7 +127,7 @@ var (
 		prometheus.GaugeOpts{
 			Subsystem: InferenceModelComponent,
 			Name:      "running_requests",
-			Help:      "Inference model number of running requests in each model.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Inference model number of running requests in each model."),
 		},
 		[]string{"model_name"},
 	)
@@ -134,7 +137,7 @@ var (
 		prometheus.HistogramOpts{
 			Subsystem: InferenceModelComponent,
 			Name:      "normalized_time_per_output_token_seconds",
-			Help:      "Inference model latency divided by number of output tokens in seconds for each model and target model.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Inference model latency divided by number of output tokens in seconds for each model and target model."),
 			// From few milliseconds per token to multiple seconds per token
 			Buckets: []float64{
 				0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0,
@@ -148,7 +151,7 @@ var (
 		prometheus.GaugeOpts{
 			Subsystem: InferencePoolComponent,
 			Name:      "average_kv_cache_utilization",
-			Help:      "The average kv cache utilization for an inference server pool.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "The average kv cache utilization for an inference server pool."),
 		},
 		[]string{"name"},
 	)
@@ -157,7 +160,7 @@ var (
 		prometheus.GaugeOpts{
 			Subsystem: InferencePoolComponent,
 			Name:      "average_queue_size",
-			Help:      "The average number of requests pending in the model server queue.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "The average number of requests pending in the model server queue."),
 		},
 		[]string{"name"},
 	)
@@ -166,7 +169,7 @@ var (
 		prometheus.GaugeOpts{
 			Subsystem: InferencePoolComponent,
 			Name:      "ready_pods",
-			Help:      "The number of ready pods in the inference server pool.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "The number of ready pods in the inference server pool."),
 		},
 		[]string{"name"},
 	)
@@ -176,19 +179,18 @@ var (
 		prometheus.HistogramOpts{
 			Subsystem: InferenceExtension,
 			Name:      "scheduler_e2e_duration_seconds",
-			Help:      "End-to-end scheduling latency distribution in seconds.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "End-to-end scheduling latency distribution in seconds."),
 			Buckets: []float64{
 				0.0001, 0.0002, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1,
 			},
-			// StabilityLevel: compbasemetrics.ALPHA,
 		},
 		[]string{},
 	)
 	SchedulerPluginProcessingLatencies = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
 			Subsystem: InferenceExtension,
 			Name:      "scheduler_plugin_duration_seconds",
-			Help:      "Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name.",
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name."),
 			Buckets: []float64{
 				0.0001, 0.0002, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1,
 			},
@@ -201,8 +203,7 @@ var (
 		prometheus.GaugeOpts{
 			Subsystem: InferenceExtension,
 			Name:      "info",
-			Help:      "General information of the current build of Inference Extension.",
-			// StabilityLevel: compbasemetrics.ALPHA,
+			Help:      fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "General information of the current build of Inference Extension."),
 		},
 		[]string{"commit"},
 	)
@@ -248,6 +249,8 @@ func Reset() {
 	inferencePoolAvgQueueSize.Reset()
 	inferencePoolReadyPods.Reset()
 	SchedulerPluginProcessingLatencies.Reset()
+	SchedulerE2ELatency.Reset()
+	InferenceExtensionInfo.Reset()
 }
 
 // RecordRequstCounter records the number of requests.
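
The testdata files below are golden files for the metrics endpoint's text output, which is why each "# HELP" line gains the "[ALPHA]" prefix. A self-contained sketch of the kind of golden-file comparison these files feed into, using client_golang's testutil package; the repository's actual test harness may differ, and the inlined expected text here stands in for a file such as testdata/input_tokens_metric:

// Sketch: compare a collector's scraped text output against an expected
// exposition-format snippet, the same role the testdata golden files play.
package main

import (
	"fmt"
	"strings"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	inputTokens := prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Subsystem: "inference_model",
			Name:      "input_tokens",
			Help:      "[ALPHA] Inference model input token count distribution for requests in each model.",
			Buckets:   []float64{1, 8}, // truncated bucket list, for illustration only
		},
		[]string{"model_name", "target_model_name"},
	)
	inputTokens.WithLabelValues("m10", "t10").Observe(2)

	// The expected text mirrors the golden-file format: the # HELP line must now
	// carry the "[ALPHA]" prefix or the comparison fails.
	expected := `
# HELP inference_model_input_tokens [ALPHA] Inference model input token count distribution for requests in each model.
# TYPE inference_model_input_tokens histogram
inference_model_input_tokens_bucket{model_name="m10",target_model_name="t10",le="1"} 0
inference_model_input_tokens_bucket{model_name="m10",target_model_name="t10",le="8"} 1
inference_model_input_tokens_bucket{model_name="m10",target_model_name="t10",le="+Inf"} 1
inference_model_input_tokens_sum{model_name="m10",target_model_name="t10"} 2
inference_model_input_tokens_count{model_name="m10",target_model_name="t10"} 1
`
	err := testutil.CollectAndCompare(
		inputTokens,
		strings.NewReader(strings.TrimSpace(expected)+"\n"),
		"inference_model_input_tokens",
	)
	fmt.Println("golden file matches:", err == nil)
}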

pkg/epp/metrics/testdata/input_tokens_metric (+1 −1)

@@ -1,4 +1,4 @@
-# HELP inference_model_input_tokens Inference model input token count distribution for requests in each model.
+# HELP inference_model_input_tokens [ALPHA] Inference model input token count distribution for requests in each model.
 # TYPE inference_model_input_tokens histogram
 inference_model_input_tokens_bucket{model_name="m10",target_model_name="t10",le="1"} 0
 inference_model_input_tokens_bucket{model_name="m10",target_model_name="t10",le="8"} 0
pkg/epp/metrics/testdata/… (+1 −1)

@@ -1,3 +1,3 @@
-# HELP inference_pool_average_kv_cache_utilization The average kv cache utilization for an inference server pool.
+# HELP inference_pool_average_kv_cache_utilization [ALPHA] The average kv cache utilization for an inference server pool.
 # TYPE inference_pool_average_kv_cache_utilization gauge
 inference_pool_average_kv_cache_utilization{name="p1"} 0.3

pkg/epp/metrics/testdata/normalized_time_per_output_token_seconds_metric (+1 −1)

@@ -1,4 +1,4 @@
-# HELP inference_model_normalized_time_per_output_token_seconds Inference model latency divided by number of output tokens in seconds for each model and target model.
+# HELP inference_model_normalized_time_per_output_token_seconds [ALPHA] Inference model latency divided by number of output tokens in seconds for each model and target model.
 # TYPE inference_model_normalized_time_per_output_token_seconds histogram
 inference_model_normalized_time_per_output_token_seconds_bucket{model_name="m10", target_model_name="t10", le="0.001"} 0
 inference_model_normalized_time_per_output_token_seconds_bucket{model_name="m10", target_model_name="t10", le="0.002"} 0

pkg/epp/metrics/testdata/output_tokens_metric (+1 −1)

@@ -1,4 +1,4 @@
-# HELP inference_model_output_tokens Inference model output token count distribution for requests in each model.
+# HELP inference_model_output_tokens [ALPHA] Inference model output token count distribution for requests in each model.
 # TYPE inference_model_output_tokens histogram
 inference_model_output_tokens_bucket{model_name="m10",target_model_name="t10",le="1"} 0
 inference_model_output_tokens_bucket{model_name="m10",target_model_name="t10",le="8"} 0
pkg/epp/metrics/testdata/… (+1 −1)

@@ -1,3 +1,3 @@
-# HELP inference_pool_average_queue_size The average number of requests pending in the model server queue.
+# HELP inference_pool_average_queue_size [ALPHA] The average number of requests pending in the model server queue.
 # TYPE inference_pool_average_queue_size gauge
 inference_pool_average_queue_size{name="p1"} 0.4

pkg/epp/metrics/testdata/request_duration_seconds_metric (+1 −1)

@@ -1,4 +1,4 @@
-# HELP inference_model_request_duration_seconds Inference model response latency distribution in seconds for each model and target model.
+# HELP inference_model_request_duration_seconds [ALPHA] Inference model response latency distribution in seconds for each model and target model.
 # TYPE inference_model_request_duration_seconds histogram
 inference_model_request_duration_seconds_bucket{model_name="m10", target_model_name="t10", le="0.005"} 0
 inference_model_request_duration_seconds_bucket{model_name="m10", target_model_name="t10", le="0.025"} 1

pkg/epp/metrics/testdata/request_error_total_metric (+1 −1)

@@ -1,4 +1,4 @@
-# HELP inference_model_request_error_total Counter of inference model requests errors broken out for each model and target model.
+# HELP inference_model_request_error_total [ALPHA] Counter of inference model requests errors broken out for each model and target model.
 # TYPE inference_model_request_error_total counter
 inference_model_request_error_total{error_code="Internal", model_name="m10",target_model_name="t10"} 2
 inference_model_request_error_total{error_code="ModelServerError", model_name="m10",target_model_name="t11"} 1

pkg/epp/metrics/testdata/request_sizes_metric (+1 −1)

@@ -1,4 +1,4 @@
-# HELP inference_model_request_sizes Inference model requests size distribution in bytes for each model and target model.
+# HELP inference_model_request_sizes [ALPHA] Inference model requests size distribution in bytes for each model and target model.
 # TYPE inference_model_request_sizes histogram
 inference_model_request_sizes_bucket{model_name="m10",target_model_name="t10",le="64"} 0
 inference_model_request_sizes_bucket{model_name="m10",target_model_name="t10",le="128"} 0

pkg/epp/metrics/testdata/request_total_metric (+1 −1)

@@ -1,4 +1,4 @@
-# HELP inference_model_request_total Counter of inference model requests broken out for each model and target model.
+# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
 # TYPE inference_model_request_total counter
 inference_model_request_total{model_name="m10", target_model_name="t10"} 2
 inference_model_request_total{model_name="m10", target_model_name="t11"} 1

pkg/epp/metrics/testdata/response_sizes_metric (+1 −1)

@@ -1,4 +1,4 @@
-# HELP inference_model_response_sizes Inference model responses size distribution in bytes for each model and target model.
+# HELP inference_model_response_sizes [ALPHA] Inference model responses size distribution in bytes for each model and target model.
 # TYPE inference_model_response_sizes histogram
 inference_model_response_sizes_bucket{model_name="m10",target_model_name="t10",le="1"} 0
 inference_model_response_sizes_bucket{model_name="m10",target_model_name="t10",le="8"} 0
pkg/epp/metrics/testdata/… (+1 −1)

@@ -1,4 +1,4 @@
-# HELP inference_model_running_requests Inference model number of running requests in each model.
+# HELP inference_model_running_requests [ALPHA] Inference model number of running requests in each model.
 # TYPE inference_model_running_requests gauge
 inference_model_running_requests{model_name="m1"} 1
 inference_model_running_requests{model_name="m2"} 1

pkg/epp/metrics/testdata/scheduler_e2e_duration_seconds_metric (+1 −1)

@@ -1,4 +1,4 @@
-# HELP inference_extension_scheduler_e2e_duration_seconds End-to-end scheduling latency distribution in seconds.
+# HELP inference_extension_scheduler_e2e_duration_seconds [ALPHA] End-to-end scheduling latency distribution in seconds.
 # TYPE inference_extension_scheduler_e2e_duration_seconds histogram
 inference_extension_scheduler_e2e_duration_seconds_bucket{le="0.0001"} 0
 inference_extension_scheduler_e2e_duration_seconds_bucket{le="0.0002"} 1

pkg/epp/metrics/testdata/scheduler_plugin_processing_latencies_metric (+1 −1)

@@ -1,4 +1,4 @@
-# HELP inference_extension_scheduler_plugin_duration_seconds Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name.
+# HELP inference_extension_scheduler_plugin_duration_seconds [ALPHA] Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name.
 # TYPE inference_extension_scheduler_plugin_duration_seconds histogram
 inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginA",plugin_type="PreSchedule",le="0.0001"} 0
 inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginA",plugin_type="PreSchedule",le="0.0002"} 0
