You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Help: "Inference model number of running requests in each model.",
130
+
Help: fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Inference model number of running requests in each model."),
128
131
},
129
132
[]string{"model_name"},
130
133
)
@@ -134,7 +137,7 @@ var (
134
137
prometheus.HistogramOpts{
135
138
Subsystem: InferenceModelComponent,
136
139
Name: "normalized_time_per_output_token_seconds",
137
-
Help: "Inference model latency divided by number of output tokens in seconds for each model and target model.",
140
+
Help: fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Inference model latency divided by number of output tokens in seconds for each model and target model."),
138
141
// From few milliseconds per token to multiple seconds per token
Help: "Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name.",
194
+
Help: fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name."),
Copy file name to clipboard. Expand all lines: pkg/epp/metrics/testdata/normalized_time_per_output_token_seconds_metric
+1-1
Original file line number
Diff line number
Diff line change
@@ -1,4 +1,4 @@
1
-
# HELP inference_model_normalized_time_per_output_token_seconds Inference model latency divided by number of output tokens in seconds for each model and target model.
1
+
# HELP inference_model_normalized_time_per_output_token_seconds [ALPHA] Inference model latency divided by number of output tokens in seconds for each model and target model.
2
2
# TYPE inference_model_normalized_time_per_output_token_seconds histogram
Copy file name to clipboard. Expand all lines: pkg/epp/metrics/testdata/scheduler_plugin_processing_latencies_metric
+1-1
Original file line number
Diff line number
Diff line change
@@ -1,4 +1,4 @@
1
-
# HELP inference_extension_scheduler_plugin_duration_seconds Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name.
1
+
# HELP inference_extension_scheduler_plugin_duration_seconds [ALPHA] Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name.
2
2
# TYPE inference_extension_scheduler_plugin_duration_seconds histogram
0 commit comments