Skip to content

Commit a6ee559

Browse files
authored
Add scheduler e2e latency metric (#767)
1 parent a04d395 commit a6ee559

File tree

5 files changed

+151
-71
lines changed

5 files changed

+151
-71
lines changed

pkg/epp/metrics/metrics.go

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ import (
3030
const (
3131
InferenceModelComponent = "inference_model"
3232
InferencePoolComponent = "inference_pool"
33-
EPPComponent = "endpoint_picker"
3433
InferenceExtension = "inference_extension"
3534
)
3635

@@ -184,10 +183,22 @@ var (
184183
[]string{"name"},
185184
)
186185

187-
// Scheduler Plugin Metrics
186+
// Scheduler Metrics
187+
SchedulerE2ELatency = compbasemetrics.NewHistogramVec(
188+
&compbasemetrics.HistogramOpts{
189+
Subsystem: InferenceExtension,
190+
Name: "scheduler_e2e_duration_seconds",
191+
Help: "End-to-end scheduling latency distribution in seconds.",
192+
Buckets: []float64{
193+
0.0001, 0.0002, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1,
194+
},
195+
StabilityLevel: compbasemetrics.ALPHA,
196+
},
197+
[]string{},
198+
)
188199
SchedulerPluginProcessingLatencies = compbasemetrics.NewHistogramVec(
189200
&compbasemetrics.HistogramOpts{
190-
Subsystem: EPPComponent,
201+
Subsystem: InferenceExtension,
191202
Name: "scheduler_plugin_duration_seconds",
192203
Help: "Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name.",
193204
Buckets: []float64{
@@ -230,6 +241,7 @@ func Register() {
230241
legacyregistry.MustRegister(inferencePoolReadyPods)
231242

232243
legacyregistry.MustRegister(SchedulerPluginProcessingLatencies)
244+
legacyregistry.MustRegister(SchedulerE2ELatency)
233245

234246
legacyregistry.MustRegister(InferenceExtensionInfo)
235247
})
@@ -335,6 +347,11 @@ func RecordSchedulerPluginProcessingLatency(pluginType, pluginName string, durat
335347
SchedulerPluginProcessingLatencies.WithLabelValues(pluginType, pluginName).Observe(duration.Seconds())
336348
}
337349

350+
// RecordSchedulerE2ELatency records the end-to-end scheduling latency.
351+
func RecordSchedulerE2ELatency(duration time.Duration) {
352+
SchedulerE2ELatency.WithLabelValues().Observe(duration.Seconds())
353+
}
354+
338355
func RecordInferenceExtensionInfo() {
339356
if CommitSHA != "" {
340357
InferenceExtensionInfo.WithLabelValues(CommitSHA).Set(1)

pkg/epp/metrics/metrics_test.go

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,7 +614,50 @@ func TestSchedulerPluginProcessingLatencies(t *testing.T) {
614614
if err != nil {
615615
t.Fatal(err)
616616
}
617-
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, wantPluginLatencies, "endpoint_picker_scheduler_plugin_processing_latencies"); err != nil {
617+
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, wantPluginLatencies, "inference_extension_scheduler_plugin_duration_seconds"); err != nil {
618+
t.Error(err)
619+
}
620+
})
621+
}
622+
}
623+
624+
func TestSchedulerE2ELatency(t *testing.T) {
625+
scenarios := []struct {
626+
name string
627+
durations []time.Duration
628+
}{
629+
{
630+
name: "multiple scheduling latencies",
631+
durations: []time.Duration{
632+
200 * time.Microsecond, // 0.0002s - should go in the 0.0002 bucket
633+
800 * time.Microsecond, // 0.0008s - should go in the 0.001 bucket
634+
1500 * time.Microsecond, // 0.0015s - should go in the 0.002 bucket
635+
3 * time.Millisecond, // 0.003s - should go in the 0.005 bucket
636+
8 * time.Millisecond, // 0.008s - should go in the 0.01 bucket
637+
15 * time.Millisecond, // 0.015s - should go in the 0.02 bucket
638+
30 * time.Millisecond, // 0.03s - should go in the 0.05 bucket
639+
75 * time.Millisecond, // 0.075s - should go in the 0.1 bucket
640+
150 * time.Millisecond, // 0.15s - should go in the +Inf bucket
641+
},
642+
},
643+
}
644+
Register()
645+
for _, scenario := range scenarios {
646+
t.Run(scenario.name, func(t *testing.T) {
647+
for _, duration := range scenario.durations {
648+
RecordSchedulerE2ELatency(duration)
649+
}
650+
651+
wantE2ELatency, err := os.Open("testdata/scheduler_e2e_duration_seconds_metric")
652+
defer func() {
653+
if err := wantE2ELatency.Close(); err != nil {
654+
t.Error(err)
655+
}
656+
}()
657+
if err != nil {
658+
t.Fatal(err)
659+
}
660+
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, wantE2ELatency, "inference_extension_scheduler_e2e_duration_seconds"); err != nil {
618661
t.Error(err)
619662
}
620663
})
pkg/epp/metrics/testdata/scheduler_e2e_duration_seconds_metric

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# HELP inference_extension_scheduler_e2e_duration_seconds [ALPHA] End-to-end scheduling latency distribution in seconds.
2+
# TYPE inference_extension_scheduler_e2e_duration_seconds histogram
3+
inference_extension_scheduler_e2e_duration_seconds_bucket{le="0.0001"} 0
4+
inference_extension_scheduler_e2e_duration_seconds_bucket{le="0.0002"} 1
5+
inference_extension_scheduler_e2e_duration_seconds_bucket{le="0.0005"} 1
6+
inference_extension_scheduler_e2e_duration_seconds_bucket{le="0.001"} 2
7+
inference_extension_scheduler_e2e_duration_seconds_bucket{le="0.002"} 3
8+
inference_extension_scheduler_e2e_duration_seconds_bucket{le="0.005"} 4
9+
inference_extension_scheduler_e2e_duration_seconds_bucket{le="0.01"} 5
10+
inference_extension_scheduler_e2e_duration_seconds_bucket{le="0.02"} 6
11+
inference_extension_scheduler_e2e_duration_seconds_bucket{le="0.05"} 7
12+
inference_extension_scheduler_e2e_duration_seconds_bucket{le="0.1"} 8
13+
inference_extension_scheduler_e2e_duration_seconds_bucket{le="+Inf"} 9
14+
inference_extension_scheduler_e2e_duration_seconds_sum{} 0.2835
15+
inference_extension_scheduler_e2e_duration_seconds_count{} 9

0 commit comments

Comments
 (0)