Skip to content

Commit a04d395

Browse files
authored
Add queue and kv-cache scorers (#762)
* Add queue and kv-cache scorers * Remove helper function
1 parent 4c7ed4a commit a04d395

File tree

5 files changed

+292
-0
lines changed

5 files changed

+292
-0
lines changed

pkg/epp/scheduling/plugins/picker/max_score_picker.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,19 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
117
package picker
218

319
import (
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package scorer
18+
19+
import (
20+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
21+
)
22+
23+
type KVCacheScorer struct{}
24+
25+
func (ss *KVCacheScorer) Name() string {
26+
return "kv-cache"
27+
}
28+
29+
func (ss *KVCacheScorer) Score(ctx *types.SchedulingContext, pods []types.Pod) map[types.Pod]float64 {
30+
scores := make(map[types.Pod]float64, len(pods))
31+
for _, pod := range pods {
32+
scores[pod] = 1 - pod.GetMetrics().KVCacheUsagePercent
33+
}
34+
return scores
35+
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package scorer
18+
19+
import (
20+
"context"
21+
"testing"
22+
23+
"github.com/stretchr/testify/assert"
24+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
25+
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
26+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
27+
)
28+
29+
func TestKvCacheScorer(t *testing.T) {
30+
tests := []struct {
31+
name string
32+
pods []types.Pod
33+
expectedScoresPod map[int]float64 // Map of pod index to expected score
34+
}{
35+
{
36+
name: "Different KV cache utilization",
37+
pods: []types.Pod{
38+
&types.PodMetrics{Pod: &backend.Pod{}, Metrics: &backendmetrics.Metrics{KVCacheUsagePercent: 0.8}},
39+
&types.PodMetrics{Pod: &backend.Pod{}, Metrics: &backendmetrics.Metrics{KVCacheUsagePercent: 0.5}},
40+
&types.PodMetrics{Pod: &backend.Pod{}, Metrics: &backendmetrics.Metrics{KVCacheUsagePercent: 0.0}},
41+
},
42+
expectedScoresPod: map[int]float64{
43+
0: 0.2, // Highest KV cache usage (0.8) gets lowest score (1-0.8=0.2)
44+
1: 0.5, // Medium KV cache usage (0.5) gets medium score (1-0.5=0.5)
45+
2: 1.0, // No KV cache usage (0.0) gets highest score (1-0=1.0)
46+
},
47+
},
48+
{
49+
name: "Same KV cache utilization",
50+
pods: []types.Pod{
51+
&types.PodMetrics{Pod: &backend.Pod{}, Metrics: &backendmetrics.Metrics{KVCacheUsagePercent: 0.6}},
52+
&types.PodMetrics{Pod: &backend.Pod{}, Metrics: &backendmetrics.Metrics{KVCacheUsagePercent: 0.6}},
53+
},
54+
expectedScoresPod: map[int]float64{
55+
0: 0.4, // Both get same score (1-0.6=0.4)
56+
1: 0.4,
57+
},
58+
},
59+
{
60+
name: "Zero KV cache utilization",
61+
pods: []types.Pod{
62+
&types.PodMetrics{Pod: &backend.Pod{}, Metrics: &backendmetrics.Metrics{KVCacheUsagePercent: 0.0}},
63+
&types.PodMetrics{Pod: &backend.Pod{}, Metrics: &backendmetrics.Metrics{KVCacheUsagePercent: 0.0}},
64+
},
65+
expectedScoresPod: map[int]float64{
66+
0: 1.0, // No KV cache usage gets highest score
67+
1: 1.0,
68+
},
69+
},
70+
{
71+
name: "Full KV cache utilization",
72+
pods: []types.Pod{
73+
&types.PodMetrics{Pod: &backend.Pod{}, Metrics: &backendmetrics.Metrics{KVCacheUsagePercent: 1.0}},
74+
&types.PodMetrics{Pod: &backend.Pod{}, Metrics: &backendmetrics.Metrics{KVCacheUsagePercent: 0.5}},
75+
},
76+
expectedScoresPod: map[int]float64{
77+
0: 0.0, // Full KV cache (1.0) gets lowest score (1-1=0)
78+
1: 0.5, // Half KV cache (0.5) gets medium score (1-0.5=0.5)
79+
},
80+
},
81+
}
82+
83+
for _, tt := range tests {
84+
t.Run(tt.name, func(t *testing.T) {
85+
ctx := types.NewSchedulingContext(context.Background(), &types.LLMRequest{}, tt.pods)
86+
scorer := &KVCacheScorer{}
87+
scores := scorer.Score(ctx, tt.pods)
88+
89+
for i, pod := range tt.pods {
90+
expectedScore := tt.expectedScoresPod[i]
91+
assert.InDelta(t, expectedScore, scores[pod], 0.0001, "Pod %d should have score %f", i, expectedScore)
92+
}
93+
})
94+
}
95+
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package scorer
18+
19+
import (
20+
"math"
21+
22+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
23+
)
24+
25+
type QueueScorer struct{}
26+
27+
func (q *QueueScorer) Name() string {
28+
return "queue"
29+
}
30+
31+
func (q *QueueScorer) Score(ctx *types.SchedulingContext, pods []types.Pod) map[types.Pod]float64 {
32+
minQueueSize := math.MaxInt
33+
maxQueueSize := math.MinInt
34+
35+
// Iterate through the remaining pods to find min and max
36+
for _, pod := range pods {
37+
queueSize := pod.GetMetrics().WaitingQueueSize
38+
if queueSize < minQueueSize {
39+
minQueueSize = queueSize
40+
}
41+
if queueSize > maxQueueSize {
42+
maxQueueSize = queueSize
43+
}
44+
}
45+
46+
// podScoreFunc calculates the score based on the queue size of each pod. Longer queue gets a lower score.
47+
podScoreFunc := func(pod types.Pod) float64 {
48+
if maxQueueSize == minQueueSize {
49+
// If all pods have the same queue size, return a neutral score
50+
return 1.0
51+
}
52+
return float64(maxQueueSize-pod.GetMetrics().WaitingQueueSize) / float64(maxQueueSize-minQueueSize)
53+
}
54+
55+
// Create a map to hold the scores for each pod
56+
scores := make(map[types.Pod]float64, len(pods))
57+
for _, pod := range pods {
58+
scores[pod] = podScoreFunc(pod)
59+
}
60+
return scores
61+
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package scorer
18+
19+
import (
20+
"context"
21+
"testing"
22+
23+
"github.com/stretchr/testify/assert"
24+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
25+
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
26+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
27+
)
28+
29+
func TestQueueScorer(t *testing.T) {
30+
tests := []struct {
31+
name string
32+
pods []types.Pod
33+
expectedScoresPod map[int]float64 // Map of pod index to expected score
34+
}{
35+
{
36+
name: "Different queue sizes",
37+
pods: []types.Pod{
38+
&types.PodMetrics{Pod: &backend.Pod{}, Metrics: &backendmetrics.Metrics{WaitingQueueSize: 10}},
39+
&types.PodMetrics{Pod: &backend.Pod{}, Metrics: &backendmetrics.Metrics{WaitingQueueSize: 5}},
40+
&types.PodMetrics{Pod: &backend.Pod{}, Metrics: &backendmetrics.Metrics{WaitingQueueSize: 0}},
41+
},
42+
expectedScoresPod: map[int]float64{
43+
0: 0.0, // Longest queue (10) gets lowest score
44+
1: 0.5, // Medium queue (5) gets medium score
45+
2: 1.0, // Shortest queue (0) gets highest score
46+
},
47+
},
48+
{
49+
name: "Same queue sizes",
50+
pods: []types.Pod{
51+
&types.PodMetrics{Pod: &backend.Pod{}, Metrics: &backendmetrics.Metrics{WaitingQueueSize: 5}},
52+
&types.PodMetrics{Pod: &backend.Pod{}, Metrics: &backendmetrics.Metrics{WaitingQueueSize: 5}},
53+
},
54+
expectedScoresPod: map[int]float64{
55+
0: 1.0, // When all pods have the same queue size, they get the same neutral score
56+
1: 1.0,
57+
},
58+
},
59+
{
60+
name: "Zero queue sizes",
61+
pods: []types.Pod{
62+
&types.PodMetrics{Pod: &backend.Pod{}, Metrics: &backendmetrics.Metrics{WaitingQueueSize: 0}},
63+
&types.PodMetrics{Pod: &backend.Pod{}, Metrics: &backendmetrics.Metrics{WaitingQueueSize: 0}},
64+
},
65+
expectedScoresPod: map[int]float64{
66+
0: 1.0,
67+
1: 1.0,
68+
},
69+
},
70+
}
71+
72+
scorer := &QueueScorer{}
73+
74+
for _, tt := range tests {
75+
t.Run(tt.name, func(t *testing.T) {
76+
ctx := types.NewSchedulingContext(context.Background(), &types.LLMRequest{}, tt.pods)
77+
scores := scorer.Score(ctx, tt.pods)
78+
79+
for i, pod := range tt.pods {
80+
expectedScore := tt.expectedScoresPod[i]
81+
assert.InDelta(t, expectedScore, scores[pod], 0.0001, "Pod %d should have score %f", i, expectedScore)
82+
}
83+
})
84+
}
85+
}

0 commit comments

Comments
 (0)