@@ -26,47 +26,11 @@ import (
26
26
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
27
27
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
28
28
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
29
- "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/filter"
30
- "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/picker"
31
29
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
32
30
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error"
33
31
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
34
32
)
35
33
36
- var (
37
- lowLatencyFilter = & filter.DecisionTreeFilter {
38
- Current : filter .LowQueueFilter ,
39
- NextOnSuccess : & filter.DecisionTreeFilter {
40
- Current : filter .LoRAAffinityFilter ,
41
- NextOnSuccessOrFailure : & filter.DecisionTreeFilter {
42
- Current : filter .LeastQueueFilter ,
43
- NextOnSuccessOrFailure : & filter.DecisionTreeFilter {
44
- Current : filter .LeastKVCacheFilter ,
45
- },
46
- },
47
- },
48
- NextOnFailure : & filter.DecisionTreeFilter {
49
- Current : filter .LeastQueueFilter ,
50
- NextOnSuccessOrFailure : & filter.DecisionTreeFilter {
51
- Current : filter .LoRAAffinityFilter ,
52
- NextOnSuccessOrFailure : & filter.DecisionTreeFilter {
53
- Current : filter .LeastKVCacheFilter ,
54
- },
55
- },
56
- },
57
- }
58
-
59
- sheddableRequestFilter = & filter.DecisionTreeFilter {
60
- // When there is at least one model server that's not queuing requests, and still has KV
61
- // cache below a certain threshold, we consider this model server has capacity to handle
62
- // a sheddable request without impacting critical requests.
63
- Current : filter .HasCapacityFilter ,
64
- NextOnSuccess : lowLatencyFilter ,
65
- // If all pods are queuing or running above the KVCache threshold, we drop the sheddable
66
- // request to make room for critical requests. for this, we don't define nextOnFailure.
67
- }
68
- )
69
-
70
34
// NewScheduler builds a Scheduler for the given datastore by delegating to
// NewSchedulerWithConfig with the package-level defaultConfig.
func NewScheduler(datastore Datastore) *Scheduler {
	scheduler := NewSchedulerWithConfig(datastore, defaultConfig)
	return scheduler
}
@@ -206,19 +170,3 @@ func (s *Scheduler) runPostSchedulePlugins(ctx *types.SchedulingContext, res *ty
206
170
metrics .RecordSchedulerPluginProcessingLatency (plugins .PostSchedulePluginType , plugin .Name (), time .Since (before ))
207
171
}
208
172
}
209
-
210
// defaultPlugin is the plugin type whose Name and Filter methods are defined
// below in this file.
type defaultPlugin struct {
	// Embedded so that picker.RandomPicker's methods are promoted onto
	// defaultPlugin (presumably supplying the pod-picking step — confirm
	// against the picker package).
	picker.RandomPicker
}
213
-
214
- func (p * defaultPlugin ) Name () string {
215
- return "DefaultPlugin"
216
- }
217
-
218
- func (p * defaultPlugin ) Filter (ctx * types.SchedulingContext , pods []types.Pod ) []types.Pod {
219
- if ctx .Req .Critical {
220
- return lowLatencyFilter .Filter (ctx , pods )
221
- }
222
-
223
- return sheddableRequestFilter .Filter (ctx , pods )
224
- }
0 commit comments