@@ -34,6 +34,7 @@ import (
34
34
"k8s.io/client-go/rest"
35
35
"k8s.io/component-base/metrics/legacyregistry"
36
36
ctrl "sigs.k8s.io/controller-runtime"
37
+ "sigs.k8s.io/controller-runtime/pkg/log"
37
38
"sigs.k8s.io/controller-runtime/pkg/log/zap"
38
39
"sigs.k8s.io/controller-runtime/pkg/manager"
39
40
"sigs.k8s.io/controller-runtime/pkg/metrics/filters"
@@ -43,7 +44,13 @@ import (
43
44
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
44
45
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics/collectors"
45
46
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling"
47
+ "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
48
+ "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/filter"
49
+ "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/picker"
50
+ "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/prefix"
51
+ "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/scorer"
46
52
runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server"
53
+ envutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/env"
47
54
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
48
55
)
49
56
@@ -107,8 +114,22 @@ var (
107
114
"Prometheus metric for the LoRA info metrics (must be in vLLM label format)." )
108
115
109
116
setupLog = ctrl .Log .WithName ("setup" )
117
+
118
+ // Environment variables
119
+ schedulerV2 = envutil .GetEnvString ("EXPERIMENTAL_USE_SCHEDULER_V2" , "false" , setupLog )
120
+ prefixCacheScheduling = envutil .GetEnvString ("ENABLE_PREFIX_CACHE_SCHEDULING" , "false" , setupLog )
110
121
)
111
122
123
+ func loadPrefixCacheConfig () prefix.Config {
124
+ baseLogger := log .Log .WithName ("env-config" )
125
+
126
+ return prefix.Config {
127
+ HashBlockSize : envutil .GetEnvInt ("PREFIX_CACHE_HASH_BLOCK_SIZE" , prefix .DefaultHashBlockSize , baseLogger ),
128
+ MaxPrefixBlocksToMatch : envutil .GetEnvInt ("PREFIX_CACHE_MAX_PREFIX_BLOCKS" , prefix .DefaultMaxPrefixBlocks , baseLogger ),
129
+ LRUIndexerCapacity : envutil .GetEnvInt ("PREFIX_CACHE_LRU_CAPACITY" , prefix .DefaultLRUIndexerCapacity , baseLogger ),
130
+ }
131
+ }
132
+
112
133
func main () {
113
134
if err := run (); err != nil {
114
135
os .Exit (1 )
@@ -172,6 +193,27 @@ func run() error {
172
193
datastore := datastore .NewDatastore (ctx , pmf )
173
194
174
195
scheduler := scheduling .NewScheduler (datastore )
196
+ if schedulerV2 == "true" {
197
+ queueScorerWeight := envutil .GetEnvInt ("QUEUE_SCORE_WEIGHT" , scorer .DefaultQueueScorerWeight , setupLog )
198
+ kvCacheScorerWeight := envutil .GetEnvInt ("KV_CACHE_SCORE_WEIGHT" , scorer .DefaultKVCacheScorerWeight , setupLog )
199
+ scorers := map [plugins.Scorer ]int {
200
+ & scorer.QueueScorer {}: queueScorerWeight ,
201
+ & scorer.KVCacheScorer {}: kvCacheScorerWeight ,
202
+ }
203
+ schedConfigOpts := []scheduling.ConfigOption {}
204
+ if prefixCacheScheduling == "true" {
205
+ prefixScorerWeight := envutil .GetEnvInt ("PREFIX_CACHE_SCORE_WEIGHT" , prefix .DefaultScorerWeight , setupLog )
206
+ schedConfigOpts = append (schedConfigOpts , scheduling .AddPrefixPlugin (loadPrefixCacheConfig (), prefixScorerWeight ))
207
+ }
208
+ schedulerConfig := scheduling .NewSchedulerConfig (
209
+ []plugins.PreSchedule {},
210
+ []plugins.Filter {filter .NewSheddableCapacityFilter ()},
211
+ scorers ,
212
+ picker .NewMaxScorePicker (),
213
+ []plugins.PostSchedule {},
214
+ schedConfigOpts ... )
215
+ scheduler = scheduling .NewSchedulerWithConfig (datastore , schedulerConfig )
216
+ }
175
217
serverRunner := & runserver.ExtProcServerRunner {
176
218
GrpcPort : * grpcPort ,
177
219
DestinationEndpointHintMetadataNamespace : * destinationEndpointHintMetadataNamespace ,
0 commit comments