@@ -18,6 +18,7 @@ package metrics
18
18
19
19
import (
20
20
"context"
21
+ "runtime/debug"
21
22
"sync"
22
23
"time"
23
24
@@ -219,6 +220,40 @@ var (
219
220
},
220
221
[]string {"commit" },
221
222
)
223
+
224
+ // Prefix indexer Metrics
225
+ PrefixCacheSize = compbasemetrics .NewGaugeVec (
226
+ & compbasemetrics.GaugeOpts {
227
+ Subsystem : InferenceExtension ,
228
+ Name : "prefix_indexer_size" ,
229
+ Help : "Size of the prefix indexer." ,
230
+ StabilityLevel : compbasemetrics .ALPHA ,
231
+ },
232
+ []string {},
233
+ )
234
+
235
+ PrefixCacheHitRatio = compbasemetrics .NewHistogramVec (
236
+ & compbasemetrics.HistogramOpts {
237
+ Subsystem : InferenceExtension ,
238
+ Name : "prefix_indexer_hit_ratio" ,
239
+ Help : "Ratio of prefix length matched to total prefix length in the cache lookup." ,
240
+ // Buckets from 0.0 to 1.0 in increments of 0.1.
241
+ Buckets : []float64 {0.0 , 0.1 , 0.2 , 0.3 , 0.4 , 0.5 , 0.6 , 0.7 , 0.8 , 0.9 , 1.0 },
242
+ StabilityLevel : compbasemetrics .ALPHA ,
243
+ },
244
+ []string {},
245
+ )
246
+
247
+ PrefixCacheHitLength = compbasemetrics .NewHistogramVec (
248
+ & compbasemetrics.HistogramOpts {
249
+ Subsystem : InferenceExtension ,
250
+ Name : "prefix_indexer_hit_bytes" ,
251
+ Help : "Length of the prefix match in number of bytes in the cache lookup." ,
252
+ Buckets : []float64 {0 , 16 , 32 , 64 , 128 , 256 , 512 , 1024 , 2048 , 4096 , 8192 , 16384 , 32768 , 65536 },
253
+ StabilityLevel : compbasemetrics .ALPHA ,
254
+ },
255
+ []string {},
256
+ )
222
257
)
223
258
224
259
var registerMetrics sync.Once
@@ -244,6 +279,10 @@ func Register() {
244
279
legacyregistry .MustRegister (SchedulerE2ELatency )
245
280
246
281
legacyregistry .MustRegister (InferenceExtensionInfo )
282
+
283
+ legacyregistry .MustRegister (PrefixCacheSize )
284
+ legacyregistry .MustRegister (PrefixCacheHitRatio )
285
+ legacyregistry .MustRegister (PrefixCacheHitLength )
247
286
})
248
287
}
249
288
@@ -352,8 +391,44 @@ func RecordSchedulerE2ELatency(duration time.Duration) {
352
391
SchedulerE2ELatency .WithLabelValues ().Observe (duration .Seconds ())
353
392
}
354
393
394
+ // RecordPrefixCacheSize records the size of the prefix indexer in megabytes.
395
+ func RecordPrefixCacheSize (size int64 ) {
396
+ PrefixCacheSize .WithLabelValues ().Set (float64 (size ))
397
+ }
398
+
399
+ // RecordPrefixCacheMatch records both the hit ratio and hit length for a prefix indexer match.
400
+ // matchedLength is the number of characters that matched, and totalLength is the total prefix length.
401
+ func RecordPrefixCacheMatch (matchedLength , totalLength int ) {
402
+ // Record the hit length metric
403
+ PrefixCacheHitLength .WithLabelValues ().Observe (float64 (matchedLength ))
404
+
405
+ // Record the hit ratio metric if totalLength is positive
406
+ if totalLength > 0 {
407
+ ratio := float64 (matchedLength ) / float64 (totalLength )
408
+ PrefixCacheHitRatio .WithLabelValues ().Observe (ratio )
409
+ }
410
+ }
411
+
355
412
func RecordInferenceExtensionInfo () {
356
413
if CommitSHA != "" {
357
414
InferenceExtensionInfo .WithLabelValues (CommitSHA ).Set (1 )
358
415
}
359
416
}
417
+
418
+ func init () {
419
+ info , ok := debug .ReadBuildInfo ()
420
+ if ! ok {
421
+ return
422
+ }
423
+
424
+ var Commit = func (i * debug.BuildInfo ) string {
425
+ for _ , setting := range i .Settings {
426
+ if setting .Key == "vcs.revision" {
427
+ return setting .Value
428
+ }
429
+ }
430
+ return ""
431
+ }(info )
432
+
433
+ CommitSHA = Commit
434
+ }
0 commit comments