|
32 | 32 | from vllm.engine.metrics import StatLoggerBase as VllmStatLoggerBase
|
33 | 33 | from vllm.engine.metrics import Stats as VllmStats
|
34 | 34 | from vllm.engine.metrics import SupportsMetricsInfo, build_1_2_5_buckets
|
35 |
| - |
| 35 | +from vllm.version import __version__ as _VLLM_VERSION |
36 | 36 |
|
37 | 37 | class TritonMetrics:
|
38 | 38 | def __init__(self, labels: List[str], max_model_len: int):
|
@@ -76,11 +76,14 @@ def __init__(self, labels: List[str], max_model_len: int):
|
76 | 76 | description="Number of generation tokens processed.",
|
77 | 77 | kind=pb_utils.MetricFamily.HISTOGRAM,
|
78 | 78 | )
|
79 |
| - self.histogram_best_of_request_family = pb_utils.MetricFamily( |
80 |
| - name="vllm:request_params_best_of", |
81 |
| - description="Histogram of the best_of request parameter.", |
82 |
| - kind=pb_utils.MetricFamily.HISTOGRAM, |
83 |
| - ) |
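| 79 | + # The 'best_of' metric has been hidden since vLLM 0.6.3, so it is only registered for older versions. NOTE(review): the guard below compares version strings lexicographically (e.g. "0.10.0" < "0.6.3" is True) -- confirm, or compare parsed versions instead. |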
| 80 | + # https://github.com/vllm-project/vllm/commit/cbc2ef55292b2af6ff742095c030e8425124c005 |
| 81 | + if _VLLM_VERSION < "0.6.3": |
| 82 | + self.histogram_best_of_request_family = pb_utils.MetricFamily( |
| 83 | + name="vllm:request_params_best_of", |
| 84 | + description="Histogram of the best_of request parameter.", |
| 85 | + kind=pb_utils.MetricFamily.HISTOGRAM, |
| 86 | + ) |
84 | 87 | self.histogram_n_request_family = pb_utils.MetricFamily(
|
85 | 88 | name="vllm:request_params_n",
|
86 | 89 | description="Histogram of the n request parameter.",
|
@@ -159,10 +162,11 @@ def __init__(self, labels: List[str], max_model_len: int):
|
159 | 162 | buckets=build_1_2_5_buckets(max_model_len),
|
160 | 163 | )
|
161 | 164 | )
|
162 |
| - self.histogram_best_of_request = self.histogram_best_of_request_family.Metric( |
163 |
| - labels=labels, |
164 |
| - buckets=[1, 2, 5, 10, 20], |
165 |
| - ) |
| 165 | + if _VLLM_VERSION < "0.6.3": |
| 166 | + self.histogram_best_of_request = self.histogram_best_of_request_family.Metric( |
| 167 | + labels=labels, |
| 168 | + buckets=[1, 2, 5, 10, 20], |
| 169 | + ) |
166 | 170 | self.histogram_n_request = self.histogram_n_request_family.Metric(
|
167 | 171 | labels=labels,
|
168 | 172 | buckets=[1, 2, 5, 10, 20],
|
@@ -247,10 +251,10 @@ def log(self, stats: VllmStats) -> None:
|
247 | 251 | self.metrics.histogram_num_generation_tokens_request,
|
248 | 252 | stats.num_generation_tokens_requests,
|
249 | 253 | ),
|
250 |
| - (self.metrics.histogram_best_of_request, stats.best_of_requests), |
251 | 254 | (self.metrics.histogram_n_request, stats.n_requests),
|
252 | 255 | ]
|
253 |
| - |
| 256 | + if _VLLM_VERSION < "0.6.3": |
| 257 | + histogram_metrics.append((self.metrics.histogram_best_of_request, stats.best_of_requests)) |
254 | 258 | for metric, data in counter_metrics:
|
255 | 259 | self._log_counter(metric, data)
|
256 | 260 | for metric, data in histogram_metrics:
|
|
0 commit comments