@@ -44,7 +44,7 @@ def _get_inputs(
44
44
sampling_parameters = None ,
45
45
return_finish_reason = None ,
46
46
return_cumulative_logprob = None ,
47
- return_num_token_ids = None ,
47
+ return_num_output_tokens = None ,
48
48
):
49
49
inputs = []
50
50
@@ -76,9 +76,13 @@ def _get_inputs(
76
76
np .array ([return_cumulative_logprob ], dtype = bool )
77
77
)
78
78
79
- if return_num_token_ids is not None :
80
- inputs .append (grpcclient .InferInput ("return_num_token_ids" , [1 ], "BOOL" ))
81
- inputs [- 1 ].set_data_from_numpy (np .array ([return_num_token_ids ], dtype = bool ))
79
+ if return_num_output_tokens is not None :
80
+ inputs .append (
81
+ grpcclient .InferInput ("return_num_output_tokens" , [1 ], "BOOL" )
82
+ )
83
+ inputs [- 1 ].set_data_from_numpy (
84
+ np .array ([return_num_output_tokens ], dtype = bool )
85
+ )
82
86
83
87
return inputs
84
88
@@ -131,15 +135,15 @@ def _assert_cumulative_logprob(self, return_cumulative_logprob):
131
135
assert cumulative_logprob != prev_cumulative_logprob
132
136
prev_cumulative_logprob = cumulative_logprob
133
137
134
- def _assert_num_token_ids (self , return_num_token_ids ):
138
+ def _assert_num_output_tokens (self , return_num_output_tokens ):
135
139
for response in self ._responses :
136
140
result , error = response ["result" ], response ["error" ]
137
141
assert error is None
138
- num_token_ids_np = result .as_numpy (name = "num_token_ids " )
139
- if return_num_token_ids is None or return_num_token_ids == False :
140
- assert num_token_ids_np is None
142
+ num_output_tokens_np = result .as_numpy (name = "num_output_tokens " )
143
+ if return_num_output_tokens is None or return_num_output_tokens == False :
144
+ assert num_output_tokens_np is None
141
145
continue
142
- num_token_ids = num_token_ids_np [0 ].astype (int )
146
+ num_output_tokens = num_output_tokens_np [0 ].astype (int )
143
147
# TODO: vLLM may return token ids identical to the previous one when
144
148
# streaming, for example:
145
149
#
@@ -156,30 +160,30 @@ def _assert_num_token_ids(self, return_num_token_ids):
156
160
# curr: text=' the term “', token_ids=array('l', [5, 1385, 44, 48])
157
161
#
158
162
# If this is no longer the case in a future release, change the assert
159
- # to assert num_token_ids > 0.
160
- assert num_token_ids >= 0
163
+ # to assert num_output_tokens > 0.
164
+ assert num_output_tokens >= 0
161
165
162
166
@pytest .mark .parametrize ("stream" , [True , False ])
163
167
@pytest .mark .parametrize ("return_finish_reason" , [None , True , False ])
164
168
@pytest .mark .parametrize ("return_cumulative_logprob" , [None , True , False ])
165
- @pytest .mark .parametrize ("return_num_token_ids " , [None , True , False ])
169
+ @pytest .mark .parametrize ("return_num_output_tokens " , [None , True , False ])
166
170
def test_additional_outputs (
167
171
self ,
168
172
stream ,
169
173
return_finish_reason ,
170
174
return_cumulative_logprob ,
171
- return_num_token_ids ,
175
+ return_num_output_tokens ,
172
176
):
173
177
inputs = self ._get_inputs (
174
178
self ._prompt ,
175
179
stream = stream ,
176
180
sampling_parameters = self ._sampling_parameters ,
177
181
return_finish_reason = return_finish_reason ,
178
182
return_cumulative_logprob = return_cumulative_logprob ,
179
- return_num_token_ids = return_num_token_ids ,
183
+ return_num_output_tokens = return_num_output_tokens ,
180
184
)
181
185
self ._llm_infer (inputs )
182
186
self ._assert_text_output_valid ()
183
187
self ._assert_finish_reason (return_finish_reason )
184
188
self ._assert_cumulative_logprob (return_cumulative_logprob )
185
- self ._assert_num_token_ids ( return_num_token_ids )
189
+ self ._assert_num_output_tokens ( return_num_output_tokens )
0 commit comments