
Commit dae3c13

Rename num_token_ids to num_output_tokens
1 parent 5e9b09f commit dae3c13

File tree

3 files changed: +30 -26 lines changed


ci/L0_additional_outputs_vllm/additional_outputs_test.py

Lines changed: 19 additions & 15 deletions
@@ -44,7 +44,7 @@ def _get_inputs(
         sampling_parameters=None,
         return_finish_reason=None,
         return_cumulative_logprob=None,
-        return_num_token_ids=None,
+        return_num_output_tokens=None,
     ):
         inputs = []
 
@@ -76,9 +76,13 @@ def _get_inputs(
                 np.array([return_cumulative_logprob], dtype=bool)
             )
 
-        if return_num_token_ids is not None:
-            inputs.append(grpcclient.InferInput("return_num_token_ids", [1], "BOOL"))
-            inputs[-1].set_data_from_numpy(np.array([return_num_token_ids], dtype=bool))
+        if return_num_output_tokens is not None:
+            inputs.append(
+                grpcclient.InferInput("return_num_output_tokens", [1], "BOOL")
+            )
+            inputs[-1].set_data_from_numpy(
+                np.array([return_num_output_tokens], dtype=bool)
+            )
 
         return inputs
 
@@ -131,15 +135,15 @@ def _assert_cumulative_logprob(self, return_cumulative_logprob):
             assert cumulative_logprob != prev_cumulative_logprob
             prev_cumulative_logprob = cumulative_logprob
 
-    def _assert_num_token_ids(self, return_num_token_ids):
+    def _assert_num_output_tokens(self, return_num_output_tokens):
         for response in self._responses:
             result, error = response["result"], response["error"]
             assert error is None
-            num_token_ids_np = result.as_numpy(name="num_token_ids")
-            if return_num_token_ids is None or return_num_token_ids == False:
-                assert num_token_ids_np is None
+            num_output_tokens_np = result.as_numpy(name="num_output_tokens")
+            if return_num_output_tokens is None or return_num_output_tokens == False:
+                assert num_output_tokens_np is None
                 continue
-            num_token_ids = num_token_ids_np[0].astype(int)
+            num_output_tokens = num_output_tokens_np[0].astype(int)
             # TODO: vLLM may return token ids identical to the previous one when
             # streaming, for example:
             #
@@ -156,30 +160,30 @@ def _assert_num_token_ids(self, return_num_token_ids):
             # curr: text=' the term “', token_ids=array('l', [5, 1385, 44, 48])
             #
             # If this is no longer the case in a future release, change the assert
-            # to assert num_token_ids > 0.
-            assert num_token_ids >= 0
+            # to assert num_output_tokens > 0.
+            assert num_output_tokens >= 0
 
     @pytest.mark.parametrize("stream", [True, False])
     @pytest.mark.parametrize("return_finish_reason", [None, True, False])
     @pytest.mark.parametrize("return_cumulative_logprob", [None, True, False])
-    @pytest.mark.parametrize("return_num_token_ids", [None, True, False])
+    @pytest.mark.parametrize("return_num_output_tokens", [None, True, False])
     def test_additional_outputs(
         self,
         stream,
         return_finish_reason,
         return_cumulative_logprob,
-        return_num_token_ids,
+        return_num_output_tokens,
     ):
         inputs = self._get_inputs(
             self._prompt,
             stream=stream,
             sampling_parameters=self._sampling_parameters,
             return_finish_reason=return_finish_reason,
             return_cumulative_logprob=return_cumulative_logprob,
-            return_num_token_ids=return_num_token_ids,
+            return_num_output_tokens=return_num_output_tokens,
         )
         self._llm_infer(inputs)
         self._assert_text_output_valid()
         self._assert_finish_reason(return_finish_reason)
         self._assert_cumulative_logprob(return_cumulative_logprob)
-        self._assert_num_token_ids(return_num_token_ids)
+        self._assert_num_output_tokens(return_num_output_tokens)
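For context, a minimal client-side sketch of exercising the renamed tensors outside the test suite, assuming the backend's usual `text_input`/`stream` inputs and a Triton server at `localhost:8001`; the model name and prompt are placeholders, not taken from this commit:

```python
import queue

import numpy as np
import tritonclient.grpc as grpcclient

# Placeholder values for illustration only.
model_name = "vllm_model"
prompt = "What is Triton Inference Server?"

inputs = [grpcclient.InferInput("text_input", [1], "BYTES")]
inputs[-1].set_data_from_numpy(np.array([prompt.encode("utf-8")], dtype=np.object_))
inputs.append(grpcclient.InferInput("stream", [1], "BOOL"))
inputs[-1].set_data_from_numpy(np.array([True], dtype=bool))
# Opt in to the renamed additional output.
inputs.append(grpcclient.InferInput("return_num_output_tokens", [1], "BOOL"))
inputs[-1].set_data_from_numpy(np.array([True], dtype=bool))

responses = queue.Queue()
client = grpcclient.InferenceServerClient("localhost:8001")
client.start_stream(callback=lambda result, error: responses.put((result, error)))
client.async_stream_infer(model_name, inputs)
client.stop_stream()
client.close()

while not responses.empty():
    result, error = responses.get()
    if error is None:
        # One value per response: tokens generated since the previous response.
        print(result.as_numpy("num_output_tokens"))
```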

docs/additional_outputs.md

Lines changed: 3 additions & 3 deletions
@@ -59,7 +59,7 @@ point value will be sent on the `cumulative_logprob` output tensor.
 
 Supported since r24.11.
 
-### Number of token IDs
+### Number of Output Tokens
 
 The number of token IDs of the generated output text sent on this response. It
 is the difference in length of the token IDs generated from the last response to
@@ -68,8 +68,8 @@ presumed to be zero. See
 [here](https://github.com/vllm-project/vllm/blob/v0.6.3.post1/vllm/outputs.py#L21)
 for more details on the token IDs of the generated output text.
 
-To enable, set `return_num_token_ids` input tensor to `True`. The unsigned
-integer value will be sent on the `num_token_ids` output tensor.
+To enable, set `return_num_output_tokens` input tensor to `True`. The unsigned
+integer value will be sent on the `num_output_tokens` output tensor.
 
 Supported since r24.11.
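Since each response carries only the delta, summing the values over a streamed request recovers the total number of generated tokens. A small sketch of that accumulation, assuming `responses` is a list of `InferResult` objects collected from a streaming client such as the one above:

```python
# Sum per-response deltas; the first response's delta counts from zero.
total_output_tokens = 0
for result in responses:
    num_output_tokens_np = result.as_numpy("num_output_tokens")
    if num_output_tokens_np is not None:
        total_output_tokens += int(num_output_tokens_np[0])
print(f"Total generated tokens: {total_output_tokens}")
```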

src/model.py

Lines changed: 8 additions & 8 deletions
@@ -101,7 +101,7 @@ def _auto_complete_inputs_and_outputs(auto_complete_model_config):
                 "optional": True,
             },
             {
-                "name": "return_num_token_ids",
+                "name": "return_num_output_tokens",
                 "data_type": "TYPE_BOOL",
                 "dims": [1],
                 "optional": True,
@@ -111,7 +111,7 @@ def _auto_complete_inputs_and_outputs(auto_complete_model_config):
             {"name": "text_output", "data_type": "TYPE_STRING", "dims": [-1]},
             {"name": "finish_reason", "data_type": "TYPE_STRING", "dims": [-1]},
             {"name": "cumulative_logprob", "data_type": "TYPE_FP32", "dims": [-1]},
-            {"name": "num_token_ids", "data_type": "TYPE_UINT32", "dims": [-1]},
+            {"name": "num_output_tokens", "data_type": "TYPE_UINT32", "dims": [-1]},
         ]
 
         # Collect input and output names from the provided model config.
@@ -348,11 +348,11 @@ def _get_input_tensors(self, request):
         else:
             parameters = request.parameters()
 
-        # return_finish_reason, return_cumulative_logprob, return_num_token_ids
+        # return_finish_reason, return_cumulative_logprob, return_num_output_tokens
         additional_outputs = {
             "return_finish_reason": None,
             "return_cumulative_logprob": None,
-            "return_num_token_ids": None,
+            "return_num_output_tokens": None,
         }
         for tensor_name in additional_outputs.keys():
             tensor = pb_utils.get_input_tensor_by_name(request, tensor_name)
@@ -467,8 +467,8 @@ def _create_response(
                 )
             )
 
-        # num_token_ids
-        if additional_outputs["return_num_token_ids"]:
+        # num_output_tokens
+        if additional_outputs["return_num_output_tokens"]:
             if prev_request_output is None:
                 # this is the first response
                 prev_lens = [0] * len(request_output.outputs)
@@ -478,13 +478,13 @@ def _create_response(
                     len(prev_output.token_ids)
                     for prev_output in prev_request_output.outputs
                 ]
-            num_token_ids = [
+            num_output_tokens = [
                 (len(output.token_ids) - prev_len)
                 for output, prev_len in zip(request_output.outputs, prev_lens)
             ]
             output_tensors.append(
                 pb_utils.Tensor(
-                    "num_token_ids", np.asarray(num_token_ids, dtype=np.uint32)
+                    "num_output_tokens", np.asarray(num_output_tokens, dtype=np.uint32)
                 )
             )
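For clarity, the renamed computation above amounts to a per-output length delta between consecutive vLLM `RequestOutput`s. A standalone sketch of the same logic, with names chosen to mirror the backend code (this is an illustration, not an additional change in this commit):

```python
import numpy as np


def compute_num_output_tokens(prev_request_output, request_output):
    """Number of tokens generated since the previous streamed response, per output.

    Each `output.token_ids` holds every token ID generated so far for that
    output, so the length difference against the previous response is the
    count of newly generated tokens. For the first response there is no
    previous output, so the previous lengths are presumed to be zero.
    """
    if prev_request_output is None:
        prev_lens = [0] * len(request_output.outputs)
    else:
        prev_lens = [
            len(prev_output.token_ids) for prev_output in prev_request_output.outputs
        ]
    num_output_tokens = [
        len(output.token_ids) - prev_len
        for output, prev_len in zip(request_output.outputs, prev_lens)
    ]
    return np.asarray(num_output_tokens, dtype=np.uint32)
```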
