Your current environment
import time

from vllm import LLM, SamplingParams
from vllm.assets.audio import AudioAsset
from librosa import load as load_audio
# Create a Whisper encoder/decoder model instance
llm = LLM(
    model="openai/whisper-large-v3",  # or a local checkpoint path
    trust_remote_code=True,
    max_model_len=448,
    max_num_seqs=400,
    limit_mm_per_prompt={"audio": 1},
    kv_cache_dtype="fp8",
    task="transcription",
    dtype="bfloat16",
    enforce_eager=False,
    max_logprobs=1,
)
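# Note (assumption): kv_cache_dtype="fp8" needs hardware/kernel support for
# FP8 KV caches; if this setup lacks it, the default kv_cache_dtype="auto"
# is a safer baseline while debugging empty outputs.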
(waveform, sampling_rate) = load_audio("./sample.wav", sr=16000, mono=True)
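# Alternative (why AudioAsset is imported above): vLLM ships bundled test
# clips that can be used when no local sample.wav is available, e.g.:
# (waveform, sampling_rate) = AudioAsset("winning_call").audio_and_sample_rate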
prompts = [
    {
        "prompt": "<|startoftranscript|><|en|>",
        "multi_modal_data": {
            "audio": (waveform, sampling_rate),
        },
    }
] * 1  # increase the multiplier to batch more copies of the same request
# Tried the explicit encoder/decoder form below as well, with the same empty
# output. Note that prompt_token_ids must be integers: 'en' is a string, and
# the trailing 1 is likely not the intended token id.
# prompts = [
#     {
#         "encoder_prompt": {
#             "prompt": "",
#             "multi_modal_data": {"audio": (waveform, sampling_rate)},
#         },
#         "decoder_prompt": {
#             "prompt_token_ids": [50258, 'en', 50360, 1],
#         },
#     },
#     {  # Test explicit encoder/decoder prompt
#         "encoder_prompt": {
#             "prompt": "",
#             "multi_modal_data": {"audio": (waveform, sampling_rate)},
#         },
#         "decoder_prompt": "<|startoftranscript|>",
#     },
# ] * 1
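# A corrected sketch of the explicit form, kept commented out like the
# attempts above: generate() needs integer prompt_token_ids. The ids here
# assume openai/whisper-large-v3's vocabulary (50258 = <|startoftranscript|>,
# 50259 = <|en|>, 50360 = <|transcribe|>) and should be verified against the
# model's tokenizer before use.
# prompts = [
#     {
#         "encoder_prompt": {
#             "prompt": "",
#             "multi_modal_data": {"audio": (waveform, sampling_rate)},
#         },
#         "decoder_prompt": {"prompt_token_ids": [50258, 50259, 50360]},
#     }
# ]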
# Create a sampling params object.
sampling_params = SamplingParams(
    temperature=0,
    top_p=1.0,
    max_tokens=400,
    # With detokenize=False, vLLM skips detokenization, so output .text
    # stays empty; set detokenize=True (the default) to get text back.
    detokenize=False,
    skip_special_tokens=False,
)
start = time.time()
outputs = llm.generate(prompts, sampling_params)

# Print the outputs.
for output in outputs:
    generated_text = output.outputs[0].text
    print(f"Generated text: {generated_text!r}")

duration = time.time() - start
print("Duration:", duration)
print("RPS:", len(prompts) / duration)
Error:
Adding requests: 100%|██████████| 2/2 [00:00<00:00, 66.34it/s]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8.93it/s, est. speed input: 17.86 toks/s, output: 35.72 toks/s]
Generated text: ''
Generated text: ''
Duration: 0.2592043876647949
RPS: 7.715918769810391
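The empty Generated text: '' lines are consistent with detokenize=False in the sampling params above: vLLM then skips converting the generated token ids back to text, so output.outputs[0].text stays empty even though token ids are produced. A minimal sketch of decoding them manually, assuming the script above has already run:

```python
# Sketch: recover text manually when detokenize=False leaves .text empty.
tokenizer = llm.get_tokenizer()  # tokenizer of the loaded Whisper model
for output in outputs:
    token_ids = output.outputs[0].token_ids
    text = tokenizer.decode(token_ids, skip_special_tokens=True)
    print(f"Decoded text: {text!r}")
```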
How would you like to use vllm

I want to run inference on Whisper large-v3 via the vLLM SDK (offline, in-process) rather than the API server, which I can't consume due to infrastructure restrictions on my side. Is the code I am using correct?
vLLM==0.9.0.1
python3.10
Before submitting a new issue...
- Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.