vllm-project · hardikkgupta · Jun 22, 2025 · gemini-code-assist · Jun 23, 2025
@@ -1726,11 +1726,20 @@ class TranscriptionRequest(OpenAIBaseModel):
 
     prompt: str = Field(default="")
     """An optional text to guide the model's style or continue a previous audio
-    segment.
-
-    The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+    segment. The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
     should match the audio language.
     """
+
+    reuse_initial_prompt: bool = Field(
+        default=False,
+        description=(
+            "If True, the same `prompt` is appended to every audio chunk "
+            "after the first one.  If False (default), only the first chunk "
+            "receives the prompt."
+        ),
+    )
+
+
 
     response_format: AudioResponseFormat = Field(default="json")
     """

@@ -213,16 +213,21 @@ async def _preprocess_transcription(
         chunks = [y] if duration < 30 else self._split_audio(y, sr)
         prompts = []
         for i, chunk in enumerate(chunks):
+            # Chunks after the first get the prompt only if reuse is requested
+            if i == 0 or request.reuse_initial_prompt:
+                decoder_prompt = (
+                    f"<|startoftranscript|>{lang_token}<|transcribe|>"
+                    f"<|notimestamps|>{request.prompt}"
+                )
+            else:
+                decoder_prompt = ""
+
             prompt = {
                 "encoder_prompt": {
                     "prompt": "",
-                    "multi_modal_data": {
-                        "audio": (chunk, sr),
-                    },
+                    "multi_modal_data": {"audio": (chunk, sr)},
                 },
-                "decoder_prompt":
-                f"<|startoftranscript|>{lang_token}<|transcribe|><|notimestamps|>{request.prompt}"
-                if i == 0 else ""
+                "decoder_prompt": decoder_prompt,
             }
             prompts.append(cast(PromptType, prompt))
         return prompts, duration