Skip to content

Commit 4719460

Browse files
Fixing Chunked Prefill Test. (#19762)
Signed-off-by: Alexei V. Ivanov <[email protected]>
1 parent 466166d commit 4719460

File tree

2 files changed

+17
-3
lines changed

2 files changed

+17
-3
lines changed

.buildkite/test-pipeline.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ steps:
8989
- VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
9090

9191
- label: Chunked Prefill Test
92-
mirror_hardwares: [amdexperimental]
92+
mirror_hardwares: [amdexperimental, amdproduction]
9393
source_file_dependencies:
9494
- vllm/
9595
- tests/basic_correctness/test_chunked_prefill

tests/basic_correctness/test_chunked_prefill.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,13 @@ def use_v0_only(monkeypatch: pytest.MonkeyPatch):
4949
# NOTE: Increasing this in this suite will fail CI because we currently cannot
5050
# reset distributed env properly. Use a value > 1 just when you test.
5151
@pytest.mark.parametrize("tensor_parallel_size", [1])
52-
@pytest.mark.parametrize("attention_backend", ["FLASHINFER", "FLASH_ATTN"])
52+
@pytest.mark.parametrize("attention_backend", [
53+
pytest.param("FLASHINFER",
54+
marks=pytest.mark.skipif(
55+
current_platform.is_rocm(),
56+
reason="FLASHINFER isn't supported on ROCm")),
57+
"FLASH_ATTN"
58+
])
5359
def test_models(
5460
hf_runner: HfRunner,
5561
vllm_runner: VllmRunner,
@@ -99,7 +105,13 @@ def test_models(
99105
@multi_gpu_test(num_gpus=2)
100106
@pytest.mark.parametrize("distributed_executor_backend", ["ray", "mp"])
101107
@pytest.mark.parametrize("model", MODELS)
102-
@pytest.mark.parametrize("attention_backend", ["FLASHINFER", "FLASH_ATTN"])
108+
@pytest.mark.parametrize("attention_backend", [
109+
pytest.param("FLASHINFER",
110+
marks=pytest.mark.skipif(
111+
current_platform.is_rocm(),
112+
reason="FLASHINFER isn't supported on ROCm")),
113+
"FLASH_ATTN"
114+
])
103115
def test_models_distributed(
104116
hf_runner: HfRunner,
105117
vllm_runner: VllmRunner,
@@ -172,6 +184,8 @@ def test_models_distributed(
172184
# Due to low-precision numerical divergence, this test is too sensitive to
173185
# the async postprocessor
174186
@pytest.mark.parametrize("disable_async_output_proc", [True])
187+
@pytest.mark.skipif(current_platform.is_rocm(),
188+
reason="machete_prepack_B isn't supported on ROCm")
175189
def test_models_with_fp8_kv_cache(
176190
vllm_runner: VllmRunner,
177191
example_prompts,

0 commit comments

Comments
 (0)