
Commit 861a198

Tests' re-org (#39)
1 parent 1dab325 commit 861a198

9 files changed: +123 −68 lines

ci/L0_backend_vllm/test.sh

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 RET=0
-SUBTESTS="accuracy_test request_cancellation enabled_stream vllm_backend multi_lora"
+SUBTESTS="accuracy_test request_cancellation enabled_stream vllm_backend"

 python3 -m pip install --upgrade pip && pip3 install tritonclient[grpc]

ci/L0_backend_vllm/vllm_backend/test.sh

Lines changed: 10 additions & 3 deletions
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -30,7 +30,7 @@ source ../../common/util.sh
 TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
 SERVER=${TRITON_DIR}/bin/tritonserver
 BACKEND_DIR=${TRITON_DIR}/backends
-SERVER_ARGS="--model-repository=$(pwd)/models --backend-directory=${BACKEND_DIR} --model-control-mode=explicit --log-verbose=1"
+SERVER_ARGS="--model-repository=$(pwd)/models --backend-directory=${BACKEND_DIR} --model-control-mode=explicit --load-model=vllm_opt --log-verbose=1"
 SERVER_LOG="./vllm_backend_server.log"
 CLIENT_LOG="./vllm_backend_client.log"
 TEST_RESULT_FILE='test_results.txt'
@@ -50,6 +50,13 @@ function assert_curl_success {

 rm -rf models && mkdir -p models
 cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_opt
+# The `vllm_opt` model will be loaded on server start and stay loaded throughout
+# unit testing. To test vllm model load/unload we use a dedicated
+# `vllm_load_test` model. To ensure that vllm's memory profiler will not error out
+# on `vllm_load_test` load, we reduce "gpu_memory_utilization" for `vllm_opt`,
+# so that at least 60% of GPU memory is available for other models.
+sed -i 's/"gpu_memory_utilization": 0.5/"gpu_memory_utilization": 0.4/' models/vllm_opt/1/model.json
+cp -r models/vllm_opt models/vllm_load_test

 mkdir -p models/add_sub/1/
 wget -P models/add_sub/1/ https://raw.githubusercontent.com/triton-inference-server/python_backend/main/examples/add_sub/model.py
@@ -96,7 +103,7 @@ wait $SERVER_PID
 SERVER_ARGS="--model-repository=$(pwd)/models --backend-directory=${BACKEND_DIR} --backend-config=python,default-max-batch-size=8"
 SERVER_LOG="./vllm_test_cmdline_server.log"

-rm -rf ./models/vllm_invalid_1 ./models/vllm_invalid_2
+rm -rf ./models/vllm_invalid_1 ./models/vllm_invalid_2 ./models/vllm_load_test

 run_server
 if [ "$SERVER_PID" == "0" ]; then

ci/L0_backend_vllm/vllm_backend/vllm_backend_test.py

Lines changed: 15 additions & 15 deletions
@@ -1,4 +1,4 @@
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -48,21 +48,23 @@ def setUp(self):
         self.triton_client = grpcclient.InferenceServerClient(url="localhost:8001")
         self.vllm_model_name = "vllm_opt"
         self.python_model_name = "add_sub"
+        self.vllm_load_test = "vllm_load_test"

     def test_vllm_triton_backend(self):
         # Load both vllm and add_sub models
-        self.triton_client.load_model(self.vllm_model_name)
-        self.assertTrue(self.triton_client.is_model_ready(self.vllm_model_name))
+        self.triton_client.load_model(self.vllm_load_test)
+        self.assertTrue(self.triton_client.is_model_ready(self.vllm_load_test))
         self.triton_client.load_model(self.python_model_name)
         self.assertTrue(self.triton_client.is_model_ready(self.python_model_name))

         # Unload vllm model and test add_sub model
-        self.triton_client.unload_model(self.vllm_model_name)
-        self.assertFalse(self.triton_client.is_model_ready(self.vllm_model_name))
+        self.triton_client.unload_model(self.vllm_load_test)
+        self.assertFalse(self.triton_client.is_model_ready(self.vllm_load_test))
         self._test_python_model()

         # Load vllm model and unload add_sub model
-        self.triton_client.load_model(self.vllm_model_name)
+        self.triton_client.load_model(self.vllm_load_test)
+        self.assertTrue(self.triton_client.is_model_ready(self.vllm_load_test))
         self.triton_client.unload_model(self.python_model_name)
         self.assertFalse(self.triton_client.is_model_ready(self.python_model_name))

@@ -72,14 +74,17 @@ def test_vllm_triton_backend(self):
             sampling_parameters=SAMPLING_PARAMETERS,
             stream=False,
             send_parameters_as_tensor=True,
+            model_name=self.vllm_load_test,
         )
         self._test_vllm_model(
             prompts=PROMPTS,
             sampling_parameters=SAMPLING_PARAMETERS,
             stream=False,
             send_parameters_as_tensor=False,
+            model_name=self.vllm_load_test,
         )
-        self.triton_client.unload_model(self.vllm_model_name)
+        self.triton_client.unload_model(self.vllm_load_test)
+        self.assertFalse(self.triton_client.is_model_ready(self.vllm_load_test))

     def test_model_with_invalid_attributes(self):
         model_name = "vllm_invalid_1"
@@ -97,7 +102,6 @@ def test_exclude_input_in_output_default(self):
         in non-streaming mode.
         Expected result: prompt is returned with diffs.
         """
-        self.triton_client.load_model(self.vllm_model_name)
         prompts = [
             "The capital of France is",
         ]
@@ -112,15 +116,13 @@ def test_exclude_input_in_output_default(self):
             send_parameters_as_tensor=True,
             expected_output=expected_output,
         )
-        self.triton_client.unload_model(self.vllm_model_name)

     def test_exclude_input_in_output_false(self):
         """
         Verifying behavior for `exclude_input_in_output` = False
         in non-streaming mode.
         Expected result: prompt is returned with diffs.
         """
-        self.triton_client.load_model(self.vllm_model_name)
         # Test vllm model and unload vllm model
         prompts = [
             "The capital of France is",
@@ -137,15 +139,13 @@ def test_exclude_input_in_output_false(self):
             exclude_input_in_output=False,
             expected_output=expected_output,
         )
-        self.triton_client.unload_model(self.vllm_model_name)

     def test_exclude_input_in_output_true(self):
         """
         Verifying behavior for `exclude_input_in_output` = True
         in non-streaming mode.
         Expected result: only diffs are returned.
         """
-        self.triton_client.load_model(self.vllm_model_name)
         # Test vllm model and unload vllm model
         prompts = [
             "The capital of France is",
@@ -162,7 +162,6 @@ def test_exclude_input_in_output_true(self):
             exclude_input_in_output=True,
             expected_output=expected_output,
         )
-        self.triton_client.unload_model(self.vllm_model_name)

     def _test_vllm_model(
         self,
@@ -172,6 +171,7 @@ def _test_vllm_model(
         send_parameters_as_tensor,
         exclude_input_in_output=None,
         expected_output=None,
+        model_name="vllm_opt",
     ):
         user_data = UserData()
         number_of_vllm_reqs = len(prompts)
@@ -183,12 +183,12 @@ def _test_vllm_model(
                 i,
                 stream,
                 sampling_parameters,
-                self.vllm_model_name,
+                model_name,
                 send_parameters_as_tensor,
                 exclude_input_in_output=exclude_input_in_output,
             )
             self.triton_client.async_stream_infer(
-                model_name=self.vllm_model_name,
+                model_name=model_name,
                 request_id=request_data["request_id"],
                 inputs=request_data["inputs"],
                 outputs=request_data["outputs"],
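
The updated test drives Triton's explicit model-control API through tritonclient. A minimal, self-contained sketch of the load/unload cycle it exercises, assuming a server started with --model-control-mode=explicit reachable at localhost:8001 and a vllm_load_test model in its repository:

# Standalone illustration of the load/unload flow checked by
# test_vllm_triton_backend; not part of the commit.
import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient(url="localhost:8001")

client.load_model("vllm_load_test")             # explicit load via gRPC
assert client.is_model_ready("vllm_load_test")

client.unload_model("vllm_load_test")           # explicit unload
assert not client.is_model_ready("vllm_load_test")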

ci/L0_multi_gpu/test.sh

Lines changed: 10 additions & 47 deletions
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -25,61 +25,24 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-source ../common/util.sh
-
-TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
-SERVER=${TRITON_DIR}/bin/tritonserver
-BACKEND_DIR=${TRITON_DIR}/backends
-SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR} --model-control-mode=explicit --log-verbose=1"
-SERVER_LOG="./vllm_multi_gpu_test_server.log"
-CLIENT_LOG="./vllm_multi_gpu_test_client.log"
-TEST_RESULT_FILE='test_results.txt'
-CLIENT_PY="./vllm_multi_gpu_test.py"
-SAMPLE_MODELS_REPO="../../samples/model_repository"
-EXPECTED_NUM_TESTS=1
-
-rm -rf models && mkdir -p models
-cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_opt
-sed -i '3s/^/    "tensor_parallel_size": 2,\n/' models/vllm_opt/1/model.json
-
-python3 -m pip install --upgrade pip && pip3 install tritonclient[grpc] nvidia-ml-py3
-
 RET=0
+SUBTESTS="vllm_backend multi_lora"

-run_server
-if [ "$SERVER_PID" == "0" ]; then
-    cat $SERVER_LOG
-    echo -e "\n***\n*** Failed to start $SERVER\n***"
-    exit 1
-fi
+python3 -m pip install --upgrade pip && pip3 install tritonclient[grpc]

-set +e
-python3 $CLIENT_PY -v > $CLIENT_LOG 2>&1
+for TEST in ${SUBTESTS}; do
+    (cd ${TEST} && bash -ex test.sh && cd ..)

-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Running $CLIENT_PY FAILED. \n***"
-    RET=1
-else
-    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
     if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
+        echo "Subtest ${TEST} FAILED"
         RET=1
     fi
-fi
-set -e
-
-kill $SERVER_PID
-wait $SERVER_PID
-rm -rf models/
+done

-if [ $RET -eq 1 ]; then
-    cat $CLIENT_LOG
-    cat $SERVER_LOG
-    echo -e "\n***\n*** Multi GPU Utilization test FAILED. \n***"
+if [ $RET -eq 0 ]; then
+    echo -e "\n***\n*** vLLM Multi-GPU Tests Passed\n***"
 else
-    echo -e "\n***\n*** Multi GPU Utilization test PASSED. \n***"
+    echo -e "\n***\n*** vLLM Multi-GPU Tests FAILED\n***"
 fi

 exit $RET

ci/L0_multi_gpu/vllm_backend/test.sh

Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,85 @@
+#!/bin/bash
+# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+source ../../common/util.sh
+
+TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
+SERVER=${TRITON_DIR}/bin/tritonserver
+BACKEND_DIR=${TRITON_DIR}/backends
+SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR} --model-control-mode=explicit --log-verbose=1"
+SERVER_LOG="./vllm_multi_gpu_test_server.log"
+CLIENT_LOG="./vllm_multi_gpu_test_client.log"
+TEST_RESULT_FILE='test_results.txt'
+CLIENT_PY="./vllm_multi_gpu_test.py"
+SAMPLE_MODELS_REPO="../../../samples/model_repository"
+EXPECTED_NUM_TESTS=1
+
+rm -rf models && mkdir -p models
+cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_opt
+sed -i '3s/^/    "tensor_parallel_size": 2,\n/' models/vllm_opt/1/model.json
+
+python3 -m pip install --upgrade pip && pip3 install tritonclient[grpc] nvidia-ml-py3
+
+RET=0
+
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    cat $SERVER_LOG
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    exit 1
+fi
+
+set +e
+python3 $CLIENT_PY -v > $CLIENT_LOG 2>&1
+
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Running $CLIENT_PY FAILED. \n***"
+    RET=1
+else
+    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
+    if [ $? -ne 0 ]; then
+        cat $CLIENT_LOG
+        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
+        RET=1
+    fi
+fi
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+rm -rf models/
+
+if [ $RET -eq 1 ]; then
+    cat $CLIENT_LOG
+    cat $SERVER_LOG
+    echo -e "\n***\n*** Multi GPU Utilization test FAILED. \n***"
+else
+    echo -e "\n***\n*** Multi GPU Utilization test PASSED. \n***"
+fi
+
+exit $RET
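
This new sub-test adds "tensor_parallel_size": 2 to the model config and installs nvidia-ml-py3. As a rough, illustrative sketch (not taken from vllm_multi_gpu_test.py), per-GPU memory usage can be read with pynvml to confirm the model is actually sharded across two GPUs:

# Hedged sketch: report memory in use on each visible GPU via pynvml
# (the nvidia-ml-py3 package installed above). With tensor_parallel_size = 2,
# loading vllm_opt should produce a noticeable allocation on two devices.
import pynvml

pynvml.nvmlInit()
try:
    for i in range(pynvml.nvmlDeviceGetCount()):
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
        mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
        print(f"GPU {i}: {mem.used / 1024**2:.0f} MiB used / {mem.total / 1024**2:.0f} MiB total")
finally:
    pynvml.nvmlShutdown()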

ci/L0_multi_gpu/vllm_multi_gpu_test.py renamed to ci/L0_multi_gpu/vllm_backend/vllm_multi_gpu_test.py

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -32,7 +32,7 @@
 import tritonclient.grpc as grpcclient
 from tritonclient.utils import *

-sys.path.append("../common")
+sys.path.append("../../common")
 from test_util import TestResultCollector, UserData, callback, create_vllm_request
