Commit b71088a

oandreeva-nv and kthui authored
vLLM multi gpu tests adjustments (#65)
Co-authored-by: Jacky <[email protected]>
1 parent 0df1013 commit b71088a

2 files changed: +101 -6 lines changed

ci/L0_multi_gpu/multi_lora/test.sh

Lines changed: 98 additions & 5 deletions
@@ -52,15 +52,16 @@ cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_llama_multi_lora
 
 export SERVER_ENABLE_LORA=true
 
+# Check boolean flag value for `enable_lora`
 model_json=$(cat <<EOF
 {
     "model":"./weights/backbone/gemma-2b",
-    "disable_log_requests": "true",
+    "disable_log_requests": true,
     "gpu_memory_utilization": 0.7,
     "tensor_parallel_size": 2,
     "block_size": 16,
-    "enforce_eager": "true",
-    "enable_lora": "true",
+    "enforce_eager": true,
+    "enable_lora": true,
     "max_lora_rank": 32,
     "lora_extra_vocab_size": 256,
     "distributed_executor_backend":"ray"
@@ -110,16 +111,108 @@ set -e
 kill $SERVER_PID
 wait $SERVER_PID
 
+# Check string flag value for `enable_lora`
+model_json=$(cat <<EOF
+{
+    "model":"./weights/backbone/gemma-2b",
+    "disable_log_requests": true,
+    "gpu_memory_utilization": 0.7,
+    "tensor_parallel_size": 2,
+    "block_size": 16,
+    "enforce_eager": true,
+    "enable_lora": "true",
+    "max_lora_rank": 32,
+    "lora_extra_vocab_size": 256,
+    "distributed_executor_backend":"ray"
+}
+EOF
+)
+echo "$model_json" > models/vllm_llama_multi_lora/1/model.json
+
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    cat $SERVER_LOG
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    exit 1
+fi
+
+set +e
+python3 $CLIENT_PY -v > $CLIENT_LOG 2>&1
+
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Running $CLIENT_PY FAILED. \n***"
+    RET=1
+else
+    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
+    if [ $? -ne 0 ]; then
+        cat $CLIENT_LOG
+        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
+        RET=1
+    fi
+fi
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+
+# disable lora
+export SERVER_ENABLE_LORA=false
+# check bool flag value for `enable_lora`
+model_json=$(cat <<EOF
+{
+    "model":"./weights/backbone/gemma-2b",
+    "disable_log_requests": true,
+    "gpu_memory_utilization": 0.8,
+    "tensor_parallel_size": 2,
+    "block_size": 16,
+    "enforce_eager": true,
+    "enable_lora": false,
+    "lora_extra_vocab_size": 256,
+    "distributed_executor_backend":"ray"
+}
+EOF
+)
+echo "$model_json" > models/vllm_llama_multi_lora/1/model.json
+
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    cat $SERVER_LOG
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    exit 1
+fi
+
+set +e
+python3 $CLIENT_PY -v >> $CLIENT_LOG 2>&1
+
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Running $CLIENT_PY FAILED. \n***"
+    RET=1
+else
+    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
+    if [ $? -ne 0 ]; then
+        cat $CLIENT_LOG
+        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
+        RET=1
+    fi
+fi
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+
 # disable lora
 export SERVER_ENABLE_LORA=false
+# check string flag value for `enable_lora`
 model_json=$(cat <<EOF
 {
     "model":"./weights/backbone/gemma-2b",
-    "disable_log_requests": "true",
+    "disable_log_requests": true,
     "gpu_memory_utilization": 0.8,
     "tensor_parallel_size": 2,
     "block_size": 16,
-    "enforce_eager": "true",
+    "enforce_eager": true,
     "enable_lora": "false",
     "lora_extra_vocab_size": 256,
     "distributed_executor_backend":"ray"

src/model.py

Lines changed: 3 additions & 1 deletion
@@ -189,9 +189,11 @@ def init_engine(self):
     def setup_lora(self):
         self.enable_lora = False
 
+        # Check if `enable_lora` field is in the `model.json`,
+        # and if it is, read its contents, which can be string or bool.
         if (
             "enable_lora" in self.vllm_engine_config.keys()
-            and self.vllm_engine_config["enable_lora"].lower() == "true"
+            and str(self.vllm_engine_config["enable_lora"]).lower() == "true"
         ):
             # create Triton LoRA weights repository
             multi_lora_args_filepath = os.path.join(
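
Why the `str()` wrapper matters: after `json.loads`, a bare `true` in `model.json` becomes a Python `bool`, which has no `.lower()` method, so the old check raised `AttributeError` rather than enabling LoRA. A minimal sketch of old vs. new behavior (hypothetical standalone functions, not the actual Triton model class):

def old_check(config):
    # Pre-commit behavior: assumes the value is always a string.
    return config["enable_lora"].lower() == "true"

def new_check(config):
    # Post-commit behavior: coerce to str first, then compare.
    return str(config["enable_lora"]).lower() == "true"

assert new_check({"enable_lora": "true"})    # string form
assert new_check({"enable_lora": True})      # boolean form
assert not new_check({"enable_lora": False})

try:
    old_check({"enable_lora": True})
except AttributeError:
    print("old check breaks on JSON booleans: 'bool' object has no attribute 'lower'")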
