@@ -52,15 +52,16 @@ cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_llama_multi_lora
 
 export SERVER_ENABLE_LORA=true
 
+# Check boolean flag value for `enable_lora`
 model_json=$(cat <<EOF
 {
     "model":"./weights/backbone/gemma-2b",
-    "disable_log_requests": "true",
+    "disable_log_requests": true,
     "gpu_memory_utilization": 0.7,
     "tensor_parallel_size": 2,
     "block_size": 16,
-    "enforce_eager": "true",
-    "enable_lora": "true",
+    "enforce_eager": true,
+    "enable_lora": true,
     "max_lora_rank": 32,
     "lora_extra_vocab_size": 256,
     "distributed_executor_backend":"ray"
@@ -110,16 +111,108 @@ set -e
 kill $SERVER_PID
 wait $SERVER_PID
 
+# Check string flag value for `enable_lora`
+model_json=$(cat <<EOF
+{
+    "model":"./weights/backbone/gemma-2b",
+    "disable_log_requests": true,
+    "gpu_memory_utilization": 0.7,
+    "tensor_parallel_size": 2,
+    "block_size": 16,
+    "enforce_eager": true,
+    "enable_lora": "true",
+    "max_lora_rank": 32,
+    "lora_extra_vocab_size": 256,
+    "distributed_executor_backend":"ray"
+}
+EOF
+)
+echo "$model_json" > models/vllm_llama_multi_lora/1/model.json
+
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    cat $SERVER_LOG
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    exit 1
+fi
+
+set +e
+python3 $CLIENT_PY -v > $CLIENT_LOG 2>&1
+
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Running $CLIENT_PY FAILED. \n***"
+    RET=1
+else
+    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
+    if [ $? -ne 0 ]; then
+        cat $CLIENT_LOG
+        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
+        RET=1
+    fi
+fi
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+
+# disable lora
+export SERVER_ENABLE_LORA=false
+# check bool flag value for `enable_lora`
+model_json=$(cat <<EOF
+{
+    "model":"./weights/backbone/gemma-2b",
+    "disable_log_requests": true,
+    "gpu_memory_utilization": 0.8,
+    "tensor_parallel_size": 2,
+    "block_size": 16,
+    "enforce_eager": true,
+    "enable_lora": false,
+    "lora_extra_vocab_size": 256,
+    "distributed_executor_backend":"ray"
+}
+EOF
+)
+echo "$model_json" > models/vllm_llama_multi_lora/1/model.json
+
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    cat $SERVER_LOG
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    exit 1
+fi
+
+set +e
+python3 $CLIENT_PY -v >> $CLIENT_LOG 2>&1
+
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Running $CLIENT_PY FAILED. \n***"
+    RET=1
+else
+    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
+    if [ $? -ne 0 ]; then
+        cat $CLIENT_LOG
+        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
+        RET=1
+    fi
+fi
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+
 # disable lora
 export SERVER_ENABLE_LORA=false
+# check string flag value for `enable_lora`
 model_json=$(cat <<EOF
 {
     "model":"./weights/backbone/gemma-2b",
-    "disable_log_requests": "true",
+    "disable_log_requests": true,
     "gpu_memory_utilization": 0.8,
     "tensor_parallel_size": 2,
     "block_size": 16,
-    "enforce_eager": "true",
+    "enforce_eager": true,
     "enable_lora": "false",
     "lora_extra_vocab_size": 256,
     "distributed_executor_backend":"ray"
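
Taken together, the new cases cover all four spellings the backend has to handle: enable_lora set to true, "true", false, and "false". A quick sanity check of the illustrative coerce_bool helper above against those spellings:

    for raw in (True, "true", False, "false"):
        print(repr(raw), "->", coerce_bool(raw))
    # True -> True, 'true' -> True, False -> False, 'false' -> False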