
Commit 14fe60f

Add resolve_model_relative_to_config_file config option
Signed-off-by: Leon Kiefer <[email protected]>
1 parent: 0b9c8e2

4 files changed (+58 lines, -0 lines)

README.md

Lines changed: 4 additions & 0 deletions
@@ -130,6 +130,10 @@ Specifically,
 and
 [here](https://github.com/vllm-project/vllm/blob/ee8217e5bee5860469204ee57077a91138c9af02/vllm/engine/arg_utils.py#L201).
 
+When using local model files, specify the path to the model in the `model` field.
+By default relative paths are resolved relative to the working directory of the Triton server process.
+To specify a path relative to the `model.json` file, set the `resolve_model_relative_to_config_file` field to `true`.
+
 For multi-GPU support, EngineArgs like tensor_parallel_size can be specified in
 [model.json](samples/model_repository/vllm_model/1/model.json).
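
As a rough illustration (not part of this diff), a model.json that keeps the weights next to the config file could look like the sketch below; the ./local_model directory name mirrors the CI test further down, and the remaining EngineArgs fields are assumed example values:

    {
        "model": "./local_model",
        "resolve_model_relative_to_config_file": true,
        "disable_log_requests": true,
        "gpu_memory_utilization": 0.5
    }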

ci/L0_backend_vllm/vllm_backend/test.sh

Lines changed: 14 additions & 0 deletions
@@ -49,6 +49,8 @@ function assert_curl_success {
 }
 
 rm -rf models && mkdir -p models
+
+# operational vllm model
 cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_opt
 # `vllm_opt` model will be loaded on server start and stay loaded throughout
 # unittesting. To test vllm model load/unload we use a dedicated
@@ -58,10 +60,22 @@ cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_opt
 sed -i 's/"gpu_memory_utilization": 0.5/"gpu_memory_utilization": 0.4/' models/vllm_opt/1/model.json
 cp -r models/vllm_opt models/vllm_load_test
 
+# python model
 mkdir -p models/add_sub/1/
 wget -P models/add_sub/1/ https://raw.githubusercontent.com/triton-inference-server/python_backend/main/examples/add_sub/model.py
 wget -P models/add_sub https://raw.githubusercontent.com/triton-inference-server/python_backend/main/examples/add_sub/config.pbtxt
 
+# local vllm model
+cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_local
+sed -i 's/"facebook\/opt-125m"/".\/local_model"/' models/vllm_local/1/model.json
+sed -i '/"model": /a "resolve_model_relative_to_config_file": true,' models/vllm_local/1/model.json
+wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/config.json
+wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/merges.txt
+wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/pytorch_model.bin
+wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/special_tokens_map.json
+wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/tokenizer_config.json
+wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/vocab.json
+
 # Invalid model attribute
 cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_invalid_1/
 sed -i 's/"disable_log_requests"/"invalid_attribute"/' models/vllm_invalid_1/1/model.json

ci/L0_backend_vllm/vllm_backend/vllm_backend_test.py

Lines changed: 26 additions & 0 deletions
@@ -50,6 +50,7 @@ def setUp(self):
         self.python_model_name = "add_sub"
         self.ensemble_model_name = "ensemble_model"
         self.vllm_load_test = "vllm_load_test"
+        self.local_vllm_model_name = "vllm_local"
 
     def test_vllm_triton_backend(self):
         # Load both vllm and add_sub models
@@ -93,6 +94,31 @@ def test_vllm_triton_backend(self):
         )
         self.triton_client.unload_model(self.vllm_load_test)
         self.assertFalse(self.triton_client.is_model_ready(self.vllm_load_test))
+
+    def test_local_vllm_model(self):
+        # Load local vllm model
+        self.triton_client.load_model(self.local_vllm_model_name)
+        self.assertTrue(self.triton_client.is_model_ready(self.local_vllm_model_name))
+
+        # Test local vllm model
+        self._test_vllm_model(
+            prompts=PROMPTS,
+            sampling_parameters=SAMPLING_PARAMETERS,
+            stream=False,
+            send_parameters_as_tensor=True,
+            model_name=self.local_vllm_model_name,
+        )
+        self._test_vllm_model(
+            prompts=PROMPTS,
+            sampling_parameters=SAMPLING_PARAMETERS,
+            stream=False,
+            send_parameters_as_tensor=False,
+            model_name=self.local_vllm_model_name,
+        )
+
+        # Unload local vllm model
+        self.triton_client.unload_model(self.local_vllm_model_name)
+        self.assertFalse(self.triton_client.is_model_ready(self.local_vllm_model_name))
 
     def test_model_with_invalid_attributes(self):
         model_name = "vllm_invalid_1"

src/model.py

Lines changed: 14 additions & 0 deletions
@@ -219,6 +219,20 @@ def init_engine(self):
         # Check for LoRA config and set it up if enabled
         self.setup_lora()
 
+        # Resolve the model path relative to the config file
+        if self.vllm_engine_config.pop("resolve_model_relative_to_config_file", False):
+            new_path = os.path.abspath(
+                os.path.join(
+                    pb_utils.get_model_dir(), self.vllm_engine_config["model"]
+                )
+            )
+            # Check if the resolved path is a subdirectory of the model directory
+            if not new_path.startswith(pb_utils.get_model_dir()):
+                raise ValueError(
+                    f"Resolved model path '{new_path}' is not a subdirectory of the model directory '{pb_utils.get_model_dir()}'"
+                )
+            self.vllm_engine_config["model"] = new_path
+
         # Create an AsyncLLMEngine from the config from JSON
         aync_engine_args = AsyncEngineArgs(**self.vllm_engine_config)
         self.llm_engine = AsyncLLMEngine.from_engine_args(aync_engine_args)
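
In the CI setup above, pb_utils.get_model_dir() refers to the directory containing model.json (models/vllm_local/1), so "./local_model" resolves to models/vllm_local/1/local_model before being passed to AsyncEngineArgs. The containment check is a simple prefix comparison on the absolute path: it rejects values such as "../other_model" that would resolve outside the model directory, while accepting anything below it.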
