Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions src/sagemaker/image_uri_config/huggingface-vllm-neuronx.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
{
"inference": {
"processors": [
"inf2"
],
"version_aliases": {
"0.10": "0.10.2"
},
"versions": {
"0.10.2": {
"py_versions": [
"py310"
],
"registries": {
"af-south-1": "626614931356",
"ap-east-1": "871362719292",
"ap-east-2": "975050140332",
"ap-northeast-1": "763104351884",
"ap-northeast-2": "763104351884",
"ap-northeast-3": "364406365360",
"ap-south-1": "763104351884",
"ap-south-2": "772153158452",
"ap-southeast-1": "763104351884",
"ap-southeast-2": "763104351884",
"ap-southeast-3": "907027046896",
"ap-southeast-4": "457447274322",
"ap-southeast-5": "550225433462",
"ap-southeast-6": "633930458069",
"ap-southeast-7": "590183813437",
"ca-central-1": "763104351884",
"ca-west-1": "204538143572",
"cn-north-1": "727897471807",
"cn-northwest-1": "727897471807",
"eu-central-1": "763104351884",
"eu-central-2": "380420809688",
"eu-north-1": "763104351884",
"eu-south-1": "692866216735",
"eu-south-2": "503227376785",
"eu-west-1": "763104351884",
"eu-west-2": "763104351884",
"eu-west-3": "763104351884",
"il-central-1": "780543022126",
"me-central-1": "914824155844",
"me-south-1": "217643126080",
"mx-central-1": "637423239942",
"sa-east-1": "763104351884",
"us-east-1": "763104351884",
"us-east-2": "763104351884",
"us-gov-east-1": "446045086412",
"us-gov-west-1": "442386744353",
"us-iso-east-1": "886529160074",
"us-isob-east-1": "094389454867",
"us-isof-east-1": "303241398832",
"us-isof-south-1": "454834333376",
"us-west-1": "763104351884",
"us-west-2": "763104351884"
},
"tag_prefix": "0.10.2",
"repository": "huggingface-vllm-inference-neuronx",
"container_version": {
"inf2": "ubuntu22.04"
},
"sdk_versions": [
"sdk2.26.0"
]
}
}
}
}
104 changes: 104 additions & 0 deletions tests/unit/sagemaker/image_uris/test_huggingface_vllm_neuronx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import

import pytest
from packaging.version import parse

from sagemaker.huggingface import get_huggingface_llm_image_uri
from tests.unit.sagemaker.image_uris import expected_uris

# Expected image tags, keyed first by processor type and then by vLLM version.
VLLM_VERSIONS_MAPPING = {
    "inf2": {"0.10.2": "0.10.2-neuronx-py310-sdk2.26.0-ubuntu22.04"},
}


@pytest.mark.parametrize("load_config", ["huggingface-vllm-neuronx.json"], indirect=True)
def test_vllm_neuronx_uris(load_config):
    """Check the generated image URI for every version/region pair in the config.

    The first processor listed in the config selects the table of expected
    tags in ``VLLM_VERSIONS_MAPPING``. Versions newer than the newest entry
    of that table are still resolved, but the resulting URI is not compared
    against an expected value.
    """
    inference_cfg = load_config["inference"]
    versions_cfg = inference_cfg["versions"]
    device = inference_cfg["processors"][0]

    # A processor without an expectation table cannot be verified at all.
    if device not in VLLM_VERSIONS_MAPPING:
        raise ValueError(f"Device {device} not found in VLLM_VERSIONS_MAPPING")

    expected_tags = VLLM_VERSIONS_MAPPING[device]
    highest_version = max(expected_tags, key=parse)

    for version, version_cfg in versions_cfg.items():
        for region, account in version_cfg["registries"].items():
            # Resolved even for versions that are not compared below.
            uri = get_huggingface_llm_image_uri(
                "huggingface-vllm-neuronx",
                region=region,
                version=version,
            )

            if parse(version) > parse(highest_version):
                # No expected tag is recorded yet for versions this new.
                print(
                    f"Skipping version check for {version} as it is higher than "
                    f"the highest known version {highest_version} in VLLM_VERSIONS_MAPPING."
                )
            else:
                expected = expected_uris.huggingface_llm_framework_uri(
                    "huggingface-vllm-inference-neuronx",
                    account,
                    version,
                    expected_tags[version],
                    region=region,
                )
                assert expected == uri


@pytest.mark.parametrize("load_config", ["huggingface-vllm-neuronx.json"], indirect=True)
def test_vllm_neuronx_version_aliases(load_config):
    """Check that each version alias yields the same URI as its full version.

    Every entry of the config's ``version_aliases`` table (if any) is resolved
    in ``us-east-1`` both through the alias and through the full version it
    maps to; the two URIs must be equal.
    """

    def resolve(version):
        # Same backend and region for both lookups so only the version differs.
        return get_huggingface_llm_image_uri(
            "huggingface-vllm-neuronx",
            region="us-east-1",
            version=version,
        )

    aliases = load_config["inference"].get("version_aliases", {})
    for alias, full_version in aliases.items():
        assert resolve(alias) == resolve(full_version)


@pytest.mark.parametrize("load_config", ["huggingface-vllm-neuronx.json"], indirect=True)
def test_vllm_neuronx_all_regions(load_config):
    """Check the URI shape for every region registered for version 0.10.2.

    For each region the URI must start with that region's registry account
    followed by ``.dkr.ecr.<region>``, and must contain both the repository
    name and the version string.
    """
    version = "0.10.2"
    registries = load_config["inference"]["versions"][version]["registries"]

    for region in registries:
        uri = get_huggingface_llm_image_uri(
            "huggingface-vllm-neuronx",
            region=region,
            version=version,
        )

        expected_prefix = f"{registries[region]}.dkr.ecr.{region}"
        assert uri.startswith(expected_prefix)
        assert "huggingface-vllm-inference-neuronx" in uri
        assert version in uri