|
| 1 | +# Copyright (c) 2024 Snowflake Inc. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +"""Constants for the remote development environment plugin.""" |
| 16 | + |
| 17 | +import enum |
| 18 | +from dataclasses import dataclass |
| 19 | +from typing import Optional |
| 20 | + |
| 21 | + |
class SnowflakeCloudType(enum.Enum):
    """Cloud platforms, each identified by its lowercase string value."""

    AWS = "aws"
    AZURE = "azure"
    GCP = "gcp"

    @classmethod
    def from_value(cls, value: str) -> "SnowflakeCloudType":
        """Return the member whose value equals ``value``, ignoring case.

        Args:
            value: Cloud identifier such as ``"aws"`` or ``"AWS"``.

        Returns:
            The matching ``SnowflakeCloudType`` member.

        Raises:
            ValueError: If ``value`` is empty or does not match any member.
        """
        # Validate explicitly: an ``assert`` would be stripped under
        # ``python -O`` and must not be used for input validation.
        if not value:
            raise ValueError(
                f"'{cls.__name__}' enum value must be non-empty, got {value!r}"
            )

        # Lowercase once instead of on every loop iteration.
        normalized = value.lower()
        for member in cls:
            if member.value == normalized:
                return member
        raise ValueError(f"'{cls.__name__}' enum not found for '{value}'")
| 35 | + |
| 36 | + |
@dataclass(frozen=True)
class ComputeResources:
    """Immutable description of a set of compute resources.

    Only ``cpu`` and ``memory`` are required; the GPU fields default to
    "no GPU".
    """

    # Number of vCPU cores
    cpu: float
    # Memory in GiB
    memory: float
    # Number of GPUs
    gpu: int = 0
    # GPU model label (for example "A10G"); None when not specified
    gpu_type: Optional[str] = None
| 43 | + |
| 44 | + |
# SPCS specification constants
DEFAULT_CONTAINER_NAME = "main"
# Presumably the name of the environment variable that marks a remote dev
# session -- TODO confirm against the code that consumes it.
ENABLE_REMOTE_DEV_ENV_VAR = "IS_REMOTE_DEV"
MEMORY_VOLUME_NAME = "dshm"
# Volume name / mount path pairs
USER_WORKSPACE_VOLUME_NAME = "user-workspace"
USER_WORKSPACE_VOLUME_MOUNT_PATH = "/root/workspace"
USER_VSCODE_DATA_VOLUME_NAME = "user-vscode-data"
USER_VSCODE_DATA_VOLUME_MOUNT_PATH = "/root/.vscode-server"

# Service naming constants
SERVICE_NAME_PREFIX = "SNOW_REMOTE"

# Service status constants
SERVICE_STATUS_READY = "READY"
SERVICE_STATUS_SUSPENDED = "SUSPENDED"
SERVICE_STATUS_SUSPENDING = "SUSPENDING"
SERVICE_STATUS_PENDING = "PENDING"
SERVICE_STATUS_STARTING = "STARTING"
SERVICE_STATUS_FAILED = "FAILED"
SERVICE_STATUS_ERROR = "ERROR"
SERVICE_STATUS_UNKNOWN = "UNKNOWN"

# Service operation result constants
SERVICE_RESULT_CREATED = "created"
SERVICE_RESULT_RESUMED = "resumed"
SERVICE_RESULT_RUNNING = "running"

# Default timeout for service operations (minutes) and the interval between
# status checks (seconds)
DEFAULT_SERVICE_TIMEOUT_MINUTES = 10
STATUS_CHECK_INTERVAL_SECONDS = 10
| 75 | + |
# Default container image information
DEFAULT_IMAGE_REPO = "/snowflake/images/snowflake_images"
DEFAULT_IMAGE_CPU = "st_plat/runtime/x86/runtime_image/snowbooks"
DEFAULT_IMAGE_GPU = "st_plat/runtime/x86/generic_gpu/runtime_image/snowbooks"
DEFAULT_IMAGE_TAG = "1.7.1"

# Fraction (0.3 == 30%) of container memory to allocate for /dev/shm volume
MEMORY_VOLUME_SIZE = 0.3

# Default ports
DEFAULT_SERVER_PORT = 12020
DEFAULT_WEBSOCKET_PORT = 12021

# Endpoint names
SERVER_UI_ENDPOINT_NAME = "server-ui"
WEBSOCKET_SSH_ENDPOINT_NAME = "websocket-ssh"
RAY_DASHBOARD_ENDPOINT_NAME = "ray-dashboard"

# ML runtime health check settings
# NOTE(review): both values are strings rather than int/bool -- presumably
# because they are passed through as environment variable values; confirm.
ML_RUNTIME_HEALTH_CHECK_PORT = "5001"
ENABLE_HEALTH_CHECKS = "false"
| 97 | + |
# Ray environment variables (port numbers are stored as strings)
RAY_ENV_VARS = {
    "HEAD_CLIENT_SERVER_PORT": "10001",
    "HEAD_GCS_PORT": "12001",
    "HEAD_DASHBOARD_GRPC_PORT": "12002",
    "HEAD_DASHBOARD_PORT": "12003",
    "OBJECT_MANAGER_PORT": "12011",
    "NODE_MANAGER_PORT": "12012",
    "RUNTIME_ENV_AGENT_PORT": "12013",
    "DASHBOARD_AGENT_GRPC_PORT": "12014",
    "DASHBOARD_AGENT_LISTEN_PORT": "12015",
    "MIN_WORKER_PORT": "12031",
    "MAX_WORKER_PORT": "13000",
}

# (endpoint name, RAY_ENV_VARS key) pairs for the single-port Ray endpoints.
_RAY_PORT_ENDPOINTS = (
    ("ray-client-server-endpoint", "HEAD_CLIENT_SERVER_PORT"),
    ("ray-gcs-endpoint", "HEAD_GCS_PORT"),
    ("ray-dashboard-grpc-endpoint", "HEAD_DASHBOARD_GRPC_PORT"),
    ("ray-object-manager-endpoint", "OBJECT_MANAGER_PORT"),
    ("ray-node-manager-endpoint", "NODE_MANAGER_PORT"),
    ("ray-runtime-agent-endpoint", "RUNTIME_ENV_AGENT_PORT"),
    ("ray-dashboard-agent-grpc-endpoint", "DASHBOARD_AGENT_GRPC_PORT"),
)

# Ray endpoint configurations
# Ports are derived from RAY_ENV_VARS so that the two tables cannot drift
# apart; the resulting list is identical to spelling the ports out by hand.
RAY_ENDPOINTS = [
    {"name": endpoint_name, "port": int(RAY_ENV_VARS[env_key]), "protocol": "TCP"}
    for endpoint_name, env_key in _RAY_PORT_ENDPOINTS
] + [
    {"name": "ephemeral-port-range", "portRange": "32768-60999", "protocol": "TCP"},
    {
        "name": "ray-worker-port-range",
        "portRange": f"{RAY_ENV_VARS['MIN_WORKER_PORT']}-{RAY_ENV_VARS['MAX_WORKER_PORT']}",
        "protocol": "TCP",
    },
]
| 125 | + |
# Compute pool resource information
# Each table maps an instance family name to its ComputeResources
# (cpu in vCPU cores, memory in GiB, optional GPU count and GPU type).
# Presumably the "common" families are the ones shared by every cloud --
# TODO confirm.
COMMON_INSTANCE_FAMILIES = {
    "CPU_X64_XS": ComputeResources(cpu=1, memory=6),
    "CPU_X64_S": ComputeResources(cpu=3, memory=13),
    "CPU_X64_M": ComputeResources(cpu=6, memory=28),
    "CPU_X64_L": ComputeResources(cpu=28, memory=116),
    "HIGHMEM_X64_S": ComputeResources(cpu=6, memory=58),
}

# Instance families listed for AWS
AWS_INSTANCE_FAMILIES = {
    "HIGHMEM_X64_M": ComputeResources(cpu=28, memory=240),
    "HIGHMEM_X64_L": ComputeResources(cpu=124, memory=984),
    "GPU_NV_S": ComputeResources(cpu=6, memory=27, gpu=1, gpu_type="A10G"),
    "GPU_NV_M": ComputeResources(cpu=44, memory=178, gpu=4, gpu_type="A10G"),
    "GPU_NV_L": ComputeResources(cpu=92, memory=1112, gpu=8, gpu_type="A100"),
}

# Instance families listed for Azure
AZURE_INSTANCE_FAMILIES = {
    "HIGHMEM_X64_M": ComputeResources(cpu=28, memory=244),
    "HIGHMEM_X64_L": ComputeResources(cpu=92, memory=654),
    "GPU_NV_XS": ComputeResources(cpu=3, memory=26, gpu=1, gpu_type="T4"),
    "GPU_NV_SM": ComputeResources(cpu=32, memory=424, gpu=1, gpu_type="A10"),
    "GPU_NV_2M": ComputeResources(cpu=68, memory=858, gpu=2, gpu_type="A10"),
    "GPU_NV_3M": ComputeResources(cpu=44, memory=424, gpu=2, gpu_type="A100"),
    "GPU_NV_SL": ComputeResources(cpu=92, memory=858, gpu=4, gpu_type="A100"),
}

# Cloud type -> cloud-specific instance family table.
# NOTE(review): SnowflakeCloudType.GCP has no entry here, so indexing this
# mapping with it raises KeyError -- confirm callers handle that case.
CLOUD_INSTANCE_FAMILIES = {
    SnowflakeCloudType.AWS: AWS_INSTANCE_FAMILIES,
    SnowflakeCloudType.AZURE: AZURE_INSTANCE_FAMILIES,
}
0 commit comments