Skip to content

Commit 3137229

Browse files
committed
Add spec generation utils
Register the remote dev plugin and add remote dev container spec
1 parent 62af98e commit 3137229

File tree

8 files changed

+1609
-0
lines changed

8 files changed

+1609
-0
lines changed

src/snowflake/cli/_plugins/remote/commands.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,18 @@
2525
)
2626

2727

28+
@app.command("start", requires_connection=True)
def start_service(**options) -> None:
    """
    Start a remote development environment.

    This is a placeholder command for the remote plugin.
    Full functionality will be implemented in subsequent PRs.
    """
    # Placeholder behaviour: emit the two informational notices, in order.
    # ``options`` is accepted but not read by this body.
    notices = (
        "Start command called - functionality coming soon!",
        "Full functionality will be available in upcoming releases.",
    )
    for notice in notices:
        log.info(notice)
38+
39+
2840
@app.command("list", requires_connection=True)
2941
def list_services(**options) -> None:
3042
"""
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
# Copyright (c) 2024 Snowflake Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Constants for the remote development environment plugin."""
16+
17+
import enum
18+
from dataclasses import dataclass
19+
from typing import Optional
20+
21+
22+
class SnowflakeCloudType(enum.Enum):
    """Cloud platforms, identified by their lower-case names."""

    AWS = "aws"
    AZURE = "azure"
    GCP = "gcp"

    @classmethod
    def from_value(cls, value: str) -> "SnowflakeCloudType":
        """Return the member whose value equals ``value``, ignoring case.

        Args:
            value: Cloud name such as ``"aws"`` or ``"AWS"``.

        Returns:
            The matching enum member.

        Raises:
            ValueError: If ``value`` is empty/``None`` or matches no member.
        """
        # Validate explicitly rather than with ``assert``: assertions are
        # stripped under ``python -O``, which would let ``None`` reach
        # ``.lower()`` and fail with an unrelated AttributeError.
        if not value:
            raise ValueError(f"'{cls.__name__}' enum not found for '{value}'")

        # Normalise once instead of on every loop iteration.
        lowered = value.lower()
        for member in cls:
            if member.value == lowered:
                return member
        raise ValueError(f"'{cls.__name__}' enum not found for '{value}'")
35+
36+
37+
@dataclass(frozen=True)
class ComputeResources:
    """Immutable record of CPU, memory and GPU capacity."""

    # Number of vCPU cores
    cpu: float
    # Memory in GiB
    memory: float
    # Number of GPUs; defaults to none
    gpu: int = 0
    # GPU type label; None when not specified
    gpu_type: Optional[str] = None
43+
44+
45+
# SPCS specification: container, environment variable and volume names/paths
DEFAULT_CONTAINER_NAME: str = "main"
ENABLE_REMOTE_DEV_ENV_VAR: str = "IS_REMOTE_DEV"
MEMORY_VOLUME_NAME: str = "dshm"
USER_WORKSPACE_VOLUME_NAME: str = "user-workspace"
USER_WORKSPACE_VOLUME_MOUNT_PATH: str = "/root/workspace"
USER_VSCODE_DATA_VOLUME_NAME: str = "user-vscode-data"
USER_VSCODE_DATA_VOLUME_MOUNT_PATH: str = "/root/.vscode-server"

# Prefix used when naming services
SERVICE_NAME_PREFIX: str = "SNOW_REMOTE"

# Service status values
SERVICE_STATUS_READY: str = "READY"
SERVICE_STATUS_SUSPENDED: str = "SUSPENDED"
SERVICE_STATUS_SUSPENDING: str = "SUSPENDING"
SERVICE_STATUS_PENDING: str = "PENDING"
SERVICE_STATUS_STARTING: str = "STARTING"
SERVICE_STATUS_FAILED: str = "FAILED"
SERVICE_STATUS_ERROR: str = "ERROR"
SERVICE_STATUS_UNKNOWN: str = "UNKNOWN"

# Service operation result values
SERVICE_RESULT_CREATED: str = "created"
SERVICE_RESULT_RESUMED: str = "resumed"
SERVICE_RESULT_RUNNING: str = "running"

# Default timeout (minutes) and status polling interval (seconds) for
# service operations
DEFAULT_SERVICE_TIMEOUT_MINUTES: int = 10
STATUS_CHECK_INTERVAL_SECONDS: int = 10

# Default container image: repository, CPU/GPU image names and tag
DEFAULT_IMAGE_REPO: str = "/snowflake/images/snowflake_images"
DEFAULT_IMAGE_CPU: str = "st_plat/runtime/x86/runtime_image/snowbooks"
DEFAULT_IMAGE_GPU: str = "st_plat/runtime/x86/generic_gpu/runtime_image/snowbooks"
DEFAULT_IMAGE_TAG: str = "1.7.1"

# Share of container memory to allocate for the /dev/shm volume.
# NOTE(review): 0.3 reads as a fraction (30%) rather than a percentage;
# confirm against the code that consumes this value.
MEMORY_VOLUME_SIZE: float = 0.3

# Default ports
DEFAULT_SERVER_PORT: int = 12020
DEFAULT_WEBSOCKET_PORT: int = 12021

# Endpoint names
SERVER_UI_ENDPOINT_NAME: str = "server-ui"
WEBSOCKET_SSH_ENDPOINT_NAME: str = "websocket-ssh"
RAY_DASHBOARD_ENDPOINT_NAME: str = "ray-dashboard"

# ML runtime health check settings. Both values are strings, not int/bool
# (presumably because they are passed through as environment variable
# values; confirm against the caller).
ML_RUNTIME_HEALTH_CHECK_PORT: str = "5001"
ENABLE_HEALTH_CHECKS: str = "false"
97+
98+
# Ray environment variables: variable name -> port number, kept as a string
RAY_ENV_VARS = {
    "HEAD_CLIENT_SERVER_PORT": "10001",
    "HEAD_GCS_PORT": "12001",
    "HEAD_DASHBOARD_GRPC_PORT": "12002",
    "HEAD_DASHBOARD_PORT": "12003",
    "OBJECT_MANAGER_PORT": "12011",
    "NODE_MANAGER_PORT": "12012",
    "RUNTIME_ENV_AGENT_PORT": "12013",
    "DASHBOARD_AGENT_GRPC_PORT": "12014",
    "DASHBOARD_AGENT_LISTEN_PORT": "12015",
    "MIN_WORKER_PORT": "12031",
    "MAX_WORKER_PORT": "13000",
}

# Ray endpoint configurations, all TCP. Single-port endpoints come first
# (integer "port"), followed by range endpoints (string "portRange").
RAY_ENDPOINTS = [
    *(
        {"name": endpoint_name, "port": port_number, "protocol": "TCP"}
        for endpoint_name, port_number in (
            ("ray-client-server-endpoint", 10001),
            ("ray-gcs-endpoint", 12001),
            ("ray-dashboard-grpc-endpoint", 12002),
            ("ray-object-manager-endpoint", 12011),
            ("ray-node-manager-endpoint", 12012),
            ("ray-runtime-agent-endpoint", 12013),
            ("ray-dashboard-agent-grpc-endpoint", 12014),
        )
    ),
    *(
        {"name": endpoint_name, "portRange": port_span, "protocol": "TCP"}
        for endpoint_name, port_span in (
            ("ephemeral-port-range", "32768-60999"),
            ("ray-worker-port-range", "12031-13000"),
        )
    ),
]
125+
126+
# Compute pool resource information.
# Each table row below is: (family name, vCPU cores, memory in GiB[, GPU
# count, GPU type]). Rows without GPUs spell out 0/None, which equal the
# ComputeResources defaults.

# Families listed without a cloud qualifier
COMMON_INSTANCE_FAMILIES = {
    family: ComputeResources(cpu=cores, memory=mem_gib)
    for family, cores, mem_gib in (
        ("CPU_X64_XS", 1, 6),
        ("CPU_X64_S", 3, 13),
        ("CPU_X64_M", 6, 28),
        ("CPU_X64_L", 28, 116),
        ("HIGHMEM_X64_S", 6, 58),
    )
}

# AWS-specific families
AWS_INSTANCE_FAMILIES = {
    family: ComputeResources(cpu=cores, memory=mem_gib, gpu=gpus, gpu_type=model)
    for family, cores, mem_gib, gpus, model in (
        ("HIGHMEM_X64_M", 28, 240, 0, None),
        ("HIGHMEM_X64_L", 124, 984, 0, None),
        ("GPU_NV_S", 6, 27, 1, "A10G"),
        ("GPU_NV_M", 44, 178, 4, "A10G"),
        ("GPU_NV_L", 92, 1112, 8, "A100"),
    )
}

# Azure-specific families
AZURE_INSTANCE_FAMILIES = {
    family: ComputeResources(cpu=cores, memory=mem_gib, gpu=gpus, gpu_type=model)
    for family, cores, mem_gib, gpus, model in (
        ("HIGHMEM_X64_M", 28, 244, 0, None),
        ("HIGHMEM_X64_L", 92, 654, 0, None),
        ("GPU_NV_XS", 3, 26, 1, "T4"),
        ("GPU_NV_SM", 32, 424, 1, "A10"),
        ("GPU_NV_2M", 68, 858, 2, "A10"),
        ("GPU_NV_3M", 44, 424, 2, "A100"),
        ("GPU_NV_SL", 92, 858, 4, "A100"),
    )
}

# Per-cloud lookup of the cloud-specific tables.
# NOTE(review): there is no entry for SnowflakeCloudType.GCP, so indexing by
# that member raises KeyError; confirm that callers handle the missing key.
CLOUD_INSTANCE_FAMILIES = {
    SnowflakeCloudType.AWS: AWS_INSTANCE_FAMILIES,
    SnowflakeCloudType.AZURE: AZURE_INSTANCE_FAMILIES,
}

0 commit comments

Comments
 (0)