Skip to content

Commit bcf3056

Browse files
committed
(spike) using rhoai lls
1 parent: 528e0b9 · commit: bcf3056

File tree

1 file changed

+44
-10
lines changed

1 file changed

+44
-10
lines changed

docker-compose.yaml

Lines changed: 44 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,54 @@
11
services:
2+
# vLLM service for Red Hat distribution
3+
vllm:
4+
image: vllm/vllm-openai:latest
5+
platform: linux/amd64
6+
container_name: vllm
7+
ports:
8+
- "8000:8000"
9+
environment:
10+
- MODEL_NAME=${INFERENCE_MODEL:-microsoft/DialoGPT-small}
11+
- HOST=0.0.0.0
12+
- PORT=8000
13+
volumes:
14+
- vllm_models:/root/.cache/huggingface
15+
networks:
16+
- lightspeednet
17+
healthcheck:
18+
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
19+
interval: 10s
20+
timeout: 5s
21+
retries: 3
22+
start_period: 30s
23+
24+
# Red Hat llama-stack distribution in standalone mode
225
llama-stack:
3-
build:
4-
context: .
5-
dockerfile: test.containerfile
26+
image: quay.io/opendatahub/llama-stack:odh
27+
platform: linux/amd64
628
container_name: llama-stack
729
ports:
8-
- "8321:8321" # Expose llama-stack on 8321 (adjust if needed)
9-
volumes:
10-
- ./run.yaml:/app-root/run.yaml:Z
30+
- "8321:8321"
1131
environment:
32+
# Standalone mode configuration
33+
- STANDALONE=true
34+
- VLLM_URL=http://vllm:8000/v1
35+
- INFERENCE_MODEL=${INFERENCE_MODEL:-microsoft/DialoGPT-small}
36+
37+
# Optional Configuration
1238
- OPENAI_API_KEY=${OPENAI_API_KEY}
39+
- BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
40+
- TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
41+
depends_on:
42+
vllm:
43+
condition: service_healthy
1344
networks:
1445
- lightspeednet
1546
healthcheck:
1647
test: ["CMD", "curl", "-f", "http://localhost:8321/v1/health"]
17-
interval: 10s # how often to run the check
18-
timeout: 5s # how long to wait before considering it failed
19-
retries: 3 # how many times to retry before marking as unhealthy
20-
start_period: 15s # time to wait before starting checks
48+
interval: 10s
49+
timeout: 5s
50+
retries: 3
51+
start_period: 15s
2152

2253
lightspeed-stack:
2354
build:
@@ -42,6 +73,9 @@ services:
4273
retries: 3 # how many times to retry before marking as unhealthy
4374
start_period: 5s # time to wait before starting checks
4475

76+
volumes:
77+
vllm_models:
78+
4579
networks:
4680
lightspeednet:
4781
driver: bridge

0 commit comments

Comments
 (0)