Skip to content

Commit 908a4ee

Browse files
committed
Adapt ChatQnA vLLM Gaudi parameters to latest changes
Signed-off-by: Eero Tamminen <[email protected]>
1 parent f38b1b0 commit 908a4ee

File tree

2 files changed

+10
-25
lines changed

2 files changed

+10
-25
lines changed

helm-charts/chatqna/gaudi-vllm-values.yaml

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,18 @@ vllm:
30 30
periodSeconds: 5
31 31
timeoutSeconds: 1
32 32

33-
# TODO: these are taken from GenAIExamples HPU manifest as-is
34-
# vLLM chart needs to adopt / apply relevant ones
35-
HABANA_LOGS: "/tmp/habana_logs"
36-
NUMBA_CACHE_DIR: "/tmp"
33+
# TODO: GenAIExamples HPU manifest mentions additional env vars:
34+
# https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm.yaml#L194
35+
# should they be specified here and/or in vLLM chart values?
37 36
PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
38 37
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
39-
HF_HOME: "/tmp/.cache/huggingface"
40-
GPU_MEMORY_UTILIZATION: "0.5"
41-
DTYPE: "auto"
42-
TENSOR_PARALLEL_SIZE: "1"
43-
BLOCK_SIZE: "128"
44-
MAX_NUM_SEQS: "256"
45-
MAX_SEQ_LEN_TO_CAPTURE: "2048"
38+
39+
extraCmdArgs: [
40+
"--tensor-parallel-size", "1",
41+
"--block-size", "128",
42+
"--max-num-seqs", "256",
43+
"--max-seq_len-to-capture", "2048"
44+
]
46 45

47 46

48 47
# Reranking: second largest bottleneck when reranking is in use

helm-charts/chatqna/values.yaml

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -71,21 +71,7 @@ tgi:
71 71
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
72 72
vllm:
73 73
enabled: false
74-
# TODO: manifest in GenAIExamples uses "meta-llama/Meta-Llama-3-8B-Instruct" instead?
75 74
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
76-
# TODO: these are non-redundant/non-broken options used by Agent component,
77-
# but I think their values should be handled inside vLLM component, with
78-
# deployment applying numbers set in configMap, based on values YAML file
79-
# variables.
80-
extraCmdArgs: [
81-
"--enforce-eager",
82-
"--tensor-parallel-size", "1",
83-
"--dtype", "auto",
84-
"--block-size", "128",
85-
"--max-num-seqs", "256",
86-
"--max-seq_len-to-capture", "2048",
87-
"--gpu-memory-utilization", "0.5"
88-
]
89 75

90 76
# disable guardrails-usvc by default
91 77
# See guardrails-values.yaml for guardrail related options

0 commit comments

Comments
 (0)