2 files changed: +10 -25 lines changed
Original file line number | Diff line number | Diff line change
@@ -30,19 +30,18 @@ vllm:
30
30
periodSeconds: 5
31
31
timeoutSeconds: 1
32
32
33
- # TODO: these are taken from GenAIExamples HPU manifest as-is
34
- # vLLM chart needs to adopt / apply relevant ones
35
- HABANA_LOGS : " /tmp/habana_logs"
36
- NUMBA_CACHE_DIR : " /tmp"
33
+ # TODO: GenAIExamples HPU manifest mentions additional env vars:
34
+ # https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm.yaml#L194
35
+ # should they be specified here and/or in vLLM chart values?
37
36
PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
38
37
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
39
- HF_HOME : " /tmp/.cache/huggingface "
40
- GPU_MEMORY_UTILIZATION : " 0.5 "
41
- DTYPE : " auto "
42
- TENSOR_PARALLEL_SIZE : " 1 "
43
- BLOCK_SIZE : " 128 "
44
- MAX_NUM_SEQS : " 256 "
45
- MAX_SEQ_LEN_TO_CAPTURE : " 2048 "
38
+
39
+ extraCmdArgs: [
40
+ "--tensor-parallel-size", "1",
41
+ "--block-size", "128",
42
+ "--max-num-seqs", "256",
43
+ "--max-seq_len-to-capture", "2048"
44
+ ]
46
45
47
46
48
47
# Reranking: second largest bottleneck when reranking is in use
Original file line number Diff line number Diff line change 71
71
LLM_MODEL_ID : Intel/neural-chat-7b-v3-3
72
72
vllm :
73
73
enabled : false
74
- # TODO: manifest in GenAIExamples uses "meta-llama/Meta-Llama-3-8B-Instruct" instead?
75
74
LLM_MODEL_ID : Intel/neural-chat-7b-v3-3
76
- # TODO: these are non-redundant/non-broken options used by Agent component,
77
- # but I think their values should be handled inside vLLM component, with
78
- # deployment applying numbers set in configMap, based on values YAML file
79
- # variables.
80
- extraCmdArgs : [
81
- " --enforce-eager" ,
82
- " --tensor-parallel-size" , "1",
83
- " --dtype" , "auto",
84
- " --block-size" , "128",
85
- " --max-num-seqs" , "256",
86
- " --max-seq_len-to-capture" , "2048",
87
- " --gpu-memory-utilization" , "0.5"
88
- ]
89
75
90
76
# disable guardrails-usvc by default
91
77
# See guardrails-values.yaml for guardrail related options
You can’t perform that action at this time.
0 commit comments