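This change renames the `LLM_MODEL` environment variable to `LLM_MODEL_ID` across the vLLM text-generation microservice (LangChain and LlamaIndex variants): the README, the compose files, the launch scripts, the `os.getenv` defaults in both `llm.py` files, and the test scripts.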
2 changes: 1 addition & 1 deletion comps/llms/text-generation/README.md

````diff
@@ -235,7 +235,7 @@ docker run \
   -e no_proxy=${no_proxy} \
   -e vLLM_LLM_ENDPOINT=$vLLM_LLM_ENDPOINT \
   -e HF_TOKEN=$HF_TOKEN \
-  -e LLM_MODEL=$LLM_MODEL \
+  -e LLM_MODEL_ID=$LLM_MODEL \
   opea/llm-vllm:latest
 ```
````
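Note that only the variable name inside the container changes: the host-side shell variable is still `$LLM_MODEL`, so `-e LLM_MODEL_ID=$LLM_MODEL` maps the existing host value onto the new key the microservice now reads.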
```diff
@@ -38,7 +38,7 @@ services:
       https_proxy: ${https_proxy}
       vLLM_ENDPOINT: ${vLLM_ENDPOINT}
       HF_TOKEN: ${HF_TOKEN}
-      LLM_MODEL: ${LLM_MODEL}
+      LLM_MODEL_ID: ${LLM_MODEL}
     restart: unless-stopped

 networks:
```
```diff
@@ -9,6 +9,6 @@ docker run -d --rm \
   -e https_proxy=$https_proxy \
   -e vLLM_ENDPOINT=$vLLM_ENDPOINT \
   -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \
-  -e LLM_MODEL=$LLM_MODEL \
+  -e LLM_MODEL_ID=$LLM_MODEL \
   -e LOGFLAG=$LOGFLAG \
   opea/llm-vllm:latest
```
2 changes: 1 addition & 1 deletion comps/llms/text-generation/vllm/langchain/llm.py

```diff
@@ -83,7 +83,7 @@ async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, Searche
     headers = {}
     if access_token:
         headers = {"Authorization": f"Bearer {access_token}"}
-    model_name = input.model if input.model else os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
+    model_name = input.model if input.model else os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
     llm_endpoint = get_llm_endpoint(model_name)
     llm = VLLMOpenAI(
         openai_api_key="EMPTY", openai_api_base=llm_endpoint + "/v1", model_name=model_name, default_headers=headers
```
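For readers skimming the hunk above, this is the lookup order it changes, extracted into a minimal standalone sketch; `resolve_model_name` is a hypothetical helper for illustration, not a function in the PR:

```python
import os
from typing import Optional

# Sketch of the resolution order after this change (not the full
# microservice): an explicit request `model` wins, then the LLM_MODEL_ID
# environment variable, then the hard-coded default.
def resolve_model_name(requested: Optional[str]) -> str:
    if requested:
        return requested
    return os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
```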
```diff
@@ -38,7 +38,7 @@ services:
       https_proxy: ${https_proxy}
       vLLM_ENDPOINT: ${vLLM_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      LLM_MODEL: ${LLM_MODEL}
+      LLM_MODEL_ID: ${LLM_MODEL}
     restart: unless-stopped

 networks:
```
```diff
@@ -9,6 +9,6 @@ docker run -d --rm \
   -e https_proxy=$https_proxy \
   -e vLLM_ENDPOINT=$vLLM_ENDPOINT \
   -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \
-  -e LLM_MODEL=$LLM_MODEL \
+  -e LLM_MODEL_ID=$LLM_MODEL \
   -e LOGFLAG=$LOGFLAG \
   opea/llm-vllm-llamaindex:latest
```
2 changes: 1 addition & 1 deletion comps/llms/text-generation/vllm/llama_index/llm.py

```diff
@@ -43,7 +43,7 @@ async def llm_generate(input: LLMParamsDoc):
     if logflag:
         logger.info(input)
     llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8008")
-    model_name = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
+    model_name = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
     llm = OpenAILike(
         api_key="fake",
         api_base=llm_endpoint + "/v1",
```
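Because the rename is a straight swap, a deployment that still exports only the old key would silently fall through to the default model. A hypothetical transitional lookup (not part of this PR) could honor both keys during rollout:

```python
import os

# Hypothetical transitional lookup, not in this PR: prefer the new
# LLM_MODEL_ID key, fall back to the legacy LLM_MODEL key, then the default.
model_name = os.getenv(
    "LLM_MODEL_ID",
    os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct"),
)
```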
```diff
@@ -58,7 +58,7 @@ function start_service() {
         --ipc=host \
         -e vLLM_ENDPOINT=$vLLM_ENDPOINT \
         -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \
-        -e LLM_MODEL=$LLM_MODEL \
+        -e LLM_MODEL_ID=$LLM_MODEL \
         opea/llm-vllm:comps

     # check whether vllm ray is fully ready
```
```diff
@@ -57,7 +57,7 @@ function start_service() {
         --ipc=host \
         -e vLLM_ENDPOINT=$vLLM_ENDPOINT \
         -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \
-        -e LLM_MODEL=$LLM_MODEL \
+        -e LLM_MODEL_ID=$LLM_MODEL \
        opea/llm-vllm-llamaindex:comps

     # check whether vllm ray is fully ready
```