44 changes: 44 additions & 0 deletions .github/workflows/build-custom-router.yml
@@ -0,0 +1,44 @@
name: Build Custom Router Image

on:
  push:
    branches:
      - fix-max-model-len
      - main
  workflow_dispatch:

jobs:
  build:
    permissions:
      contents: read
      packages: write
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Login to GitHub Container Registry (GHCR)
      - name: Login to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: docker/Dockerfile
          push: true
          tags: |
            ghcr.io/${{ github.repository }}/router:latest
            ghcr.io/${{ github.repository }}/router:max-model-len-fix
            ghcr.io/${{ github.repository }}/router:${{ github.sha }}
          cache-from: type=registry,ref=ghcr.io/${{ github.repository }}/router:buildcache
          cache-to: type=registry,ref=ghcr.io/${{ github.repository }}/router:buildcache,mode=max
1 change: 1 addition & 0 deletions src/vllm_router/protocols.py
@@ -49,6 +49,7 @@ class ModelCard(OpenAIBaseModel):
    owned_by: str = "vllm"
    root: Optional[str] = None
    parent: Optional[str] = None
    max_model_len: Optional[int] = None


class ModelList(OpenAIBaseModel):
1 change: 1 addition & 0 deletions src/vllm_router/routers/main_router.py
@@ -152,6 +152,7 @@ async def show_models():
                created=model_info.created,
                owned_by=model_info.owned_by,
                parent=model_info.parent,
                max_model_len=model_info.max_model_len,
Contributor review comment (severity: medium):

This change correctly passes the max_model_len to the ModelCard. To ensure this functionality is robust and to prevent future regressions, it would be beneficial to add a unit test for the /v1/models endpoint. The test should verify that when a model's ModelInfo includes a max_model_len, this value is correctly included in the API response.
            )
            model_cards.append(model_card)
            existing_models.add(model_id)
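
Following up on the reviewer's suggestion, a minimal endpoint test could look roughly like the sketch below. It is illustrative only: it assumes the router app is FastAPI-based, that show_models() obtains per-backend model metadata through a get_service_discovery()-style accessor, and that the app object lives in a vllm_router.app module; those patch targets, attribute names, and fixture values are assumptions, not the project's actual API.

# Hedged sketch of a /v1/models unit test; module paths, the discovery accessor,
# and the shape of its return value are assumptions and may need adjusting.
from unittest.mock import MagicMock, patch

from fastapi.testclient import TestClient


def test_show_models_includes_max_model_len():
    # Fake model metadata as the discovery layer might report it.
    fake_model = MagicMock()
    fake_model.created = 1700000000
    fake_model.owned_by = "vllm"
    fake_model.parent = None
    fake_model.max_model_len = 8192

    fake_endpoint = MagicMock()
    fake_endpoint.model_info = {"facebook/opt-125m": fake_model}

    fake_discovery = MagicMock()
    fake_discovery.get_endpoint_info.return_value = [fake_endpoint]

    # Patch target is a guess at where main_router resolves service discovery.
    with patch(
        "vllm_router.routers.main_router.get_service_discovery",
        return_value=fake_discovery,
    ):
        from vllm_router.app import app  # assumed location of the FastAPI app

        client = TestClient(app)
        response = client.get("/v1/models")

    assert response.status_code == 200
    card = next(m for m in response.json()["data"] if m["id"] == "facebook/opt-125m")
    assert card["max_model_len"] == 8192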
3 changes: 3 additions & 0 deletions src/vllm_router/service_discovery.py
@@ -50,6 +50,7 @@ class ModelInfo:
    root: Optional[str] = None
    parent: Optional[str] = None
    is_adapter: bool = False
    max_model_len: Optional[int] = None
Contributor review comment (severity: medium):

While max_model_len is correctly added to ModelInfo and populated for Kubernetes-based service discovery, StaticServiceDiscovery currently has no mechanism to configure this value; it will always default to None. To ensure feature parity across discovery methods, consider enhancing StaticServiceDiscovery to allow static configuration of max_model_len for each model. This could be done by adding a max_model_lens list to its constructor, similar to how urls and models are handled.


    @classmethod
    def from_dict(cls, data: Dict) -> "ModelInfo":
@@ -62,6 +63,7 @@ def from_dict(cls, data: Dict) -> "ModelInfo":
            root=data.get("root", None),
            parent=data.get("parent", None),
            is_adapter=data.get("parent") is not None,
            max_model_len=data.get("max_model_len", None),
        )

    def to_dict(self) -> Dict:
@@ -74,6 +76,7 @@ def to_dict(self) -> Dict:
            "root": self.root,
            "parent": self.parent,
            "is_adapter": self.is_adapter,
            "max_model_len": self.max_model_len,
        }


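
To make the second review comment more concrete, here is a rough sketch of how a max_model_lens list could be threaded through a static discovery class alongside urls and models. The class and method names below are illustrative only (the real StaticServiceDiscovery constructor takes more arguments and exposes a different interface); the one detail taken from this diff is that ModelInfo.from_dict appears to tolerate missing keys, so unspecified fields keep their defaults.

# Illustrative sketch only: the real StaticServiceDiscovery has a richer
# constructor and interface; this shows just the max_model_lens plumbing.
from typing import List, Optional

from vllm_router.service_discovery import ModelInfo


class StaticServiceDiscoverySketch:
    def __init__(
        self,
        urls: List[str],
        models: List[str],
        max_model_lens: Optional[List[Optional[int]]] = None,
    ):
        if max_model_lens is not None and len(max_model_lens) != len(models):
            raise ValueError("max_model_lens must have one entry per model")
        self.urls = urls
        self.models = models
        # One (possibly None) context length per statically configured model.
        self.max_model_lens = max_model_lens or [None] * len(models)

    def model_infos(self) -> List[ModelInfo]:
        # ModelInfo.from_dict appears to default missing keys, so only the
        # statically known fields are supplied here.
        return [
            ModelInfo.from_dict({"id": model, "max_model_len": max_len})
            for model, max_len in zip(self.models, self.max_model_lens)
        ]

On the CLI side this would pair naturally with a flag mirroring however urls and models are currently passed in, but the flag name and parsing are left out here since they are not shown in this diff.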