(spike) Use the RHOAI llama-stack (lls) image

gallettilance · gallettilance · commit b4db84f26c3a · 2025-10-01T10:41:45.000-04:00
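Build the llama-stack service from the Red Hat OpenShift AI (RHOAI)
llama-stack image, adding faiss-cpu on top via Dockerfile.rh-llama-stack.
Bump llama-stack and llama-stack-client from 0.2.21 to 0.2.23 (docs and
examples updated to match), and enable the files API with an
inline::localfs provider in run.yaml.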
diff --git a/Dockerfile.rh-llama-stack b/Dockerfile.rh-llama-stack
@@ -0,0 +1,9 @@
+# Custom Red Hat llama-stack image that adds dependencies missing from the base image
+FROM quay.io/opendatahub/llama-stack:rhoai-v2.25-latest
+
+# Install faiss-cpu, which is missing from the base image (root is needed for pip install)
+USER root
+RUN pip install faiss-cpu==1.11.0
+
+# Switch back to the original user
+USER 1001
diff --git a/README.md b/README.md
@@ -245,7 +245,7 @@ version = "0.1.0"
 description = "Llama Stack runner"
 authors = []
 dependencies = [
-    "llama-stack==0.2.21",
+    "llama-stack==0.2.23",
     "fastapi>=0.115.12",
     "opentelemetry-sdk>=1.34.0",
     "opentelemetry-exporter-otlp>=1.34.0",
diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -1,23 +1,27 @@
 services:
+  # Red Hat llama-stack distribution with FAISS
   llama-stack:
     build:
       context: .
-      dockerfile: test.containerfile
+      dockerfile: Dockerfile.rh-llama-stack
+    platform: linux/amd64
     container_name: llama-stack
     ports:
-      - "8321:8321"  # Expose llama-stack on 8321 (adjust if needed)
+      - "8321:8321"
     volumes:
-      - ./run.yaml:/app-root/run.yaml:Z
+      - ./run.yaml:/opt/app-root/run.yaml:Z
     environment:
       - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
+      - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
     networks:
       - lightspeednet
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8321/v1/health"]
-      interval: 10s   # how often to run the check
-      timeout: 5s    # how long to wait before considering it failed
-      retries: 3      # how many times to retry before marking as unhealthy
-      start_period: 15s # time to wait before starting checks
+      interval: 10s
+      timeout: 5s
+      retries: 3
+      start_period: 30s
 
   lightspeed-stack:
     build:
@@ -44,4 +48,4 @@ services:
 
 networks:
   lightspeednet:
-    driver: bridge
+    driver: bridge
diff --git a/docs/deployment_guide.md b/docs/deployment_guide.md
@@ -676,7 +676,7 @@ a4982f43195537b9eb1cec510fe6655f245d6d4b7236a4759808115d5d719972
     description = "Default template for PDM package"
     authors = []
     dependencies = [
-        "llama-stack==0.2.18",
+        "llama-stack==0.2.23",
         "fastapi>=0.115.12",
         "opentelemetry-sdk>=1.34.0",
         "opentelemetry-exporter-otlp>=1.34.0",
diff --git a/docs/getting_started.md b/docs/getting_started.md
@@ -24,7 +24,7 @@ It is possible to run Lightspeed Core Stack service with Llama Stack "embedded"
 1. Add and install all required dependencies
     ```bash
     uv add \
-    "llama-stack==0.2.21" \
+    "llama-stack==0.2.23" \
     "fastapi>=0.115.12" \
     "opentelemetry-sdk>=1.34.0" \
     "opentelemetry-exporter-otlp>=1.34.0" \
diff --git a/examples/pyproject.llamastack.toml b/examples/pyproject.llamastack.toml
@@ -4,7 +4,7 @@ version = "0.1.0"
 description = "Default template for PDM package"
 authors = []
 dependencies = [
-    "llama-stack==0.2.21",
+    "llama-stack==0.2.23",
     "fastapi>=0.115.12",
     "opentelemetry-sdk>=1.34.0",
     "opentelemetry-exporter-otlp>=1.34.0",
diff --git a/pyproject.toml b/pyproject.toml
@@ -28,8 +28,8 @@ dependencies = [
     # Used by authentication/k8s integration
     "kubernetes>=30.1.0",
     # Used to call Llama Stack APIs
-    "llama-stack==0.2.21",
-    "llama-stack-client==0.2.21",
+    "llama-stack==0.2.23",
+    "llama-stack-client==0.2.23",
     # Used by Logger
     "rich>=14.0.0",
     # Used by JWK token auth handler
diff --git a/run.yaml b/run.yaml
@@ -12,6 +12,7 @@ apis:
   - telemetry
   - tool_runtime
   - vector_io
+  - files
 benchmarks: []
 container_image: null
 datasets: []
@@ -105,6 +106,14 @@ providers:
     - provider_id: rag-runtime
       provider_type: inline::rag-runtime
       config: {}
+  files:
+    - provider_id: meta-reference-files
+      provider_type: inline::localfs
+      config:
+        storage_dir: .llama/distributions/ollama/files
+        metadata_store:
+          type: sqlite
+          db_path: .llama/distributions/ollama/files_metadata.db
   vector_io:
   - config:
       kvstore:
diff --git a/test.containerfile b/test.containerfile
diff --git a/uv.lock b/uv.lock