
feat(mlx): add mlx backend #6049

Merged · 14 commits · Aug 22, 2025

Changes from all commits
15 changes: 14 additions & 1 deletion .github/workflows/backend.yml
@@ -945,6 +945,19 @@ jobs:
backend: "kitten-tts"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
mlx-darwin:
uses: ./.github/workflows/backend_build_darwin.yml
with:
backend: "mlx"
build-type: "mps"
go-version: "1.24.x"
tag-suffix: "-metal-darwin-arm64-mlx"
runs-on: "macOS-14"
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
llama-cpp-darwin:
runs-on: macOS-14
strategy:
@@ -1118,4 +1131,4 @@ jobs:
run: |
for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do
crane push llama-cpp.tar $tag
done
done
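
The new mlx-darwin job above is a thin wrapper: it invokes the reusable backend_build_darwin.yml workflow introduced below, passing the backend name, build type (mps), Go version, image tag suffix, and runner as inputs, and forwarding the DockerHub and Quay credentials as secrets.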
136 changes: 136 additions & 0 deletions .github/workflows/backend_build_darwin.yml
@@ -0,0 +1,136 @@
---
name: 'build darwin python backend container images (reusable)'

on:
workflow_call:
inputs:
backend:
description: 'Backend to build'
required: true
type: string
build-type:
description: 'Build type (e.g., mps)'
default: ''
type: string
go-version:
description: 'Go version to use'
default: '1.24.x'
type: string
tag-suffix:
description: 'Tag suffix for the built image'
required: true
type: string
runs-on:
description: 'Runner to use'
default: 'macOS-14'
type: string
secrets:
dockerUsername:
required: false
dockerPassword:
required: false
quayUsername:
required: true
quayPassword:
required: true

jobs:
darwin-backend-build:
runs-on: ${{ inputs.runs-on }}
strategy:
matrix:
go-version: ['${{ inputs.go-version }}']
steps:
- name: Clone
uses: actions/checkout@v5
with:
submodules: true

- name: Setup Go ${{ matrix.go-version }}
uses: actions/setup-go@v5
with:
go-version: ${{ matrix.go-version }}
cache: false

# You can test your matrix by printing the current Go version
- name: Display Go version
run: go version

- name: Dependencies
run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm

- name: Build ${{ inputs.backend }}-darwin
run: |
make protogen-go
BACKEND=${{ inputs.backend }} BUILD_TYPE=${{ inputs.build-type }} make build-darwin-python-backend

- name: Upload ${{ inputs.backend }}.tar
uses: actions/upload-artifact@v4
with:
name: ${{ inputs.backend }}-tar
path: backend-images/${{ inputs.backend }}.tar

darwin-backend-publish:
needs: darwin-backend-build
if: github.event_name != 'pull_request'
runs-on: ubuntu-latest
steps:
- name: Download ${{ inputs.backend }}.tar
uses: actions/download-artifact@v5
with:
name: ${{ inputs.backend }}-tar
path: .

- name: Install crane
run: |
curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz
sudo mv crane /usr/local/bin/

- name: Log in to DockerHub
run: |
echo "${{ secrets.dockerPassword }}" | crane auth login docker.io -u "${{ secrets.dockerUsername }}" --password-stdin

- name: Log in to quay.io
run: |
echo "${{ secrets.quayPassword }}" | crane auth login quay.io -u "${{ secrets.quayUsername }}" --password-stdin

- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: |
localai/localai-backends
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
type=sha
flavor: |
latest=auto
suffix=${{ inputs.tag-suffix }},onlatest=true

- name: Docker meta
id: quaymeta
uses: docker/metadata-action@v5
with:
images: |
quay.io/go-skynet/local-ai-backends
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
type=sha
flavor: |
latest=auto
suffix=${{ inputs.tag-suffix }},onlatest=true

- name: Push Docker image (DockerHub)
run: |
for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do
crane push ${{ inputs.backend }}.tar $tag
done

- name: Push Docker image (Quay)
run: |
for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do
crane push ${{ inputs.backend }}.tar $tag
done
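
Note how tag-suffix feeds the two docker/metadata-action steps: the suffix=${{ inputs.tag-suffix }},onlatest=true flavor appends the suffix to every generated tag. As a sketch, for a build of the master branch with backend mlx, the two push loops expand to roughly:

    # Hypothetical expansion of the push loops; the tag names follow from the
    # metadata-action config above and match the URIs registered in
    # backend/index.yaml below.
    crane push mlx.tar localai/localai-backends:master-metal-darwin-arm64-mlx
    crane push mlx.tar quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-mlx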
82 changes: 45 additions & 37 deletions Makefile
@@ -132,43 +132,6 @@ test: test-models/testmodel.ggml protogen-go
$(MAKE) test-tts
$(MAKE) test-stablediffusion

backends/diffusers: docker-build-diffusers docker-save-diffusers build
./local-ai backends install "ocifile://$(abspath ./backend-images/diffusers.tar)"

backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"

backends/piper: docker-build-piper docker-save-piper build
./local-ai backends install "ocifile://$(abspath ./backend-images/piper.tar)"

backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build
./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"

backends/whisper: docker-build-whisper docker-save-whisper build
./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"

backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build
./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)"

backends/local-store: docker-build-local-store docker-save-local-store build
./local-ai backends install "ocifile://$(abspath ./backend-images/local-store.tar)"

backends/huggingface: docker-build-huggingface docker-save-huggingface build
./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)"

backends/rfdetr: docker-build-rfdetr docker-save-rfdetr build
./local-ai backends install "ocifile://$(abspath ./backend-images/rfdetr.tar)"

backends/kitten-tts: docker-build-kitten-tts docker-save-kitten-tts build
./local-ai backends install "ocifile://$(abspath ./backend-images/kitten-tts.tar)"

backends/kokoro: docker-build-kokoro docker-save-kokoro build
./local-ai backends install "ocifile://$(abspath ./backend-images/kokoro.tar)"

backends/llama-cpp-darwin: build
bash ./scripts/build-llama-cpp-darwin.sh
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"

########################################################
## AIO tests
########################################################
@@ -361,6 +324,51 @@ docker-image-intel:
## Backends
########################################################


backends/diffusers: docker-build-diffusers docker-save-diffusers build
./local-ai backends install "ocifile://$(abspath ./backend-images/diffusers.tar)"

backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"

backends/piper: docker-build-piper docker-save-piper build
./local-ai backends install "ocifile://$(abspath ./backend-images/piper.tar)"

backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build
./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"

backends/whisper: docker-build-whisper docker-save-whisper build
./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"

backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build
./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)"

backends/local-store: docker-build-local-store docker-save-local-store build
./local-ai backends install "ocifile://$(abspath ./backend-images/local-store.tar)"

backends/huggingface: docker-build-huggingface docker-save-huggingface build
./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)"

backends/rfdetr: docker-build-rfdetr docker-save-rfdetr build
./local-ai backends install "ocifile://$(abspath ./backend-images/rfdetr.tar)"

backends/kitten-tts: docker-build-kitten-tts docker-save-kitten-tts build
./local-ai backends install "ocifile://$(abspath ./backend-images/kitten-tts.tar)"

backends/kokoro: docker-build-kokoro docker-save-kokoro build
./local-ai backends install "ocifile://$(abspath ./backend-images/kokoro.tar)"

backends/llama-cpp-darwin: build
bash ./scripts/build/llama-cpp-darwin.sh
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"

build-darwin-python-backend:
bash ./scripts/build/python-darwin.sh

backends/mlx: build
BACKEND=mlx BUILD_TYPE=mps $(MAKE) build-darwin-python-backend
./local-ai backends install "ocifile://$(abspath ./backend-images/mlx.tar)"

backend-images:
mkdir -p backend-images

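For local testing on an Apple Silicon machine, the new targets can be exercised end to end; a minimal sketch, assuming local-ai has already been built in the repository root:

    # Build the MLX backend image (via scripts/build/python-darwin.sh) and
    # install it from the resulting local OCI tarball:
    make backends/mlx

    # ...which, per the Makefile above, is shorthand for:
    BACKEND=mlx BUILD_TYPE=mps make build-darwin-python-backend
    ./local-ai backends install "ocifile://$PWD/backend-images/mlx.tar"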
20 changes: 20 additions & 0 deletions backend/index.yaml
@@ -92,7 +92,7 @@
capabilities:
nvidia: "cuda12-rfdetr"
intel: "intel-rfdetr"
#amd: "rocm-rfdetr"

Check warning (GitHub Actions / Yamllint): 95:6 [comments] missing starting space in comment
nvidia-l4t: "nvidia-l4t-arm64-rfdetr"
default: "cpu-rfdetr"
- &vllm
@@ -127,6 +127,21 @@
nvidia: "cuda12-vllm"
amd: "rocm-vllm"
intel: "intel-vllm"
- &mlx
name: "mlx"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx"
icon: https://avatars.githubusercontent.com/u/102832242?s=200&v=4
urls:
- https://github.com/ml-explore/mlx-lm
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-mlx
license: MIT
description: |
Run LLMs with MLX
tags:
- text-to-text
- LLM
- MLX
- &rerankers
name: "rerankers"
alias: "rerankers"
@@ -371,6 +386,11 @@
- text-to-speech
- TTS
license: apache-2.0
- !!merge <<: *mlx
name: "mlx-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-mlx"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-mlx
- !!merge <<: *kitten-tts
name: "kitten-tts-development"
uri: "quay.io/go-skynet/local-ai-backends:master-kitten-tts"
@@ -700,7 +720,7 @@
capabilities:
nvidia: "cuda12-rfdetr-development"
intel: "intel-rfdetr-development"
#amd: "rocm-rfdetr-development"

Check warning (GitHub Actions / Yamllint): 723:6 [comments] missing starting space in comment
nvidia-l4t: "nvidia-l4t-arm64-rfdetr-development"
default: "cpu-rfdetr-development"
- !!merge <<: *rfdetr
Expand Down Expand Up @@ -905,7 +925,7 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-diffusers"
mirrors:
- localai/localai-backends:master-gpu-intel-diffusers
## exllama2

Check warning (GitHub Actions / Yamllint): 928:3 [comments-indentation] comment not indented like content
- !!merge <<: *exllama2
name: "exllama2-development"
capabilities:
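The index entries above lean on YAML anchors and merge keys: &mlx names the first entry, and !!merge <<: *mlx copies all of its fields into the development variant before overriding name, uri, and mirrors. A sketch of the resolved mlx-development entry (fields collected from the two hunks above, illustrative only):

    - name: "mlx-development"
      uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-mlx"
      icon: https://avatars.githubusercontent.com/u/102832242?s=200&v=4
      urls:
        - https://github.com/ml-explore/mlx-lm
      mirrors:
        - localai/localai-backends:master-metal-darwin-arm64-mlx
      license: MIT
      description: |
        Run LLMs with MLX
      tags:
        - text-to-text
        - LLM
        - MLX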
12 changes: 3 additions & 9 deletions backend/python/bark/Makefile
@@ -1,29 +1,23 @@
.PHONY: ttsbark
ttsbark: protogen
ttsbark:
bash install.sh

.PHONY: run
run: protogen
run: ttsbark
@echo "Running bark..."
bash run.sh
@echo "bark run."

.PHONY: test
test: protogen
test: ttsbark
@echo "Testing bark..."
bash test.sh
@echo "bark tested."

.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto

.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__
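
Both Python backend Makefiles get the same treatment: the install target no longer depends on protogen, run and test now depend on the install target instead, and the local grpc_tools.protoc rules are dropped entirely; stub generation presumably moves into the shared build and install scripts.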
14 changes: 4 additions & 10 deletions backend/python/chatterbox/Makefile
@@ -1,29 +1,23 @@
.PHONY: coqui
coqui: protogen
.PHONY: chatterbox
chatterbox:
bash install.sh

.PHONY: run
run: protogen
run: chatterbox
@echo "Running coqui..."
bash run.sh
@echo "coqui run."

.PHONY: test
test: protogen
test: chatterbox
@echo "Testing coqui..."
bash test.sh
@echo "coqui tested."

.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto

.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__
4 changes: 3 additions & 1 deletion backend/python/chatterbox/backend.py
@@ -41,7 +41,9 @@ def LoadModel(self, request, context):
else:
print("CUDA is not available", file=sys.stderr)
device = "cpu"

mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
if mps_available:
device = "mps"
if not torch.cuda.is_available() and request.CUDA:
return backend_pb2.Result(success=False, message="CUDA is not available")
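
The new MPS probe follows the standard PyTorch pattern. A self-contained sketch of the resulting device-selection order (as patched, the MPS check runs after the CUDA one and wins when it succeeds, which is harmless since CUDA and MPS never coexist on real hardware):

    import torch

    def pick_device() -> str:
        # Mirrors the backend's selection after this patch (sketch; the real
        # code runs inline in LoadModel).
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # hasattr guards older torch builds that lack torch.backends.mps.
        if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            device = "mps"  # Apple Metal Performance Shaders on arm64 Macs
        return device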
