1- From 25ba8dfa43e2b4b101b890c88464b638427d3d42 Mon Sep 17 00:00:00 2001
1+ From 8d4bc83e2144cbbe5e634a53ac07a2c6a709b9c0 Mon Sep 17 00:00:00 2001
22From: Charles Xu <
[email protected] >
3- Date: Wed, 17 Jul 2024 13:28:18 +0200
3+ Date: Wed, 21 Aug 2024 07:31:51 +0200
44Subject: [PATCH] Use KleidiAI Int4 Matmul micro-kernels in llama.cpp
55
66- Update CMake file to fetch the Int4 micro-kernels from the KleidiAI
2121 create mode 100644 ggml-kleidiai.h
2222
2323diff --git a/CMakeLists.txt b/CMakeLists.txt
24- index 08481334..07f8f601 100644
24+ index 08481334..6aed4fc6 100644
2525--- a/CMakeLists.txt
2626+++ b/CMakeLists.txt
2727@@ -548,6 +548,57 @@ if (LLAMA_VULKAN)
@@ -32,9 +32,9 @@ index 08481334..07f8f601 100644
3232+
3333+ # Fetch KleidiAI sources:
3434+ include(FetchContent)
35- + set(KLEIDIAI_COMMIT_SHA "187d9aacddfb678c09f0831b18f87401b1b353c3 ")
35+ + set(KLEIDIAI_COMMIT_SHA "cb27bbe4cd47bb15d8236df3250ff105ef64e65b ")
3636+ set(KLEIDIAI_DOWNLOAD_URL "https://gitlab.arm.com/kleidi/kleidiai/-/archive/${KLEIDIAI_COMMIT_SHA}/kleidiai-${KLEIDIAI_COMMIT_SHA}.tar.gz")
37- + set(KLEIDIAI_ARCHIVE_MD5 "4a1eee013cb20464b534cb01212d19c9 ")
37+ + set(KLEIDIAI_ARCHIVE_MD5 "f4fa5d1070d9f0ab96f5c021d292dde3 ")
3838+
3939+ if (POLICY CMP0135)
4040+ cmake_policy(SET CMP0135 NEW)
@@ -66,7 +66,7 @@ index 08481334..07f8f601 100644
6666+ ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/)
6767+
6868+ list(APPEND GGML_SOURCES_KLEIDIAI ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c)
69- + list(APPEND GGML_SOURCES_KLEIDIAI ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32f16scalep_qsu4c32s16s0 .c)
69+ + list(APPEND GGML_SOURCES_KLEIDIAI ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0 .c)
7070+ list(APPEND GGML_SOURCES_KLEIDIAI ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c)
7171+ list(APPEND GGML_SOURCES_KLEIDIAI ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c)
7272+
@@ -123,7 +123,7 @@ index bd367c42..ed4ce0ae 100644
123123 if (this_size > max_size) {
124124diff --git a/ggml-kleidiai.cpp b/ggml-kleidiai.cpp
125125new file mode 100644
126- index 00000000..257a0d4c
126+ index 00000000..9129ea99
127127--- /dev/null
128128+++ b/ggml-kleidiai.cpp
129129@@ -0,0 +1,675 @@
@@ -176,7 +176,7 @@ index 00000000..257a0d4c
176176+ // KleidiAI micro-kernels
177177+ #include "kai_matmul_clamp_f32_qsi8d32p_qsi4c32p_interface.h"
178178+ #include "kai_lhs_quant_pack_qsi8d32p_f32.h"
179- + #include "kai_rhs_pack_nxk_qsi4c32f16scalep_qsu4c32s16s0 .h"
179+ + #include "kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0 .h"
180180+ #include "kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.h"
181181+ #include "kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.h"
182182+
@@ -473,7 +473,7 @@ index 00000000..257a0d4c
473473+ v.nr = ukernel->get_nr();
474474+ v.kr = ukernel->get_kr();
475475+ v.sr = ukernel->get_sr();
476- + v.packed_size = kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32f16scalep_qsu4c32s16s0 (n, k, v.nr, v.kr, k_q4_0_block_size /* 32 */);
476+ + v.packed_size = kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0 (n, k, v.nr, v.kr, k_q4_0_block_size /* 32 */);
477477+
478478+ return v;
479479+ }
@@ -638,11 +638,11 @@ index 00000000..257a0d4c
638638+ // Temporary memory for the computation.
639639+ uint8_t *reshaped_data = (uint8_t*)malloc(reshaped_data_sz);
640640+
641- + struct kai_rhs_pack_nxk_qsi4c32f16scalep_qsu4c32s16s0_params params;
641+ + struct kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0_params params;
642642+ params.lhs_zero_point = 1;
643643+ params.rhs_zero_point = 8;
644644+
645- + kai_run_rhs_pack_nxk_qsi4c32f16scalep_qsu4c32s16s0 (
645+ + kai_run_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0 (
646646+ 1, n, k, // Dimensions
647647+ rhs_packing_params.nr, // Nr
648648+ rhs_packing_params.kr, // Kr
0 commit comments