Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,9 @@ set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")

# extra artifacts
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
option(GGML_CPU_REF_BACKEND "ggml: build reference CPU backend for testing" OFF)
option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})

#
# dependencies
Expand Down Expand Up @@ -285,7 +286,9 @@ add_subdirectory(src)

if (GGML_BUILD_TESTS)
enable_testing()
add_subdirectory(tests)
if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tests")
add_subdirectory(tests)
endif ()
endif ()

if (GGML_BUILD_EXAMPLES)
Expand Down
2 changes: 2 additions & 0 deletions ggml/include/ggml-backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,8 @@ extern "C" {
// Load all known backends from dynamic libraries
GGML_API void ggml_backend_load_all(void);
GGML_API void ggml_backend_load_all_from_path(const char * dir_path);
// Load all CPU dynamic libraries and register them.
// Returns a NULL-terminated array of the backend registrations that were loaded,
// or NULL when no library was loaded or the array could not be allocated.
// The array is allocated with realloc(); the caller releases it with free().
GGML_API ggml_backend_reg_t * ggml_backend_load_cpu_variants(void);

//
// Backend scheduler
Expand Down
1 change: 1 addition & 0 deletions ggml/include/ggml-cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ extern "C" {
//

// x86
GGML_BACKEND_API int ggml_cpu_has_sse2 (void);
GGML_BACKEND_API int ggml_cpu_has_sse3 (void);
GGML_BACKEND_API int ggml_cpu_has_ssse3 (void);
GGML_BACKEND_API int ggml_cpu_has_avx (void);
Expand Down
14 changes: 14 additions & 0 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,20 @@ ggml_add_backend(WebGPU)
ggml_add_backend(zDNN)
ggml_add_backend(OpenCL)

# Optional reference CPU backend variant (tag "ref"), enabled with GGML_CPU_REF_BACKEND.
if (GGML_CPU_REF_BACKEND)
    # The reference variant is only accepted together with GGML_BACKEND_DL.
    if (NOT GGML_BACKEND_DL)
        message(FATAL_ERROR "GGML_CPU_REF_BACKEND requires GGML_BACKEND_DL")
    endif()

    set(GGML_SYSTEM_ARCH "cpu-ref")

    # Shadow these settings with OFF before the variant is added.
    # NOTE(review): the overrides stay in effect for the rest of this directory
    # scope, not only for the "ref" variant - confirm that this is intended.
    foreach (ggml_cpu_ref_disabled_setting
             GGML_LLAMAFILE
             GGML_CPU_HBM
             GGML_OPENMP
             GGML_CPU_KLEIDIAI
             GGML_CPU_REPACK)
        set(${ggml_cpu_ref_disabled_setting} OFF)
    endforeach()

    ggml_add_cpu_backend_variant(ref)

    # Tell the ggml target that the reference backend was built.
    target_compile_definitions(ggml PRIVATE GGML_USE_CPU_REF)
endif()

foreach (target ggml-base ggml)
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
target_compile_features (${target} PRIVATE c_std_11 cxx_std_17) # don't bump
Expand Down
56 changes: 56 additions & 0 deletions ggml/src/ggml-backend-reg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -596,4 +596,60 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
if (backend_path) {
ggml_backend_load(backend_path);
}
#ifdef GGML_USE_CPU_REF
ggml_backend_load_best("cpu-ref", silent, dir_path);
#endif
}

// Loads the CPU backend variant libraries found in the default search paths.
//
// A directory entry is a candidate when it is a regular file whose name starts
// with <backend filename prefix> + "cpu-" and whose extension equals the
// backend filename extension. Search order: GGML_BACKEND_DIR (only when it is
// defined at build time), the executable directory, the current directory.
//
// The same library is frequently reachable through more than one search path
// (e.g. when the process is started from the executable directory). A file
// name that was already loaded successfully is therefore skipped in later
// search paths, so that a variant is not loaded and returned twice.
//
// Returns a NULL-terminated array allocated with realloc() that the caller
// releases with free(). Returns nullptr when no library was loaded or when the
// array could not be grown; in the latter case the libraries loaded so far are
// not unloaded here.
ggml_backend_reg_t * ggml_backend_load_cpu_variants(void) {
    // candidates match [lib]ggml-cpu-*.[so|dll] (exact prefix/extension come from the helpers below)
    const fs::path name_path      = fs::u8path("cpu");
    const fs::path file_prefix    = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
    const fs::path file_extension = backend_filename_extension();

    std::vector<fs::path> search_paths;
#ifdef GGML_BACKEND_DIR
    search_paths.push_back(fs::u8path(GGML_BACKEND_DIR));
#endif
    // default search paths: executable directory, current directory
    search_paths.push_back(get_executable_path());
    search_paths.push_back(fs::current_path());

    // file names of the libraries that were loaded successfully so far
    std::vector<fs::path> loaded_filenames;

    ggml_backend_reg_t * backends = nullptr;
    size_t count    = 0;
    size_t capacity = 0;

    for (const auto & search_path : search_paths) {
        if (!fs::exists(search_path)) {
            GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
            continue;
        }

        fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
        for (const auto & entry : dir_it) {
            if (!entry.is_regular_file()) {
                continue;
            }

            const fs::path filename = entry.path().filename();
            const bool has_prefix   = filename.native().find(file_prefix.native()) == 0;
            if (!has_prefix || entry.path().extension() != file_extension) {
                continue;
            }

            // skip a variant that an earlier search path already provided
            bool already_loaded = false;
            for (const auto & loaded : loaded_filenames) {
                if (loaded == filename) {
                    already_loaded = true;
                    break;
                }
            }
            if (already_loaded) {
                continue;
            }

            ggml_backend_reg_t backend = get_reg().load_backend(search_path / filename, false);
            if (!backend) {
                // not recorded as loaded: a later search path may hold a usable copy
                continue;
            }
            loaded_filenames.push_back(filename);

            if (count >= capacity) {
                capacity = capacity == 0 ? 4 : capacity * 2;
                // one extra slot is reserved for the terminating nullptr
                ggml_backend_reg_t * new_backends = (ggml_backend_reg_t *) realloc(backends, (capacity + 1) * sizeof(ggml_backend_reg_t));
                if (!new_backends) {
                    free(backends);
                    return nullptr;
                }
                backends = new_backends;
            }
            backends[count++] = backend;
        }
    }

    if (backends) {
        backends[count] = nullptr;
    }
    return backends;
}
40 changes: 40 additions & 0 deletions ggml/src/ggml-backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1945,13 +1945,38 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s

// allocate nodes
ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx_allocated, backend);

// This is an absolute hack, but it is only to try to force the use of the
// extra repack buffers to see if I can come up with a better way or get
// some feedback from others how to go about doing this.
ggml_backend_buffer_t extra_buffer = nullptr;
std::vector<ggml_backend_buffer_type_t> extra_buft_list;
auto * dev = ggml_backend_get_device(backend);
auto * reg = ggml_backend_dev_backend_reg(dev);
auto get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_dev_get_extra_bufts");
if (get_extra_bufts_fn) {
ggml_backend_buffer_type_t * extra_bufts = get_extra_bufts_fn(dev);
while (extra_bufts && *extra_bufts) {
extra_buft_list.push_back(*extra_bufts);
++extra_bufts;
}
}
if (extra_buft_list.size() > 0) {
// Setting size to 1 just to ensure that the underlying extra buffer
// allocation is called. In the case of the repack buffer it does not
// really use the buffer and the repacking is done directly on the
// tensor data.
extra_buffer = ggml_backend_buft_alloc_buffer(extra_buft_list[0], 1);
}

if (buffer == NULL) {
GGML_LOG_ERROR("%s: failed to allocate buffer for graph copy\n", __func__);
ggml_hash_set_free(&hash_set);
free(node_copies);
free(node_init);
ggml_free(ctx_allocated);
ggml_free(ctx_unallocated);
ggml_backend_buffer_free(extra_buffer);
return {
/* .buffer = */ NULL,
/* .ctx_allocated = */ NULL,
Expand All @@ -1965,6 +1990,20 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s
// copy data and init views
for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];

// Again just here to see if I can get the repacking to work.
if (extra_buffer && !ggml_op_is_empty(node->op) && node->src[0]) {
auto dev = ggml_backend_buft_get_device(ggml_backend_buffer_get_type(extra_buffer));
if (ggml_backend_dev_supports_op(dev, node)) {
size_t id = ggml_hash_find(&hash_set, node->src[0]);
ggml_status status = ggml_backend_buffer_init_tensor(extra_buffer, node_copies[id]);
if (status != GGML_STATUS_SUCCESS) {
GGML_LOG_ERROR("%s: failed to initialize tensor in extra buffer for graph copy\n", __func__);
}
node_copies[id]->buffer = extra_buffer;
}
}

graph_copy_init_tensor(&hash_set, node_copies, node_init, node);
}

Expand All @@ -1980,6 +2019,7 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s
ggml_hash_set_free(&hash_set);
free(node_copies);
free(node_init);
ggml_backend_buffer_free(extra_buffer);

return {
/* .buffer = */ buffer,
Expand Down
6 changes: 6 additions & 0 deletions ggml/src/ggml-cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)

if (tag_name)
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="CPU-${tag_name}")
else()
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="CPU")
endif()

if (APPLE AND GGML_ACCELERATE)
find_library(ACCELERATE_FRAMEWORK Accelerate)
if (ACCELERATE_FRAMEWORK)
Expand Down
8 changes: 8 additions & 0 deletions ggml/src/ggml-cpu/ggml-cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -3428,6 +3428,14 @@ int ggml_cpu_has_llamafile(void) {
#endif
}

// Compile-time probe: yields 1 when the __SSE2__ macro is defined for this
// translation unit and 0 otherwise.
int ggml_cpu_has_sse2(void) {
#if !defined(__SSE2__)
    return 0;
#else
    return 1;
#endif
}

int ggml_cpu_has_sse3(void) {
#if defined(__SSE3__)
return 1;
Expand Down
7 changes: 5 additions & 2 deletions ggml/src/ggml-cpu/ggml-cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ struct ggml_backend_cpu_context {
};

static const char * ggml_backend_cpu_get_name(ggml_backend_t backend) {
return "CPU";
return GGML_CPU_VARIANT_NAME;

GGML_UNUSED(backend);
}
Expand Down Expand Up @@ -327,7 +327,7 @@ struct ggml_backend_cpu_device_context {
};

static const char * ggml_backend_cpu_device_get_name(ggml_backend_dev_t dev) {
return "CPU";
return GGML_CPU_VARIANT_NAME;

GGML_UNUSED(dev);
}
Expand Down Expand Up @@ -506,6 +506,9 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
ggml_cpu_init();

std::vector<ggml_backend_feature> features;
if (ggml_cpu_has_sse2()) {
features.push_back({ "SSE2", "1" });
}
if (ggml_cpu_has_sse3()) {
features.push_back({ "SSE3", "1" });
}
Expand Down
Loading