Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ jobs:
- Release
- Debug
setup:
- arch: none
backend: none
cc: gcc-13
cxx: g++-13
fc: gfortran-13
container: seissol/gha-cpu:davschneller-gpu-image
runner: ubuntu-24.04
pythonbreak: true
- arch: sm_60
backend: cuda
cc: gcc-13
Expand Down Expand Up @@ -98,7 +106,7 @@ jobs:
cd ../..

- name: checkout-device
uses: actions/checkout@v4
uses: actions/checkout@v5
with:
submodules: recursive

Expand Down
34 changes: 21 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,18 @@ if (NOT DEFINED DEVICE_BACKEND)
message(FATAL_ERROR "DEVICE_BACKEND variable has not been provided into the submodule")
else()
set(FOUND OFF)
foreach(VARIANT cuda hip oneapi hipsycl acpp)
foreach(VARIANT cuda hip oneapi hipsycl acpp none)
if (${DEVICE_BACKEND} STREQUAL ${VARIANT})
set(FOUND ON)
endif()
endforeach()
# FOUND is still OFF here when DEVICE_BACKEND matched none of the variants
# tested in the foreach loop; abort configuration with the list of valid names.
if (NOT FOUND)
message(FATAL_ERROR "DEVICE_BACKEND must be either cuda, hip, oneapi, acpp, or hipsycl. Given: ${DEVICE_BACKEND}")
message(FATAL_ERROR "DEVICE_BACKEND must be either none, cuda, hip, oneapi, acpp, hipsycl. Given: ${DEVICE_BACKEND}")
endif()
endif()

if (NOT DEFINED DEVICE_ARCH)
message(FATAL_ERROR "DEVICE_ARCH is not defined. "
"Supported for example: sm_60, sm_61, sm_70, sm_71, gfx906, gfx908, dg1, bdw, skl, Gen8, Gen9, Gen11, Gen12LP")
# DEVICE_ARCH is only mandatory when a real backend is selected; with the
# "none" backend the check is skipped.
# DEVICE_BACKEND is expanded inside quotes so that if() compares its text and
# does not treat the expanded value as the name of another variable.
if ((NOT ("${DEVICE_BACKEND}" STREQUAL "none")) AND (NOT DEFINED DEVICE_ARCH))
  message(FATAL_ERROR "DEVICE_ARCH has not been defined")
endif()

set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
Expand All @@ -44,10 +43,10 @@ elseif(${DEVICE_BACKEND} STREQUAL "hip")
elseif((${DEVICE_BACKEND} STREQUAL "oneapi") OR (${DEVICE_BACKEND} STREQUAL "hipsycl") OR (${DEVICE_BACKEND} STREQUAL "acpp"))
set(BACKEND_FOLDER "sycl")
include(sycl.cmake)
else()
add_library(device OBJECT device.cpp algorithms/Dummy.cpp)
endif()

string(TOUPPER ${BACKEND_FOLDER} BACKEND_UPPER_CASE)

# common options
target_compile_features(device PRIVATE cxx_std_17)

Expand All @@ -58,15 +57,24 @@ if (ENABLE_PROFILING_MARKERS)
target_compile_definitions(device PRIVATE PROFILING_ENABLED)
endif()

target_compile_definitions(device PRIVATE DEVICE_LANG_${BACKEND_UPPER_CASE})
# Backend-specific compile definition and include paths. Skipped for the
# "none" backend.
# Variable expansions are quoted so that if() compares plain text and
# string(TOUPPER) always receives exactly one input argument.
if (NOT ("${DEVICE_BACKEND}" STREQUAL "none"))
  # Produces DEVICE_LANG_<BACKEND_FOLDER in upper case>, which device.cpp tests
  # to pick the wrapped API header.
  string(TOUPPER "${BACKEND_FOLDER}" BACKEND_UPPER_CASE)
  target_compile_definitions(device PRIVATE DEVICE_LANG_${BACKEND_UPPER_CASE})

  target_include_directories(device PRIVATE
    interfaces/${BACKEND_FOLDER}
    interfaces/common
    algorithms/${BACKEND_FOLDER})
endif()

if (LOG_LEVEL_MASTER)
target_compile_definitions(device PRIVATE LOG_LEVEL=${LOG_LEVEL_MASTER})
endif()

target_include_directories(device PRIVATE .
interfaces/${BACKEND_FOLDER}
interfaces/common
algorithms/${BACKEND_FOLDER}
submodules)
# Include path for the bundled third-party code: default to the local
# "submodules" directory unless the including project supplies its own
# location(s) through DEVICE_SUBMODULES.
if (NOT DEFINED DEVICE_SUBMODULES)
  target_include_directories(device PRIVATE submodules)
else()
  target_include_directories(device PRIVATE ${DEVICE_SUBMODULES})
endif()

target_include_directories(device PRIVATE .)
2 changes: 1 addition & 1 deletion UsmAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class UsmAllocator {
using difference_type = std::ptrdiff_t;

UsmAllocator() noexcept = delete;
UsmAllocator(device::DeviceInstance& instance) noexcept : api(instance.api) {}
UsmAllocator(device::DeviceInstance& instance) noexcept : api(&instance.api()) {}

UsmAllocator(const UsmAllocator &) noexcept = default;
UsmAllocator(UsmAllocator &&) noexcept = default;
Expand Down
136 changes: 136 additions & 0 deletions algorithms/Dummy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// SPDX-FileCopyrightText: 2020-2024 SeisSol Group
//
// SPDX-License-Identifier: BSD-3-Clause

#include <cassert>
#include <cstdint>
#include <device.h>
#include "Algorithms.h"

namespace device {
// Dummy (no-op) implementation of Algorithms::scaleArray.
// The body is empty: all arguments are ignored and devArray is not modified.
template <typename T>
void Algorithms::scaleArray(T* devArray, T scalar, const size_t numElements, void* streamPtr) {}

// Explicit instantiations for float, double, int, unsigned and char.
template void Algorithms::scaleArray(float* devArray, float scalar, const size_t numElements, void* streamPtr);
template void Algorithms::scaleArray(double* devArray, double scalar, const size_t numElements, void* streamPtr);
template void Algorithms::scaleArray(int* devArray, int scalar, const size_t numElements, void* streamPtr);
template void Algorithms::scaleArray(unsigned* devArray, unsigned scalar, const size_t numElements, void* streamPtr);
template void Algorithms::scaleArray(char* devArray, char scalar, const size_t numElements, void* streamPtr);

// Dummy (no-op) implementation of Algorithms::fillArray.
// The body is empty: all arguments are ignored and devArray is not written.
template <typename T>
void Algorithms::fillArray(T* devArray, const T scalar, const size_t numElements, void* streamPtr) {}

// Explicit instantiations for float, double, int, unsigned and char.
template void Algorithms::fillArray(float* devArray, float scalar, const size_t numElements, void* streamPtr);
template void Algorithms::fillArray(double* devArray, double scalar, const size_t numElements, void* streamPtr);
template void Algorithms::fillArray(int* devArray, int scalar, const size_t numElements, void* streamPtr);
template void Algorithms::fillArray(unsigned* devArray, unsigned scalar, const size_t numElements, void* streamPtr);
template void Algorithms::fillArray(char* devArray, char scalar, const size_t numElements, void* streamPtr);

// Dummy (no-op) implementation of Algorithms::touchMemoryI.
// The body is empty: ptr, size, clean and streamPtr are all ignored.
void Algorithms::touchMemoryI(void* ptr,
                              size_t size,
                              bool clean,
                              void* streamPtr) {}

// Dummy (no-op) implementation of Algorithms::incrementalAddI.
// The body is empty: all arguments are ignored and out is not written.
void Algorithms::incrementalAddI(void** out, void* base, size_t increment, size_t numElements, void* streamPtr) {}


// Dummy (no-op) implementation of Algorithms::streamBatchedDataI.
// The body is empty: nothing is copied and all arguments are ignored.
void Algorithms::streamBatchedDataI(
    const void** baseSrcPtr, void** baseDstPtr, size_t elementSize, size_t numElements, void* streamPtr) {}

// Dummy (no-op) implementation of Algorithms::accumulateBatchedData.
// The body is empty: nothing is accumulated and all arguments are ignored.
template <typename T>
void Algorithms::accumulateBatchedData(
    const T** baseSrcPtr, T** baseDstPtr, size_t elementSize, size_t numElements, void* streamPtr) {}

// Explicit instantiations for float and double.
template void Algorithms::accumulateBatchedData(
    const float** baseSrcPtr, float** baseDstPtr, size_t elementSize, size_t numElements, void* streamPtr);

template void Algorithms::accumulateBatchedData(
    const double** baseSrcPtr, double** baseDstPtr, size_t elementSize, size_t numElements, void* streamPtr);

// Dummy (no-op) implementation of Algorithms::touchBatchedMemoryI.
// The body is empty: all arguments are ignored.
void Algorithms::touchBatchedMemoryI(
    void** basePtr, size_t elementSize, size_t numElements, bool clean, void* streamPtr) {}


// Dummy (no-op) implementation of Algorithms::setToValue.
// The body is empty: all arguments are ignored and out is not written.
template <typename T>
void Algorithms::setToValue(T** out, T value, size_t elementSize, size_t numElements, void* streamPtr) {}

// Explicit instantiations for float, double, int, unsigned and char.
template void Algorithms::setToValue(
    float** out, float value, size_t elementSize, size_t numElements, void* streamPtr);
template void Algorithms::setToValue(
    double** out, double value, size_t elementSize, size_t numElements, void* streamPtr);
template void Algorithms::setToValue(
    int** out, int value, size_t elementSize, size_t numElements, void* streamPtr);
template void Algorithms::setToValue(
    unsigned** out, unsigned value, size_t elementSize, size_t numElements, void* streamPtr);
template void Algorithms::setToValue(
    char** out, char value, size_t elementSize, size_t numElements, void* streamPtr);


// Dummy (no-op) implementation of Algorithms::copyUniformToScatterI.
// The body is empty: nothing is copied and all arguments are ignored.
void Algorithms::copyUniformToScatterI(
    const void* src, void** dst, size_t srcOffset, size_t copySize, size_t numElements, void* streamPtr) {}

// Dummy (no-op) implementation of Algorithms::copyScatterToUniformI.
// The body is empty: nothing is copied and all arguments are ignored.
void Algorithms::copyScatterToUniformI(
    const void** src, void* dst, size_t dstOffset, size_t copySize, size_t numElements, void* streamPtr) {}


// Dummy (no-op) implementation of Algorithms::compareDataWithHost.
// The body is empty: no comparison is performed, nothing is reported, and all
// arguments are ignored.
template <typename T>
void Algorithms::compareDataWithHost(const T* hostPtr,
                                     const T* devPtr,
                                     const size_t numElements,
                                     const std::string& dataName) {
}  // no trailing ';' here: it would be a redundant empty declaration

// Explicit instantiations for float and double.
template void Algorithms::compareDataWithHost(const float* hostPtr,
                                              const float* devPtr,
                                              const size_t numElements,
                                              const std::string& dataName);
template void Algorithms::compareDataWithHost(const double* hostPtr,
                                              const double* devPtr,
                                              const size_t numElements,
                                              const std::string& dataName);

// Dummy (no-op) implementation of Algorithms::reduceVector.
// The body is empty: no reduction is performed and *result is never written,
// regardless of overrideResult or type.
template <typename AccT, typename VecT>
void Algorithms::reduceVector(
    AccT* result, const VecT* buffer, bool overrideResult, size_t size, ReductionType type, void* streamPtr) {}

// Explicit instantiations, one per (accumulator type, element type) pair.
template void Algorithms::reduceVector(
    int* result, const int* buffer, bool overrideResult, size_t size, ReductionType type, void* streamPtr);
template void Algorithms::reduceVector(
    unsigned* result, const unsigned* buffer, bool overrideResult, size_t size, ReductionType type, void* streamPtr);
template void Algorithms::reduceVector(
    long* result, const int* buffer, bool overrideResult, size_t size, ReductionType type, void* streamPtr);
template void Algorithms::reduceVector(
    unsigned long* result, const unsigned* buffer, bool overrideResult, size_t size, ReductionType type, void* streamPtr);
template void Algorithms::reduceVector(
    long* result, const long* buffer, bool overrideResult, size_t size, ReductionType type, void* streamPtr);
template void Algorithms::reduceVector(
    unsigned long* result, const unsigned long* buffer, bool overrideResult, size_t size, ReductionType type, void* streamPtr);
template void Algorithms::reduceVector(
    long long* result, const int* buffer, bool overrideResult, size_t size, ReductionType type, void* streamPtr);
template void Algorithms::reduceVector(
    unsigned long long* result, const unsigned* buffer, bool overrideResult, size_t size, ReductionType type, void* streamPtr);
template void Algorithms::reduceVector(
    long long* result, const long* buffer, bool overrideResult, size_t size, ReductionType type, void* streamPtr);
template void Algorithms::reduceVector(
    unsigned long long* result, const unsigned long* buffer, bool overrideResult, size_t size, ReductionType type, void* streamPtr);
template void Algorithms::reduceVector(
    long long* result, const long long* buffer, bool overrideResult, size_t size, ReductionType type, void* streamPtr);
template void Algorithms::reduceVector(
    unsigned long long* result, const unsigned long long* buffer, bool overrideResult, size_t size, ReductionType type, void* streamPtr);
template void Algorithms::reduceVector(
    float* result, const float* buffer, bool overrideResult, size_t size, ReductionType type, void* streamPtr);
template void Algorithms::reduceVector(
    double* result, const float* buffer, bool overrideResult, size_t size, ReductionType type, void* streamPtr);
template void Algorithms::reduceVector(
    double* result, const double* buffer, bool overrideResult, size_t size, ReductionType type, void* streamPtr);

} // namespace device
40 changes: 33 additions & 7 deletions device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@
// SPDX-License-Identifier: BSD-3-Clause

#include "device.h"
#include "Algorithms.h"

#ifdef DEVICE_LANG_CUDA
#include "interfaces/cuda/CudaWrappedAPI.h"
#define DEVICE_ENABLED
#elif DEVICE_LANG_HIP
#include "interfaces/hip/HipWrappedAPI.h"
#define DEVICE_ENABLED
#elif DEVICE_LANG_SYCL
#include "interfaces/sycl/SyclWrappedAPI.h"
#else
#error "Unknown interface for the device wrapper"
#define DEVICE_ENABLED
#endif

using namespace device;
Expand All @@ -20,17 +22,41 @@ DeviceInstance::DeviceInstance() {
// NOTE: all headers inside of macros define their unique ConcreteInterface.
// Make sure to not include multiple different interfaces at the same time.
// Only one interface is allowed per program because of issues of unique compilers, etc.
api = new ConcreteAPI;
algorithms.setDeviceApi(api);
#ifdef DEVICE_ENABLED
apiP = std::make_unique<ConcreteAPI>();
algorithmsP = std::make_unique<Algorithms>();

algorithmsP->setDeviceApi(apiP.get());
#endif
}

DeviceInstance::~DeviceInstance() {
#ifdef DEVICE_ENABLED
this->finalize();
delete api;
api = nullptr;
#endif
}

void DeviceInstance::finalize() {
api->finalize();
#ifdef DEVICE_ENABLED
api().finalize();
#endif
}

// Accessor for the one shared DeviceInstance object.
// The object is a function-local static, so it is constructed the first time
// this function is called and the same reference is returned on every call.
DeviceInstance& DeviceInstance::instance() {
  static DeviceInstance singleton;
  return singleton;
}

// Returns the wrapped device API owned by this instance.
// Throws std::runtime_error when apiP holds no object.
AbstractAPI& DeviceInstance::api() {
  AbstractAPI* const rawApi = apiP.get();
  if (rawApi != nullptr) {
    return *rawApi;
  }
  throw std::runtime_error("Device API was called; but it is not initialized.");
}

// Returns the Algorithms object owned by this instance.
// Throws std::runtime_error when algorithmsP holds no object. The message
// names the algorithms object explicitly so that this failure can be told
// apart from the one raised by api().
Algorithms& DeviceInstance::algorithms() {
  if (algorithmsP == nullptr) {
    throw std::runtime_error("Device algorithms were called; but they are not initialized.");
  }
  return *algorithmsP;
}
14 changes: 8 additions & 6 deletions device.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

#include "AbstractAPI.h"
#include "Algorithms.h"
#include <memory>
#include <stdexcept>

namespace device {

Expand All @@ -15,17 +17,17 @@ class DeviceInstance {
public:
DeviceInstance(const DeviceInstance &) = delete;
DeviceInstance &operator=(const DeviceInstance &) = delete;
static DeviceInstance &getInstance() {
static DeviceInstance instance;
return instance;
}
static DeviceInstance& instance();
~DeviceInstance();
void finalize();

AbstractAPI *api{nullptr};
Algorithms algorithms{};
AbstractAPI& api();

Algorithms& algorithms();

private:
std::unique_ptr<AbstractAPI> apiP{nullptr};
std::unique_ptr<Algorithms> algorithmsP{nullptr};
DeviceInstance();
};
} // namespace device
Expand Down
Loading