Skip to content

Commit 72829cd

Browse files
authored
[SYCL][NativeCPU] Fix alignment of global and local memory. (#19076)
For parameters, we were inferring a suitable alignment from the requested size, but for global and local memory we were not doing the same. Ideally we would keep track of what alignment we need, but UR does not expose this information to us, so we assume the worst.
1 parent 9c003e0 commit 72829cd

File tree

3 files changed

+22
-14
lines changed

3 files changed

+22
-14
lines changed

unified-runtime/source/adapters/native_cpu/common.hpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,20 @@ inline void *aligned_malloc(size_t alignment, size_t size) {
8181
return ptr;
8282
}
8383

84+
// In many cases we require aligned memory without being told what the alignment
85+
// requirement is. This helper function returns maximally aligned memory based
86+
// on the size.
87+
inline void *aligned_malloc(size_t size) {
88+
constexpr size_t max_alignment = 16 * sizeof(double);
89+
size_t alignment = max_alignment;
90+
while (alignment > size) {
91+
alignment >>= 1;
92+
}
93+
// aligned_malloc requires size to be a multiple of alignment; round up.
94+
size = (size + alignment - 1) & ~(alignment - 1);
95+
return aligned_malloc(alignment, size);
96+
}
97+
8498
inline void aligned_free(void *ptr) {
8599
#ifdef _MSC_VER
86100
_aligned_free(ptr);

unified-runtime/source/adapters/native_cpu/kernel.hpp

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ struct ur_kernel_handle_t_ : RefCounted {
4242

4343
~ur_kernel_handle_t_() {
4444
removeArgReferences();
45-
free(_localMemPool);
45+
native_cpu::aligned_free(_localMemPool);
4646
}
4747

4848
ur_kernel_handle_t_(ur_program_handle_t hProgram, const char *name,
@@ -59,7 +59,6 @@ struct ur_kernel_handle_t_ : RefCounted {
5959
args_index_t Indices;
6060
std::vector<size_t> ParamSizes;
6161
std::vector<bool> OwnsMem;
62-
static constexpr size_t MaxAlign = 16 * sizeof(double);
6362

6463
arguments() = default;
6564

@@ -109,11 +108,7 @@ struct ur_kernel_handle_t_ : RefCounted {
109108
}
110109
}
111110
if (NeedAlloc) {
112-
size_t Align = MaxAlign;
113-
while (Align > Size) {
114-
Align >>= 1;
115-
}
116-
Indices[Index] = native_cpu::aligned_malloc(Align, Size);
111+
Indices[Index] = native_cpu::aligned_malloc(Size);
117112
ParamSizes[Index] = Size;
118113
OwnsMem[Index] = true;
119114
}
@@ -158,8 +153,8 @@ struct ur_kernel_handle_t_ : RefCounted {
158153
if (reqSize == 0 || reqSize == _localMemPoolSize) {
159154
return;
160155
}
161-
// realloc handles nullptr case
162-
_localMemPool = (char *)realloc(_localMemPool, reqSize);
156+
native_cpu::aligned_free(_localMemPool);
157+
_localMemPool = static_cast<char *>(native_cpu::aligned_malloc(reqSize));
163158
_localMemPoolSize = reqSize;
164159
}
165160

unified-runtime/source/adapters/native_cpu/memory.hpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,11 @@
1919

2020
struct ur_mem_handle_t_ : ur_object {
2121
ur_mem_handle_t_(size_t Size, bool _IsImage)
22-
: _mem{static_cast<char *>(malloc(Size))}, _ownsMem{true},
23-
IsImage{_IsImage} {}
22+
: _mem{static_cast<char *>(native_cpu::aligned_malloc(Size))},
23+
_ownsMem{true}, IsImage{_IsImage} {}
2424

2525
ur_mem_handle_t_(void *HostPtr, size_t Size, bool _IsImage)
26-
: _mem{static_cast<char *>(malloc(Size))}, _ownsMem{true},
27-
IsImage{_IsImage} {
26+
: ur_mem_handle_t_(Size, _IsImage) {
2827
memcpy(_mem, HostPtr, Size);
2928
}
3029

@@ -34,7 +33,7 @@ struct ur_mem_handle_t_ : ur_object {
3433

3534
~ur_mem_handle_t_() {
3635
if (_ownsMem) {
37-
free(_mem);
36+
native_cpu::aligned_free(_mem);
3837
}
3938
}
4039

0 commit comments

Comments
 (0)