Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions projects/clr/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Full documentation for HIP is available at [rocm.docs.amd.com](https://rocm.docs
- `hipLibraryGetKernel` gets a kernel from library
- `hipLibraryGetKernelCount` gets kernel count in library
- `hipStreamCopyAttributes` copies attributes from source stream to destination stream
- `hipMemPrefetchBatchAsync` Prefetches a batch of memory ranges to the specified locations

## HIP 7.1 for ROCm 7.1

Expand Down
13 changes: 10 additions & 3 deletions projects/clr/hipamd/include/hip/amd_detail/hip_api_trace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
#define HIP_API_TABLE_STEP_VERSION 0
#define HIP_COMPILER_API_TABLE_STEP_VERSION 0
#define HIP_TOOLS_API_TABLE_STEP_VERSION 0
#define HIP_RUNTIME_API_TABLE_STEP_VERSION 17
#define HIP_RUNTIME_API_TABLE_STEP_VERSION 18

// HIP API interface
// HIP compiler dispatch functions
Expand Down Expand Up @@ -540,6 +540,10 @@ typedef hipError_t (*t_hipMemPrefetchAsync)(const void* dev_ptr, size_t count, i
typedef hipError_t (*t_hipMemPrefetchAsync_v2)(const void* dev_ptr, size_t count,
hipMemLocation location, unsigned int flags,
hipStream_t stream);
typedef hipError_t (*t_hipMemPrefetchBatchAsync)(void** dev_ptrs, size_t* sizes, size_t count,
hipMemLocation* prefetch_locs, size_t* prefetch_loc_idxs,
size_t num_prefetch_locs, unsigned long long flags,
hipStream_t stream);
typedef hipError_t (*t_hipMemPtrGetInfo)(void* ptr, size_t* size);
typedef hipError_t (*t_hipMemRangeGetAttribute)(void* data, size_t data_size,
hipMemRangeAttribute attribute, const void* dev_ptr,
Expand Down Expand Up @@ -1692,8 +1696,11 @@ struct HipDispatchTable {
t_hipKernelGetLibrary hipKernelGetLibrary_fn;
t_hipKernelGetName hipKernelGetName_fn;

// HIP_RUNTIME_API_TABLE_STEP_VERSION = 18
t_hipMemPrefetchBatchAsync hipMemPrefetchBatchAsync_fn;

// DO NOT EDIT ABOVE!
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 18
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 19

// ******************************************************************************************* //
//
Expand Down Expand Up @@ -1730,4 +1737,4 @@ struct HipToolsDispatchTable {
// 4) GENERATE COMMENT FOR NEXT STEP VERSION
// 5) ADD "DO NOT EDIT ABOVE!" COMMENT
// ******************************************************************************************* //
};
};
53 changes: 52 additions & 1 deletion projects/clr/hipamd/include/hip/amd_detail/hip_prof_str.h
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,8 @@ enum hip_api_id_t {
HIP_API_ID_hipKernelGetLibrary = 446,
HIP_API_ID_hipLibraryEnumerateKernels = 447,
HIP_API_ID_hipKernelGetName = 448,
HIP_API_ID_LAST = 448,
HIP_API_ID_hipMemPrefetchBatchAsync = 449,
HIP_API_ID_LAST = 449,

HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice),
HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties),
Expand Down Expand Up @@ -789,6 +790,7 @@ static inline const char* hip_api_name(const uint32_t id) {
case HIP_API_ID_hipMemPoolTrimTo: return "hipMemPoolTrimTo";
case HIP_API_ID_hipMemPrefetchAsync: return "hipMemPrefetchAsync";
case HIP_API_ID_hipMemPrefetchAsync_v2: return "hipMemPrefetchAsync_v2";
case HIP_API_ID_hipMemPrefetchBatchAsync: return "hipMemPrefetchBatchAsync";
case HIP_API_ID_hipMemPtrGetInfo: return "hipMemPtrGetInfo";
case HIP_API_ID_hipMemRangeGetAttribute: return "hipMemRangeGetAttribute";
case HIP_API_ID_hipMemRangeGetAttributes: return "hipMemRangeGetAttributes";
Expand Down Expand Up @@ -1231,6 +1233,7 @@ static inline uint32_t hipApiIdByName(const char* name) {
if (strcmp("hipMemPoolTrimTo", name) == 0) return HIP_API_ID_hipMemPoolTrimTo;
if (strcmp("hipMemPrefetchAsync", name) == 0) return HIP_API_ID_hipMemPrefetchAsync;
if (strcmp("hipMemPrefetchAsync_v2", name) == 0) return HIP_API_ID_hipMemPrefetchAsync_v2;
if (strcmp("hipMemPrefetchBatchAsync", name) == 0) return HIP_API_ID_hipMemPrefetchBatchAsync;
if (strcmp("hipMemPtrGetInfo", name) == 0) return HIP_API_ID_hipMemPtrGetInfo;
if (strcmp("hipMemRangeGetAttribute", name) == 0) return HIP_API_ID_hipMemRangeGetAttribute;
if (strcmp("hipMemRangeGetAttributes", name) == 0) return HIP_API_ID_hipMemRangeGetAttributes;
Expand Down Expand Up @@ -3077,6 +3080,20 @@ typedef struct hip_api_data_s {
unsigned int flags;
hipStream_t stream;
} hipMemPrefetchAsync_v2;
struct {
void** dev_ptrs;
void* dev_ptrs__val;
size_t* sizes;
size_t sizes__val;
size_t count;
hipMemLocation* prefetch_locs;
hipMemLocation prefetch_locs__val;
size_t* prefetch_loc_idxs;
size_t prefetch_loc_idxs__val;
size_t num_prefetch_locs;
unsigned long long flags;
hipStream_t stream;
} hipMemPrefetchBatchAsync;
struct {
void* ptr;
size_t* size;
Expand Down Expand Up @@ -5714,6 +5731,17 @@ typedef struct hip_api_data_s {
cb_data.args.hipMemPrefetchAsync_v2.flags = (unsigned int)flags; \
cb_data.args.hipMemPrefetchAsync_v2.stream = (hipStream_t)stream; \
};
// hipMemPrefetchBatchAsync[('void**', 'dev_ptrs'), ('size_t*', 'sizes'), ('size_t', 'count'), ('hipMemLocation*', 'prefetch_locs'), ('size_t*', 'prefetch_loc_idxs'), ('size_t', 'num_prefetch_locs'), ('unsigned long long', 'flags'), ('hipStream_t', 'stream')]
#define INIT_hipMemPrefetchBatchAsync_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipMemPrefetchBatchAsync.dev_ptrs = (void**)dev_ptrs; \
cb_data.args.hipMemPrefetchBatchAsync.sizes = (size_t*)sizes; \
cb_data.args.hipMemPrefetchBatchAsync.count = (size_t)count; \
cb_data.args.hipMemPrefetchBatchAsync.prefetch_locs = (hipMemLocation*)prefetch_locs; \
cb_data.args.hipMemPrefetchBatchAsync.prefetch_loc_idxs = (size_t*)prefetch_loc_idxs; \
cb_data.args.hipMemPrefetchBatchAsync.num_prefetch_locs = (size_t)num_prefetch_locs; \
cb_data.args.hipMemPrefetchBatchAsync.flags = (unsigned long long)flags; \
cb_data.args.hipMemPrefetchBatchAsync.stream = (hipStream_t)stream; \
};
// hipMemPtrGetInfo[('void*', 'ptr'), ('size_t*', 'size')]
#define INIT_hipMemPtrGetInfo_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipMemPtrGetInfo.ptr = (void*)ptr; \
Expand Down Expand Up @@ -7923,6 +7951,13 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
// hipMemPrefetchAsync_v2[('const void*', 'dev_ptr'), ('size_t', 'count'), ('hipMemLocation', 'location'), ('unsigned int', 'flags'), ('hipStream_t', 'stream')]
case HIP_API_ID_hipMemPrefetchAsync_v2:
break;
// hipMemPrefetchBatchAsync[('void**', 'dev_ptrs'), ('size_t*', 'sizes'), ('size_t', 'count'), ('hipMemLocation*', 'prefetch_locs'), ('size_t*', 'prefetch_loc_idxs'), ('size_t', 'num_prefetch_locs'), ('unsigned long long', 'flags'), ('hipStream_t', 'stream')]
case HIP_API_ID_hipMemPrefetchBatchAsync:
if (data->args.hipMemPrefetchBatchAsync.dev_ptrs) data->args.hipMemPrefetchBatchAsync.dev_ptrs__val = *(data->args.hipMemPrefetchBatchAsync.dev_ptrs);
if (data->args.hipMemPrefetchBatchAsync.sizes) data->args.hipMemPrefetchBatchAsync.sizes__val = *(data->args.hipMemPrefetchBatchAsync.sizes);
if (data->args.hipMemPrefetchBatchAsync.prefetch_locs) data->args.hipMemPrefetchBatchAsync.prefetch_locs__val = *(data->args.hipMemPrefetchBatchAsync.prefetch_locs);
if (data->args.hipMemPrefetchBatchAsync.prefetch_loc_idxs) data->args.hipMemPrefetchBatchAsync.prefetch_loc_idxs__val = *(data->args.hipMemPrefetchBatchAsync.prefetch_loc_idxs);
break;
// hipMemPtrGetInfo[('void*', 'ptr'), ('size_t*', 'size')]
case HIP_API_ID_hipMemPtrGetInfo:
if (data->args.hipMemPtrGetInfo.size) data->args.hipMemPtrGetInfo.size__val = *(data->args.hipMemPtrGetInfo.size);
Expand Down Expand Up @@ -10768,6 +10803,22 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchAsync_v2.stream);
oss << ")";
break;
case HIP_API_ID_hipMemPrefetchBatchAsync:
oss << "hipMemPrefetchBatchAsync(";
if (data->args.hipMemPrefetchBatchAsync.dev_ptrs == NULL) oss << "dev_ptrs=NULL";
else { oss << "dev_ptrs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchBatchAsync.dev_ptrs__val); }
if (data->args.hipMemPrefetchBatchAsync.sizes == NULL) oss << ", sizes=NULL";
else { oss << ", sizes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchBatchAsync.sizes__val); }
oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchBatchAsync.count);
if (data->args.hipMemPrefetchBatchAsync.prefetch_locs == NULL) oss << ", prefetch_locs=NULL";
else { oss << ", prefetch_locs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchBatchAsync.prefetch_locs__val); }
if (data->args.hipMemPrefetchBatchAsync.prefetch_loc_idxs == NULL) oss << ", prefetch_loc_idxs=NULL";
else { oss << ", prefetch_loc_idxs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchBatchAsync.prefetch_loc_idxs__val); }
oss << ", num_prefetch_locs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchBatchAsync.num_prefetch_locs);
oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchBatchAsync.flags);
oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchBatchAsync.stream);
oss << ")";
break;
case HIP_API_ID_hipMemPtrGetInfo:
oss << "hipMemPtrGetInfo(";
oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPtrGetInfo.ptr);
Expand Down
1 change: 1 addition & 0 deletions projects/clr/hipamd/src/amdhip.def
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,7 @@ hipMemcpy3DPeerAsync
hipGetDriverEntryPoint
hipGetDriverEntryPoint_spt
hipMemPrefetchAsync_v2
hipMemPrefetchBatchAsync
hipMemAdvise_v2
hipStreamGetId
hipLibraryLoadData
Expand Down
11 changes: 9 additions & 2 deletions projects/clr/hipamd/src/hip_api_trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,10 @@ hipError_t hipMemPoolTrimTo(hipMemPool_t mem_pool, size_t min_bytes_to_hold);
hipError_t hipMemPrefetchAsync(const void* dev_ptr, size_t count, int device, hipStream_t stream);
hipError_t hipMemPrefetchAsync_v2(const void* dev_ptr, size_t count, hipMemLocation location,
unsigned int flags, hipStream_t stream);
hipError_t hipMemPrefetchBatchAsync(void** dev_ptrs, size_t* sizes, size_t count,
hipMemLocation* prefetch_locs, size_t* prefetch_loc_idxs,
size_t num_prefetch_locs, unsigned long long flags,
hipStream_t stream);
hipError_t hipMemPtrGetInfo(void* ptr, size_t* size);
hipError_t hipMemRangeGetAttribute(void* data, size_t data_size, hipMemRangeAttribute attribute,
const void* dev_ptr, size_t count);
Expand Down Expand Up @@ -1173,6 +1177,7 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) {
ptrDispatchTable->hipMemPoolTrimTo_fn = hip::hipMemPoolTrimTo;
ptrDispatchTable->hipMemPrefetchAsync_fn = hip::hipMemPrefetchAsync;
ptrDispatchTable->hipMemPrefetchAsync_v2_fn = hip::hipMemPrefetchAsync_v2;
ptrDispatchTable->hipMemPrefetchBatchAsync_fn = hip::hipMemPrefetchBatchAsync;
ptrDispatchTable->hipMemPtrGetInfo_fn = hip::hipMemPtrGetInfo;
ptrDispatchTable->hipMemRangeGetAttribute_fn = hip::hipMemRangeGetAttribute;
ptrDispatchTable->hipMemRangeGetAttributes_fn = hip::hipMemRangeGetAttributes;
Expand Down Expand Up @@ -2099,15 +2104,17 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipStreamCopyAttributes_fn, 501);
HIP_ENFORCE_ABI(HipDispatchTable, hipLibraryEnumerateKernels_fn, 502);
HIP_ENFORCE_ABI(HipDispatchTable, hipKernelGetLibrary_fn, 503);
HIP_ENFORCE_ABI(HipDispatchTable, hipKernelGetName_fn, 504);
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 18
HIP_ENFORCE_ABI(HipDispatchTable, hipMemPrefetchBatchAsync_fn, 505);
// if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below
// will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.:
//
// HIP_ENFORCE_ABI(<table>, <functor>, 8)
//
// HIP_ENFORCE_ABI_VERSIONING(<table>, 9) <- 8 + 1 = 9
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 505)
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 506)

static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 17,
static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 18,
"If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function "
"pointers and then update this check so it is true");
#endif
3 changes: 2 additions & 1 deletion projects/clr/hipamd/src/hip_hcc.map.in
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,7 @@ global:
hipLibraryEnumerateKernels;
hipKernelGetLibrary;
hipKernelGetName;
hipMemPrefetchBatchAsync;
local:
*;
} hip_7.1;
} hip_7.1;
145 changes: 145 additions & 0 deletions projects/clr/hipamd/src/hip_hmm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ namespace hip {
hipError_t ihipMallocManaged(void** ptr, size_t size, size_t align = 0, bool use_host_ptr = 0);
hipError_t ihipMemPrefetchAsync(const void* dev_ptr, size_t count, hipMemLocation location,
hipStream_t stream);
hipError_t ihipMemPrefetchBatchAsync(void** dev_ptrs, size_t* sizes, size_t count,
hipMemLocation* prefetch_locs, size_t* prefetch_loc_idxs,
size_t num_prefetch_locs, unsigned long long flags,
hipStream_t stream);
hipError_t ihipMemAdvise(const void* dev_ptr, size_t count, hipMemoryAdvise advice,
hipMemLocation location);

Expand Down Expand Up @@ -119,6 +123,19 @@ hipError_t hipMemPrefetchAsync_v2(const void* dev_ptr, size_t count, hipMemLocat
HIP_RETURN(ihipMemPrefetchAsync(dev_ptr, count, location, stream));
}

// ================================================================================================
hipError_t hipMemPrefetchBatchAsync(void** dev_ptrs, size_t* sizes, size_t count,
hipMemLocation* prefetch_locs, size_t* prefetch_loc_idxs,
size_t num_prefetch_locs, unsigned long long flags,
hipStream_t stream) {
HIP_INIT_API(hipMemPrefetchBatchAsync, dev_ptrs, sizes, count, prefetch_locs, prefetch_loc_idxs,
num_prefetch_locs, flags, stream);
CHECK_STREAM_CAPTURE_SUPPORTED();

HIP_RETURN(ihipMemPrefetchBatchAsync(dev_ptrs, sizes, count, prefetch_locs, prefetch_loc_idxs,
num_prefetch_locs, flags, stream));
}

// ================================================================================================
hipError_t hipMemAdvise(const void* dev_ptr, size_t count, hipMemoryAdvise advice, int device) {
HIP_INIT_API(hipMemAdvise, dev_ptr, count, advice, device);
Expand Down Expand Up @@ -352,6 +369,134 @@ hipError_t ihipMemPrefetchAsync(const void* dev_ptr, size_t count, hipMemLocatio
return hipSuccess;
}
// ================================================================================================
hipError_t ihipMemPrefetchBatchAsync(void** dev_ptrs, size_t* sizes, size_t count,
hipMemLocation* prefetch_locs, size_t* prefetch_loc_idxs,
size_t num_prefetch_locs, unsigned long long flags,
hipStream_t stream) {
// Upfront validation: NULL pointer checks
if ((dev_ptrs == nullptr) || (sizes == nullptr) || (prefetch_locs == nullptr) ||
(prefetch_loc_idxs == nullptr)) {
return hipErrorInvalidValue;
}

// Count relationship checks
if ((count == 0) || (num_prefetch_locs == 0) || (num_prefetch_locs > count)) {
return hipErrorInvalidValue;
}

// Flags and stream validation
if ((flags != 0) || (stream == nullptr)) {
return hipErrorInvalidValue;
}

// Index array validation: first element must be 0
if (prefetch_loc_idxs[0] != 0) {
return hipErrorInvalidValue;
}

// Index array validation: must be monotonically increasing and last element < count
for (size_t i = 0; i < num_prefetch_locs; i++) {
if (prefetch_loc_idxs[i] >= count) {
return hipErrorInvalidValue;
}
if (i > 0 && prefetch_loc_idxs[i] < prefetch_loc_idxs[i - 1]) {
return hipErrorInvalidValue;
}
}

getStreamPerThread(stream);

// Get stream - stream is already validated to be non-NULL upfront
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
return hipErrorInvalidValue;
}

// Allocate arrays for batch command
std::vector<const void*> dev_ptrs_vec(count);
std::vector<uint8_t> cpu_access_vec(count); // Use uint8_t instead of bool for .data() support
std::vector<int> target_devices_vec(count);
std::vector<amd::Device*> devices_vec(count);

// Validate and prepare each operation
for (size_t op_idx = 0; op_idx < count; op_idx++) {
const void* dev_ptr = dev_ptrs[op_idx];
size_t size = sizes[op_idx];

// Per-operation validation: size must be > 0
if (size == 0) {
return hipErrorInvalidValue;
}

// Per-operation validation: pointer must not be NULL if size > 0
if (dev_ptr == nullptr) {
return hipErrorInvalidValue;
}

// Get the location index for this operation
// prefetch_loc_idxs[i] indicates the first operation index that uses location i
// Find the largest location index i where prefetch_loc_idxs[i] <= op_idx
size_t loc_idx = 0;
for (size_t i = 0; i < num_prefetch_locs; i++) {
if (prefetch_loc_idxs[i] <= op_idx) {
loc_idx = i;
} else {
break;
}
}
hipMemLocation location = prefetch_locs[loc_idx];

// Validate memory object and size (similar to ihipMemPrefetchAsync)
size_t offset = 0;
amd::Memory* memObj = getMemoryObject(dev_ptr, offset);
if ((memObj != nullptr) && (size > (memObj->getSize() - offset))) {
return hipErrorInvalidValue;
}

// Check if prefetching to device
const bool isDevice = (location.type == hipMemLocationTypeDevice);
// Determine the target device index
int targetDevice = isDevice ? location.id : hipCpuDeviceId;

// Validate device (similar to ihipMemPrefetchAsync)
amd::Device* dev = nullptr;
if (isDevice) {
if (static_cast<size_t>(targetDevice) >= g_devices.size()) {
return hipErrorInvalidDevice;
}
dev = g_devices[targetDevice]->devices()[0];
// For non-managed memory prefetching to device, device must support pageable memory access
// Managed memory is identified by CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_ALLOC_HOST_PTR flags
const bool isManagedMemory =
(memObj != nullptr) &&
(memObj->getMemFlags() & (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_ALLOC_HOST_PTR));
if (!isManagedMemory && !dev->info().hmmCpuMemoryAccessible_) {
return hipErrorInvalidValue;
}
}

// Store operation data for batch command
dev_ptrs_vec[op_idx] = dev_ptr;
cpu_access_vec[op_idx] = !isDevice ? 1 : 0;
target_devices_vec[op_idx] = targetDevice;
devices_vec[op_idx] = dev;
}

// Create and enqueue single batch command
amd::Command::EventWaitList waitList;
amd::SvmPrefetchBatchAsyncCommand* command =
new amd::SvmPrefetchBatchAsyncCommand(*hip_stream, waitList, dev_ptrs_vec.data(), sizes,
count, reinterpret_cast<bool*>(cpu_access_vec.data()),
target_devices_vec.data(), devices_vec.data());
if (command == nullptr) {
return hipErrorOutOfMemory;
}
command->enqueue();
command->release();

return hipSuccess;
}
// ================================================================================================
hipError_t ihipMemAdvise(const void* dev_ptr, size_t count, hipMemoryAdvise advice,
hipMemLocation location) {
if ((dev_ptr == nullptr) || (count == 0)) {
Expand Down
Loading