diff --git a/clang/examples/DPCT/CUB/cub$$BlockExchange$$BlockedToStriped.cu b/clang/examples/DPCT/CUB/cub$$BlockExchange$$BlockedToStriped.cu new file mode 100644 index 000000000000..af7ed33da881 --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockExchange$$BlockedToStriped.cu @@ -0,0 +1,10 @@ +// clang-format off +#include +#include + +__device__ void test(int (&thread_data)[4]) { + // Start + __shared__ typename cub::BlockExchange::TempStorage temp_storage; + cub::BlockExchange(temp_storage).BlockedToStriped(thread_data/*int(&)[4]*/, thread_data/*int(&)[4]*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockExchange$$BlockedToWarpStriped.cu b/clang/examples/DPCT/CUB/cub$$BlockExchange$$BlockedToWarpStriped.cu new file mode 100644 index 000000000000..99712b484024 --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockExchange$$BlockedToWarpStriped.cu @@ -0,0 +1,10 @@ +// clang-format off +#include +#include + +__device__ void test(int (&thread_data)[4]) { + // Start + __shared__ typename cub::BlockExchange::TempStorage temp_storage; + cub::BlockExchange(temp_storage).BlockedToWarpStriped(thread_data/*int(&)[4]*/, thread_data/*int(&)[4]*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockExchange$$ScatterToBlocked.cu b/clang/examples/DPCT/CUB/cub$$BlockExchange$$ScatterToBlocked.cu new file mode 100644 index 000000000000..bd81d5b98d60 --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockExchange$$ScatterToBlocked.cu @@ -0,0 +1,10 @@ +// clang-format off +#include +#include + +__device__ void test(int (&thread_data)[4], int (&thread_rank)[4]) { + // Start + __shared__ typename cub::BlockExchange::TempStorage temp_storage; + cub::BlockExchange(temp_storage).ScatterToBlocked(thread_data/*int(&)[4]*/, thread_rank/*int(&)[4]*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockExchange$$ScatterToStriped.cu b/clang/examples/DPCT/CUB/cub$$BlockExchange$$ScatterToStriped.cu new file mode 100644 index 000000000000..6c69affda22c --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockExchange$$ScatterToStriped.cu @@ -0,0 +1,10 @@ +// clang-format off +#include +#include + +__device__ void test(int (&thread_data)[4], int (&thread_rank)[4]) { + // Start + __shared__ typename cub::BlockExchange::TempStorage temp_storage; + cub::BlockExchange(temp_storage).ScatterToStriped(thread_data/*int(&)[4]*/, thread_rank/*int(&)[4]*/); + // End +} \ No newline at end of file diff --git a/clang/examples/DPCT/CUB/cub$$BlockExchange$$StripedToBlocked.cu b/clang/examples/DPCT/CUB/cub$$BlockExchange$$StripedToBlocked.cu new file mode 100644 index 000000000000..44abaac9d1e0 --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockExchange$$StripedToBlocked.cu @@ -0,0 +1,10 @@ +// clang-format off +#include +#include + +__device__ void test(int (&thread_data)[4]) { + // Start + __shared__ typename cub::BlockExchange::TempStorage temp_storage; + cub::BlockExchange(temp_storage).StripedToBlocked(thread_data/*int(&)[4]*/, thread_data/*int(&)[4]*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockExchange$$WarpStripedToBlocked.cu b/clang/examples/DPCT/CUB/cub$$BlockExchange$$WarpStripedToBlocked.cu new file mode 100644 index 000000000000..52ce73e0a58f --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockExchange$$WarpStripedToBlocked.cu @@ -0,0 +1,10 @@ +// clang-format off +#include +#include + +__device__ void test(int (&thread_data)[4]) { + // Start + __shared__ typename cub::BlockExchange::TempStorage temp_storage; + cub::BlockExchange(temp_storage).WarpStripedToBlocked(thread_data/*int(&)[4]*/, thread_data/*int(&)[4]*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockLoad$$Load.cu b/clang/examples/DPCT/CUB/cub$$BlockLoad$$Load.cu new file mode 100644 index 000000000000..0bfcb1bbad85 --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockLoad$$Load.cu @@ -0,0 +1,12 @@ +// clang-format off +#include +#include + +__device__ void test(int *src, int (&thread_data)[4], int end, int default_value) { + // Start + __shared__ typename cub::BlockLoad::TempStorage temp_storage; + cub::BlockLoad(temp_storage).Load(src/*int **/, thread_data/*int(&)[4]*/); + cub::BlockLoad(temp_storage).Load(src/*int **/, thread_data/*int(&)[4]*/, end/*int*/); + cub::BlockLoad(temp_storage).Load(src/*int **/, thread_data/*int(&)[4]*/, end/*int*/, default_value/*int*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockRadixSort$$Sort.cu b/clang/examples/DPCT/CUB/cub$$BlockRadixSort$$Sort.cu new file mode 100644 index 000000000000..3ef5b46426dc --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockRadixSort$$Sort.cu @@ -0,0 +1,10 @@ +// clang-format off +#include +#include + +__device__ void test(int (&thread_data)[4]) { + // Start + __shared__ typename cub::BlockRadixSort::TempStorage temp_storage; + cub::BlockRadixSort(temp_storage).Sort(thread_data/*int(&)[4]*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockRadixSort$$SortBlockedToStriped.cu b/clang/examples/DPCT/CUB/cub$$BlockRadixSort$$SortBlockedToStriped.cu new file mode 100644 index 000000000000..802139ede0cd --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockRadixSort$$SortBlockedToStriped.cu @@ -0,0 +1,10 @@ +// clang-format off +#include +#include + +__device__ void test(int (&thread_data)[4]) { + // Start + __shared__ typename cub::BlockRadixSort::TempStorage temp_storage; + cub::BlockRadixSort(temp_storage).SortBlockedToStriped(thread_data/*int(&)[4]*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockRadixSort$$SortDescending.cu b/clang/examples/DPCT/CUB/cub$$BlockRadixSort$$SortDescending.cu new file mode 100644 index 000000000000..1611518ed6ea --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockRadixSort$$SortDescending.cu @@ -0,0 +1,10 @@ +// clang-format off +#include +#include + +__device__ void test(int (&thread_data)[4]) { + // Start + __shared__ typename cub::BlockRadixSort::TempStorage temp_storage; + cub::BlockRadixSort(temp_storage).SortDescending(thread_data/*int(&)[4]*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockRadixSort$$SortDescendingBlockedToStriped.cu b/clang/examples/DPCT/CUB/cub$$BlockRadixSort$$SortDescendingBlockedToStriped.cu new file mode 100644 index 000000000000..83f712fee7b8 --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockRadixSort$$SortDescendingBlockedToStriped.cu @@ -0,0 +1,10 @@ +// clang-format off +#include +#include + +__device__ void test(int (&thread_data)[4]) { + // Start + __shared__ typename cub::BlockRadixSort::TempStorage temp_storage; + cub::BlockRadixSort(temp_storage).SortDescendingBlockedToStriped(thread_data/*int(&)[4]*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockReduce$$Reduce.cu b/clang/examples/DPCT/CUB/cub$$BlockReduce$$Reduce.cu new file mode 100644 index 000000000000..c0f38dafcbaf --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockReduce$$Reduce.cu @@ -0,0 +1,10 @@ +// clang-format off +#include +#include + +__device__ void test(int data) { + // Start + __shared__ typename cub::BlockReduce::TempStorage temp_storage; + cub::BlockReduce(temp_storage).Reduce(data/*int*/, cub::Sum()/*ReduceOp*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockReduce$$Sum.cu b/clang/examples/DPCT/CUB/cub$$BlockReduce$$Sum.cu new file mode 100644 index 000000000000..35453e48510d --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockReduce$$Sum.cu @@ -0,0 +1,10 @@ +// clang-format off +#include +#include + +__device__ void test(int data) { + // Start + __shared__ typename cub::BlockReduce::TempStorage temp_storage; + cub::BlockReduce(temp_storage).Sum(data/*int*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockScan$$ExclusiveScan.cu b/clang/examples/DPCT/CUB/cub$$BlockScan$$ExclusiveScan.cu new file mode 100644 index 000000000000..2e9f04a99bc8 --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockScan$$ExclusiveScan.cu @@ -0,0 +1,10 @@ +// clang-format off +#include +#include + +__device__ void test(int input, int output, int init) { + // Start + __shared__ typename cub::BlockScan::TempStorage temp_storage; + cub::BlockScan(temp_storage).ExclusiveScan(input/*int*/, output/*int &*/, init/*int*/, cub::Sum()/*ScanOp*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockScan$$ExclusiveSum.cu b/clang/examples/DPCT/CUB/cub$$BlockScan$$ExclusiveSum.cu new file mode 100644 index 000000000000..9d0317c4bdae --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockScan$$ExclusiveSum.cu @@ -0,0 +1,10 @@ +// clang-format off +#include +#include + +__device__ void test(int input, int output) { + // Start + __shared__ typename cub::BlockScan::TempStorage temp_storage; + cub::BlockScan(temp_storage).ExclusiveSum(input/*int*/, output/*int &*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockScan$$InclusiveScan.cu b/clang/examples/DPCT/CUB/cub$$BlockScan$$InclusiveScan.cu new file mode 100644 index 000000000000..22f94f6b4989 --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockScan$$InclusiveScan.cu @@ -0,0 +1,10 @@ +// clang-format off +#include +#include + +__device__ void test(int input, int output) { + // Start + __shared__ typename cub::BlockScan::TempStorage temp_storage; + cub::BlockScan(temp_storage).InclusiveScan(input/*int*/, output/*int &*/, cub::Sum()/*ScanOp*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockScan$$InclusiveSum.cu b/clang/examples/DPCT/CUB/cub$$BlockScan$$InclusiveSum.cu new file mode 100644 index 000000000000..6ce48f764f72 --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockScan$$InclusiveSum.cu @@ -0,0 +1,10 @@ +// clang-format off +#include +#include + +__device__ void test(int input, int output) { + // Start + __shared__ typename cub::BlockScan::TempStorage temp_storage; + cub::BlockScan(temp_storage).InclusiveSum(input/*int*/, output/*int &*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockShuffle$$Down.cu b/clang/examples/DPCT/CUB/cub$$BlockShuffle$$Down.cu new file mode 100644 index 000000000000..8bcb9e9e95ac --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockShuffle$$Down.cu @@ -0,0 +1,11 @@ +// clang-format off +#include +#include +#include + +__device__ void test(int (&input)[4], int (&output)[4]) { + // Start + __shared__ typename cub::BlockShuffle::TempStorage temp_storage; + cub::BlockShuffle(temp_storage).Down(input/*int*/, output/*int &*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockShuffle$$Offset.cu b/clang/examples/DPCT/CUB/cub$$BlockShuffle$$Offset.cu new file mode 100644 index 000000000000..b78e3ca66a55 --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockShuffle$$Offset.cu @@ -0,0 +1,11 @@ +// clang-format off +#include +#include +#include + +__device__ void test(int input, int output, int distance) { + // Start + __shared__ typename cub::BlockShuffle::TempStorage temp_storage; + cub::BlockShuffle(temp_storage).Offset(input/*int*/, output/*int &*/, distance/*int*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockShuffle$$Rotate.cu b/clang/examples/DPCT/CUB/cub$$BlockShuffle$$Rotate.cu new file mode 100644 index 000000000000..1b2ff0b2ea6b --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockShuffle$$Rotate.cu @@ -0,0 +1,11 @@ +// clang-format off +#include +#include +#include + +__device__ void test(int input, int output, unsigned int distance) { + // Start + __shared__ typename cub::BlockShuffle::TempStorage temp_storage; + cub::BlockShuffle(temp_storage).Rotate(input/*int*/, output/*int &*/, distance/*unsigned int*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockShuffle$$Up.cu b/clang/examples/DPCT/CUB/cub$$BlockShuffle$$Up.cu new file mode 100644 index 000000000000..b447541a4243 --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockShuffle$$Up.cu @@ -0,0 +1,11 @@ +// clang-format off +#include +#include +#include + +__device__ void test(int (&input)[4], int (&output)[4]) { + // Start + __shared__ typename cub::BlockShuffle::TempStorage temp_storage; + cub::BlockShuffle(temp_storage).Up(input/*int*/, output/*int &*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$BlockStore$$Store.cu b/clang/examples/DPCT/CUB/cub$$BlockStore$$Store.cu new file mode 100644 index 000000000000..4a20994834bc --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$BlockStore$$Store.cu @@ -0,0 +1,11 @@ +// clang-format off +#include +#include + +__device__ void test(int *dst, int (&thread_data)[4], int end) { + // Start + __shared__ typename cub::BlockStore::TempStorage temp_storage; + cub::BlockStore(temp_storage).Store(dst/*int **/, thread_data/*int(&)[4]*/); + cub::BlockStore(temp_storage).Store(dst/*int **/, thread_data/*int(&)[4]*/, end/*int*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$DeviceHistogram$$HistogramEven.cu b/clang/examples/DPCT/CUB/cub$$DeviceHistogram$$HistogramEven.cu new file mode 100644 index 000000000000..d9f1ac7dab32 --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$DeviceHistogram$$HistogramEven.cu @@ -0,0 +1,13 @@ +// clang-format off +#include +#include + +void test(int num_samples, float *d_samples, int *d_histogram, int num_levels, float lower_level, float upper_level) { + // Start + void *d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + cub::DeviceHistogram::HistogramEven(d_temp_storage/*void **/, temp_storage_bytes/*size_t*/, d_samples/*float **/, d_histogram/*int **/, num_levels/*int*/, lower_level/*float*/, upper_level/*float*/, num_samples/*int*/); + cudaMalloc(&d_temp_storage, temp_storage_bytes); + cub::DeviceHistogram::HistogramEven(d_temp_storage/*void **/, temp_storage_bytes/*size_t*/, d_samples/*float **/, d_histogram/*int **/, num_levels/*int*/, lower_level/*float*/, upper_level/*float*/, num_samples/*int*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$DeviceHistogram$$HistogramRange.cu b/clang/examples/DPCT/CUB/cub$$DeviceHistogram$$HistogramRange.cu new file mode 100644 index 000000000000..d8e210e68c61 --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$DeviceHistogram$$HistogramRange.cu @@ -0,0 +1,13 @@ +// clang-format off +#include +#include + +void test(int num_samples, float *d_samples, int *d_histogram, int num_levels, float *d_levels) { + // Start + void *d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + cub::DeviceHistogram::HistogramRange(d_temp_storage/*void **/, temp_storage_bytes/*size_t*/, d_samples/*float **/, d_histogram/*int **/, num_levels/*int*/, d_levels/*float **/, num_samples/*int*/); + cudaMalloc(&d_temp_storage, temp_storage_bytes); + cub::DeviceHistogram::HistogramRange(d_temp_storage/*void **/, temp_storage_bytes/*size_t*/, d_samples/*float **/, d_histogram/*int **/, num_levels/*int*/, d_levels/*float **/, num_samples/*int*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$DeviceHistogram$$MultiHistogramEven.cu b/clang/examples/DPCT/CUB/cub$$DeviceHistogram$$MultiHistogramEven.cu new file mode 100644 index 000000000000..8477b9375e92 --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$DeviceHistogram$$MultiHistogramEven.cu @@ -0,0 +1,13 @@ +// clang-format off +#include +#include + +void test(int num_pixels, unsigned char *d_samples, int *(&d_histogram)[3], int (&num_levels)[3], unsigned int (&lower_level)[3], unsigned int (&upper_level)[3], cudaStream_t S) { + // Start + void *d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage/*void **/, temp_storage_bytes/*size_t*/, d_samples/*unsigned char **/, d_histogram/*int *(&)[3]*/, num_levels/*int(&)[3]*/, lower_level/*unsigned int(&)[3]*/, upper_level/*unsigned int(&)[3]*/, num_pixels/*int*/, S/*cudaStream_t*/); + cudaMalloc(&d_temp_storage, temp_storage_bytes); + cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage/*void **/, temp_storage_bytes/*size_t*/, d_samples/*unsigned char **/, d_histogram/*int *(&)[3]*/, num_levels/*int(&)[3]*/, lower_level/*unsigned int(&)[3]*/, upper_level/*unsigned int(&)[3]*/, num_pixels/*int*/, S/*cudaStream_t*/); + // End +} diff --git a/clang/examples/DPCT/CUB/cub$$DeviceHistogram$$MultiHistogramRange.cu b/clang/examples/DPCT/CUB/cub$$DeviceHistogram$$MultiHistogramRange.cu new file mode 100644 index 000000000000..770699d4a533 --- /dev/null +++ b/clang/examples/DPCT/CUB/cub$$DeviceHistogram$$MultiHistogramRange.cu @@ -0,0 +1,13 @@ +// clang-format off +#include +#include + +void test(int num_pixels, unsigned char *d_samples, int *(&d_histogram)[3], int (&num_levels)[3], unsigned int *(&d_levels)[3]) { + // Start + void *d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage/*void **/, temp_storage_bytes/*size_t*/, d_samples/*unsigned char **/, d_histogram/*int *(&)[3]*/, num_levels/*int(&)[3]*/, d_levels/*unsigned int *(&)[3]*/, num_pixels/*int*/); + cudaMalloc(&d_temp_storage, temp_storage_bytes); + cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage/*void **/, temp_storage_bytes/*size_t*/, d_samples/*unsigned char **/, d_histogram/*int *(&)[3]*/, num_levels/*int(&)[3]*/, d_levels/*unsigned int *(&)[3]*/, num_pixels/*int*/); + // End +} diff --git a/clang/test/dpct/query_api_mapping/CUB/cub_block.cu b/clang/test/dpct/query_api_mapping/CUB/cub_block.cu new file mode 100644 index 000000000000..1c88c4481d81 --- /dev/null +++ b/clang/test/dpct/query_api_mapping/CUB/cub_block.cu @@ -0,0 +1,166 @@ +// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0, cuda-10.1, cuda-10.2, cuda-11.0, cuda-11.2, cuda-11.4 +// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2, v10.0, v10.1, v10.2, v11.0, v11.2, v11.4 +// UNSUPPORTED: system-windows +// clang-format off + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockExchange::BlockedToStriped | FileCheck %s -check-prefix=CHECK_BLOCKEXCHANGE_BLOCKEDTOSTRIPED +// CHECK_BLOCKEXCHANGE_BLOCKEDTOSTRIPED: CUDA API: +// CHECK_BLOCKEXCHANGE_BLOCKEDTOSTRIPED: __shared__ typename cub::BlockExchange::TempStorage temp_storage; +// CHECK_BLOCKEXCHANGE_BLOCKEDTOSTRIPED: cub::BlockExchange(temp_storage).BlockedToStriped(thread_data/*int(&)[4]*/, thread_data/*int(&)[4]*/); +// CHECK_BLOCKEXCHANGE_BLOCKEDTOSTRIPED: Is migrated to: +// CHECK_BLOCKEXCHANGE_BLOCKEDTOSTRIPED: dpct::group::exchange(temp_storage).blocked_to_striped(sycl::ext::oneapi::this_work_item::get_nd_item<3>(), thread_data, thread_data); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockExchange::BlockedToWarpStriped | FileCheck %s -check-prefix=CHECK_BLOCKEXCHANGE_BLOCKEDTOWARPSTRIPED +// CHECK_BLOCKEXCHANGE_BLOCKEDTOWARPSTRIPED: CUDA API: +// CHECK_BLOCKEXCHANGE_BLOCKEDTOWARPSTRIPED: __shared__ typename cub::BlockExchange::TempStorage temp_storage; +// CHECK_BLOCKEXCHANGE_BLOCKEDTOWARPSTRIPED: cub::BlockExchange(temp_storage).BlockedToWarpStriped(thread_data/*int(&)[4]*/, thread_data/*int(&)[4]*/); +// CHECK_BLOCKEXCHANGE_BLOCKEDTOWARPSTRIPED: Is migrated to: +// CHECK_BLOCKEXCHANGE_BLOCKEDTOWARPSTRIPED: dpct::group::exchange(temp_storage).blocked_to_sub_group_striped(sycl::ext::oneapi::this_work_item::get_nd_item<3>(), thread_data, thread_data); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockExchange::ScatterToBlocked | FileCheck %s -check-prefix=CHECK_BLOCKEXCHANGE_SCATTERTOBLOCKED +// CHECK_BLOCKEXCHANGE_SCATTERTOBLOCKED: CUDA API: +// CHECK_BLOCKEXCHANGE_SCATTERTOBLOCKED: __shared__ typename cub::BlockExchange::TempStorage temp_storage; +// CHECK_BLOCKEXCHANGE_SCATTERTOBLOCKED: cub::BlockExchange(temp_storage).ScatterToBlocked(thread_data/*int(&)[4]*/, thread_rank/*int(&)[4]*/); +// CHECK_BLOCKEXCHANGE_SCATTERTOBLOCKED: Is migrated to: +// CHECK_BLOCKEXCHANGE_SCATTERTOBLOCKED: dpct::group::exchange(temp_storage).scatter_to_blocked(sycl::ext::oneapi::this_work_item::get_nd_item<3>(), thread_data, thread_rank); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockExchange::ScatterToStriped | FileCheck %s -check-prefix=CHECK_BLOCKEXCHANGE_SCATTERTOSTRIPED +// CHECK_BLOCKEXCHANGE_SCATTERTOSTRIPED: CUDA API: +// CHECK_BLOCKEXCHANGE_SCATTERTOSTRIPED: __shared__ typename cub::BlockExchange::TempStorage temp_storage; +// CHECK_BLOCKEXCHANGE_SCATTERTOSTRIPED: cub::BlockExchange(temp_storage).ScatterToStriped(thread_data/*int(&)[4]*/, thread_rank/*int(&)[4]*/); +// CHECK_BLOCKEXCHANGE_SCATTERTOSTRIPED: Is migrated to: +// CHECK_BLOCKEXCHANGE_SCATTERTOSTRIPED: dpct::group::exchange(temp_storage).scatter_to_striped(sycl::ext::oneapi::this_work_item::get_nd_item<3>(), thread_data, thread_rank); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockExchange::StripedToBlocked | FileCheck %s -check-prefix=CHECK_BLOCKEXCHANGE_STRIPEDTOBLOCKED +// CHECK_BLOCKEXCHANGE_STRIPEDTOBLOCKED: CUDA API: +// CHECK_BLOCKEXCHANGE_STRIPEDTOBLOCKED: __shared__ typename cub::BlockExchange::TempStorage temp_storage; +// CHECK_BLOCKEXCHANGE_STRIPEDTOBLOCKED: cub::BlockExchange(temp_storage).StripedToBlocked(thread_data/*int(&)[4]*/, thread_data/*int(&)[4]*/); +// CHECK_BLOCKEXCHANGE_STRIPEDTOBLOCKED: Is migrated to: +// CHECK_BLOCKEXCHANGE_STRIPEDTOBLOCKED: dpct::group::exchange(temp_storage).striped_to_blocked(sycl::ext::oneapi::this_work_item::get_nd_item<3>(), thread_data, thread_data); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockExchange::WarpStripedToBlocked | FileCheck %s -check-prefix=CHECK_BLOCKEXCHANGE_WARPSTRIPEDTOBLOCKED +// CHECK_BLOCKEXCHANGE_WARPSTRIPEDTOBLOCKED: CUDA API: +// CHECK_BLOCKEXCHANGE_WARPSTRIPEDTOBLOCKED: __shared__ typename cub::BlockExchange::TempStorage temp_storage; +// CHECK_BLOCKEXCHANGE_WARPSTRIPEDTOBLOCKED: cub::BlockExchange(temp_storage).WarpStripedToBlocked(thread_data/*int(&)[4]*/, thread_data/*int(&)[4]*/); +// CHECK_BLOCKEXCHANGE_WARPSTRIPEDTOBLOCKED: Is migrated to: +// CHECK_BLOCKEXCHANGE_WARPSTRIPEDTOBLOCKED: dpct::group::exchange(temp_storage).sub_group_striped_to_blocked(sycl::ext::oneapi::this_work_item::get_nd_item<3>(), thread_data, thread_data); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockLoad::Load | FileCheck %s -check-prefix=CHECK_BLOCKLOAD_LOAD +// CHECK_BLOCKLOAD_LOAD: CUDA API: +// CHECK_BLOCKLOAD_LOAD: __shared__ typename cub::BlockLoad::TempStorage temp_storage; +// CHECK_BLOCKLOAD_LOAD: cub::BlockLoad(temp_storage).Load(src/*int **/, thread_data/*int(&)[4]*/); +// CHECK_BLOCKLOAD_LOAD: cub::BlockLoad(temp_storage).Load(src/*int **/, thread_data/*int(&)[4]*/, end/*int*/); +// CHECK_BLOCKLOAD_LOAD: cub::BlockLoad(temp_storage).Load(src/*int **/, thread_data/*int(&)[4]*/, end/*int*/, default_value/*int*/); +// CHECK_BLOCKLOAD_LOAD: Is migrated to: +// CHECK_BLOCKLOAD_LOAD: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>(); +// CHECK_BLOCKLOAD_LOAD: dpct::group::group_load(temp_storage).load(item_ct1, src, thread_data); +// CHECK_BLOCKLOAD_LOAD: dpct::group::group_load(temp_storage).load(item_ct1, src, thread_data, end); +// CHECK_BLOCKLOAD_LOAD: dpct::group::group_load(temp_storage).load(item_ct1, src, thread_data, end, default_value); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockStore::Store | FileCheck %s -check-prefix=CHECK_BLOCKSTORE_STORE +// CHECK_BLOCKSTORE_STORE: CUDA API: +// CHECK_BLOCKSTORE_STORE: __shared__ typename cub::BlockStore::TempStorage temp_storage; +// CHECK_BLOCKSTORE_STORE: cub::BlockStore(temp_storage).Store(dst/*int **/, thread_data/*int(&)[4]*/); +// CHECK_BLOCKSTORE_STORE: cub::BlockStore(temp_storage).Store(dst/*int **/, thread_data/*int(&)[4]*/, end/*int*/); +// CHECK_BLOCKSTORE_STORE: Is migrated to: +// CHECK_BLOCKSTORE_STORE: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>(); +// CHECK_BLOCKSTORE_STORE: dpct::group::group_store(temp_storage).store(item_ct1, dst, thread_data); +// CHECK_BLOCKSTORE_STORE: dpct::group::group_store(temp_storage).store(item_ct1, dst, thread_data, end); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockRadixSort::Sort | FileCheck %s -check-prefix=CHECK_BLOCKRADIXSORT_SORT +// CHECK_BLOCKRADIXSORT_SORT: CUDA API: +// CHECK_BLOCKRADIXSORT_SORT: __shared__ typename cub::BlockRadixSort::TempStorage temp_storage; +// CHECK_BLOCKRADIXSORT_SORT: cub::BlockRadixSort(temp_storage).Sort(thread_data/*int(&)[4]*/); +// CHECK_BLOCKRADIXSORT_SORT: Is migrated to: +// CHECK_BLOCKRADIXSORT_SORT: dpct::group::group_radix_sort(temp_storage).sort(sycl::ext::oneapi::this_work_item::get_nd_item<3>(), thread_data); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockRadixSort::SortBlockedToStriped | FileCheck %s -check-prefix=CHECK_BLOCKRADIXSORT_SORTBLOCKEDTOSTRIPED +// CHECK_BLOCKRADIXSORT_SORTBLOCKEDTOSTRIPED: CUDA API: +// CHECK_BLOCKRADIXSORT_SORTBLOCKEDTOSTRIPED: __shared__ typename cub::BlockRadixSort::TempStorage temp_storage; +// CHECK_BLOCKRADIXSORT_SORTBLOCKEDTOSTRIPED: cub::BlockRadixSort(temp_storage).SortBlockedToStriped(thread_data/*int(&)[4]*/); +// CHECK_BLOCKRADIXSORT_SORTBLOCKEDTOSTRIPED: Is migrated to: +// CHECK_BLOCKRADIXSORT_SORTBLOCKEDTOSTRIPED: dpct::group::group_radix_sort(temp_storage).sort_blocked_to_striped(sycl::ext::oneapi::this_work_item::get_nd_item<3>(), thread_data); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockRadixSort::SortDescending | FileCheck %s -check-prefix=CHECK_BLOCKRADIXSORT_SORTDESCENDING +// CHECK_BLOCKRADIXSORT_SORTDESCENDING: CUDA API: +// CHECK_BLOCKRADIXSORT_SORTDESCENDING: __shared__ typename cub::BlockRadixSort::TempStorage temp_storage; +// CHECK_BLOCKRADIXSORT_SORTDESCENDING: cub::BlockRadixSort(temp_storage).SortDescending(thread_data/*int(&)[4]*/); +// CHECK_BLOCKRADIXSORT_SORTDESCENDING: Is migrated to: +// CHECK_BLOCKRADIXSORT_SORTDESCENDING: dpct::group::group_radix_sort(temp_storage).sort_descending(sycl::ext::oneapi::this_work_item::get_nd_item<3>(), thread_data); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockRadixSort::SortDescendingBlockedToStriped | FileCheck %s -check-prefix=CHECK_BLOCKRADIXSORT_SORTDESCENDINGBLOCKEDTOSTRIPED +// CHECK_BLOCKRADIXSORT_SORTDESCENDINGBLOCKEDTOSTRIPED: CUDA API: +// CHECK_BLOCKRADIXSORT_SORTDESCENDINGBLOCKEDTOSTRIPED: __shared__ typename cub::BlockRadixSort::TempStorage temp_storage; +// CHECK_BLOCKRADIXSORT_SORTDESCENDINGBLOCKEDTOSTRIPED: cub::BlockRadixSort(temp_storage).SortDescendingBlockedToStriped(thread_data/*int(&)[4]*/); +// CHECK_BLOCKRADIXSORT_SORTDESCENDINGBLOCKEDTOSTRIPED: Is migrated to: +// CHECK_BLOCKRADIXSORT_SORTDESCENDINGBLOCKEDTOSTRIPED: dpct::group::group_radix_sort(temp_storage).sort_descending_blocked_to_striped(sycl::ext::oneapi::this_work_item::get_nd_item<3>(), thread_data); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockReduce::Reduce | FileCheck %s -check-prefix=CHECK_BLOCKREDUCE_REDUCE +// CHECK_BLOCKREDUCE_REDUCE: CUDA API: +// CHECK_BLOCKREDUCE_REDUCE: __shared__ typename cub::BlockReduce::TempStorage temp_storage; +// CHECK_BLOCKREDUCE_REDUCE: cub::BlockReduce(temp_storage).Reduce(data/*int*/, cub::Sum()/*ReduceOp*/); +// CHECK_BLOCKREDUCE_REDUCE: Is migrated to: +// CHECK_BLOCKREDUCE_REDUCE: sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_work_group<3>(), data, sycl::plus<>()); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockReduce::Sum | FileCheck %s -check-prefix=CHECK_BLOCKREDUCE_SUM +// CHECK_BLOCKREDUCE_SUM: CUDA API: +// CHECK_BLOCKREDUCE_SUM: __shared__ typename cub::BlockReduce::TempStorage temp_storage; +// CHECK_BLOCKREDUCE_SUM: cub::BlockReduce(temp_storage).Sum(data/*int*/); +// CHECK_BLOCKREDUCE_SUM: Is migrated to: +// CHECK_BLOCKREDUCE_SUM: sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_work_group<3>(), data, sycl::plus<>()); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockScan::ExclusiveScan | FileCheck %s -check-prefix=CHECK_BLOCKSCAN_EXCLUSIVESCAN +// CHECK_BLOCKSCAN_EXCLUSIVESCAN: CUDA API: +// CHECK_BLOCKSCAN_EXCLUSIVESCAN: __shared__ typename cub::BlockScan::TempStorage temp_storage; +// CHECK_BLOCKSCAN_EXCLUSIVESCAN: cub::BlockScan(temp_storage).ExclusiveScan(input/*int*/, output/*int &*/, init/*int*/, cub::Sum()/*ScanOp*/); +// CHECK_BLOCKSCAN_EXCLUSIVESCAN: Is migrated to: +// CHECK_BLOCKSCAN_EXCLUSIVESCAN: output = sycl::exclusive_scan_over_group(sycl::ext::oneapi::this_work_item::get_work_group<3>(), input, init, sycl::plus<>()); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockScan::ExclusiveSum | FileCheck %s -check-prefix=CHECK_BLOCKSCAN_EXCLUSIVESUM +// CHECK_BLOCKSCAN_EXCLUSIVESUM: CUDA API: +// CHECK_BLOCKSCAN_EXCLUSIVESUM: __shared__ typename cub::BlockScan::TempStorage temp_storage; +// CHECK_BLOCKSCAN_EXCLUSIVESUM: cub::BlockScan(temp_storage).ExclusiveSum(input/*int*/, output/*int &*/); +// CHECK_BLOCKSCAN_EXCLUSIVESUM: Is migrated to: +// CHECK_BLOCKSCAN_EXCLUSIVESUM: output = sycl::exclusive_scan_over_group(sycl::ext::oneapi::this_work_item::get_work_group<3>(), input, 0, sycl::plus<>()); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockScan::InclusiveScan | FileCheck %s -check-prefix=CHECK_BLOCKSCAN_INCLUSIVESCAN +// CHECK_BLOCKSCAN_INCLUSIVESCAN: CUDA API: +// CHECK_BLOCKSCAN_INCLUSIVESCAN: __shared__ typename cub::BlockScan::TempStorage temp_storage; +// CHECK_BLOCKSCAN_INCLUSIVESCAN: cub::BlockScan(temp_storage).InclusiveScan(input/*int*/, output/*int &*/, cub::Sum()/*ScanOp*/); +// CHECK_BLOCKSCAN_INCLUSIVESCAN: Is migrated to: +// CHECK_BLOCKSCAN_INCLUSIVESCAN: output = sycl::inclusive_scan_over_group(sycl::ext::oneapi::this_work_item::get_work_group<3>(), input, sycl::plus<>()); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockScan::InclusiveSum | FileCheck %s -check-prefix=CHECK_BLOCKSCAN_INCLUSIVESUM +// CHECK_BLOCKSCAN_INCLUSIVESUM: CUDA API: +// CHECK_BLOCKSCAN_INCLUSIVESUM: __shared__ typename cub::BlockScan::TempStorage temp_storage; +// CHECK_BLOCKSCAN_INCLUSIVESUM: cub::BlockScan(temp_storage).InclusiveSum(input/*int*/, output/*int &*/); +// CHECK_BLOCKSCAN_INCLUSIVESUM: Is migrated to: +// CHECK_BLOCKSCAN_INCLUSIVESUM: output = sycl::inclusive_scan_over_group(sycl::ext::oneapi::this_work_item::get_work_group<3>(), input, sycl::plus<>()); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockShuffle::Down | FileCheck %s -check-prefix=CHECK_BLOCKSHUFFLE_DOWN +// CHECK_BLOCKSHUFFLE_DOWN: CUDA API: +// CHECK_BLOCKSHUFFLE_DOWN: __shared__ typename cub::BlockShuffle::TempStorage temp_storage; +// CHECK_BLOCKSHUFFLE_DOWN: cub::BlockShuffle(temp_storage).Down(input/*int*/, output/*int &*/); +// CHECK_BLOCKSHUFFLE_DOWN: Is migrated to: +// CHECK_BLOCKSHUFFLE_DOWN: dpct::group::group_shuffle(temp_storage).shuffle_left(sycl::ext::oneapi::this_work_item::get_nd_item<3>(), input, output); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockShuffle::Offset | FileCheck %s -check-prefix=CHECK_BLOCKSHUFFLE_OFFSET +// CHECK_BLOCKSHUFFLE_OFFSET: CUDA API: +// CHECK_BLOCKSHUFFLE_OFFSET: __shared__ typename cub::BlockShuffle::TempStorage temp_storage; +// CHECK_BLOCKSHUFFLE_OFFSET: cub::BlockShuffle(temp_storage).Offset(input/*int*/, output/*int &*/, distance/*int*/); +// CHECK_BLOCKSHUFFLE_OFFSET: Is migrated to: +// CHECK_BLOCKSHUFFLE_OFFSET: dpct::group::group_shuffle(temp_storage).select(sycl::ext::oneapi::this_work_item::get_nd_item<3>(), input, output, distance); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockShuffle::Rotate | FileCheck %s -check-prefix=CHECK_BLOCKSHUFFLE_ROTATE +// CHECK_BLOCKSHUFFLE_ROTATE: CUDA API: +// CHECK_BLOCKSHUFFLE_ROTATE: __shared__ typename cub::BlockShuffle::TempStorage temp_storage; +// CHECK_BLOCKSHUFFLE_ROTATE: cub::BlockShuffle(temp_storage).Rotate(input/*int*/, output/*int &*/, distance/*unsigned int*/); +// CHECK_BLOCKSHUFFLE_ROTATE: Is migrated to: +// CHECK_BLOCKSHUFFLE_ROTATE: dpct::group::group_shuffle(temp_storage).select2(sycl::ext::oneapi::this_work_item::get_nd_item<3>(), input, output, distance); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::BlockShuffle::Up | FileCheck %s -check-prefix=CHECK_BLOCKSHUFFLE_UP +// CHECK_BLOCKSHUFFLE_UP: CUDA API: +// CHECK_BLOCKSHUFFLE_UP: __shared__ typename cub::BlockShuffle::TempStorage temp_storage; +// CHECK_BLOCKSHUFFLE_UP: cub::BlockShuffle(temp_storage).Up(input/*int*/, output/*int &*/); +// CHECK_BLOCKSHUFFLE_UP: Is migrated to: +// CHECK_BLOCKSHUFFLE_UP: dpct::group::group_shuffle(temp_storage).shuffle_right(sycl::ext::oneapi::this_work_item::get_nd_item<3>(), input, output); diff --git a/clang/test/dpct/query_api_mapping/CUB/cub_device.cu b/clang/test/dpct/query_api_mapping/CUB/cub_device.cu index 1c74a386980e..580baac195b5 100644 --- a/clang/test/dpct/query_api_mapping/CUB/cub_device.cu +++ b/clang/test/dpct/query_api_mapping/CUB/cub_device.cu @@ -201,3 +201,43 @@ // CHECK_SELECT_UNIQUE-NEXT: dpct::queue_ptr stream; // CHECK_SELECT_UNIQUE-NEXT: stream = dpct::get_current_device().create_queue(); // CHECK_SELECT_UNIQUE-NEXT: stream->fill(d_num_selected_out, std::distance(d_out, oneapi::dpl::unique_copy(oneapi::dpl::execution::device_policy(*stream), d_in, d_in + num_items, d_out)), 1).wait(); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::DeviceHistogram::MultiHistogramRange | FileCheck %s -check-prefix=CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMRANGE +// CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMRANGE: CUDA API: +// CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMRANGE: void *d_temp_storage = nullptr; +// CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMRANGE: size_t temp_storage_bytes = 0; +// CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMRANGE: cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage/*void **/, temp_storage_bytes/*size_t*/, d_samples/*unsigned char **/, d_histogram/*int *(&)[3]*/, num_levels/*int(&)[3]*/, d_levels/*unsigned int *(&)[3]*/, num_pixels/*int*/); +// CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMRANGE: cudaMalloc(&d_temp_storage, temp_storage_bytes); +// CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMRANGE: cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage/*void **/, temp_storage_bytes/*size_t*/, d_samples/*unsigned char **/, d_histogram/*int *(&)[3]*/, num_levels/*int(&)[3]*/, d_levels/*unsigned int *(&)[3]*/, num_pixels/*int*/); +// CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMRANGE: Is migrated to: +// CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMRANGE: dpct::multi_histogram_range<4, 3>(oneapi::dpl::execution::device_policy(q_ct1), d_samples, d_histogram, num_levels, d_levels, num_pixels); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::DeviceHistogram::MultiHistogramEven | FileCheck %s -check-prefix=CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMEVEN +// CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMEVEN: CUDA API: +// CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMEVEN: void *d_temp_storage = nullptr; +// CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMEVEN: size_t temp_storage_bytes = 0; +// CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMEVEN: cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage/*void **/, temp_storage_bytes/*size_t*/, d_samples/*unsigned char **/, d_histogram/*int *(&)[3]*/, num_levels/*int(&)[3]*/, lower_level/*unsigned int(&)[3]*/, upper_level/*unsigned int(&)[3]*/, num_pixels/*int*/, S/*cudaStream_t*/); +// CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMEVEN: cudaMalloc(&d_temp_storage, temp_storage_bytes); +// CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMEVEN: cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage/*void **/, temp_storage_bytes/*size_t*/, d_samples/*unsigned char **/, d_histogram/*int *(&)[3]*/, num_levels/*int(&)[3]*/, lower_level/*unsigned int(&)[3]*/, upper_level/*unsigned int(&)[3]*/, num_pixels/*int*/, S/*cudaStream_t*/); +// CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMEVEN: Is migrated to: +// CHECK_DEVICEHISTOGRAM_MULTIHISTOGRAMEVEN: dpct::multi_histogram_even<4, 3>(oneapi::dpl::execution::device_policy(*S), d_samples, d_histogram, num_levels, lower_level, upper_level, num_pixels); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::DeviceHistogram::HistogramEven | FileCheck %s -check-prefix=CHECK_DEVICEHISTOGRAM_HISTOGRAMEVEN +// CHECK_DEVICEHISTOGRAM_HISTOGRAMEVEN: CUDA API: +// CHECK_DEVICEHISTOGRAM_HISTOGRAMEVEN: void *d_temp_storage = nullptr; +// CHECK_DEVICEHISTOGRAM_HISTOGRAMEVEN: size_t temp_storage_bytes = 0; +// CHECK_DEVICEHISTOGRAM_HISTOGRAMEVEN: cub::DeviceHistogram::HistogramEven(d_temp_storage/*void **/, temp_storage_bytes/*size_t*/, d_samples/*float **/, d_histogram/*int **/, num_levels/*int*/, lower_level/*float*/, upper_level/*float*/, num_samples/*int*/); +// CHECK_DEVICEHISTOGRAM_HISTOGRAMEVEN: cudaMalloc(&d_temp_storage, temp_storage_bytes); +// CHECK_DEVICEHISTOGRAM_HISTOGRAMEVEN: cub::DeviceHistogram::HistogramEven(d_temp_storage/*void **/, temp_storage_bytes/*size_t*/, d_samples/*float **/, d_histogram/*int **/, num_levels/*int*/, lower_level/*float*/, upper_level/*float*/, num_samples/*int*/); +// CHECK_DEVICEHISTOGRAM_HISTOGRAMEVEN: Is migrated to: +// CHECK_DEVICEHISTOGRAM_HISTOGRAMEVEN: dpct::histogram_even(oneapi::dpl::execution::device_policy(q_ct1), d_samples, d_histogram, num_levels, lower_level, upper_level, num_samples); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cub::DeviceHistogram::HistogramRange | FileCheck %s -check-prefix=CHECK_DEVICEHISTOGRAM_HISTOGRAMRANGE +// CHECK_DEVICEHISTOGRAM_HISTOGRAMRANGE: CUDA API: +// CHECK_DEVICEHISTOGRAM_HISTOGRAMRANGE: void *d_temp_storage = nullptr; +// CHECK_DEVICEHISTOGRAM_HISTOGRAMRANGE: size_t temp_storage_bytes = 0; +// CHECK_DEVICEHISTOGRAM_HISTOGRAMRANGE: cub::DeviceHistogram::HistogramRange(d_temp_storage/*void **/, temp_storage_bytes/*size_t*/, d_samples/*float **/, d_histogram/*int **/, num_levels/*int*/, d_levels/*float **/, num_samples/*int*/); +// CHECK_DEVICEHISTOGRAM_HISTOGRAMRANGE: cudaMalloc(&d_temp_storage, temp_storage_bytes); +// CHECK_DEVICEHISTOGRAM_HISTOGRAMRANGE: cub::DeviceHistogram::HistogramRange(d_temp_storage/*void **/, temp_storage_bytes/*size_t*/, d_samples/*float **/, d_histogram/*int **/, num_levels/*int*/, d_levels/*float **/, num_samples/*int*/); +// CHECK_DEVICEHISTOGRAM_HISTOGRAMRANGE: Is migrated to: +// CHECK_DEVICEHISTOGRAM_HISTOGRAMRANGE: dpct::histogram_range(oneapi::dpl::execution::device_policy(q_ct1), d_samples, d_histogram, num_levels, d_levels, num_samples); diff --git a/clang/test/dpct/query_api_mapping/test_all.cu b/clang/test/dpct/query_api_mapping/test_all.cu index a53aec96afee..3aa074c12176 100644 --- a/clang/test/dpct/query_api_mapping/test_all.cu +++ b/clang/test/dpct/query_api_mapping/test_all.cu @@ -739,6 +739,32 @@ // CHECK-NEXT: cuTexRefSetFilterMode // CHECK-NEXT: cuTexRefSetFlags // CHECK-NEXT: cuTexRefSetFormat +// CHECK-NEXT: cub::BlockExchange::BlockedToStriped +// CHECK-NEXT: cub::BlockExchange::BlockedToWarpStriped +// CHECK-NEXT: cub::BlockExchange::ScatterToBlocked +// CHECK-NEXT: cub::BlockExchange::ScatterToStriped +// CHECK-NEXT: cub::BlockExchange::StripedToBlocked +// CHECK-NEXT: cub::BlockExchange::WarpStripedToBlocked +// CHECK-NEXT: cub::BlockLoad::Load +// CHECK-NEXT: cub::BlockRadixSort::Sort +// CHECK-NEXT: cub::BlockRadixSort::SortBlockedToStriped +// CHECK-NEXT: cub::BlockRadixSort::SortDescending +// CHECK-NEXT: cub::BlockRadixSort::SortDescendingBlockedToStriped +// CHECK-NEXT: cub::BlockReduce::Reduce +// CHECK-NEXT: cub::BlockReduce::Sum +// CHECK-NEXT: cub::BlockScan::ExclusiveScan +// CHECK-NEXT: cub::BlockScan::ExclusiveSum +// CHECK-NEXT: cub::BlockScan::InclusiveScan +// CHECK-NEXT: cub::BlockScan::InclusiveSum +// CHECK-NEXT: cub::BlockShuffle::Down +// CHECK-NEXT: cub::BlockShuffle::Offset +// CHECK-NEXT: cub::BlockShuffle::Rotate +// CHECK-NEXT: cub::BlockShuffle::Up +// CHECK-NEXT: cub::BlockStore::Store +// CHECK-NEXT: cub::DeviceHistogram::HistogramEven +// CHECK-NEXT: cub::DeviceHistogram::HistogramRange +// CHECK-NEXT: cub::DeviceHistogram::MultiHistogramEven +// CHECK-NEXT: cub::DeviceHistogram::MultiHistogramRange // CHECK-NEXT: cub::DeviceReduce::ArgMax // CHECK-NEXT: cub::DeviceReduce::ArgMin // CHECK-NEXT: cub::DeviceReduce::Max