Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,21 @@ struct ConflictingProperties<max_linear_work_group_size_key, Properties> {
}();
};

// Returns the properties exposed by the last argument of the pack (treated as
// the kernel object) through its `get(properties_tag)` member, or an
// `empty_properties_t` when HasKernelPropertiesGetMethod reports that no such
// member is available.
//
// The arguments are only bound to const references here; none of them is
// copied or moved from, so a caller may still forward them afterwards.
// At least one argument must be supplied.
template <typename... RestT>
auto RetrieveGetMethodPropertiesOrEmpty(RestT &&...Rest) {
  // A comma fold over an empty pack yields `void`, which cannot initialize the
  // reference below. Reject that case up front with a readable diagnostic.
  static_assert(sizeof...(RestT) > 0,
                "RetrieveGetMethodPropertiesOrEmpty requires at least one "
                "argument (the kernel object)");
  // Note: the following trivial identity lambda is used to avoid the issue
  // that line "const auto &KernelObj = (Rest, ...);" may result in a "left
  // operand of comma operator has no effect" error for certain compiler(s).
  auto Identity = [](const auto &x) -> decltype(auto) { return x; };
  // The comma fold evaluates to its right-most operand, i.e. the last
  // argument of the pack.
  const auto &KernelObj = (Identity(Rest), ...);
  if constexpr (ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod<
                    decltype(KernelObj)>::value) {
    return KernelObj.get(ext::oneapi::experimental::properties_tag{});
  } else {
    return ext::oneapi::experimental::empty_properties_t{};
  }
}

} // namespace detail
} // namespace ext::oneapi::experimental
} // namespace _V1
Expand Down
39 changes: 29 additions & 10 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2157,27 +2157,33 @@ class __SYCL_EXPORT handler {
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value &&
(sizeof...(RestT) > 1)>
parallel_for(range<1> Range, RestT &&...Rest) {
parallel_for<KernelName>(Range,
ext::oneapi::experimental::empty_properties_t{},
std::forward<RestT>(Rest)...);
parallel_for<KernelName>(
Range,
ext::oneapi::experimental::detail::RetrieveGetMethodPropertiesOrEmpty(
std::forward<RestT>(Rest)...),
std::forward<RestT>(Rest)...);
}

// parallel_for overload taking a range<2> followed by a pack of arguments.
// It participates in overload resolution only when
// detail::AreAllButLastReductions<RestT...>::value is true and the pack has
// more than one element. It delegates to parallel_for<KernelName> with an
// explicit properties argument placed right after the range.
// NOTE(review): this listing shows two consecutive delegating calls; they look
// like the removed and the added line group of a diff rather than two real
// calls - confirm against the actual file.
template <typename KernelName = detail::auto_name, typename... RestT>
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value &&
(sizeof...(RestT) > 1)>
parallel_for(range<2> Range, RestT &&...Rest) {
// Delegation with an empty property list.
parallel_for<KernelName>(Range,
ext::oneapi::experimental::empty_properties_t{},
std::forward<RestT>(Rest)...);
// Delegation with the properties obtained from the argument pack.
// NOTE(review): Rest is forwarded twice in this call (to the helper and to
// the callee); this is only safe if the helper does not move from its
// arguments - confirm.
parallel_for<KernelName>(
Range,
ext::oneapi::experimental::detail::RetrieveGetMethodPropertiesOrEmpty(
std::forward<RestT>(Rest)...),
std::forward<RestT>(Rest)...);
}

// parallel_for overload taking a range<3> followed by a pack of arguments.
// It participates in overload resolution only when
// detail::AreAllButLastReductions<RestT...>::value is true and the pack has
// more than one element. It delegates to parallel_for<KernelName> with an
// explicit properties argument placed right after the range.
// NOTE(review): this listing shows two consecutive delegating calls; they look
// like the removed and the added line group of a diff rather than two real
// calls - confirm against the actual file.
template <typename KernelName = detail::auto_name, typename... RestT>
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value &&
(sizeof...(RestT) > 1)>
parallel_for(range<3> Range, RestT &&...Rest) {
// Delegation with an empty property list.
parallel_for<KernelName>(Range,
ext::oneapi::experimental::empty_properties_t{},
std::forward<RestT>(Rest)...);
// Delegation with the properties obtained from the argument pack.
// NOTE(review): Rest is forwarded twice in this call (to the helper and to
// the callee); this is only safe if the helper does not move from its
// arguments - confirm.
parallel_for<KernelName>(
Range,
ext::oneapi::experimental::detail::RetrieveGetMethodPropertiesOrEmpty(
std::forward<RestT>(Rest)...),
std::forward<RestT>(Rest)...);
}

template <typename KernelName = detail::auto_name, int Dims,
Expand All @@ -2202,7 +2208,20 @@ class __SYCL_EXPORT handler {

template <typename KernelName = detail::auto_name, int Dims,
typename... RestT>
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value>
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value &&
(sizeof...(RestT) > 1)> // variant with reductions
parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
parallel_for<KernelName>(
Range,
ext::oneapi::experimental::detail::RetrieveGetMethodPropertiesOrEmpty(
std::forward<RestT>(Rest)...),
std::forward<RestT>(Rest)...);
}

template <typename KernelName = detail::auto_name, int Dims,
typename... RestT>
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value &&
(sizeof...(RestT) == 1)> // variant without reductions
parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
parallel_for<KernelName>(Range,
ext::oneapi::experimental::empty_properties_t{},
Expand Down
46 changes: 23 additions & 23 deletions sycl/test-e2e/Properties/cache_config.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
// REQUIRES: gpu, level_zero

// TODO: Currently using the -Wno-deprecated-declarations flag due to issue
// https://github.com/intel/llvm/issues/16320. Remove the flag once the issue is
// resolved.
// RUN: %{build} -o %t.out -Wno-deprecated-declarations
// RUN: %{build} -o %t.out
// RUN: env UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s

#include <numeric>
Expand All @@ -16,34 +13,39 @@ using namespace sycl::ext::intel::experimental;
using namespace sycl::ext::oneapi::experimental;

// Kernel functor whose call operator takes no arguments and has an empty body.
// The properties it carries are exposed through get(properties_tag).
struct KernelFunctor {

KernelFunctor() {}

// Kernel body: does nothing.
void operator()() const {}
// Returns a property list containing cache_config(large_slm).
auto get(properties_tag) const { return properties{cache_config(large_slm)}; }
};

// Kernel functor whose call operator takes an nd_item<2> and has an empty
// body. The properties it carries are exposed through get(properties_tag).
struct KernelFunctorND {

KernelFunctorND() {}

// Kernel body: the nd_item argument is unused.
void operator()(nd_item<2> i) const {}
// Returns a property list containing cache_config(large_slm).
auto get(properties_tag) const { return properties{cache_config(large_slm)}; }
};

// Kernel functor taking an nd_item<2> whose get(properties_tag) returns an
// empty property list, i.e. it does not request cache_config.
struct NegativeKernelFunctor {

NegativeKernelFunctor() {}

// Kernel body: the nd_item argument is unused.
void operator()(nd_item<2> i) const {}
// Returns a property list with no properties in it.
auto get(properties_tag) const { return properties{}; }
};

// Kernel functor whose call operator takes an id<2> and has an empty body.
// The properties it carries are exposed through get(properties_tag).
struct RangeKernelFunctor {
// Kernel body: the id argument is unused.
void operator()(id<2> i) const {}
// Returns a property list containing cache_config(large_slm).
auto get(properties_tag) const { return properties{cache_config(large_slm)}; }
};

// Kernel functor whose call operator takes a group<1>.
// The properties it carries are exposed through get(properties_tag).
struct WorkGroupFunctor {
// Invokes parallel_for_work_item on the group with a lambda that does nothing.
void operator()(group<1> g) const {
g.parallel_for_work_item([&](h_item<1>) {});
}
// Returns a property list containing cache_config(large_slm).
auto get(properties_tag) const { return properties{cache_config(large_slm)}; }
};

RangeKernelFunctor() {}
// Kernel functor for a sum reduction over an indexable input of type T1.
// The properties it carries are exposed through get(properties_tag).
template <typename T1> struct ReductionKernelFunctor {
// Copy of the input passed to the constructor; indexed by the call operator.
T1 mInput_values;
// Stores a copy of Input_values in the functor.
ReductionKernelFunctor(T1 &Input_values) : mInput_values(Input_values) {}

void operator()(id<2> i) const {}
// Adds the input element at idx to the reducer passed in as sum.
template <typename sumT> void operator()(id<1> idx, sumT &sum) const {
sum += mInput_values[idx];
}
// Returns a property list containing cache_config(large_slm).
auto get(properties_tag) const { return properties{cache_config(large_slm)}; }
};

Expand All @@ -63,18 +65,16 @@ int main() {
// CHECK: zeKernelSetCacheConfig
std::cout << "parallel_for_work_group(range, func)" << std::endl;
q.submit([&](handler &cgh) {
cgh.parallel_for_work_group<class hpar_range>(
range<1>(8), properties,
[=](group<1> g) { g.parallel_for_work_item([&](h_item<1> i) {}); });
cgh.parallel_for_work_group<class hpar_range>(range<1>(8),
WorkGroupFunctor{});
});

// CHECK: parallel_for_work_group(range, range, func)
// CHECK: zeKernelSetCacheConfig
std::cout << "parallel_for_work_group(range, range, func)" << std::endl;
q.submit([&](handler &cgh) {
cgh.parallel_for_work_group<class hpar_range_range>(
range<1>(8), range<1>(4), properties,
[=](group<1> g) { g.parallel_for_work_item([&](h_item<1> i) {}); });
range<1>(8), range<1>(4), WorkGroupFunctor{});
});

buffer<int> values_buf{1024};
Expand All @@ -92,8 +92,8 @@ int main() {
q.submit([&](handler &cgh) {
auto input_values = values_buf.get_access<access_mode::read>(cgh);
auto sum_reduction = reduction(sum_buf, cgh, plus<>());
cgh.parallel_for(range<1>{1024}, properties, sum_reduction,
[=](id<1> idx, auto &sum) { sum += input_values[idx]; });
cgh.parallel_for(range<1>{1024}, sum_reduction,
ReductionKernelFunctor(input_values));
});

// CHECK: KernelFunctor single_task
Expand All @@ -111,7 +111,7 @@ int main() {
// CHECK-NOT: zeKernelSetCacheConfig
std::cout << "negative parallel_for with sycl::nd_range" << std::endl;
q.parallel_for(nd_range<2>{range<2>(4, 4), range<2>(2, 2)},
[=](nd_item<2> i) {})
NegativeKernelFunctor{})
.wait();

// CHECK: negative parallel_for with KernelFunctor
Expand Down
Loading