Skip to content

Commit 79aff20

Browse files
chunhuanMengguangyeytoyxutsocha
authored
Replace deprecated [[intel::reqd_sub_group_size(SgSize)]] with [[sycl::reqd_sub_group_size(SIMD)]] and remove unnecessary attributes (#1828)
### Summary This PR updates the codebase to replace the deprecated `[[intel::reqd_sub_group_size(SgSize)]]` attribute with the new `[[sycl::reqd_sub_group_size(SIMD)]]` attribute. Additionally, the attribute has been removed from certain locations where it was deemed unnecessary.These changes also aim to reduce the number of warnings, thereby decreasing the log size. ### Changes 1. **Attribute Replacement**: - Replaced all instances of `[[intel::reqd_sub_group_size(SgSize)]]` with `[[sycl::reqd_sub_group_size(SIMD)]]` to align with the latest SYCL specification and avoid using deprecated attributes. 2. **Attribute Removal**: - Removed the `[[sycl::reqd_sub_group_size(SIMD)]]` attribute from functions and kernels where it was not necessary. This was done to simplify the code and avoid redundant specifications. Co-authored-by: guangyey <[email protected]> Co-authored-by: Yutao Xu <[email protected]> Co-authored-by: Tomasz Socha <[email protected]>
1 parent 70c8846 commit 79aff20

13 files changed

+34
-36
lines changed

src/ATen/native/xpu/sycl/BatchNormKernels.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ template <
423423
typename index_t>
424424
struct BatchNormCollectStatisticsKernelFunctor
425425
: public __SYCL_KER_CONFIG_CONVENTION__ {
426-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
426+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
427427
sycl::nd_item<2> item) const {
428428
int plane = item.get_group(1);
429429
int tid = item.get_local_linear_id();
@@ -1874,7 +1874,7 @@ template <
18741874
typename index_t>
18751875
struct BatchNormBackwardReduceKernelFunctor
18761876
: public __SYCL_KER_CONFIG_CONVENTION__ {
1877-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
1877+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
18781878
sycl::nd_item<2> item) const {
18791879
index_t plane = item.get_group(1);
18801880

@@ -4162,7 +4162,7 @@ template <
41624162
typename stat_accscalar_t,
41634163
typename index_t>
41644164
struct BatchNormBackwardKernelFunctor : public __SYCL_KER_CONFIG_CONVENTION__ {
4165-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
4165+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
41664166
sycl::nd_item<2> item) const {
41674167
index_t plane = item.get_group(1);
41684168
index_t N = grad_output_.size(0) * grad_output_.size(2);
@@ -4370,7 +4370,7 @@ template <
43704370
typename index_t>
43714371
struct BatchNormBackwardVectorizedKernelFunctor
43724372
: public __SYCL_KER_CONFIG_CONVENTION__ {
4373-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
4373+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
43744374
sycl::nd_item<2> item) const {
43754375
index_t plane = item.get_group(1);
43764376
index_t N = grad_output_.size(0) * grad_output_.size(2);

src/ATen/native/xpu/sycl/Dequant_int4.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ struct DequantInt4KernelFunctor : public __SYCL_KER_CONFIG_CONVENTION__ {
2222
weight_dequant(weight_dequant) {}
2323

2424
void sycl_ker_config_convention(sycl::handler& cgh) {}
25-
[[intel::reqd_sub_group_size(SgSize)]] void operator()(
25+
[[sycl::reqd_sub_group_size(SgSize)]] void operator()(
2626
sycl::nd_item<1> it) const {
2727
int constexpr GroupN = TileN;
2828
int constexpr GroupK = SgSize * TileK;

src/ATen/native/xpu/sycl/ForeachReduceKernels.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@ template <
2525
int r_args_depth = 1,
2626
int res_arg_index = 0>
2727
struct LpNormFunctor : public __SYCL_KER_CONFIG_CONVENTION__ {
28-
template <typename TLA, typename TLW>
29-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
28+
template <typename TLA, typename TLW> void operator()(
3029
const int64_t chunk_size,
3130
TLA tlAddress,
3231
TLW tlWGMeta,
@@ -117,7 +116,7 @@ struct LpNormFunctor : public __SYCL_KER_CONFIG_CONVENTION__ {
117116

118117
template <typename out_t, NormType norm_type, typename opmath_t, int SIMD>
119118
struct lpnormChunkReduceKernelFunctor : public __SYCL_KER_CONFIG_CONVENTION__ {
120-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
119+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
121120
sycl::nd_item<1> item_id) const {
122121
auto lid = item_id.get_local_linear_id();
123122
auto group_id = item_id.get_group(0);
@@ -481,8 +480,7 @@ std::vector<Tensor> foreach_norm_kernel(
481480

482481
template <typename T, int SIMD>
483482
struct LpMaxFunctor : public __SYCL_KER_CONFIG_CONVENTION__ {
484-
template <typename TLA, typename TLW>
485-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
483+
template <typename TLA, typename TLW> void operator()(
486484
int64_t chunk_size,
487485
TLA tlAddressMeta,
488486
TLW tlWGMeta,
@@ -555,7 +553,7 @@ struct LpMaxFunctor : public __SYCL_KER_CONFIG_CONVENTION__ {
555553

556554
template <typename T, int SIMD>
557555
struct LpmaxChunkReduceKernelFunctor : public __SYCL_KER_CONFIG_CONVENTION__ {
558-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
556+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
559557
sycl::nd_item<1> item_id) const {
560558
auto local_range = item_id.get_local_range(0);
561559
auto lid = item_id.get_local_linear_id();

src/ATen/native/xpu/sycl/GroupNormKernels.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ struct GNRowwiseMomentsFunctor : public __SYCL_KER_CONFIG_CONVENTION__ {
6666
using WelfordOp =
6767
WelfordOpsXPU<T_ACC, T_ACC, int64_t, std::pair<T_ACC, T_ACC>>;
6868

69-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
69+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
7070
sycl::nd_item<1> item) const {
7171
const int64_t i = item.get_group(0);
7272
WelfordOp welford_op = {/*correction=*/0, /*take_sqrt=*/false, item};
@@ -114,7 +114,7 @@ struct GNRowwiseMomentsVectorizedFunctor
114114
WelfordOpsXPU<T_ACC, T_ACC, int64_t, std::pair<T_ACC, T_ACC>>;
115115
using vec_t = memory::aligned_vector<T, VEC_SIZE>;
116116

117-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
117+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
118118
sycl::nd_item<1> item) const {
119119
WelfordType val[VEC_SIZE];
120120
WelfordOp welford_op = {/*correction=*/0, /*take_sqrt=*/false, item};
@@ -476,7 +476,7 @@ void group_norm_kernel(
476476
template <typename T, typename T_ACC, int SIMD>
477477
struct Compute1dBackwardFusedParamsFunctor
478478
: public __SYCL_KER_CONFIG_CONVENTION__ {
479-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
479+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
480480
sycl::nd_item<2> item) const {
481481
const int64_t G = group_;
482482
const int64_t D = C_ / G;
@@ -630,7 +630,7 @@ template <typename T, int SIMD, int kReduceTileSize>
630630
struct GammaBeta1dBackwardLargeKernel : public __SYCL_KER_CONFIG_CONVENTION__ {
631631
using T_ACC = acc_type_device<T, kXPU>;
632632

633-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
633+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
634634
sycl::nd_item<2> item) const {
635635
const int64_t c =
636636
item.get_group(1) * item.get_local_range(1) + item.get_local_id(1);
@@ -890,7 +890,7 @@ template <typename T, int SIMD>
890890
struct ComputeInternalGradientsFunctor : public __SYCL_KER_CONFIG_CONVENTION__ {
891891
using T_ACC = acc_type_device<T, kXPU>;
892892

893-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
893+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
894894
sycl::nd_item<1> item) const {
895895
const int64_t nc = item.get_group(0);
896896
T_ACC sum1 = 0;
@@ -941,7 +941,7 @@ struct ComputeInternalGradientsVectorizedFunctor
941941
using vec_t = memory::aligned_vector<T, VEC_SIZE>;
942942
using acc_vec_t = memory::aligned_vector<T_ACC, VEC_SIZE>;
943943

944-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
944+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
945945
sycl::nd_item<1> item) const {
946946
acc_vec_t sum1_vec;
947947
acc_vec_t sum2_vec;
@@ -1038,7 +1038,7 @@ struct ComputeBackwardFusedParamsFunctor
10381038
: public __SYCL_KER_CONFIG_CONVENTION__ {
10391039
using T_ACC = acc_type_device<T, kXPU>;
10401040

1041-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
1041+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
10421042
sycl::nd_item<2> item) const {
10431043
const int64_t G = group_;
10441044
const int64_t D = C_ / G;
@@ -1176,7 +1176,7 @@ template <typename T, int SIMD, int kReduceTileSize>
11761176
struct GammaBetaBackwardFunctor : public __SYCL_KER_CONFIG_CONVENTION__ {
11771177
using T_ACC = acc_type_device<T, kXPU>;
11781178

1179-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
1179+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
11801180
sycl::nd_item<2> item) const {
11811181
auto group_x = item.get_group(1);
11821182
auto group_size_x = item.get_local_range(1);

src/ATen/native/xpu/sycl/IndexKernelUtils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ inline bool fast_gather_kernel_eligible(
4242

4343
template <int Alignment, typename index_t>
4444
struct VectorizedGatherKernel {
45-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
45+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
4646
sycl::nd_item<2> item) const {
4747
int64_t ind = idx_[item.get_group(1)];
4848
if (allow_neg_indices_) {

src/ATen/native/xpu/sycl/LayerNormKernels.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ struct RowwiseMomentsFunctor : public __SYCL_KER_CONFIG_CONVENTION__ {
186186
using WelfordType = WelfordData<T_ACC, int64_t>;
187187
using WelfordOp = WelfordOps<T_ACC, T_ACC, int64_t, std::pair<T_ACC, T_ACC>>;
188188

189-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
189+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
190190
sycl::nd_item<1> item_id) const {
191191
const int64_t i = item_id.get_group(0);
192192
WelfordOp welford_op = {/*correction=*/0, /*take_sqrt=*/false};
@@ -435,7 +435,7 @@ WelfordDataLN compute_stats(
435435
template <typename T, typename T_ACC>
436436
struct VectorizedLayerNormKernelFunctor
437437
: public __SYCL_KER_CONFIG_CONVENTION__ {
438-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
438+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
439439
sycl::nd_item<2> item_id) const {
440440
auto i1 = item_id.get_group(1);
441441
const T* block_row = X_ + i1 * N_;

src/ATen/native/xpu/sycl/LinearInt4.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ struct LinearInt4KernelFunctor : public __SYCL_KER_CONFIG_CONVENTION__ {
4141
ldc(ldc) {}
4242
void sycl_ker_config_convention(sycl::handler& cgh) {}
4343

44-
[[intel::reqd_sub_group_size(16)]] void operator()(
44+
[[sycl::reqd_sub_group_size(16)]] void operator()(
4545
sycl::nd_item<1> it) const {
4646
int constexpr Unroll = 2;
4747
int constexpr SgSize = 16;

src/ATen/native/xpu/sycl/LossNLL2dKernels.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ struct NllLoss2dForwardNoReduceKernelFunctor {
7979

8080
template <typename scalar_t, typename accscalar_t, typename index_t, int SIMD>
8181
struct NllLoss2dForwardKernelFunctor : public __SYCL_KER_CONFIG_CONVENTION__ {
82-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
82+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
8383
sycl::nd_item<1> item) const {
8484
scalar_t cur_weight;
8585
accscalar_t input_sum = 0;

src/ATen/native/xpu/sycl/MultiLabelMarginLossKernels.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ void multilabel_margin_loss_shape_check(
5151
template <typename scalar_t, typename accscalar_t>
5252
struct MultilabelMarginLossForwardKernelFunctor
5353
: public __SYCL_KER_CONFIG_CONVENTION__ {
54-
[[intel::reqd_sub_group_size(MULTILABELMARGIN_SUB_GROUP_SIZE)]] void
54+
[[sycl::reqd_sub_group_size(MULTILABELMARGIN_SUB_GROUP_SIZE)]] void
5555
operator()(sycl::nd_item<1> item) const {
5656
int k = item.get_group(0);
5757
const scalar_t* input_k = input_ + k * dim_;
@@ -148,7 +148,7 @@ struct MultilabelMarginLossForwardKernelFunctor
148148
template <typename scalar_t, typename accscalar_t>
149149
struct MultilabelMarginLossBackwardKernelFunctor
150150
: public __SYCL_KER_CONFIG_CONVENTION__ {
151-
[[intel::reqd_sub_group_size(MULTILABELMARGIN_SUB_GROUP_SIZE)]] void
151+
[[sycl::reqd_sub_group_size(MULTILABELMARGIN_SUB_GROUP_SIZE)]] void
152152
operator()(sycl::nd_item<1> item) const {
153153
int k = item.get_group(0);
154154
const scalar_t* input_k = input_ + k * dim_;

src/ATen/native/xpu/sycl/Norm.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,7 @@ template <
600600
class Norm,
601601
bool one_moment = false>
602602
struct FusedNormKernelFunctor : public __SYCL_KER_CONFIG_CONVENTION__ {
603-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
603+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
604604
sycl::nd_item<3> item_id) const {
605605
accscalar_t sum1 = 0;
606606
accscalar_t sum2 = 0;
@@ -747,7 +747,7 @@ template <
747747
class Norm,
748748
bool one_moment = false>
749749
struct RowwiseMomentsKernelFunctor : public __SYCL_KER_CONFIG_CONVENTION__ {
750-
[[intel::reqd_sub_group_size(SIMD)]] void operator()(
750+
[[sycl::reqd_sub_group_size(SIMD)]] void operator()(
751751
sycl::nd_item<3> item_id) const {
752752
index_t local_id = item_id.get_local_id(2);
753753

0 commit comments

Comments
 (0)