Skip to content

Commit bb383ad

Browse files
fhahn authored and tru committed
[SCEVExp] Fix early exit in ComputeEndCheck. (llvm#156910)
ComputeEndCheck incorrectly returned false for unsigned predicates that start at zero and have a positive step. The AddRec could still wrap if Step * trunc ExitCount wraps, or if trunc ExitCount strips leading 1s. Fixes llvm#156849. PR: llvm#156910 (cherry picked from commit f8972c8)
1 parent 2daad31 commit bb383ad

File tree

6 files changed

+49
-14
lines changed

6 files changed

+49
-14
lines changed

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2133,8 +2133,15 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
21332133
// negative. If Step is known to be positive or negative, only create
21342134
// either 1. or 2.
21352135
auto ComputeEndCheck = [&]() -> Value * {
2136-
// Checking <u 0 is always false.
2137-
if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
2136+
// Checking <u 0 is always false, if (Step * trunc ExitCount) does not wrap.
2137+
// TODO: Predicates that can be proven true/false should be discarded when
2138+
// the predicates are created, not late during expansion.
2139+
if (!Signed && Start->isZero() && SE.isKnownPositive(Step) &&
2140+
DstBits < SrcBits &&
2141+
ExitCount == SE.getZeroExtendExpr(SE.getTruncateExpr(ExitCount, ARTy),
2142+
ExitCount->getType()) &&
2143+
SE.willNotOverflow(Instruction::Mul, Signed, Step,
2144+
SE.getTruncateExpr(ExitCount, ARTy)))
21382145
return ConstantInt::getFalse(Loc->getContext());
21392146

21402147
// Get the backedge taken count and truncate or extended to the AR type.

llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,13 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, p
1010
; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]]
1111
; CHECK: for.body.lver.check:
1212
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
13+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
14+
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
15+
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
16+
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
1317
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
14-
; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
18+
; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[MUL_OVERFLOW]], [[TMP1]]
19+
; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
1520
; CHECK: for.body.ph.lver.orig:
1621
; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
1722
; CHECK: for.body.lver.orig:
@@ -75,7 +80,7 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, p
7580
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
7681
; CHECK: for.end.loopexit:
7782
; CHECK-NEXT: br label [[FOR_END:%.*]]
78-
; CHECK: for.end.loopexit1:
83+
; CHECK: for.end.loopexit2:
7984
; CHECK-NEXT: br label [[FOR_END]]
8085
; CHECK: for.end:
8186
; CHECK-NEXT: ret void
@@ -135,8 +140,13 @@ define void @f_with_offset(ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr n
135140
; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]]
136141
; CHECK: for.body.lver.check:
137142
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
143+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
144+
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
145+
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
146+
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
138147
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
139-
; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
148+
; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[MUL_OVERFLOW]], [[TMP1]]
149+
; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
140150
; CHECK: for.body.ph.lver.orig:
141151
; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
142152
; CHECK: for.body.lver.orig:
@@ -200,7 +210,7 @@ define void @f_with_offset(ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr n
200210
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
201211
; CHECK: for.end.loopexit:
202212
; CHECK-NEXT: br label [[FOR_END:%.*]]
203-
; CHECK: for.end.loopexit1:
213+
; CHECK: for.end.loopexit2:
204214
; CHECK-NEXT: br label [[FOR_END]]
205215
; CHECK: for.end:
206216
; CHECK-NEXT: ret void

llvm/test/Transforms/LoopVectorize/X86/cost-model.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,8 +1045,8 @@ define i64 @live_in_known_1_via_scev() {
10451045
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
10461046
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ <i64 3, i64 1, i64 1, i64 1>, [[VECTOR_PH]] ], [ [[VEC_PHI]], [[VECTOR_BODY]] ]
10471047
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
1048-
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
1049-
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
1048+
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
1049+
; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
10501050
; CHECK: middle.block:
10511051
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> [[VEC_PHI]])
10521052
; CHECK-NEXT: br label [[EXIT:%.*]]
@@ -1213,6 +1213,7 @@ define i32 @g(i64 %n) {
12131213
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 4
12141214
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
12151215
; CHECK: vector.scevcheck:
1216+
; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[N]] to i32
12161217
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[N]], 4294967295
12171218
; CHECK-NEXT: br i1 [[TMP2]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
12181219
; CHECK: vector.main.loop.iter.check:

llvm/test/Transforms/LoopVectorize/interleaved-accesses-requiring-scev-predicates.ll

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,12 +120,16 @@ exit:
120120

121121
; For %gep, we have the following SCEV: ((4 * (zext i4 {0,+,5}<%loop> to i64))<nuw><nsw> + %x).
122122
; Note the i4 bit wide AddRec {0,+,5}. It is known to wrap in the loop with trip count 16.
123-
; FIXME: Currently we incorrectly assume the widened AddRec does not wrap.
124123
define void @wrap_predicate_for_interleave_group_wraps_for_known_trip_count(ptr noalias %x, ptr noalias %out) {
125124
; CHECK-LABEL: define void @wrap_predicate_for_interleave_group_wraps_for_known_trip_count(
126125
; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[OUT:%.*]]) {
127126
; CHECK-NEXT: [[START:.*]]:
128-
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
127+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
128+
; CHECK: [[VECTOR_SCEVCHECK]]:
129+
; CHECK-NEXT: [[MUL:%.*]] = call { i4, i1 } @llvm.umul.with.overflow.i4(i4 5, i4 -1)
130+
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i4, i1 } [[MUL]], 0
131+
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i4, i1 } [[MUL]], 1
132+
; CHECK-NEXT: br i1 [[MUL_OVERFLOW]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
129133
; CHECK: [[VECTOR_PH]]:
130134
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
131135
; CHECK: [[VECTOR_BODY]]:
@@ -144,7 +148,7 @@ define void @wrap_predicate_for_interleave_group_wraps_for_known_trip_count(ptr
144148
; CHECK: [[MIDDLE_BLOCK]]:
145149
; CHECK-NEXT: br label %[[SCALAR_PH]]
146150
; CHECK: [[SCALAR_PH]]:
147-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 12, %[[MIDDLE_BLOCK]] ], [ 0, %[[START]] ]
151+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 12, %[[MIDDLE_BLOCK]] ], [ 0, %[[START]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
148152
; CHECK-NEXT: br label %[[LOOP:.*]]
149153
; CHECK: [[LOOP]]:
150154
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -189,8 +193,13 @@ define void @wrap_predicate_for_interleave_group_unknown_trip_count(ptr noalias
189193
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
190194
; CHECK: [[VECTOR_SCEVCHECK]]:
191195
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
196+
; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP0]] to i4
197+
; CHECK-NEXT: [[MUL:%.*]] = call { i4, i1 } @llvm.umul.with.overflow.i4(i4 3, i4 [[TMP9]])
198+
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i4, i1 } [[MUL]], 0
199+
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i4, i1 } [[MUL]], 1
192200
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 15
193-
; CHECK-NEXT: br i1 [[TMP1]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
201+
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[MUL_OVERFLOW]], [[TMP1]]
202+
; CHECK-NEXT: br i1 [[TMP10]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
194203
; CHECK: [[VECTOR_PH]]:
195204
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
196205
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0

llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ define void @load_clamped_index(ptr %A, ptr %B, i32 %N) {
1919
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
2020
; CHECK: vector.scevcheck:
2121
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
22+
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP0]] to i2
2223
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], 3
2324
; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
2425
; CHECK: vector.memcheck:
@@ -91,6 +92,7 @@ define void @store_clamped_index(ptr %A, ptr %B, i32 %N) {
9192
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
9293
; CHECK: vector.scevcheck:
9394
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
95+
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP0]] to i2
9496
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], 3
9597
; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
9698
; CHECK: vector.memcheck:
@@ -359,6 +361,7 @@ define void @clamped_index_equal_dependence(ptr %A, ptr %B, i32 %N) {
359361
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
360362
; CHECK: vector.scevcheck:
361363
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
364+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP0]] to i2
362365
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], 3
363366
; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
364367
; CHECK: vector.ph:

llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,20 @@ define void @f1(ptr noalias %a,
2929
; LV-LABEL: @f1(
3030
; LV-NEXT: for.body.lver.check:
3131
; LV-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
32+
; LV-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i32
33+
; LV-NEXT: [[MUL2:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP5]])
34+
; LV-NEXT: [[MUL_RESULT1:%.*]] = extractvalue { i32, i1 } [[MUL2]], 0
35+
; LV-NEXT: [[MUL_OVERFLOW1:%.*]] = extractvalue { i32, i1 } [[MUL2]], 1
3236
; LV-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
37+
; LV-NEXT: [[TMP8:%.*]] = or i1 [[MUL_OVERFLOW1]], [[TMP1]]
3338
; LV-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
3439
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
3540
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
3641
; LV-NEXT: [[TMP2:%.*]] = sub i64 0, [[MUL_RESULT]]
3742
; LV-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[MUL_RESULT]]
3843
; LV-NEXT: [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[A]]
3944
; LV-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
40-
; LV-NEXT: [[TMP7:%.*]] = or i1 [[TMP1]], [[TMP6]]
45+
; LV-NEXT: [[TMP7:%.*]] = or i1 [[TMP8]], [[TMP6]]
4146
; LV-NEXT: br i1 [[TMP7]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
4247
; LV: for.body.ph.lver.orig:
4348
; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
@@ -75,7 +80,7 @@ define void @f1(ptr noalias %a,
7580
; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
7681
; LV: for.end.loopexit:
7782
; LV-NEXT: br label [[FOR_END:%.*]]
78-
; LV: for.end.loopexit2:
83+
; LV: for.end.loopexit5:
7984
; LV-NEXT: br label [[FOR_END]]
8085
; LV: for.end:
8186
; LV-NEXT: ret void

0 commit comments

Comments
 (0)