Skip to content

Commit 05b2e8c

Browse files
committed
[InstCombine] Don't sink if it would require dropping deref assumptions. (llvm#166945)
Currently, sinking in instcombine drops assumes if they would prevent sinking. Removing dereferenceable assumptions early on can inhibit vectorization of early-exit loops in practice. Special-case dereferenceable assumptions so that they block sinking. This can be combined with a separate change to drop dereferenceable assumptions after vectorization: https://clang.godbolt.org/z/jGqcx3sbs PR: llvm#166945 (cherry picked from commit 700b77b)
1 parent 167c4fd commit 05b2e8c

File tree

3 files changed

+231
-2
lines changed

3 files changed

+231
-2
lines changed

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5505,8 +5505,15 @@ bool InstCombinerImpl::run() {
55055505

55065506
for (Use &U : I->uses()) {
55075507
User *User = U.getUser();
5508-
if (User->isDroppable())
5509-
continue;
5508+
if (User->isDroppable()) {
5509+
// Do not sink if there are dereferenceable assumes that would be
5510+
// removed.
5511+
auto II = dyn_cast<IntrinsicInst>(User);
5512+
if (II->getIntrinsicID() != Intrinsic::assume ||
5513+
!II->getOperandBundle("dereferenceable"))
5514+
continue;
5515+
}
5516+
55105517
if (NumUsers > MaxSinkNumUsers)
55115518
return std::nullopt;
55125519

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -p instcombine -S %s | FileCheck %s
3+
4+
define i64 @test_dereferenceable_assume(ptr %p, ptr %q, i1 %c.0) {
5+
; CHECK-LABEL: define i64 @test_dereferenceable_assume(
6+
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[C_0:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64
9+
; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64
10+
; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]]
11+
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[DIFF]]) ]
12+
; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[ELSE:.*]]
13+
; CHECK: [[THEN]]:
14+
; CHECK-NEXT: ret i64 [[DIFF]]
15+
; CHECK: [[ELSE]]:
16+
; CHECK-NEXT: ret i64 0
17+
;
18+
entry:
19+
%p_int = ptrtoint ptr %p to i64
20+
%q_int = ptrtoint ptr %q to i64
21+
%diff = sub i64 %q_int, %p_int
22+
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ]
23+
br i1 %c.0, label %then, label %else
24+
25+
then:
26+
ret i64 %diff
27+
28+
else:
29+
ret i64 0
30+
}
31+
32+
define i64 @test_sink_with_dereferenceable_assume_same_block_as_user(ptr %p, ptr %q, i1 %c.0) {
33+
; CHECK-LABEL: define i64 @test_sink_with_dereferenceable_assume_same_block_as_user(
34+
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[C_0:%.*]]) {
35+
; CHECK-NEXT: [[ENTRY:.*:]]
36+
; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[ELSE:.*]]
37+
; CHECK: [[THEN]]:
38+
; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64
39+
; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64
40+
; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]]
41+
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[DIFF]]) ]
42+
; CHECK-NEXT: ret i64 [[DIFF]]
43+
; CHECK: [[ELSE]]:
44+
; CHECK-NEXT: ret i64 0
45+
;
46+
entry:
47+
%p_int = ptrtoint ptr %p to i64
48+
%q_int = ptrtoint ptr %q to i64
49+
%diff = sub i64 %q_int, %p_int
50+
br i1 %c.0, label %then, label %else
51+
52+
then:
53+
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ]
54+
ret i64 %diff
55+
56+
else:
57+
ret i64 0
58+
}
59+
60+
define i64 @test_sink_with_multiple_users_dominated_by_deref(ptr %p, ptr %q, i1 %c.0, i1 %c.1) {
61+
; CHECK-LABEL: define i64 @test_sink_with_multiple_users_dominated_by_deref(
62+
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[C_0:%.*]], i1 [[C_1:%.*]]) {
63+
; CHECK-NEXT: [[ENTRY:.*:]]
64+
; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64
65+
; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64
66+
; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]]
67+
; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[ELSE:.*]]
68+
; CHECK: [[THEN]]:
69+
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[DIFF]]) ]
70+
; CHECK-NEXT: br i1 [[C_1]], label %[[THEN_2:.*]], label %[[ELSE]]
71+
; CHECK: [[THEN_2]]:
72+
; CHECK-NEXT: [[DOUBLED:%.*]] = shl i64 [[DIFF]], 1
73+
; CHECK-NEXT: ret i64 [[DOUBLED]]
74+
; CHECK: [[ELSE]]:
75+
; CHECK-NEXT: ret i64 0
76+
;
77+
entry:
78+
%p_int = ptrtoint ptr %p to i64
79+
%q_int = ptrtoint ptr %q to i64
80+
%diff = sub i64 %q_int, %p_int
81+
br i1 %c.0, label %then, label %else
82+
83+
then:
84+
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ]
85+
br i1 %c.1, label %then.2, label %else
86+
87+
then.2:
88+
%doubled = mul i64 %diff, 2
89+
ret i64 %doubled
90+
91+
else:
92+
ret i64 0
93+
}
94+
95+
define i64 @test_deref_user_does_not_dominate_other_user(ptr %p, ptr %q, i1 %c.0) {
96+
; CHECK-LABEL: define i64 @test_deref_user_does_not_dominate_other_user(
97+
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[C_0:%.*]]) {
98+
; CHECK-NEXT: [[ENTRY:.*:]]
99+
; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64
100+
; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64
101+
; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]]
102+
; CHECK-NEXT: br i1 [[C_0]], label %[[MIDDLE:.*]], label %[[EXIT:.*]]
103+
; CHECK: [[MIDDLE]]:
104+
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[DIFF]]) ]
105+
; CHECK-NEXT: br label %[[EXIT]]
106+
; CHECK: [[EXIT]]:
107+
; CHECK-NEXT: ret i64 [[DIFF]]
108+
;
109+
entry:
110+
%p_int = ptrtoint ptr %p to i64
111+
%q_int = ptrtoint ptr %q to i64
112+
%diff = sub i64 %q_int, %p_int
113+
br i1 %c.0, label %middle, label %exit
114+
115+
middle:
116+
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ]
117+
br label %exit
118+
119+
exit:
120+
ret i64 %diff
121+
}
122+
123+
declare void @llvm.assume(i1 noundef)

llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,9 +129,108 @@ return:
129129
ret i64 %res
130130
}
131131

132+
define ptr @std_find_caller(ptr noundef %first, ptr noundef %last) {
133+
; CHECK-LABEL: define noundef ptr @std_find_caller(
134+
; CHECK-SAME: ptr noundef [[FIRST:%.*]], ptr noundef [[LAST:%.*]]) local_unnamed_addr #[[ATTR0]] {
135+
; CHECK-NEXT: [[ENTRY:.*]]:
136+
; CHECK-NEXT: [[FIRST3:%.*]] = ptrtoint ptr [[FIRST]] to i64
137+
; CHECK-NEXT: [[LAST_I64:%.*]] = ptrtoint ptr [[LAST]] to i64
138+
; CHECK-NEXT: [[PTR_SUB:%.*]] = sub i64 [[LAST_I64]], [[FIRST3]]
139+
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST]], i64 2) ]
140+
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[LAST]], i64 2) ]
141+
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[FIRST]], i64 [[PTR_SUB]]) ]
142+
; CHECK-NEXT: [[PRE_I:%.*]] = icmp eq ptr [[FIRST]], [[LAST]]
143+
; CHECK-NEXT: br i1 [[PRE_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT:.*]], label %[[LOOP_HEADER_I_PREHEADER:.*]]
144+
; CHECK: [[LOOP_HEADER_I_PREHEADER]]:
145+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[PTR_SUB]]
146+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LAST_I64]], -2
147+
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[FIRST3]]
148+
; CHECK-NEXT: [[TMP2:%.*]] = lshr exact i64 [[TMP1]], 1
149+
; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1
150+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 158
151+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[LOOP_HEADER_I_PREHEADER2:.*]], label %[[VECTOR_PH:.*]]
152+
; CHECK: [[VECTOR_PH]]:
153+
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], -8
154+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[XTRAITER]], 1
155+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]]
156+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
157+
; CHECK: [[VECTOR_BODY]]:
158+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[PROL_ITER_NEXT:%.*]], %[[VECTOR_BODY]] ]
159+
; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = shl i64 [[INDEX]], 1
160+
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX1]]
161+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP1]], align 2
162+
; CHECK-NEXT: [[WIDE_LOAD_FR:%.*]] = freeze <8 x i16> [[WIDE_LOAD]]
163+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD_FR]], splat (i16 1)
164+
; CHECK-NEXT: [[PROL_ITER_NEXT]] = add nuw i64 [[INDEX]], 8
165+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
166+
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i8 [[TMP5]], 0
167+
; CHECK-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
168+
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[PROL_ITER_CMP_NOT]]
169+
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
170+
; CHECK: [[MIDDLE_SPLIT]]:
171+
; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
172+
; CHECK: [[MIDDLE_BLOCK]]:
173+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[XTRAITER]]
174+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_HEADER_I_PREHEADER2]]
175+
; CHECK: [[LOOP_HEADER_I_PREHEADER2]]:
176+
; CHECK-NEXT: [[PTR_IV_I_PH:%.*]] = phi ptr [ [[FIRST]], %[[LOOP_HEADER_I_PREHEADER]] ], [ [[NEXT_GEP]], %[[MIDDLE_BLOCK]] ]
177+
; CHECK-NEXT: br label %[[LOOP_HEADER_I:.*]]
178+
; CHECK: [[VECTOR_EARLY_EXIT]]:
179+
; CHECK-NEXT: [[TMP11:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP4]], i1 true)
180+
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]]
181+
; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 1
182+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]]
183+
; CHECK-NEXT: br label %[[STD_FIND_GENERIC_IMPL_EXIT]]
184+
; CHECK: [[LOOP_HEADER_I]]:
185+
; CHECK-NEXT: [[PTR_IV_I:%.*]] = phi ptr [ [[PTR_IV_NEXT_I:%.*]], %[[LOOP_LATCH_I:.*]] ], [ [[PTR_IV_I_PH]], %[[LOOP_HEADER_I_PREHEADER2]] ]
186+
; CHECK-NEXT: [[L_I:%.*]] = load i16, ptr [[PTR_IV_I]], align 2
187+
; CHECK-NEXT: [[C_1_I:%.*]] = icmp eq i16 [[L_I]], 1
188+
; CHECK-NEXT: br i1 [[C_1_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_LATCH_I]]
189+
; CHECK: [[LOOP_LATCH_I]]:
190+
; CHECK-NEXT: [[PTR_IV_NEXT_I]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 2
191+
; CHECK-NEXT: [[EC_I:%.*]] = icmp eq ptr [[PTR_IV_NEXT_I]], [[LAST]]
192+
; CHECK-NEXT: br i1 [[EC_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_HEADER_I]], !llvm.loop [[LOOP4:![0-9]+]]
193+
; CHECK: [[STD_FIND_GENERIC_IMPL_EXIT]]:
194+
; CHECK-NEXT: [[RES_I:%.*]] = phi ptr [ [[FIRST]], %[[ENTRY]] ], [ [[SCEVGEP]], %[[MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT]] ], [ [[SCEVGEP]], %[[LOOP_LATCH_I]] ], [ [[PTR_IV_I]], %[[LOOP_HEADER_I]] ]
195+
; CHECK-NEXT: ret ptr [[RES_I]]
196+
;
197+
entry:
198+
%last.i64 = ptrtoint ptr %last to i64
199+
%first.i64 = ptrtoint ptr %first to i64
200+
%ptr.sub = sub i64 %last.i64, %first.i64
201+
call void @llvm.assume(i1 true) [ "align"(ptr %first, i64 2) ]
202+
call void @llvm.assume(i1 true) [ "align"(ptr %last, i64 2) ]
203+
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %first, i64 %ptr.sub) ]
204+
%call = call noundef ptr @std_find_generic_impl(ptr noundef nonnull %first, ptr noundef %last, i16 noundef signext 1)
205+
ret ptr %call
206+
}
207+
208+
define linkonce_odr noundef ptr @std_find_generic_impl(ptr noundef %first, ptr noundef %last, i16 noundef %value) {
209+
entry:
210+
%pre = icmp eq ptr %first, %last
211+
br i1 %pre, label %exit, label %loop.header
212+
213+
loop.header:
214+
%ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %first, %entry ]
215+
%l = load i16, ptr %ptr.iv, align 2
216+
%c.1 = icmp eq i16 %l, %value
217+
br i1 %c.1, label %exit, label %loop.latch
218+
219+
loop.latch:
220+
%ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 2
221+
%ec = icmp eq ptr %ptr.iv.next, %last
222+
br i1 %ec, label %exit, label %loop.header
223+
224+
exit:
225+
%res = phi ptr [ %first, %entry ], [ %ptr.iv, %loop.header ], [ %ptr.iv.next, %loop.latch ]
226+
ret ptr %res
227+
}
228+
132229
declare void @llvm.assume(i1 noundef)
133230
;.
134231
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
135232
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
136233
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
234+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
235+
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
137236
;.

0 commit comments

Comments
 (0)