@@ -32,14 +32,15 @@ SPDX-License-Identifier: MIT
32
32
using namespace llvm ;
33
33
using namespace IGC ;
34
34
35
- AllocationLivenessAnalyzer::LivenessData AllocationLivenessAnalyzer::ProcessInstruction (Instruction* I)
35
+ AllocationLivenessAnalyzer::LivenessData AllocationLivenessAnalyzer::ProcessInstruction (
36
+ Instruction* I,
37
+ DominatorTree& DT,
38
+ LoopInfo& LI
39
+ )
36
40
{
37
41
// static allocas are usually going to be in the entry block
38
42
// that's a practice, but we only care about the last block that dominates all uses
39
43
BasicBlock* commonDominator = nullptr ;
40
- auto * DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree ();
41
- auto * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo ();
42
-
43
44
SetVector<Instruction*> allUsers;
44
45
SetVector<Instruction*> lifetimeLeakingUsers;
45
46
SmallVector<Use*> worklist;
@@ -49,7 +50,7 @@ AllocationLivenessAnalyzer::LivenessData AllocationLivenessAnalyzer::ProcessInst
49
50
auto * UasI = cast<Instruction>(use.getUser ());
50
51
if (commonDominator)
51
52
{
52
- commonDominator = DT-> findNearestCommonDominator (commonDominator, UasI->getParent ());
53
+ commonDominator = DT. findNearestCommonDominator (commonDominator, UasI->getParent ());
53
54
}
54
55
else
55
56
{
@@ -59,53 +60,76 @@ AllocationLivenessAnalyzer::LivenessData AllocationLivenessAnalyzer::ProcessInst
59
60
worklist.push_back (&use);
60
61
}
61
62
63
+ auto addUsesFn = [&worklist](auto uses) {
64
+ for (auto & use : uses)
65
+ worklist.push_back (&use);
66
+ };
67
+
62
68
// figure out the potential accesses to the memory via GEP and bitcasts
63
69
while (!worklist.empty ())
64
70
{
65
71
auto * use = worklist.pop_back_val ();
66
72
auto * II = cast<Instruction>(use->getUser ());
67
73
68
- if (allUsers.contains (II))
74
+ if (! allUsers.insert (II))
69
75
continue ;
70
76
71
- allUsers.insert (II);
77
+ // a possible optimization here:
78
+ // 1. find all reachable blocks
79
+ // 2. cull uses that are not reachable from the allocation
80
+
81
+ commonDominator = DT.findNearestCommonDominator (commonDominator, II->getParent ());
72
82
73
83
switch (II->getOpcode ())
74
84
{
75
85
case Instruction::PHI:
76
86
case Instruction::GetElementPtr:
77
87
case Instruction::BitCast:
78
88
case Instruction::Select:
79
- for (auto & use : II->uses ())
80
- worklist.push_back (&use);
81
-
89
+ addUsesFn (II->uses ());
82
90
break ;
83
91
case Instruction::PtrToInt:
84
92
lifetimeLeakingUsers.insert (II);
85
93
break ;
86
94
case Instruction::Store:
87
95
{
88
96
auto * storeI = cast<StoreInst>(II);
89
- if (storeI->getValueOperand () == cast<Value>(use))
90
- lifetimeLeakingUsers.insert (II);
97
+ if (storeI->getValueOperand () == use->get ())
98
+ {
99
+ SmallVector<Instruction*> origins;
100
+ if (Provenance::tryFindPointerOrigin (storeI->getPointerOperand (), origins))
101
+ {
102
+ for (auto * origin : origins)
103
+ addUsesFn (origin->uses ());
104
+ }
105
+ else
106
+ {
107
+ lifetimeLeakingUsers.insert (II);
108
+ }
109
+ }
91
110
}
92
111
break ;
93
112
case Instruction::Call:
94
113
{
95
114
auto * callI = cast<CallInst>(II);
96
115
if (!callI->doesNotCapture (use->getOperandNo ()))
97
116
lifetimeLeakingUsers.insert (II);
117
+
118
+ if (II->getType ()->isPointerTy ())
119
+ addUsesFn (II->uses ());
98
120
}
99
121
break ;
100
122
case Instruction::Load:
123
+ if (II->getType ()->isPointerTy ())
124
+ addUsesFn (II->uses ());
101
125
break ;
102
126
default : // failsafe for handling "unapproved" instructions
103
127
lifetimeLeakingUsers.insert (II);
104
128
break ;
105
129
}
106
130
}
107
131
108
- return LivenessData (I, std::move (allUsers), * LI, * DT, commonDominator, std::move (lifetimeLeakingUsers));
132
+ return LivenessData (I, std::move (allUsers), LI, DT, commonDominator, std::move (lifetimeLeakingUsers));
109
133
}
110
134
111
135
void AllocationLivenessAnalyzer::getAnalysisUsage (llvm::AnalysisUsage& AU) const
@@ -264,6 +288,8 @@ AllocationLivenessAnalyzer::LivenessData::LivenessData(
264
288
}
265
289
}
266
290
291
+ // at this point we have all the blocks we need, so fill out the start/end data
292
+
267
293
// subtract the inflow blocks from the outflow blocks to find the block which starts the lifetime - there should be only one!
268
294
auto bbOutOnly = bbOut;
269
295
set_subtract (bbOutOnly, bbIn);
@@ -287,7 +313,7 @@ AllocationLivenessAnalyzer::LivenessData::LivenessData(
287
313
{
288
314
if (usersOfAllocation.contains (&I))
289
315
{
290
- lifetimeEnds .push_back (&I);
316
+ lifetimeEndInstructions .push_back (&I);
291
317
break ;
292
318
}
293
319
}
@@ -297,6 +323,7 @@ AllocationLivenessAnalyzer::LivenessData::LivenessData(
297
323
}
298
324
else
299
325
{
326
+ // find all blocks where lifetime flows in, but doesn't flow out
300
327
auto bbOnlyIn = bbIn;
301
328
set_subtract (bbOnlyIn, bbOut);
302
329
@@ -306,12 +333,29 @@ AllocationLivenessAnalyzer::LivenessData::LivenessData(
306
333
{
307
334
if (usersOfAllocation.contains (&I))
308
335
{
309
- lifetimeEnds .push_back (&I);
336
+ lifetimeEndInstructions .push_back (&I);
310
337
break ;
311
338
}
312
339
}
313
340
}
314
341
}
342
+
343
+ // collect lifetime end edges (where outflow block has successors that aren't inflow blocks)
344
+ for (auto * bb : bbOut)
345
+ {
346
+ // however, we can't just add successors
347
+ // because then we can accidentally execute lifetime end instruction twice
348
+ // which can end up causing issues similar to double-free
349
+ // we need to make sure every successor has a single predecessor
350
+ SmallVector<BasicBlock*> successors (llvm::successors (bb));
351
+ for (auto * succ : successors)
352
+ {
353
+ if (bbIn.contains (succ))
354
+ continue ;
355
+
356
+ lifetimeEndEdges.push_back ({ bb, succ });
357
+ }
358
+ }
315
359
}
316
360
317
361
bool AllocationLivenessAnalyzer::LivenessData::OverlapsWith (const LivenessData& LD) const
@@ -330,7 +374,7 @@ bool AllocationLivenessAnalyzer::LivenessData::OverlapsWith(const LivenessData&
330
374
for (auto & [LD1, LD2] : { std::make_pair (this , &LD), std::make_pair (&LD, this ) })
331
375
{
332
376
// TODO: replace the whole logic with ContainsInstruction checks
333
- for (auto * I : LD1->lifetimeEnds )
377
+ for (auto * I : LD1->lifetimeEndInstructions )
334
378
{
335
379
// what if LD1 is contained in a single block
336
380
if (I->getParent () == LD1->lifetimeStart ->getParent ())
@@ -341,7 +385,7 @@ bool AllocationLivenessAnalyzer::LivenessData::OverlapsWith(const LivenessData&
341
385
bool lifetimeStart = LD2->lifetimeStart ->getParent () == bb && LD2->lifetimeStart ->comesBefore (I);
342
386
343
387
auto * LD1_lifetimeStart = LD1->lifetimeStart ; // we have to copy LD1.lifetimeStart to avoid clang complaining about LD1 being captured by the lambda
344
- bool lifetimeEnd = any_of (LD2->lifetimeEnds , [&](auto * lifetimeEnd) {
388
+ bool lifetimeEnd = any_of (LD2->lifetimeEndInstructions , [&](auto * lifetimeEnd) {
345
389
return lifetimeEnd->getParent () == bb && LD1_lifetimeStart->comesBefore (lifetimeEnd);
346
390
});
347
391
@@ -381,7 +425,7 @@ bool AllocationLivenessAnalyzer::LivenessData::ContainsInstruction(const llvm::I
381
425
if (I.comesBefore (lifetimeStart))
382
426
return false ;
383
427
384
- if (lifetimeEnds [0 ]->comesBefore (&I))
428
+ if (lifetimeEndInstructions [0 ]->comesBefore (&I))
385
429
return false ;
386
430
387
431
return true ;
@@ -396,9 +440,54 @@ bool AllocationLivenessAnalyzer::LivenessData::ContainsInstruction(const llvm::I
396
440
if (lifetimeStart->getParent () == bb && !I.comesBefore (lifetimeStart))
397
441
return true ;
398
442
399
- bool overlapsWithEnd = any_of (lifetimeEnds , [&](auto * lifetimeEnd) {
443
+ bool overlapsWithEnd = any_of (lifetimeEndInstructions , [&](auto * lifetimeEnd) {
400
444
return lifetimeEnd->getParent () == bb && !lifetimeEnd->comesBefore (&I);
401
445
});
402
446
403
447
return overlapsWithEnd;
404
448
}
449
+
450
+ namespace IGC
451
+ {
452
+ namespace Provenance
453
+ {
454
+ static bool tryFindPointerOriginImpl (Value* ptr, SmallVectorImpl<Instruction*>& origins, DenseSet<Value*>& cache);
455
+
456
+ bool tryFindPointerOrigin (Value* ptr, SmallVectorImpl<Instruction*>& origins)
457
+ {
458
+ origins.clear ();
459
+
460
+ DenseSet<Value*> cache;
461
+ bool found = tryFindPointerOriginImpl (ptr, origins, cache);
462
+
463
+ IGC_ASSERT_MESSAGE (found && !origins.empty (), " Origin reported as found but no origins were added!" );
464
+
465
+ return found;
466
+ }
467
+
468
+ static bool tryFindPointerOrigin (GetElementPtrInst* Ptr, SmallVectorImpl<Instruction*>& origins, DenseSet<Value*>& cache)
469
+ {
470
+ return tryFindPointerOriginImpl (Ptr->getPointerOperand (), origins, cache);
471
+ }
472
+
473
+ static bool tryFindPointerOriginImpl (Value* ptr, SmallVectorImpl<Instruction*>& origins, DenseSet<Value*>& cache)
474
+ {
475
+ if (!cache.insert (ptr).second )
476
+ return true ;
477
+
478
+ if (auto * GEP = dyn_cast<GetElementPtrInst>(ptr))
479
+ {
480
+ return tryFindPointerOrigin (GEP, origins, cache);
481
+ }
482
+
483
+ if (auto * allocaI = dyn_cast<AllocaInst>(ptr))
484
+ {
485
+ origins.push_back (allocaI);
486
+ return true ;
487
+ }
488
+
489
+ return false ;
490
+ }
491
+
492
+ }
493
+ }
0 commit comments