Skip to content

Commit 2f11810

Browse files
committed
NFCs for nonSSA Exec Mask manipulation instrs
1 parent 0ac1b80 commit 2f11810

File tree

5 files changed

+260
-49
lines changed

5 files changed

+260
-49
lines changed

llvm/include/llvm/CodeGen/MachineBasicBlock.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ class MachineBasicBlock
263263

264264
/// Return the name of the corresponding LLVM basic block, or an empty string.
265265
LLVM_ABI StringRef getName() const;
266+
LLVM_ABI std::string name() const;
266267

267268
/// Return a formatted string to identify this block and its parent function.
268269
LLVM_ABI std::string getFullName() const;

llvm/lib/CodeGen/MachineBasicBlock.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,13 @@ StringRef MachineBasicBlock::getName() const {
328328
return StringRef("", 0);
329329
}
330330

331+
std::string MachineBasicBlock::name() const {
332+
std::string Name = ("BB." + Twine(getNumber()) + ".").str();
333+
if (getBasicBlock())
334+
Name += getBasicBlock()->getName();
335+
return Name;
336+
}
337+
331338
/// Return a hopefully unique identifier for this block.
332339
std::string MachineBasicBlock::getFullName() const {
333340
std::string Name;

llvm/lib/Target/AMDGPU/AMDGPUWaveTransform.cpp

Lines changed: 132 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,42 @@ struct WaveNode {
153153
Out << "<flow-" << FlowNum << '>';
154154
});
155155
}
156+
157+
std::string getName(){
158+
std::string str;
159+
if (Block)
160+
str = Block->name();
161+
if (Block && FlowNum)
162+
str = str + ".";
163+
if (FlowNum)
164+
str = str + "<flow-" + std::to_string(FlowNum) + ">";
165+
return str;
166+
}
167+
void dump() const {
168+
dbgs() << "--------------------------------" << '\n';
169+
dbgs() << "WaveNode: " << printableName() << '\n';
170+
dbgs() << " OrderIndex: " << OrderIndex << '\n';
171+
dbgs() << " Predecessors: ";
172+
for (WaveNode *Pred : Predecessors)
173+
dbgs() << Pred->printableName() << ' ';
174+
dbgs() << '\n';
175+
dbgs() << " Successors: ";
176+
for (WaveNode *Succ : Successors)
177+
dbgs() << Succ->printableName() << ' ';
178+
dbgs() << '\n';
179+
dbgs() << " LanePredecessors: ";
180+
for (const LaneEdge &LanePred : LanePredecessors)
181+
dbgs() << "(lane=" << LanePred.Lane->printableName()
182+
<< ", wave=" << LanePred.Wave->printableName() << ") ";
183+
dbgs() << '\n';
184+
dbgs() << " LaneSuccessors: ";
185+
for (const LaneEdge &LaneSucc : LaneSuccessors)
186+
dbgs() << "(lane=" << LaneSucc.Lane->printableName()
187+
<< ", wave=" << LaneSucc.Wave->printableName() << ") ";
188+
if(LatestPostDom != nullptr) dbgs() << "\nlatestPostDom:" << LatestPostDom->printableName();
189+
else dbgs() << "latestPostDom:NULL";
190+
dbgs() << "\n--------------------------------\n\n";
191+
}
156192
};
157193

158194
/// \brief Helper class for making a CFG reconverging.
@@ -1545,6 +1581,16 @@ class ControlFlowRewriter {
15451581
explicit LaneOriginInfo(WaveNode *Node, Register CondReg = {},
15461582
bool InvertCondition = false)
15471583
: Node(Node), CondReg(CondReg), InvertCondition(InvertCondition) {}
1584+
1585+
/// Stream a one-line description of a lane origin: the originating wave node
/// (or "nullptr" when there is none), the numeric id of the condition
/// register, and the invert-condition flag.
friend llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
                                     const LaneOriginInfo &loi) {
  if (loi.Node != nullptr)
    os << "{WaveNode=" << loi.Node->printableName();
  else
    os << "{WaveNode=nullptr";

  os << ", CondReg=" << loi.CondReg.id();
  os << ", InvertCond:" << loi.InvertCondition;
  os << "}";
  return os;
}
1593+
15481594
};
15491595

15501596
struct CFGNodeInfo {
@@ -1574,6 +1620,29 @@ class ControlFlowRewriter {
15741620
Register PrimarySuccessorExec;
15751621

15761622
explicit CFGNodeInfo(WaveNode *Node) : Node(Node) {}
1623+
1624+
/// Stream a multi-line description of a CFG node's rewrite information:
/// the original exit, condition register id and successors, the primary
/// successor EXEC register id, the (node, flag) pairs in OriginBranch, and
/// every entry of origins.
friend llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
                                     const CFGNodeInfo &ni) {
  os << "CFGNodeInfo{\nOrigExit=" << ni.OrigExit;
  os << ", \nOrigCondition=" << ni.OrigCondition.id();

  // Successor pointers may be unset; print a placeholder in that case.
  if (ni.OrigSuccCond != nullptr)
    os << ", \nOrigSuccCond=" << ni.OrigSuccCond->printableName();
  else
    os << ", \nOrigSuccCond=nullptr";

  if (ni.OrigSuccFinal != nullptr)
    os << ", \nOrigSuccFinal=" << ni.OrigSuccFinal->printableName();
  else
    os << ", \nOrigSuccFinal=nullptr";

  os << ", \nPrimarySuccessorExec=" << ni.PrimarySuccessorExec.id();

  os << ", \nOriginBranch(Ri)={";
  for (const auto &Branch : ni.OriginBranch) {
    os << "(" << Branch.getPointer()->printableName();
    os << "," << Branch.getInt() << "),";
  }

  os << "}, \norigins(Ti):{";
  for (const auto &Origin : ni.origins)
    os << Origin << ",";

  os << "}\n}\n";
  return os;
}
1645+
15771646
};
15781647

15791648
/// Information required to synthesize divergent terminators with a common
@@ -1784,6 +1853,7 @@ void ControlFlowRewriter::prepareWaveCfg() {
17841853
/// establishing wave-level control flow and insert instructions for EXEC mask
17851854
/// manipulation.
17861855
void ControlFlowRewriter::rewrite() {
1856+
LLVM_DEBUG(dbgs() << "\nrewrite() begins\n");
17871857
GCNLaneMaskAnalysis LMA(Function);
17881858
const AMDGPU::LaneMaskConstants &LMC = LMU.getLaneMaskConsts();
17891859

@@ -1839,7 +1909,10 @@ void ControlFlowRewriter::rewrite() {
18391909
Opcode = AMDGPU::S_CBRANCH_SCC1;
18401910
} else {
18411911
Register CondReg = Info.OrigCondition;
1842-
if (!LMA.isSubsetOfExec(CondReg, *Node->Block)) {
1912+
bool isCondRegSubsetOfExec = LMA.isSubsetOfExec(CondReg, *Node->Block);
1913+
LLVM_DEBUG(dbgs() << "isSubsetOfExec(" << printReg(CondReg, MRI.getTargetRegisterInfo(), 0, &MRI) << "," << Node->Block->name() << ") : " << isCondRegSubsetOfExec << "\n");
1914+
1915+
if (!isCondRegSubsetOfExec) {
18431916
CondReg = LMU.createLaneMaskReg();
18441917
BuildMI(*Node->Block, Node->Block->end(), {}, TII.get(LMC.AndOpc),
18451918
CondReg)
@@ -1867,7 +1940,9 @@ void ControlFlowRewriter::rewrite() {
18671940
.addMBB(Other->Block);
18681941
}
18691942
}
1870-
1943+
LLVM_DEBUG(dbgs() << "CFG_BEGIN:" << Function.getName().str() << "_pre\n");
1944+
LLVM_DEBUG(Function.dump());
1945+
LLVM_DEBUG(dbgs() << "CFG_END:" << Function.getName().str() << "_pre\n");
18711946
// Step 2: Insert lane masks and new terminators for divergent nodes.
18721947
//
18731948
// RegMap maps (block, register) -> (masked, inverted).
@@ -1879,31 +1954,42 @@ void ControlFlowRewriter::rewrite() {
18791954
Updater.setAccumulating(true);
18801955

18811956
for (WaveNode *LaneTarget : NodeOrder) {
1957+
LLVM_DEBUG(dbgs() << "\nPROCESSING NODE:" << LaneTarget->printableName() << "\n\n");
1958+
LaneTarget->dump();
18821959
CFGNodeInfo &LaneTargetInfo = NodeInfo.find(LaneTarget)->second;
1960+
LLVM_DEBUG(dbgs() << LaneTargetInfo << '\n');
18831961

18841962
if (!llvm::any_of(
18851963
LaneTargetInfo.OriginBranch,
18861964
[](const auto &OriginBranch) { return OriginBranch.getInt(); })) {
18871965
// No divergent branches towards this node, nothing to be done.
1966+
LLVM_DEBUG(dbgs() << "No divergent branches towards this node, nothing to be done.\n");
18881967
continue;
18891968
}
18901969

1891-
LLVM_DEBUG(dbgs() << "\nDivergent branches for "
1970+
LLVM_DEBUG(dbgs() << "Divergent branches for "
18921971
<< LaneTarget->printableName() << '\n');
18931972

18941973
// Step 2.1: Add conditions branching to LaneTarget to the Lane mask
18951974
// Updater.
18961975
// FIXME: we are creating a register here only to initialize the updater
18971976
Updater.init(LMU.createLaneMaskReg());
18981977
Updater.addReset(*LaneTarget->Block, GCNLaneMaskUpdater::ResetInMiddle);
1978+
LLVM_DEBUG(dbgs() << "\nMark ResetInMiddle(X): " << LaneTarget->printableName() << '\n');
18991979
for (const auto &NodeDivergentPair : LaneTargetInfo.OriginBranch) {
1980+
LLVM_DEBUG(dbgs() << "Mark ResetAtEnd(Ri): " << NodeDivergentPair.getPointer()->printableName() << '\n');
19001981
Updater.addReset(*NodeDivergentPair.getPointer()->Block,
19011982
GCNLaneMaskUpdater::ResetAtEnd);
19021983
}
1903-
1984+
LLVM_DEBUG(dbgs() << "Iterating over Ti\n\n");
19041985
for (const LaneOriginInfo &LaneOrigin : LaneTargetInfo.origins) {
19051986
Register CondReg;
19061987

1988+
LLVM_DEBUG(dbgs() << "\nOrigin(Ti): " << LaneOrigin << '\n');
1989+
if(LaneOrigin.CondReg){
1990+
dbgs() << "LaneOrigin.CondReg:" << printReg(LaneOrigin.CondReg, MRI.getTargetRegisterInfo(), 0, &MRI) << "\n";
1991+
}
1992+
19071993
if (!LaneOrigin.CondReg) {
19081994
assert(!LaneOrigin.InvertCondition);
19091995
CondReg = getAllOnes();
@@ -1927,29 +2013,30 @@ void ControlFlowRewriter::rewrite() {
19272013
LaneOrigin.Node->Block->getFirstTerminator(), {},
19282014
TII.get(LMC.CSelectOpc), CondReg)
19292015
.addReg(LMC.ExecReg)
1930-
.addImm(0);
2016+
.addImm(0)->dump();
19312017
} else {
19322018
BuildMI(*LaneOrigin.Node->Block,
19332019
LaneOrigin.Node->Block->getFirstTerminator(), {},
19342020
TII.get(LMC.CSelectOpc), CondReg)
19352021
.addImm(0)
1936-
.addReg(LMC.ExecReg);
2022+
.addReg(LMC.ExecReg)->dump();
19372023
}
19382024
} else {
19392025
CondReg = LaneOrigin.CondReg;
1940-
if (!LMA.isSubsetOfExec(LaneOrigin.CondReg, *LaneOrigin.Node->Block)) {
2026+
bool isCondRegSubsetOfExec = LMA.isSubsetOfExec(LaneOrigin.CondReg, *LaneOrigin.Node->Block);
2027+
LLVM_DEBUG(dbgs() << "isSubsetOfExec(" << printReg(LaneOrigin.CondReg, MRI.getTargetRegisterInfo(), 0, &MRI) << "," << LaneOrigin.Node->Block->name() << ") : " << isCondRegSubsetOfExec << "\n");
2028+
if (!isCondRegSubsetOfExec) {
19412029
Register Prev = CondReg;
19422030
CondReg = LMU.createLaneMaskReg();
19432031
BuildMI(*LaneOrigin.Node->Block,
19442032
LaneOrigin.Node->Block->getFirstTerminator(), {},
19452033
TII.get(LMC.AndOpc), CondReg)
19462034
.addReg(LMC.ExecReg)
1947-
.addReg(Prev);
2035+
.addReg(Prev)->dump();
19482036

19492037
RegMap[std::make_pair(LaneOrigin.Node->Block, LaneOrigin.CondReg)]
19502038
.first = CondReg;
19512039
}
1952-
19532040
if (LaneOrigin.InvertCondition) {
19542041
// CondReg = EXEC ^ origCond;
19552042
//
@@ -1965,7 +2052,7 @@ void ControlFlowRewriter::rewrite() {
19652052
LaneOrigin.Node->Block->getFirstTerminator(), {},
19662053
TII.get(LMC.XorOpc), CondReg)
19672054
.addReg(LaneOrigin.CondReg)
1968-
.addImm(-1);
2055+
.addImm(-1)->dump();
19692056

19702057
RegMap[std::make_pair(LaneOrigin.Node->Block, LaneOrigin.CondReg)]
19712058
.second = CondReg;
@@ -1975,20 +2062,23 @@ void ControlFlowRewriter::rewrite() {
19752062
}
19762063

19772064
LLVM_DEBUG(
1978-
dbgs() << " available @ " << LaneOrigin.Node->printableName() << ": "
2065+
dbgs() << " Contributions @ " << LaneOrigin.Node->printableName() << ": "
19792066
<< printReg(CondReg, MRI.getTargetRegisterInfo(), 0, &MRI)
19802067
<< '\n');
19812068

19822069
Updater.addAvailable(*LaneOrigin.Node->Block, CondReg);
19832070
}
19842071

2072+
LLVM_DEBUG(dbgs() << "Iterating over Ri\n\n");
19852073
// Step 2.2: Synthesize EXEC updates and branch instructions.
19862074
for (const auto &NodeDivergentPair : LaneTargetInfo.OriginBranch) {
19872075
if (!NodeDivergentPair.getInt())
19882076
continue; // not a divergent branch
1989-
2077+
LLVM_DEBUG(dbgs() << "Synthesize EXEC updates and branch instructions for " << NodeDivergentPair.getPointer()->printableName() << "\n");
19902078
WaveNode *OriginNode = NodeDivergentPair.getPointer();
19912079
CFGNodeInfo &OriginCFGNodeInfo = NodeInfo.find(OriginNode)->second;
2080+
LLVM_DEBUG(dbgs() << OriginCFGNodeInfo << '\n');
2081+
19922082
OriginCFGNodeInfo.PrimarySuccessorExec =
19932083
Updater.getValueAfterMerge(*OriginNode->Block);
19942084

@@ -2001,37 +2091,46 @@ void ControlFlowRewriter::rewrite() {
20012091

20022092
BuildMI(*OriginNode->Block, OriginNode->Block->end(), {},
20032093
TII.get(LMC.MovTermOpc), LMC.ExecReg)
2004-
.addReg(OriginCFGNodeInfo.PrimarySuccessorExec);
2094+
.addReg(OriginCFGNodeInfo.PrimarySuccessorExec)->dump();
20052095
BuildMI(*OriginNode->Block, OriginNode->Block->end(), {},
2006-
TII.get(AMDGPU::SI_WAVE_CF_EDGE));
2096+
TII.get(AMDGPU::SI_WAVE_CF_EDGE))->dump();
20072097
BuildMI(*OriginNode->Block, OriginNode->Block->end(), {},
20082098
TII.get(AMDGPU::S_CBRANCH_EXECZ))
2009-
.addMBB(OriginNode->Successors[1]->Block);
2099+
.addMBB(OriginNode->Successors[1]->Block)->dump();
20102100
BuildMI(*OriginNode->Block, OriginNode->Block->end(), {},
20112101
TII.get(AMDGPU::S_BRANCH))
2012-
.addMBB(OriginNode->Successors[0]->Block);
2102+
.addMBB(OriginNode->Successors[0]->Block)->dump();
2103+
2104+
LLVM_DEBUG(dbgs() << "\nNodeDivergentPair:" << NodeDivergentPair.getPointer()->printableName() << "," << NodeDivergentPair.getInt() << " complete...\n");
20132105
}
20142106

2107+
LLVM_DEBUG(dbgs() << "CFG_BEGIN:" << Function.getName().str() << "_" << LaneTarget->getName() << "\n");
20152108
LLVM_DEBUG(Function.dump());
2109+
LLVM_DEBUG(dbgs() << "CFG_END:" << Function.getName().str() << "_" << LaneTarget->getName() << "\n");
2110+
20162111
}
2112+
LLVM_DEBUG(dbgs() << "\nInsert rejoin masks\n");
20172113

20182114
// Step 3: Insert rejoin masks.
2115+
LLVM_DEBUG(dbgs() << "Iterate over secondary nodes\n");
20192116
for (WaveNode *Secondary : ReconvergeCfg.nodes()) {
20202117
if (!Secondary->IsSecondary)
20212118
continue;
20222119

20232120
LLVM_DEBUG(dbgs() << "\nRejoin @ " << Secondary->printableName() << '\n');
2024-
2121+
Secondary->dump();
20252122
// FIXME: we are creating a register here only to initialize the updater
20262123
Updater.init(LMU.createLaneMaskReg());
20272124
Updater.addReset(*Secondary->Block, GCNLaneMaskUpdater::ResetInMiddle);
2125+
LLVM_DEBUG(dbgs() << "\nMark ResetInMiddle(X): " << Secondary->printableName() << '\n');
20282126

20292127
for (WaveNode *Pred : Secondary->Predecessors) {
20302128
if (!Pred->IsDivergent || Pred->Successors.size() == 1)
20312129
continue;
20322130

20332131
CFGNodeInfo &PredInfo = NodeInfo.find(Pred)->second;
2034-
Register PrimaryExec = PredInfo.PrimarySuccessorExec;
2132+
Register PrimaryExec = PredInfo.PrimarySuccessorExec;
2133+
LLVM_DEBUG(dbgs() << "Pred:" << Pred->Block->name() << "\nPrimaryExec:" << printReg(PrimaryExec,MRI.getTargetRegisterInfo(), 0, &MRI) << "\n");
20352134

20362135
MachineInstr *PrimaryExecDef;
20372136
for (;;) {
@@ -2041,6 +2140,10 @@ void ControlFlowRewriter::rewrite() {
20412140
PrimaryExec = PrimaryExecDef->getOperand(1).getReg();
20422141
}
20432142

2143+
LLVM_DEBUG(dbgs() << "PrimaryExecDef:");
2144+
LLVM_DEBUG(PrimaryExecDef->dump());
2145+
LLVM_DEBUG(dbgs() << "\n");
2146+
20442147
// Rejoin = EXEC ^ PrimaryExec
20452148
//
20462149
// Fold immediately if PrimaryExec was obtained via XOR as well.
@@ -2072,11 +2175,11 @@ void ControlFlowRewriter::rewrite() {
20722175
BuildMI(*Pred->Block, Pred->Block->getFirstTerminator(), {},
20732176
TII.get(LMC.XorOpc), Rejoin)
20742177
.addReg(LMC.ExecReg)
2075-
.addReg(PrimaryExec);
2178+
.addReg(PrimaryExec)->dump();
20762179
}
20772180

20782181
LLVM_DEBUG(
2079-
dbgs() << " available @ " << Pred->printableName() << ": "
2182+
dbgs() << " Rejoin available @ " << Pred->printableName() << ": "
20802183
<< printReg(Rejoin, MRI.getTargetRegisterInfo(), 0, &MRI)
20812184
<< '\n');
20822185

@@ -2087,12 +2190,20 @@ void ControlFlowRewriter::rewrite() {
20872190
BuildMI(*Secondary->Block, Secondary->Block->getFirstNonPHI(), {},
20882191
TII.get(LMC.OrOpc), LMC.ExecReg)
20892192
.addReg(LMC.ExecReg)
2090-
.addReg(Rejoin);
2193+
.addReg(Rejoin)->dump();
20912194

2195+
LLVM_DEBUG(dbgs() << "CFG_BEGIN:" << Function.getName().str() << "_" << Secondary->Block->name() << ".rejoin\n");
20922196
LLVM_DEBUG(Function.dump());
2197+
LLVM_DEBUG(dbgs() << "CFG_END:" << Function.getName().str() << "_" << Secondary->Block->name() << ".rejoin\n");
2198+
2199+
20932200
}
20942201

20952202
Updater.cleanup();
2203+
2204+
LLVM_DEBUG(dbgs() << "CFG_BEGIN:" << Function.getName().str() << "_clean\n");
2205+
LLVM_DEBUG(Function.dump());
2206+
LLVM_DEBUG(dbgs() << "CFG_END:" << Function.getName().str() << "_clean\n");
20962207
}
20972208

20982209
namespace {

0 commit comments

Comments
 (0)