@@ -153,6 +153,42 @@ struct WaveNode {
153153 Out << " <flow-" << FlowNum << ' >' ;
154154 });
155155 }
156+
157+ std::string getName (){
158+ std::string str;
159+ if (Block)
160+ str = Block->name ();
161+ if (Block && FlowNum)
162+ str = str + " ." ;
163+ if (FlowNum)
164+ str = str + " <flow-" + std::to_string (FlowNum) + " >" ;
165+ return str;
166+ }
167+ void dump () const {
168+ dbgs () << " --------------------------------" << ' \n ' ;
169+ dbgs () << " WaveNode: " << printableName () << ' \n ' ;
170+ dbgs () << " OrderIndex: " << OrderIndex << ' \n ' ;
171+ dbgs () << " Predecessors: " ;
172+ for (WaveNode *Pred : Predecessors)
173+ dbgs () << Pred->printableName () << ' ' ;
174+ dbgs () << ' \n ' ;
175+ dbgs () << " Successors: " ;
176+ for (WaveNode *Succ : Successors)
177+ dbgs () << Succ->printableName () << ' ' ;
178+ dbgs () << ' \n ' ;
179+ dbgs () << " LanePredecessors: " ;
180+ for (const LaneEdge &LanePred : LanePredecessors)
181+ dbgs () << " (lane=" << LanePred.Lane ->printableName ()
182+ << " , wave=" << LanePred.Wave ->printableName () << " ) " ;
183+ dbgs () << ' \n ' ;
184+ dbgs () << " LaneSuccessors: " ;
185+ for (const LaneEdge &LaneSucc : LaneSuccessors)
186+ dbgs () << " (lane=" << LaneSucc.Lane ->printableName ()
187+ << " , wave=" << LaneSucc.Wave ->printableName () << " ) " ;
188+ if (LatestPostDom != nullptr ) dbgs () << " \n latestPostDom:" << LatestPostDom->printableName ();
189+ else dbgs () << " latestPostDom:NULL" ;
190+ dbgs () << " \n --------------------------------\n\n " ;
191+ }
156192};
157193
158194// / \brief Helper class for making a CFG reconverging.
@@ -1545,6 +1581,16 @@ class ControlFlowRewriter {
15451581 explicit LaneOriginInfo (WaveNode *Node, Register CondReg = {},
15461582 bool InvertCondition = false )
15471583 : Node(Node), CondReg(CondReg), InvertCondition(InvertCondition) {}
1584+
1585+ friend llvm::raw_ostream& operator <<(llvm::raw_ostream& os, const LaneOriginInfo& loi) {
1586+ if (loi.Node == nullptr )
1587+ os << " {WaveNode=nullptr" ;
1588+ else
1589+ os << " {WaveNode=" << loi.Node ->printableName ();
1590+ os << " , CondReg=" << loi.CondReg .id () << " , InvertCond:" << loi.InvertCondition << " }" ;
1591+ return os;
1592+ }
1593+
15481594 };
15491595
15501596 struct CFGNodeInfo {
@@ -1574,6 +1620,29 @@ class ControlFlowRewriter {
15741620 Register PrimarySuccessorExec;
15751621
15761622 explicit CFGNodeInfo (WaveNode *Node) : Node(Node) {}
1623+
1624+ friend llvm::raw_ostream& operator <<(llvm::raw_ostream& os, const CFGNodeInfo& ni) {
1625+ os << " CFGNodeInfo{\n OrigExit=" << ni.OrigExit << " , \n OrigCondition=" << ni.OrigCondition .id ();
1626+ if (ni.OrigSuccCond == nullptr )
1627+ os << " , \n OrigSuccCond=nullptr" ;
1628+ else
1629+ os << " , \n OrigSuccCond=" << ni.OrigSuccCond ->printableName ();
1630+ if (ni.OrigSuccFinal == nullptr )
1631+ os << " , \n OrigSuccFinal=nullptr" ;
1632+ else
1633+ os << " , \n OrigSuccFinal=" << ni.OrigSuccFinal ->printableName ();
1634+ os << " , \n PrimarySuccessorExec=" << ni.PrimarySuccessorExec .id ();
1635+ os << " , \n OriginBranch(Ri)={" ;
1636+ for (const auto &E : ni.OriginBranch ) {
1637+ os << " (" << E.getPointer ()->printableName () << " ," << E.getInt () << " )," ;
1638+ }
1639+ os << " }, \n origins(Ti):{" ;
1640+ for (const auto &E : ni.origins ) {
1641+ os << E << " ," ;
1642+ }
1643+ return os << " }\n }\n " ;
1644+ }
1645+
15771646 };
15781647
15791648 // / Information required to synthesize divergent terminators with a common
@@ -1784,6 +1853,7 @@ void ControlFlowRewriter::prepareWaveCfg() {
17841853// / establishing wave-level control flow and insert instructions for EXEC mask
17851854// / manipulation.
17861855void ControlFlowRewriter::rewrite () {
1856+ LLVM_DEBUG (dbgs () << " \n rewrite() begins\n " );
17871857 GCNLaneMaskAnalysis LMA (Function);
17881858 const AMDGPU::LaneMaskConstants &LMC = LMU.getLaneMaskConsts ();
17891859
@@ -1867,7 +1937,9 @@ void ControlFlowRewriter::rewrite() {
18671937 .addMBB (Other->Block );
18681938 }
18691939 }
1870-
1940+ LLVM_DEBUG (dbgs () << " CFG_BEGIN:" << Function.getName ().str () << " _pre\n " );
1941+ LLVM_DEBUG (Function.dump ());
1942+ LLVM_DEBUG (dbgs () << " CFG_END:" << Function.getName ().str () << " _pre\n " );
18711943 // Step 2: Insert lane masks and new terminators for divergent nodes.
18721944 //
18731945 // RegMap maps (block, register) -> (masked, inverted).
@@ -1879,31 +1951,42 @@ void ControlFlowRewriter::rewrite() {
18791951 Updater.setAccumulating (true );
18801952
18811953 for (WaveNode *LaneTarget : NodeOrder) {
1954+ LLVM_DEBUG (dbgs () << " \n PROCESSING NODE:" << LaneTarget->printableName () << " \n\n " );
1955+ LaneTarget->dump ();
18821956 CFGNodeInfo &LaneTargetInfo = NodeInfo.find (LaneTarget)->second ;
1957+ LLVM_DEBUG (dbgs () << LaneTargetInfo << ' \n ' );
18831958
18841959 if (!llvm::any_of (
18851960 LaneTargetInfo.OriginBranch ,
18861961 [](const auto &OriginBranch) { return OriginBranch.getInt (); })) {
18871962 // No divergent branches towards this node, nothing to be done.
1963+ LLVM_DEBUG (dbgs () << " No divergent branches towards this node, nothing to be done.\n " );
18881964 continue ;
18891965 }
18901966
1891- LLVM_DEBUG (dbgs () << " \n Divergent branches for "
1967+ LLVM_DEBUG (dbgs () << " Divergent branches for "
18921968 << LaneTarget->printableName () << ' \n ' );
18931969
18941970 // Step 2.1: Add conditions branching to LaneTarget to the Lane mask
18951971 // Updater.
18961972 // FIXME: we are creating a register here only to initialize the updater
18971973 Updater.init (LMU.createLaneMaskReg ());
18981974 Updater.addReset (*LaneTarget->Block , GCNLaneMaskUpdater::ResetInMiddle);
1975+ LLVM_DEBUG (dbgs () << " \n Mark ResetInMiddle(X): " << LaneTarget->printableName () << ' \n ' );
18991976 for (const auto &NodeDivergentPair : LaneTargetInfo.OriginBranch ) {
1977+ LLVM_DEBUG (dbgs () << " Mark ResetAtEnd(Ri): " << NodeDivergentPair.getPointer ()->printableName () << ' \n ' );
19001978 Updater.addReset (*NodeDivergentPair.getPointer ()->Block ,
19011979 GCNLaneMaskUpdater::ResetAtEnd);
19021980 }
1903-
1981+ LLVM_DEBUG ( dbgs () << " Iterating over Ti \n\n " );
19041982 for (const LaneOriginInfo &LaneOrigin : LaneTargetInfo.origins ) {
19051983 Register CondReg;
19061984
1985+ LLVM_DEBUG (dbgs () << " \n Origin(Ti): " << LaneOrigin << ' \n ' );
1986+ if (LaneOrigin.CondReg ){
1987+ dbgs () << " LaneOrigin.CondReg:" << printReg (LaneOrigin.CondReg , MRI.getTargetRegisterInfo (), 0 , &MRI) << " \n " ;
1988+ }
1989+
19071990 if (!LaneOrigin.CondReg ) {
19081991 assert (!LaneOrigin.InvertCondition );
19091992 CondReg = getAllOnes ();
@@ -1927,13 +2010,13 @@ void ControlFlowRewriter::rewrite() {
19272010 LaneOrigin.Node ->Block ->getFirstTerminator (), {},
19282011 TII.get (LMC.CSelectOpc ), CondReg)
19292012 .addReg (LMC.ExecReg )
1930- .addImm (0 );
2013+ .addImm (0 )-> dump () ;
19312014 } else {
19322015 BuildMI (*LaneOrigin.Node ->Block ,
19332016 LaneOrigin.Node ->Block ->getFirstTerminator (), {},
19342017 TII.get (LMC.CSelectOpc ), CondReg)
19352018 .addImm (0 )
1936- .addReg (LMC.ExecReg );
2019+ .addReg (LMC.ExecReg )-> dump () ;
19372020 }
19382021 } else {
19392022 CondReg = LaneOrigin.CondReg ;
@@ -1944,7 +2027,7 @@ void ControlFlowRewriter::rewrite() {
19442027 LaneOrigin.Node ->Block ->getFirstTerminator (), {},
19452028 TII.get (LMC.AndOpc ), CondReg)
19462029 .addReg (LMC.ExecReg )
1947- .addReg (Prev);
2030+ .addReg (Prev)-> dump () ;
19482031
19492032 RegMap[std::make_pair (LaneOrigin.Node ->Block , LaneOrigin.CondReg )]
19502033 .first = CondReg;
@@ -1965,7 +2048,7 @@ void ControlFlowRewriter::rewrite() {
19652048 LaneOrigin.Node ->Block ->getFirstTerminator (), {},
19662049 TII.get (LMC.XorOpc ), CondReg)
19672050 .addReg (LaneOrigin.CondReg )
1968- .addImm (-1 );
2051+ .addImm (-1 )-> dump () ;
19692052
19702053 RegMap[std::make_pair (LaneOrigin.Node ->Block , LaneOrigin.CondReg )]
19712054 .second = CondReg;
@@ -1975,20 +2058,23 @@ void ControlFlowRewriter::rewrite() {
19752058 }
19762059
19772060 LLVM_DEBUG (
1978- dbgs () << " available @ " << LaneOrigin.Node ->printableName () << " : "
2061+ dbgs () << " Contributions @ " << LaneOrigin.Node ->printableName () << " : "
19792062 << printReg (CondReg, MRI.getTargetRegisterInfo (), 0 , &MRI)
19802063 << ' \n ' );
19812064
19822065 Updater.addAvailable (*LaneOrigin.Node ->Block , CondReg);
19832066 }
19842067
2068+ LLVM_DEBUG (dbgs () << " Iterating over Ri\n\n " );
19852069 // Step 2.2: Synthesize EXEC updates and branch instructions.
19862070 for (const auto &NodeDivergentPair : LaneTargetInfo.OriginBranch ) {
19872071 if (!NodeDivergentPair.getInt ())
19882072 continue ; // not a divergent branch
1989-
2073+ LLVM_DEBUG ( dbgs () << " Synthesize EXEC updates and branch instructions for " << NodeDivergentPair. getPointer ()-> printableName () << " \n " );
19902074 WaveNode *OriginNode = NodeDivergentPair.getPointer ();
19912075 CFGNodeInfo &OriginCFGNodeInfo = NodeInfo.find (OriginNode)->second ;
2076+ LLVM_DEBUG (dbgs () << OriginCFGNodeInfo << ' \n ' );
2077+
19922078 OriginCFGNodeInfo.PrimarySuccessorExec =
19932079 Updater.getValueAfterMerge (*OriginNode->Block );
19942080
@@ -2001,37 +2087,46 @@ void ControlFlowRewriter::rewrite() {
20012087
20022088 BuildMI (*OriginNode->Block , OriginNode->Block ->end (), {},
20032089 TII.get (LMC.MovTermOpc ), LMC.ExecReg )
2004- .addReg (OriginCFGNodeInfo.PrimarySuccessorExec );
2090+ .addReg (OriginCFGNodeInfo.PrimarySuccessorExec )-> dump () ;
20052091 BuildMI (*OriginNode->Block , OriginNode->Block ->end (), {},
2006- TII.get (AMDGPU::SI_WAVE_CF_EDGE));
2092+ TII.get (AMDGPU::SI_WAVE_CF_EDGE))-> dump () ;
20072093 BuildMI (*OriginNode->Block , OriginNode->Block ->end (), {},
20082094 TII.get (AMDGPU::S_CBRANCH_EXECZ))
2009- .addMBB (OriginNode->Successors [1 ]->Block );
2095+ .addMBB (OriginNode->Successors [1 ]->Block )-> dump () ;
20102096 BuildMI (*OriginNode->Block , OriginNode->Block ->end (), {},
20112097 TII.get (AMDGPU::S_BRANCH))
2012- .addMBB (OriginNode->Successors [0 ]->Block );
2098+ .addMBB (OriginNode->Successors [0 ]->Block )->dump ();
2099+
2100+ LLVM_DEBUG (dbgs () << " \n NodeDivergentPair:" << NodeDivergentPair.getPointer ()->printableName () << " ," << NodeDivergentPair.getInt () << " complete...\n " );
20132101 }
20142102
2103+ LLVM_DEBUG (dbgs () << " CFG_BEGIN:" << Function.getName ().str () << " _" << LaneTarget->getName () << " \n " );
20152104 LLVM_DEBUG (Function.dump ());
2105+ LLVM_DEBUG (dbgs () << " CFG_END:" << Function.getName ().str () << " _" << LaneTarget->getName () << " \n " );
2106+
20162107 }
2108+ LLVM_DEBUG (dbgs () << " \n Insert rejoin masks\n " );
20172109
20182110 // Step 3: Insert rejoin masks.
2111+ LLVM_DEBUG (dbgs () << " Iterate over secondary nodes\n " );
20192112 for (WaveNode *Secondary : ReconvergeCfg.nodes ()) {
20202113 if (!Secondary->IsSecondary )
20212114 continue ;
20222115
20232116 LLVM_DEBUG (dbgs () << " \n Rejoin @ " << Secondary->printableName () << ' \n ' );
2024-
2117+ Secondary-> dump ();
20252118 // FIXME: we are creating a register here only to initialize the updater
20262119 Updater.init (LMU.createLaneMaskReg ());
20272120 Updater.addReset (*Secondary->Block , GCNLaneMaskUpdater::ResetInMiddle);
2121+ LLVM_DEBUG (dbgs () << " \n Mark ResetInMiddle(X): " << Secondary->printableName () << ' \n ' );
20282122
20292123 for (WaveNode *Pred : Secondary->Predecessors ) {
20302124 if (!Pred->IsDivergent || Pred->Successors .size () == 1 )
20312125 continue ;
20322126
20332127 CFGNodeInfo &PredInfo = NodeInfo.find (Pred)->second ;
2034- Register PrimaryExec = PredInfo.PrimarySuccessorExec ;
2128+ Register PrimaryExec = PredInfo.PrimarySuccessorExec ;
2129+ LLVM_DEBUG (dbgs () << " Pred:" << Pred->Block ->name () << " \n PrimaryExec:" << printReg (PrimaryExec,MRI.getTargetRegisterInfo (), 0 , &MRI) << " \n " );
20352130
20362131 MachineInstr *PrimaryExecDef;
20372132 for (;;) {
@@ -2041,6 +2136,10 @@ void ControlFlowRewriter::rewrite() {
20412136 PrimaryExec = PrimaryExecDef->getOperand (1 ).getReg ();
20422137 }
20432138
2139+ LLVM_DEBUG (dbgs () << " PrimaryExecDef:" );
2140+ LLVM_DEBUG (PrimaryExecDef->dump ());
2141+ LLVM_DEBUG (dbgs () << " \n " );
2142+
20442143 // Rejoin = EXEC ^ PrimaryExec
20452144 //
20462145 // Fold immediately if PrimaryExec was obtained via XOR as well.
@@ -2072,11 +2171,11 @@ void ControlFlowRewriter::rewrite() {
20722171 BuildMI (*Pred->Block , Pred->Block ->getFirstTerminator (), {},
20732172 TII.get (LMC.XorOpc ), Rejoin)
20742173 .addReg (LMC.ExecReg )
2075- .addReg (PrimaryExec);
2174+ .addReg (PrimaryExec)-> dump () ;
20762175 }
20772176
20782177 LLVM_DEBUG (
2079- dbgs () << " available @ " << Pred->printableName () << " : "
2178+ dbgs () << " Rejoin available @ " << Pred->printableName () << " : "
20802179 << printReg (Rejoin, MRI.getTargetRegisterInfo (), 0 , &MRI)
20812180 << ' \n ' );
20822181
@@ -2087,12 +2186,20 @@ void ControlFlowRewriter::rewrite() {
20872186 BuildMI (*Secondary->Block , Secondary->Block ->getFirstNonPHI (), {},
20882187 TII.get (LMC.OrOpc ), LMC.ExecReg )
20892188 .addReg (LMC.ExecReg )
2090- .addReg (Rejoin);
2189+ .addReg (Rejoin)-> dump () ;
20912190
2191+ LLVM_DEBUG (dbgs () << " CFG_BEGIN:" << Function.getName ().str () << " _" << Secondary->Block ->name () << " .rejoin\n " );
20922192 LLVM_DEBUG (Function.dump ());
2193+ LLVM_DEBUG (dbgs () << " CFG_END:" << Function.getName ().str () << " _" << Secondary->Block ->name () << " .rejoin\n " );
2194+
2195+
20932196 }
20942197
20952198 Updater.cleanup ();
2199+
2200+ LLVM_DEBUG (dbgs () << " CFG_BEGIN:" << Function.getName ().str () << " _clean\n " );
2201+ LLVM_DEBUG (Function.dump ());
2202+ LLVM_DEBUG (dbgs () << " CFG_END:" << Function.getName ().str () << " _clean\n " );
20962203}
20972204
20982205namespace {
0 commit comments