@@ -153,6 +153,42 @@ struct WaveNode {
153153 Out << " <flow-" << FlowNum << ' >' ;
154154 });
155155 }
156+
157+ std::string getName (){
158+ std::string str;
159+ if (Block)
160+ str = Block->name ();
161+ if (Block && FlowNum)
162+ str = str + " ." ;
163+ if (FlowNum)
164+ str = str + " <flow-" + std::to_string (FlowNum) + " >" ;
165+ return str;
166+ }
167+ void dump () const {
168+ dbgs () << " --------------------------------" << ' \n ' ;
169+ dbgs () << " WaveNode: " << printableName () << ' \n ' ;
170+ dbgs () << " OrderIndex: " << OrderIndex << ' \n ' ;
171+ dbgs () << " Predecessors: " ;
172+ for (WaveNode *Pred : Predecessors)
173+ dbgs () << Pred->printableName () << ' ' ;
174+ dbgs () << ' \n ' ;
175+ dbgs () << " Successors: " ;
176+ for (WaveNode *Succ : Successors)
177+ dbgs () << Succ->printableName () << ' ' ;
178+ dbgs () << ' \n ' ;
179+ dbgs () << " LanePredecessors: " ;
180+ for (const LaneEdge &LanePred : LanePredecessors)
181+ dbgs () << " (lane=" << LanePred.Lane ->printableName ()
182+ << " , wave=" << LanePred.Wave ->printableName () << " ) " ;
183+ dbgs () << ' \n ' ;
184+ dbgs () << " LaneSuccessors: " ;
185+ for (const LaneEdge &LaneSucc : LaneSuccessors)
186+ dbgs () << " (lane=" << LaneSucc.Lane ->printableName ()
187+ << " , wave=" << LaneSucc.Wave ->printableName () << " ) " ;
188+ if (LatestPostDom != nullptr ) dbgs () << " \n latestPostDom:" << LatestPostDom->printableName ();
189+ else dbgs () << " latestPostDom:NULL" ;
190+ dbgs () << " \n --------------------------------\n\n " ;
191+ }
156192};
157193
158194// / \brief Helper class for making a CFG reconverging.
@@ -1545,6 +1581,16 @@ class ControlFlowRewriter {
15451581 explicit LaneOriginInfo (WaveNode *Node, Register CondReg = {},
15461582 bool InvertCondition = false )
15471583 : Node(Node), CondReg(CondReg), InvertCondition(InvertCondition) {}
1584+
1585+ friend llvm::raw_ostream& operator <<(llvm::raw_ostream& os, const LaneOriginInfo& loi) {
1586+ if (loi.Node == nullptr )
1587+ os << " {WaveNode=nullptr" ;
1588+ else
1589+ os << " {WaveNode=" << loi.Node ->printableName ();
1590+ os << " , CondReg=" << loi.CondReg .id () << " , InvertCond:" << loi.InvertCondition << " }" ;
1591+ return os;
1592+ }
1593+
15481594 };
15491595
15501596 struct CFGNodeInfo {
@@ -1574,6 +1620,29 @@ class ControlFlowRewriter {
15741620 Register PrimarySuccessorExec;
15751621
15761622 explicit CFGNodeInfo (WaveNode *Node) : Node(Node) {}
1623+
1624+ friend llvm::raw_ostream& operator <<(llvm::raw_ostream& os, const CFGNodeInfo& ni) {
1625+ os << " CFGNodeInfo{\n OrigExit=" << ni.OrigExit << " , \n OrigCondition=" << ni.OrigCondition .id ();
1626+ if (ni.OrigSuccCond == nullptr )
1627+ os << " , \n OrigSuccCond=nullptr" ;
1628+ else
1629+ os << " , \n OrigSuccCond=" << ni.OrigSuccCond ->printableName ();
1630+ if (ni.OrigSuccFinal == nullptr )
1631+ os << " , \n OrigSuccFinal=nullptr" ;
1632+ else
1633+ os << " , \n OrigSuccFinal=" << ni.OrigSuccFinal ->printableName ();
1634+ os << " , \n PrimarySuccessorExec=" << ni.PrimarySuccessorExec .id ();
1635+ os << " , \n OriginBranch(Ri)={" ;
1636+ for (const auto &E : ni.OriginBranch ) {
1637+ os << " (" << E.getPointer ()->printableName () << " ," << E.getInt () << " )," ;
1638+ }
1639+ os << " }, \n origins(Ti):{" ;
1640+ for (const auto &E : ni.origins ) {
1641+ os << E << " ," ;
1642+ }
1643+ return os << " }\n }\n " ;
1644+ }
1645+
15771646 };
15781647
15791648 // / Information required to synthesize divergent terminators with a common
@@ -1784,6 +1853,7 @@ void ControlFlowRewriter::prepareWaveCfg() {
17841853// / establishing wave-level control flow and insert instructions for EXEC mask
17851854// / manipulation.
17861855void ControlFlowRewriter::rewrite () {
1856+ LLVM_DEBUG (dbgs () << " \n rewrite() begins\n " );
17871857 GCNLaneMaskAnalysis LMA (Function);
17881858 const AMDGPU::LaneMaskConstants &LMC = LMU.getLaneMaskConsts ();
17891859
@@ -1839,7 +1909,10 @@ void ControlFlowRewriter::rewrite() {
18391909 Opcode = AMDGPU::S_CBRANCH_SCC1;
18401910 } else {
18411911 Register CondReg = Info.OrigCondition ;
1842- if (!LMA.isSubsetOfExec (CondReg, *Node->Block )) {
1912+ bool isCondRegSubsetOfExec = LMA.isSubsetOfExec (CondReg, *Node->Block );
1913+ LLVM_DEBUG (dbgs () << " isSubsetOfExec(" << printReg (CondReg, MRI.getTargetRegisterInfo (), 0 , &MRI) << " ," << Node->Block ->name () << " ) : " << isCondRegSubsetOfExec << " \n " );
1914+
1915+ if (!isCondRegSubsetOfExec) {
18431916 CondReg = LMU.createLaneMaskReg ();
18441917 BuildMI (*Node->Block , Node->Block ->end (), {}, TII.get (LMC.AndOpc ),
18451918 CondReg)
@@ -1867,7 +1940,9 @@ void ControlFlowRewriter::rewrite() {
18671940 .addMBB (Other->Block );
18681941 }
18691942 }
1870-
1943+ LLVM_DEBUG (dbgs () << " CFG_BEGIN:" << Function.getName ().str () << " _pre\n " );
1944+ LLVM_DEBUG (Function.dump ());
1945+ LLVM_DEBUG (dbgs () << " CFG_END:" << Function.getName ().str () << " _pre\n " );
18711946 // Step 2: Insert lane masks and new terminators for divergent nodes.
18721947 //
18731948 // RegMap maps (block, register) -> (masked, inverted).
@@ -1879,31 +1954,42 @@ void ControlFlowRewriter::rewrite() {
18791954 Updater.setAccumulating (true );
18801955
18811956 for (WaveNode *LaneTarget : NodeOrder) {
1957+ LLVM_DEBUG (dbgs () << " \n PROCESSING NODE:" << LaneTarget->printableName () << " \n\n " );
1958+ LaneTarget->dump ();
18821959 CFGNodeInfo &LaneTargetInfo = NodeInfo.find (LaneTarget)->second ;
1960+ LLVM_DEBUG (dbgs () << LaneTargetInfo << ' \n ' );
18831961
18841962 if (!llvm::any_of (
18851963 LaneTargetInfo.OriginBranch ,
18861964 [](const auto &OriginBranch) { return OriginBranch.getInt (); })) {
18871965 // No divergent branches towards this node, nothing to be done.
1966+ LLVM_DEBUG (dbgs () << " No divergent branches towards this node, nothing to be done.\n " );
18881967 continue ;
18891968 }
18901969
1891- LLVM_DEBUG (dbgs () << " \n Divergent branches for "
1970+ LLVM_DEBUG (dbgs () << " Divergent branches for "
18921971 << LaneTarget->printableName () << ' \n ' );
18931972
18941973 // Step 2.1: Add conditions branching to LaneTarget to the Lane mask
18951974 // Updater.
18961975 // FIXME: we are creating a register here only to initialize the updater
18971976 Updater.init (LMU.createLaneMaskReg ());
18981977 Updater.addReset (*LaneTarget->Block , GCNLaneMaskUpdater::ResetInMiddle);
1978+ LLVM_DEBUG (dbgs () << " \n Mark ResetInMiddle(X): " << LaneTarget->printableName () << ' \n ' );
18991979 for (const auto &NodeDivergentPair : LaneTargetInfo.OriginBranch ) {
1980+ LLVM_DEBUG (dbgs () << " Mark ResetAtEnd(Ri): " << NodeDivergentPair.getPointer ()->printableName () << ' \n ' );
19001981 Updater.addReset (*NodeDivergentPair.getPointer ()->Block ,
19011982 GCNLaneMaskUpdater::ResetAtEnd);
19021983 }
1903-
1984+ LLVM_DEBUG ( dbgs () << " Iterating over Ti \n\n " );
19041985 for (const LaneOriginInfo &LaneOrigin : LaneTargetInfo.origins ) {
19051986 Register CondReg;
19061987
1988+ LLVM_DEBUG (dbgs () << " \n Origin(Ti): " << LaneOrigin << ' \n ' );
1989+ if (LaneOrigin.CondReg ){
1990+ dbgs () << " LaneOrigin.CondReg:" << printReg (LaneOrigin.CondReg , MRI.getTargetRegisterInfo (), 0 , &MRI) << " \n " ;
1991+ }
1992+
19071993 if (!LaneOrigin.CondReg ) {
19081994 assert (!LaneOrigin.InvertCondition );
19091995 CondReg = getAllOnes ();
@@ -1927,29 +2013,30 @@ void ControlFlowRewriter::rewrite() {
19272013 LaneOrigin.Node ->Block ->getFirstTerminator (), {},
19282014 TII.get (LMC.CSelectOpc ), CondReg)
19292015 .addReg (LMC.ExecReg )
1930- .addImm (0 );
2016+ .addImm (0 )-> dump () ;
19312017 } else {
19322018 BuildMI (*LaneOrigin.Node ->Block ,
19332019 LaneOrigin.Node ->Block ->getFirstTerminator (), {},
19342020 TII.get (LMC.CSelectOpc ), CondReg)
19352021 .addImm (0 )
1936- .addReg (LMC.ExecReg );
2022+ .addReg (LMC.ExecReg )-> dump () ;
19372023 }
19382024 } else {
19392025 CondReg = LaneOrigin.CondReg ;
1940- if (!LMA.isSubsetOfExec (LaneOrigin.CondReg , *LaneOrigin.Node ->Block )) {
2026+ bool isCondRegSubsetOfExec = LMA.isSubsetOfExec (LaneOrigin.CondReg , *LaneOrigin.Node ->Block );
2027+ LLVM_DEBUG (dbgs () << " isSubsetOfExec(" << printReg (LaneOrigin.CondReg , MRI.getTargetRegisterInfo (), 0 , &MRI) << " ," << LaneOrigin.Node ->Block ->name () << " ) : " << isCondRegSubsetOfExec << " \n " );
2028+ if (!isCondRegSubsetOfExec) {
19412029 Register Prev = CondReg;
19422030 CondReg = LMU.createLaneMaskReg ();
19432031 BuildMI (*LaneOrigin.Node ->Block ,
19442032 LaneOrigin.Node ->Block ->getFirstTerminator (), {},
19452033 TII.get (LMC.AndOpc ), CondReg)
19462034 .addReg (LMC.ExecReg )
1947- .addReg (Prev);
2035+ .addReg (Prev)-> dump () ;
19482036
19492037 RegMap[std::make_pair (LaneOrigin.Node ->Block , LaneOrigin.CondReg )]
19502038 .first = CondReg;
19512039 }
1952-
19532040 if (LaneOrigin.InvertCondition ) {
19542041 // CondReg = EXEC ^ origCond;
19552042 //
@@ -1965,7 +2052,7 @@ void ControlFlowRewriter::rewrite() {
19652052 LaneOrigin.Node ->Block ->getFirstTerminator (), {},
19662053 TII.get (LMC.XorOpc ), CondReg)
19672054 .addReg (LaneOrigin.CondReg )
1968- .addImm (-1 );
2055+ .addImm (-1 )-> dump () ;
19692056
19702057 RegMap[std::make_pair (LaneOrigin.Node ->Block , LaneOrigin.CondReg )]
19712058 .second = CondReg;
@@ -1975,20 +2062,23 @@ void ControlFlowRewriter::rewrite() {
19752062 }
19762063
19772064 LLVM_DEBUG (
1978- dbgs () << " available @ " << LaneOrigin.Node ->printableName () << " : "
2065+ dbgs () << " Contributions @ " << LaneOrigin.Node ->printableName () << " : "
19792066 << printReg (CondReg, MRI.getTargetRegisterInfo (), 0 , &MRI)
19802067 << ' \n ' );
19812068
19822069 Updater.addAvailable (*LaneOrigin.Node ->Block , CondReg);
19832070 }
19842071
2072+ LLVM_DEBUG (dbgs () << " Iterating over Ri\n\n " );
19852073 // Step 2.2: Synthesize EXEC updates and branch instructions.
19862074 for (const auto &NodeDivergentPair : LaneTargetInfo.OriginBranch ) {
19872075 if (!NodeDivergentPair.getInt ())
19882076 continue ; // not a divergent branch
1989-
2077+ LLVM_DEBUG ( dbgs () << " Synthesize EXEC updates and branch instructions for " << NodeDivergentPair. getPointer ()-> printableName () << " \n " );
19902078 WaveNode *OriginNode = NodeDivergentPair.getPointer ();
19912079 CFGNodeInfo &OriginCFGNodeInfo = NodeInfo.find (OriginNode)->second ;
2080+ LLVM_DEBUG (dbgs () << OriginCFGNodeInfo << ' \n ' );
2081+
19922082 OriginCFGNodeInfo.PrimarySuccessorExec =
19932083 Updater.getValueAfterMerge (*OriginNode->Block );
19942084
@@ -2001,37 +2091,46 @@ void ControlFlowRewriter::rewrite() {
20012091
20022092 BuildMI (*OriginNode->Block , OriginNode->Block ->end (), {},
20032093 TII.get (LMC.MovTermOpc ), LMC.ExecReg )
2004- .addReg (OriginCFGNodeInfo.PrimarySuccessorExec );
2094+ .addReg (OriginCFGNodeInfo.PrimarySuccessorExec )-> dump () ;
20052095 BuildMI (*OriginNode->Block , OriginNode->Block ->end (), {},
2006- TII.get (AMDGPU::SI_WAVE_CF_EDGE));
2096+ TII.get (AMDGPU::SI_WAVE_CF_EDGE))-> dump () ;
20072097 BuildMI (*OriginNode->Block , OriginNode->Block ->end (), {},
20082098 TII.get (AMDGPU::S_CBRANCH_EXECZ))
2009- .addMBB (OriginNode->Successors [1 ]->Block );
2099+ .addMBB (OriginNode->Successors [1 ]->Block )-> dump () ;
20102100 BuildMI (*OriginNode->Block , OriginNode->Block ->end (), {},
20112101 TII.get (AMDGPU::S_BRANCH))
2012- .addMBB (OriginNode->Successors [0 ]->Block );
2102+ .addMBB (OriginNode->Successors [0 ]->Block )->dump ();
2103+
2104+ LLVM_DEBUG (dbgs () << " \n NodeDivergentPair:" << NodeDivergentPair.getPointer ()->printableName () << " ," << NodeDivergentPair.getInt () << " complete...\n " );
20132105 }
20142106
2107+ LLVM_DEBUG (dbgs () << " CFG_BEGIN:" << Function.getName ().str () << " _" << LaneTarget->getName () << " \n " );
20152108 LLVM_DEBUG (Function.dump ());
2109+ LLVM_DEBUG (dbgs () << " CFG_END:" << Function.getName ().str () << " _" << LaneTarget->getName () << " \n " );
2110+
20162111 }
2112+ LLVM_DEBUG (dbgs () << " \n Insert rejoin masks\n " );
20172113
20182114 // Step 3: Insert rejoin masks.
2115+ LLVM_DEBUG (dbgs () << " Iterate over secondary nodes\n " );
20192116 for (WaveNode *Secondary : ReconvergeCfg.nodes ()) {
20202117 if (!Secondary->IsSecondary )
20212118 continue ;
20222119
20232120 LLVM_DEBUG (dbgs () << " \n Rejoin @ " << Secondary->printableName () << ' \n ' );
2024-
2121+ Secondary-> dump ();
20252122 // FIXME: we are creating a register here only to initialize the updater
20262123 Updater.init (LMU.createLaneMaskReg ());
20272124 Updater.addReset (*Secondary->Block , GCNLaneMaskUpdater::ResetInMiddle);
2125+ LLVM_DEBUG (dbgs () << " \n Mark ResetInMiddle(X): " << Secondary->printableName () << ' \n ' );
20282126
20292127 for (WaveNode *Pred : Secondary->Predecessors ) {
20302128 if (!Pred->IsDivergent || Pred->Successors .size () == 1 )
20312129 continue ;
20322130
20332131 CFGNodeInfo &PredInfo = NodeInfo.find (Pred)->second ;
2034- Register PrimaryExec = PredInfo.PrimarySuccessorExec ;
2132+ Register PrimaryExec = PredInfo.PrimarySuccessorExec ;
2133+ LLVM_DEBUG (dbgs () << " Pred:" << Pred->Block ->name () << " \n PrimaryExec:" << printReg (PrimaryExec,MRI.getTargetRegisterInfo (), 0 , &MRI) << " \n " );
20352134
20362135 MachineInstr *PrimaryExecDef;
20372136 for (;;) {
@@ -2041,6 +2140,10 @@ void ControlFlowRewriter::rewrite() {
20412140 PrimaryExec = PrimaryExecDef->getOperand (1 ).getReg ();
20422141 }
20432142
2143+ LLVM_DEBUG (dbgs () << " PrimaryExecDef:" );
2144+ LLVM_DEBUG (PrimaryExecDef->dump ());
2145+ LLVM_DEBUG (dbgs () << " \n " );
2146+
20442147 // Rejoin = EXEC ^ PrimaryExec
20452148 //
20462149 // Fold immediately if PrimaryExec was obtained via XOR as well.
@@ -2072,11 +2175,11 @@ void ControlFlowRewriter::rewrite() {
20722175 BuildMI (*Pred->Block , Pred->Block ->getFirstTerminator (), {},
20732176 TII.get (LMC.XorOpc ), Rejoin)
20742177 .addReg (LMC.ExecReg )
2075- .addReg (PrimaryExec);
2178+ .addReg (PrimaryExec)-> dump () ;
20762179 }
20772180
20782181 LLVM_DEBUG (
2079- dbgs () << " available @ " << Pred->printableName () << " : "
2182+ dbgs () << " Rejoin available @ " << Pred->printableName () << " : "
20802183 << printReg (Rejoin, MRI.getTargetRegisterInfo (), 0 , &MRI)
20812184 << ' \n ' );
20822185
@@ -2087,12 +2190,20 @@ void ControlFlowRewriter::rewrite() {
20872190 BuildMI (*Secondary->Block , Secondary->Block ->getFirstNonPHI (), {},
20882191 TII.get (LMC.OrOpc ), LMC.ExecReg )
20892192 .addReg (LMC.ExecReg )
2090- .addReg (Rejoin);
2193+ .addReg (Rejoin)-> dump () ;
20912194
2195+ LLVM_DEBUG (dbgs () << " CFG_BEGIN:" << Function.getName ().str () << " _" << Secondary->Block ->name () << " .rejoin\n " );
20922196 LLVM_DEBUG (Function.dump ());
2197+ LLVM_DEBUG (dbgs () << " CFG_END:" << Function.getName ().str () << " _" << Secondary->Block ->name () << " .rejoin\n " );
2198+
2199+
20932200 }
20942201
20952202 Updater.cleanup ();
2203+
2204+ LLVM_DEBUG (dbgs () << " CFG_BEGIN:" << Function.getName ().str () << " _clean\n " );
2205+ LLVM_DEBUG (Function.dump ());
2206+ LLVM_DEBUG (dbgs () << " CFG_END:" << Function.getName ().str () << " _clean\n " );
20962207}
20972208
20982209namespace {
0 commit comments