@@ -35,7 +35,7 @@ bool InlineRaytracing::LowerAllocations(Function &F) {
35
35
/*
36
36
* struct RQObjectType
37
37
* {
38
- * void* addrspace(2) globalBufferPtr ;
38
+ * uint32_t slot ;
39
39
* uint32_t rayQueryPackedData;
40
40
* }
41
41
*/
@@ -51,21 +51,21 @@ bool InlineRaytracing::LowerAllocations(Function &F) {
51
51
if (AllocateRQInstructions.empty ())
52
52
return false ;
53
53
54
+ auto *rtstackTy = IRB.getRTStack2PtrTy (false );
54
55
auto *globalBufferPtrTy =
55
56
IRB.getRayDispatchGlobalDataPtrTy (*F.getParent (), ADDRESS_SPACE_CONSTANT);
56
57
57
58
if (!m_RQObjectType)
58
- m_RQObjectType = StructType::create (
59
- *m_pCGCtx->getLLVMContext (),
60
- {globalBufferPtrTy , IRB.getInt32Ty ()}, name);
59
+ m_RQObjectType =
60
+ StructType::create ( *m_pCGCtx->getLLVMContext (),
61
+ {IRB. getInt32Ty () , IRB.getInt32Ty ()}, name);
61
62
62
- auto *getGlobalBufferPtrFnTy =
63
- FunctionType::get (globalBufferPtrTy, IRB.getInt32Ty (), false );
64
- auto *getGlobalBufferPtrFn = m_Functions[GET_GLOBAL_BUFFER_PTR ] =
65
- Function::Create (getGlobalBufferPtrFnTy , GlobalValue::PrivateLinkage,
66
- VALUE_NAME (" getGlobalBufferPtrFn " ), F.getParent ());
63
+ auto *createRQObjectFnTy = FunctionType::get (m_RQObjectType-> getPointerTo (),
64
+ IRB.getInt32Ty (), false );
65
+ auto *createRQObjectFn = m_Functions[CREATE_RQ_OBJECT ] =
66
+ Function::Create (createRQObjectFnTy , GlobalValue::PrivateLinkage,
67
+ VALUE_NAME (" createRQObject " ), F.getParent ());
67
68
68
- auto *rtstackTy = IRB.getRTStack2PtrTy (false );
69
69
auto *getStackPointerFromGlobalBufferPtrFnTy =
70
70
FunctionType::get (rtstackTy, globalBufferPtrTy, false );
71
71
auto *getStackPointerFromGlobalBufferPtrFn =
@@ -84,7 +84,6 @@ bool InlineRaytracing::LowerAllocations(Function &F) {
84
84
85
85
getStackPointerFromGlobalBufferPtrFn->addParamAttr (
86
86
0 , llvm::Attribute::NoCapture);
87
- getRQHandleFromRQObjectFn->addParamAttr (0 , llvm::Attribute::NoCapture);
88
87
89
88
// allocate rayquery instructions return i32 handle
90
89
// we want all rayqueries to be represented via our struct
@@ -98,42 +97,35 @@ bool InlineRaytracing::LowerAllocations(Function &F) {
98
97
cast<CallInst>(I)->addFnAttr (llvm::Attribute::ReadOnly);
99
98
100
99
IRB.SetInsertPoint (F.getEntryBlock ().getFirstNonPHI ());
101
- auto *aI = IRB. CreateAlloca (m_RQObjectType) ;
100
+ Value *rqObject = nullptr ;
102
101
103
102
IRB.SetInsertPoint (I);
104
103
if (m_pCGCtx->syncRTCallsNeedSplitting ()) {
105
- // create 2 globals and select the appropriate one depending on the lane
106
- // id
107
- auto *globalBufferPtr1 = IRB.CreateCall (
108
- getGlobalBufferPtrFn, UndefValue::get (IRB.getInt32Ty ()));
109
- auto *globalBufferPtr2 = IRB.CreateCall (
110
- getGlobalBufferPtrFn, UndefValue::get (IRB.getInt32Ty ()));
104
+ // create 2 rq objects and select one based on the lane id
105
+
106
+ auto *rqObject1 =
107
+ IRB.CreateCall (createRQObjectFn, UndefValue::get (IRB.getInt32Ty ()));
108
+ auto *rqObject2 =
109
+ IRB.CreateCall (createRQObjectFn, UndefValue::get (IRB.getInt32Ty ()));
110
+
111
111
auto *laneId = IRB.get32BitLaneID ();
112
112
auto *cond =
113
113
IRB.CreateICmpULT (laneId, IRB.getInt32 (numLanes (SIMDMode::SIMD16)));
114
- auto *globalBufferPtr =
115
- IRB.CreateSelect (cond, globalBufferPtr1, globalBufferPtr2,
116
- VALUE_NAME (" GlobalBufferPtr" ));
117
- IRB.CreateStore (globalBufferPtr, IRB.CreateInBoundsGEP (
118
- m_RQObjectType, aI,
119
- {IRB.getInt32 (0 ), IRB.getInt32 (0 )}));
114
+ rqObject =
115
+ IRB.CreateSelect (cond, rqObject1, rqObject2, VALUE_NAME (" rqObject" ));
120
116
} else {
121
- auto *globalBufferPtr = IRB.CreateCall (getGlobalBufferPtrFn,
122
- UndefValue::get (IRB.getInt32Ty ()),
123
- VALUE_NAME (" GlobalBufferPtr" ));
124
- IRB.CreateStore (globalBufferPtr, IRB.CreateInBoundsGEP (
125
- m_RQObjectType, aI,
126
- {IRB.getInt32 (0 ), IRB.getInt32 (0 )}));
117
+ rqObject =
118
+ IRB.CreateCall (createRQObjectFn, UndefValue::get (IRB.getInt32Ty ()));
127
119
}
128
120
129
121
// initialize it to done so when app calls proceed without tracerayinline,
130
122
// we don't traverse over garbage
131
- setPackedData (IRB, aI ,
123
+ setPackedData (IRB, rqObject ,
132
124
{IRB.getInt32 (TRACE_RAY_DONE), IRB.getInt32 (0 ),
133
125
IRB.getInt32 (0 ), IRB.getInt32 (0 ),
134
126
IRB.getInt32 (CommittedHit)});
135
127
136
- v2vMap[I] = aI ;
128
+ v2vMap[I] = rqObject ;
137
129
138
130
SmallVector<Use *> worklist;
139
131
@@ -632,7 +624,7 @@ InlineRaytracing::LivenessDataMap
632
624
InlineRaytracing::AnalyzeLiveness (Function &F, DominatorTree &DT,
633
625
LoopInfo &LI) {
634
626
LivenessDataMap data;
635
- for (auto *I : m_Functions[GET_GLOBAL_BUFFER_PTR ]->users ()) {
627
+ for (auto *I : m_Functions[CREATE_RQ_OBJECT ]->users ()) {
636
628
data.insert (
637
629
std::make_pair (cast<Instruction>(I),
638
630
ProcessInstruction (cast<Instruction>(I), DT, LI)));
@@ -641,8 +633,8 @@ InlineRaytracing::AnalyzeLiveness(Function &F, DominatorTree &DT,
641
633
return data;
642
634
}
643
635
644
- void InlineRaytracing::AssignGlobalBuffers (
645
- Function &F, const LivenessDataMap &livenessDataMap) {
636
+ void InlineRaytracing::AssignSlots (Function &F,
637
+ const LivenessDataMap &livenessDataMap) {
646
638
RTBuilder IRB (&*F.getEntryBlock ().begin (), *m_pCGCtx);
647
639
SmallVector<SmallVector<const LivenessData *>, 2 > occupancyMap;
648
640
@@ -707,7 +699,7 @@ void InlineRaytracing::InsertCacheControl(
707
699
}
708
700
709
701
void InlineRaytracing::StopAndStartRayquery (RTBuilder &IRB, Instruction *I,
710
- Instruction *globalBufferPtr,
702
+ Value *globalBufferPtr,
711
703
bool doSpillFill,
712
704
bool doRQCheckRelease) {
713
705
IRB.SetInsertPoint (I);
@@ -718,7 +710,7 @@ void InlineRaytracing::StopAndStartRayquery(RTBuilder &IRB, Instruction *I,
718
710
auto *stackPtr = static_cast <RTBuilder::SyncStackPointerVal *>(
719
711
cast<Value>(IRB.CreateCall (
720
712
m_Functions[GET_STACK_POINTER_FROM_GLOBAL_BUFFER_POINTER],
721
- IRB. Insert ( globalBufferPtr-> clone ()) )));
713
+ globalBufferPtr)));
722
714
723
715
liveStack = IRB.CreateLoad (IRB.getRTStack2Ty (), stackPtr);
724
716
@@ -741,7 +733,7 @@ void InlineRaytracing::StopAndStartRayquery(RTBuilder &IRB, Instruction *I,
741
733
auto *stackPtr = static_cast <RTBuilder::SyncStackPointerVal *>(
742
734
cast<Value>(IRB.CreateCall (
743
735
m_Functions[GET_STACK_POINTER_FROM_GLOBAL_BUFFER_POINTER],
744
- IRB. Insert ( globalBufferPtr-> clone ()) )));
736
+ globalBufferPtr)));
745
737
746
738
IRB.CreateStore (liveStack, stackPtr);
747
739
}
@@ -781,7 +773,7 @@ void InlineRaytracing::HandleOptimizationsAndSpills(
781
773
for (auto &entry : livenessDataMap) {
782
774
bool cfgChanged = false ;
783
775
784
- auto *globalBufferPtr = entry.first ;
776
+ auto *rqObject = entry.first ;
785
777
auto *LD = &entry.second ;
786
778
787
779
// process the allocation acquire point
@@ -796,10 +788,12 @@ void InlineRaytracing::HandleOptimizationsAndSpills(
796
788
for (auto *I : LD->lifetimeEndInstructions ) {
797
789
IRB.SetInsertPoint (I->getNextNode ());
798
790
791
+ auto *globalBufferPtr =
792
+ getGlobalBufferPtr (IRB, IRB.Insert (rqObject->clone ()));
799
793
auto *stackPtr = static_cast <RTBuilder::SyncStackPointerVal *>(
800
794
cast<Value>(IRB.CreateCall (
801
795
m_Functions[GET_STACK_POINTER_FROM_GLOBAL_BUFFER_POINTER],
802
- IRB. Insert ( globalBufferPtr-> clone ()) )));
796
+ globalBufferPtr)));
803
797
804
798
// handle cache control
805
799
InsertCacheControl (IRB, stackPtr);
@@ -824,10 +818,13 @@ void InlineRaytracing::HandleOptimizationsAndSpills(
824
818
825
819
IRB.SetInsertPoint (succ->getFirstNonPHI ());
826
820
821
+ auto *globalBufferPtr =
822
+ getGlobalBufferPtr (IRB, IRB.Insert (rqObject->clone ()));
823
+
827
824
auto *stackPtr = static_cast <RTBuilder::SyncStackPointerVal *>(
828
825
cast<Value>(IRB.CreateCall (
829
826
m_Functions[GET_STACK_POINTER_FROM_GLOBAL_BUFFER_POINTER],
830
- IRB. Insert ( globalBufferPtr-> clone ()) )));
827
+ globalBufferPtr)));
831
828
832
829
// handle cache control
833
830
InsertCacheControl (IRB, stackPtr);
@@ -844,6 +841,9 @@ void InlineRaytracing::HandleOptimizationsAndSpills(
844
841
if (!LD->ContainsInstruction (*I))
845
842
continue ;
846
843
844
+ auto *globalBufferPtr =
845
+ getGlobalBufferPtr (IRB, IRB.Insert (rqObject->clone ()));
846
+
847
847
StopAndStartRayquery (IRB, I, globalBufferPtr, true , doRQCheckRelease);
848
848
}
849
849
@@ -852,6 +852,9 @@ void InlineRaytracing::HandleOptimizationsAndSpills(
852
852
if (!LD->ContainsInstruction (*I))
853
853
continue ;
854
854
855
+ auto *globalBufferPtr =
856
+ getGlobalBufferPtr (IRB, IRB.Insert (rqObject->clone ()));
857
+
855
858
StopAndStartRayquery (IRB, I, globalBufferPtr, true , doRQCheckRelease);
856
859
}
857
860
@@ -860,6 +863,9 @@ void InlineRaytracing::HandleOptimizationsAndSpills(
860
863
if (!LD->ContainsInstruction (*I))
861
864
continue ;
862
865
866
+ auto *globalBufferPtr =
867
+ getGlobalBufferPtr (IRB, IRB.Insert (rqObject->clone ()));
868
+
863
869
StopAndStartRayquery (IRB, I, globalBufferPtr, true , doRQCheckRelease);
864
870
}
865
871
@@ -868,11 +874,14 @@ void InlineRaytracing::HandleOptimizationsAndSpills(
868
874
if (!LD->ContainsInstruction (*I))
869
875
continue ;
870
876
877
+ auto *globalBufferPtr =
878
+ getGlobalBufferPtr (IRB, IRB.Insert (rqObject->clone ()));
879
+
871
880
StopAndStartRayquery (IRB, I, globalBufferPtr, false , doRQCheckRelease);
872
881
}
873
882
874
883
if (cfgChanged) {
875
- auto nextentry = livenessDataMap.find (globalBufferPtr );
884
+ auto nextentry = livenessDataMap.find (rqObject );
876
885
877
886
// TODO: can we incrementally update LoopInfo and DomTree?
878
887
DT.recalculate (F);
@@ -887,43 +896,23 @@ void InlineRaytracing::HandleOptimizationsAndSpills(
887
896
}
888
897
}
889
898
890
- void InlineRaytracing::LowerGlobalBufferPtrs (Function &F) {
899
+ void InlineRaytracing::LowerSlotAssignments (Function &F) {
891
900
RTBuilder IRB (&*F.getEntryBlock ().begin (), *m_pCGCtx);
892
- SmallVector<Instruction *> getGBPInstructions ;
901
+ SmallVector<Instruction *> createRQInstructions ;
893
902
894
- for (auto &U : m_Functions[GET_GLOBAL_BUFFER_PTR]->uses ())
895
- getGBPInstructions.push_back (cast<Instruction>(U.getUser ()));
896
-
897
- for (auto &I : getGBPInstructions) {
898
- uint32_t slot = static_cast <uint32_t >(
899
- cast<ConstantInt>(I->getOperand (0 ))->getZExtValue ());
903
+ for (auto &U : m_Functions[CREATE_RQ_OBJECT]->uses ())
904
+ createRQInstructions.push_back (cast<Instruction>(U.getUser ()));
900
905
906
+ for (auto *I : createRQInstructions) {
901
907
IRB.SetInsertPoint (I);
902
- auto *pFunc = GenISAIntrinsic::getDeclaration (
903
- F.getParent (), GenISAIntrinsic::GenISA_RuntimeValue,
904
- IRB.getRayDispatchGlobalDataPtrTy (*m_pCGCtx->getModule (),
905
- ADDRESS_SPACE_CONSTANT));
906
-
907
- auto *mainGlobalBufferPtr = IRB.CreateCall (
908
- pFunc,
909
- IRB.getInt32 (
910
- m_pCGCtx->getModuleMetaData ()->pushInfo .inlineRTGlobalPtrOffset ));
911
-
912
- uint32_t offset =
913
- slot * IGC::Align (sizeof (RayDispatchGlobalData), IGC::RTGlobalsAlign);
914
-
915
- auto *globalBufferPtr = IRB.CreateBitCast (
916
- mainGlobalBufferPtr, IRB.getInt8PtrTy (ADDRESS_SPACE_CONSTANT));
917
- globalBufferPtr = IRB.CreateInBoundsGEP (IRB.getInt8Ty (), globalBufferPtr,
918
- IRB.getInt32 (offset));
919
- globalBufferPtr = IRB.CreateBitCast (
920
- globalBufferPtr, mainGlobalBufferPtr->getType (),
921
- VALUE_NAME (" globalBuffer[" + std::to_string (slot) + " ]" ));
922
-
923
- I->replaceAllUsesWith (globalBufferPtr);
908
+ auto *rqObject = IRB.CreateAlloca (m_RQObjectType);
909
+ auto *slotPtr = getAtIndexFromRayQueryObject (IRB, rqObject, 0 );
910
+ IRB.CreateStore (I->getOperand (0 ), slotPtr);
911
+
912
+ I->replaceAllUsesWith (rqObject);
924
913
}
925
914
926
- llvm::for_each (getGBPInstructions ,
915
+ llvm::for_each (createRQInstructions ,
927
916
[](Instruction *I) { I->eraseFromParent (); });
928
917
}
929
918
@@ -975,9 +964,9 @@ bool InlineRaytracing::runOnFunction(Function &F) {
975
964
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo ();
976
965
977
966
auto livenessData = AnalyzeLiveness (F, DT, LI);
978
- AssignGlobalBuffers (F, livenessData);
967
+ AssignSlots (F, livenessData);
979
968
HandleOptimizationsAndSpills (F, livenessData, DT, LI);
980
- LowerGlobalBufferPtrs (F);
969
+ LowerSlotAssignments (F);
981
970
LowerStackPtrs (F);
982
971
983
972
// set relevant metadata
0 commit comments