Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 67 additions & 79 deletions include/AdePT/core/AsyncAdePTTransport.cuh

Large diffs are not rendered by default.

66 changes: 55 additions & 11 deletions include/AdePT/core/AsyncAdePTTransportStruct.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,44 @@
namespace AsyncAdePT {

// A bundle of pointers to generate particles of an implicit type.
struct ParticleGenerator {
struct SpeciesParticleManager {
Track *fTracks;
Track *fLeakedTracks;
SlotManager *fSlotManager;
SlotManager *fSlotManagerLeaks;
adept::MParray *fActiveQueue;
adept::MParray *fNextActiveQueue;
adept::MParray *fActiveLeaksQueue;

public:
__host__ __device__ ParticleGenerator(Track *tracks, SlotManager *slotManager, SlotManager *slotManagerLeaks,
adept::MParray *activeQueue)
: fTracks(tracks), fSlotManager(slotManager), fSlotManagerLeaks(slotManagerLeaks), fActiveQueue(activeQueue)
__host__ __device__ SpeciesParticleManager(Track *tracks, Track *leakedTracks, SlotManager *slotManager,
SlotManager *slotManagerLeaks, adept::MParray *activeQueue,
adept::MParray *nextActiveQueue, adept::MParray *activeLeaksQueue)
: fTracks(tracks), fLeakedTracks(leakedTracks), fSlotManager(slotManager), fSlotManagerLeaks(slotManagerLeaks),
fActiveQueue(activeQueue), fNextActiveQueue(nextActiveQueue), fActiveLeaksQueue(activeLeaksQueue)
{
}

/// Obtain track and leaked track at given slot position
__device__ __forceinline__ Track &TrackAt(SlotManager::value_type slot) { return fTracks[slot]; }
__device__ __forceinline__ Track &LeakTrackAt(SlotManager::value_type slot) { return fLeakedTracks[slot]; }

/// Obtain a slot for a track, but don't enqueue.
__device__ auto NextSlot() { return fSlotManager->NextSlot(); }

__device__ auto NextLeakSlot() { return fSlotManagerLeaks->NextSlot(); }

// enqueue into next-active queue
__device__ __forceinline__ bool EnqueueNext(SlotManager::value_type slot)
{
return fNextActiveQueue->push_back(slot);
}

// size of the active queue
__device__ __forceinline__ int ActiveSize() const { return fActiveQueue->size(); }

// read slot from active queue by index
__device__ __forceinline__ SlotManager::value_type ActiveAt(int i) const { return (*fActiveQueue)[i]; }

/// Construct a track at the given location, forwarding all arguments to the constructor.
template <typename... Ts>
__device__ Track &InitTrack(SlotManager::value_type slot, Ts &&...args)
Expand All @@ -56,12 +76,36 @@ public:
__device__ Track &NextTrack(Ts &&...args)
{
const auto slot = NextSlot();
fActiveQueue->push_back(slot);
// next track is only visible in next GPU iteration, therefore pushed in the NextActiveQueue
fNextActiveQueue->push_back(slot);
auto &track = InitTrack(slot, std::forward<Ts>(args)...);
return track;
}

void SetActiveQueue(adept::MParray *queue) { fActiveQueue = queue; }
/// Obtains a leak slot, copies the track from the source slot to the leaks, and marks the slot in the active queue
/// for freeing
__device__ __forceinline__ void CopyTrackToLeaked(SlotManager::value_type srcSlot)
{
// get a leak slot
const auto leakSlot = NextLeakSlot();

// Create and construct track from other track
new (fLeakedTracks + leakSlot) Track{TrackAt(srcSlot)};

// enqueue into leak queue
const bool success = fActiveLeaksQueue->push_back(leakSlot);
if (!success) {
printf("ERROR: No space left in leaks queue.\n"
"\tThe threshold for flushing the leak buffer may be too high\n"
"\tThe space allocated to the leak buffer may be too small\n");
asm("trap;");
}

// free the source slot
fSlotManager->MarkSlotForFreeing(srcSlot);

return;
}
};

struct LeakedTracks {
Expand All @@ -86,10 +130,10 @@ struct LeakedTracks {
// };

// A bundle of generators for the three particle types.
struct Secondaries {
ParticleGenerator electrons;
ParticleGenerator positrons;
ParticleGenerator gammas;
struct ParticleManager {
SpeciesParticleManager electrons;
SpeciesParticleManager positrons;
SpeciesParticleManager gammas;
};

// Holds the leaked track structs for all three particle types
Expand Down
3 changes: 3 additions & 0 deletions include/AdePT/core/Track.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ struct Track {

LeakStatus leakStatus{LeakStatus::NoLeak};

__host__ __device__ Track(const Track &) = default;
__host__ __device__ Track &operator=(const Track &) = default;

/// Construct a new track for GPU transport.
/// NB: The navState remains uninitialised.
__device__ Track(uint64_t rngSeed, double eKin, double globalTime, float localTime, float properTime, float weight,
Expand Down
72 changes: 24 additions & 48 deletions include/AdePT/kernels/electrons.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,7 @@ namespace AsyncAdePT {
// applying the continuous effects and maybe a discrete process that could
// generate secondaries.
template <bool IsElectron, typename Scoring, class SteppingActionT>
static __device__ __forceinline__ void TransportElectrons(Track *electrons, Track *leaks, const adept::MParray *active,
Secondaries &secondaries, adept::MParray *nextActiveQueue,
adept::MParray *leakedQueue, Scoring *userScoring,
static __device__ __forceinline__ void TransportElectrons(ParticleManager &particleManager, Scoring *userScoring,
Stats *InFlightStats, const StepActionParam params,
AllowFinishOffEventArray allowFinishOffEvent,
const bool returnAllSteps, const bool returnLastStep)
Expand All @@ -73,12 +71,13 @@ static __device__ __forceinline__ void TransportElectrons(Track *electrons, Trac

auto &magneticField = *gMagneticField;

int activeSize = active->size();
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < activeSize; i += blockDim.x * gridDim.x) {
const int slot = (*active)[i];
SlotManager &slotManager = IsElectron ? *secondaries.electrons.fSlotManager : *secondaries.positrons.fSlotManager;
auto &electronsOrPositrons = (IsElectron ? particleManager.electrons : particleManager.positrons);
SlotManager &slotManager = *electronsOrPositrons.fSlotManager;

Track &currentTrack = electrons[slot];
const int activeSize = electronsOrPositrons.ActiveSize();
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < activeSize; i += blockDim.x * gridDim.x) {
const auto slot = electronsOrPositrons.ActiveAt(i);
Track &currentTrack = electronsOrPositrons.TrackAt(slot);
#else

template <bool IsElectron, typename Scoring, class SteppingActionT>
Expand Down Expand Up @@ -164,29 +163,11 @@ static __device__ __forceinline__ void TransportElectrons(adept::TrackManager<Tr
currentTrack.navState = nextState;
currentTrack.leakStatus = leakReason;
#ifdef ASYNC_MODE
// NOTE: When adapting the split kernels for async mode this won't
// work if we want to re-use slots on the fly. Directly copying to
// a trackdata struct would be better
if (leakReason != LeakStatus::NoLeak) {
// Get a slot in the leaks array
int leakSlot;
if (IsElectron)
leakSlot = secondaries.electrons.NextLeakSlot();
else
leakSlot = secondaries.positrons.NextLeakSlot();
// Copy the track to the leaks array and store the index in the leak queue
leaks[leakSlot] = electrons[slot];
auto success = leakedQueue->push_back(leakSlot);
if (!success) {
printf("ERROR: No space left in e-/+ leaks queue.\n\
\tThe threshold for flushing the leak buffer may be too high\n\
\tThe space allocated to the leak buffer may be too small\n");
asm("trap;");
}
// Free the slot in the tracks slot manager
slotManager.MarkSlotForFreeing(slot);
// Copy track at slot to the leaked tracks
electronsOrPositrons.CopyTrackToLeaked(slot);
} else {
nextActiveQueue->push_back(slot);
electronsOrPositrons.EnqueueNext(slot);
}
#else
currentTrack.CopyTo(trackdata, Pdg);
Expand Down Expand Up @@ -589,7 +570,7 @@ static __device__ __forceinline__ void TransportElectrons(adept::TrackManager<Tr

} else {
#ifdef ASYNC_MODE
Track &secondary = secondaries.electrons.NextTrack(
Track &secondary = particleManager.electrons.NextTrack(
newRNG, deltaEkin, pos, vecgeom::Vector3D<double>{dirSecondary[0], dirSecondary[1], dirSecondary[2]},
navState, currentTrack, globalTime);
#else
Expand Down Expand Up @@ -671,7 +652,7 @@ static __device__ __forceinline__ void TransportElectrons(adept::TrackManager<Tr
#endif
} else {
#ifdef ASYNC_MODE
Track &gamma = secondaries.gammas.NextTrack(
Track &gamma = particleManager.gammas.NextTrack(
newRNG, deltaEkin, pos, vecgeom::Vector3D<double>{dirSecondary[0], dirSecondary[1], dirSecondary[2]},
navState, currentTrack, globalTime);
#else
Expand Down Expand Up @@ -750,7 +731,7 @@ static __device__ __forceinline__ void TransportElectrons(adept::TrackManager<Tr

} else {
#ifdef ASYNC_MODE
Track &gamma1 = secondaries.gammas.NextTrack(
Track &gamma1 = particleManager.gammas.NextTrack(
newRNG, theGamma1Ekin, pos,
vecgeom::Vector3D<double>{theGamma1Dir[0], theGamma1Dir[1], theGamma1Dir[2]}, navState, currentTrack,
globalTime);
Expand Down Expand Up @@ -794,7 +775,7 @@ static __device__ __forceinline__ void TransportElectrons(adept::TrackManager<Tr

} else {
#ifdef ASYNC_MODE
Track &gamma2 = secondaries.gammas.NextTrack(
Track &gamma2 = particleManager.gammas.NextTrack(
currentTrack.rngState, theGamma2Ekin, pos,
vecgeom::Vector3D<double>{theGamma2Dir[0], theGamma2Dir[1], theGamma2Dir[2]}, navState, currentTrack,
globalTime);
Expand Down Expand Up @@ -872,13 +853,14 @@ static __device__ __forceinline__ void TransportElectrons(adept::TrackManager<Tr
RanluxppDouble newRNG2(currentTrack.rngState.Branch());

#ifdef ASYNC_MODE
Track &gamma1 = secondaries.gammas.NextTrack(newRNG2, double{copcore::units::kElectronMassC2}, pos,
vecgeom::Vector3D<double>{sint * cosPhi, sint * sinPhi, cost},
navState, currentTrack, globalTime);
Track &gamma1 = particleManager.gammas.NextTrack(
newRNG2, double{copcore::units::kElectronMassC2}, pos,
vecgeom::Vector3D<double>{sint * cosPhi, sint * sinPhi, cost}, navState, currentTrack, globalTime);

// Reuse the RNG state of the dying track.
Track &gamma2 = secondaries.gammas.NextTrack(currentTrack.rngState, double{copcore::units::kElectronMassC2},
pos, -gamma1.dir, navState, currentTrack, globalTime);
Track &gamma2 =
particleManager.gammas.NextTrack(currentTrack.rngState, double{copcore::units::kElectronMassC2}, pos,
-gamma1.dir, navState, currentTrack, globalTime);
#else
Track &gamma1 = secondaries.gammas->NextTrack();
Track &gamma2 = secondaries.gammas->NextTrack();
Expand Down Expand Up @@ -1024,26 +1006,20 @@ static __device__ __forceinline__ void TransportElectrons(adept::TrackManager<Tr
// Instantiate kernels for electrons and positrons.
#ifdef ASYNC_MODE
template <typename Scoring, class SteppingActionT>
__global__ void TransportElectrons(Track *electrons, Track *leaks, const adept::MParray *active,
Secondaries secondaries, adept::MParray *nextActiveQueue,
adept::MParray *leakedQueue, Scoring *userScoring, Stats *InFlightStats,
__global__ void TransportElectrons(ParticleManager particleManager, Scoring *userScoring, Stats *InFlightStats,
const StepActionParam params, AllowFinishOffEventArray allowFinishOffEvent,
const bool returnAllSteps, const bool returnLastStep)
{
TransportElectrons</*IsElectron*/ true, Scoring, SteppingActionT>(
electrons, leaks, active, secondaries, nextActiveQueue, leakedQueue, userScoring, InFlightStats, params,
allowFinishOffEvent, returnAllSteps, returnLastStep);
particleManager, userScoring, InFlightStats, params, allowFinishOffEvent, returnAllSteps, returnLastStep);
}
template <typename Scoring, class SteppingActionT>
__global__ void TransportPositrons(Track *positrons, Track *leaks, const adept::MParray *active,
Secondaries secondaries, adept::MParray *nextActiveQueue,
adept::MParray *leakedQueue, Scoring *userScoring, Stats *InFlightStats,
__global__ void TransportPositrons(ParticleManager particleManager, Scoring *userScoring, Stats *InFlightStats,
const StepActionParam params, AllowFinishOffEventArray allowFinishOffEvent,
const bool returnAllSteps, const bool returnLastStep)
{
TransportElectrons</*IsElectron*/ false, Scoring, SteppingActionT>(
positrons, leaks, active, secondaries, nextActiveQueue, leakedQueue, userScoring, InFlightStats, params,
allowFinishOffEvent, returnAllSteps, returnLastStep);
particleManager, userScoring, InFlightStats, params, allowFinishOffEvent, returnAllSteps, returnLastStep);
}
#else
template <typename Scoring, class SteppingActionT>
Expand Down
Loading