Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/libPMacc/examples/gameOfLife2D/include/Simulation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,10 @@ class Simulation
// in units of SuperCells to be used by the kernel to identify itself.
evo.init(
MappingDesc(layout.getDataSpace(), 1, 1));

buff1.reset(new PMacc::GridBuffer<std::uint8_t, DIM2>(layout, false));
buff2.reset(new PMacc::GridBuffer<std::uint8_t, DIM2>(layout, false));

PMacc::DataSpace<DIM2> guardingCells(1, 1);
for (uint32_t i(1); i < PMacc::traits::NumberOfExchanges<DIM2>::value; ++i)
{
Expand All @@ -161,7 +161,7 @@ class Simulation
buff1->addExchange(PMacc::GUARD, PMacc::Mask(i), guardingCells, BUFF1);
buff2->addExchange(PMacc::GUARD, PMacc::Mask(i), guardingCells, BUFF2);
}

// Both next lines are defined in GatherSlice.hpp:
// -gather saves the MessageHeader object
// -Then do an Allgather for the globalRanks from GC, sort out
Expand Down Expand Up @@ -236,7 +236,7 @@ class Simulation
evo.run<PMacc::BORDER>(
read.getDeviceBuffer().getDataBox(),
write.getDeviceBuffer().getDataBox());

// Copy from device to host for saving. All threads and not only the master have to do this.
write.deviceToHost();

Expand Down Expand Up @@ -264,7 +264,7 @@ class Simulation
writeFullImage(write, sFileNameWithoutExt);
}
}

//-----------------------------------------------------------------------------
//!
//-----------------------------------------------------------------------------
Expand Down
53 changes: 22 additions & 31 deletions src/libPMacc/include/eventSystem/events/CudaEvent.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,25 +41,12 @@ class CudaEvent
* no data is allocated @see create()
*/
CudaEvent() :
m_event(),
m_pStream(),
isRecorded(false)
m_event(NULL),
m_pStream(NULL),
isRecorded(false),
isValid(false)
{}

/**
* Copy constructor
*/
CudaEvent(CudaEvent const & other) :
m_event(
new alpaka::event::Event<AlpakaAccStream>(*other.m_event.get())),
m_pStream(other.m_pStream),
isRecorded(other.isRecorded)
{}

/**
* Move constructor
*/
CudaEvent(CudaEvent && other) = default;

/**
* Destructor
Expand All @@ -80,7 +67,8 @@ class CudaEvent
static CudaEvent create(AlpakaAccDev const & dev)
{
CudaEvent ev;
ev.m_event.reset(new alpaka::event::Event<AlpakaAccStream>(dev));
ev.m_event = new alpaka::event::Event<AlpakaAccStream>(dev);
ev.isValid = true;
return ev;
}

Expand All @@ -89,19 +77,21 @@ class CudaEvent
*/
static void destroy(CudaEvent& ev)
{
alpaka::wait::wait(*ev.m_event.get());
ev.m_event.reset();
alpaka::wait::wait(*ev.m_event);
ev.isValid = false;
delete ev.m_event;
ev.m_event = NULL;
}

/**
* get native cuda event
*
* @return native cuda event
*/
alpaka::event::Event<AlpakaAccStream> & operator*() const
alpaka::event::Event<AlpakaAccStream> operator*() const
{
assert(m_event);
return *m_event.get();
assert(isValid);
return *m_event;
}

/**
Expand All @@ -111,16 +101,16 @@ class CudaEvent
*/
bool isFinished() const
{
assert(m_event);
return alpaka::event::test(*m_event.get());
assert(isValid);
return alpaka::event::test(*m_event);
}

/**
* get stream in which this event is recorded
*
* @return native cuda stream
*/
AlpakaAccStream & getCudaStream() const
AlpakaAccStream& getCudaStream() const
{
assert(isRecorded);
assert(m_pStream);
Expand All @@ -132,19 +122,20 @@ class CudaEvent
*
* @param stream native cuda stream
*/
void recordEvent(AlpakaAccStream & stream)
void recordEvent( AlpakaAccStream* stream)
{
/* disallow double recording */
assert(isRecorded==false);
isRecorded = true;
m_pStream = &stream;
alpaka::stream::enqueue(*m_pStream, *m_event.get());
m_pStream = stream;
alpaka::stream::enqueue(*m_pStream, *m_event);
}

private:
std::unique_ptr<alpaka::event::Event<AlpakaAccStream>> m_event;
AlpakaAccStream * m_pStream;
alpaka::event::Event<AlpakaAccStream>* m_event;
AlpakaAccStream* m_pStream;
/* state if event is recorded */
bool isRecorded;
bool isValid;
};
}
12 changes: 6 additions & 6 deletions src/libPMacc/include/eventSystem/streams/EventStream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class EventStream
* Creates the cudaStream_t object.
*/
EventStream(alpaka::dev::Dev<AlpakaAccDev> dev) :
stream(dev)
stream(new AlpakaAccStream(dev))
{}

/**
Expand All @@ -53,28 +53,28 @@ class EventStream
virtual ~EventStream()
{
//wait for all kernels in stream to finish
alpaka::wait::wait(stream);
alpaka::wait::wait(*stream);
}

/**
* Returns the cudaStream_t object associated with this EventStream.
* @return the internal cuda stream object
*/
AlpakaAccStream & getCudaStream()
AlpakaAccStream& getCudaStream()
{
return stream;
return *stream;
}

void waitOn(const CudaEvent& ev)
{
if(getCudaStream() != ev.getCudaStream())
{
alpaka::wait::wait(stream, *ev);
alpaka::wait::wait(*stream, *ev);
}
}

private:
AlpakaAccStream stream;
AlpakaAccStream* stream;
};

}
3 changes: 2 additions & 1 deletion src/libPMacc/include/eventSystem/tasks/StreamTask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,9 @@ namespace PMacc

private:
mutable EventStream* stream;
std::unique_ptr<CudaEvent> cudaEvent;
CudaEvent cudaEvent;
bool alwaysFinished;
bool hasCudaEvent;
};

} //namespace PMacc
20 changes: 11 additions & 9 deletions src/libPMacc/include/eventSystem/tasks/StreamTask.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,30 +34,31 @@ namespace PMacc
inline StreamTask::StreamTask( ) :
ITask( ),
stream( NULL ),
cudaEvent( ),
alwaysFinished( false )
alwaysFinished( false ),
hasCudaEvent(false)
{
this->setTaskType( TASK_CUDA );
}

inline CudaEvent StreamTask::getCudaEvent( ) const
{
assert(cudaEvent);
return *cudaEvent.get();
assert(hasCudaEvent);
return cudaEvent;
}

inline void StreamTask::setCudaEvent(const CudaEvent& cudaEvent )
{
this->cudaEvent.reset(new CudaEvent(cudaEvent));
this->hasCudaEvent = true;
this->cudaEvent = cudaEvent;
}

inline bool StreamTask::isFinished( )
{
if ( alwaysFinished )
return true;
if(cudaEvent)
if(hasCudaEvent)
{
if ( cudaEvent->isFinished( ) )
if ( cudaEvent.isFinished( ) )
{
alwaysFinished = true;
return true;
Expand All @@ -83,8 +84,9 @@ inline void StreamTask::setEventStream( EventStream* newStream )

inline void StreamTask::activate( )
{
cudaEvent.reset(new CudaEvent(Environment<>::get().Manager().getEventPool().getNextEvent()));
cudaEvent->recordEvent(this->stream->getCudaStream());
cudaEvent = Environment<>::get().Manager().getEventPool().getNextEvent();
cudaEvent.recordEvent(&(getEventStream()->getCudaStream()));
hasCudaEvent = true;
}

} //namespace PMacc
5 changes: 3 additions & 2 deletions src/libPMacc/include/particles/memory/boxes/ParticlesBox.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,15 @@ class ParticlesBox : protected DataBox<PitchedBox<SuperCell<FRAME>, DIM> >
*
* @return an empty frame
*/
PMACC_NO_NVCC_HDWARNING
DINLINE FRAME &getEmptyFrame() const
{

FrameType* tmp = NULL;
const int maxTries = 13; //magic number is not performance critical
for (int numTries = 0; numTries < maxTries; ++numTries)
{
tmp = (FrameType*) mallocMC::malloc(sizeof (FrameType));
tmp = (FrameType*) ::mallocMC::malloc(sizeof (FrameType));
if (tmp != NULL)
{
/* disable all particles since we can not assume that newly allocated memory contains zeros */
Expand Down Expand Up @@ -119,7 +120,7 @@ class ParticlesBox : protected DataBox<PitchedBox<SuperCell<FRAME>, DIM> >
*/
DINLINE void removeFrame(FRAME &frame) const
{
mallocMC::free((void*) &frame);
::mallocMC::free((void*) &frame);
}

HDINLINE
Expand Down
5 changes: 3 additions & 2 deletions src/libPMacc/include/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,12 @@ namespace PMacc
using AlpakaHostDev = alpaka::dev::DevCpu;
#ifdef PMACC_ACC_CPU
using AlpakaAccDev = alpaka::dev::DevCpu;
//using AlpakaAccStream = alpaka::stream::StreamCpuAsync;
using AlpakaAccStream = alpaka::stream::StreamCpuSync;
using AlpakaAccStream = alpaka::stream::StreamCpuAsync;
//using AlpakaAccStream = alpaka::stream::StreamCpuSync;
template<
typename TDim>
using AlpakaAcc = alpaka::acc::AccCpuOmp2Threads<TDim, AlpakaIdxSize>;
//using AlpakaAcc = alpaka::acc::AccCpuOmp2Blocks<TDim, AlpakaIdxSize>;
#else
using AlpakaAccDev = alpaka::dev::DevCudaRt;
using AlpakaAccStream = alpaka::stream::StreamCudaRtAsync;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ struct RandomPositionImpl
localCells = subGrid.getLocalDomain().size;
}

PMACC_NO_NVCC_HDWARNING
template<
typename T_Acc,
typename T_Particle1,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ struct TemperatureImpl : private T_ValueFunctor
localCells = subGrid.getLocalDomain().size;
}

PMACC_NO_NVCC_HDWARNING
template<
typename T_Acc,
typename T_Particle1,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ struct RandomImpl
* @param curParticle the number of this particle: [0, totalNumParsPerCell-1]
* @return float3_X with components between [0.0, 1.0)
*/
PMACC_NO_NVCC_HDWARNING
DINLINE floatD_X operator()(const uint32_t)
{
floatD_X result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@

#if defined(MAMC_CUDA_ENABLED) && defined(__CUDACC__)


#include "distributionPolicies/XMallocSIMD.hpp"
#include "distributionPolicies/XMallocSIMD_impl.hpp"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@

#if defined(MAMC_CUDA_ENABLED) && defined(__CUDACC__)


#include "reservePoolPolicies/SimpleCudaMalloc.hpp"
#include "reservePoolPolicies/SimpleCudaMalloc_impl.hpp"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,17 @@ namespace AlignmentPolicies{

public:

MAMC_HOST
static boost::tuple<void*,size_t> alignPool(void* memory, size_t memsize){
return boost::make_tuple(memory,memsize);
}

MAMC_HOST MAMC_ACCELERATOR
MAMC_ACC
static uint32 applyPadding(uint32 bytes){
return bytes;
}

MAMC_HOST
static std::string classname(){
return "Noop";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ namespace Shrink2NS{
BOOST_STATIC_ASSERT(dataAlignment && !(dataAlignment & (dataAlignment-1)) );

public:
MAMC_HOST
static boost::tuple<void*,size_t> alignPool(void* memory, size_t memsize){
PointerEquivalent alignmentstatus = ((PointerEquivalent)memory) & (dataAlignment -1);
if(alignmentstatus != 0)
Expand All @@ -102,8 +103,7 @@ namespace Shrink2NS{
return boost::make_tuple(memory,memsize);
}

MAMC_HOST
MAMC_ACCELERATOR
MAMC_ACC
static uint32 applyPadding(uint32 bytes){
return (bytes + dataAlignment - 1) & ~(dataAlignment-1);
}
Expand Down
Loading