Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 31 additions & 31 deletions gpu-simulator/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,41 +81,41 @@ int main(int argc, const char **argv) {

unsigned i = 0;
while (i < commandlist.size() || !kernels_info.empty()) {
trace_kernel_info_t *kernel_info = NULL;
if (commandlist[i].m_type == command_type::cpu_gpu_mem_copy) {
size_t addre, Bcount;
tracer.parse_memcpy_info(commandlist[i].command_string, addre, Bcount);
std::cout << "launching memcpy command : " << commandlist[i].command_string << std::endl;
m_gpgpu_sim->perf_memcpy_to_gpu(addre, Bcount);
i++;
continue;
} else if (commandlist[i].m_type == command_type::kernel_launch) {
// Read trace header info for window_size number of kernels
while (kernels_info.size() < window_size && i < commandlist.size()) {
kernel_trace_t* kernel_trace_info = tracer.parse_kernel_info(commandlist[i].command_string);
kernel_info = create_kernel_info(kernel_trace_info, m_gpgpu_context, &tconfig, &tracer);
kernels_info.push_back(kernel_info);
std::cout << "Header info loaded for kernel command : " << commandlist[i].command_string << std::endl;
// Consume as many commands as possible - either cpu_gpu_mem_copy
// or kernel_launch - until the vector "kernels_info" has reached
// the window_size or no command is left in commandlist
while (kernels_info.size() < window_size && i < commandlist.size()) {
trace_kernel_info_t *kernel_info = NULL;
if (commandlist[i].m_type == command_type::cpu_gpu_mem_copy) {
size_t addre, Bcount;
tracer.parse_memcpy_info(commandlist[i].command_string, addre, Bcount);
std::cout << "launching memcpy command : " << commandlist[i].command_string << std::endl;
m_gpgpu_sim->perf_memcpy_to_gpu(addre, Bcount);
i++;
} else if (commandlist[i].m_type == command_type::kernel_launch) {
// Read trace header info for window_size number of kernels
kernel_trace_t* kernel_trace_info = tracer.parse_kernel_info(commandlist[i].command_string);
kernel_info = create_kernel_info(kernel_trace_info, m_gpgpu_context, &tconfig, &tracer);
kernels_info.push_back(kernel_info);
std::cout << "Header info loaded for kernel command : " << commandlist[i].command_string << std::endl;
i++;
}
// Launch all kernels within window that are on a stream that isn't already running
for (auto k : kernels_info) {
bool stream_busy = false;
for (auto s: busy_streams) {
if (s == k->get_cuda_stream_id())
stream_busy = true;
}
if (!stream_busy && m_gpgpu_sim->can_start_kernel() && !k->was_launched()) {
std::cout << "launching kernel name: " << k->get_name() << " uid: " << k->get_uid() << std::endl;
m_gpgpu_sim->launch(k);
k->set_launched();
busy_streams.push_back(k->get_cuda_stream_id());
}
}

// Launch all kernels within window that are on a stream that isn't already running
for (auto k : kernels_info) {
bool stream_busy = false;
for (auto s: busy_streams) {
if (s == k->get_cuda_stream_id())
stream_busy = true;
}
if (!stream_busy && m_gpgpu_sim->can_start_kernel() && !k->was_launched()) {
std::cout << "launching kernel name: " << k->get_name() << " uid: " << k->get_uid() << std::endl;
m_gpgpu_sim->launch(k);
k->set_launched();
busy_streams.push_back(k->get_cuda_stream_id());
}
}
else if (kernels_info.empty())
assert(0 && "Undefined Command");

bool active = false;
bool sim_cycles = false;
Expand Down
37 changes: 32 additions & 5 deletions gpu-simulator/trace-driven/trace_driven.cc
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,14 @@ unsigned trace_shader_core_ctx::sim_init_thread(
return 1;
}

/**
* @brief Must be called once for each CTA.
*
* In the event of hwtid wrap-around due to subcore scheduling,
* end_thread is smaller than start_thread. This case is acceptable
* and will be resolved by callee methods.
*
*/
void trace_shader_core_ctx::init_warps(unsigned cta_id, unsigned start_thread,
unsigned end_thread, unsigned ctaid,
int cta_size, kernel_info_t &kernel) {
Expand Down Expand Up @@ -565,24 +573,43 @@ void trace_shader_core_ctx::updateSIMTStack(unsigned warpId,
// No SIMT-stack in trace-driven mode
}

/**
* @brief Calls trace_parser::get_next_threadblock_traces to parse an entire
* CTA section in the *.traceg file. This means the range of warp ids covered
* by [start_warp, end_warp) must cover all warps of the CTA.
*
* With the subcore model, a special case must be handled since wrap-arounds
* can happen, which means end_warp < start_warp. This is done by iterating
* over the warp ids through a WrappableUnsignedRange (via its loop() method),
* instead of looping directly from start_warp to end_warp.
*
* @param start_warp Warp id calculated from hwtid by dividing it by warp-size
* @param end_warp Calculated from hwtid in the same way
* @param kernel Kernel (a trace_kernel_info_t) from which the threadblock traces are read
*/
void trace_shader_core_ctx::init_traces(unsigned start_warp, unsigned end_warp,
kernel_info_t &kernel) {
std::vector<std::vector<inst_trace_t> *> threadblock_traces;
for (unsigned i = start_warp; i < end_warp; ++i) {

//WrappableUnsignedRange is defined in gpgpu-sim/gpu-sim.h
WrappableUnsignedRange warp_id_range(start_warp, end_warp, m_config->max_warps_per_shader);

warp_id_range.loop([&](const unsigned i){
trace_shd_warp_t *m_trace_warp = static_cast<trace_shd_warp_t *>(m_warp[i]);
m_trace_warp->clear();
threadblock_traces.push_back(&(m_trace_warp->warp_traces));
}
threadblock_traces.push_back(&(m_trace_warp->warp_traces));
});

trace_kernel_info_t &trace_kernel =
static_cast<trace_kernel_info_t &>(kernel);
trace_kernel.get_next_threadblock_traces(threadblock_traces);

// set the pc from the traces and ignore the functional model
for (unsigned i = start_warp; i < end_warp; ++i) {
warp_id_range.loop([&](const unsigned i){
trace_shd_warp_t *m_trace_warp = static_cast<trace_shd_warp_t *>(m_warp[i]);
m_trace_warp->set_next_pc(m_trace_warp->get_start_trace_pc());
m_trace_warp->set_kernel(&trace_kernel);
}
});
}

void trace_shader_core_ctx::checkExecutionStatusAndUpdate(warp_inst_t &inst,
Expand Down
4 changes: 3 additions & 1 deletion util/job_launching/slurm.sim
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ copy_output() {

trap copy_output ERR

set -e
#citing https://stackoverflow.com/questions/35800082/how-to-trap-err-when-using-set-e-in-bash
#Setting -E alongside -e makes any trap on ERR inherited by shell funcs, command substitutions and commands executed in a subshell environment
set -eE

if [ "$GPGPUSIM_SETUP_ENVIRONMENT_WAS_RUN" != "1" ]; then
export GPGPUSIM_ROOT=REPLACE_GPGPUSIM_ROOT
Expand Down