diff --git a/src/runtime_src/CMakeLists.txt b/src/runtime_src/CMakeLists.txt index 8f486439962..d20d6379e73 100644 --- a/src/runtime_src/CMakeLists.txt +++ b/src/runtime_src/CMakeLists.txt @@ -92,9 +92,9 @@ else() xrt_add_subdirectory(xocl) xrt_add_subdirectory(xrt) xrt_add_subdirectory(tools/xclbinutil) - xrt_add_subdirectory(xdp) xrt_add_subdirectory(tools/scripts) xrt_add_subdirectory(core) + xrt_add_subdirectory(xdp) # --- Optional HIP bindings --- if (XRT_ENABLE_HIP) diff --git a/src/runtime_src/xdp/profile/database/static_info/aie_constructs.h b/src/runtime_src/xdp/profile/database/static_info/aie_constructs.h index 9653fea13b0..e32fdd5bfa7 100644 --- a/src/runtime_src/xdp/profile/database/static_info/aie_constructs.h +++ b/src/runtime_src/xdp/profile/database/static_info/aie_constructs.h @@ -14,6 +14,11 @@ #include namespace xdp::aie { + // Microblaze Debug Module (MDM) Counters + // https://docs.amd.com/r/en-US/ug984-vivado-microblaze-ref/Performance-Monitoring + constexpr unsigned int NUM_UC_EVENT_COUNTERS = 5; + constexpr unsigned int NUM_UC_LATENCY_COUNTERS = 1; + struct aiecompiler_options { bool broadcast_enable_core; @@ -51,7 +56,9 @@ namespace xdp { enum io_type { PLIO = 0, - GMIO + GMIO, + TRACE_DMA, + CONTROL_DMA }; struct tile_type @@ -203,14 +210,16 @@ namespace xdp { uint8_t channelNumber; uint8_t streamId; uint8_t burstLength; + uint8_t type; TraceGMIO(uint32_t i, uint8_t col, uint8_t num, - uint8_t stream, uint8_t len) + uint8_t stream, uint8_t len, uint8_t t = 0) : id(i) , shimColumn(col) , channelNumber(num) , streamId(stream) , burstLength(len) + , type(t) {} }; @@ -277,8 +286,8 @@ namespace xdp { bool port_trace_is_master[NUM_SWITCH_MONITOR_PORTS]; int8_t port_trace_ids[NUM_SWITCH_MONITOR_PORTS]; std::string port_trace_names[NUM_SWITCH_MONITOR_PORTS]; - int8_t s2mm_channels[NUM_CHANNEL_SELECTS] = {-1, -1}; - int8_t mm2s_channels[NUM_CHANNEL_SELECTS] = {-1, -1}; + int8_t s2mm_channels[NUM_CHANNEL_SELECTS_MAX] = {-1, -1, -1, -1}; + int8_t
mm2s_channels[NUM_CHANNEL_SELECTS_MAX] = {-1, -1, -1, -1}; std::string s2mm_names[NUM_MEM_CHANNELS]; std::string mm2s_names[NUM_MEM_CHANNELS]; std::vector pc; diff --git a/src/runtime_src/xdp/profile/database/static_info/device_info.cpp b/src/runtime_src/xdp/profile/database/static_info/device_info.cpp index f741309a1fe..464eabcd6f9 100644 --- a/src/runtime_src/xdp/profile/database/static_info/device_info.cpp +++ b/src/runtime_src/xdp/profile/database/static_info/device_info.cpp @@ -320,13 +320,13 @@ namespace xdp { } void DeviceInfo::addTraceGMIO(uint32_t id, uint8_t col, uint8_t num, - uint8_t stream, uint8_t len) + uint8_t stream, uint8_t len, uint8_t type) { ConfigInfo* config = currentConfig() ; if (!config || config->currentXclbins.empty()) return ; - config->addTraceGMIO(id, col, num, stream, len) ; + config->addTraceGMIO(id, col, num, stream, len, type) ; } void DeviceInfo::addAIECounter(uint32_t i, uint8_t col, uint8_t row, diff --git a/src/runtime_src/xdp/profile/database/static_info/device_info.h b/src/runtime_src/xdp/profile/database/static_info/device_info.h index 480429b9268..5f95ff7d426 100644 --- a/src/runtime_src/xdp/profile/database/static_info/device_info.h +++ b/src/runtime_src/xdp/profile/database/static_info/device_info.h @@ -140,7 +140,7 @@ namespace xdp { // ****** Functions for AIE information on the current xclbin ****** XDP_CORE_EXPORT void addTraceGMIO(uint32_t i, uint8_t col, uint8_t num, uint8_t stream, - uint8_t len) ; + uint8_t len, uint8_t type) ; XDP_CORE_EXPORT void addAIECounter(uint32_t i, uint8_t col, uint8_t row, uint8_t num, uint16_t start, uint16_t end, uint8_t reset, diff --git a/src/runtime_src/xdp/profile/database/static_info/filetypes/aie_control_config_filetype.cpp b/src/runtime_src/xdp/profile/database/static_info/filetypes/aie_control_config_filetype.cpp index 764171b83e4..bd823b7c9a0 100644 --- a/src/runtime_src/xdp/profile/database/static_info/filetypes/aie_control_config_filetype.cpp +++ 
b/src/runtime_src/xdp/profile/database/static_info/filetypes/aie_control_config_filetype.cpp @@ -9,8 +9,9 @@ #include "aie_control_config_filetype.h" #include "core/common/message.h" -#include "xdp/profile/plugin/vp_base/utility.h" #include "xdp/profile/database/static_info/aie_util.h" +#include "xdp/profile/device/tracedefs.h" +#include "xdp/profile/plugin/vp_base/utility.h" #include "xdp/profile/plugin/aie_profile/aie_profile_defs.h" namespace xdp::aie { @@ -225,16 +226,18 @@ AIEControlConfigFiletype::getChildGMIOs( const std::string& childStr) const // 1 : S2MM channel 1 // 2 : MM2S channel 0 (slave/input) // 3 : MM2S channel 1 - auto slaveOrMaster = gmio_node.second.get("type"); + auto ioType = gmio_node.second.get("type"); auto channelNumber = gmio_node.second.get("channel_number"); - gmio.type = io_type::GMIO; + gmio.type = (ioType == MM2S_CONTROL) ? io_type::CONTROL_DMA + : ((ioType == S2MM_TRACE) ? io_type::TRACE_DMA : io_type::GMIO); gmio.id = gmio_node.second.get("id"); gmio.name = gmio_node.second.get("name"); gmio.logicalName = gmio_node.second.get("logical_name"); - gmio.slaveOrMaster = slaveOrMaster; + gmio.slaveOrMaster = (ioType >= 2) ? (ioType - 2) : ioType; gmio.shimColumn = gmio_node.second.get("shim_column"); - gmio.channelNum = (slaveOrMaster == 0) ? (channelNumber - 2) : channelNumber; + gmio.channelNum = ((ioType == 0) && (channelNumber >= 2)) + ? 
(channelNumber - 2) : channelNumber; gmio.streamId = gmio_node.second.get("stream_id"); gmio.burstLength = gmio_node.second.get("burst_length_in_16byte"); diff --git a/src/runtime_src/xdp/profile/database/static_info/xclbin_info.cpp b/src/runtime_src/xdp/profile/database/static_info/xclbin_info.cpp index c2032d18cc1..ebbff2067ae 100644 --- a/src/runtime_src/xdp/profile/database/static_info/xclbin_info.cpp +++ b/src/runtime_src/xdp/profile/database/static_info/xclbin_info.cpp @@ -556,14 +556,15 @@ namespace xdp { } void ConfigInfo::addTraceGMIO(uint32_t id, uint8_t col, uint8_t num, - uint8_t stream, uint8_t len) + uint8_t stream, uint8_t len, uint8_t t) { for (auto xclbin : currentXclbins) { if (xclbin->aie.valid) { - xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Added GMIO trace of ID "+ std::to_string(id) + "."); - xclbin->aie.gmioList.push_back(new TraceGMIO(id, col, num, stream, len)) ; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Added GMIO trace of ID "+ std::to_string(id) + "."); + xclbin->aie.gmioList.push_back(new TraceGMIO(id, col, num, stream, len, t)) ; return ; } } diff --git a/src/runtime_src/xdp/profile/database/static_info/xclbin_info.h b/src/runtime_src/xdp/profile/database/static_info/xclbin_info.h index 73a1841609d..5ebc3b33e45 100644 --- a/src/runtime_src/xdp/profile/database/static_info/xclbin_info.h +++ b/src/runtime_src/xdp/profile/database/static_info/xclbin_info.h @@ -242,7 +242,7 @@ namespace xdp { std::vector getUserASMsWithTrace(XclbinInfo* xclbin) ; void addTraceGMIO(uint32_t id, uint8_t col, uint8_t num, - uint8_t stream, uint8_t len) ; + uint8_t stream, uint8_t len, uint8_t type) ; void addAIECounter(uint32_t i, uint8_t col, uint8_t r, uint8_t num, uint16_t start, uint16_t end, uint8_t reset, uint64_t load, double freq, diff --git a/src/runtime_src/xdp/profile/database/static_info_database.cpp b/src/runtime_src/xdp/profile/database/static_info_database.cpp index 
33ee465bf30..61184cdbd9b 100644 --- a/src/runtime_src/xdp/profile/database/static_info_database.cpp +++ b/src/runtime_src/xdp/profile/database/static_info_database.cpp @@ -1108,14 +1108,14 @@ namespace xdp { } void VPStaticDatabase::addTraceGMIO(uint64_t deviceId, uint32_t i, - uint8_t col, uint8_t num, - uint8_t stream, uint8_t len) + uint8_t col, uint8_t num, uint8_t stream, + uint8_t len, uint8_t type) { std::lock_guard lock(deviceLock) ; if (deviceInfo.find(deviceId) == deviceInfo.end()) return ; - deviceInfo[deviceId]->addTraceGMIO(i, col, num, stream, len) ; + deviceInfo[deviceId]->addTraceGMIO(i, col, num, stream, len, type) ; } void VPStaticDatabase::addAIECounter(uint64_t deviceId, uint32_t i, diff --git a/src/runtime_src/xdp/profile/database/static_info_database.h b/src/runtime_src/xdp/profile/database/static_info_database.h index c36d2c7525a..3b89e4898eb 100644 --- a/src/runtime_src/xdp/profile/database/static_info_database.h +++ b/src/runtime_src/xdp/profile/database/static_info_database.h @@ -383,7 +383,7 @@ namespace xdp { getAIECfgTiles(uint64_t deviceId) ; XDP_CORE_EXPORT TraceGMIO* getTraceGMIO(uint64_t deviceId, uint64_t idx) ; XDP_CORE_EXPORT void addTraceGMIO(uint64_t deviceId, uint32_t i, uint8_t col, - uint8_t num, uint8_t stream, uint8_t len) ; + uint8_t num, uint8_t stream, uint8_t len, uint8_t type) ; XDP_CORE_EXPORT void addAIECounter(uint64_t deviceId, uint32_t i, uint8_t col, uint8_t row, uint8_t num, uint16_t start, uint16_t end, uint8_t reset, uint64_t load, diff --git a/src/runtime_src/xdp/profile/device/aie_trace/client/aie_trace_offload_npu3.cpp b/src/runtime_src/xdp/profile/device/aie_trace/client/aie_trace_offload_npu3.cpp new file mode 100755 index 00000000000..a90ac046517 --- /dev/null +++ b/src/runtime_src/xdp/profile/device/aie_trace/client/aie_trace_offload_npu3.cpp @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2025 Advanced Micro Devices, Inc. 
All rights reserved + +#define XDP_PLUGIN_SOURCE + +#include "aie_trace_offload_npu3.h" +#include "core/common/message.h" +#include "core/include/xrt/xrt_kernel.h" +#include "xdp/profile/database/database.h" +#include "xdp/profile/database/static_info/aie_constructs.h" +#include "xdp/profile/device/aie_trace/aie_trace_logger.h" +#include "xdp/profile/device/pl_device_intf.h" +#include "xdp/profile/plugin/vp_base/utility.h" + +#include + +namespace xdp { + using severity_level = xrt_core::message::severity_level; + + AIETraceOffload::AIETraceOffload(void* handle, uint64_t id, PLDeviceIntf* dInt, + AIETraceLogger* logger, bool isPlio, + uint64_t totalSize, uint64_t numStrm, + xrt::hw_context context, + std::shared_ptr(metadata)) + : deviceHandle(handle), deviceId(id), plDeviceIntf(dInt), traceLogger(logger), + isPLIO(isPlio), totalSz(totalSize), numStream(numStrm), + traceContinuous(false), offloadIntervalUs(0), bufferInitialized(false), + offloadStatus(AIEOffloadThreadStatus::IDLE), mEnCircularBuf(false), + mCircularBufOverwrite(false), context(context), metadata(metadata) + { + bufAllocSz = getAlignedTraceBufSize(totalSz, static_cast(numStream)); + mReadTrace = std::bind(&AIETraceOffload::readTraceGMIO, this, std::placeholders::_1); + } + + AIETraceOffload::~AIETraceOffload() + { + stopOffload(); + if (offloadThread.joinable()) + offloadThread.join(); + } + + bool AIETraceOffload::initReadTrace() + { + // The code below is hanging, so for now don't run + //return; + + xrt_core::message::send(severity_level::info, "XRT", + "Starting configuration for NPU3."); + + buffers.clear(); + buffers.resize(numStream); + + // TODO: get board-specific values + constexpr std::uint64_t DDR_AIE_ADDR_OFFSET = std::uint64_t{0x80000000}; + + xdp::aie::driver_config meta_config = metadata->getAIEConfigMetadata(); + + XAie_Config cfg{ + meta_config.hw_gen, + meta_config.base_address, + meta_config.column_shift, + meta_config.row_shift, + meta_config.num_rows, + meta_config.num_columns, 
+ meta_config.shim_row, + meta_config.mem_row_start, + meta_config.mem_num_rows, + meta_config.aie_tile_row_start, + meta_config.aie_tile_num_rows, + {0} // PartProp + }; + + auto RC = XAie_CfgInitialize(&aieDevInst, &cfg); + + if (RC != XAIE_OK) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE Driver Initialization Failed."); + return false; + } + + tranxHandler = std::make_unique(); + if (!tranxHandler->initializeTransaction(&aieDevInst, "AieTraceOffload")) + return false; + + for (uint64_t i = 0; i < numStream; ++i) { + VPDatabase* db = VPDatabase::Instance(); + TraceGMIO* traceGMIO = (db->getStaticInfo()).getTraceGMIO(deviceId, i); + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Allocating trace buffer of size " + std::to_string(bufAllocSz) + " for AIE Stream " + + std::to_string(i)); + xrt_bos.emplace_back(xrt::bo(context.get_device(), bufAllocSz, + XRT_BO_FLAGS_HOST_ONLY, tranxHandler->getGroupID(0, context))); + + buffers[i].bufId = xrt_bos.size(); + if (!buffers[i].bufId) { + bufferInitialized = false; + return bufferInitialized; + } + + if (!xrt_bos.empty()) { + auto bo_map = xrt_bos.back().map(); + memset(bo_map, 0, bufAllocSz); + } + + XAie_LocType loc; + XAie_DmaDesc dmaDesc; + loc = XAie_TileLoc(traceGMIO->shimColumn, 0); + + auto dmaType = traceGMIO->type; + XAie_DmaDirection dmaDir = (dmaType == io_type::TRACE_DMA) ? DMA_S2MM_TRACE : DMA_S2MM; + uint8_t s2mm_ch_id = (dmaType >= S2MM_TRACE) ? 
0 : traceGMIO->channelNumber; + uint16_t s2mm_bd_id = 0; /* always use first bd in private pool */ + + // S2MM BD + RC = XAie_DmaDescInit(&aieDevInst, &dmaDesc, loc); + RC = XAie_DmaSetAddrLen(&dmaDesc, xrt_bos[i].address() + DDR_AIE_ADDR_OFFSET, + static_cast(bufAllocSz)); + RC = XAie_DmaSetAxi(&dmaDesc, 0U, 8U, 0U, 0U, 0U); + //RC = XAie_DmaWriteBd(&aieDevInst, &dmaDesc, loc, s2mm_bd_id); + RC = XAie_DmaWriteBdPvtBuffPool(&aieDevInst, &dmaDesc, loc, s2mm_ch_id, dmaDir, s2mm_bd_id); + RC = XAie_DmaChannelPushBdToQueue(&aieDevInst, loc, s2mm_ch_id, dmaDir, s2mm_bd_id); + RC = XAie_DmaChannelEnable(&aieDevInst, loc, s2mm_ch_id, dmaDir); + + if (!tranxHandler->submitTransaction(&aieDevInst, context)) + return false; + + xrt_core::message::send(severity_level::info, "XRT", + "Successfully scheduled AIE Trace Offloading NPU3."); + } + + bufferInitialized = true; + return bufferInitialized; + } + + void AIETraceOffload::readTraceGMIO(bool final) + { + // Keep it low to save bandwidth + constexpr uint64_t chunk_512k = 0x80000; + + for (uint64_t index = 0; index < numStream; ++index) { + auto& bd = buffers[index]; + if (bd.offloadDone) + continue; + + // read one chunk or till the end of buffer + auto chunkEnd = bd.offset + chunk_512k; + if (final || chunkEnd > bufAllocSz) + chunkEnd = bufAllocSz; + bd.usedSz = chunkEnd; + + bd.offset += syncAndLog(index); + } + } + + uint64_t AIETraceOffload::syncAndLog(uint64_t index) + { + auto& bd = buffers[index]; + + if (bd.offset >= bd.usedSz) + return 0; + + // Amount of newly written trace + uint64_t nBytes = bd.usedSz - bd.offset; + + xrt_bos[index].sync(XCL_BO_SYNC_BO_FROM_DEVICE, nBytes, bd.offset); + auto in_bo_map = xrt_bos[index].map() + bd.offset; + + if (!in_bo_map) + return 0; + + nBytes = searchWrittenBytes((void*)in_bo_map, nBytes); // scan only the window just synced; in_bo_map is already advanced by bd.offset + + // check for full buffer + if (bd.offset + nBytes >= bufAllocSz) { + bd.isFull = true; + bd.offloadDone = true; + } + + // Log nBytes of trace + traceLogger->addAIETraceData(index,
(void*)in_bo_map, nBytes, true); + return nBytes; + } + + void AIETraceOffload::startOffload() + { + std::lock_guard lock(statusLock); // hold the lock before reading offloadStatus, like every other accessor + if (offloadStatus == AIEOffloadThreadStatus::RUNNING) + return; + + offloadStatus = AIEOffloadThreadStatus::RUNNING; + + offloadThread = std::thread(&AIETraceOffload::continuousOffload, this); + } + + void AIETraceOffload::continuousOffload() + { + if (!bufferInitialized && !initReadTrace()) { + offloadFinished(); + return; + } + + while (keepOffloading()) { + mReadTrace(false); + std::this_thread::sleep_for(std::chrono::microseconds(offloadIntervalUs)); + } + + // Note: This will call flush and reset on datamover + mReadTrace(true); + endReadTrace(); + offloadFinished(); + } + + bool AIETraceOffload::keepOffloading() + { + std::lock_guard lock(statusLock); + return (AIEOffloadThreadStatus::RUNNING == offloadStatus); + } + + void AIETraceOffload::stopOffload() + { + std::lock_guard lock(statusLock); + if (AIEOffloadThreadStatus::STOPPED == offloadStatus) + return; + offloadStatus = AIEOffloadThreadStatus::STOPPING; + } + + void AIETraceOffload::offloadFinished() + { + std::lock_guard lock(statusLock); + if (AIEOffloadThreadStatus::STOPPED == offloadStatus) + return; + offloadStatus = AIEOffloadThreadStatus::STOPPED; + } + + void AIETraceOffload::endReadTrace() + { + for (uint64_t i = 0; i < numStream ; ++i) { + if (!buffers[i].bufId) + continue; + + buffers[i].bufId = 0; + } + bufferInitialized = false; + } + + uint64_t AIETraceOffload::searchWrittenBytes(void* buf, uint64_t bytes) + { + /* + * Look For trace boundary using binary search.
+ * Use Dword to be safe + */ + auto words = static_cast(buf); + uint64_t wordcount = bytes / TRACE_PACKET_SIZE; + + // indices + int64_t low = 0; + int64_t high = static_cast(wordcount) - 1; + + // Boundary at which trace ends and 0s begin + uint64_t boundary = wordcount; + + while (low <= high) { + int64_t mid = low + (high - low) / 2; + + if (!words[mid]) { + boundary = mid; + high = mid - 1; + } + else { + low = mid + 1; + } + } + + uint64_t written = boundary * TRACE_PACKET_SIZE; + + debug_stream << "Found Boundary at 0x" << std::hex << written << std::dec + << std::endl; + + return written; + } + +} // namespace xdp diff --git a/src/runtime_src/xdp/profile/device/aie_trace/client/aie_trace_offload_npu3.h b/src/runtime_src/xdp/profile/device/aie_trace/client/aie_trace_offload_npu3.h new file mode 100755 index 00000000000..ddd4b5f8bb7 --- /dev/null +++ b/src/runtime_src/xdp/profile/device/aie_trace/client/aie_trace_offload_npu3.h @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2025 Advanced Micro Devices, Inc. 
All rights reserved + +#ifndef XDP_PROFILE_AIE_TRACE_OFFLOAD_NPU3_H_ +#define XDP_PROFILE_AIE_TRACE_OFFLOAD_NPU3_H_ + +#include "core/include/xrt/xrt_hw_context.h" +#include "core/include/xrt/xrt_kernel.h" + +#include "xdp/config.h" +#include "xdp/profile/device/tracedefs.h" +#include "xdp/profile/device/common/npu3/npu3_transaction.h" +#include "xdp/profile/plugin/aie_trace/aie_trace_metadata.h" + +#include + +extern "C" { + #include + #include +} + +namespace xdp { + +class PLDeviceIntf; +class AIETraceLogger; + +#define debug_stream \ +if(!m_debug); else std::cout + +struct AIETraceBufferInfo +{ + size_t bufId; +// uint64_t allocSz; // currently all the buffers are equal size + uint64_t usedSz; + uint64_t offset; + uint32_t rollover_count; + bool isFull; + bool offloadDone; + + AIETraceBufferInfo() + : bufId(0), + usedSz(0), + offset(0), + rollover_count(0), + isFull(false), + offloadDone(false) + {} +}; + +enum class AIEOffloadThreadStatus { + IDLE, + RUNNING, + STOPPING, + STOPPED +}; + +class AIETraceOffload +{ + public: + AIETraceOffload(void* handle, uint64_t id, + PLDeviceIntf*, AIETraceLogger*, + bool isPlio, + uint64_t totalSize, + uint64_t numStrm, + xrt::hw_context context, + std::shared_ptr metadata + ); + ~AIETraceOffload(); + + public: + bool initReadTrace(); + void endReadTrace(); + void startOffload(); + void stopOffload(); + + inline AIETraceLogger* getAIETraceLogger() { return traceLogger; } + inline void setContinuousTrace() { traceContinuous = true; } + inline bool continuousTrace() { return traceContinuous; } + inline void setOffloadIntervalUs(uint64_t v) { offloadIntervalUs = v; } + + inline AIEOffloadThreadStatus getOffloadStatus() { + std::lock_guard lock(statusLock); + return offloadStatus; + }; + + void readTrace(bool final) {mReadTrace(final);}; + bool isTraceBufferFull() {return false;}; + + private: + void* deviceHandle; + uint64_t deviceId; + PLDeviceIntf* plDeviceIntf; + AIETraceLogger* traceLogger; + + bool isPLIO; + uint64_t 
totalSz; + uint64_t numStream; + uint64_t bufAllocSz; + std::vector buffers; + + //Internal use only + // Set this for verbose trace offload + bool m_debug = false; + XAie_DevInst aieDevInst = {0}; + std::unique_ptr tranxHandler; + + // Continuous Trace Offload (For PLIO) + bool traceContinuous; + uint64_t offloadIntervalUs; + bool bufferInitialized; + std::mutex statusLock; + AIEOffloadThreadStatus offloadStatus; + std::thread offloadThread; + + //Circular Buffer Tracking + bool mEnCircularBuf; + bool mCircularBufOverwrite; + + xrt::hw_context context; + std::shared_ptr metadata; + std::vector xrt_bos; + + private: + void readTraceGMIO(bool final); + void continuousOffload(); + bool keepOffloading(); + void offloadFinished(); + uint64_t syncAndLog(uint64_t index); + std::function mReadTrace; + uint64_t searchWrittenBytes(void * buf, uint64_t bytes); +}; + +} + +#endif diff --git a/src/runtime_src/xdp/profile/device/common/npu3/npu3_transaction.cpp b/src/runtime_src/xdp/profile/device/common/npu3/npu3_transaction.cpp new file mode 100755 index 00000000000..b6bd8d3caef --- /dev/null +++ b/src/runtime_src/xdp/profile/device/common/npu3/npu3_transaction.cpp @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2025 Advanced Micro Devices, Inc. 
All rights reserved + +#include + +#include "npu3_transaction.h" +#include "core/common/message.h" +#include "xrt/experimental/xrt_elf.h" +#include "xrt/experimental/xrt_ext.h" +#include "xrt/experimental/xrt_module.h" +#include "xrt/xrt_hw_context.h" +#include "xrt/xrt_kernel.h" + +#include "core/common/aiebu/src/cpp/include/aiebu/aiebu_assembler.h" +#include "core/common/aiebu/src/cpp/include/aiebu/aiebu_error.h" + +#include +#include +#include +#include + +extern "C" { + #include + #include +} + +namespace xdp::aie { + using severity_level = xrt_core::message::severity_level; + + bool NPU3Transaction::initializeTransaction(XAie_DevInst* aieDevInst, std::string tName) + { + setTransactionName(tName); + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Writing to New Control Code ASM file: " + getAsmFileName()); + + try { + // NOTE: XAIE_IO_BACKEND_CONTROLCODE is the default + // XAie_SetIOBackend(aieDevInst, XAIE_IO_BACKEND_CONTROLCODE); + XAie_OpenControlCodeFile(aieDevInst, getAsmFileName().c_str(), 8192); + XAie_StartNewJob(aieDevInst); + return true; + } + catch(const std::exception& e) { + xrt_core::message::send(xrt_core::message::severity_level::error, "XRT", + "Error in generating asm File: " + getAsmFileName() + "\n" + e.what()); + } + xrt_core::message::send(severity_level::warning, "XRT", "AIE Transaction Initialization Failed."); + return false; + } + + bool NPU3Transaction::completeASM(XAie_DevInst* aieDevInst) + { + // + // 1. End generation of ASM file + // + try { + XAie_EndJob(aieDevInst); + XAie_EndPage(aieDevInst); + XAie_CloseControlCodeFile(aieDevInst); + } + catch(const std::exception& e) { + xrt_core::message::send(xrt_core::message::severity_level::error, "XRT", + "Error in generating ASM file: " + getAsmFileName() + "\n" + e.what()); + return false; + } + return true; + } + + bool NPU3Transaction::generateELF() + { + // + // 2. 
Convert ASM to ELF + // + // Fill this vector with ASM content + std::vector control_code_buf; + std::vector libpaths; + libpaths.push_back("./"); + + try { +#if 1 + //Read ASM file + std::string asmFileName = getAsmFileName(); + if (!std::filesystem::exists(asmFileName)) + throw std::runtime_error("file:" + asmFileName + " not found\n"); + + std::ifstream inAsm(asmFileName, std::ios::in | std::ios::binary); + std::cout << "Open file " << asmFileName << std::endl; + + auto file_size = std::filesystem::file_size(asmFileName); + control_code_buf.resize(file_size); + + inAsm.read(control_code_buf.data(), file_size); + std::streamsize bytesRead = inAsm.gcount(); + if (static_cast(bytesRead) != static_cast(file_size)) { + std::cerr << "Read " << bytesRead << " bytes but expected " << file_size + << " for file " << asmFileName << '\n'; + control_code_buf.resize(static_cast(bytesRead)); // keep only read bytes + } else { + std::cout << "ASM file read (" << file_size << " bytes): " << asmFileName << '\n'; + } + + //Convert ASM to ELF data. 
+ auto as = aiebu::aiebu_assembler(aiebu::aiebu_assembler::buffer_type::asm_aie4, + control_code_buf, {}, libpaths); + + //Write elf data to a file + auto e = as.get_elf(); + std::cout << "Elf size:" << e.size() << std::endl; + std::ofstream outElf(getElfFileName(), std::ios_base::binary); + outElf.write(e.data(), e.size()); +#else + auto check1 = std::getenv("AIEBU_REPO"); + auto check2 = std::getenv("PYTHONPATH"); + if ((check1 == nullptr) || (check2 == nullptr)) { + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", + "Please define AIEBU_REPO and PYTHONPATH so elf generation can work."); + return false; + } + + std::stringstream command; + command << "${AIEBU_REPO}/src/python/aiebu/control_asm_disasm.py -t aie4 " + << getAsmFileName() << " -o " << getElfFileName(); + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Generating ELF using: " + command.str()); + if (system(command.str().c_str())) { + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Elf generation failed"); + return false; + } +#endif + } + catch(const std::exception& e) { + xrt_core::message::send(xrt_core::message::severity_level::error, "XRT", + "Error in generating Elf file: " + getElfFileName() + "\n" + e.what()); + return false; + } + return true; + } + + bool NPU3Transaction::submitELF(xrt::hw_context hwContext) + { + // + // 3. Submit ELF to microcontroller + // + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Start New Control Code Elf"); + xrt::elf profileElf; + try { + profileElf = xrt::elf(getElfFileName()); + } + catch (...) { + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", + "Failed to load " + getElfFileName() + ". 
Cannot configure AIE to profile."); + return false; + } + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Elf Object Created"); + xrt::module mod{profileElf}; + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Module Created"); + xrt::kernel kernel; + try { + kernel = xrt::ext::kernel{hwContext, mod, "XDP_KERNEL:{IPUV1CNN}"}; + } catch (...) { + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", + "XDP_KERNEL not found in HW Context. Unable to run " + getElfFileName()); + return false; + } + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "XDP_KERNEL created"); + xrt::run run{kernel}; + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Kernel run created"); + run.start(); + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Run started"); + run.wait2(); + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Wait done!"); + return true; + } + + bool NPU3Transaction::submitTransaction(XAie_DevInst* aieDevInst, xrt::hw_context hwContext) + { + if (!completeASM(aieDevInst)) + return false; + if (!generateELF()) + return false; + if (!submitELF(hwContext)) + return false; + return true; + } +} diff --git a/src/runtime_src/xdp/profile/device/common/npu3/npu3_transaction.h b/src/runtime_src/xdp/profile/device/common/npu3/npu3_transaction.h new file mode 100755 index 00000000000..6076aaff65d --- /dev/null +++ b/src/runtime_src/xdp/profile/device/common/npu3/npu3_transaction.h @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2025 Advanced Micro Devices, Inc. 
All rights reserved + +#ifndef NPU3_TRANSACTION_DOT_H +#define NPU3_TRANSACTION_DOT_H + +#include +#include +#include + +#include "xrt/xrt_hw_context.h" +#include "xrt/xrt_kernel.h" + +extern "C" { + #include + #include +} + +namespace xdp::aie { + class NPU3Transaction { + public: + NPU3Transaction() {}; + bool initializeTransaction(XAie_DevInst* aieDevInst, std::string tName); + bool submitTransaction(XAie_DevInst* aieDevInst, xrt::hw_context hwContext); + bool completeASM(XAie_DevInst* aieDevInst); + bool generateELF(); + bool submitELF(xrt::hw_context hwContext); + + void setTransactionName(std::string newTransactionName) {m_transactionName = newTransactionName;} + std::string getAsmFileName() { return m_transactionName + ".asm"; } + std::string getElfFileName() { return m_transactionName + ".elf"; } + int getGroupID(int id, xrt::hw_context hwContext) { + xrt::kernel kernel = xrt::kernel(hwContext, "XDP_KERNEL"); + return kernel.group_id(id); + } + + private: + std::string m_transactionName; + std::vector m_columns; + std::vector m_rows; + std::vector m_offsets; + std::vector m_values; + }; + +} // namespace xdp::aie + +#endif \ No newline at end of file diff --git a/src/runtime_src/xdp/profile/device/tracedefs.h b/src/runtime_src/xdp/profile/device/tracedefs.h index c095a74a9ff..b02e11b1c32 100755 --- a/src/runtime_src/xdp/profile/device/tracedefs.h +++ b/src/runtime_src/xdp/profile/device/tracedefs.h @@ -124,7 +124,10 @@ constexpr uint32_t NUM_MEM_TRACE_PCS = 2; constexpr uint32_t NUM_COMBO_EVENT_CONTROL = 3; constexpr uint32_t NUM_COMBO_EVENT_INPUT = 4; constexpr uint32_t NUM_SWITCH_MONITOR_PORTS = 8; +// NOTE: This is 2 for memory tiles and 4 for interface tiles (NPU3 only) constexpr uint32_t NUM_CHANNEL_SELECTS = 2; +constexpr uint32_t NUM_CHANNEL_SELECTS_SHIM_NPU3 = 4; +constexpr uint32_t NUM_CHANNEL_SELECTS_MAX = 4; constexpr uint32_t NUM_MEM_CHANNELS = 6; constexpr uint32_t BROADCAST_MASK_DEFAULT = 65535; @@ -147,6 +150,12 @@ constexpr uint32_t 
GROUP_STREAM_SWITCH_RUNNING_MASK = 0x00002222; constexpr uint64_t AIE_OFFSET_EDGE_CONTROL_MEM_TILE = 0x94408; constexpr uint64_t AIE_OFFSET_EDGE_CONTROL_MEM = 0x14408; +constexpr uint64_t AIE_OFFSET_EDGE_CONTROL_MEM_TILE_NPU3 = 0x001A0; +constexpr uint64_t AIE_OFFSET_EDGE_CONTROL_MEM_NPU3 = 0xA0410; + +// DMA types +#define MM2S_CONTROL 2 +#define S2MM_TRACE 3 #define XDP_DEV_GEN_AIE 1U #define XDP_DEV_GEN_AIEML 2U diff --git a/src/runtime_src/xdp/profile/device/utility.cpp b/src/runtime_src/xdp/profile/device/utility.cpp index d0eda95b8fa..87f15c79b69 100644 --- a/src/runtime_src/xdp/profile/device/utility.cpp +++ b/src/runtime_src/xdp/profile/device/utility.cpp @@ -71,7 +71,7 @@ namespace xdp::util { std::string getDeviceName(void* deviceHandle, bool hw_context_flow) { - std::string deviceName = ""; + std::string deviceName = "device"; // The deviceHandle could either be a pointer to a hw_context_impl // or a shim pointer. We need to get the core device in either case. diff --git a/src/runtime_src/xdp/profile/plugin/CMakeLists.txt b/src/runtime_src/xdp/profile/plugin/CMakeLists.txt index 354229b9a2b..0dda82e1c71 100644 --- a/src/runtime_src/xdp/profile/plugin/CMakeLists.txt +++ b/src/runtime_src/xdp/profile/plugin/CMakeLists.txt @@ -12,17 +12,24 @@ if (XDP_VE2_BUILD_CMAKE STREQUAL "yes") add_subdirectory(user) elseif (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") - add_subdirectory(native) add_subdirectory(user) add_subdirectory(ml_timeline) + add_subdirectory(aie_halt) + add_subdirectory(aie_profile) + add_subdirectory(aie_trace) + #add_subdirectory(aie_debug) if (WIN32) - add_subdirectory(aie_profile) - add_subdirectory(aie_trace) - add_subdirectory(aie_halt) add_subdirectory(aie_pc) endif() + + if (XDP_NPU3_BUILD_CMAKE STREQUAL "yes") + add_custom_target(xdp_npu3_target ALL DEPENDS + xdp_aie_halt_plugin + xdp_aie_profile_plugin + xdp_aie_trace_plugin) + endif() else() diff --git a/src/runtime_src/xdp/profile/plugin/aie_base/aie_base_util.h 
b/src/runtime_src/xdp/profile/plugin/aie_base/aie_base_util.h index baa70ab238c..080fad86be4 100755 --- a/src/runtime_src/xdp/profile/plugin/aie_base/aie_base_util.h +++ b/src/runtime_src/xdp/profile/plugin/aie_base/aie_base_util.h @@ -72,7 +72,7 @@ namespace xdp::aie { /** * @brief Get HW generation-specific number of performance counters - * @note This function currently supports AIE1 and AIE2* + * @note This function currently supports AIE1, AIE2*, and NPU3 * @param hwGen integer representing the hardware generation * @param mod module type * @return number of counters available in the module @@ -81,20 +81,84 @@ namespace xdp::aie { { if (mod == xdp::module_type::core) { return (xdp::aie::isAIE2ps(hwGen) ? aie2ps::cm_num_counters - : aie2::cm_num_counters); + : (xdp::aie::isNPU3(hwGen) ? npu3::cm_num_counters + : aie2::cm_num_counters)); } if (mod == xdp::module_type::dma) { return (xdp::aie::isAIE2ps(hwGen) ? aie2ps::mm_num_counters - : aie2::mm_num_counters); + : (xdp::aie::isNPU3(hwGen) ? npu3::mm_num_counters + : aie2::mm_num_counters)); } if (mod == xdp::module_type::shim) { return (xdp::aie::isAIE2ps(hwGen) ? aie2ps::shim_num_counters - : aie2::shim_num_counters); + : (xdp::aie::isNPU3(hwGen) ? npu3::shim_num_counters + : aie2::shim_num_counters)); } if (mod == xdp::module_type::mem_tile) { return (xdp::aie::isAIE2ps(hwGen) ? aie2ps::mem_num_counters - : aie2::mem_num_counters); + : (xdp::aie::isNPU3(hwGen) ? 
npu3::mem_num_counters + : aie2::mem_num_counters)); + } + if (mod == xdp::module_type::uc) { + return (NUM_UC_EVENT_COUNTERS + NUM_UC_LATENCY_COUNTERS); + } + + return 0; + } + + /** + * @brief Get HW generation-specific number of MM2S channels + * @note This function currently supports AIE1, AIE2*, and NPU3 + * @param hwGen integer representing the hardware generation + * @param mod module type + * @return number of MM2S channels available in the module + */ + inline unsigned int getNumMM2SChannels(const int hwGen, xdp::module_type mod) + { + if ((mod == xdp::module_type::core) || (mod == xdp::module_type::dma)) { + return (xdp::aie::isAIE2ps(hwGen) ? aie2ps::mm_num_dma_mm2s_channels + : (xdp::aie::isNPU3(hwGen) ? npu3::mm_num_dma_mm2s_channels + : aie2::mm_num_dma_mm2s_channels)); + } + if (mod == xdp::module_type::shim) { + return (xdp::aie::isAIE2ps(hwGen) ? aie2ps::shim_num_dma_mm2s_channels + : (xdp::aie::isNPU3(hwGen) ? npu3::shim_num_dma_mm2s_channels + : aie2::shim_num_dma_mm2s_channels)); + } + if (mod == xdp::module_type::mem_tile) { + return (xdp::aie::isAIE2ps(hwGen) ? aie2ps::mem_num_dma_mm2s_channels + : (xdp::aie::isNPU3(hwGen) ? npu3::mem_num_dma_mm2s_channels + : aie2::mem_num_dma_mm2s_channels)); } + + return 0; + } + + /** + * @brief Get HW generation-specific number of S2MM channels + * @note This function currently supports AIE1, AIE2*, and NPU3 + * @param hwGen integer representing the hardware generation + * @param mod module type + * @return number of S2MM channels available in the module + */ + inline unsigned int getNumS2MMChannels(const int hwGen, xdp::module_type mod) + { + if ((mod == xdp::module_type::core) || (mod == xdp::module_type::dma)) { + return (xdp::aie::isAIE2ps(hwGen) ? aie2ps::mm_num_dma_s2mm_channels + : (xdp::aie::isNPU3(hwGen) ? npu3::mm_num_dma_s2mm_channels + : aie2::mm_num_dma_s2mm_channels)); + } + if (mod == xdp::module_type::shim) { + return (xdp::aie::isAIE2ps(hwGen) ? 
aie2ps::shim_num_dma_s2mm_channels + : (xdp::aie::isNPU3(hwGen) ? npu3::shim_num_dma_s2mm_channels + : aie2::shim_num_dma_s2mm_channels)); + } + if (mod == xdp::module_type::mem_tile) { + return (xdp::aie::isAIE2ps(hwGen) ? aie2ps::mem_num_dma_s2mm_channels + : (xdp::aie::isNPU3(hwGen) ? npu3::mem_num_dma_s2mm_channels + : aie2::mem_num_dma_s2mm_channels)); + } + return 0; } diff --git a/src/runtime_src/xdp/profile/plugin/aie_base/generations/aie1_attributes.h b/src/runtime_src/xdp/profile/plugin/aie_base/generations/aie1_attributes.h index 925ddc879aa..f199e887415 100755 --- a/src/runtime_src/xdp/profile/plugin/aie_base/generations/aie1_attributes.h +++ b/src/runtime_src/xdp/profile/plugin/aie_base/generations/aie1_attributes.h @@ -29,6 +29,13 @@ const unsigned int clock_freq_mhz = 1250; // Bit widths const unsigned int stream_bit_width = 32; const unsigned int cascade_bit_width = 384; +// Number of DMA channels +const unsigned int mm_num_dma_s2mm_channels = 2; +const unsigned int mm_num_dma_mm2s_channels = 2; +const unsigned int mem_num_dma_s2mm_channels = 0; +const unsigned int mem_num_dma_mm2s_channels = 0; +const unsigned int shim_num_dma_s2mm_channels = 2; +const unsigned int shim_num_dma_mm2s_channels = 2; // Trace events per module/tile const unsigned int num_trace_events = 8; // Counters per module/tile diff --git a/src/runtime_src/xdp/profile/plugin/aie_base/generations/aie2_attributes.h b/src/runtime_src/xdp/profile/plugin/aie_base/generations/aie2_attributes.h index 72b1865fe0e..0d9ab83c0ce 100755 --- a/src/runtime_src/xdp/profile/plugin/aie_base/generations/aie2_attributes.h +++ b/src/runtime_src/xdp/profile/plugin/aie_base/generations/aie2_attributes.h @@ -29,6 +29,13 @@ const unsigned int clock_freq_mhz = 1250; // Bit widths const unsigned int stream_bit_width = 32; const unsigned int cascade_bit_width = 512; +// Number of DMA channels +const unsigned int mm_num_dma_s2mm_channels = 2; +const unsigned int mm_num_dma_mm2s_channels = 2; +const unsigned 
int mem_num_dma_s2mm_channels = 6; +const unsigned int mem_num_dma_mm2s_channels = 6; +const unsigned int shim_num_dma_s2mm_channels = 2; +const unsigned int shim_num_dma_mm2s_channels = 2; // Trace events per module/tile const unsigned int num_trace_events = 8; // Counters per module/tile diff --git a/src/runtime_src/xdp/profile/plugin/aie_base/generations/aie2ps_attributes.h b/src/runtime_src/xdp/profile/plugin/aie_base/generations/aie2ps_attributes.h index 5de7ad8eced..dff05a4006e 100755 --- a/src/runtime_src/xdp/profile/plugin/aie_base/generations/aie2ps_attributes.h +++ b/src/runtime_src/xdp/profile/plugin/aie_base/generations/aie2ps_attributes.h @@ -29,6 +29,13 @@ const unsigned int clock_freq_mhz = 1250; // Bit widths const unsigned int stream_bit_width = 64; const unsigned int cascade_bit_width = 512; +// Number of DMA channels +const unsigned int mm_num_dma_s2mm_channels = 2; +const unsigned int mm_num_dma_mm2s_channels = 2; +const unsigned int mem_num_dma_s2mm_channels = 6; +const unsigned int mem_num_dma_mm2s_channels = 6; +const unsigned int shim_num_dma_s2mm_channels = 2; +const unsigned int shim_num_dma_mm2s_channels = 2; // Trace events per module/tile const unsigned int num_trace_events = 8; // Counters per module/tile diff --git a/src/runtime_src/xdp/profile/plugin/aie_base/generations/aie_generations.h b/src/runtime_src/xdp/profile/plugin/aie_base/generations/aie_generations.h index ad59d216a72..ac81b4f69c6 100755 --- a/src/runtime_src/xdp/profile/plugin/aie_base/generations/aie_generations.h +++ b/src/runtime_src/xdp/profile/plugin/aie_base/generations/aie_generations.h @@ -7,7 +7,7 @@ #include "aie1_attributes.h" #include "aie2_attributes.h" #include "aie2ps_attributes.h" -//#include "npu3_attributes.h" +#include "npu3_attributes.h" //#include "aie1_registers.h" //#include "aie2_registers.h" diff --git a/src/runtime_src/xdp/profile/plugin/aie_base/generations/npu3_attributes.h 
b/src/runtime_src/xdp/profile/plugin/aie_base/generations/npu3_attributes.h new file mode 100755 index 00000000000..66527ed0e7d --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_base/generations/npu3_attributes.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved + +#ifndef NPU3_ATTRIBUTES_H_ +#define NPU3_ATTRIBUTES_H_ + +namespace npu3 +{ + +// NOTE: Replace with actual attributes when available + +// Number of DMA channels +const unsigned int mm_num_dma_s2mm_channels = 2; +const unsigned int mm_num_dma_mm2s_channels = 1; +const unsigned int mem_num_dma_s2mm_channels = 6; +const unsigned int mem_num_dma_mm2s_channels = 6; +const unsigned int shim_num_dma_s2mm_channels = 2; +const unsigned int shim_num_dma_mm2s_channels = 2; +// Counters per module/tile +const unsigned int cm_num_counters = 12; +const unsigned int mm_num_counters = 0; +const unsigned int shim_num_counters = 12; +const unsigned int mem_num_counters = 12; + +} // namespace npu3 + +#endif /* NPU3_ATTRIBUTES_H_ */ diff --git a/src/runtime_src/xdp/profile/plugin/aie_base/generations/npu3_registers.h b/src/runtime_src/xdp/profile/plugin/aie_base/generations/npu3_registers.h new file mode 100755 index 00000000000..e9b8d8d12ab --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_base/generations/npu3_registers.h @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2025 Advanced Micro Devices, Inc. 
All rights reserved + +#ifndef NPU3_REGISTERS_H_ +#define NPU3_REGISTERS_H_ + +namespace npu3 +{ + +// TODO: Add NPU3-specific memory map when available + +// Core Counters +const unsigned int cm_performance_counter0 = 0; +const unsigned int cm_performance_counter1 = 0; +const unsigned int cm_performance_counter2 = 0; +const unsigned int cm_performance_counter3 = 0; +const unsigned int cm_performance_counter4 = 0; +const unsigned int cm_performance_counter5 = 0; +const unsigned int cm_performance_counter6 = 0; +const unsigned int cm_performance_counter7 = 0; +const unsigned int cm_performance_counter8 = 0; +const unsigned int cm_performance_counter9 = 0; +const unsigned int cm_performance_counter10 = 0; +const unsigned int cm_performance_counter11 = 0; +// Memory Tile Counters +const unsigned int mem_performance_counter0 = 0; +const unsigned int mem_performance_counter1 = 0; +const unsigned int mem_performance_counter2 = 0; +const unsigned int mem_performance_counter3 = 0; +const unsigned int mem_performance_counter4 = 0; +const unsigned int mem_performance_counter5 = 0; +const unsigned int mem_performance_counter6 = 0; +const unsigned int mem_performance_counter7 = 0; +const unsigned int mem_performance_counter8 = 0; +const unsigned int mem_performance_counter9 = 0; +const unsigned int mem_performance_counter10 = 0; +const unsigned int mem_performance_counter11 = 0; +// Interface Tile Counters +const unsigned int shim_performance_counter0 = 0; +const unsigned int shim_performance_counter1 = 0; +const unsigned int shim_performance_counter2 = 0; +const unsigned int shim_performance_counter3 = 0; +const unsigned int shim_performance_counter4 = 0; +const unsigned int shim_performance_counter5 = 0; +const unsigned int shim_performance_counter6 = 0; +const unsigned int shim_performance_counter7 = 0; +const unsigned int shim_performance_counter8 = 0; +const unsigned int shim_performance_counter9 = 0; +const unsigned int shim_performance_counter10 = 0; +const unsigned int 
shim_performance_counter11 = 0; + +} // namespace npu3 + +#endif /* NPU3_REGISTERS_H_ */ diff --git a/src/runtime_src/xdp/profile/plugin/aie_debug/aie_debug_metadata_npu3.cpp b/src/runtime_src/xdp/profile/plugin/aie_debug/aie_debug_metadata_npu3.cpp new file mode 100755 index 00000000000..b326c7884f8 --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_debug/aie_debug_metadata_npu3.cpp @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved + + #ifndef AIE_DEBUG_METADATA_NPU3_CPP + #define AIE_DEBUG_METADATA_NPU3_CPP + + #include "aie_debug_metadata.h" + + namespace xdp { + + /************************************************************************************* + NPU3 Registers + *************************************************************************************/ +void NPU3UsedRegisters::populateProfileRegisters() { + // TODO: Populate profile registers for NPU3 +} + +void NPU3UsedRegisters::populateTraceRegisters() { + // TODO: Populate trace registers for NPU3 +} + +void NPU3UsedRegisters::populateRegNameToValueMap() { + // TODO: Populate register name to value map for NPU3 +} + +void NPU3UsedRegisters::populateRegValueToNameMap() { + // TODO: Populate register value to name map for NPU3 +} + +void NPU3UsedRegisters::populateRegAddrToSizeMap() { + // TODO: Populate register address to size map for NPU3 +} + +} // end namespace xdp diff --git a/src/runtime_src/xdp/profile/plugin/aie_debug/client/aie_debug_npu3.cpp b/src/runtime_src/xdp/profile/plugin/aie_debug/client/aie_debug_npu3.cpp new file mode 100755 index 00000000000..69149e9e59b --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_debug/client/aie_debug_npu3.cpp @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024-2025 Advanced Micro Devices, Inc. 
All rights reserved + +#define XDP_PLUGIN_SOURCE + +#include "aie_debug_npu3.h" +#include "xdp/profile/plugin/aie_debug/aie_debug_metadata.h" + +#include +#include + +#include "core/common/api/bo_int.h" +#include "core/common/api/hw_context_int.h" +#include "core/common/config_reader.h" +#include "core/common/message.h" +#include "core/include/experimental/xrt-next.h" + +#include "xdp/profile/database/static_info/aie_util.h" +#include "xdp/profile/database/database.h" +#include "xdp/profile/database/static_info/aie_constructs.h" +#include "xdp/profile/plugin/vp_base/info.h" + +namespace xdp { + using severity_level = xrt_core::message::severity_level; + using tile_type = xdp::tile_type; + using module_type = xdp::module_type; + + /**************************************************************************** + * Client constructor + ***************************************************************************/ + AieDebug_NPU3Impl::AieDebug_NPU3Impl(VPDatabase* database, std::shared_ptr metadata) + : AieDebugImpl(database, metadata) + { + hwContext = metadata->getHwContext(); + //transactionHandler = std::make_unique(hwContext, "AIE Debug Setup"); + } + + /**************************************************************************** + * Poll all registers + ***************************************************************************/ + void AieDebug_NPU3Impl::poll(const uint64_t deviceID, void* /*handle*/) + { + xrt_core::message::send(severity_level::debug, "XRT", "Calling AIE Poll."); + + if (db->infoAvailable(xdp::info::ml_timeline)) { + db->broadcast(VPDatabase::MessageType::READ_RECORD_TIMESTAMPS, nullptr); + xrt_core::message::send(severity_level::debug, "XRT", "Done reading recorded timestamps."); + } + + xrt::bo resultBO; + uint32_t* output = nullptr; + try { + resultBO = xrt_core::bo_int::create_debug_bo(hwContext, 0x20000); + output = resultBO.map(); + memset(output, 0, 0x20000); + } catch (std::exception& e) { + std::stringstream msg; + msg << "Unable to create 
128KB buffer for AIE Debug results. Cannot get AIE Debug info. " << e.what() << std::endl; + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", msg.str()); + return; + } + + //XAie_StartTransaction(&aieDevInst, XAIE_TRANSACTION_DISABLE_AUTO_FLUSH); + + //if (!transactionHandler->initializeKernel("XDP_KERNEL")) + // return; + + //XAie_AddCustomTxnOp(&aieDevInst, XAIE_IO_CUSTOM_OP_READ_REGS, (void*)op, op_size); + //txn_ptr = XAie_ExportSerializedTransaction(&aieDevInst, 1, 0); + + //if (!transactionHandler->submitTransaction(txn_ptr)) + // return; + + //XAie_ClearTransaction(&aieDevInst); + + resultBO.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + + for (uint32_t i = 0; i < op->count; i++) { + uint8_t col = (op->data[i].address >> 25) & 0x1F; + uint8_t row = (op->data[i].address >> 20) & 0x1F; + uint64_t reg = (op->data[i].address) & 0xFFFFF; + + if (aie::isDebugVerbosity()) { + std::stringstream msg; + msg << "Debug tile (" << +col << ", " << +row << ") " << "hex address/values: " + << std::hex << reg << " : " << output[i] << std::dec; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", msg.str()); + } + + tile_type tile; + tile.col = col; + tile.row = row; + + if (debugTileMap.find(tile) == debugTileMap.end()) + debugTileMap[tile] = std::make_unique(col, row, reg); + + auto regName = metadata->lookupRegisterName(reg); + debugTileMap[tile]->addOffsetName(reg, regName); + debugTileMap[tile]->addValue(output[i]); + } + + // Add values to database + for (auto& tileAddr : debugTileMap) + tileAddr.second->printValues(static_cast(deviceID), db); + } + + /**************************************************************************** + * Update device + ***************************************************************************/ + void AieDebug_NPU3Impl::updateDevice() + { + // Do nothing for now + } + + /**************************************************************************** + * Update AIE device + 
***************************************************************************/ + void AieDebug_NPU3Impl::updateAIEDevice(void* /*handle*/) + { + if (!xrt_core::config::get_aie_debug()) + return; + + auto regValues = metadata->getRegisterValues(); + std::vector op_debug_data; + + // Traverse all module types + int counterId = 0; + for (int module = 0; module < metadata->getNumModules(); ++module) { + auto configMetrics = metadata->getConfigMetricsVec(module); + if (configMetrics.empty()) + continue; + + auto type = metadata->getModuleType(module); + auto name = moduleTypes.at(type); + + // List of registers to read for current module + auto Regs = regValues[type]; + if (Regs.empty()) + continue; + + if (aie::isDebugVerbosity()) { + std::stringstream msg; + msg << "AIE Debug monitoring tiles of type " << name << ":\n"; + for (auto& tileMetric : configMetrics) + msg << tileMetric.first.col << "," << tileMetric.first.row << " "; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + } + + // Traverse all active tiles for this module type + for (auto& tileMetric : configMetrics) { + auto tile = tileMetric.first; + auto tileOffset = (tile.col << 25) + (tile.row << 20); + + for (int i = 0; i < Regs.size(); i++) { + op_debug_data.emplace_back(register_data_t{Regs[i] + tileOffset}); + counterId++; + } + } + } + + auto meta_config = metadata->getAIEConfigMetadata(); + XAie_Config cfg { + meta_config.hw_gen, + meta_config.base_address, + meta_config.column_shift, + meta_config.row_shift, + meta_config.num_rows, + meta_config.num_columns, + meta_config.shim_row, + meta_config.mem_row_start, + meta_config.mem_num_rows, + meta_config.aie_tile_row_start, + meta_config.aie_tile_num_rows, + {0} // PartProp + }; + + auto RC = XAie_CfgInitialize(&aieDevInst, &cfg); + if (RC != XAIE_OK) { + xrt_core::message::send(severity_level::warning, "XRT", "AIE Driver Initialization Failed."); + return; + } + + op_size = sizeof(read_register_op_t) + sizeof(register_data_t) * 
(counterId - 1); + op = (read_register_op_t*)malloc(op_size); + op->count = counterId; + for (int i = 0; i < op_debug_data.size(); i++) + op->data[i] = op_debug_data[i]; + } + +} // end namespace xdp diff --git a/src/runtime_src/xdp/profile/plugin/aie_debug/client/aie_debug_npu3.h b/src/runtime_src/xdp/profile/plugin/aie_debug/client/aie_debug_npu3.h new file mode 100755 index 00000000000..db1e1fa7db2 --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_debug/client/aie_debug_npu3.h @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved + +#ifndef AIE_DEBUG_NPU3_H +#define AIE_DEBUG_NPU3_H + +#include +#include + +#include "xdp/profile/plugin/aie_debug/aie_debug_impl.h" +#include "xdp/profile/plugin/aie_debug/aie_debug_metadata.h" +//#include "xdp/profile/device/common/client_transaction.h" +#include "xdp/profile/database/static_info/aie_constructs.h" +#include "xdp/profile/database/static_info/filetypes/base_filetype_impl.h" +#include "xdp/profile/plugin/vp_base/vp_base_plugin.h" + +#include "core/include/xrt/xrt_hw_context.h" + +extern "C" { + #include + #include +} + +namespace xdp { + class ClientReadableTile; + + class AieDebug_NPU3Impl : public AieDebugImpl { + public: + AieDebug_NPU3Impl(VPDatabase* database, std::shared_ptr metadata); + ~AieDebug_NPU3Impl() = default; + void updateDevice(); + void updateAIEDevice(void* handle); + void poll(const uint64_t index, void* handle); + + private: + xrt::hw_context hwContext; + //std::unique_ptr transactionHandler; + uint8_t* txn_ptr; + XAie_DevInst aieDevInst = {0}; + read_register_op_t* op; + std::size_t op_size; + + std::map> debugTileMap; + }; + + class ClientReadableTile : public BaseReadableTile { + public: + ClientReadableTile(uint8_t c, uint8_t r, uint64_t to) { + col = c; + row = r; + tileOffset = to; + } + void addValue(uint32_t val) { + values.push_back(val); + } + void readValues(XAie_DevInst* /*aieDevInst*/) {} + }; +} 
// end namespace xdp + +#endif diff --git a/src/runtime_src/xdp/profile/plugin/aie_halt/CMakeLists.txt b/src/runtime_src/xdp/profile/plugin/aie_halt/CMakeLists.txt index 2ed5998d0d3..484b4b7c4f2 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_halt/CMakeLists.txt +++ b/src/runtime_src/xdp/profile/plugin/aie_halt/CMakeLists.txt @@ -30,9 +30,14 @@ file(GLOB XDP_DEVICE_COMMON_FILES if (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") add_library(xdp_aie_halt_plugin SHARED ${XDP_AIE_HALT_PLUGIN_FILES} ${XDP_DEVICE_COMMON_FILES}) add_dependencies(xdp_aie_halt_plugin xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_halt_plugin PRIVATE xdp_core xrt_coreutil xaiengine) + target_link_libraries(xdp_aie_halt_plugin PRIVATE xdp_core xrt_coreutil + xaiengine aiebu_library_objects) target_compile_definitions(xdp_aie_halt_plugin PRIVATE XDP_CLIENT_BUILD=1 -DXAIE_FEATURE_MSVC) - target_include_directories(xdp_aie_halt_plugin PRIVATE ${AIERT_DIR}/include) + if (XDP_NPU3_BUILD_CMAKE STREQUAL "yes") + target_compile_definitions(xdp_aie_halt_plugin PRIVATE XDP_NPU3_BUILD=1) + endif() + target_include_directories(xdp_aie_halt_plugin PRIVATE + ${AIERT_DIR}/include ${AIEBU_SOURCE_DIR}/src/cpp/include) set_target_properties(xdp_aie_halt_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) install (TARGETS xdp_aie_halt_plugin diff --git a/src/runtime_src/xdp/profile/plugin/aie_halt/clientDev/aie_halt_npu3.cpp b/src/runtime_src/xdp/profile/plugin/aie_halt/clientDev/aie_halt_npu3.cpp new file mode 100755 index 00000000000..95e3db66053 --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_halt/clientDev/aie_halt_npu3.cpp @@ -0,0 +1,85 @@ +/** + * Copyright (C) 2025 Advanced Micro Devices, Inc. - All rights reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. 
A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +#define XDP_PLUGIN_SOURCE + +#include "aie_halt_npu3.h" +#include "xdp/profile/plugin/vp_base/utility.h" + +#include "core/common/api/hw_context_int.h" +#include "core/common/device.h" +#include "core/common/message.h" + +#include "core/include/xrt/experimental/xrt_elf.h" +#include "core/include/xrt/experimental/xrt_ext.h" +#include "core/include/xrt/experimental/xrt_module.h" +#include "core/include/xrt/xrt_kernel.h" + +namespace xdp { + + AIEHaltNPU3Impl::AIEHaltNPU3Impl(VPDatabase*dB) + : AIEHaltImpl(dB) + { + } + + void AIEHaltNPU3Impl::updateDevice(void* hwCtxImpl) + { + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "In AIEHaltNPU3Impl::updateDevice"); + + std::string inputCtrlCode = xrt_core::config::get_aie_halt_settings_control_code(); + if (inputCtrlCode.empty()) { + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "No input control code file for AIE Halt provided. Defaulting to \"aieHalt4x4.elf\"."); + inputCtrlCode = "aieHalt4x4.elf"; + } + + xrt::hw_context hwContext = xrt_core::hw_context_int::create_hw_context_from_implementation(hwCtxImpl); + + xrt::elf haltElf; + try { + haltElf = xrt::elf(inputCtrlCode); + } catch (...) { + std::string msg = "Failed to load " + inputCtrlCode + ". Cannot configure AIE to halt."; + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", msg); + return; + } + + xrt::module mod{haltElf}; + xrt::kernel krnl; + try { + krnl = xrt::ext::kernel{hwContext, mod, "XDP_KERNEL:{IPUV1CNN}"}; + } catch (...) 
{ + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", + "XDP_KERNEL not found in HW Context. Cannot configure AIE to halt."); + return; + } + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "In AIEHaltNPU3Impl New Kernel Object for XDP_KERNEL created for running control code Elf"); + + xrt::run rn{krnl}; + rn.start(); + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "In AIEHaltNPU3Impl run start, going to wait"); + + rn.wait2(); + } + + void AIEHaltNPU3Impl::finishflushDevice(void* /*hwCtxImpl*/) + { + } +} diff --git a/src/runtime_src/xdp/profile/plugin/aie_halt/clientDev/aie_halt_npu3.h b/src/runtime_src/xdp/profile/plugin/aie_halt/clientDev/aie_halt_npu3.h new file mode 100755 index 00000000000..36f2b91b0ff --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_halt/clientDev/aie_halt_npu3.h @@ -0,0 +1,45 @@ +/** + * Copyright (C) 2025 Advanced Micro Devices, Inc. - All rights reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +#ifndef XDP_PLUGIN_AIE_HALT_NPU3_IMPL_H +#define XDP_PLUGIN_AIE_HALT_NPU3_IMPL_H + +#include "xdp/config.h" +#include "xdp/profile/plugin/aie_halt/aie_halt_impl.h" + +namespace xdp { + + class AIEHaltNPU3Impl : public AIEHaltImpl + { + + public : + explicit AIEHaltNPU3Impl(VPDatabase* dB); + + ~AIEHaltNPU3Impl() override = default; + + AIEHaltNPU3Impl(const AIEHaltNPU3Impl&) = delete; + AIEHaltNPU3Impl(AIEHaltNPU3Impl&&) = delete; + + AIEHaltNPU3Impl& operator=(const AIEHaltNPU3Impl&) = delete; + AIEHaltNPU3Impl& operator=(AIEHaltNPU3Impl&&) = delete; + + void updateDevice(void* hwCtxImpl) override; + void finishflushDevice(void* hwCtxImpl) override; + }; + +} + +#endif \ No newline at end of file diff --git a/src/runtime_src/xdp/profile/plugin/aie_pc/CMakeLists.txt b/src/runtime_src/xdp/profile/plugin/aie_pc/CMakeLists.txt index 15a3335bc24..84cb2dbb31b 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_pc/CMakeLists.txt +++ b/src/runtime_src/xdp/profile/plugin/aie_pc/CMakeLists.txt @@ -30,7 +30,8 @@ if (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") add_dependencies(xdp_aie_pc_plugin xdp_core xrt_coreutil) target_link_libraries(xdp_aie_pc_plugin PRIVATE xdp_core xrt_coreutil xaiengine) target_compile_definitions(xdp_aie_pc_plugin PRIVATE XDP_CLIENT_BUILD=1 -DXAIE_FEATURE_MSVC) - target_include_directories(xdp_aie_pc_plugin PRIVATE ${AIERT_DIR}/include) + target_include_directories(xdp_aie_pc_plugin PRIVATE + ${AIERT_DIR}/include ${AIEBU_SOURCE_DIR}/src/cpp/include) set_target_properties(xdp_aie_pc_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) install (TARGETS xdp_aie_pc_plugin diff --git a/src/runtime_src/xdp/profile/plugin/aie_profile/CMakeLists.txt b/src/runtime_src/xdp/profile/plugin/aie_profile/CMakeLists.txt index 3d6442bf29e..2bad7fc31f9 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_profile/CMakeLists.txt +++ b/src/runtime_src/xdp/profile/plugin/aie_profile/CMakeLists.txt @@ -3,11 +3,13 @@ # # 
==================================================================== -# This builds the AIE Profile plugin. It is currently built -# on Edge, x86, Client, and VE2 platforms that support AIE. +# This builds the AIE Profile plugin. It is currently built on Edge, +# x86, Client, and VE2 platforms that support AIE. # ==================================================================== file(GLOB AIE_PROFILE_PLUGIN_FILES + "${PROFILE_DIR}/plugin/aie_base/*" + "${PROFILE_DIR}/plugin/aie_base/generations/*" "${PROFILE_DIR}/plugin/aie_profile/*.h" "${PROFILE_DIR}/plugin/aie_profile/*.cpp" "${PROFILE_DIR}/writer/aie_profile/*.h" @@ -29,17 +31,35 @@ file(GLOB AIE_DRIVER_COMMON_UTIL_FILES ) if (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") + message("AIEBU_SOURCE_DIR = ${AIEBU_SOURCE_DIR}") + set(IMPL_DIR "${PROFILE_DIR}/plugin/aie_profile/client") + if (XDP_NPU3_BUILD_CMAKE STREQUAL "yes") + file(GLOB AIE_PROFILE_IMPL_FILES + "${IMPL_DIR}/aie_profile_npu3.h" + "${IMPL_DIR}/aie_profile_npu3.cpp" + "${IMPL_DIR}/aie_profile.h" + "${IMPL_DIR}/aie_profile.cpp" + "${PROFILE_DIR}/device/common/npu3/*" + ) + else() + file(GLOB AIE_PROFILE_IMPL_FILES + "${IMPL_DIR}/aie_profile.h" + "${IMPL_DIR}/aie_profile.cpp" + ) + endif() - file(GLOB AIE_PROFILE_IMPL_FILES - "${IMPL_DIR}/*.h" - "${IMPL_DIR}/*.cpp" - ) - add_library(xdp_aie_profile_plugin SHARED ${AIE_PROFILE_PLUGIN_FILES} ${AIE_PROFILE_IMPL_FILES} ${AIE_DRIVER_COMMON_UTIL_FILES} ${AIE_PROFILE_UTIL_FILES}) + add_library(xdp_aie_profile_plugin SHARED ${AIE_PROFILE_PLUGIN_FILES} ${AIE_PROFILE_IMPL_FILES} + ${AIE_DRIVER_COMMON_UTIL_FILES} ${AIE_PROFILE_UTIL_FILES}) add_dependencies(xdp_aie_profile_plugin xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_profile_plugin PRIVATE xdp_core xrt_coreutil xaiengine) + target_link_libraries(xdp_aie_profile_plugin PRIVATE + xdp_core xrt_coreutil xaiengine aiebu_library_objects) target_compile_definitions(xdp_aie_profile_plugin PRIVATE XDP_CLIENT_BUILD=1 -DXAIE_FEATURE_MSVC) - 
target_include_directories(xdp_aie_profile_plugin PRIVATE ${AIERT_DIR}/include) + if (XDP_NPU3_BUILD_CMAKE STREQUAL "yes") + target_compile_definitions(xdp_aie_profile_plugin PRIVATE XDP_NPU3_BUILD=1) + endif() + target_include_directories(xdp_aie_profile_plugin PRIVATE + ${AIERT_DIR}/include ${AIEBU_SOURCE_DIR}/src/cpp/include) set_target_properties(xdp_aie_profile_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) install (TARGETS xdp_aie_profile_plugin @@ -75,7 +95,8 @@ else() "${IMPL_DIR}/*.h" "${IMPL_DIR}/*.cpp" ) - add_library(xdp_aie_profile_plugin_xdna SHARED ${AIE_PROFILE_PLUGIN_FILES} ${AIE_PROFILE_IMPL_FILES} ${AIE_PROFILE_UTIL_FILES} ${AIE_PROFILE_CONFIG_FILES}) + add_library(xdp_aie_profile_plugin_xdna SHARED ${AIE_PROFILE_PLUGIN_FILES} ${AIE_PROFILE_IMPL_FILES} + ${AIE_PROFILE_UTIL_FILES} ${AIE_PROFILE_CONFIG_FILES}) add_dependencies(xdp_aie_profile_plugin_xdna xdp_core xrt_coreutil) target_link_libraries(xdp_aie_profile_plugin_xdna PRIVATE xdp_core xrt_coreutil xaiengine) target_compile_definitions(xdp_aie_profile_plugin_xdna PRIVATE XDP_VE2_BUILD=1 FAL_LINUX="on") @@ -95,7 +116,9 @@ else() "${IMPL_DIR}/*.h" "${IMPL_DIR}/*.cpp" ) - add_library(xdp_aie_profile_plugin SHARED ${AIE_PROFILE_PLUGIN_FILES} ${AIE_PROFILE_IMPL_FILES} ${AIE_PROFILE_UTIL_FILES} ${AIE_PROFILE_CONFIG_FILES}) + + add_library(xdp_aie_profile_plugin SHARED ${AIE_PROFILE_PLUGIN_FILES} ${AIE_PROFILE_IMPL_FILES} + ${AIE_PROFILE_UTIL_FILES} ${AIE_PROFILE_CONFIG_FILES}) add_dependencies(xdp_aie_profile_plugin xdp_core xrt_coreutil) target_link_libraries(xdp_aie_profile_plugin PRIVATE xdp_core xrt_coreutil xaiengine) if (XDP_VE2_BUILD_CMAKE STREQUAL "yes") @@ -103,7 +126,8 @@ else() else() target_compile_definitions(xdp_aie_profile_plugin PRIVATE FAL_LINUX="on") endif() - set_target_properties(xdp_aie_profile_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) + set_target_properties(xdp_aie_profile_plugin PROPERTIES VERSION 
${XRT_VERSION_STRING} + SOVERSION ${XRT_SOVERSION}) install (TARGETS xdp_aie_profile_plugin RUNTIME DESTINATION ${XDP_PLUGIN_INSTALL_DIR} COMPONENT ${XRT_COMPONENT} diff --git a/src/runtime_src/xdp/profile/plugin/aie_profile/aie_profile_metadata.cpp b/src/runtime_src/xdp/profile/plugin/aie_profile/aie_profile_metadata.cpp index 7a3595747e9..57bddb655e1 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_profile/aie_profile_metadata.cpp +++ b/src/runtime_src/xdp/profile/plugin/aie_profile/aie_profile_metadata.cpp @@ -1408,7 +1408,6 @@ namespace xdp { if (!isValidLatencyTile(pairTile)) return false; - auto tile_key = create_tileKey(pairTile); sourceTile = latencyConfigMap.at(create_tileKey(pairTile)).src; return true; } diff --git a/src/runtime_src/xdp/profile/plugin/aie_profile/aie_profile_metadata.h b/src/runtime_src/xdp/profile/plugin/aie_profile/aie_profile_metadata.h index fc8f7adb1f3..2a25b20bf87 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_profile/aie_profile_metadata.h +++ b/src/runtime_src/xdp/profile/plugin/aie_profile/aie_profile_metadata.h @@ -28,6 +28,7 @@ #include "xdp/profile/database/static_info/aie_constructs.h" #include "xdp/profile/database/static_info/aie_util.h" #include "xdp/profile/database/static_info/filetypes/base_filetype_impl.h" +#include "xdp/profile/plugin/aie_base/aie_base_util.h" #include "xdp/profile/plugin/aie_profile/aie_profile_defs.h" namespace xdp { @@ -35,13 +36,6 @@ namespace xdp { // Forwadr declarations of XDP constructs struct LatencyConfig; -constexpr unsigned int NUM_CORE_COUNTERS = 4; -constexpr unsigned int NUM_MEMORY_COUNTERS = 2; -constexpr unsigned int NUM_SHIM_COUNTERS = 2; -constexpr unsigned int NUM_MEM_TILE_COUNTERS = 4; -constexpr unsigned int NUM_UC_EVENT_COUNTERS = 5; -constexpr unsigned int NUM_UC_LATENCY_COUNTERS = 1; - class AieProfileMetadata { private: // Currently supporting core modules, memory modules, interface tiles, @@ -54,7 +48,8 @@ class AieProfileMetadata { "heat_map", "stalls", 
"execution", "floating_point", "stream_put_get", "aie_trace", "events", "write_throughputs", "read_throughputs", - "s2mm_throughputs", "mm2s_throughputs"} + "s2mm_throughputs", "mm2s_throughputs", + "stream_throughputs", "dma_throughputs"} }, { module_type::dma, { @@ -67,7 +62,8 @@ class AieProfileMetadata { "s2mm_throughputs", "mm2s_throughputs", "input_stalls", "output_stalls", "s2mm_stalls", "mm2s_stalls", "packets", METRIC_BYTE_COUNT, - "uc_dma_activity", "uc_axis_throughputs", "uc_core"} + "uc_dma_activity", "uc_axis_throughputs", "uc_core", + "throughputs", "dma_throughputs", "trace_dma"} }, { module_type::mem_tile, { @@ -76,7 +72,7 @@ class AieProfileMetadata { "output_channels", "output_channels_details", "output_throughputs", "mm2s_channels", "mm2s_channels_details", "mm2s_throughputs", "memory_stats", "mem_trace", "conflict_stats1", "conflict_stats2", - "conflict_stats3", "conflict_stats4"} + "conflict_stats3", "conflict_stats4", "throughputs"} }, { module_type::uc, { @@ -89,9 +85,6 @@ class AieProfileMetadata { const std::string defaultSets[NUM_MODULES] = {"s2mm_throughputs", "s2mm_throughputs", "s2mm_throughputs", "s2mm_throughputs", "execution"}; - const int numCountersMod[NUM_MODULES] = - {NUM_CORE_COUNTERS, NUM_MEMORY_COUNTERS, NUM_SHIM_COUNTERS, - NUM_MEM_TILE_COUNTERS, NUM_UC_EVENT_COUNTERS+NUM_UC_LATENCY_COUNTERS}; const module_type moduleTypes[NUM_MODULES] = {module_type::core, module_type::dma, module_type::shim, module_type::mem_tile, module_type::uc}; @@ -156,7 +149,9 @@ class AieProfileMetadata { bool checkModule(const int module) { return (module >= 0 && module < NUM_MODULES);} std::string getModuleName(const int module) { return moduleNames[module]; } - int getNumCountersMod(const int module){ return numCountersMod[module]; } + int getNumCountersMod(const int module) { + return aie::getNumCounters(getHardwareGen(), getModuleType(module)); + } module_type getModuleType(const int module) { return moduleTypes[module]; } uint8_t 
getAIETileRowOffset() const { return metadataReader == nullptr ? 0 : metadataReader->getAIETileRowOffset(); } diff --git a/src/runtime_src/xdp/profile/plugin/aie_profile/aie_profile_plugin.cpp b/src/runtime_src/xdp/profile/plugin/aie_profile/aie_profile_plugin.cpp index fbba8fdb5bf..0d42422417f 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_profile/aie_profile_plugin.cpp +++ b/src/runtime_src/xdp/profile/plugin/aie_profile/aie_profile_plugin.cpp @@ -21,11 +21,15 @@ #include "xdp/profile/device/utility.h" #include "xdp/profile/device/xdp_base_device.h" #include "xdp/profile/plugin/vp_base/info.h" +#include "xdp/profile/plugin/aie_base/aie_base_util.h" #include "xdp/profile/writer/aie_profile/aie_writer.h" -#ifdef XDP_CLIENT_BUILD +#ifdef XDP_NPU3_BUILD +#include "client/aie_profile.h" +#include "client/aie_profile_npu3.h" +#elif XDP_CLIENT_BUILD #include "client/aie_profile.h" -#elif defined(XRT_X86_BUILD) +#elif XRT_X86_BUILD #include "x86/aie_profile.h" #elif XDP_VE2_BUILD #include "ve2/aie_profile.h" @@ -159,8 +163,13 @@ namespace xdp { #ifdef XDP_CLIENT_BUILD xrt::hw_context context = xrt_core::hw_context_int::create_hw_context_from_implementation(handle); AIEData.metadata->setHwContext(context); - AIEData.implementation = std::make_unique(db, AIEData.metadata); -#elif defined(XRT_X86_BUILD) + #ifdef XDP_NPU3_BUILD + if (aie::isNPU3(AIEData.metadata->getHardwareGen())) + AIEData.implementation = std::make_unique(db, AIEData.metadata); + else + #endif + AIEData.implementation = std::make_unique(db, AIEData.metadata); +#elif XRT_X86_BUILD AIEData.implementation = std::make_unique(db, AIEData.metadata); #elif XDP_VE2_BUILD AIEData.implementation = std::make_unique(db, AIEData.metadata); @@ -254,10 +263,11 @@ auto time = std::time(nullptr); { xrt_core::message::send(severity_level::info, "XRT", "Calling AIE Profile endPoll."); - #ifdef XDP_CLIENT_BUILD - auto& AIEData = handleToAIEData.begin()->second; - AIEData.implementation->poll(0); - #endif +#ifdef 
XDP_CLIENT_BUILD + auto& AIEData = handleToAIEData.begin()->second; + AIEData.implementation->poll(0); +#endif + // Ask all threads to end for (auto& p : handleToAIEData) { if (p.second.implementation) diff --git a/src/runtime_src/xdp/profile/plugin/aie_profile/client/aie_profile_npu3.cpp b/src/runtime_src/xdp/profile/plugin/aie_profile/client/aie_profile_npu3.cpp new file mode 100755 index 00000000000..cc2053b289f --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_profile/client/aie_profile_npu3.cpp @@ -0,0 +1,397 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved + +#define XDP_PLUGIN_SOURCE + +#include "aie_profile_npu3.h" + +#include +#include + +#include +#include +#include +#include + +#include "core/common/api/bo_int.h" +#include "core/common/message.h" +#include "core/common/time.h" +#include "core/include/xrt/xrt_kernel.h" + +#include "core/include/xrt/experimental/xrt_elf.h" +#include "core/include/xrt/experimental/xrt_ext.h" +#include "core/include/xrt/experimental/xrt_module.h" + +#include "xdp/profile/database/database.h" +#include "xdp/profile/database/static_info/aie_constructs.h" +#include "xdp/profile/database/static_info/pl_constructs.h" +#include "xdp/profile/plugin/aie_base/aie_base_util.h" +#include "xdp/profile/plugin/aie_profile/aie_profile_defs.h" +#include "xdp/profile/plugin/aie_profile/util/aie_profile_util.h" +#include "xdp/profile/plugin/vp_base/info.h" + +// XRT headers +#include "xrt/xrt_bo.h" +#include "core/common/shim/hwctx_handle.h" + +#ifdef _WIN32 +#include +#endif + +namespace xdp { + using severity_level = xrt_core::message::severity_level; + using tile_type = xdp::tile_type; + using module_type = xdp::module_type; + + AieProfile_NPU3Impl::AieProfile_NPU3Impl(VPDatabase* database, + std::shared_ptr metadata + ) + : AieProfileImpl(database, metadata) + { + auto hwGen = metadata->getHardwareGen(); + + coreStartEvents = 
aie::profile::getCoreEventSets(hwGen); + coreEndEvents = coreStartEvents; + + memoryStartEvents = aie::profile::getMemoryEventSets(hwGen); + memoryEndEvents = memoryStartEvents; + + shimStartEvents = aie::profile::getInterfaceTileEventSets(hwGen); + shimEndEvents = shimStartEvents; + + memTileStartEvents = aie::profile::getMemoryTileEventSets(hwGen); + memTileEndEvents = memTileStartEvents; + + //auto context = metadata->getHwContext(); + //transactionHandler = std::make_unique(context, "AIE Profile Setup"); + tranxHandler = std::make_unique(); + } + + void AieProfile_NPU3Impl::updateDevice() + { + setMetricsSettings(metadata->getDeviceID()); + } + + bool AieProfile_NPU3Impl::setMetricsSettings(const uint64_t deviceId) + { + xrt_core::message::send(severity_level::info, "XRT", "Setting AIE Profile Metrics Settings."); + + int counterId = 0; + bool runtimeCounters = false; + + xdp::aie::driver_config meta_config = metadata->getAIEConfigMetadata(); + + XAie_Config cfg { + meta_config.hw_gen, + meta_config.base_address, + meta_config.column_shift, + meta_config.row_shift, + meta_config.num_rows, + meta_config.num_columns, + meta_config.shim_row, + 0, + 1, + meta_config.mem_row_start, + meta_config.mem_num_rows, + meta_config.aie_tile_row_start, + meta_config.aie_tile_num_rows, + {0} // PartProp + }; + + auto RC = XAie_CfgInitialize(&aieDevInst, &cfg); + if (RC != XAIE_OK) { + xrt_core::message::send(severity_level::warning, "XRT", "AIE Driver Initialization Failed."); + return false; + } + + std::string tranxName = "AieProfileMetrics"; + + // Get partition columns + boost::property_tree::ptree aiePartitionPt = xdp::aie::getAIEPartitionInfo(metadata->getHandle()); + // Currently, assuming only one Hw Context is alive at a time + //uint8_t startCol = static_cast(aiePartitionPt.front().second.get("start_col")); + uint8_t startCol = 0; + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Starting transaction " + tranxName); + + // Initialize 
transaction + if (!tranxHandler->initializeTransaction(&aieDevInst, tranxName)) + return false; + + auto configChannel0 = metadata->getConfigChannel0(); + auto configChannel1 = metadata->getConfigChannel1(); + + for (int module = 0; module < metadata->getNumModules(); ++module) { + std::cout << "Configuring profiling for module " << module << std::endl; + + XAie_ModuleType mod = aie::profile::getFalModuleType(module); + // Iterate over tiles and metrics to configure all desired counters + for (auto& tileMetric : metadata->getConfigMetrics(module)) { + int numCounters = 0; + + auto& metricSet = tileMetric.second; + auto tile = tileMetric.first; + auto row = tile.row; + auto col = tile.col; + auto subtype = tile.subtype; + auto type = aie::getModuleType(row, metadata->getAIETileRowOffset()); + + std::cout << "Configuring profiling for tile (" << +col << "," << +row + << ") using metric set " << metricSet << std::endl; + + // Ignore invalid types and inactive modules + // NOTE: Inactive core modules are configured when utilizing + // stream switch monitor ports to profile DMA channels + if (!aie::profile::isValidType(type, mod)) + continue; + if ((type == module_type::dma) && !tile.active_memory) + continue; + if ((type == module_type::core) && !tile.active_core) { + if (metadata->getPairModuleIndex(metricSet, type) < 0) + continue; + } + + //std::cout << "Getting sets and modifying events..." << std::endl; + + auto loc = XAie_TileLoc(col, row); + auto startEvents = (type == module_type::core) ? coreStartEvents[metricSet] + : ((type == module_type::dma) ? memoryStartEvents[metricSet] + : ((type == module_type::shim) ? shimStartEvents[metricSet] + : memTileStartEvents[metricSet])); + auto endEvents = (type == module_type::core) ? coreEndEvents[metricSet] + : ((type == module_type::dma) ? memoryEndEvents[metricSet] + : ((type == module_type::shim) ? 
shimEndEvents[metricSet] + : memTileEndEvents[metricSet])); + + uint8_t numFreeCtr = static_cast(startEvents.size()); + std::vector Regs = regValues.at(type); + + auto iter0 = configChannel0.find(tile); + auto iter1 = configChannel1.find(tile); + uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second; + uint8_t channel1 = (iter1 == configChannel1.end()) ? 1 : iter1->second; + // TODO: for now, hard-code channels 2 and 3 + std::vector channels = {channel0, channel1, 2, 3}; + + // Modify events as needed + aie::profile::modifyEvents(type, subtype, channel0, startEvents, metadata->getHardwareGen()); + endEvents = startEvents; + + configEventSelections(loc, type, metricSet, channels); + + // Request and configure all available counters for this tile + for (uint8_t i = 0; i < numFreeCtr; i++) { + //std::cout << "Configuring counter " << +i << std::endl; + + auto startEvent = startEvents.at(i); + auto endEvent = endEvents.at(i); + uint8_t resetEvent = 0; + + // No resource manager, so manually manage the counters + RC = XAie_PerfCounterReset(&aieDevInst, loc, mod, i); + if (RC != XAIE_OK) { + xrt_core::message::send(severity_level::error, "XRT", "AIE Performance Counter Reset Failed."); + break; + } + RC = XAie_PerfCounterControlSet(&aieDevInst, loc, mod, i, startEvent, endEvent); + if (RC != XAIE_OK) { + xrt_core::message::send(severity_level::error, "XRT", "AIE Performance Counter Set Failed."); + break; + } + + aie::profile::configGroupEvents(&aieDevInst, loc, mod, type, metricSet, startEvent, channel0); + if (aie::isStreamSwitchPortEvent(startEvent)) + configStreamSwitchPorts(tileMetric.first, loc, type, metricSet, channel0, startEvent); + + // Convert enums to physical event IDs for reporting purposes + uint16_t tmpStart; + uint16_t tmpEnd; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, startEvent, &tmpStart); + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, endEvent, &tmpEnd); + uint16_t phyStartEvent = tmpStart + 
aie::profile::getCounterBase(type); + uint16_t phyEndEvent = tmpEnd + aie::profile::getCounterBase(type); + // auto payload = getCounterPayload(tileMetric.first, type, col, row, + // startEvent, metricSet, channel0); + auto payload = channel0; + + // Store counter info in database + std::string counterName = "AIE Counter" + std::to_string(counterId); + (db->getStaticInfo()).addAIECounter(deviceId, counterId, col, row, i, + phyStartEvent, phyEndEvent, resetEvent, payload, metadata->getClockFreqMhz(), + metadata->getModuleName(module), counterName); + + // NOTE: NPU3 has unique addressing, so get offsets from driver + auto tileOffset = XAie_GetTileAddr(&aieDevInst, row, col); + op_profile_data.emplace_back((u32)(Regs[i] + tileOffset)); + + std::vector values; + uint8_t absCol = col + startCol; + values.insert(values.end(), {absCol, row, phyStartEvent, phyEndEvent, + resetEvent, 0, 0, payload}); + outputValues.push_back(values); + + counterId++; + numCounters++; + } + + std::stringstream msg; + msg << "Reserved " << numCounters << " counters for profiling AIE tile (" << +col << "," + << +row << ") using metric set " << metricSet << " and channel " << +channel0 << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + // numTileCounters[numCounters]++; + } + runtimeCounters = true; + } // modules + + //xrt::hw_context hwContext = xrt_core::hw_context_int::create_hw_context_from_implementation(hwCtxImpl); + auto hwContext = metadata->getHwContext(); + tranxHandler->submitTransaction(&aieDevInst, hwContext); + + xrt_core::message::send(severity_level::info, "XRT", "Successfully scheduled AIE Profiling."); + return runtimeCounters; + } // setMetricsSettings + + /**************************************************************************** + * Configure selection index to monitor channel numbers + * NOTE: In NPU3, this is required in memory and interface tiles + ***************************************************************************/ + void + 
AieProfile_NPU3Impl::configEventSelections(const XAie_LocType loc, const module_type type, + const std::string metricSet, std::vector& channels) + { + if ((type != module_type::mem_tile) && (type != module_type::shim)) + return; + + XAie_DmaDirection dmaDir = aie::isInputSet(type, metricSet) ? DMA_S2MM : DMA_MM2S; + uint8_t numChannels = ((type == module_type::shim) && (dmaDir == DMA_MM2S)) + ? NUM_CHANNEL_SELECTS_SHIM_NPU3 : NUM_CHANNEL_SELECTS; + + if (aie::isDebugVerbosity()) { + std::string tileType = (type == module_type::shim) ? "interface" : "memory"; + std::string dmaType = (dmaDir == DMA_S2MM) ? "S2MM" : "MM2S"; + std::stringstream channelsStr; + std::copy(channels.begin(), channels.end(), std::ostream_iterator(channelsStr, ", ")); + + std::string msg = "Configuring event selections for " + tileType + " tile DMA " + + dmaType + " channels " + channelsStr.str(); + xrt_core::message::send(severity_level::debug, "XRT", msg); + } + + for (uint8_t c = 0; c < numChannels; ++c) + XAie_EventSelectDmaChannel(&aieDevInst, loc, c, dmaDir, channels.at(c)); + } + + /**************************************************************************** + * Configure stream switch ports for monitoring purposes + * NOTE: Used to monitor streams: trace, interfaces, and memory tiles + ***************************************************************************/ + void + AieProfile_NPU3Impl::configStreamSwitchPorts(const tile_type& tile, const XAie_LocType& loc, + const module_type& type, const std::string& metricSet, + const uint8_t channel, const XAie_Events startEvent) + { + // Hardcoded + uint8_t rscId = 0; + uint8_t portnum = aie::getPortNumberFromEvent(startEvent); + // AIE Tiles (e.g., trace streams) + if (type == module_type::core) { + auto slaveOrMaster = (metricSet.find("mm2s") != std::string::npos) ? 
+ XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; + XAie_EventSelectStrmPort(&aieDevInst, loc, rscId, slaveOrMaster, DMA, channel); + std::stringstream msg; + msg << "Configured core tile " << (aie::isInputSet(type,metricSet) ? "S2MM" : "MM2S") + << " stream switch ports for metricset " << metricSet << " and channel " << (int)channel << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + return; + } + + // Interface tiles (e.g., PLIO, GMIO) + if (type == module_type::shim) { + // NOTE: skip configuration of extra ports for tile if stream_ids are not available. + if (portnum >= tile.stream_ids.size()) + return; + // Grab slave/master and stream ID + // NOTE: stored in getTilesForProfiling() above + auto slaveOrMaster = (tile.is_master_vec.at(portnum) == 0) ? XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; + uint8_t streamPortId = static_cast(tile.stream_ids.at(portnum)); + + // auto streamPortId = tile.stream_id; + // Define stream switch port to monitor interface + XAie_EventSelectStrmPort(&aieDevInst, loc, rscId, slaveOrMaster, SOUTH, streamPortId); + std::stringstream msg; + msg << "Configured shim tile " << (aie::isInputSet(type,metricSet) ? "S2MM" : "MM2S") << " stream switch ports for metricset " << metricSet << " and stream port id " << (int)streamPortId << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + return; + } + + if (type == module_type::mem_tile) { + auto slaveOrMaster = (metricSet.find("mm2s") != std::string::npos) ? + XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; + XAie_EventSelectStrmPort(&aieDevInst, loc, rscId, slaveOrMaster, DMA, channel); + std::stringstream msg; + msg << "Configured mem tile " << (aie::isInputSet(type,metricSet) ? 
"S2MM" : "MM2S") << " stream switch ports for metricset " << metricSet << " and channel " << (int)channel << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + } + } + + void AieProfile_NPU3Impl::poll(const uint64_t id) + { + id; + if (finishedPoll) + return; + + if (db->infoAvailable(xdp::info::ml_timeline)) { + db->broadcast(VPDatabase::MessageType::READ_RECORD_TIMESTAMPS, nullptr); + xrt_core::message::send(severity_level::debug, "XRT", "Done reading recorded timestamps."); + } + + auto context = metadata->getHwContext(); + xrt::bo resultBO; + uint32_t* output = nullptr; + std::map activeUCsegmentMap; + activeUCsegmentMap[0] = 0x20000; + try { + //resultBO = xrt_core::bo_int::create_debug_bo(context, 0x20000); + resultBO = xrt_core::bo_int::create_bo(context, 0x20000, xrt_core::bo_int::use_type::debug); + xrt_core::bo_int::config_bo(resultBO, activeUCsegmentMap); + output = resultBO.map(); + memset(output, 0, 0x20000); + } + catch (std::exception& e) { + std::stringstream msg; + msg << "Unable to create 128KB buffer for AIE Profile results. Cannot get AIE Profile info. 
" << e.what() << std::endl; + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", msg.str()); + return; + } + + + std::string tranxName = "AieProfilePoll"; + if (!tranxHandler->initializeTransaction(&aieDevInst, tranxName)) { + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Unable to initialize transaction for AIE profile polling."); + return; + } + for (u32 i = 0; i < op_profile_data.size(); i++) { + XAie_SaveRegister(&aieDevInst, op_profile_data[i], op_profile_data[i]); + } + if (!tranxHandler->submitTransaction(&aieDevInst, context)) + return; + + resultBO.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + output = resultBO.map(); + + for (u32 i = 0; i < op_profile_data.size() + 12 * 3; i++) { + std::stringstream msg; + msg << "Counter address/values: " << output[2 * i] << " - " << output[2 * i + 1]; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", msg.str()); + } + + finishedPoll = true; + } + + void AieProfile_NPU3Impl::freeResources() + { + + } +} // namespace xdp diff --git a/src/runtime_src/xdp/profile/plugin/aie_profile/client/aie_profile_npu3.h b/src/runtime_src/xdp/profile/plugin/aie_profile/client/aie_profile_npu3.h new file mode 100755 index 00000000000..83b87d53085 --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_profile/client/aie_profile_npu3.h @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024-2025 Advanced Micro Devices, Inc. 
All rights reserved + +#ifndef AIE_PROFILE_NPU3_H +#define AIE_PROFILE_NPU3_H + +#include +#include + +#include "xdp/profile/database/static_info/aie_constructs.h" +#include "xdp/profile/plugin/aie_profile/aie_profile_defs.h" +#include "xdp/profile/plugin/aie_profile/aie_profile_impl.h" +#include "xdp/profile/device/common/npu3/npu3_transaction.h" +#include "xdp/profile/plugin/aie_base/generations/npu3_registers.h" + +extern "C" { +#include +#include +} + +namespace xdp { + + class AieProfile_NPU3Impl : public AieProfileImpl { + public: + AieProfile_NPU3Impl(VPDatabase* database, std::shared_ptr metadata); + ~AieProfile_NPU3Impl() = default; + + void updateDevice(); + + void startPoll(const uint64_t /*id*/) override {} + void continuePoll(const uint64_t /*id*/) override {} + void poll(const uint64_t id) override; + void endPoll() override {} + + void freeResources(); + bool setMetricsSettings(const uint64_t deviceId); + + void configEventSelections(const XAie_LocType loc, const module_type type, + const std::string metricSet, std::vector& channels); + void configStreamSwitchPorts(const tile_type& tile, const XAie_LocType& loc, + const module_type& type, const std::string& metricSet, + const uint8_t channel, const XAie_Events startEvent); + private: + const std::vector falModuleTypes = { + XAIE_CORE_MOD, + XAIE_MEM_MOD, + XAIE_PL_MOD, + XAIE_MEM_MOD + }; + + const std::map> regValues { +#ifdef XDP_NPU3_BUILD + {module_type::core, {npu3::cm_performance_counter0, npu3::cm_performance_counter1, + npu3::cm_performance_counter2, npu3::cm_performance_counter3, + npu3::cm_performance_counter4, npu3::cm_performance_counter5, + npu3::cm_performance_counter6, npu3::cm_performance_counter7, + npu3::cm_performance_counter8, npu3::cm_performance_counter9, + npu3::cm_performance_counter10, npu3::cm_performance_counter11}}, + {module_type::dma, {}}, + {module_type::shim, {npu3::shim_performance_counter0, npu3::shim_performance_counter1, + npu3::shim_performance_counter2, 
npu3::shim_performance_counter3, + npu3::shim_performance_counter4, npu3::shim_performance_counter5, + npu3::shim_performance_counter6, npu3::shim_performance_counter7, + npu3::shim_performance_counter8, npu3::shim_performance_counter9, + npu3::shim_performance_counter10, npu3::shim_performance_counter11}}, + {module_type::mem_tile, {npu3::mem_performance_counter0, npu3::mem_performance_counter1, + npu3::mem_performance_counter2, npu3::mem_performance_counter3, + npu3::mem_performance_counter4, npu3::mem_performance_counter5, + npu3::mem_performance_counter6, npu3::mem_performance_counter7, + npu3::mem_performance_counter8, npu3::mem_performance_counter9, + npu3::mem_performance_counter10, npu3::mem_performance_counter11}} +#endif + }; + + std::map> coreStartEvents; + std::map> coreEndEvents; + std::map> memoryStartEvents; + std::map> memoryEndEvents; + std::map> shimStartEvents; + std::map> shimEndEvents; + std::map> memTileStartEvents; + std::map> memTileEndEvents; + + bool finishedPoll = false; + std::vector op_profile_data; + XAie_DevInst aieDevInst = {0}; + std::vector> outputValues; + std::unique_ptr tranxHandler; + }; + +} // namespace xdp + +#endif diff --git a/src/runtime_src/xdp/profile/plugin/aie_profile/util/aie_profile_util.cpp b/src/runtime_src/xdp/profile/plugin/aie_profile/util/aie_profile_util.cpp old mode 100644 new mode 100755 index 0915c9e408c..222809c73d0 --- a/src/runtime_src/xdp/profile/plugin/aie_profile/util/aie_profile_util.cpp +++ b/src/runtime_src/xdp/profile/plugin/aie_profile/util/aie_profile_util.cpp @@ -6,6 +6,7 @@ #include "xdp/profile/plugin/aie_profile/util/aie_profile_util.h" #include "xdp/profile/database/static_info/aie_util.h" #include "xdp/profile/plugin/aie_base/aie_base_util.h" +#include "xdp/profile/plugin/aie_base/generations/aie_generations.h" #include #include @@ -26,33 +27,74 @@ namespace xdp::aie::profile { std::map> getCoreEventSets(const int hwGen) { std::map> eventSets; + + int numCounters = 
xdp::aie::getNumCounters(hwGen, xdp::module_type::core); + + // Consistent sets across generations eventSets = { - {"heat_map", {XAIE_EVENT_ACTIVE_CORE, XAIE_EVENT_GROUP_CORE_STALL_CORE, - XAIE_EVENT_INSTR_VECTOR_CORE, XAIE_EVENT_GROUP_CORE_PROGRAM_FLOW_CORE}}, - {"stalls", {XAIE_EVENT_MEMORY_STALL_CORE, XAIE_EVENT_STREAM_STALL_CORE, - XAIE_EVENT_LOCK_STALL_CORE, XAIE_EVENT_CASCADE_STALL_CORE}}, - {"execution", {XAIE_EVENT_INSTR_VECTOR_CORE, XAIE_EVENT_INSTR_LOAD_CORE, - XAIE_EVENT_INSTR_STORE_CORE, XAIE_EVENT_GROUP_CORE_PROGRAM_FLOW_CORE}}, - {"stream_put_get", {XAIE_EVENT_INSTR_CASCADE_GET_CORE, XAIE_EVENT_INSTR_CASCADE_PUT_CORE, - XAIE_EVENT_INSTR_STREAM_GET_CORE, XAIE_EVENT_INSTR_STREAM_PUT_CORE}}, - {"write_throughputs", {XAIE_EVENT_ACTIVE_CORE, XAIE_EVENT_INSTR_STREAM_PUT_CORE, - XAIE_EVENT_INSTR_CASCADE_PUT_CORE, XAIE_EVENT_GROUP_CORE_STALL_CORE}}, - {"read_throughputs", {XAIE_EVENT_ACTIVE_CORE, XAIE_EVENT_INSTR_STREAM_GET_CORE, - XAIE_EVENT_INSTR_CASCADE_GET_CORE, XAIE_EVENT_GROUP_CORE_STALL_CORE}}, - {"s2mm_throughputs", {XAIE_EVENT_PORT_RUNNING_0_CORE, XAIE_EVENT_PORT_STALLED_0_CORE}}, - {"mm2s_throughputs", {XAIE_EVENT_PORT_RUNNING_0_CORE, XAIE_EVENT_PORT_STALLED_0_CORE}}, - {"aie_trace", {XAIE_EVENT_PORT_RUNNING_0_CORE, XAIE_EVENT_PORT_STALLED_0_CORE, - XAIE_EVENT_PORT_RUNNING_1_CORE, XAIE_EVENT_PORT_STALLED_1_CORE}}, - {"events", {XAIE_EVENT_INSTR_EVENT_0_CORE, XAIE_EVENT_INSTR_EVENT_1_CORE, - XAIE_EVENT_USER_EVENT_0_CORE, XAIE_EVENT_USER_EVENT_1_CORE}} + {"aie_trace", {XAIE_EVENT_PORT_RUNNING_0_CORE, XAIE_EVENT_PORT_STALLED_0_CORE, + XAIE_EVENT_PORT_RUNNING_1_CORE, XAIE_EVENT_PORT_STALLED_1_CORE}}, + {"events", {XAIE_EVENT_INSTR_EVENT_0_CORE, XAIE_EVENT_INSTR_EVENT_1_CORE, + XAIE_EVENT_USER_EVENT_0_CORE, XAIE_EVENT_USER_EVENT_1_CORE}} }; - if (hwGen == 1) { - eventSets["floating_point"] = {XAIE_EVENT_FP_OVERFLOW_CORE, XAIE_EVENT_FP_UNDERFLOW_CORE, - XAIE_EVENT_FP_INVALID_CORE, XAIE_EVENT_FP_DIV_BY_ZERO_CORE}; + // Flexible sets based on number of 
counters + if (numCounters == 4) { + eventSets["heat_map"] = {XAIE_EVENT_ACTIVE_CORE, XAIE_EVENT_GROUP_CORE_STALL_CORE, + XAIE_EVENT_INSTR_VECTOR_CORE, XAIE_EVENT_GROUP_CORE_PROGRAM_FLOW_CORE}; + eventSets["stalls"] = {XAIE_EVENT_MEMORY_STALL_CORE, XAIE_EVENT_STREAM_STALL_CORE, + XAIE_EVENT_LOCK_STALL_CORE, XAIE_EVENT_CASCADE_STALL_CORE}; + eventSets["execution"] = {XAIE_EVENT_INSTR_VECTOR_CORE, XAIE_EVENT_INSTR_LOAD_CORE, + XAIE_EVENT_INSTR_STORE_CORE, XAIE_EVENT_GROUP_CORE_PROGRAM_FLOW_CORE}; + eventSets["stream_put_get"] = {XAIE_EVENT_INSTR_CASCADE_GET_CORE, XAIE_EVENT_INSTR_CASCADE_PUT_CORE, + XAIE_EVENT_INSTR_STREAM_GET_CORE, XAIE_EVENT_INSTR_STREAM_PUT_CORE}; + eventSets["write_throughputs"] = {XAIE_EVENT_ACTIVE_CORE, XAIE_EVENT_INSTR_STREAM_PUT_CORE, + XAIE_EVENT_INSTR_CASCADE_PUT_CORE, XAIE_EVENT_GROUP_CORE_STALL_CORE}; + eventSets["read_throughputs"] = {XAIE_EVENT_ACTIVE_CORE, XAIE_EVENT_INSTR_STREAM_GET_CORE, + XAIE_EVENT_INSTR_CASCADE_GET_CORE, XAIE_EVENT_GROUP_CORE_STALL_CORE}; + eventSets["s2mm_throughputs"] = {XAIE_EVENT_PORT_RUNNING_0_CORE, XAIE_EVENT_PORT_STALLED_0_CORE}; + eventSets["mm2s_throughputs"] = {XAIE_EVENT_PORT_RUNNING_0_CORE, XAIE_EVENT_PORT_STALLED_0_CORE}; + eventSets["stream_throughputs"] = {}; + eventSets["dma_throughputs"] = {}; + } + else if (numCounters == 12) { +#ifdef XDP_NPU3_BUILD + eventSets["heat_map"] = {XAIE_EVENT_ACTIVE_CORE, XAIE_EVENT_MEMORY_STALL_CORE, + XAIE_EVENT_STREAM_STALL_CORE, XAIE_EVENT_LOCK_STALL_CORE, + XAIE_EVENT_CASCADE_STALL_CORE, XAIE_EVENT_INSTR_VECTOR_CORE, + XAIE_EVENT_INSTR_MATRIX_CORE, XAIE_EVENT_INSTR_MOVE_CORE, + XAIE_EVENT_INSTR_ALU_CORE, XAIE_EVENT_INSTR_LOAD_CORE, + XAIE_EVENT_INSTR_LOAD_B_CORE, XAIE_EVENT_INSTR_STORE_CORE}; + eventSets["stalls"] = {}; + eventSets["execution"] = {}; + eventSets["stream_throughputs"] = {XAIE_EVENT_ACTIVE_CORE, XAIE_EVENT_INSTR_STREAM_GET_CORE, + XAIE_EVENT_INSTR_STREAM_GET_1_CORE, XAIE_EVENT_INSTR_STREAM_PUT_CORE, + XAIE_EVENT_INSTR_CASCADE_GET_CORE, 
XAIE_EVENT_INSTR_CASCADE_PUT_CORE}; + eventSets["stream_put_get"] = {}; + eventSets["read_throughputs"] = {}; + eventSets["write_throughputs"] = {}; + eventSets["dma_throughputs"] = {XAIE_EVENT_DMA_S2MM_0_RUNNING_CORE, XAIE_EVENT_DMA_S2MM_0_FINISHED_BD_CORE, + XAIE_EVENT_DMA_S2MM_0_STALLED_LOCK_CORE, + XAIE_EVENT_DMA_S2MM_0_MEMORY_BACKPRESSURE_CORE, + XAIE_EVENT_DMA_S2MM_1_RUNNING_CORE, XAIE_EVENT_DMA_S2MM_1_FINISHED_BD_CORE, + XAIE_EVENT_DMA_S2MM_1_STALLED_LOCK_CORE, + XAIE_EVENT_DMA_S2MM_1_MEMORY_BACKPRESSURE_CORE, + XAIE_EVENT_DMA_MM2S_0_RUNNING_CORE, XAIE_EVENT_DMA_MM2S_0_FINISHED_BD_CORE, + XAIE_EVENT_DMA_MM2S_0_STREAM_BACKPRESSURE_CORE, + XAIE_EVENT_DMA_MM2S_0_MEMORY_STARVATION_CORE, + }; + eventSets["s2mm_throughputs"] = {}; + eventSets["mm2s_throughputs"] = {}; +#endif + } + + // Floating point events are generation-specific + if (xdp::aie::isAIE1(hwGen)) { + eventSets["floating_point"] = {XAIE_EVENT_FP_OVERFLOW_CORE, XAIE_EVENT_FP_UNDERFLOW_CORE, + XAIE_EVENT_FP_INVALID_CORE, XAIE_EVENT_FP_DIV_BY_ZERO_CORE}; } else { - eventSets["floating_point"] = {XAIE_EVENT_FP_HUGE_CORE, XAIE_EVENT_INT_FP_0_CORE, - XAIE_EVENT_FP_INVALID_CORE, XAIE_EVENT_FP_INF_CORE}; + eventSets["floating_point"] = {XAIE_EVENT_FP_HUGE_CORE, XAIE_EVENT_INT_FP_0_CORE, + XAIE_EVENT_FP_INVALID_CORE, XAIE_EVENT_FP_INF_CORE}; } return eventSets; @@ -67,12 +109,19 @@ namespace xdp::aie::profile { { std::map> eventSets; + // Verify number of memory module counters + int numCounters = xdp::aie::getNumCounters(hwGen, xdp::module_type::dma); + if (numCounters == 0) + return eventSets; + + // Consistent sets across generations eventSets = { - {"conflicts", {XAIE_EVENT_GROUP_MEMORY_CONFLICT_MEM, XAIE_EVENT_GROUP_ERRORS_MEM}}, - {"dma_locks", {XAIE_EVENT_GROUP_DMA_ACTIVITY_MEM, XAIE_EVENT_GROUP_LOCK_MEM}} + {"conflicts", {XAIE_EVENT_GROUP_MEMORY_CONFLICT_MEM, XAIE_EVENT_GROUP_ERRORS_MEM}}, + {"dma_locks", {XAIE_EVENT_GROUP_DMA_ACTIVITY_MEM, XAIE_EVENT_GROUP_LOCK_MEM}} }; - if (hwGen == 1) { + // DMA 
events are generation-specific + if (xdp::aie::isAIE1(hwGen)) { eventSets["dma_stalls_s2mm"] = {XAIE_EVENT_DMA_S2MM_0_STALLED_LOCK_ACQUIRE_MEM, XAIE_EVENT_DMA_S2MM_1_STALLED_LOCK_ACQUIRE_MEM}; eventSets["dma_stalls_mm2s"] = {XAIE_EVENT_DMA_MM2S_0_STALLED_LOCK_ACQUIRE_MEM, @@ -100,15 +149,28 @@ namespace xdp::aie::profile { ***************************************************************************/ std::map> getInterfaceTileEventSets(const int hwGen) { + // Number of shim counters is generation-specific + int numCounters = xdp::aie::getNumCounters(hwGen, xdp::module_type::shim); + std::map> eventSets; + + // Define profile API sets + // TODO: modify these to be generation-specific eventSets = { - {"packets", {XAIE_EVENT_PORT_TLAST_0_PL, XAIE_EVENT_PORT_TLAST_1_PL}}, - {"input_throughputs", {XAIE_EVENT_GROUP_DMA_ACTIVITY_PL, XAIE_EVENT_PORT_RUNNING_0_PL}}, - {"output_throughputs", {XAIE_EVENT_GROUP_DMA_ACTIVITY_PL, XAIE_EVENT_PORT_RUNNING_0_PL}}, - {METRIC_BYTE_COUNT, {XAIE_EVENT_PORT_RUNNING_0_PL, XAIE_EVENT_PORT_RUNNING_0_PL}}, - {METRIC_LATENCY, {XAIE_EVENT_PORT_RUNNING_0_PL, XAIE_EVENT_PORT_RUNNING_0_PL}}, + {METRIC_BYTE_COUNT, {XAIE_EVENT_PORT_RUNNING_0_PL, XAIE_EVENT_PORT_RUNNING_0_PL}}, + {METRIC_LATENCY, {XAIE_EVENT_PORT_RUNNING_0_PL, XAIE_EVENT_PORT_RUNNING_0_PL}}, }; + // Flexible sets based on number of counters + if (numCounters == 2) { + eventSets["packets"] = {XAIE_EVENT_PORT_TLAST_0_PL, + XAIE_EVENT_PORT_TLAST_1_PL}; + eventSets["input_throughputs"] = {XAIE_EVENT_GROUP_DMA_ACTIVITY_PL, + XAIE_EVENT_PORT_RUNNING_0_PL}; + eventSets["output_throughputs"] = {XAIE_EVENT_GROUP_DMA_ACTIVITY_PL, + XAIE_EVENT_PORT_RUNNING_0_PL}; + } + if (aie::isAIE1(hwGen)) { eventSets["input_stalls"] = {XAIE_EVENT_PORT_STALLED_0_PL, XAIE_EVENT_PORT_IDLE_0_PL}; @@ -126,47 +188,119 @@ namespace xdp::aie::profile { #endif } else { - eventSets["input_stalls"] = {XAIE_EVENT_DMA_MM2S_0_STREAM_BACKPRESSURE_PL, - XAIE_EVENT_DMA_MM2S_0_MEMORY_STARVATION_PL}; - 
eventSets["output_stalls"] = {XAIE_EVENT_DMA_S2MM_0_MEMORY_BACKPRESSURE_PL, - XAIE_EVENT_DMA_S2MM_0_STALLED_LOCK_PL}; + std::vector tlasts = {XAIE_EVENT_PORT_TLAST_0_PL, XAIE_EVENT_PORT_TLAST_1_PL, + XAIE_EVENT_PORT_TLAST_2_PL, XAIE_EVENT_PORT_TLAST_3_PL, + XAIE_EVENT_PORT_TLAST_4_PL, XAIE_EVENT_PORT_TLAST_5_PL}; + std::vector stalled = {XAIE_EVENT_PORT_STALLED_0_PL, XAIE_EVENT_PORT_STALLED_1_PL, + XAIE_EVENT_PORT_STALLED_2_PL, XAIE_EVENT_PORT_STALLED_3_PL, + XAIE_EVENT_PORT_STALLED_4_PL, XAIE_EVENT_PORT_STALLED_5_PL}; + std::vector running = {XAIE_EVENT_PORT_RUNNING_0_PL, XAIE_EVENT_PORT_RUNNING_1_PL, + XAIE_EVENT_PORT_RUNNING_2_PL, XAIE_EVENT_PORT_RUNNING_3_PL, + XAIE_EVENT_PORT_RUNNING_4_PL, XAIE_EVENT_PORT_RUNNING_5_PL}; + + for (int c=0; c < numCounters/2; ++c) { + eventSets["packets"].push_back(tlasts.at(c)); + eventSets["input_throughputs"].push_back(stalled.at(c)); + eventSets["input_throughputs"].push_back(running.at(c)); + eventSets["output_throughputs"].push_back(stalled.at(c)); + eventSets["output_throughputs"].push_back(running.at(c)); + } + } + + // Stall events are generation-specific + if (xdp::aie::isAIE1(hwGen)) { + eventSets["input_stalls"] = {XAIE_EVENT_PORT_STALLED_0_PL, + XAIE_EVENT_PORT_IDLE_0_PL}; + eventSets["output_stalls"] = {XAIE_EVENT_PORT_STALLED_0_PL, + XAIE_EVENT_PORT_IDLE_0_PL}; + } + else if (xdp::aie::isAIE2(hwGen)) { + eventSets["input_stalls"] = {XAIE_EVENT_DMA_MM2S_0_STREAM_BACKPRESSURE_PL, + XAIE_EVENT_DMA_MM2S_0_MEMORY_STARVATION_PL}; + eventSets["output_stalls"] = {XAIE_EVENT_DMA_S2MM_0_MEMORY_BACKPRESSURE_PL, + XAIE_EVENT_DMA_S2MM_0_STALLED_LOCK_PL}; + } + else if (xdp::aie::isAIE2ps(hwGen)) { +#ifdef XDP_VE2_BUILD + eventSets["input_stalls"] = {XAIE_EVENT_NOC0_DMA_MM2S_0_STREAM_BACKPRESSURE_PL, + XAIE_EVENT_NOC0_DMA_MM2S_0_MEMORY_STARVATION_PL, + XAIE_EVENT_NOC0_DMA_MM2S_0_STALLED_LOCK_PL, + XAIE_EVENT_NOC0_DMA_MM2S_1_STREAM_BACKPRESSURE_PL, + XAIE_EVENT_NOC0_DMA_MM2S_1_MEMORY_STARVATION_PL, + 
XAIE_EVENT_NOC0_DMA_MM2S_1_STALLED_LOCK_PL}; + eventSets["output_stalls"] = {XAIE_EVENT_NOC0_DMA_S2MM_0_MEMORY_BACKPRESSURE_PL, + XAIE_EVENT_NOC0_DMA_S2MM_0_STREAM_STARVATION_PL, + XAIE_EVENT_NOC0_DMA_S2MM_0_STALLED_LOCK_PL, + XAIE_EVENT_NOC0_DMA_S2MM_1_MEMORY_BACKPRESSURE_PL, + XAIE_EVENT_NOC0_DMA_S2MM_1_STREAM_STARVATION_PL, + XAIE_EVENT_NOC0_DMA_S2MM_1_STALLED_LOCK_PL}; + eventSets["input_throughputs"] = {XAIE_EVENT_NOC0_DMA_MM2S_0_STREAM_BACKPRESSURE_PL, + XAIE_EVENT_NOC0_DMA_MM2S_0_MEMORY_STARVATION_PL, + XAIE_EVENT_PORT_RUNNING_0_PL, + XAIE_EVENT_NOC0_DMA_MM2S_1_STREAM_BACKPRESSURE_PL, + XAIE_EVENT_NOC0_DMA_MM2S_1_MEMORY_STARVATION_PL, + XAIE_EVENT_PORT_RUNNING_1_PL}; + eventSets["output_throughputs"] = {XAIE_EVENT_NOC0_DMA_S2MM_0_MEMORY_BACKPRESSURE_PL, + XAIE_EVENT_NOC0_DMA_S2MM_0_STALLED_LOCK_PL, + XAIE_EVENT_PORT_RUNNING_0_PL, + XAIE_EVENT_NOC0_DMA_S2MM_1_MEMORY_BACKPRESSURE_PL, + XAIE_EVENT_NOC0_DMA_S2MM_1_STALLED_LOCK_PL, + XAIE_EVENT_PORT_RUNNING_1_PL}; +#endif + } + else { +#ifdef XDP_NPU3_BUILD + eventSets["output_throughputs"] = {XAIE_EVENT_DMA_MM2S_0_STREAM_BACKPRESSURE_PL, + XAIE_EVENT_DMA_MM2S_0_MEMORY_STARVATION_PL, + XAIE_EVENT_DMA_MM2S_0_RUNNING_PL, + XAIE_EVENT_DMA_MM2S_1_STREAM_BACKPRESSURE_PL, + XAIE_EVENT_DMA_MM2S_1_MEMORY_STARVATION_PL, + XAIE_EVENT_DMA_MM2S_1_RUNNING_PL, + XAIE_EVENT_DMA_MM2S_2_STREAM_BACKPRESSURE_PL, + XAIE_EVENT_DMA_MM2S_2_MEMORY_STARVATION_PL, + XAIE_EVENT_DMA_MM2S_2_RUNNING_PL, + XAIE_EVENT_DMA_MM2S_3_STREAM_BACKPRESSURE_PL, + XAIE_EVENT_DMA_MM2S_3_MEMORY_STARVATION_PL, + XAIE_EVENT_DMA_MM2S_3_RUNNING_PL}; + eventSets["trace_dma"] = {XAIE_EVENT_DMA_TRACE_S2MM_MEMORY_BACKPRESSURE_PL, + XAIE_EVENT_DMA_TRACE_S2MM_STREAM_STARVATION_PL, + XAIE_EVENT_DMA_TRACE_S2MM_RUNNING_PL, + XAIE_EVENT_DMA_TRACE_S2MM_START_BD_PL, + XAIE_EVENT_DMA_TRACE_S2MM_FINISHED_BD_PL, + XAIE_EVENT_DMA_TRACE_S2MM_START_TASK_PL, + XAIE_EVENT_DMA_TRACE_S2MM_FINISHED_TASK_PL}; +#endif } // Microcontroller sets - if (hwGen >= 5) { -#ifdef 
XDP_CLIENT_BUILD - // eventSets["uc_dma_activity"] = { - // XAIE_EVENT_UC_DMA_DM2MM_A_FINISHED_BD, XAIE_EVENT_UC_DMA_DM2MM_A_LOCAL_MEMORY_STARVATION, - // XAIE_EVENT_UC_DMA_DM2MM_A_REMOTE_MEMORY_BACKPRESSURE, - // XAIE_EVENT_UC_DMA_MM2DM_A_FINISHED_BD, XAIE_EVENT_UC_DMA_MM2DM_A_LOCAL_MEMORY_STARVATION, - // XAIE_EVENT_UC_DMA_MM2DM_A_REMOTE_MEMORY_BACKPRESSURE}; - // eventSets["uc_axis_throughputs"] = { - // XAIE_EVENT_UC_CORE_AXIS_MASTER_RUNNING, XAIE_EVENT_UC_CORE_AXIS_MASTER_STALLED, - // XAIE_EVENT_UC_CORE_AXIS_MASTER_TLAST, - // XAIE_EVENT_UC_CORE_AXIS_SLAVE_RUNNING, XAIE_EVENT_UC_CORE_AXIS_SLAVE_STALLED, - // XAIE_EVENT_UC_CORE_AXIS_SLAVE_TLAST}; - // eventSets["uc_core"] = { - // XAIE_EVENT_UC_CORE_REG_WRITE, XAIE_EVENT_UC_CORE_JUMP_TAKEN, - // XAIE_EVENT_UC_CORE_DATA_READ, XAIE_EVENT_UC_CORE_DATA_WRITE, - // XAIE_EVENT_UC_CORE_STREAM_GET, XAIE_EVENT_UC_CORE_STREAM_PUT}; - eventSets["uc_dma_activity"] = {}; - eventSets["uc_axis_throughputs"] = {}; - eventSets["uc_core"] = {}; -#else + if (xdp::aie::isMicroSupported(hwGen)) { +#ifdef XDP_VE2_BUILD + eventSets["uc_core"] = { + XAIE_EVENT_CORE_REG_WRITE_UC, XAIE_EVENT_CORE_JUMP_TAKEN_UC, + XAIE_EVENT_CORE_DATA_READ_UC, XAIE_EVENT_CORE_DATA_WRITE_UC, + XAIE_EVENT_CORE_STREAM_GET_UC, XAIE_EVENT_CORE_STREAM_PUT_UC}; eventSets["uc_dma_activity"] = { XAIE_EVENT_DMA_DM2MM_FINISHED_BD_UC, XAIE_EVENT_DMA_DM2MM_LOCAL_MEMORY_STARVATION_UC, XAIE_EVENT_DMA_DM2MM_REMOTE_MEMORY_BACKPRESSURE_UC, - XAIE_EVENT_DMA_MM2DM_FINISHED_BD_UC, XAIE_EVENT_DMA_MM2DM_LOCAL_MEMORY_STARVATION_UC, - XAIE_EVENT_DMA_MM2DM_REMOTE_MEMORY_BACKPRESSURE_UC}; + XAIE_EVENT_DMA_MM2DM_FINISHED_BD_UC, XAIE_EVENT_DMA_MM2DM_LOCAL_MEMORY_BACKPRESSURE_UC, + XAIE_EVENT_DMA_MM2DM_REMOTE_MEMORY_STARVATION_UC}; + eventSets["uc_axis_throughputs"] = { + XAIE_EVENT_CORE_AXIS_MASTER_RUNNING_UC, XAIE_EVENT_CORE_AXIS_MASTER_STALLED_UC, + XAIE_EVENT_CORE_AXIS_MASTER_TLAST_UC, + XAIE_EVENT_CORE_AXIS_SLAVE_RUNNING_UC, XAIE_EVENT_CORE_AXIS_SLAVE_STALLED_UC, + 
XAIE_EVENT_CORE_AXIS_SLAVE_TLAST_UC}; +#elif XDP_NPU3_BUILD + eventSets["uc_core"] = {}; + eventSets["uc_dma_activity"] = { + XAIE_EVENT_DMA_DM2MM_FINISHED_BD_UC, XAIE_EVENT_DMA_DM2MM_LOCAL_MEMORY_STARVATION_UC, + XAIE_EVENT_DMA_DM2MM_REMOTE_MEMORY_BACKPRESSURE_UC, + XAIE_EVENT_DMA_MM2DM_FINISHED_BD_UC, XAIE_EVENT_DMA_MM2DM_B_LOCAL_MEMORY_BACKPRESSURE_UC, + XAIE_EVENT_DMA_MM2DM_B_REMOTE_MEMORY_STARVATION_UC}; eventSets["uc_axis_throughputs"] = { XAIE_EVENT_CORE_AXIS_MASTER_RUNNING_UC, XAIE_EVENT_CORE_AXIS_MASTER_STALLED_UC, XAIE_EVENT_CORE_AXIS_MASTER_TLAST_UC, XAIE_EVENT_CORE_AXIS_SLAVE_RUNNING_UC, XAIE_EVENT_CORE_AXIS_SLAVE_STALLED_UC, XAIE_EVENT_CORE_AXIS_SLAVE_TLAST_UC}; - eventSets["uc_core"] = { - XAIE_EVENT_CORE_REG_WRITE_UC, XAIE_EVENT_CORE_JUMP_TAKEN_UC, - XAIE_EVENT_CORE_DATA_READ_UC, XAIE_EVENT_CORE_DATA_WRITE_UC, - XAIE_EVENT_CORE_STREAM_GET_UC, XAIE_EVENT_CORE_STREAM_PUT_UC}; #endif } else { @@ -187,77 +321,129 @@ namespace xdp::aie::profile { ***************************************************************************/ std::map> getMemoryTileEventSets(const int hwGen) { + // Number of memory tile counters is generation-specific + int numCounters = xdp::aie::getNumCounters(hwGen, xdp::module_type::mem_tile); + std::map> eventSets; - eventSets = { - {"input_channels", {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE, - XAIE_EVENT_PORT_STALLED_0_MEM_TILE, - XAIE_EVENT_PORT_TLAST_0_MEM_TILE, - XAIE_EVENT_DMA_S2MM_SEL0_FINISHED_BD_MEM_TILE}}, - {"input_channels_details", {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE, - XAIE_EVENT_DMA_S2MM_SEL0_MEMORY_BACKPRESSURE_MEM_TILE, - XAIE_EVENT_DMA_S2MM_SEL0_STALLED_LOCK_ACQUIRE_MEM_TILE, - XAIE_EVENT_DMA_S2MM_SEL0_STREAM_STARVATION_MEM_TILE}}, - {"output_channels", {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE, - XAIE_EVENT_PORT_STALLED_0_MEM_TILE, - XAIE_EVENT_PORT_TLAST_0_MEM_TILE, - XAIE_EVENT_DMA_MM2S_SEL0_FINISHED_BD_MEM_TILE}}, - {"output_channels_details", {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE, - 
XAIE_EVENT_DMA_MM2S_SEL0_STREAM_BACKPRESSURE_MEM_TILE, - XAIE_EVENT_DMA_MM2S_SEL0_MEMORY_STARVATION_MEM_TILE, - XAIE_EVENT_DMA_MM2S_SEL0_STALLED_LOCK_ACQUIRE_MEM_TILE}}, - {"memory_stats", {XAIE_EVENT_GROUP_MEMORY_CONFLICT_MEM_TILE, - XAIE_EVENT_GROUP_ERRORS_MEM_TILE, - XAIE_EVENT_GROUP_LOCK_MEM_TILE, - XAIE_EVENT_GROUP_WATCHPOINT_MEM_TILE}}, - {"mem_trace", {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE, - XAIE_EVENT_PORT_STALLED_0_MEM_TILE, - XAIE_EVENT_PORT_IDLE_0_MEM_TILE, - XAIE_EVENT_PORT_TLAST_0_MEM_TILE}}, - {"input_throughputs", {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE, - XAIE_EVENT_DMA_S2MM_SEL0_STREAM_STARVATION_MEM_TILE, - XAIE_EVENT_DMA_S2MM_SEL0_MEMORY_BACKPRESSURE_MEM_TILE, - XAIE_EVENT_DMA_S2MM_SEL0_STALLED_LOCK_ACQUIRE_MEM_TILE}}, - {"output_throughputs", {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE, - XAIE_EVENT_DMA_MM2S_SEL0_STREAM_BACKPRESSURE_MEM_TILE, - XAIE_EVENT_DMA_MM2S_SEL0_MEMORY_STARVATION_MEM_TILE, - XAIE_EVENT_DMA_MM2S_SEL0_STALLED_LOCK_ACQUIRE_MEM_TILE}} - }; - if (hwGen < 40) { - eventSets["conflict_stats1"] = { - XAIE_EVENT_CONFLICT_DM_BANK_0_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_1_MEM_TILE, - XAIE_EVENT_CONFLICT_DM_BANK_2_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_3_MEM_TILE}; - eventSets["conflict_stats2"] = { - XAIE_EVENT_CONFLICT_DM_BANK_4_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_5_MEM_TILE, - XAIE_EVENT_CONFLICT_DM_BANK_6_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_7_MEM_TILE}; - eventSets["conflict_stats3"] = { - XAIE_EVENT_CONFLICT_DM_BANK_8_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_9_MEM_TILE, - XAIE_EVENT_CONFLICT_DM_BANK_10_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_11_MEM_TILE}; - eventSets["conflict_stats4"] = { - XAIE_EVENT_CONFLICT_DM_BANK_12_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_13_MEM_TILE, - XAIE_EVENT_CONFLICT_DM_BANK_14_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_15_MEM_TILE}; - } else { - eventSets["conflict_stats1"] = { - XAIE_EVENT_CONFLICT_DM_BANK_0_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_1_MEM_TILE, - XAIE_EVENT_CONFLICT_DM_BANK_2_MEM_TILE, 
XAIE_EVENT_CONFLICT_DM_BANK_3_MEM_TILE, - XAIE_EVENT_CONFLICT_DM_BANK_4_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_5_MEM_TILE, - XAIE_EVENT_CONFLICT_DM_BANK_6_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_7_MEM_TILE, - XAIE_EVENT_CONFLICT_DM_BANK_8_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_9_MEM_TILE, - XAIE_EVENT_CONFLICT_DM_BANK_10_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_11_MEM_TILE}; -#ifdef XDP_CLIENT_BUILD + if (numCounters == 4) { + eventSets["input_channels"] = {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE, + XAIE_EVENT_PORT_STALLED_0_MEM_TILE, + XAIE_EVENT_PORT_TLAST_0_MEM_TILE, + XAIE_EVENT_DMA_S2MM_SEL0_FINISHED_BD_MEM_TILE}; + eventSets["input_channels_details"] = {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE, + XAIE_EVENT_DMA_S2MM_SEL0_MEMORY_BACKPRESSURE_MEM_TILE, + XAIE_EVENT_DMA_S2MM_SEL0_STALLED_LOCK_ACQUIRE_MEM_TILE, + XAIE_EVENT_DMA_S2MM_SEL0_STREAM_STARVATION_MEM_TILE}; + eventSets["output_channels"] = {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE, + XAIE_EVENT_PORT_STALLED_0_MEM_TILE, + XAIE_EVENT_PORT_TLAST_0_MEM_TILE, + XAIE_EVENT_DMA_MM2S_SEL0_FINISHED_BD_MEM_TILE}; + eventSets["output_channels_details"] = {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE, + XAIE_EVENT_DMA_MM2S_SEL0_STREAM_BACKPRESSURE_MEM_TILE, + XAIE_EVENT_DMA_MM2S_SEL0_MEMORY_STARVATION_MEM_TILE, + XAIE_EVENT_DMA_MM2S_SEL0_STALLED_LOCK_ACQUIRE_MEM_TILE}; + eventSets["memory_stats"] = {XAIE_EVENT_GROUP_MEMORY_CONFLICT_MEM_TILE, + XAIE_EVENT_GROUP_ERRORS_MEM_TILE, + XAIE_EVENT_GROUP_LOCK_MEM_TILE, + XAIE_EVENT_GROUP_WATCHPOINT_MEM_TILE}; + eventSets["mem_trace"] = {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE, + XAIE_EVENT_PORT_STALLED_0_MEM_TILE, + XAIE_EVENT_PORT_IDLE_0_MEM_TILE, + XAIE_EVENT_PORT_TLAST_0_MEM_TILE}; + eventSets["input_throughputs"] = {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE, + XAIE_EVENT_DMA_S2MM_SEL0_STREAM_STARVATION_MEM_TILE, + XAIE_EVENT_DMA_S2MM_SEL0_MEMORY_BACKPRESSURE_MEM_TILE, + XAIE_EVENT_DMA_S2MM_SEL0_STALLED_LOCK_ACQUIRE_MEM_TILE}; + eventSets["output_throughputs"] = {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE, + 
XAIE_EVENT_DMA_MM2S_SEL0_STREAM_BACKPRESSURE_MEM_TILE, + XAIE_EVENT_DMA_MM2S_SEL0_MEMORY_STARVATION_MEM_TILE, + XAIE_EVENT_DMA_MM2S_SEL0_STALLED_LOCK_ACQUIRE_MEM_TILE}; + eventSets["conflict_stats1"] = {XAIE_EVENT_CONFLICT_DM_BANK_0_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_1_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_2_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_3_MEM_TILE}; + eventSets["conflict_stats2"] = {XAIE_EVENT_CONFLICT_DM_BANK_4_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_5_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_6_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_7_MEM_TILE}; + eventSets["conflict_stats3"] = {XAIE_EVENT_CONFLICT_DM_BANK_8_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_9_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_10_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_11_MEM_TILE}; + eventSets["conflict_stats4"] = {XAIE_EVENT_CONFLICT_DM_BANK_12_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_13_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_14_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_15_MEM_TILE}; + eventSets["throughputs"] = {}; + } + else if (numCounters == 12) { + eventSets["input_channels"] = {XAIE_EVENT_PORT_STALLED_0_MEM_TILE, + XAIE_EVENT_PORT_RUNNING_0_MEM_TILE, + XAIE_EVENT_PORT_STALLED_1_MEM_TILE, + XAIE_EVENT_PORT_RUNNING_1_MEM_TILE, + XAIE_EVENT_PORT_STALLED_2_MEM_TILE, + XAIE_EVENT_PORT_RUNNING_2_MEM_TILE, + XAIE_EVENT_PORT_STALLED_3_MEM_TILE, + XAIE_EVENT_PORT_RUNNING_3_MEM_TILE, + XAIE_EVENT_PORT_STALLED_4_MEM_TILE, + XAIE_EVENT_PORT_RUNNING_4_MEM_TILE, + XAIE_EVENT_PORT_STALLED_5_MEM_TILE, + XAIE_EVENT_PORT_RUNNING_5_MEM_TILE}; + eventSets["memory_stats"] = {XAIE_EVENT_GROUP_MEMORY_CONFLICT_MEM_TILE, + XAIE_EVENT_GROUP_ERRORS_MEM_TILE, + XAIE_EVENT_GROUP_LOCK_MEM_TILE, + XAIE_EVENT_GROUP_WATCHPOINT_MEM_TILE}; + eventSets["mem_trace"] = {XAIE_EVENT_PORT_RUNNING_0_MEM_TILE, + XAIE_EVENT_PORT_STALLED_0_MEM_TILE, + XAIE_EVENT_PORT_IDLE_0_MEM_TILE, + XAIE_EVENT_PORT_TLAST_0_MEM_TILE}; + eventSets["conflict_stats1"] = {XAIE_EVENT_CONFLICT_DM_BANK_0_MEM_TILE, + 
XAIE_EVENT_CONFLICT_DM_BANK_1_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_2_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_3_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_4_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_5_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_6_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_7_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_8_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_9_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_10_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_11_MEM_TILE}; +#ifdef XDP_NPU3_BUILD // Banks 16-23 are not defined for all generations - // eventSets["conflict_stats2"] = { - // XAIE_EVENT_CONFLICT_DM_BANK_12_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_13_MEM_TILE, - // XAIE_EVENT_CONFLICT_DM_BANK_14_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_15_MEM_TILE, - // XAIE_EVENT_CONFLICT_DM_BANK_16_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_17_MEM_TILE, - // XAIE_EVENT_CONFLICT_DM_BANK_18_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_19_MEM_TILE, - // XAIE_EVENT_CONFLICT_DM_BANK_20_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_21_MEM_TILE, - // XAIE_EVENT_CONFLICT_DM_BANK_22_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_23_MEM_TILE}; -#else - eventSets["conflict_stats2"] = { - XAIE_EVENT_CONFLICT_DM_BANK_12_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_13_MEM_TILE, - XAIE_EVENT_CONFLICT_DM_BANK_14_MEM_TILE, XAIE_EVENT_CONFLICT_DM_BANK_15_MEM_TILE}; + eventSets["conflict_stats2"] = {XAIE_EVENT_CONFLICT_DM_BANK_12_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_13_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_14_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_15_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_16_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_17_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_18_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_19_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_20_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_21_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_22_MEM_TILE, + XAIE_EVENT_CONFLICT_DM_BANK_23_MEM_TILE}; + eventSets["throughputs"] = {XAIE_EVENT_DMA_S2MM_SEL0_STALLED_LOCK_ACQUIRE_MEM_TILE, + 
XAIE_EVENT_DMA_S2MM_SEL0_MEMORY_BACKPRESSURE_MEM_TILE, + XAIE_EVENT_DMA_S2MM_SEL0_RUNNING_MEM_TILE, + XAIE_EVENT_DMA_S2MM_SEL1_STALLED_LOCK_ACQUIRE_MEM_TILE, + XAIE_EVENT_DMA_S2MM_SEL1_MEMORY_BACKPRESSURE_MEM_TILE, + XAIE_EVENT_DMA_S2MM_SEL1_RUNNING_MEM_TILE, + XAIE_EVENT_DMA_MM2S_SEL0_STREAM_BACKPRESSURE_MEM_TILE, + XAIE_EVENT_DMA_MM2S_SEL0_MEMORY_STARVATION_MEM_TILE, + XAIE_EVENT_DMA_MM2S_SEL0_RUNNING_MEM_TILE, + XAIE_EVENT_DMA_MM2S_SEL1_STREAM_BACKPRESSURE_MEM_TILE, + XAIE_EVENT_DMA_MM2S_SEL1_MEMORY_STARVATION_MEM_TILE, + XAIE_EVENT_DMA_MM2S_SEL1_RUNNING_MEM_TILE,}; #endif + + eventSets["output_channels"] = eventSets["input_channels"]; + eventSets["input_channels_details"] = {}; + eventSets["output_channels_details"] = {}; + eventSets["mem_trace"] = {}; + eventSets["input_throughputs"] = {}; + eventSets["output_throughputs"] = {}; eventSets["conflict_stats3"] = {}; eventSets["conflict_stats4"] = {}; } @@ -273,17 +459,13 @@ namespace xdp::aie::profile { /**************************************************************************** * Get metric sets for microcontrollers - * TODO: convert to XAie_Events once support is available from driver ***************************************************************************/ - //std::map> getMicrocontrollerEventSets(const int hwGen) - std::map> getMicrocontrollerEventSets(const int hwGen) + std::map> getMicrocontrollerEventSets(const int hwGen) { - //std::map> eventSets; - std::map> eventSets; - if (hwGen < 5) + std::map> eventSets; + if (!xdp::aie::isMicroSupported(hwGen)) return eventSets; - // TODO: replace with enums once driver supports the MDM eventSets = { {"execution", {16, 17, 18, 19, 20, 62}}, {"interrupt_stalls", {23, 24, 25, 26, 27, 57}}, @@ -335,10 +517,12 @@ namespace xdp::aie::profile { XAie_DmaDirection dmaDir = aie::isInputSet(type, metricSet) ? 
DMA_S2MM : DMA_MM2S; XAie_EventSelectDmaChannel(aieDevInst, loc, 0, dmaDir, channel); - std::stringstream msg; - msg << "Configured mem tile " << (aie::isInputSet(type,metricSet) ? "S2MM" : "MM2S") - << "DMA for metricset " << metricSet << ", channel " << (int)channel << "."; - xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + if (aie::isDebugVerbosity()) { + std::string typeName = (dmaDir == DMA_S2MM) ? "S2MM" : "MM2S"; + std::string msg = "Configured event selections for DMA " + typeName + ", metric set " + + metricSet + ", channel " + std::to_string(channel); + xrt_core::message::send(severity_level::debug, "XRT", msg); + } } /**************************************************************************** @@ -369,7 +553,7 @@ namespace xdp::aie::profile { // Calculate throughput differently for PLIO or AIE1 devices // since DMA-related events are not defined in those cases - if ((subtype == io_type::PLIO) || (hwGen == 1)) { + if ((subtype == io_type::PLIO) || xdp::aie::isAIE1(hwGen)) { std::replace(events.begin(), events.end(), XAIE_EVENT_GROUP_DMA_ACTIVITY_PL, XAIE_EVENT_PORT_STALLED_0_PL); std::replace(events.begin(), events.end(), @@ -545,13 +729,12 @@ namespace xdp::aie::profile { /**************************************************************************** * Configure counters in Microblaze Debug Module (MDM) - * TODO: convert to using XAie_Events once support is available from driver ***************************************************************************/ void configMDMCounters(XAie_DevInst* aieDevInst, int hwGen, uint8_t col, uint8_t row, - const std::vector events) + const std::vector events) { // Ensure supported generation and not privileged - if (hwGen < 5) + if (!xdp::aie::isMicroSupported(hwGen)) return; uint32_t val; @@ -590,15 +773,15 @@ namespace xdp::aie::profile { std::vector& values) { // Ensure supported generation and not privileged - if (hwGen < 5) + if (!xdp::aie::isMicroSupported(hwGen)) return; - uint32_t val; + 
uint32_t privileged = 0; static bool showWarning = true; auto tileOffset = XAie_GetTileAddr(aieDevInst, row, col); - XAie_Read32(aieDevInst, tileOffset + UC_MEMORY_PRIVILEGED, &val); + XAie_Read32(aieDevInst, tileOffset + UC_MEMORY_PRIVILEGED, &privileged); - if (val) { + if (privileged) { if (showWarning) { xrt_core::message::send(severity_level::warning, "XRT", "Unable to read MDM counters since debug port is privileged."); @@ -622,7 +805,7 @@ namespace xdp::aie::profile { std::vector overflows; uint32_t numCounters = UC_NUM_EVENT_COUNTERS + UC_NUM_LATENCY_COUNTERS; for (uint32_t c=0; c < numCounters; ++c) { - uint32_t val1; + uint32_t val1 = 0; XAie_Read32(aieDevInst, tileOffset + UC_MDM_PCSR, &val1); bool overflow = (((val1 >> UC_MDM_PCSR_OVERFLOW_BIT) & 0x1) == 1); overflows.push_back(overflow); @@ -638,9 +821,9 @@ namespace xdp::aie::profile { // 5. Read values of event counters for (uint32_t c=0; c < UC_NUM_EVENT_COUNTERS; ++c) { - uint32_t val1; - XAie_Read32(aieDevInst, tileOffset + UC_MDM_PCDRR, &val1); - uint64_t val2 = (overflows.at(c)) ? (val1 + OVERFLOW_32BIT) : val1; + uint32_t val = 0; + XAie_Read32(aieDevInst, tileOffset + UC_MDM_PCDRR, &val); + uint64_t val2 = (overflows.at(c)) ? (val + OVERFLOW_32BIT) : val; values.push_back(val2); } @@ -652,9 +835,9 @@ namespace xdp::aie::profile { // 15:0 Maximum measured latency, 16 bits std::vector latencyValues; for (uint32_t c=0; c < UC_MDM_PCDRR_LATENCY_READS; ++c) { - uint32_t val1; - XAie_Read32(aieDevInst, tileOffset + UC_MDM_PCDRR, &val1); - uint64_t val2 = (overflows.at(UC_NUM_EVENT_COUNTERS)) ? (val1 + OVERFLOW_32BIT) : val1; + uint32_t val = 0; + XAie_Read32(aieDevInst, tileOffset + UC_MDM_PCDRR, &val); + uint64_t val2 = (overflows.at(UC_NUM_EVENT_COUNTERS)) ? 
(val + OVERFLOW_32BIT) : val; latencyValues.push_back(val2); } diff --git a/src/runtime_src/xdp/profile/plugin/aie_profile/util/aie_profile_util.h b/src/runtime_src/xdp/profile/plugin/aie_profile/util/aie_profile_util.h index 3cf62984622..d28f8d2e021 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_profile/util/aie_profile_util.h +++ b/src/runtime_src/xdp/profile/plugin/aie_profile/util/aie_profile_util.h @@ -87,7 +87,7 @@ namespace xdp::aie::profile { * @return Map of microcontroller metric set names with vectors of event IDs */ //std::map> getMicrocontrollerEventSets(const int hwGen); - std::map> getMicrocontrollerEventSets(const int hwGen); + std::map> getMicrocontrollerEventSets(const int hwGen); /** * @brief Modify configured events @@ -109,10 +109,10 @@ namespace xdp::aie::profile { * @param event metric set group event * @param channel channel to be configured */ - void configGroupEvents(XAie_DevInst* aieDevInst, const XAie_LocType loc, - const XAie_ModuleType mod, const module_type type, - const std::string metricSet, const XAie_Events event, - const uint8_t channel); + void configGroupEvents(XAie_DevInst* aieDevInst, const XAie_LocType loc, + const XAie_ModuleType mod, const module_type type, + const std::string metricSet, const XAie_Events event, + const uint8_t channel); /** * @brief Configure the selection index to monitor channel number in memory tiles @@ -184,7 +184,7 @@ namespace xdp::aie::profile { * @param events vector of events to use in counters */ void configMDMCounters(XAie_DevInst* aieDevInst, int hwGen, uint8_t col, uint8_t row, - const std::vector events); + const std::vector events); /** * @brief Read counters in Microblaze Debug Module (MDM) diff --git a/src/runtime_src/xdp/profile/plugin/aie_trace/CMakeLists.txt b/src/runtime_src/xdp/profile/plugin/aie_trace/CMakeLists.txt index c8c7743592b..4a8bdc753f3 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_trace/CMakeLists.txt +++ 
b/src/runtime_src/xdp/profile/plugin/aie_trace/CMakeLists.txt @@ -4,11 +4,13 @@ # ==================================================================== # This builds the AIE Trace plugin. It is currently built on x86, -# Edge-Versal systems, client and ve2 but not Edge-aarch64. It also has a dependency -# on the hardware shim +# Edge, Client, and VE2 platforms that support AIE. It also has +# a dependency on the hardware shim. # ==================================================================== file(GLOB AIE_TRACE_PLUGIN_FILES + "${PROFILE_DIR}/plugin/aie_base/*" + "${PROFILE_DIR}/plugin/aie_base/generations/*" "${PROFILE_DIR}/plugin/aie_trace/*.h" "${PROFILE_DIR}/plugin/aie_trace/*.cpp" "${PROFILE_DIR}/writer/aie_trace/*.h" @@ -28,27 +30,53 @@ file(GLOB AIE_TRACE_CONFIG_FILES if (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") set(IMPL_DIR "${PROFILE_DIR}/plugin/aie_trace/client") set(OFFLOAD_DIR "${PROFILE_DIR}/device/aie_trace/client") - set(DEVICE_DIR "${PROFILE_DIR}/device/client_device") file(GLOB AIE_DRIVER_COMMON_UTIL_FILES "${PROFILE_DIR}/device/common/*.h" "${PROFILE_DIR}/device/common/*.cpp" ) - file(GLOB AIE_TRACE_COMPONENT_FILES - "${OFFLOAD_DIR}/*.h" - "${OFFLOAD_DIR}/*.cpp" - "${DEVICE_DIR}/*.h" - "${DEVICE_DIR}/*.cpp" - "${IMPL_DIR}/*.h" - "${IMPL_DIR}/*.cpp" - ) + if (XDP_NPU3_BUILD_CMAKE STREQUAL "yes") + file(GLOB AIE_TRACE_COMPONENT_FILES + "${IMPL_DIR}/aie_trace_npu3.h" + "${IMPL_DIR}/aie_trace_npu3.cpp" + "${IMPL_DIR}/aie_trace.h" + "${IMPL_DIR}/aie_trace.cpp" + ) + file(GLOB AIE_TRACE_OFFLOAD_FILES + "${OFFLOAD_DIR}/aie_trace_offload_npu3.h" + "${OFFLOAD_DIR}/aie_trace_offload_npu3.cpp" + ) + else() + file(GLOB AIE_TRACE_COMPONENT_FILES + "${IMPL_DIR}/aie_trace.h" + "${IMPL_DIR}/aie_trace.cpp" + ) + file(GLOB AIE_TRACE_OFFLOAD_FILES + "${OFFLOAD_DIR}/aie_trace_offload_client.h" + "${OFFLOAD_DIR}/aie_trace_offload_client.cpp" + ) + endif() + + if (XDP_NPU3_BUILD_CMAKE STREQUAL "yes") + file(GLOB AIE_DEVICE_NPU3_FILES 
"${PROFILE_DIR}/device/common/npu3/*") + add_library(xdp_aie_trace_plugin SHARED ${AIE_TRACE_PLUGIN_FILES} ${AIE_TRACE_COMPONENT_FILES} + ${AIE_TRACE_OFFLOAD_FILES} ${AIE_TRACE_UTIL_FILES} ${AIE_DRIVER_COMMON_UTIL_FILES} + ${AIE_DEVICE_NPU3_FILES}) + else() + add_library(xdp_aie_trace_plugin SHARED ${AIE_TRACE_PLUGIN_FILES} ${AIE_TRACE_COMPONENT_FILES} + ${AIE_TRACE_OFFLOAD_FILES} ${AIE_TRACE_UTIL_FILES} ${AIE_DRIVER_COMMON_UTIL_FILES}) + endif() - add_library(xdp_aie_trace_plugin SHARED ${AIE_TRACE_PLUGIN_FILES} ${AIE_TRACE_COMPONENT_FILES} ${AIE_TRACE_UTIL_FILES} ${AIE_DRIVER_COMMON_UTIL_FILES}) add_dependencies(xdp_aie_trace_plugin xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_trace_plugin PRIVATE xdp_core xrt_coreutil xaiengine) + target_link_libraries(xdp_aie_trace_plugin PRIVATE + xdp_core xrt_coreutil xaiengine aiebu_library_objects) target_compile_definitions(xdp_aie_trace_plugin PRIVATE XDP_CLIENT_BUILD=1 -DXAIE_FEATURE_MSVC) - target_include_directories(xdp_aie_trace_plugin PRIVATE ${AIERT_DIR}/include) + if (XDP_NPU3_BUILD_CMAKE STREQUAL "yes") + target_compile_definitions(xdp_aie_trace_plugin PRIVATE XDP_NPU3_BUILD=1) + endif() + target_include_directories(xdp_aie_trace_plugin PRIVATE + ${AIERT_DIR}/include ${AIEBU_SOURCE_DIR}/src/cpp/include) set_target_properties(xdp_aie_trace_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) install (TARGETS xdp_aie_trace_plugin @@ -96,7 +124,8 @@ else() "${IMPL_DIR}/*.cpp" ) - add_library(xdp_aie_trace_plugin_xdna SHARED ${AIE_TRACE_PLUGIN_FILES} ${AIE_TRACE_COMPONENT_FILES} ${AIE_TRACE_UTIL_FILES} ${AIE_TRACE_CONFIG_FILES}) + add_library(xdp_aie_trace_plugin_xdna SHARED ${AIE_TRACE_PLUGIN_FILES} ${AIE_TRACE_COMPONENT_FILES} + ${AIE_TRACE_UTIL_FILES} ${AIE_TRACE_CONFIG_FILES}) add_dependencies(xdp_aie_trace_plugin_xdna xdp_core xrt_coreutil) target_link_libraries(xdp_aie_trace_plugin_xdna PRIVATE xdp_core xrt_coreutil xaiengine) target_compile_definitions(xdp_aie_trace_plugin_xdna 
PRIVATE XDP_VE2_BUILD=1 FAL_LINUX="on") @@ -108,7 +137,7 @@ else() LIBRARY DESTINATION ${XDP_PLUGIN_INSTALL_DIR} COMPONENT ${XRT_COMPONENT} ${XRT_NAMELINK_SKIP} ) endif() - + if (DEFINED XRT_AIE_BUILD AND XRT_EDGE) set(IMPL_DIR "${PROFILE_DIR}/plugin/aie_trace/edge") set(OFFLOAD_DIR "${PROFILE_DIR}/device/aie_trace") @@ -139,6 +168,6 @@ else() ) endif() -# Else, on edge-aarch64 don't build at all + # Else, on edge-aarch64 don't build at all endif() diff --git a/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_metadata.h b/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_metadata.h index 261a7161ffa..36d19456afb 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_metadata.h +++ b/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_metadata.h @@ -165,7 +165,8 @@ class AieTraceMetadata { "output_channels", "output_channels_stalls", "s2mm_channels", "s2mm_channels_stalls", "mm2s_channels", "mm2s_channels_stalls", - "memory_conflicts1", "memory_conflicts2"} }, + "memory_conflicts1", "memory_conflicts2", + "memory_conflicts3"} }, { module_type::shim, {"input_ports", "output_ports", "input_ports_stalls", "output_ports_stalls", "input_ports_details", "output_ports_details", diff --git a/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_offload_manager.h b/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_offload_manager.h index 638e74c0766..df4031da2c5 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_offload_manager.h +++ b/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_offload_manager.h @@ -16,7 +16,9 @@ #include "xdp/profile/database/static_info/pl_constructs.h" #include "xdp/profile/database/database.h" -#ifdef XDP_CLIENT_BUILD +#ifdef XDP_NPU3_BUILD +#include "xdp/profile/device/aie_trace/client/aie_trace_offload_npu3.h" +#elif XDP_CLIENT_BUILD #include "xdp/profile/device/aie_trace/client/aie_trace_offload_client.h" #elif XDP_VE2_BUILD #include 
"xdp/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h" diff --git a/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_plugin.cpp b/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_plugin.cpp index 6a03ef11ac2..e80d43951c9 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_plugin.cpp +++ b/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_plugin.cpp @@ -26,14 +26,18 @@ #include "xdp/profile/device/pl_device_intf.h" #include "xdp/profile/device/utility.h" #include "xdp/profile/plugin/vp_base/info.h" +#include "xdp/profile/plugin/aie_base/aie_base_util.h" #include "xdp/profile/writer/aie_trace/aie_trace_config_writer.h" #include "xdp/profile/writer/aie_trace/aie_trace_timestamps_writer.h" // #include "xdp/profile/writer/aie_trace/aie_trace_writer.h" #include "aie_trace_offload_manager.h" -#ifdef XDP_CLIENT_BUILD +#ifdef XDP_NPU3_BUILD +#include "client/aie_trace.h" +#include "client/aie_trace_npu3.h" +#elif XDP_CLIENT_BUILD #include "client/aie_trace.h" -#elif defined(XRT_X86_BUILD) +#elif XRT_X86_BUILD #include "x86/aie_trace.h" #include "xdp/profile/device/hal_device/xdp_hal_device.h" #elif XDP_VE2_BUILD @@ -172,8 +176,13 @@ void AieTracePluginUnified::updateAIEDevice(void *handle, bool hw_context_flow) #ifdef XDP_CLIENT_BUILD xrt::hw_context context = xrt_core::hw_context_int::create_hw_context_from_implementation(handle); AIEData.metadata->setHwContext(context); - AIEData.implementation = std::make_unique(db, AIEData.metadata); -#elif defined(XRT_X86_BUILD) +#ifdef XDP_NPU3_BUILD + if (aie::isNPU3(AIEData.metadata->getHardwareGen())) + AIEData.implementation = std::make_unique(db, AIEData.metadata); + else +#endif + AIEData.implementation = std::make_unique(db, AIEData.metadata); +#elif XRT_X86_BUILD AIEData.implementation = std::make_unique(db, AIEData.metadata); #elif XDP_VE2_BUILD AIEData.implementation = std::make_unique(db, AIEData.metadata); @@ -203,7 +212,8 @@ void AieTracePluginUnified::updateAIEDevice(void *handle, 
bool hw_context_flow) // hence we should stop adding partition shift to col for passing to XAIE Apis. uint8_t relCol = ((db->getStaticInfo()).getAppStyle() == xdp::AppStyle::LOAD_XCLBIN_STYLE) ? gmio.shimColumn + startColShift : gmio.shimColumn; (db->getStaticInfo()).addTraceGMIO(deviceID, gmio.id, relCol, gmio.channelNum, - gmio.streamId, gmio.burstLength); + gmio.streamId, gmio.burstLength, + static_cast(gmio.type)); } } @@ -226,11 +236,10 @@ void AieTracePluginUnified::updateAIEDevice(void *handle, bool hw_context_flow) if (AIEData.metadata->getRuntimeMetrics()) { std::string configFile = "aie_event_runtime_config_" + std::to_string(deviceID) + ".json"; - VPWriter *writer = new AieTraceConfigWriter(configFile.c_str(), deviceID); - writers.push_back(writer); - db->addOpenedFile(writer->getcurrentFileName(), - "AIE_EVENT_RUNTIME_CONFIG", - deviceID); + configWriter = new AieTraceConfigWriter(configFile.c_str(), deviceID); + writers.push_back(configWriter); + db->addOpenedFile(configWriter->getcurrentFileName(), + "AIE_EVENT_RUNTIME_CONFIG", deviceID); } if (!AIEData.offloadManager) @@ -303,8 +312,7 @@ void AieTracePluginUnified::updateAIEDevice(void *handle, bool hw_context_flow) deviceName.c_str(), deviceID); writers.push_back(tsWriter); db->addOpenedFile(tsWriter->getcurrentFileName(), - "AIE_EVENT_TRACE_TIMESTAMPS", - deviceID); + "AIE_EVENT_TRACE_TIMESTAMPS", deviceID); // Start the AIE trace timestamps thread // NOTE: we purposely start polling before configuring trace events @@ -322,6 +330,8 @@ void AieTracePluginUnified::updateAIEDevice(void *handle, bool hw_context_flow) "Calling AIE Trace updateDevice."); AIEData.implementation->updateDevice(); + if (configWriter) + configWriter->write(false); // Continuous Trace Offload is supported only for PLIO flow if (AIEData.metadata->getContinuousTrace()) diff --git a/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_plugin.h b/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_plugin.h index 
60ac6a36ef0..eb59adc1899 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_plugin.h +++ b/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_plugin.h @@ -25,7 +25,9 @@ #include "xdp/profile/plugin/aie_trace/aie_trace_impl.h" #include "xdp/profile/plugin/vp_base/vp_base_plugin.h" -#ifdef XDP_CLIENT_BUILD +#ifdef XDP_NPU3_BUILD +#include "xdp/profile/device/aie_trace/client/aie_trace_offload_npu3.h" +#elif XDP_CLIENT_BUILD #include "xdp/profile/device/aie_trace/client/aie_trace_offload_client.h" #elif XDP_VE2_BUILD #include "xdp/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h" @@ -65,6 +67,7 @@ class AieTracePluginUnified : public XDPPlugin { std::unique_ptr implementation; std::shared_ptr metadata; }; + VPWriter *configWriter{nullptr}; std::map handleToAIEData; }; diff --git a/src/runtime_src/xdp/profile/plugin/aie_trace/client/aie_trace.cpp b/src/runtime_src/xdp/profile/plugin/aie_trace/client/aie_trace.cpp index 3d5133e0a05..77f36978a8f 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_trace/client/aie_trace.cpp +++ b/src/runtime_src/xdp/profile/plugin/aie_trace/client/aie_trace.cpp @@ -921,12 +921,12 @@ namespace xdp { if (channelNum >= 0) { if (aie::isInputSet(type, metricSet)) { cfgTile->core_trace_config.mm2s_channels[0] = channelNum; - if (channelNum < tile.mm2s_names.size()) + if (static_cast(channelNum) < tile.mm2s_names.size()) cfgTile->core_trace_config.mm2s_names[0] = tile.mm2s_names.at(channelNum); } else { cfgTile->core_trace_config.s2mm_channels[0] = channelNum; - if (channelNum < tile.s2mm_names.size()) + if (static_cast(channelNum) < tile.s2mm_names.size()) cfgTile->core_trace_config.s2mm_names[0] = tile.s2mm_names.at(channelNum); } } diff --git a/src/runtime_src/xdp/profile/plugin/aie_trace/client/aie_trace_npu3.cpp b/src/runtime_src/xdp/profile/plugin/aie_trace/client/aie_trace_npu3.cpp new file mode 100755 index 00000000000..bcce18a1843 --- /dev/null +++ 
b/src/runtime_src/xdp/profile/plugin/aie_trace/client/aie_trace_npu3.cpp @@ -0,0 +1,1159 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved + +#define XDP_PLUGIN_SOURCE + +#include "aie_trace_npu3.h" +#include "resources_def.h" + +#include +#include + +#include "core/common/message.h" +#include "core/include/xrt/xrt_kernel.h" +#include "xdp/profile/database/database.h" +#include "xdp/profile/database/events/creator/aie_trace_data_logger.h" +#include "xdp/profile/database/static_info/aie_constructs.h" +#include "xdp/profile/database/static_info/aie_util.h" +#include "xdp/profile/database/static_info/pl_constructs.h" +#include "xdp/profile/device/pl_device_intf.h" +#include "xdp/profile/device/tracedefs.h" +#include "xdp/profile/plugin/aie_base/aie_base_util.h" +#include "xdp/profile/plugin/aie_trace/aie_trace_metadata.h" +#include "xdp/profile/plugin/aie_trace/util/aie_trace_util.h" +#include "xdp/profile/plugin/vp_base/info.h" + +namespace xdp { + using severity_level = xrt_core::message::severity_level; + + AieTrace_NPU3Impl::AieTrace_NPU3Impl(VPDatabase* database, std::shared_ptr metadata) + : AieTraceImpl(database, metadata) + { + // Pre-defined metric sets + auto hwGen = metadata->getHardwareGen(); + coreEventSets = aie::trace::getCoreEventSets(hwGen); + memoryEventSets = aie::trace::getMemoryEventSets(hwGen); + memoryTileEventSets = aie::trace::getMemoryTileEventSets(hwGen); + interfaceTileEventSets = aie::trace::getInterfaceTileEventSets(hwGen); + + m_trace_start_broadcast = xrt_core::config::get_aie_trace_settings_trace_start_broadcast(); + if (m_trace_start_broadcast) + coreTraceStartEvent = (XAie_Events) (XAIE_EVENT_BROADCAST_0_CORE + traceStartBroadcastChId1); + else + coreTraceStartEvent = XAIE_EVENT_ACTIVE_CORE; + // These are also broadcast to memory module + coreTraceEndEvent = XAIE_EVENT_DISABLED_CORE; + + // Memory tile trace is flushed at end of run + if (m_trace_start_broadcast) 
+ memoryTileTraceStartEvent = (XAie_Events) (XAIE_EVENT_BROADCAST_0_MEM_TILE + traceStartBroadcastChId1); + else + memoryTileTraceStartEvent = XAIE_EVENT_TRUE_MEM_TILE; + memoryTileTraceEndEvent = XAIE_EVENT_USER_EVENT_1_MEM_TILE; + + // Interface tile trace is flushed at end of run + if (m_trace_start_broadcast) + interfaceTileTraceStartEvent = (XAie_Events) (XAIE_EVENT_BROADCAST_A_0_PL + traceStartBroadcastChId2); + else + interfaceTileTraceStartEvent = XAIE_EVENT_TRUE_PL; + interfaceTileTraceEndEvent = XAIE_EVENT_USER_EVENT_1_PL; + + xdp::aie::driver_config meta_config = metadata->getAIEConfigMetadata(); + + XAie_Config cfg { + meta_config.hw_gen, + meta_config.base_address, + meta_config.column_shift, + meta_config.row_shift, + meta_config.num_rows, + meta_config.num_columns, + meta_config.shim_row, + 0, + 1, + meta_config.mem_row_start, + meta_config.mem_num_rows, + meta_config.aie_tile_row_start, + meta_config.aie_tile_num_rows, + {0} // PartProp + }; + + auto RC = XAie_CfgInitialize(&aieDevInst, &cfg); + if (RC != XAIE_OK) + xrt_core::message::send(severity_level::warning, "XRT", "AIE Driver Initialization Failed."); + + tranxHandler = std::make_unique(); + } + + /*************************************************************************** + * Build broadcast network using specified channels + ***************************************************************************/ + void AieTrace_NPU3Impl::build2ChannelBroadcastNetwork(void *hwCtxImpl, uint8_t broadcastId1, + uint8_t broadcastId2, XAie_Events event) + { + boost::property_tree::ptree aiePartitionPt = xdp::aie::getAIEPartitionInfo(hwCtxImpl); + // Currently, assuming only one Hw Context is alive at a time + // uint8_t startCol = static_cast(aiePartitionPt.front().second.get("start_col")); + uint8_t startCol = 0; // Todo: Need to investigate segfault in the above line. 
+ // uint8_t numCols = static_cast(aiePartitionPt.front().second.get("num_cols")); + uint8_t numCols = 3; + + std::vector maxRowAtCol(startCol + numCols, 0); + for (auto& tileMetric : metadata->getConfigMetrics()) { + auto tile = tileMetric.first; + auto col = tile.col; + auto row = tile.row; + maxRowAtCol[startCol + col] = std::max(maxRowAtCol[col], (uint8_t)row); + } + + XAie_Events bcastEvent2_PL = (XAie_Events) (XAIE_EVENT_BROADCAST_A_0_PL + broadcastId2); + XAie_EventBroadcast(&aieDevInst, XAie_TileLoc(startCol, 0), XAIE_PL_MOD, broadcastId2, event); + + for(uint8_t col = startCol; col < startCol + numCols; col++) { + for(uint8_t row = 0; row <= maxRowAtCol[col]; row++) { + module_type tileType = getTileType(row); + auto loc = XAie_TileLoc(col, row); + + if(tileType == module_type::shim) { + // first channel is only used to send north + if(col == startCol) { + XAie_EventBroadcast(&aieDevInst, loc, XAIE_PL_MOD, broadcastId1, event); + } + else { + XAie_EventBroadcast(&aieDevInst, loc, XAIE_PL_MOD, broadcastId1, bcastEvent2_PL); + } + if(maxRowAtCol[col] != row) { + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_EAST); + } + else { + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_EAST | XAIE_EVENT_BROADCAST_NORTH); + } + + // second channel is only used to send east + if(col != startCol + numCols - 1) { + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId2, XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_NORTH); + } + else { + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId2, XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_NORTH); + } + } + else if(tileType == 
module_type::mem_tile) { + if(maxRowAtCol[col] != row) { + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_MEM_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_EAST); + } + else { + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_MEM_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_EAST | XAIE_EVENT_BROADCAST_NORTH); + } + } + else { //core tile + if(maxRowAtCol[col] != row) { + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_EAST); + } + else { + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_EAST | XAIE_EVENT_BROADCAST_NORTH); + } + } + } + } + } + + /*************************************************************************** + * Reset using broadcast network on specified channels + ***************************************************************************/ + void AieTrace_NPU3Impl::reset2ChannelBroadcastNetwork(void *hwCtxImpl, uint8_t broadcastId1, + uint8_t broadcastId2) + { + boost::property_tree::ptree aiePartitionPt = xdp::aie::getAIEPartitionInfo(hwCtxImpl); + // Currently, assuming only one Hw Context is alive at a time + //uint8_t startCol = static_cast(aiePartitionPt.back().second.get("start_col")); + uint8_t startCol = 0; + //uint8_t numCols = static_cast(aiePartitionPt.back().second.get("num_cols")); + uint8_t numCols = 3; + + std::vector maxRowAtCol(startCol + numCols, 0); + for (auto& tileMetric : metadata->getConfigMetrics()) { + auto tile = tileMetric.first; + auto col = tile.col; + auto row = tile.row; + maxRowAtCol[startCol + col] = std::max(maxRowAtCol[col], (uint8_t)row); + } + + XAie_EventBroadcastReset(&aieDevInst, XAie_TileLoc(startCol, 0), 
XAIE_PL_MOD, broadcastId2); + + for(uint8_t col = startCol; col < startCol + numCols; col++) { + for(uint8_t row = 0; row <= maxRowAtCol[col]; row++) { + module_type tileType = getTileType(row); + auto loc = XAie_TileLoc(col, row); + + if(tileType == module_type::shim) { + XAie_EventBroadcastReset(&aieDevInst, loc, XAIE_PL_MOD, broadcastId1); + XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, XAIE_EVENT_BROADCAST_ALL); + XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId2, XAIE_EVENT_BROADCAST_ALL); + XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_B, broadcastId2, XAIE_EVENT_BROADCAST_ALL); + } + else if(tileType == module_type::mem_tile) { + XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_MEM_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, XAIE_EVENT_BROADCAST_ALL); + } + else { //core tile + XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, XAIE_EVENT_BROADCAST_ALL); + XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_MEM_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, XAIE_EVENT_BROADCAST_ALL); + } + } + } + } + + /*************************************************************************** + * Configure windowed event trace + ***************************************************************************/ + bool AieTrace_NPU3Impl::configureWindowedEventTrace(void* hwCtxImpl) + { + // Start recording the transaction + if (!tranxHandler->initializeTransaction(&aieDevInst, "AieTraceWindow")) + return false; + + boost::property_tree::ptree aiePartitionPt = xdp::aie::getAIEPartitionInfo(hwCtxImpl); + // Currently, assuming only one Hw Context is alive at a time + //uint8_t startCol = static_cast(aiePartitionPt.back().second.get("start_col")); + uint8_t startCol = 0; + + XAie_Events bcastEvent2_PL = (XAie_Events) (XAIE_EVENT_BROADCAST_A_0_PL + traceStartBroadcastChId2); + XAie_Events shimTraceStartEvent = 
bcastEvent2_PL; + XAie_Events memTileTraceStartEvent = (XAie_Events)(XAIE_EVENT_BROADCAST_0_MEM_TILE + traceStartBroadcastChId1); + XAie_Events coreModTraceStartEvent = (XAie_Events)(XAIE_EVENT_BROADCAST_0_CORE + traceStartBroadcastChId1); + XAie_Events memTraceStartEvent = (XAie_Events)(XAIE_EVENT_BROADCAST_0_MEM + traceStartBroadcastChId1); + + unsigned int startLayer = xrt_core::config::get_aie_trace_settings_start_layer(); + + // NOTE: rows are stored as absolute as required by resource manager + for (auto& tileMetric : metadata->getConfigMetrics()) { + auto tile = tileMetric.first; + auto col = tile.col; + auto row = tile.row; + auto tileType = getTileType(row); + auto loc = XAie_TileLoc(col, row); + if (tileType == module_type::shim) { + if (startLayer != UINT_MAX) { + if (col == startCol) + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_PERF_CNT_0_PL); + else + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_PL_MOD, shimTraceStartEvent); + } + } + else if (tileType == module_type::mem_tile) { + if (startLayer != UINT_MAX) + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_MEM_MOD, memTileTraceStartEvent); + } + else { + if (startLayer != UINT_MAX) { + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_CORE_MOD, coreModTraceStartEvent); + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_MEM_MOD, memTraceStartEvent); + } + } + } + + if (startLayer != UINT_MAX) { + XAie_PerfCounterControlSet(&aieDevInst, XAie_TileLoc(startCol, 0), XAIE_PL_MOD, 0, XAIE_EVENT_USER_EVENT_0_PL, XAIE_EVENT_USER_EVENT_0_PL); + XAie_PerfCounterEventValueSet(&aieDevInst, XAie_TileLoc(startCol, 0), XAIE_PL_MOD, 0, startLayer); + } + + build2ChannelBroadcastNetwork(hwCtxImpl, traceStartBroadcastChId1, traceStartBroadcastChId2, XAIE_EVENT_PERF_CNT_0_PL); + + xrt_core::message::send(severity_level::info, "XRT", "Finished AIE Windowed Trace Settings."); + auto hwContext = metadata->getHwContext(); + tranxHandler->submitTransaction(&aieDevInst, hwContext); + return true; + } + + void 
AieTrace_NPU3Impl::updateDevice() + { + xrt_core::message::send(severity_level::info, "XRT", "Calling AIE Trace IPU updateDevice."); + + // Make sure compiler trace option is available as runtime + if (!metadata->getRuntimeMetrics()) + return; + + // Set metrics for trace events + if (!setMetricsSettings(metadata->getDeviceID(), metadata->getHandle())) { + std::string msg("Unable to configure AIE trace control and events. No trace will be generated."); + xrt_core::message::send(severity_level::warning, "XRT", msg); + return; + } + if(xrt_core::config::get_aie_trace_settings_start_type() == "layer") { + if (!configureWindowedEventTrace(metadata->getHandle())) { + std::string msg("Unable to configure AIE Windowed event trace"); + xrt_core::message::send(severity_level::warning, "XRT", msg); + return; + } + } + } + + // No CMA checks on Win + uint64_t AieTrace_NPU3Impl::checkTraceBufSize(uint64_t size) + { + return size; + } + + /**************************************************************************** + * Modify events in metric set based on type and channel + ***************************************************************************/ + void AieTrace_NPU3Impl::modifyEvents(module_type type, io_type subtype, + const std::string metricSet, uint8_t channel, + std::vector& events) + { + // Only needed for GMIO DMA channel 1 + if ((type != module_type::shim) || (subtype == io_type::PLIO) || (channel == 0)) + return; + + // Check type to minimize replacements + if (aie::isInputSet(type, metricSet)) { + // Input or MM2S + std::replace(events.begin(), events.end(), + XAIE_EVENT_DMA_MM2S_0_START_TASK_PL, XAIE_EVENT_DMA_MM2S_1_START_TASK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_DMA_MM2S_0_FINISHED_BD_PL, XAIE_EVENT_DMA_MM2S_1_FINISHED_BD_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_DMA_MM2S_0_FINISHED_TASK_PL, XAIE_EVENT_DMA_MM2S_1_FINISHED_TASK_PL); + std::replace(events.begin(), events.end(), + 
XAIE_EVENT_DMA_MM2S_0_STALLED_LOCK_PL, XAIE_EVENT_DMA_MM2S_1_STALLED_LOCK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_DMA_MM2S_0_STREAM_BACKPRESSURE_PL, XAIE_EVENT_DMA_MM2S_1_STREAM_BACKPRESSURE_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_DMA_MM2S_0_MEMORY_STARVATION_PL, XAIE_EVENT_DMA_MM2S_1_MEMORY_STARVATION_PL); + } + else { + // Output or S2MM + std::replace(events.begin(), events.end(), + XAIE_EVENT_DMA_S2MM_0_START_TASK_PL, XAIE_EVENT_DMA_S2MM_1_START_TASK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_DMA_S2MM_0_FINISHED_BD_PL, XAIE_EVENT_DMA_S2MM_1_FINISHED_BD_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_DMA_S2MM_0_FINISHED_TASK_PL, XAIE_EVENT_DMA_S2MM_1_FINISHED_TASK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_DMA_S2MM_0_STALLED_LOCK_PL, XAIE_EVENT_DMA_S2MM_1_STALLED_LOCK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_DMA_S2MM_0_STREAM_STARVATION_PL, XAIE_EVENT_DMA_S2MM_1_STREAM_STARVATION_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_DMA_S2MM_0_MEMORY_BACKPRESSURE_PL, XAIE_EVENT_DMA_S2MM_1_MEMORY_BACKPRESSURE_PL); + } + } + + void AieTrace_NPU3Impl::flushTraceModules() + { + //if (db->infoAvailable(xdp::info::ml_timeline)) { + // db->broadcast(VPDatabase::MessageType::READ_RECORD_TIMESTAMPS, nullptr); + // xrt_core::message::send(severity_level::debug, "XRT", "Done reading recorded timestamps."); + //} + + if (traceFlushLocs.empty() && memoryTileTraceFlushLocs.empty() + && interfaceTileTraceFlushLocs.empty()) + return; + + if (xrt_core::config::get_verbosity() >= static_cast(severity_level::info)) { + std::stringstream msg; + msg << "Flushing AIE trace by forcing end event for " << traceFlushLocs.size() + << " AIE tiles, " << memoryTileTraceFlushLocs.size() << " memory tiles, and " + << interfaceTileTraceFlushLocs.size() << " interface tiles."; + xrt_core::message::send(severity_level::info, "XRT", msg.str()); + } + + // if 
(!tranxHandler->initializeTransaction(&aieDevInst, "AieTraceFlush")) + // return; + + // Flush trace by forcing end event + // NOTE: this informs tiles to output remaining packets (even if partial) + //for (const auto& loc : traceFlushLocs) + // XAie_EventGenerate(&aieDevInst, loc, XAIE_CORE_MOD, coreTraceEndEvent); + //for (const auto& loc : memoryTileTraceFlushLocs) + // XAie_EventGenerate(&aieDevInst, loc, XAIE_MEM_MOD, memoryTileTraceEndEvent); + //for (const auto& loc : interfaceTileTraceFlushLocs) + // XAie_EventGenerate(&aieDevInst, loc, XAIE_PL_MOD, interfaceTileTraceEndEvent); + + traceFlushLocs.clear(); + memoryTileTraceFlushLocs.clear(); + interfaceTileTraceFlushLocs.clear(); + + //xrt_core::message::send(severity_level::info, "XRT", "Before AIE trace flush."); + + //auto hwContext = metadata->getHwContext(); + // tranxHandler->submitTransaction(&aieDevInst, hwContext); + //tranxHandler->submitELF(hwContext); + + //xrt_core::message::send(severity_level::info, "XRT", "Successfully scheduled AIE trace flush."); + } + + void AieTrace_NPU3Impl::pollTimers(uint64_t index, void* handle) + { + // TODO: Poll timers (needed for system timeline only) + (void)index; + (void)handle; + } + + uint16_t AieTrace_NPU3Impl::getRelativeRow(uint16_t absRow) + { + auto rowOffset = metadata->getRowOffset(); + if (absRow == 0) + return 0; + if (absRow < rowOffset) + return (absRow - 1); + return (absRow - rowOffset); + } + + module_type AieTrace_NPU3Impl::getTileType(uint8_t absRow) + { + if (absRow == 0) + return module_type::shim; + if (absRow < metadata->getRowOffset()) + return module_type::mem_tile; + return module_type::core; + } + + void AieTrace_NPU3Impl::freeResources() + { + // Nothing to do + } + + inline uint32_t AieTrace_NPU3Impl::bcIdToEvent(int bcId) + { + return bcId + CORE_BROADCAST_EVENT_BASE; + } + + /**************************************************************************** + * Configure stream switch event ports for monitoring purposes + 
***************************************************************************/
  // For every stream switch port event found in `events`, program the matching
  // event port once through XAie_EventSelectStrmPort and record the selection
  // in `config` so it can be reported in the runtime config file.
  //
  //   tile      - tile metadata supplying port/channel names, stream IDs, and
  //               master/slave flags
  //   loc       - driver location of the tile being configured
  //   type      - module type; selects the AIE tile (core), interface tile
  //               (shim), or memory tile handling below
  //   metricSet - name of the requested metric set
  //   channel0  - channel associated with even-numbered event ports
  //   channel1  - channel associated with odd-numbered event ports
  //   events    - trace events to scan (read only, never modified here)
  //   config    - trace configuration record that gets filled in
  void
  AieTrace_NPU3Impl::configStreamSwitchPorts(const tile_type& tile, const XAie_LocType loc,
                                             const module_type type, const std::string metricSet,
                                             const uint8_t channel0, const uint8_t channel1,
                                             std::vector<XAie_Events>& events, aie_cfg_base& config)
  {
    // Event ports that have already been configured during this call
    std::set<uint8_t> portSet;

    // Copy a name out of the tile metadata only when that entry exists.
    // A names vector shorter than expected then leaves the destination
    // untouched instead of throwing std::out_of_range mid-configuration.
    const auto copyName = [](const auto& names, size_t idx, std::string& dest) {
      if (idx < names.size())
        dest = names.at(idx);
    };

    // Traverse all events and configure monitor ports as needed
    for (const auto event : events) {
      // Ensure applicable event
      if (!aie::isStreamSwitchPortEvent(event))
        continue;

      const auto portnum = aie::getPortNumberFromEvent(event);
      const uint8_t channelNum = portnum % 2;
      const uint8_t channel = (channelNum == 0) ? channel0 : channel1;

      // Each port is configured once, no matter how many events refer to it
      if (!portSet.insert(portnum).second)
        continue;

      // The per-port records in config are fixed-size arrays; never index past them
      if (portnum >= std::size(config.port_trace_ids))
        continue;

      if (type == module_type::core) {
        // AIE Tiles - Monitor DMA channels
        const bool isMaster = ((portnum >= 2) || (metricSet.find("s2mm") != std::string::npos));
        const auto totalChannels = isMaster ? aie::getNumS2MMChannels(metadata->getHardwareGen(), type)
                                            : aie::getNumMM2SChannels(metadata->getHardwareGen(), type);
        if (channelNum >= totalChannels)
          continue;

        const auto slaveOrMaster = isMaster ? XAIE_STRMSW_MASTER : XAIE_STRMSW_SLAVE;
        const std::string typeName = isMaster ? "S2MM" : "MM2S";
        const std::string msg = "Configuring core module stream switch to monitor DMA "
                              + typeName + " channel " + std::to_string(channelNum);
        xrt_core::message::send(severity_level::debug, "XRT", msg);
        XAie_EventSelectStrmPort(&aieDevInst, loc, portnum, slaveOrMaster, DMA, channelNum);

        // Record for runtime config file
        // NOTE: channel info informs back-end there will be events on that channel
        config.port_trace_ids[portnum] = channelNum;
        config.port_trace_is_master[portnum] = isMaster;
        copyName(tile.port_names, portnum, config.port_trace_names[portnum]);

        if (isMaster) {
          config.s2mm_channels[channelNum] = channelNum;
          copyName(tile.s2mm_names, channelNum, config.s2mm_names[channelNum]);
        }
        else {
          config.mm2s_channels[channelNum] = channelNum;
          copyName(tile.mm2s_names, channelNum, config.mm2s_names[channelNum]);
        }
      }
      else if (type == module_type::shim) {
        // Interface tiles (e.g., GMIO)
        // NOTE: skip configuration of extra ports for tile if stream_ids (or the
        //       matching master/slave flags) are not available.
        if ((portnum >= tile.stream_ids.size()) || (portnum >= tile.is_master_vec.size()))
          continue;

        const bool isMaster = (tile.is_master_vec.at(portnum) != 0);
        const auto slaveOrMaster = isMaster ? XAIE_STRMSW_MASTER : XAIE_STRMSW_SLAVE;
        const uint8_t streamPortId = static_cast<uint8_t>(tile.stream_ids.at(portnum));
        const std::string typeName = isMaster ? "master" : "slave";

        const std::string msg = "Configuring interface tile stream switch to monitor "
                              + typeName + " port with stream ID of " + std::to_string(streamPortId);
        xrt_core::message::send(severity_level::debug, "XRT", msg);
        // NOTE: For NPU3, there is no SOUTH port type. Hence, monitoring DMA port.
        XAie_EventSelectStrmPort(&aieDevInst, loc, portnum, slaveOrMaster, DMA, streamPortId);

        // Record for runtime config file
        config.port_trace_ids[portnum] = channelNum;
        config.port_trace_is_master[portnum] = isMaster;
        copyName(tile.port_names, portnum, config.port_trace_names[portnum]);

        if (!isMaster) {
          config.mm2s_channels[channelNum] = channel; // Slave or Input Port
          copyName(tile.mm2s_names, channelNum, config.mm2s_names[channelNum]);
        }
        else {
          config.s2mm_channels[channelNum] = channel; // Master or Output Port
          copyName(tile.s2mm_names, channelNum, config.s2mm_names[channelNum]);
        }
      }
      else {
        // Memory tiles
        const auto slaveOrMaster = aie::isInputSet(type, metricSet) ? XAIE_STRMSW_MASTER : XAIE_STRMSW_SLAVE;
        const std::string typeName = (slaveOrMaster == XAIE_STRMSW_MASTER) ? "master" : "slave";
        const std::string msg = "Configuring memory tile stream switch to monitor "
                              + typeName + " stream port " + std::to_string(channel);
        xrt_core::message::send(severity_level::debug, "XRT", msg);
        XAie_EventSelectStrmPort(&aieDevInst, loc, portnum, slaveOrMaster, DMA, channel);

        // Record for runtime config file
        config.port_trace_ids[portnum] = channel;
        config.port_trace_is_master[portnum] = (slaveOrMaster == XAIE_STRMSW_MASTER);
        copyName(tile.port_names, portnum, config.port_trace_names[portnum]);
      }
    }
  }

/**************************************************************************** + * Configure combo events (AIE tiles only) + ***************************************************************************/ + std::vector + AieTrace_NPU3Impl::configComboEvents(const XAie_LocType loc, const XAie_ModuleType mod, + const module_type type, const std::string metricSet, + aie_cfg_base& config) + { + // Only needed for core/memory modules and metric sets that include DMA events + if (!aie::isDmaSet(metricSet) || ((type != module_type::core) && (type != module_type::dma))) + return {}; + + std::vector comboEvents; + + if (mod == XAIE_CORE_MOD) { + //auto comboEvent = xaieTile.core().comboEvent(4); + comboEvents.push_back(XAIE_EVENT_COMBO_EVENT_2_CORE); + + // Combo2 = Port_Idle_0 OR Port_Idle_1 OR Port_Idle_2 OR Port_Idle_3 + std::vector events = {XAIE_EVENT_PORT_IDLE_0_CORE, + XAIE_EVENT_PORT_IDLE_1_CORE, XAIE_EVENT_PORT_IDLE_2_CORE, + XAIE_EVENT_PORT_IDLE_3_CORE}; + std::vector opts = {XAIE_EVENT_COMBO_E1_OR_E2, + XAIE_EVENT_COMBO_E1_OR_E2, XAIE_EVENT_COMBO_E1_OR_E2}; + + // Capture in config class to report later + for (size_t i=0; i < NUM_COMBO_EVENT_CONTROL; ++i) + config.combo_event_control[i] = 2; + for (size_t i=0; i < events.size(); ++i) { + uint16_t phyEvent = 0; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, events.at(i), &phyEvent); + config.combo_event_input[i] = phyEvent; + } + + // Set events and trigger on OR of events + //comboEvent->setEvents(events, opts); + XAie_EventComboConfig(&aieDevInst, loc, mod, XAIE_EVENT_COMBO0, opts[0], + events[0], events[1]); + XAie_EventComboConfig(&aieDevInst, loc, mod, XAIE_EVENT_COMBO1, opts[1], + events[2], events[3]); + XAie_EventComboConfig(&aieDevInst, loc, mod, XAIE_EVENT_COMBO2, opts[2], + XAIE_EVENT_COMBO_EVENT_0_PL, XAIE_EVENT_COMBO_EVENT_1_PL); + return comboEvents; + } + + // Since we're tracing DMA events, start trace right away. + // Specify user event 0 as trace end so we can flush after run. 
+ comboEvents.push_back(XAIE_EVENT_TRUE_CORE); + comboEvents.push_back(XAIE_EVENT_USER_EVENT_0_CORE); + return comboEvents; + } + + /**************************************************************************** + * Configure group events (core modules only) + ***************************************************************************/ + void + AieTrace_NPU3Impl::configGroupEvents(const XAie_LocType loc, const XAie_ModuleType mod, + const module_type type, const std::string metricSet) + { + // Only needed for core module and metric sets that include DMA events + if (!aie::isDmaSet(metricSet) || (type != module_type::core)) + return; + + // Set masks for group events + XAie_EventGroupControl(&aieDevInst, loc, mod, XAIE_EVENT_GROUP_CORE_PROGRAM_FLOW_CORE, + GROUP_CORE_FUNCTIONS_MASK); + XAie_EventGroupControl(&aieDevInst, loc, mod, XAIE_EVENT_GROUP_CORE_STALL_CORE, + GROUP_CORE_STALL_MASK); + XAie_EventGroupControl(&aieDevInst, loc, mod, XAIE_EVENT_GROUP_STREAM_SWITCH_CORE, + GROUP_STREAM_SWITCH_RUNNING_MASK); + } + + /**************************************************************************** + * Configure event selection + * NOTE: This supports memory tiles and interface tiles + ***************************************************************************/ + void + AieTrace_NPU3Impl::configEventSelections(const XAie_LocType loc, const module_type type, + const std::string metricSet, std::vector& channels, + aie_cfg_base& config) + { + if ((type != module_type::mem_tile) && (type != module_type::shim)) + return; + + XAie_DmaDirection dmaDir = aie::isInputSet(type, metricSet) ? DMA_S2MM : DMA_MM2S; + uint8_t numChannels = ((type == module_type::shim) && (dmaDir == DMA_MM2S)) + ? NUM_CHANNEL_SELECTS_SHIM_NPU3 : NUM_CHANNEL_SELECTS; + + if (aie::isDebugVerbosity()) { + std::string tileType = (type == module_type::shim) ? "interface" : "memory"; + std::string dmaType = (dmaDir == DMA_S2MM) ? 
"S2MM" : "MM2S"; + std::stringstream channelsStr; + std::copy(channels.begin(), channels.end(), std::ostream_iterator(channelsStr, ", ")); + + std::string msg = "Configuring event selections for " + tileType + " tile DMA " + + dmaType + " channels " + channelsStr.str(); + xrt_core::message::send(severity_level::debug, "XRT", msg); + } + + for (uint8_t c = 0; c < numChannels; ++c) { + XAie_EventSelectDmaChannel(&aieDevInst, loc, c, dmaDir, channels.at(c)); + + // Record for runtime config file + config.port_trace_ids[c] = channels.at(c); + if (aie::isInputSet(type, metricSet)) { + config.port_trace_is_master[c] = true; + config.s2mm_channels[c] = channels.at(c); + } + else { + config.port_trace_is_master[c] = false; + config.mm2s_channels[c] = channels.at(c); + } + } + } + + /**************************************************************************** + * Configure edge detection events + ***************************************************************************/ + void AieTrace_NPU3Impl::configEdgeEvents(const tile_type& tile, const module_type type, + const std::string metricSet, const XAie_Events event, + const uint8_t channel) + { + if ((event != XAIE_EVENT_EDGE_DETECTION_EVENT_0_MEM_TILE) + && (event != XAIE_EVENT_EDGE_DETECTION_EVENT_1_MEM_TILE) + && (event != XAIE_EVENT_EDGE_DETECTION_EVENT_0_MEM) + && (event != XAIE_EVENT_EDGE_DETECTION_EVENT_1_MEM)) + return; + + // Catch memory tiles + if (type == module_type::mem_tile) { + // Event is DMA_S2MM_Sel0_stream_starvation or DMA_MM2S_Sel0_stalled_lock + uint16_t eventNum = aie::isInputSet(type, metricSet) + ? 
EVENT_MEM_TILE_DMA_S2MM_SEL0_STREAM_STARVATION + : EVENT_MEM_TILE_DMA_MM2S_SEL0_STALLED_LOCK; + + // Register Edge_Detection_event_control + // 26 Event 1 triggered on falling edge + // 25 Event 1 triggered on rising edge + // 23:16 Input event for edge event 1 + // 10 Event 0 triggered on falling edge + // 9 Event 0 triggered on rising edge + // 7:0 Input event for edge event 0 + uint32_t edgeEventsValue = (1 << 26) + (eventNum << 16) + (1 << 9) + eventNum; + + xrt_core::message::send(severity_level::debug, "XRT", + "Configuring memory tile edge events to detect rise and fall of event " + + std::to_string(eventNum)); + + auto tileOffset = _XAie_GetTileAddr(&aieDevInst, tile.row, tile.col); + XAie_Write32(&aieDevInst, tileOffset + AIE_OFFSET_EDGE_CONTROL_MEM_TILE_NPU3, + edgeEventsValue); + return; + } + + // Below is AIE tile support + + // Event is DMA_MM2S_stalled_lock or DMA_S2MM_stream_starvation + // Event is DMA_S2MM_Sel0_stream_starvation or DMA_MM2S_Sel0_stalled_lock + uint16_t eventNum = aie::isInputSet(type, metricSet) + ? ((channel == 0) ? EVENT_MEM_DMA_MM2S_0_STALLED_LOCK + : EVENT_MEM_DMA_MM2S_1_STALLED_LOCK) + : ((channel == 0) ? 
EVENT_MEM_DMA_S2MM_0_STREAM_STARVATION + : EVENT_MEM_DMA_S2MM_1_STREAM_STARVATION); + + // Register Edge_Detection_event_control + // 26 Event 1 triggered on falling edge + // 25 Event 1 triggered on rising edge + // 23:16 Input event for edge event 1 + // 10 Event 0 triggered on falling edge + // 9 Event 0 triggered on rising edge + // 7:0 Input event for edge event 0 + uint32_t edgeEventsValue = (1 << 26) + (eventNum << 16) + (1 << 9) + eventNum; + + xrt_core::message::send(severity_level::debug, "XRT", + "Configuring AIE tile edge events to detect rise and fall of event " + + std::to_string(eventNum)); + + auto tileOffset = _XAie_GetTileAddr(&aieDevInst, tile.row, tile.col); + XAie_Write32(&aieDevInst, tileOffset + AIE_OFFSET_EDGE_CONTROL_MEM_NPU3, + edgeEventsValue); + } + + /**************************************************************************** + * Configure requested tiles with trace metrics and settings + ***************************************************************************/ + bool AieTrace_NPU3Impl::setMetricsSettings(uint64_t deviceId, void* hwCtxImpl) + { + (void)deviceId; + + // Get partition columns + boost::property_tree::ptree aiePartitionPt = xdp::aie::getAIEPartitionInfo(hwCtxImpl); + // Currently, assuming only one Hw Context is alive at a time + //uint8_t startCol = static_cast(aiePartitionPt.front().second.get("start_col")); + uint8_t startCol = 0; + + std::string startType = xrt_core::config::get_aie_trace_settings_start_type(); + unsigned int startLayer = xrt_core::config::get_aie_trace_settings_start_layer(); + + // Initialize and start transaction + std::string tranxName = "AieTraceMetrics"; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Starting transaction " + tranxName); + if (!tranxHandler->initializeTransaction(&aieDevInst, tranxName)) + return false; + + if (!metadata->getIsValidMetrics()) { + std::string msg("AIE trace metrics were not specified in xrt.ini. 
AIE event trace will not be available."); + xrt_core::message::send(severity_level::warning, "XRT", msg); + return false; + } + + // Get channel configurations (memory and interface tiles) + auto configChannel0 = metadata->getConfigChannel0(); + auto configChannel1 = metadata->getConfigChannel1(); + + // Zero trace event tile counts + for (int m = 0; m < static_cast(module_type::num_types); ++m) { + for (size_t n = 0; n <= NUM_TRACE_EVENTS; ++n) + mNumTileTraceEvents[m][n] = 0; + } + + // Using user event for trace end to enable flushing + // NOTE: Flush trace module always at the end because for some applications + // core might be running infinitely. + if (metadata->getUseUserControl()) + coreTraceStartEvent = XAIE_EVENT_INSTR_EVENT_0_CORE; + coreTraceEndEvent = XAIE_EVENT_INSTR_EVENT_1_CORE; + + // Iterate over all used/specified tiles + // NOTE: rows are stored as absolute as required by resource manager + //std::cout << "Config Metrics Size: " << metadata->getConfigMetrics().size() << std::endl; + for (auto& tileMetric : metadata->getConfigMetrics()) { + auto& metricSet = tileMetric.second; + auto tile = tileMetric.first; + auto col = tile.col; + auto row = tile.row; + auto subtype = tile.subtype; + auto type = getTileType(row); + auto typeInt = static_cast(type); + auto loc = XAie_TileLoc(col, row); + + std::stringstream cmsg; + cmsg << "Configuring tile (" << +col << "," << +row << ") in module type: " << aie::getModuleName(type) << "."; + xrt_core::message::send(severity_level::info, "XRT", cmsg.str()); + + // Store location to flush at end of run + if (type == module_type::core || (type == module_type::mem_tile) + || (type == module_type::shim)) { + if (type == module_type::core) + traceFlushLocs.push_back(loc); + else if (type == module_type::mem_tile) + memoryTileTraceFlushLocs.push_back(loc); + else if (type == module_type::shim) + interfaceTileTraceFlushLocs.push_back(loc); + } + + // AIE config object for this tile + auto cfgTile = 
std::make_unique(col+startCol, row, type); + cfgTile->type = type; + cfgTile->trace_metric_set = metricSet; + cfgTile->active_core = tile.active_core; + cfgTile->active_memory = tile.active_memory; + + // Catch core execution trace + if ((type == module_type::core) && (metricSet == "execution")) { + // Set start/end events, use execution packets, and start trace module + XAie_TraceStopEvent(&aieDevInst, loc, XAIE_CORE_MOD, coreTraceEndEvent); + + // Driver requires at least one, non-zero trace event + XAie_TraceEvent(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_EVENT_TRUE_CORE, 0); + + XAie_Packet pkt = {0, 0}; + XAie_TraceModeConfig(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_TRACE_INST_EXEC); + XAie_TracePktConfig(&aieDevInst, loc, XAIE_CORE_MOD, pkt); + + if(startType != "layer" || startLayer == UINT_MAX) + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_CORE_MOD, coreTraceStartEvent); + (db->getStaticInfo()).addAIECfgTile(deviceId, cfgTile); + continue; + } + + // Get vector of pre-defined metrics for this set + // NOTE: These are local copies to add tile-specific events + EventVector coreEvents; + EventVector memoryEvents; + EventVector interfaceEvents; + if (type == module_type::core) { + coreEvents = coreEventSets[metricSet]; + memoryEvents = memoryEventSets[metricSet]; + } + else if (type == module_type::mem_tile) { + memoryEvents = memoryTileEventSets[metricSet]; + } + else if (type == module_type::shim) { + interfaceEvents = interfaceTileEventSets[metricSet]; + } + + if (xrt_core::config::get_verbosity() >= static_cast(severity_level::info)) { + std::stringstream infoMsg; + auto tileName = (type == module_type::mem_tile) ? "memory" + : ((type == module_type::shim) ? 
"interface" : "AIE"); + infoMsg << "Configuring " << tileName << " tile (" << +col << "," + << +row << ") for trace using metric set " << metricSet; + xrt_core::message::send(severity_level::info, "XRT", infoMsg.str()); + } + + int numCoreTraceEvents = 0; + int numMemoryTraceEvents = 0; + int numInterfaceTraceEvents = 0; + + // + // 1. Configure Core Trace Events + // + if (type == module_type::core) { + xrt_core::message::send(severity_level::info, "XRT", "Configuring Core Trace Events"); + + XAie_ModuleType mod = XAIE_CORE_MOD; + uint16_t phyEvent = 0; + //auto coreTrace = core.traceControl(); + + // Delay cycles and user control are not compatible with each other + // if (metadata->getUseGraphIterator()) { + // if (!configureStartIteration(core)) + // break; + // } else if (metadata->getUseDelay()) { + // if (!configureStartDelay(core)) + // break; + // } + + // Configure combo & group events (e.g., to monitor DMA channels) + auto comboEvents = configComboEvents(loc, mod, type, metricSet, cfgTile->core_trace_config); + configGroupEvents(loc, mod, type, metricSet); + + // Set end event for trace capture + // NOTE: This needs to be done first + if (XAie_TraceStopEvent(&aieDevInst, loc, mod, coreTraceEndEvent) != XAIE_OK) + break; + + for (uint8_t i = 0; i < coreEvents.size(); i++) { + uint8_t slot = i; + if (XAie_TraceEvent(&aieDevInst, loc, mod, coreEvents[i], i) != XAIE_OK) + break; + numCoreTraceEvents++; + + // Update config file + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, coreEvents[i], &phyEvent); + cfgTile->core_trace_config.traced_events[slot] = phyEvent; + } + + // Update config file + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, coreTraceStartEvent, &phyEvent); + cfgTile->core_trace_config.start_event = phyEvent; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, coreTraceEndEvent, &phyEvent); + cfgTile->core_trace_config.stop_event = phyEvent; + + coreEvents.clear(); + mNumTileTraceEvents[typeInt][numCoreTraceEvents]++; + + 
XAie_Packet pkt = {0, 0}; + if (XAie_TraceModeConfig(&aieDevInst, loc, mod, XAIE_TRACE_EVENT_PC) != XAIE_OK) + break; + if (XAie_TracePktConfig(&aieDevInst, loc, mod, pkt) != XAIE_OK) + break; + if(startType != "layer" || startLayer == UINT_MAX) + XAie_TraceStartEvent(&aieDevInst, loc, mod, coreTraceStartEvent); + } // Core modules + + // + // 2. Configure Memory Trace Events + // + // NOTE: This is applicable for memory modules in AIE tiles or memory tiles + // NOTE 2: For NPU3, this configures the second trace stream that uses time packets + if ((type == module_type::core) || (type == module_type::mem_tile)) { + xrt_core::message::send(severity_level::info, "XRT", "Configuring Memory Trace Events"); + XAie_ModuleType mod = XAIE_MEM_MOD; + auto phyMod = (type == module_type::mem_tile) ? XAIE_MEM_MOD: XAIE_CORE_MOD; + + // Set overall start/end for trace capture + auto traceStartEvent = (type == module_type::core) ? coreTraceStartEvent : memoryTileTraceStartEvent; + auto traceEndEvent = (type == module_type::core) ? 
coreTraceEndEvent : memoryTileTraceEndEvent; + // Bind to this tile type's config up front: assigning to a reference copies into its target, it never rebinds + aie_cfg_base& aieConfig = (type == module_type::mem_tile) + ? static_cast<aie_cfg_base&>(cfgTile->memory_tile_trace_config) + : static_cast<aie_cfg_base&>(cfgTile->core_trace_config); + + // Configure combo events for metric sets that include DMA events + auto comboEvents = configComboEvents(loc, mod, type, metricSet, aieConfig); + if (comboEvents.size() == 2) { + traceStartEvent = comboEvents.at(0); + traceEndEvent = comboEvents.at(1); + } + + // Configure event ports on stream switch + configStreamSwitchPorts(tile, loc, type, metricSet, 0, 0, memoryEvents, aieConfig); + + memoryModTraceStartEvent = traceStartEvent; + if (XAie_TraceStopEvent(&aieDevInst, loc, mod, traceEndEvent) != XAIE_OK) + break; + + { + uint16_t phyEvent1 = 0; + uint16_t phyEvent2 = 0; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, phyMod, traceStartEvent, &phyEvent1); + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, phyMod, traceEndEvent, &phyEvent2); + if (type == module_type::core) { + cfgTile->memory_trace_config.start_event = phyEvent1; + cfgTile->memory_trace_config.stop_event = phyEvent2; + } else { + cfgTile->memory_tile_trace_config.start_event = phyEvent1; + cfgTile->memory_tile_trace_config.stop_event = phyEvent2; + } + } + + auto iter0 = configChannel0.find(tile); + auto iter1 = configChannel1.find(tile); + uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second; + uint8_t channel1 = (iter1 == configChannel1.end()) ? 
1 : iter1->second; + // TODO: for now, hard-code channels 2 and 3 + std::vector channels = {channel0, channel1, 2, 3}; + + // Specify Sel0/Sel1 for memory tiles + if (type == module_type::mem_tile) { + configEventSelections(loc, type, metricSet, channels, cfgTile->memory_tile_trace_config); + } + else { + // Record if these are channel-specific events + // NOTE: for now, check first event and assume single channel + auto channelNum = aie::getChannelNumberFromEvent(memoryEvents.at(0)); + if (channelNum >= 0) { + if (aie::isInputSet(type, metricSet)) { + cfgTile->core_trace_config.mm2s_channels[0] = channelNum; + if (static_cast(channelNum) < tile.mm2s_names.size()) + cfgTile->core_trace_config.mm2s_names[0] = tile.mm2s_names.at(channelNum); + } + else { + cfgTile->core_trace_config.s2mm_channels[0] = channelNum; + if (static_cast(channelNum) < tile.s2mm_names.size()) + cfgTile->core_trace_config.s2mm_names[0] = tile.s2mm_names.at(channelNum); + } + } + } + + // Configure memory trace events + for (uint8_t i = 0; i < memoryEvents.size(); i++) { + if (XAie_TraceEvent(&aieDevInst, loc, XAIE_MEM_MOD, memoryEvents[i], i) != XAIE_OK) + break; + numMemoryTraceEvents++; + + // Configure edge events (as needed) + configEdgeEvents(tile, type, metricSet, memoryEvents[i], channel0); + + // Update config file + uint16_t phyEvent = 0; + + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, phyMod, memoryEvents[i], &phyEvent); + + if (type == module_type::mem_tile) + cfgTile->memory_tile_trace_config.traced_events[i] = phyEvent; + else + cfgTile->memory_trace_config.traced_events[i] = phyEvent; + } + + memoryEvents.clear(); + mNumTileTraceEvents[typeInt][numMemoryTraceEvents]++; + + uint8_t packetType = (type == module_type::mem_tile) ? 
3 : 1; + XAie_Packet pkt = {0, packetType}; + + xrt_core::message::send(severity_level::info, "XRT", "Configuring Memory Trace Mode"); + + if (XAie_TracePktConfig(&aieDevInst, loc, mod, pkt) != XAIE_OK) + break; + if ((startType != "layer") || (startLayer == UINT_MAX)) { + if (XAie_TraceStartEvent(&aieDevInst, loc, mod, traceStartEvent) != XAIE_OK) + break; + } + + // Update memory packet type in config file + if (type == module_type::mem_tile) + cfgTile->memory_tile_trace_config.packet_type = packetType; + else + cfgTile->memory_trace_config.packet_type = packetType; + } // Memory modules/tiles + + // + // 3. Configure Interface Tile Trace Events + // + if (type == module_type::shim) { + xrt_core::message::send(severity_level::info, "XRT", "Configuring Interface Tile Trace Events"); + XAie_ModuleType mod = XAIE_PL_MOD; + + // Get specified channel numbers + auto iter0 = configChannel0.find(tile); + auto iter1 = configChannel1.find(tile); + uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second; + uint8_t channel1 = (iter1 == configChannel1.end()) ? 
1 : iter1->second; + // TODO: for now, hard-code channels 2 and 3 + std::vector channels = {channel0, channel1, 2, 3}; + + // Modify events as needed + modifyEvents(type, subtype, metricSet, channel0, interfaceEvents); + + // Specify selections for interface tiles (new for NPU3) + configEventSelections(loc, type, metricSet, channels, cfgTile->interface_tile_trace_config); + configStreamSwitchPorts(tileMetric.first, loc, type, metricSet, channel0, channel1, + interfaceEvents, cfgTile->interface_tile_trace_config); + + // Configure interface tile trace events + for (size_t i = 0; i < interfaceEvents.size(); i++) { + auto event = interfaceEvents.at(i); + if (XAie_TraceEvent(&aieDevInst, loc, mod, event, static_cast(i)) != XAIE_OK) + break; + numInterfaceTraceEvents++; + + // Update config file + uint16_t phyEvent = 0; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, XAIE_PL_MOD, event, &phyEvent); + cfgTile->interface_tile_trace_config.traced_events[i] = phyEvent; + } + + // Update config file + { + // Add interface trace control events + // Start + uint16_t phyEvent = 0; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, XAIE_PL_MOD, interfaceTileTraceStartEvent, &phyEvent); + cfgTile->interface_tile_trace_config.start_event = phyEvent; + // Stop + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, XAIE_PL_MOD, interfaceTileTraceEndEvent, &phyEvent); + cfgTile->interface_tile_trace_config.stop_event = phyEvent; + } + + mNumTileTraceEvents[typeInt][numInterfaceTraceEvents]++; + + uint8_t packetType = 4; + XAie_Packet pkt = {0, packetType}; + if (XAie_TracePktConfig(&aieDevInst, loc, mod, pkt) != XAIE_OK) + break; + if (startType != "layer" || startLayer == UINT_MAX) { + if (XAie_TraceStartEvent(&aieDevInst, loc, mod, interfaceTileTraceStartEvent) != XAIE_OK) + break; + } + if (XAie_TraceStopEvent(&aieDevInst, loc, mod, interfaceTileTraceEndEvent) != XAIE_OK) + break; + cfgTile->interface_tile_trace_config.packet_type = packetType; + auto channelNum = 
aie::getChannelNumberFromEvent(interfaceEvents.at(0)); + if (channelNum >= 0) { + if (aie::isInputSet(type, metricSet)) + cfgTile->interface_tile_trace_config.mm2s_channels[channelNum] = channelNum; + else + cfgTile->interface_tile_trace_config.s2mm_channels[channelNum] = channelNum; + } + } // Interface tiles + + if (xrt_core::config::get_verbosity() >= static_cast(severity_level::debug)) { + std::stringstream msg; + msg << "Reserved "; + if (type == module_type::core) + msg << numCoreTraceEvents << " core and " << numMemoryTraceEvents << " memory"; + else if (type == module_type::mem_tile) + msg << numMemoryTraceEvents << " memory tile"; + else if (type == module_type::shim) + msg << numInterfaceTraceEvents << " interface tile"; + msg << " trace events for tile (" << +col << "," << +row + << "). Adding tile to static database."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + } + + // Add config info to static database + // NOTE: Do not access cfgTile after this + (db->getStaticInfo()).addAIECfgTile(deviceId, cfgTile); + xrt_core::message::send(severity_level::info, "XRT", "Debugging XDP: after (db->getStaticInfo()).addAIECfgTile"); + } // For tiles + + if (m_trace_start_broadcast) { + xrt_core::message::send(severity_level::info, "XRT", "before build2ChannelBroadcastNetwork"); + build2ChannelBroadcastNetwork(hwCtxImpl, traceStartBroadcastChId1, traceStartBroadcastChId2, interfaceTileTraceStartEvent); + xrt_core::message::send(severity_level::info, "XRT", "before XAie_EventGenerate"); + XAie_EventGenerate(&aieDevInst, XAie_TileLoc(startCol, 0), XAIE_PL_MOD, interfaceTileTraceStartEvent); + } + + xrt_core::message::send(severity_level::info, "XRT", "before tranxHandler->submitTransaction"); + auto hwContext = metadata->getHwContext(); + tranxHandler->submitTransaction(&aieDevInst, hwContext); + + xrt_core::message::send(severity_level::info, "XRT", "Successfully scheduled AIE Trace."); + + if (!tranxHandler->initializeTransaction(&aieDevInst, 
"AieTraceFlush")) + return false; + + // Flush trace by forcing end event + // NOTE: this informs tiles to output remaining packets (even if partial) + for (const auto& loc : traceFlushLocs) + XAie_EventGenerate(&aieDevInst, loc, XAIE_CORE_MOD, coreTraceEndEvent); + for (const auto& loc : memoryTileTraceFlushLocs) + XAie_EventGenerate(&aieDevInst, loc, XAIE_MEM_MOD, memoryTileTraceEndEvent); + for (const auto& loc : interfaceTileTraceFlushLocs) + XAie_EventGenerate(&aieDevInst, loc, XAIE_PL_MOD, interfaceTileTraceEndEvent); + + tranxHandler->completeASM(&aieDevInst); + tranxHandler->generateELF(); + + xrt_core::message::send(severity_level::info, "XRT", "Successfully generated ELF for AIE Trace Flush."); + + return true; + } + + /**************************************************************************** + * Set AIE Device Instance (Currently unused in NPU3 implementation) + ***************************************************************************/ + void* AieTrace_NPU3Impl::setAieDeviceInst(void*, uint64_t) { return nullptr;} + +} // namespace xdp diff --git a/src/runtime_src/xdp/profile/plugin/aie_trace/client/aie_trace_npu3.h b/src/runtime_src/xdp/profile/plugin/aie_trace/client/aie_trace_npu3.h new file mode 100755 index 00000000000..111efcaa2a2 --- /dev/null +++ b/src/runtime_src/xdp/profile/plugin/aie_trace/client/aie_trace_npu3.h @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024-2025 Advanced Micro Devices, Inc. 
All rights reserved + +#ifndef AIE_TRACE_NPU3_H +#define AIE_TRACE_NPU3_H + +#include + +#include "core/include/xrt/xrt_kernel.h" +#include "xdp/profile/database/static_info/aie_constructs.h" +#include "xdp/profile/plugin/aie_trace/aie_trace_impl.h" +#include "xdp/profile/device/common/npu3/npu3_transaction.h" + +extern "C" { + #include + #include +} + +namespace xdp { + class AieTrace_NPU3Impl : public AieTraceImpl { + public: + AieTrace_NPU3Impl(VPDatabase* database, std::shared_ptr metadata); + ~AieTrace_NPU3Impl() = default; + void updateDevice() override; + void flushTraceModules() override; + void freeResources() override; + void pollTimers(uint64_t index, void* handle) override; + uint64_t checkTraceBufSize(uint64_t size) override; + void* setAieDeviceInst(void*, uint64_t) override; + + void modifyEvents(module_type type, io_type subtype, + const std::string metricSet, uint8_t channel, + std::vector& events); + bool setMetricsSettings(uint64_t deviceId, void* handle); + bool configureWindowedEventTrace(void* handle); + void build2ChannelBroadcastNetwork(void *handle, uint8_t broadcastId1, uint8_t broadcastId2, XAie_Events event); + void reset2ChannelBroadcastNetwork(void *handle, uint8_t broadcastId1, uint8_t broadcastId2); + module_type getTileType(uint8_t row); + uint16_t getRelativeRow(uint16_t absRow); + uint32_t bcIdToEvent(int bcId); + + bool isInputSet(const module_type type, const std::string metricSet); + bool isStreamSwitchPortEvent(const XAie_Events event); + bool isPortRunningEvent(const XAie_Events event); + bool isCoreModuleEvent(const XAie_Events event); + bool isDmaSet(const std::string metricSet); + + uint8_t getPortNumberFromEvent(XAie_Events event); + int8_t getChannelNumberFromEvent(XAie_Events event); + void configStreamSwitchPorts(const tile_type& tile, const XAie_LocType loc, + const module_type type, const std::string metricSet, + const uint8_t channel0, const uint8_t channel1, + std::vector& events, aie_cfg_base& config); + 
std::vector configComboEvents(const XAie_LocType loc, const XAie_ModuleType mod, + const module_type type, const std::string metricSet, + aie_cfg_base& config); + void configGroupEvents(const XAie_LocType loc, const XAie_ModuleType mod, + const module_type type, const std::string metricSet); + void configEventSelections(const XAie_LocType loc, const module_type type, + const std::string metricSet, std::vector& channels, + aie_cfg_base& config); + void configEdgeEvents(const tile_type& tile, const module_type type, + const std::string metricSet, const XAie_Events event, + const uint8_t channel = 0); + + private: + typedef XAie_Events EventType; + typedef std::vector EventVector; + std::unique_ptr tranxHandler; + + std::size_t op_size; + XAie_DevInst aieDevInst = {0}; + + std::map coreEventSets; + std::map memoryEventSets; + std::map memoryTileEventSets; + std::map interfaceTileEventSets; + + // Trace metrics (same for all sets) + EventType coreTraceStartEvent; + EventType coreTraceEndEvent; + EventType memoryModTraceStartEvent; + EventType memoryTileTraceStartEvent; + EventType memoryTileTraceEndEvent; + EventType interfaceTileTraceStartEvent; + EventType interfaceTileTraceEndEvent; + + bool m_trace_start_broadcast; + + // Tile locations to apply trace end and flush + std::vector traceFlushLocs; + std::vector memoryTileTraceFlushLocs; + std::vector interfaceTileTraceFlushLocs; + + // Keep track of number of events reserved per module and/or tile + int mNumTileTraceEvents[static_cast(module_type::num_types)][NUM_TRACE_EVENTS + 1]; + + }; + +} + +#endif diff --git a/src/runtime_src/xdp/profile/plugin/aie_trace/util/aie_trace_util.cpp b/src/runtime_src/xdp/profile/plugin/aie_trace/util/aie_trace_util.cpp index 73e56cd4b52..6b66d5d9f3e 100755 --- a/src/runtime_src/xdp/profile/plugin/aie_trace/util/aie_trace_util.cpp +++ b/src/runtime_src/xdp/profile/plugin/aie_trace/util/aie_trace_util.cpp @@ -122,16 +122,18 @@ namespace xdp::aie::trace { 
XAIE_EVENT_DMA_MM2S_0_MEMORY_STARVATION_MEM}; } else if (xdp::aie::isNPU3(hwGen)) { +#ifdef XDP_NPU3_BUILD eventSets["all_stalls_s2mm"].push_back(XAIE_EVENT_CASCADE_STALL_CORE); eventSets["s2mm_channels_stalls"] = - {XAIE_EVENT_DMA_S2MM_0_START_BD_MEM, XAIE_EVENT_DMA_S2MM_0_FINISHED_BD_MEM, - XAIE_EVENT_DMA_S2MM_0_STALLED_LOCK_MEM, XAIE_EVENT_DMA_S2MM_0_MEMORY_BACKPRESSURE_MEM, - XAIE_EVENT_DMA_S2MM_1_START_BD_MEM, XAIE_EVENT_DMA_S2MM_1_FINISHED_BD_MEM, - XAIE_EVENT_DMA_S2MM_1_STALLED_LOCK_MEM, XAIE_EVENT_DMA_S2MM_1_MEMORY_BACKPRESSURE_MEM}; + {XAIE_EVENT_DMA_S2MM_0_START_BD_CORE, XAIE_EVENT_DMA_S2MM_0_FINISHED_BD_CORE, + XAIE_EVENT_DMA_S2MM_0_STALLED_LOCK_CORE, XAIE_EVENT_DMA_S2MM_0_MEMORY_BACKPRESSURE_CORE, + XAIE_EVENT_DMA_S2MM_1_START_BD_CORE, XAIE_EVENT_DMA_S2MM_1_FINISHED_BD_CORE, + XAIE_EVENT_DMA_S2MM_1_STALLED_LOCK_CORE, XAIE_EVENT_DMA_S2MM_1_MEMORY_BACKPRESSURE_CORE}; eventSets["mm2s_channels_stalls"] = - {XAIE_EVENT_DMA_MM2S_0_START_BD_MEM, XAIE_EVENT_DMA_MM2S_0_FINISHED_BD_MEM, - XAIE_EVENT_DMA_MM2S_0_STREAM_BACKPRESSURE_MEM, XAIE_EVENT_DMA_MM2S_0_MEMORY_STARVATION_MEM}; + {XAIE_EVENT_DMA_MM2S_0_START_BD_CORE, XAIE_EVENT_DMA_MM2S_0_FINISHED_BD_CORE, + XAIE_EVENT_DMA_MM2S_0_STREAM_BACKPRESSURE_CORE, XAIE_EVENT_DMA_MM2S_0_MEMORY_STARVATION_CORE}; +#endif } eventSets["mm2s_channels"] = eventSets["s2mm_channels"]; @@ -269,10 +271,10 @@ namespace xdp::aie::trace { else if (aie::isNPU3(hwGen)) { #ifdef XDP_NPU3_BUILD eventSets["input_ports_details"] = { - XAIE_EVENT_DMA_MM2S_SEL0_START_BD_PL, XAIE_EVENT_DMA_MM2S_SEL0_FINISHED_BD_PL, - XAIE_EVENT_DMA_MM2S_SEL0_STREAM_BACKPRESSURE_PL, XAIE_EVENT_DMA_MM2S_SEL0_MEMORY_STARVATION_PL, - XAIE_EVENT_DMA_MM2S_SEL1_START_BD_PL, XAIE_EVENT_DMA_MM2S_SEL1_FINISHED_BD_PL, - XAIE_EVENT_DMA_MM2S_SEL1_STREAM_BACKPRESSURE_PL, XAIE_EVENT_DMA_MM2S_SEL1_MEMORY_STARVATION_PL}; + XAIE_EVENT_DMA_MM2S_0_START_BD_PL, XAIE_EVENT_DMA_MM2S_0_FINISHED_BD_PL, + XAIE_EVENT_DMA_MM2S_0_STREAM_BACKPRESSURE_PL, 
XAIE_EVENT_DMA_MM2S_0_MEMORY_STARVATION_PL, + XAIE_EVENT_DMA_MM2S_1_START_BD_PL, XAIE_EVENT_DMA_MM2S_1_FINISHED_BD_PL, + XAIE_EVENT_DMA_MM2S_1_STREAM_BACKPRESSURE_PL, XAIE_EVENT_DMA_MM2S_1_MEMORY_STARVATION_PL}; eventSets["output_ports_details"] = { XAIE_EVENT_DMA_S2MM_0_START_BD_PL, XAIE_EVENT_DMA_S2MM_0_FINISHED_BD_PL, XAIE_EVENT_DMA_S2MM_0_STALLED_LOCK_PL, XAIE_EVENT_DMA_S2MM_0_MEMORY_BACKPRESSURE_PL, diff --git a/src/runtime_src/xdp/profile/writer/aie_debug/aie_debug_writer_metadata.h b/src/runtime_src/xdp/profile/writer/aie_debug/aie_debug_writer_metadata.h index 0d4e547d504..093b45a44c9 100755 --- a/src/runtime_src/xdp/profile/writer/aie_debug/aie_debug_writer_metadata.h +++ b/src/runtime_src/xdp/profile/writer/aie_debug/aie_debug_writer_metadata.h @@ -101,6 +101,20 @@ class AIE2PSWriterUsedRegisters : public WriterUsedRegisters { }; +/************************************************************************************* + NPU3 Registers + *************************************************************************************/ +class NPU3WriterUsedRegisters : public WriterUsedRegisters { +public: + NPU3WriterUsedRegisters() { + populateRegDataMap(); + } + ~NPU3WriterUsedRegisters() = default; + + void populateRegDataMap(); + +}; + } // end namesapce xdp #endif diff --git a/src/runtime_src/xdp/profile/writer/aie_debug/aie_debug_writer_npu3_metadata.cpp b/src/runtime_src/xdp/profile/writer/aie_debug/aie_debug_writer_npu3_metadata.cpp new file mode 100755 index 00000000000..1de7de13c0f --- /dev/null +++ b/src/runtime_src/xdp/profile/writer/aie_debug/aie_debug_writer_npu3_metadata.cpp @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2025 Advanced Micro Devices, Inc. 
All rights reserved + +#include "xdp/profile/writer/aie_debug/aie_debug_writer_metadata.h" + +namespace xdp { + +/************************************************************************************* + * NPU3 Registers + *************************************************************************************/ +void NPU3WriterUsedRegisters::populateRegDataMap() { + // TODO: Populate register map with NPU3 registers +} + +} // end namespace xdp diff --git a/src/runtime_src/xdp/profile/writer/aie_trace/aie_trace_config_writer.cpp b/src/runtime_src/xdp/profile/writer/aie_trace/aie_trace_config_writer.cpp index 5a9a30bccc3..0ea7f5f54b9 100644 --- a/src/runtime_src/xdp/profile/writer/aie_trace/aie_trace_config_writer.cpp +++ b/src/runtime_src/xdp/profile/writer/aie_trace/aie_trace_config_writer.cpp @@ -31,11 +31,17 @@ namespace xdp { } AieTraceConfigWriter::~AieTraceConfigWriter() - { + { + if (!isWritten) + write(false); } bool AieTraceConfigWriter::write(bool) { + if (isWritten) + return true; + isWritten = true; + bpt::ptree pt; bpt::ptree EventTraceConfigs_C, EventTraceConfigs; @@ -157,7 +163,7 @@ namespace xdp { bpt::ptree s2mm_names; bpt::ptree mm2s_names; - for (uint32_t i=0; i < NUM_CHANNEL_SELECTS; ++i) { + for (uint32_t i=0; i < NUM_CHANNEL_SELECTS_MAX; ++i) { bpt::ptree chan1; bpt::ptree chan2; bpt::ptree chan3; @@ -345,7 +351,7 @@ namespace xdp { bpt::ptree s2mm_channels; bpt::ptree mm2s_channels; - for (uint32_t i=0; i < NUM_CHANNEL_SELECTS; ++i) { + for (uint32_t i=0; i < NUM_CHANNEL_SELECTS_MAX; ++i) { bpt::ptree chan1; bpt::ptree chan2; chan1.put("", tile_trace_config.s2mm_channels[i]); diff --git a/src/runtime_src/xdp/profile/writer/aie_trace/aie_trace_config_writer.h b/src/runtime_src/xdp/profile/writer/aie_trace/aie_trace_config_writer.h index 860b1b7098d..1138d65c5bd 100644 --- a/src/runtime_src/xdp/profile/writer/aie_trace/aie_trace_config_writer.h +++ b/src/runtime_src/xdp/profile/writer/aie_trace/aie_trace_config_writer.h @@ -33,6 +33,7 @@ namespace 
xdp { class AieTraceConfigWriter : public VPWriter { private: + bool isWritten = false; uint64_t deviceIndex; public: AieTraceConfigWriter(const char* filename, uint64_t index);