From a8008adfc98b270630d3c377b2108e35acec0d23 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Wed, 23 Apr 2025 22:21:07 +0000 Subject: [PATCH 1/3] Event timing v2 --- utils/gen_babeltrace_model_helper.rb | 2 +- ze/tracer_ze_helpers.include.c | 50 ++++++++++++++++++++++++++-- ze/ze_events.yaml | 13 ++++++++ ze/ze_model.rb | 2 +- 4 files changed, 62 insertions(+), 5 deletions(-) diff --git a/utils/gen_babeltrace_model_helper.rb b/utils/gen_babeltrace_model_helper.rb index 1eb81c4d2..854768108 100644 --- a/utils/gen_babeltrace_model_helper.rb +++ b/utils/gen_babeltrace_model_helper.rb @@ -2,7 +2,7 @@ def get_extra_fields_types_name(event) event["fields"].collect { |field| lttng = LTTng::TracepointField.new(*field) name = lttng.name.to_s - type = event["args"].find { |t, n| n == name || n == name.gsub(/_val\z/, "") }[0] + type = event["args"].find { |t, n| n == name || n == name.gsub(/_vals?\z/, "") }[0] case lttng.macro.to_s when /ctf_sequence/ [["ctf_integer", "size_t", "_#{name}_length", nil], diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index be865b5af..5fb723448 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -375,7 +375,7 @@ static struct _ze_event_h * _get_profiling_event( } e_w->command_list = command_list; - ze_event_pool_desc_t desc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, NULL, ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP | ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 1}; + ze_event_pool_desc_t desc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, NULL, ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP | ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP, 1}; ze_result_t res = ZE_EVENT_POOL_CREATE_PTR(context, &desc, 0, NULL, &e_w->event_pool); if (res != ZE_RESULT_SUCCESS) { THAPI_DBGLOG("zeEventPoolCreate failed with %d, for command list: %p, context: %p", res, command_list, context); @@ -399,6 +399,7 @@ static struct _ze_event_h * _get_profiling_event( } static void _profile_event_results(ze_event_handle_t 
event); +static void _profile_event_results_v2(ze_event_handle_t event, ze_command_list_handle_t); static inline void _on_created_event(ze_event_handle_t event) { #ifdef THAPI_DEBUG @@ -441,8 +442,10 @@ static inline void _on_destroy_event(ze_event_handle_t event) { return; } - if (!(ze_event->flags & _ZE_PROFILED)) + if (!(ze_event->flags & _ZE_PROFILED)) { _profile_event_results(event); + _profile_event_results_v2(event, ze_event->command_list); + } PUT_ZE_EVENT_WRAPPER(ze_event); } @@ -472,8 +475,10 @@ static inline void _on_reset_event(ze_event_handle_t event) { return; } - if (!(ze_event->flags & _ZE_PROFILED)) + if (!(ze_event->flags & _ZE_PROFILED)) { _profile_event_results(event); + _profile_event_results_v2(event, ze_event->command_list); + } if (!(ze_event->flags & _ZE_IMMEDIATE_CMD)) ADD_ZE_EVENT(ze_event); @@ -505,6 +510,9 @@ static void _profile_event_results(ze_event_handle_t event) { if (tracepoint_enabled(lttng_ust_ze_profiling, event_profiling_results)) { status = ZE_EVENT_QUERY_STATUS_PTR(event); timestamp_status = ZE_EVENT_QUERY_KERNEL_TIMESTAMP_PTR(event, &res); + printf("zeEventQueryKernelTimestamp | Start %ld | End %ld \n", + res.global.kernelStart, + res.global.kernelEnd); do_tracepoint(lttng_ust_ze_profiling, event_profiling_results, event, status, timestamp_status, res.global.kernelStart, @@ -513,6 +521,42 @@ static void _profile_event_results(ze_event_handle_t event) { res.context.kernelEnd); } } +static void _profile_event_results_v2(ze_event_handle_t event, ze_command_list_handle_t command_list) { + + ze_result_t status = ZE_EVENT_QUERY_STATUS_PTR(event); + + /* Find Device */ + ze_device_handle_t device; + ZE_COMMAND_LIST_GET_DEVICE_HANDLE_PTR(command_list, &device); + + // Qurery Timestamp. 
We should avoid malloc + uint32_t pCount = 0; + ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_PTR(event, device, &pCount, NULL); + + ze_kernel_timestamp_result_t *kernelTimestamps = + malloc(pCount * sizeof(ze_kernel_timestamp_result_t)); + ze_synchronized_timestamp_result_ext_t *synchronizedTimestamps = + malloc(pCount * sizeof(ze_synchronized_timestamp_result_ext_t)); + ze_event_query_kernel_timestamps_results_ext_properties_t resultsProps; + + resultsProps.stype = ZE_STRUCTURE_TYPE_EVENT_QUERY_KERNEL_TIMESTAMPS_RESULTS_EXT_PROPERTIES; + resultsProps.pNext = NULL; + resultsProps.pKernelTimestampsBuffer = kernelTimestamps; + resultsProps.pSynchronizedTimestampsBuffer = synchronizedTimestamps; + // Query the event timestamps + ze_result_t timestamps_status = ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_PTR(event, device, &pCount, &resultsProps); + // Will do lttng static array. of size pCount and type synchronizedTimestamps + for (uint32_t i = 0; i < pCount; i++) { + printf("zeEventQueryKernelTimestampsExtSynchronizedTimestamps | Start %ld | End %ld \n", + synchronizedTimestamps[i].global.kernelStart, + synchronizedTimestamps[i].global.kernelEnd); + printf("zeEventQueryKernelTimestampsExtGlobalKernelKernelTimestamps | Start %ld | End %ld \n", + kernelTimestamps[i].global.kernelStart, + kernelTimestamps[i].global.kernelEnd); + } + do_tracepoint(lttng_ust_ze_profiling, event_profiling_results_v2, + event, status, timestamps_status, pCount, synchronizedTimestamps); +} static void _event_cleanup() { struct _ze_event_h *ze_event = NULL; diff --git a/ze/ze_events.yaml b/ze/ze_events.yaml index 6ed18d953..95c8da2dc 100644 --- a/ze/ze_events.yaml +++ b/ze/ze_events.yaml @@ -132,6 +132,19 @@ lttng_ust_ze_profiling: - [ ctf_integer, uint64_t, globalEnd, "globalEnd" ] - [ ctf_integer, uint64_t, contextStart, "contextStart" ] - [ ctf_integer, uint64_t, contextEnd, "contextEnd" ] + - name: event_profiling_results_v2 + args: + - [ ze_event_handle_t, hEvent ] + - [ ze_result_t, status ] + - [ 
ze_result_t, timestampsStatus ] + - [ uint32_t, count ] + - [ ze_synchronized_timestamp_result_ext_t *, pSynchronizedTimestamps ] + fields: + - [ ctf_integer_hex, uintptr_t, hEvent, "(uintptr_t)hEvent" ] + - [ ctf_integer, ze_result_t, status, "status" ] + - [ ctf_integer, ze_result_t, timestampsStatus, "timestampsStatus" ] + - [ ctf_integer, uint32_t, count, "count" ] + - [ ctf_sequence_text, uint8_t, pSynchronizedTimestamps_vals, pSynchronizedTimestamps, size_t, "sizeof(ze_synchronized_timestamp_result_ext_t)*count" ] lttng_ust_ze_properties: events: - name: driver diff --git a/ze/ze_model.rb b/ze/ze_model.rb index d1d88935a..95b100230 100644 --- a/ze/ze_model.rb +++ b/ze/ze_model.rb @@ -182,7 +182,7 @@ def upper_snake_case(str) ze_event_pool_desc_t _new_desc; if (_do_profile && desc && !(desc->flags & ZE_EVENT_POOL_FLAG_IPC)) { _new_desc = *desc; - _new_desc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + _new_desc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP; _new_desc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE; desc = &_new_desc; } From a5ad953ae9914c5b80e19f7536e34a436115dade Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Wed, 7 May 2025 21:40:30 +0000 Subject: [PATCH 2/3] cleaner? 
--- ze/btx_zeinterval_callbacks.cpp | 116 ++++++++++++++++++++++++++------ ze/btx_zeinterval_callbacks.hpp | 2 + ze/tracer_ze_helpers.include.c | 75 ++++++++++----------- 3 files changed, 132 insertions(+), 61 deletions(-) diff --git a/ze/btx_zeinterval_callbacks.cpp b/ze/btx_zeinterval_callbacks.cpp index 6fb3e4617..8a4771c6a 100644 --- a/ze/btx_zeinterval_callbacks.cpp +++ b/ze/btx_zeinterval_callbacks.cpp @@ -605,6 +605,8 @@ static void property_device_timer_callback(void *btx_handle, void *usr_data, int auto *data = static_cast(usr_data); data->device_timestamps_pair_ref[{hostname, vpid, (thapi_device_id)hDevice}] = {ts, deviceTimestamp}; + + data->sync_lttng_monotonic_ref[{hostname, vpid}] = ts - hostTimestamp; } /* @@ -716,6 +718,7 @@ static void event_profiling_result_callback(void *btx_handle, void *usr_data, in const bool err = ((status != ZE_RESULT_SUCCESS) || (timestampStatus != ZE_RESULT_SUCCESS)); // No device information. No conversion to ns, no looping uint64_t delta = globalEnd - globalStart; + uint64_t start = lltngMin; uintptr_t device_hash = 0; const auto it0 = data->device_property.find({hostname, vpid, (thapi_device_id)device}); @@ -740,6 +743,72 @@ static void event_profiling_result_callback(void *btx_handle, void *usr_data, in metadata.c_str()); } +static void event_profiling_results_v2_callback( + void *btx_handle, void *usr_data, int64_t ts, const char *hostname, int64_t vpid, uint64_t vtid, + ze_event_handle_t hEvent, ze_result_t status, ze_result_t timestampsStatus, uint32_t count, + size_t _pSynchronizedTimestamps_vals_length, + ze_synchronized_timestamp_result_ext_t *pSynchronizedTimestamps_vals) { + + if (status == ZE_RESULT_NOT_READY) + return; + + auto *data = static_cast(usr_data); + + // TODO: Should we always find the eventToBtxDesct? 
+ // We didn't find the partial payload, that means we should ignore it + const auto it_p = data->eventToBtxDesct.find({hostname, vpid, hEvent}); + if (it_p == data->eventToBtxDesct.cend()) + return; + // We don't erase, there may be one entry for multiple results + const auto &[vtid_submission, commandQueueDesc, hCommandList, hCommandListIsImmediate, device, + commandName, _1, _2, type, ptr] = it_p->second; + std::string metadata = ""; + { + std::stringstream ss_metadata; + if ((type == btx_event_t::KERNEL) && (status == ZE_RESULT_SUCCESS)) + ss_metadata << std::get(ptr) << ", "; + // Create additional Metadata of the Command Queue + ss_metadata << "{ordinal: " << commandQueueDesc.ordinal << ", " + << "index: " << commandQueueDesc.index << "}"; + metadata = ss_metadata.str(); + } + if (!hCommandListIsImmediate) + data->commandListToEvents[{hostname, vpid, hCommandList}].erase(hEvent); + + if ((type == btx_event_t::TRAFFIC) && (status == ZE_RESULT_SUCCESS)) { + auto &[ts, size] = std::get(ptr); + btx_push_message_lttng_traffic(btx_handle, hostname, vpid, vtid, ts, BACKEND_ZE, + commandName.c_str(), size, metadata.c_str()); + } + const bool err = ((status != ZE_RESULT_SUCCESS) || (timestampsStatus != ZE_RESULT_SUCCESS)); + + uint64_t min_start = std::numeric_limits<uint64_t>::max(); + uint64_t max_end = std::numeric_limits<uint64_t>::min(); + + auto shift = data->sync_lttng_monotonic_ref[{hostname, vpid}]; + for (uint32_t i = 0; i < count; i++) { + min_start = std::min(min_start, pSynchronizedTimestamps_vals[i].global.kernelStart); + max_end = std::max(max_end, pSynchronizedTimestamps_vals[i].global.kernelEnd); + } + + uintptr_t device_hash = 0; + const auto it0 = data->device_property.find({hostname, vpid, (thapi_device_id)device}); + if (it0 != data->device_property.cend()) { + device_hash = hash_device(it0->second); + } + uintptr_t subdevice_hash = 0; + const auto it1 = data->subdevice_parent.find({hostname, vpid, (thapi_device_id)device}); + if (it1 != data->subdevice_parent.cend()) { + 
subdevice_hash = device_hash; + const auto it2 = data->device_property.find({hostname, vpid, it1->second}); + if (it2 != data->device_property.cend()) + subdevice_hash = hash_device(it2->second); + } + btx_push_message_lttng_device(btx_handle, hostname, vpid, vtid_submission, min_start + shift, + BACKEND_ZE, (commandName + "_new").c_str(), max_end - min_start, + device_hash, subdevice_hash, err, metadata.c_str()); +} + static void zeEventDestroy_entry_callback(void *btx_handle, void *usr_data, int64_t ts, const char *hostname, int64_t vpid, uint64_t vtid, ze_event_handle_t hEvent) { @@ -1081,21 +1150,21 @@ void btx_register_usr_callbacks(void *btx_handle) { /* Device and Subdevice property */ btx_register_callbacks_lttng_ust_ze_properties_device(btx_handle, &property_device_callback); btx_register_callbacks_lttng_ust_ze_properties_subdevice(btx_handle, - &property_subdevice_callback); + &property_subdevice_callback); /* Map command list to device and to command queue dist*/ btx_register_callbacks_lttng_ust_ze_zeCommandListCreateImmediate_entry( btx_handle, zeCommandListCreateImmediate_entry_callback); btx_register_callbacks_lttng_ust_ze_zeCommandListCreateImmediate_exit( btx_handle, zeCommandListCreateImmediate_exit_callback); - btx_register_callbacks_lttng_ust_ze_zeCommandListCreate_entry( - btx_handle, zeCommandListCreate_entry_callback); - btx_register_callbacks_lttng_ust_ze_zeCommandListCreate_exit( - btx_handle, zeCommandListCreate_exit_callback); + btx_register_callbacks_lttng_ust_ze_zeCommandListCreate_entry(btx_handle, + zeCommandListCreate_entry_callback); + btx_register_callbacks_lttng_ust_ze_zeCommandListCreate_exit(btx_handle, + zeCommandListCreate_exit_callback); btx_register_callbacks_lttng_ust_ze_zeCommandQueueCreate_entry( btx_handle, zeCommandQueueCreate_entry_callback); - btx_register_callbacks_lttng_ust_ze_zeCommandQueueCreate_exit( - btx_handle, zeCommandQueueCreate_exit_callback); + 
btx_register_callbacks_lttng_ust_ze_zeCommandQueueCreate_exit(btx_handle, + zeCommandQueueCreate_exit_callback); btx_register_callbacks_lttng_ust_ze_zeCommandQueueExecuteCommandLists_entry( btx_handle, zeCommandQueueExecuteCommandLists_entry_callback); @@ -1111,7 +1180,7 @@ void btx_register_usr_callbacks(void *btx_handle) { /* Drift */ btx_register_callbacks_lttng_ust_ze_properties_device_timer(btx_handle, - &property_device_timer_callback); + &property_device_timer_callback); /* Profiling Command (everything who signal an event on completion) */ @@ -1131,22 +1200,25 @@ void btx_register_usr_callbacks(void *btx_handle) { btx_register_callbacks_lttng_ust_ze_zeModuleGetGlobalPointer_exit( btx_handle, &zeModuleGetGlobalPointer_exit_callback); - btx_register_callbacks_lttng_ust_ze_zeModuleDestroy_exit( - btx_handle, &zeModuleDestroy_exit_callback); + btx_register_callbacks_lttng_ust_ze_zeModuleDestroy_exit(btx_handle, + &zeModuleDestroy_exit_callback); /* Handling of event */ - btx_register_callbacks_lttng_ust_ze_profiling_event_profiling( - btx_handle, &event_profiling_callback); + btx_register_callbacks_lttng_ust_ze_profiling_event_profiling(btx_handle, + &event_profiling_callback); btx_register_callbacks_lttng_ust_ze_profiling_event_profiling_results( btx_handle, &event_profiling_result_callback); - btx_register_callbacks_lttng_ust_ze_zeEventDestroy_entry( - btx_handle, &zeEventDestroy_entry_callback); - btx_register_callbacks_lttng_ust_ze_zeEventDestroy_exit( - btx_handle, &zeEventDestroy_exit_callback); - btx_register_callbacks_lttng_ust_ze_zeCommandListReset_entry( - btx_handle, &zeCommandListReset_entry_callback); - btx_register_callbacks_lttng_ust_ze_zeCommandListReset_exit( - btx_handle, &zeCommandListReset_exit_callback); + btx_register_callbacks_lttng_ust_ze_profiling_event_profiling_results_v2( + btx_handle, &event_profiling_results_v2_callback); + + btx_register_callbacks_lttng_ust_ze_zeEventDestroy_entry(btx_handle, + &zeEventDestroy_entry_callback); + 
btx_register_callbacks_lttng_ust_ze_zeEventDestroy_exit(btx_handle, + &zeEventDestroy_exit_callback); + btx_register_callbacks_lttng_ust_ze_zeCommandListReset_entry(btx_handle, + &zeCommandListReset_entry_callback); + btx_register_callbacks_lttng_ust_ze_zeCommandListReset_exit(btx_handle, + &zeCommandListReset_exit_callback); /* Sampling */ @@ -1165,8 +1237,8 @@ void btx_register_usr_callbacks(void *btx_handle) { btx_handle, &lttng_ust_ze_sampling_memoryProperties_callback); // Telemetries - btx_register_callbacks_lttng_ust_ze_sampling_memStats( - btx_handle, &lttng_ust_ze_sampling_memStats_callback); + btx_register_callbacks_lttng_ust_ze_sampling_memStats(btx_handle, + &lttng_ust_ze_sampling_memStats_callback); btx_register_callbacks_lttng_ust_ze_sampling_fabricPort( btx_handle, &lttng_ust_ze_sampling_fabricPort_callback); btx_register_callbacks_lttng_ust_ze_sampling_gpu_energy( diff --git a/ze/btx_zeinterval_callbacks.hpp b/ze/btx_zeinterval_callbacks.hpp index 6d1afb2a0..5dd1ed0f6 100644 --- a/ze/btx_zeinterval_callbacks.hpp +++ b/ze/btx_zeinterval_callbacks.hpp @@ -99,6 +99,8 @@ struct data_s { std::unordered_map> imm_tmp; std::unordered_map device_timestamps_pair_ref; + std::unordered_map sync_lttng_monotonic_ref; + /* Sampling */ std::unordered_map sampling_device_property; std::unordered_map fabricPort_property; diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index 5fb723448..661b2058e 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -458,8 +458,10 @@ static inline void _unregister_ze_event(ze_event_handle_t event, int get_results return; } - if (get_results && !(ze_event->flags & _ZE_PROFILED)) + if (get_results && !(ze_event->flags & _ZE_PROFILED)) { _profile_event_results(event); + _profile_event_results_v2(event, ze_event->command_list); + } if (ze_event->event_pool) PUT_ZE_EVENT(ze_event); else @@ -496,6 +498,8 @@ static inline void _dump_and_reset_our_event(ze_event_handle_t event) { } 
_profile_event_results(event); + _profile_event_results_v2(event, ze_event->command_list); + ZE_EVENT_HOST_RESET_PTR(event); ze_event->flags &= ~_ZE_PROFILED; @@ -510,9 +514,6 @@ static void _profile_event_results(ze_event_handle_t event) { if (tracepoint_enabled(lttng_ust_ze_profiling, event_profiling_results)) { status = ZE_EVENT_QUERY_STATUS_PTR(event); timestamp_status = ZE_EVENT_QUERY_KERNEL_TIMESTAMP_PTR(event, &res); - printf("zeEventQueryKernelTimestamp | Start %ld | End %ld \n", - res.global.kernelStart, - res.global.kernelEnd); do_tracepoint(lttng_ust_ze_profiling, event_profiling_results, event, status, timestamp_status, res.global.kernelStart, @@ -523,39 +524,30 @@ static void _profile_event_results(ze_event_handle_t event) { } static void _profile_event_results_v2(ze_event_handle_t event, ze_command_list_handle_t command_list) { - ze_result_t status = ZE_EVENT_QUERY_STATUS_PTR(event); - - /* Find Device */ - ze_device_handle_t device; - ZE_COMMAND_LIST_GET_DEVICE_HANDLE_PTR(command_list, &device); - - // Qurery Timestamp. We should avoid malloc - uint32_t pCount = 0; - ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_PTR(event, device, &pCount, NULL); - - ze_kernel_timestamp_result_t *kernelTimestamps = - malloc(pCount * sizeof(ze_kernel_timestamp_result_t)); - ze_synchronized_timestamp_result_ext_t *synchronizedTimestamps = - malloc(pCount * sizeof(ze_synchronized_timestamp_result_ext_t)); - ze_event_query_kernel_timestamps_results_ext_properties_t resultsProps; - - resultsProps.stype = ZE_STRUCTURE_TYPE_EVENT_QUERY_KERNEL_TIMESTAMPS_RESULTS_EXT_PROPERTIES; - resultsProps.pNext = NULL; - resultsProps.pKernelTimestampsBuffer = kernelTimestamps; - resultsProps.pSynchronizedTimestampsBuffer = synchronizedTimestamps; - // Query the event timestamps - ze_result_t timestamps_status = ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_PTR(event, device, &pCount, &resultsProps); - // Will do lttng static array. 
of size pCount and type synchronizedTimestamps - for (uint32_t i = 0; i < pCount; i++) { - printf("zeEventQueryKernelTimestampsExtSynchronizedTimestamps | Start %ld | End %ld \n", - synchronizedTimestamps[i].global.kernelStart, - synchronizedTimestamps[i].global.kernelEnd); - printf("zeEventQueryKernelTimestampsExtGlobalKernelKernelTimestamps | Start %ld | End %ld \n", - kernelTimestamps[i].global.kernelStart, - kernelTimestamps[i].global.kernelEnd); - } - do_tracepoint(lttng_ust_ze_profiling, event_profiling_results_v2, - event, status, timestamps_status, pCount, synchronizedTimestamps); + if (!tracepoint_enabled(lttng_ust_ze_profiling, event_profiling_results)) { + return; + } + ze_result_t status = ZE_EVENT_QUERY_STATUS_PTR(event); + /* Find Device */ + ze_device_handle_t device; + ZE_COMMAND_LIST_GET_DEVICE_HANDLE_PTR(command_list, &device); + // Query Timestamp + uint32_t pCount = 0; + ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_PTR(event, device, &pCount, NULL); + assert(pCount <= 2); + + ze_kernel_timestamp_result_t kernelTimestamps[2]; + ze_synchronized_timestamp_result_ext_t synchronizedTimestamps[2]; + + ze_event_query_kernel_timestamps_results_ext_properties_t resultsProps = { + .stype = ZE_STRUCTURE_TYPE_EVENT_QUERY_KERNEL_TIMESTAMPS_RESULTS_EXT_PROPERTIES, + .pNext = NULL, + .pKernelTimestampsBuffer = kernelTimestamps, + .pSynchronizedTimestampsBuffer = synchronizedTimestamps } ; + ze_result_t timestamps_status = ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_PTR(event, device, &pCount, &resultsProps); + // Send + do_tracepoint(lttng_ust_ze_profiling, event_profiling_results_v2, + event, status, timestamps_status, pCount, synchronizedTimestamps); } static void _event_cleanup() { @@ -564,8 +556,11 @@ static void _event_cleanup() { HASH_ITER(hh, _ze_events, ze_event, tmp) { HASH_DEL(_ze_events, ze_event); - if (ze_event->event && !(ze_event->flags & _ZE_PROFILED)) + if (ze_event->event && !(ze_event->flags & _ZE_PROFILED)) { _profile_event_results(ze_event->event); + 
_profile_event_results_v2(ze_event->event, ze_event->command_list); + } + if (ze_event->event_pool) { if (ze_event->event) ZE_EVENT_DESTROY_PTR(ze_event->event); @@ -582,8 +577,10 @@ static void _on_destroy_context(ze_context_handle_t context){ HASH_ITER(hh, _ze_events, ze_event, tmp) { if (ze_event->context == context) { HASH_DEL(_ze_events, ze_event); - if (ze_event->event && !(ze_event->flags & _ZE_PROFILED)) + if (ze_event->event && !(ze_event->flags & _ZE_PROFILED)) { _profile_event_results(ze_event->event); + _profile_event_results_v2(ze_event->event, ze_event->command_list); + } if (ze_event->event_pool) { if (ze_event->event) ZE_EVENT_DESTROY_PTR(ze_event->event); From 720135127c30f1ddb86ab1e95191b7d94f1294e6 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Tue, 20 May 2025 18:54:32 +0000 Subject: [PATCH 3/3] early exit --- ze/btx_zeinterval_callbacks.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ze/btx_zeinterval_callbacks.cpp b/ze/btx_zeinterval_callbacks.cpp index 8a4771c6a..0fe3b1e8c 100644 --- a/ze/btx_zeinterval_callbacks.cpp +++ b/ze/btx_zeinterval_callbacks.cpp @@ -779,6 +779,9 @@ static void event_profiling_results_v2_callback( auto &[ts, size] = std::get(ptr); btx_push_message_lttng_traffic(btx_handle, hostname, vpid, vtid, ts, BACKEND_ZE, commandName.c_str(), size, metadata.c_str()); + + // Early exit to work around the bug where timers are broken for memcopy + return; } const bool err = ((status != ZE_RESULT_SUCCESS) || (timestampsStatus != ZE_RESULT_SUCCESS));