Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion utils/gen_babeltrace_model_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ def get_extra_fields_types_name(event)
event["fields"].collect { |field|
lttng = LTTng::TracepointField.new(*field)
name = lttng.name.to_s
type = event["args"].find { |t, n| n == name || n == name.gsub(/_val\z/, "") }[0]
type = event["args"].find { |t, n| n == name || n == name.gsub(/_vals?\z/, "") }[0]
case lttng.macro.to_s
when /ctf_sequence/
[["ctf_integer", "size_t", "_#{name}_length", nil],
Expand Down
119 changes: 97 additions & 22 deletions ze/btx_zeinterval_callbacks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,8 @@ static void property_device_timer_callback(void *btx_handle, void *usr_data, int
auto *data = static_cast<data_t *>(usr_data);
data->device_timestamps_pair_ref[{hostname, vpid, (thapi_device_id)hDevice}] = {ts,
deviceTimestamp};

data->sync_lttng_monotonic_ref[{hostname, vpid}] = ts - hostTimestamp;
}

/*
Expand Down Expand Up @@ -716,6 +718,7 @@ static void event_profiling_result_callback(void *btx_handle, void *usr_data, in
const bool err = ((status != ZE_RESULT_SUCCESS) || (timestampStatus != ZE_RESULT_SUCCESS));
// No device information. No conversion to ns, no looping
uint64_t delta = globalEnd - globalStart;

uint64_t start = lltngMin;
uintptr_t device_hash = 0;
const auto it0 = data->device_property.find({hostname, vpid, (thapi_device_id)device});
Expand All @@ -740,6 +743,75 @@ static void event_profiling_result_callback(void *btx_handle, void *usr_data, in
metadata.c_str());
}

/*
 * Callback for the `event_profiling_results_v2` tracepoint.
 *
 * Looks up the partial payload recorded for `hEvent` and, depending on the
 * kind of command, pushes either a traffic message or a device message.
 * For device messages the interval is [min(kernelStart), max(kernelEnd)]
 * over the `count` synchronized timestamps, translated by the per-process
 * offset stored in `sync_lttng_monotonic_ref`.
 *
 * Parameters:
 *   ts                            timestamp of the tracepoint itself
 *   status                        result of the event status query;
 *                                 ZE_RESULT_NOT_READY events are ignored
 *   timestampsStatus              result of the timestamp query
 *   count                         number of entries in
 *                                 pSynchronizedTimestamps_vals
 *   pSynchronizedTimestamps_vals  synchronized timestamps, may be empty
 */
static void event_profiling_results_v2_callback(
    void *btx_handle, void *usr_data, int64_t ts, const char *hostname, int64_t vpid, uint64_t vtid,
    ze_event_handle_t hEvent, ze_result_t status, ze_result_t timestampsStatus, uint32_t count,
    size_t _pSynchronizedTimestamps_vals_length,
    ze_synchronized_timestamp_result_ext_t *pSynchronizedTimestamps_vals) {

  if (status == ZE_RESULT_NOT_READY)
    return;

  auto *data = static_cast<data_t *>(usr_data);

  // TODO: Should we always find the eventToBtxDesct?
  // We didn't find the partial payload, which means we should ignore this event
  const auto it_p = data->eventToBtxDesct.find({hostname, vpid, hEvent});
  if (it_p == data->eventToBtxDesct.cend())
    return;
  // We don't erase: one entry may be used for multiple results
  const auto &[vtid_submission, commandQueueDesc, hCommandList, hCommandListIsImmediate, device,
               commandName, _1, _2, type, ptr] = it_p->second;
  std::string metadata;
  {
    std::stringstream ss_metadata;
    if ((type == btx_event_t::KERNEL) && (status == ZE_RESULT_SUCCESS))
      ss_metadata << std::get<btx_additional_info_kernel_t>(ptr) << ", ";
    // Create additional metadata describing the command queue
    ss_metadata << "{ordinal: " << commandQueueDesc.ordinal << ", "
                << "index: " << commandQueueDesc.index << "}";
    metadata = ss_metadata.str();
  }
  if (!hCommandListIsImmediate)
    data->commandListToEvents[{hostname, vpid, hCommandList}].erase(hEvent);

  if ((type == btx_event_t::TRAFFIC) && (status == ZE_RESULT_SUCCESS)) {
    // The timestamp pushed here is the one stored with the traffic info,
    // not the `ts` parameter of this callback.
    const auto &[traffic_ts, traffic_size] = std::get<btx_additional_info_traffic_t>(ptr);
    btx_push_message_lttng_traffic(btx_handle, hostname, vpid, vtid, traffic_ts, BACKEND_ZE,
                                   commandName.c_str(), traffic_size, metadata.c_str());

    // Early exit to work around the bug where timers are broken for memcopy
    return;
  }
  bool err = ((status != ZE_RESULT_SUCCESS) || (timestampsStatus != ZE_RESULT_SUCCESS));

  // Look the offset up without inserting: operator[] would silently create a
  // zero entry for processes that never reported a reference.
  int64_t shift = 0;
  const auto it_shift = data->sync_lttng_monotonic_ref.find({hostname, vpid});
  if (it_shift != data->sync_lttng_monotonic_ref.cend())
    shift = it_shift->second;

  uint64_t start = 0;
  uint64_t duration = 0;
  if (count == 0 || pSynchronizedTimestamps_vals == nullptr) {
    // No timestamps at all: reducing over an empty range would leave
    // min_start at UINT64_MAX and make the duration wrap around. Report a
    // zero-length, errored interval anchored at the tracepoint time instead.
    err = true;
    start = static_cast<uint64_t>(ts);
  } else {
    uint64_t min_start = std::numeric_limits<uint64_t>::max();
    uint64_t max_end = std::numeric_limits<uint64_t>::min();
    for (uint32_t i = 0; i < count; i++) {
      min_start = std::min(min_start, pSynchronizedTimestamps_vals[i].global.kernelStart);
      max_end = std::max(max_end, pSynchronizedTimestamps_vals[i].global.kernelEnd);
    }
    start = min_start + shift;
    duration = max_end - min_start;
  }

  uintptr_t device_hash = 0;
  const auto it0 = data->device_property.find({hostname, vpid, (thapi_device_id)device});
  if (it0 != data->device_property.cend()) {
    device_hash = hash_device(it0->second);
  }
  // When `device` is a known sub-device, the sub-device hash defaults to the
  // device hash and is replaced by the hash of the entry found through
  // `subdevice_parent`, if any.
  uintptr_t subdevice_hash = 0;
  const auto it1 = data->subdevice_parent.find({hostname, vpid, (thapi_device_id)device});
  if (it1 != data->subdevice_parent.cend()) {
    subdevice_hash = device_hash;
    const auto it2 = data->device_property.find({hostname, vpid, it1->second});
    if (it2 != data->device_property.cend())
      subdevice_hash = hash_device(it2->second);
  }
  btx_push_message_lttng_device(btx_handle, hostname, vpid, vtid_submission, start, BACKEND_ZE,
                                (commandName + "_new").c_str(), duration, device_hash,
                                subdevice_hash, err, metadata.c_str());
}

static void zeEventDestroy_entry_callback(void *btx_handle, void *usr_data, int64_t ts,
const char *hostname, int64_t vpid, uint64_t vtid,
ze_event_handle_t hEvent) {
Expand Down Expand Up @@ -1081,21 +1153,21 @@ void btx_register_usr_callbacks(void *btx_handle) {
/* Device and Subdevice property */
btx_register_callbacks_lttng_ust_ze_properties_device(btx_handle, &property_device_callback);
btx_register_callbacks_lttng_ust_ze_properties_subdevice(btx_handle,
&property_subdevice_callback);
&property_subdevice_callback);

/* Map command list to device and to command queue dist*/
btx_register_callbacks_lttng_ust_ze_zeCommandListCreateImmediate_entry(
btx_handle, zeCommandListCreateImmediate_entry_callback);
btx_register_callbacks_lttng_ust_ze_zeCommandListCreateImmediate_exit(
btx_handle, zeCommandListCreateImmediate_exit_callback);
btx_register_callbacks_lttng_ust_ze_zeCommandListCreate_entry(
btx_handle, zeCommandListCreate_entry_callback);
btx_register_callbacks_lttng_ust_ze_zeCommandListCreate_exit(
btx_handle, zeCommandListCreate_exit_callback);
btx_register_callbacks_lttng_ust_ze_zeCommandListCreate_entry(btx_handle,
zeCommandListCreate_entry_callback);
btx_register_callbacks_lttng_ust_ze_zeCommandListCreate_exit(btx_handle,
zeCommandListCreate_exit_callback);
btx_register_callbacks_lttng_ust_ze_zeCommandQueueCreate_entry(
btx_handle, zeCommandQueueCreate_entry_callback);
btx_register_callbacks_lttng_ust_ze_zeCommandQueueCreate_exit(
btx_handle, zeCommandQueueCreate_exit_callback);
btx_register_callbacks_lttng_ust_ze_zeCommandQueueCreate_exit(btx_handle,
zeCommandQueueCreate_exit_callback);
btx_register_callbacks_lttng_ust_ze_zeCommandQueueExecuteCommandLists_entry(
btx_handle, zeCommandQueueExecuteCommandLists_entry_callback);

Expand All @@ -1111,7 +1183,7 @@ void btx_register_usr_callbacks(void *btx_handle) {

/* Drift */
btx_register_callbacks_lttng_ust_ze_properties_device_timer(btx_handle,
&property_device_timer_callback);
&property_device_timer_callback);

/* Profiling Command (everything that signals an event on completion)
*/
Expand All @@ -1131,22 +1203,25 @@ void btx_register_usr_callbacks(void *btx_handle) {

btx_register_callbacks_lttng_ust_ze_zeModuleGetGlobalPointer_exit(
btx_handle, &zeModuleGetGlobalPointer_exit_callback);
btx_register_callbacks_lttng_ust_ze_zeModuleDestroy_exit(
btx_handle, &zeModuleDestroy_exit_callback);
btx_register_callbacks_lttng_ust_ze_zeModuleDestroy_exit(btx_handle,
&zeModuleDestroy_exit_callback);

/* Handling of event */
btx_register_callbacks_lttng_ust_ze_profiling_event_profiling(
btx_handle, &event_profiling_callback);
btx_register_callbacks_lttng_ust_ze_profiling_event_profiling(btx_handle,
&event_profiling_callback);
btx_register_callbacks_lttng_ust_ze_profiling_event_profiling_results(
btx_handle, &event_profiling_result_callback);
btx_register_callbacks_lttng_ust_ze_zeEventDestroy_entry(
btx_handle, &zeEventDestroy_entry_callback);
btx_register_callbacks_lttng_ust_ze_zeEventDestroy_exit(
btx_handle, &zeEventDestroy_exit_callback);
btx_register_callbacks_lttng_ust_ze_zeCommandListReset_entry(
btx_handle, &zeCommandListReset_entry_callback);
btx_register_callbacks_lttng_ust_ze_zeCommandListReset_exit(
btx_handle, &zeCommandListReset_exit_callback);
btx_register_callbacks_lttng_ust_ze_profiling_event_profiling_results_v2(
btx_handle, &event_profiling_results_v2_callback);

btx_register_callbacks_lttng_ust_ze_zeEventDestroy_entry(btx_handle,
&zeEventDestroy_entry_callback);
btx_register_callbacks_lttng_ust_ze_zeEventDestroy_exit(btx_handle,
&zeEventDestroy_exit_callback);
btx_register_callbacks_lttng_ust_ze_zeCommandListReset_entry(btx_handle,
&zeCommandListReset_entry_callback);
btx_register_callbacks_lttng_ust_ze_zeCommandListReset_exit(btx_handle,
&zeCommandListReset_exit_callback);

/* Sampling */

Expand All @@ -1165,8 +1240,8 @@ void btx_register_usr_callbacks(void *btx_handle) {
btx_handle, &lttng_ust_ze_sampling_memoryProperties_callback);

// Telemetries
btx_register_callbacks_lttng_ust_ze_sampling_memStats(
btx_handle, &lttng_ust_ze_sampling_memStats_callback);
btx_register_callbacks_lttng_ust_ze_sampling_memStats(btx_handle,
&lttng_ust_ze_sampling_memStats_callback);
btx_register_callbacks_lttng_ust_ze_sampling_fabricPort(
btx_handle, &lttng_ust_ze_sampling_fabricPort_callback);
btx_register_callbacks_lttng_ust_ze_sampling_gpu_energy(
Expand Down
2 changes: 2 additions & 0 deletions ze/btx_zeinterval_callbacks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ struct data_s {
std::unordered_map<hpt_t, std::pair<ze_device_handle_t, ze_command_queue_desc_t>> imm_tmp;

std::unordered_map<hp_device_t, clock_lttng_device_t> device_timestamps_pair_ref;
std::unordered_map<hp_t, int64_t> sync_lttng_monotonic_ref;

/* Sampling */
std::unordered_map<hpd_t, deviceProperty_id_t> sampling_device_property;
std::unordered_map<hpdf_t, zes_fabric_port_properties_t> fabricPort_property;
Expand Down
53 changes: 47 additions & 6 deletions ze/tracer_ze_helpers.include.c
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ static struct _ze_event_h * _get_profiling_event(
}

e_w->command_list = command_list;
ze_event_pool_desc_t desc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, NULL, ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP | ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 1};
ze_event_pool_desc_t desc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, NULL, ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP | ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP, 1};
ze_result_t res = ZE_EVENT_POOL_CREATE_PTR(context, &desc, 0, NULL, &e_w->event_pool);
if (res != ZE_RESULT_SUCCESS) {
THAPI_DBGLOG("zeEventPoolCreate failed with %d, for command list: %p, context: %p", res, command_list, context);
Expand All @@ -399,6 +399,7 @@ static struct _ze_event_h * _get_profiling_event(
}

static void _profile_event_results(ze_event_handle_t event);
static void _profile_event_results_v2(ze_event_handle_t event, ze_command_list_handle_t);

static inline void _on_created_event(ze_event_handle_t event) {
#ifdef THAPI_DEBUG
Expand Down Expand Up @@ -441,8 +442,10 @@ static inline void _on_destroy_event(ze_event_handle_t event) {
return;
}

if (!(ze_event->flags & _ZE_PROFILED))
if (!(ze_event->flags & _ZE_PROFILED)) {
_profile_event_results(event);
_profile_event_results_v2(event, ze_event->command_list);
}
PUT_ZE_EVENT_WRAPPER(ze_event);
}

Expand All @@ -455,8 +458,10 @@ static inline void _unregister_ze_event(ze_event_handle_t event, int get_results
return;
}

if (get_results && !(ze_event->flags & _ZE_PROFILED))
if (get_results && !(ze_event->flags & _ZE_PROFILED)) {
_profile_event_results(event);
_profile_event_results_v2(event, ze_event->command_list);
}
if (ze_event->event_pool)
PUT_ZE_EVENT(ze_event);
else
Expand All @@ -472,8 +477,10 @@ static inline void _on_reset_event(ze_event_handle_t event) {
return;
}

if (!(ze_event->flags & _ZE_PROFILED))
if (!(ze_event->flags & _ZE_PROFILED)) {
_profile_event_results(event);
_profile_event_results_v2(event, ze_event->command_list);
}

if (!(ze_event->flags & _ZE_IMMEDIATE_CMD))
ADD_ZE_EVENT(ze_event);
Expand All @@ -491,6 +498,8 @@ static inline void _dump_and_reset_our_event(ze_event_handle_t event) {
}

_profile_event_results(event);
_profile_event_results_v2(event, ze_event->command_list);

ZE_EVENT_HOST_RESET_PTR(event);

ze_event->flags &= ~_ZE_PROFILED;
Expand All @@ -513,15 +522,45 @@ static void _profile_event_results(ze_event_handle_t event) {
res.context.kernelEnd);
}
}
/*
 * Query the synchronized kernel timestamps of `event` and emit them through
 * the `event_profiling_results_v2` tracepoint.
 *
 * The device needed by the timestamp query is obtained from `command_list`.
 * Any failure along the way (device lookup, count query, timestamp query) is
 * reported through the `timestampsStatus` field of the tracepoint rather than
 * being ignored. At most MAX_TIMESTAMP_PACKETS results are retrieved.
 */
static void _profile_event_results_v2(ze_event_handle_t event, ze_command_list_handle_t command_list) {
  /* Capacity of the on-stack result buffers below. */
  enum { MAX_TIMESTAMP_PACKETS = 2 };

  /* Check the tracepoint that is actually emitted at the end of this function. */
  if (!tracepoint_enabled(lttng_ust_ze_profiling, event_profiling_results_v2)) {
    return;
  }
  ze_result_t status = ZE_EVENT_QUERY_STATUS_PTR(event);

  /* Zero-initialized so that a failed query never leaks stack garbage into the trace. */
  ze_kernel_timestamp_result_t kernelTimestamps[MAX_TIMESTAMP_PACKETS] = {0};
  ze_synchronized_timestamp_result_ext_t synchronizedTimestamps[MAX_TIMESTAMP_PACKETS] = {0};
  uint32_t pCount = 0;

  /* Find Device */
  ze_device_handle_t device = NULL;
  ze_result_t timestamps_status = ZE_COMMAND_LIST_GET_DEVICE_HANDLE_PTR(command_list, &device);

  /* Query the number of available timestamps; only done with a valid device. */
  if (timestamps_status == ZE_RESULT_SUCCESS)
    timestamps_status = ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_PTR(event, device, &pCount, NULL);

  if (timestamps_status == ZE_RESULT_SUCCESS && pCount > 0) {
    /* Never ask for more entries than the buffers can hold: an assert would
     * vanish under NDEBUG and let the driver write past the arrays. */
    if (pCount > MAX_TIMESTAMP_PACKETS)
      pCount = MAX_TIMESTAMP_PACKETS;

    ze_event_query_kernel_timestamps_results_ext_properties_t resultsProps = {
      .stype = ZE_STRUCTURE_TYPE_EVENT_QUERY_KERNEL_TIMESTAMPS_RESULTS_EXT_PROPERTIES,
      .pNext = NULL,
      .pKernelTimestampsBuffer = kernelTimestamps,
      .pSynchronizedTimestampsBuffer = synchronizedTimestamps } ;
    timestamps_status = ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_PTR(event, device, &pCount, &resultsProps);
  }

  /* Send */
  do_tracepoint(lttng_ust_ze_profiling, event_profiling_results_v2,
                event, status, timestamps_status, pCount, synchronizedTimestamps);
}

static void _event_cleanup() {
struct _ze_event_h *ze_event = NULL;
struct _ze_event_h *tmp = NULL;

HASH_ITER(hh, _ze_events, ze_event, tmp) {
HASH_DEL(_ze_events, ze_event);
if (ze_event->event && !(ze_event->flags & _ZE_PROFILED))
if (ze_event->event && !(ze_event->flags & _ZE_PROFILED)) {
_profile_event_results(ze_event->event);
_profile_event_results_v2(ze_event->event, ze_event->command_list);
}

if (ze_event->event_pool) {
if (ze_event->event)
ZE_EVENT_DESTROY_PTR(ze_event->event);
Expand All @@ -538,8 +577,10 @@ static void _on_destroy_context(ze_context_handle_t context){
HASH_ITER(hh, _ze_events, ze_event, tmp) {
if (ze_event->context == context) {
HASH_DEL(_ze_events, ze_event);
if (ze_event->event && !(ze_event->flags & _ZE_PROFILED))
if (ze_event->event && !(ze_event->flags & _ZE_PROFILED)) {
_profile_event_results(ze_event->event);
_profile_event_results_v2(ze_event->event, ze_event->command_list);
}
if (ze_event->event_pool) {
if (ze_event->event)
ZE_EVENT_DESTROY_PTR(ze_event->event);
Expand Down
13 changes: 13 additions & 0 deletions ze/ze_events.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,19 @@ lttng_ust_ze_profiling:
- [ ctf_integer, uint64_t, globalEnd, "globalEnd" ]
- [ ctf_integer, uint64_t, contextStart, "contextStart" ]
- [ ctf_integer, uint64_t, contextEnd, "contextEnd" ]
- name: event_profiling_results_v2
args:
- [ ze_event_handle_t, hEvent ]
- [ ze_result_t, status ]
- [ ze_result_t, timestampsStatus ]
- [ uint32_t, count ]
- [ ze_synchronized_timestamp_result_ext_t *, pSynchronizedTimestamps ]
fields:
- [ ctf_integer_hex, uintptr_t, hEvent, "(uintptr_t)hEvent" ]
- [ ctf_integer, ze_result_t, status, "status" ]
- [ ctf_integer, ze_result_t, timestampsStatus, "timestampsStatus" ]
- [ ctf_integer, uint32_t, count, "count" ]
- [ ctf_sequence_text, uint8_t, pSynchronizedTimestamps_vals, pSynchronizedTimestamps, size_t, "sizeof(ze_synchronized_timestamp_result_ext_t)*count" ]
lttng_ust_ze_properties:
events:
- name: driver
Expand Down
2 changes: 1 addition & 1 deletion ze/ze_model.rb
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def upper_snake_case(str)
ze_event_pool_desc_t _new_desc;
if (_do_profile && desc && !(desc->flags & ZE_EVENT_POOL_FLAG_IPC)) {
_new_desc = *desc;
_new_desc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
_new_desc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;
_new_desc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
desc = &_new_desc;
}
Expand Down
Loading