Skip to content

Commit 84cf395

Browse files
rnayar3copybara-github
authored and committed
Setup ondemand loading for traceViewer counter events
PiperOrigin-RevId: 783647694
1 parent 87e1227 commit 84cf395

File tree

2 files changed

+82
-11
lines changed

2 files changed

+82
-11
lines changed

plugin/trace_viewer/tf_trace_viewer/tf-trace-viewer.html

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -526,8 +526,10 @@
526526
_loadedTraceEents: Object,
527527
_fullBounds: Object,
528528
_isLoading: {type: Boolean, value: false},
529+
_isCounterEventsLoading: {type: Boolean, value: false},
529530
_dirty: {type: Boolean, value: false},
530531
_tasks: Object,
532+
_counterEventsOffset: {type: Number, value: 0},
531533
_model: Object,
532534
_resolution: {type: Number, value: 1000},
533535
_filteredByVisibility: {type: Boolean, value: false},
@@ -1815,13 +1817,14 @@
18151817
let startUpdateView;
18161818

18171819
try {
1818-
const data = await this._loadJSON(requestedRange)
1820+
const data = await this._loadJSON(requestedRange);
18191821
if (!this._isOss) {
18201822
this._addInitialResponseDataLatency(performance.now());
18211823
this._addDataResponseSize(data.length);
18221824
}
18231825
startUpdateModel = performance.now();
18241826
const jsonData = JSON.parse(data);
1827+
this._maybeFetchMoreCounterEvents(jsonData);
18251828
this._codeLink = jsonData['codeLink'];
18261829
if (!this._model /* first load */) {
18271830
this._collapseBigProcessTracks(jsonData);
@@ -1947,9 +1950,6 @@
19471950
if (!jsonData['returnedEventsSize']) {
19481951
this._displayOverlay('Trace Viewer', 'No trace events data returned.');
19491952
}
1950-
if (jsonData['showCounterMessage'].length > 0) {
1951-
this._displayOverlay('Counters: ', jsonData['showCounterMessage']);
1952-
}
19531953
this._dirty = true;
19541954
this._model = new tr.Model();
19551955
this._loadedTraceEvents = new Set();
@@ -2012,6 +2012,7 @@
20122012
let eventsToAdd = [];
20132013
jsonData.traceEvents.forEach(event => {
20142014
if (event.entries) {
2015+
let groupedCounterEventsStartTime = performance.now();
20152016
// Handle Grouped Counter Events
20162017
let argName = event.event_stats;
20172018
event.entries.forEach(entry => {
@@ -2026,6 +2027,7 @@
20262027
};
20272028
eventsToAdd.push(newEvent);
20282029
});
2030+
this._addProcessGroupCouterLatency(performance.now() - groupedCounterEventsStartTime);
20292031
} else {
20302032
// Handle Regular Events
20312033
eventsToAdd.push(event);
@@ -2047,9 +2049,63 @@
20472049
}
20482050
window.parent.postMessage({'type': 'processes-list','data': processList,}, '*');
20492051
},
2052+
// Some sessions may have counter events that are not returned in the initial response.
2053+
// This function sends a request to fetch more counter events if the offset is set in the
2054+
// response. This is done using the counter events offset returned in the initial response.
2055+
// The counter events offset is set to 0 from the backend, which will stop subsequent fetches.
2056+
_maybeFetchMoreCounterEvents: function(jsonData) {
2057+
if (!jsonData['counterEventsOffset'] || jsonData['counterEventsOffset'] === 0) {
2058+
// console.log("Progressive loading: No more counter events available.");
2059+
return;
2060+
}
2061+
// console.log("Progressive loading: More counter events available. Offset:", jsonData['counterEventsOffset']);
2062+
this._counterEventsOffset = jsonData['counterEventsOffset'];
2063+
this._replaceModel = false;
2064+
const requestURL = this._buildBaseURL();
2065+
const requestedRange = null;
2066+
let retries = 5;
2067+
let moreCounterEventsStartTime = performance.now();
2068+
const fetchWithRetry = async () => {
2069+
try {
2070+
const data = await this._loadJSON(requestedRange);
2071+
this._addFetchCounterEventsLatency(performance.now() - moreCounterEventsStartTime);
2072+
const jsonData = JSON.parse(data);
2073+
// console.log("Progressive loading: Counter events fetched. New offset:", jsonData['counterEventsOffset']);
2074+
// console.log("Progressive loading: All counter events loaded. Updating view.");
2075+
return jsonData;
2076+
} catch (error) {
2077+
if (retries > 0) {
2078+
retries--;
2079+
console.error('Failed to fetch more counter events, retrying...', error);
2080+
await new Promise(resolve => setTimeout(resolve, 1000)); // Wait 1 second before retrying
2081+
return fetchWithRetry();
2082+
} else {
2083+
console.error('Failed to fetch more counter events after multiple retries:', error);
2084+
this._displayOverlay('Error', 'Failed to fetch more counter events.');
2085+
return null;
2086+
}
2087+
}
2088+
};
2089+
this._isCounterEventsLoading = true; // Set loading to true
2090+
fetchWithRetry().then(jsonData => {
2091+
this._isCounterEventsLoading = false; // Set loading to false
2092+
if (jsonData) {
2093+
this._updateModel(jsonData, /* replaceModel= */ false);
2094+
if (jsonData['counterEventsOffset'] === 0) {
2095+
this._updateView(this._loadedRange);
2096+
}
2097+
window.parent.gtag && window.parent.gtag('event', 'tv-counter-events-reload', {
2098+
'screen_name': 'trace viewer',
2099+
'event_category': 'reload',
2100+
'event_label': 'counter_events_reload',
2101+
'event_variable': requestURL.toString()});
2102+
}
2103+
});
2104+
},
20502105

20512106
// Updates the view based on the current model.
20522107
_updateView: async function(requestedRange) {
2108+
// console.log("updateView: requestedRange:", requestedRange);
20532109
if (requestedRange == null) {
20542110
this._loadedRange = tf_component_traceviewer.expand(
20552111
this._fullBounds,
@@ -2100,6 +2156,16 @@
21002156
node.value = latency;
21012157
},
21022158

2159+
_addProcessGroupCouterLatency: function(latency) {
2160+
const node = parent.document.getElementById('process-group-counter-latency');
2161+
node.value = latency;
2162+
},
2163+
2164+
_addFetchCounterEventsLatency: function(latency) {
2165+
const node = parent.document.getElementById('fetch-counter-events-latency');
2166+
node.value = latency;
2167+
},
2168+
21032169
// Access the {min, max} range of a trackView.
21042170
_trackViewRange: function(trackView) {
21052171
return this._calcViewportRange(trackView.viewport.currentDisplayTransform, trackView.viewWidth_);
@@ -2182,6 +2248,7 @@
21822248
}
21832249
requestURL.searchParams.set('replace_model', this._replaceModel);
21842250
requestURL.searchParams.set('new_backend', this._useNewBackend);
2251+
requestURL.searchParams.set('counter_events_offset', this._counterEventsOffset);
21852252
return requestURL;
21862253
},
21872254

xprof/convert/trace_viewer/trace_events_to_json.h

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ limitations under the License.
5555
namespace tensorflow {
5656
namespace profiler {
5757

58+
// Counter-event count at which TraceEventsToJson treats a response as full:
// once the writer reports at least this many counter events, the emitted
// "counterEventsOffset" is the requested offset advanced by this amount;
// otherwise the emitted offset is 0.
inline constexpr size_t kMaxCounterEvents = 14000000;
59+
5860
// JSON generation options.
5961
struct JsonTraceOptions {
6062
using Details = std::vector<std::pair<std::string, bool>>;
@@ -73,6 +75,7 @@ struct JsonTraceOptions {
7375
bool use_new_backend = false;
7476
std::string code_link;
7577
bool use_grouped_json_counter_events = true;
78+
uint64_t counter_events_offset = 0;
7679
};
7780

7881
// Counts generated JSON events by type.
@@ -725,15 +728,16 @@ void TraceEventsToJson(const JsonTraceOptions& options,
725728
output->Append("]}");
726729
}
727730
size_t counter_event_count = writer.GetCounterEventCount();
728-
VLOG(1) << "Counter event count: " << counter_event_count;
729-
if (counter_event_count == 14000000) {
730-
output->Append(
731-
R"(], "showCounterMessage": "Only 14M counter events are shown. Zoom in or pan to see more." )");
731+
size_t counter_event_offset = options.counter_events_offset;
732+
if (counter_event_count >= kMaxCounterEvents) {
733+
counter_event_offset += kMaxCounterEvents;
732734
} else {
733-
output->Append(R"(], "showCounterMessage": "" )");
735+
counter_event_offset = 0;
734736
}
735-
output->Append(R"(,"totalCounterEvents":)", counter_event_count);
736-
output->Append(R"(,"counterEventsOffset":)", 0);
737+
VLOG(1) << "Counter event offset: " << counter_event_offset;
738+
VLOG(1) << "Counter event count: " << counter_event_count;
739+
output->Append(R"(],"totalCounterEvents":)", counter_event_count);
740+
output->Append(R"(,"counterEventsOffset":)", counter_event_offset);
737741
output->Append(R"(})");
738742
}
739743

0 commit comments

Comments
 (0)