Skip to content

Commit b5fb8c0

Browse files
Add initial script
1 parent c9209aa commit b5fb8c0

File tree

1 file changed

+193
-0
lines changed

1 file changed

+193
-0
lines changed
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
2+
# See file LICENSE for terms.
3+
4+
import sqlite3
5+
import pandas as pd
6+
import numpy as np
7+
import subprocess
8+
import argparse
9+
import pathlib
10+
import os
11+
12+
13+
class Analyzer:
    """Print I/O-size histograms for NVTX events of an Nsight Systems report.

    The report is exported to an SQLite database with the ``nsys`` CLI. For
    each annotation string in ``_FILTER_STRINGS`` the matching NVTX events are
    queried and a histogram of their ``int64Value`` payload (used as the I/O
    size in bytes) is printed with power-of-two bins.
    """

    # NVTX annotation substrings for which a histogram is produced.
    _FILTER_STRINGS = (
        "FileHandle::pread()",
        "FileHandle::pwrite()",
        "posix_device_read()",
        "posix_device_write()",
        "posix_host_read()",
        "posix_host_write()",
        "cufileRead()",
        "cufileWrite()",
        "RemoteHandle::read()",
        "RemoteHandle::pread()",
    )

    def __init__(self, args: argparse.Namespace):
        """Resolve the report path, the database path and the nsys executable.

        :param args: Parsed command-line arguments providing
            ``nsys_report_path``, ``sql_path`` and ``nsys_binary``. The latter
            two may be ``None``, in which case defaults are used.
        :type args: argparse.Namespace
        """
        self.nsys_report_path = args.nsys_report_path

        if args.sql_path is None:
            # Default: <current working directory>/<report stem>.sqlite
            report_stem = pathlib.Path(self.nsys_report_path).stem
            self.sql_path = os.path.join(os.getcwd(), report_stem + ".sqlite")
        else:
            self.sql_path = args.sql_path

        self.nsys_binary = "nsys" if args.nsys_binary is None else args.nsys_binary

    def _export_report_to_sqlite(self):
        """Export the StringIds and NVTX_EVENTS tables of the report to SQLite.

        :raises subprocess.CalledProcessError: If the export command exits
            with a non-zero status.
        """
        # Build the argument vector directly instead of splitting a command
        # string, so that paths containing whitespace stay in one argument.
        cmd = [
            str(self.nsys_binary),
            "export",
            "--type=sqlite",
            "--lazy=false",
            "--force-overwrite=true",
            f"--output={self.sql_path}",
            str(self.nsys_report_path),
            "--tables=StringIds,NVTX_EVENTS",
        ]
        print(f"Command: {' '.join(cmd)}")
        # Fail here rather than later on a missing or incomplete database.
        subprocess.run(cmd, check=True)

    def _initialize_bins(self):
        """Create bins ranging from 0 B to 512 PiB"""
        powers_of_two = np.logspace(
            start=0, stop=59, num=60, base=2, dtype=np.float64
        )  # 2^0 2^1 ... 2^59
        self.bin_full = np.insert(powers_of_two, 0, 0.0)  # 0 2^0 2^1 ... 2^59
        # Same edges expressed in MiB. Not used for the histogram itself.
        self.bin_full_in_MiB = self.bin_full / 1024.0 / 1024.0

    def _sql_query(self, filter_string: str) -> pd.DataFrame:
        """Perform SQL query.

        The SQLite schema in nsys is not forward compatible, and may change completely in a new release.
        Refer to https://docs.nvidia.com/nsight-systems/UserGuide/index.html?highlight=schema#sqlite-schema-reference

        :param filter_string: NVTX annotation string serving as a filter for the query.
            It is matched as a literal substring (``%`` and ``_`` are not wildcards).
        :type filter_string: str
        :return: Pandas dataframe containing the SQL query result, with columns
            ``startTimeInNs``, ``ioSize`` and ``nvtxAnnotation``, ordered by start time.
        :rtype: pd.DataFrame
        """
        sql_expr = (
            "WITH io_string AS ( "
            "    SELECT * "
            "    FROM StringIds "
            "    WHERE value LIKE ? ESCAPE '\\' "
            "), "
            "io_marker AS ( "
            "    SELECT "
            "        start AS startTimeInNs, "
            "        int64Value AS ioSize, "
            "        value AS nvtxAnnotation "
            "    FROM NVTX_EVENTS "
            "    CROSS JOIN io_string "
            "    WHERE textId = io_string.id "
            ") "
            "SELECT * "
            "FROM io_marker "
            # Sort in the outermost query: row order of a CTE is not guaranteed
            # to carry over to the final result.
            "ORDER BY startTimeInNs;"
        )

        # Escape LIKE metacharacters and bind the pattern as a parameter
        # instead of formatting it into the SQL text.
        escaped = (
            filter_string.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        )
        df = pd.read_sql(sql_expr, self.db_connection, params=(f"%{escaped}%",))
        if df.empty:
            print(f'Warning: SQL result is empty for filter string "{filter_string}"')
        return df

    def _generate_hist(self, df: pd.DataFrame) -> tuple[np.ndarray, np.ndarray]:
        """Compute a histogram of the ``ioSize`` column using byte-sized bins.

        Only the leading bins of ``self.bin_full`` needed to cover the largest
        value are used. If the largest value exceeds the last predefined edge,
        it is appended as one extra edge.

        :param df: Dataframe with an ``ioSize`` column.
        :type df: pd.DataFrame
        :return: Counts per bin and the bin edges (in bytes), as returned by
            ``np.histogram``.
        :rtype: tuple[np.ndarray, np.ndarray]
        """
        # Rows without a payload (NULL in the database) carry no size.
        io_sizes = df["ioSize"].dropna()
        if io_sizes.empty:
            return np.histogram(np.empty(0), self.bin_full[:2])

        max_v = io_sizes.max()
        # Index of the first edge that is >= max_v (len(bin_full) if none is).
        idx_upperbound = int(np.searchsorted(self.bin_full, max_v, side="left"))

        if idx_upperbound >= len(self.bin_full):
            tight_bin_edges = np.append(self.bin_full, max_v)
        else:
            # Keep at least two edges so that there is always one bin.
            tight_bin_edges = self.bin_full[: max(idx_upperbound, 1) + 1]
        return np.histogram(io_sizes, tight_bin_edges)

    def _get_compact_filesize(self, file_size_inB: np.float64) -> str:
        """Format a size in bytes with the largest binary unit not exceeding it.

        The scaled value is truncated to an integer, e.g. 4096 -> ``"4 KiB"``.

        :param file_size_inB: Non-negative, finite size in bytes.
        :type file_size_inB: np.float64
        :return: Text such as ``"512 B"`` or ``"1 EiB"``.
        :rtype: str
        :raises ValueError: If the size is negative or not finite.
        """
        if not (np.isfinite(file_size_inB) and file_size_inB >= 0):
            raise ValueError("Invalid value for file_size.")

        units = ("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB")
        scaled = float(file_size_inB)
        for unit in units[:-1]:
            if scaled < 1024.0:
                return f"{int(scaled)} {unit}"
            scaled /= 1024.0
        # Everything from 1 EiB upwards is reported in EiB.
        return f"{int(scaled)} {units[-1]}"

    def _print(self, title, hist, bin_edges):
        """Print the histogram as one line per bin.

        :param title: Heading printed above the table.
        :param hist: Counts per bin.
        :param bin_edges: Bin edges in bytes; one more element than ``hist``.
        """
        print(f"\n{title}")
        print(" Bins ...... Count")
        last_idx = len(hist) - 1
        for idx, count in enumerate(hist):
            # Only the last bin includes its upper edge.
            symbol = "]" if idx == last_idx else ")"
            print(
                " [{:>8}, {:>8}{} ...... {}".format(
                    self._get_compact_filesize(bin_edges[idx]),
                    self._get_compact_filesize(bin_edges[idx + 1]),
                    symbol,
                    count,
                )
            )

    def _process(self, filter_string: str):
        """Query the events matching one annotation and print their histogram.

        Nothing is printed beyond the query warning when no event matches.

        :param filter_string: NVTX annotation string to look for.
        :type filter_string: str
        """
        df = self._sql_query(filter_string)
        if df.empty:
            return

        hist, bin_edges = self._generate_hist(df)
        self._print(filter_string, hist, bin_edges)

    def run(self):
        """Export the report, then print one histogram per known annotation."""
        self._initialize_bins()

        self._export_report_to_sqlite()
        self.db_connection = sqlite3.connect(self.sql_path)
        try:
            for filter_string in self._FILTER_STRINGS:
                self._process(filter_string)
        finally:
            # Release the database file even if a query fails.
            self.db_connection.close()
173+
174+
if __name__ == "__main__":
    # Command-line entry point: declare the options, parse them, and run the
    # analysis on the given report.
    cli_parser = argparse.ArgumentParser(
        description="Generate I/O size histogram from Nsight System report",
        prog="kvikio_stat",
    )

    # Mandatory input: the report to analyze.
    cli_parser.add_argument(
        "--nsys-report-path",
        type=str,
        required=True,
        help="The path of the Nsight System report.",
    )

    # Optional location of the exported database.
    cli_parser.add_argument(
        "--sql-path",
        type=str,
        help=(
            "The path of the SQL database exported from the Nsight System report. "
            "If unspecified, the current working directory is used to store the SQL database, "
            "and the file name is derived from the Nsight System report."
        ),
    )

    # Optional override of the nsys executable.
    cli_parser.add_argument(
        "--nsys-binary",
        type=str,
        help='The path of the Nsight System CLI program. If unspecified, "nsys" is used.',
    )

    args = cli_parser.parse_args()
    analyzer = Analyzer(args)
    analyzer.run()

0 commit comments

Comments
 (0)