Skip to content

Commit 7a3cfab

Browse files
committed
gh-138709: Implement CPU time profiling in profiling.sample
1 parent af58a6f commit 7a3cfab

13 files changed

+563
-36
lines changed

Include/internal/pycore_global_objects_fini_generated.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_global_strings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ struct _Py_global_strings {
387387
STRUCT_FOR_ID(coro)
388388
STRUCT_FOR_ID(count)
389389
STRUCT_FOR_ID(covariant)
390+
STRUCT_FOR_ID(cpu_time)
390391
STRUCT_FOR_ID(ctx)
391392
STRUCT_FOR_ID(cwd)
392393
STRUCT_FOR_ID(d_parameter_type)

Include/internal/pycore_runtime_init_generated.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_unicodeobject_generated.h

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/profiling/sampling/collector.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
from abc import ABC, abstractmethod
22

3+
# Thread states are plain integer constants rather than an Enum because enums are slow
4+
THREAD_STATE_RUNNING = 0
5+
THREAD_STATE_IDLE = 1
6+
THREAD_STATE_GIL_WAIT = 2
7+
THREAD_STATE_UNKNOWN = 3
8+
9+
STATUS = {
10+
THREAD_STATE_RUNNING: "running",
11+
THREAD_STATE_IDLE: "idle",
12+
THREAD_STATE_GIL_WAIT: "gil_wait",
13+
THREAD_STATE_UNKNOWN: "unknown",
14+
}
315

416
class Collector(ABC):
517
@abstractmethod
@@ -10,10 +22,12 @@ def collect(self, stack_frames):
1022
def export(self, filename):
1123
"""Export collected data to a file."""
1224

13-
def _iter_all_frames(self, stack_frames):
25+
def _iter_all_frames(self, stack_frames, skip_idle=False):
1426
"""Iterate over all frame stacks from all interpreters and threads."""
1527
for interpreter_info in stack_frames:
1628
for thread_info in interpreter_info.threads:
29+
if skip_idle and thread_info.status != THREAD_STATE_RUNNING:
30+
continue
1731
frames = thread_info.frame_info
1832
if frames:
1933
yield frames

Lib/profiling/sampling/pstats_collector.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66

77
class PstatsCollector(Collector):
8-
def __init__(self, sample_interval_usec):
8+
def __init__(self, sample_interval_usec, *, skip_idle=False):
99
self.result = collections.defaultdict(
1010
lambda: dict(total_rec_calls=0, direct_calls=0, cumulative_calls=0)
1111
)
@@ -14,6 +14,7 @@ def __init__(self, sample_interval_usec):
1414
self.callers = collections.defaultdict(
1515
lambda: collections.defaultdict(int)
1616
)
17+
self.skip_idle = skip_idle
1718

1819
def _process_frames(self, frames):
1920
"""Process a single thread's frame stack."""
@@ -40,7 +41,7 @@ def _process_frames(self, frames):
4041
self.callers[callee][caller] += 1
4142

4243
def collect(self, stack_frames):
43-
for frames in self._iter_all_frames(stack_frames):
44+
for frames in self._iter_all_frames(stack_frames, skip_idle=self.skip_idle):
4445
self._process_frames(frames)
4546

4647
def export(self, filename):

Lib/profiling/sampling/sample.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -114,18 +114,18 @@ def _run_with_sync(original_cmd):
114114

115115

116116
class SampleProfiler:
117-
def __init__(self, pid, sample_interval_usec, all_threads):
117+
def __init__(self, pid, sample_interval_usec, all_threads, *, cpu_time=False):
118118
self.pid = pid
119119
self.sample_interval_usec = sample_interval_usec
120120
self.all_threads = all_threads
121121
if _FREE_THREADED_BUILD:
122122
self.unwinder = _remote_debugging.RemoteUnwinder(
123-
self.pid, all_threads=self.all_threads
123+
self.pid, all_threads=self.all_threads, cpu_time=cpu_time
124124
)
125125
else:
126126
only_active_threads = bool(self.all_threads)
127127
self.unwinder = _remote_debugging.RemoteUnwinder(
128-
self.pid, only_active_thread=only_active_threads
128+
self.pid, only_active_thread=only_active_threads, cpu_time=cpu_time
129129
)
130130
# Track sample intervals and total sample count
131131
self.sample_intervals = deque(maxlen=100)
@@ -583,18 +583,19 @@ def sample(
583583
show_summary=True,
584584
output_format="pstats",
585585
realtime_stats=False,
586+
skip_idle=False,
586587
):
587588
profiler = SampleProfiler(
588-
pid, sample_interval_usec, all_threads=all_threads
589+
pid, sample_interval_usec, all_threads=all_threads, cpu_time=skip_idle
589590
)
590591
profiler.realtime_stats = realtime_stats
591592

592593
collector = None
593594
match output_format:
594595
case "pstats":
595-
collector = PstatsCollector(sample_interval_usec)
596+
collector = PstatsCollector(sample_interval_usec, skip_idle=skip_idle)
596597
case "collapsed":
597-
collector = CollapsedStackCollector()
598+
collector = CollapsedStackCollector(skip_idle=skip_idle)
598599
filename = filename or f"collapsed.{pid}.txt"
599600
case _:
600601
raise ValueError(f"Invalid output format: {output_format}")
@@ -644,6 +645,7 @@ def wait_for_process_and_sample(pid, sort_value, args):
644645
filename = args.outfile
645646
if not filename and args.format == "collapsed":
646647
filename = f"collapsed.{pid}.txt"
648+
skip_idle = True if args.mode == "cpu" else False
647649

648650
sample(
649651
pid,
@@ -656,6 +658,7 @@ def wait_for_process_and_sample(pid, sort_value, args):
656658
show_summary=not args.no_summary,
657659
output_format=args.format,
658660
realtime_stats=args.realtime_stats,
661+
skip_idle=skip_idle,
659662
)
660663

661664

@@ -710,6 +713,15 @@ def main():
710713
help="Print real-time sampling statistics (Hz, mean, min, max, stdev) during profiling",
711714
)
712715

716+
# Mode options
717+
mode_group = parser.add_argument_group("Mode options")
718+
mode_group.add_argument(
719+
"--mode",
720+
choices=["wall", "cpu"],
721+
default="wall",
722+
help="Sampling mode: wall (wall-clock time, default, skip_idle=False) or cpu (CPU time, skip_idle=True)",
723+
)
724+
713725
# Output format selection
714726
output_group = parser.add_argument_group("Output options")
715727
output_format = output_group.add_mutually_exclusive_group()
@@ -826,6 +838,9 @@ def main():
826838
elif target_count > 1:
827839
parser.error("only one target type can be specified: -p/--pid, -m/--module, or script")
828840

841+
# Set skip_idle based on mode
842+
skip_idle = True if args.mode == "cpu" else False
843+
829844
if args.pid:
830845
sample(
831846
args.pid,
@@ -838,6 +853,7 @@ def main():
838853
show_summary=not args.no_summary,
839854
output_format=args.format,
840855
realtime_stats=args.realtime_stats,
856+
skip_idle=skip_idle,
841857
)
842858
elif args.module or args.args:
843859
if args.module:

Lib/profiling/sampling/stack_collector.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@
55

66

77
class StackTraceCollector(Collector):
8-
def __init__(self):
8+
def __init__(self, *, skip_idle=False):
99
self.call_trees = []
1010
self.function_samples = collections.defaultdict(int)
11+
self.skip_idle = skip_idle
1112

1213
def _process_frames(self, frames):
1314
"""Process a single thread's frame stack."""
@@ -23,7 +24,7 @@ def _process_frames(self, frames):
2324
self.function_samples[frame] += 1
2425

2526
def collect(self, stack_frames):
26-
for frames in self._iter_all_frames(stack_frames):
27+
for frames in self._iter_all_frames(stack_frames, skip_idle=self.skip_idle):
2728
self._process_frames(frames)
2829

2930

Lib/test/test_external_inspection.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1670,6 +1670,115 @@ def test_unsupported_platform_error(self):
16701670
str(cm.exception)
16711671
)
16721672

1673+
class TestDetectionOfThreadStatus(unittest.TestCase):
1674+
@unittest.skipIf(
1675+
sys.platform not in ("linux", "darwin", "win32"),
1676+
"Test only runs on supported platforms (Linux, macOS, or Windows)",
1677+
)
1678+
@unittest.skipIf(sys.platform == "android", "Android raises Linux-specific exception")
1679+
def test_thread_status_detection(self):
1680+
port = find_unused_port()
1681+
script = textwrap.dedent(
1682+
f"""\
1683+
import time, sys, socket, threading
1684+
import os
1685+
1686+
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1687+
sock.connect(('localhost', {port}))
1688+
1689+
def sleeper():
1690+
tid = threading.get_native_id()
1691+
sock.sendall(f'ready:sleeper:{{tid}}\\n'.encode())
1692+
time.sleep(10000)
1693+
1694+
def busy():
1695+
tid = threading.get_native_id()
1696+
sock.sendall(f'ready:busy:{{tid}}\\n'.encode())
1697+
x = 0
1698+
while True:
1699+
x = x + 1
1700+
time.sleep(0.5)
1701+
1702+
t1 = threading.Thread(target=sleeper)
1703+
t2 = threading.Thread(target=busy)
1704+
t1.start()
1705+
t2.start()
1706+
sock.sendall(b'ready:main\\n')
1707+
t1.join()
1708+
t2.join()
1709+
sock.close()
1710+
"""
1711+
)
1712+
with os_helper.temp_dir() as work_dir:
1713+
script_dir = os.path.join(work_dir, "script_pkg")
1714+
os.mkdir(script_dir)
1715+
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1716+
server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1717+
server_socket.bind(("localhost", port))
1718+
server_socket.settimeout(SHORT_TIMEOUT)
1719+
server_socket.listen(1)
1720+
1721+
script_name = _make_test_script(script_dir, "thread_status_script", script)
1722+
client_socket = None
1723+
try:
1724+
p = subprocess.Popen([sys.executable, script_name])
1725+
client_socket, _ = server_socket.accept()
1726+
server_socket.close()
1727+
response = b""
1728+
sleeper_tid = None
1729+
busy_tid = None
1730+
while True:
1731+
chunk = client_socket.recv(1024)
1732+
response += chunk
1733+
if b"ready:main" in response and b"ready:sleeper" in response and b"ready:busy" in response:
1734+
# Parse TIDs from the response
1735+
for line in response.split(b"\n"):
1736+
if line.startswith(b"ready:sleeper:"):
1737+
try:
1738+
sleeper_tid = int(line.split(b":")[-1])
1739+
except Exception:
1740+
pass
1741+
elif line.startswith(b"ready:busy:"):
1742+
try:
1743+
busy_tid = int(line.split(b":")[-1])
1744+
except Exception:
1745+
pass
1746+
break
1747+
1748+
attempts = 10
1749+
try:
1750+
unwinder = RemoteUnwinder(p.pid, all_threads=True, cpu_time=True)
1751+
for _ in range(attempts):
1752+
traces = unwinder.get_stack_trace()
1753+
# Check if any thread is running
1754+
if any(thread_info.status == 0 for interpreter_info in traces
1755+
for thread_info in interpreter_info.threads):
1756+
break
1757+
time.sleep(0.5) # Give a bit of time to let threads settle
1758+
except PermissionError:
1759+
self.skipTest(
1760+
"Insufficient permissions to read the stack trace"
1761+
)
1762+
1763+
1764+
# Find threads and their statuses
1765+
statuses = {}
1766+
for interpreter_info in traces:
1767+
for thread_info in interpreter_info.threads:
1768+
statuses[thread_info.thread_id] = thread_info.status
1769+
1770+
self.assertIsNotNone(sleeper_tid, "Sleeper thread id not received")
1771+
self.assertIsNotNone(busy_tid, "Busy thread id not received")
1772+
self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads")
1773+
self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads")
1774+
self.assertEqual(statuses[sleeper_tid], 1, "Sleeper thread should be idle (1)")
1775+
self.assertEqual(statuses[busy_tid], 0, "Busy thread should be running (0)")
1776+
1777+
finally:
1778+
if client_socket is not None:
1779+
client_socket.close()
1780+
p.terminate()
1781+
p.wait(timeout=SHORT_TIMEOUT)
16731782

16741783
if __name__ == "__main__":
16751784
unittest.main()

0 commit comments

Comments
 (0)