gh-138122: Add exception profiling mode to the sampling profiler by pablogsal · Pull Request #142561 · python/cpython · GitHub
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 67 additions & 4 deletions Doc/library/profiling.sampling.rst
6 changes: 6 additions & 0 deletions Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ Key features include:
and blocking. Use this to identify CPU-bound bottlenecks and optimize computational work.
* **GIL-holding time** (``--mode gil``): Measures time spent holding Python's Global Interpreter
Lock. Use this to identify which threads dominate GIL usage in multi-threaded applications.
* **Exception handling time** (``--mode exception``): Captures samples only from threads with
an active exception. Use this to analyze exception handling overhead.

* **Thread-aware profiling**: Option to profile all threads (``-a``) or just the main thread,
essential for understanding multi-threaded application behavior.
Expand Down Expand Up @@ -175,6 +177,10 @@ Key features include:
(``--async-aware``). See which coroutines are consuming time, with options to show only
running tasks or all tasks including those waiting.

* **Opcode-level profiling**: Gather bytecode opcode information for instruction-level
profiling (``--opcodes``). Shows which bytecode instructions are executing, including
specializations from the adaptive interpreter.

See :mod:`profiling.sampling` for the complete documentation, including all
available output formats, profiling modes, and configuration options.

Expand Down
12 changes: 12 additions & 0 deletions Include/internal/pycore_debug_offsets.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,15 @@ typedef struct _Py_DebugOffsets {
uint64_t status;
uint64_t holds_gil;
uint64_t gil_requested;
uint64_t current_exception;
uint64_t exc_state;
} thread_state;

// Exception stack item offset
struct {
uint64_t exc_value;
} err_stackitem;

// InterpreterFrame offset;
struct _interpreter_frame {
uint64_t size;
Expand Down Expand Up @@ -282,6 +289,11 @@ typedef struct _Py_DebugOffsets {
.status = offsetof(PyThreadState, _status), \
.holds_gil = offsetof(PyThreadState, holds_gil), \
.gil_requested = offsetof(PyThreadState, gil_requested), \
.current_exception = offsetof(PyThreadState, current_exception), \
.exc_state = offsetof(PyThreadState, exc_state), \
}, \
.err_stackitem = { \
.exc_value = offsetof(_PyErr_StackItem, exc_value), \
}, \
.interpreter_frame = { \
.size = sizeof(_PyInterpreterFrame), \
Expand Down
4 changes: 4 additions & 0 deletions Lib/profiling/sampling/_flamegraph_assets/flamegraph.js
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,10 @@ function populateThreadStats(data, selectedThreadId = null) {

const gcPctElem = document.getElementById('gc-pct');
if (gcPctElem) gcPctElem.textContent = `${(threadStats.gc_pct || 0).toFixed(1)}%`;

// Exception stats
const excPctElem = document.getElementById('exc-pct');
if (excPctElem) excPctElem.textContent = `${(threadStats.has_exception_pct || 0).toFixed(1)}%`;
}

// ============================================================================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ <h3 class="section-title">Runtime Stats</h3>
<div class="stat-tile-value" id="gc-pct">--</div>
<div class="stat-tile-label">GC</div>
</div>
<div class="stat-tile stat-tile--red" id="exc-stat">
<div class="stat-tile-value" id="exc-pct">--</div>
<div class="stat-tile-label">Exception</div>
</div>
</div>
</div>
</section>
Expand Down
8 changes: 6 additions & 2 deletions Lib/profiling/sampling/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
PROFILING_MODE_WALL,
PROFILING_MODE_CPU,
PROFILING_MODE_GIL,
PROFILING_MODE_EXCEPTION,
SORT_MODE_NSAMPLES,
SORT_MODE_TOTTIME,
SORT_MODE_CUMTIME,
Expand Down Expand Up @@ -90,6 +91,7 @@ def _parse_mode(mode_string):
"wall": PROFILING_MODE_WALL,
"cpu": PROFILING_MODE_CPU,
"gil": PROFILING_MODE_GIL,
"exception": PROFILING_MODE_EXCEPTION,
}
return mode_map[mode_string]

Expand Down Expand Up @@ -213,10 +215,12 @@ def _add_mode_options(parser):
mode_group = parser.add_argument_group("Mode options")
mode_group.add_argument(
"--mode",
choices=["wall", "cpu", "gil"],
choices=["wall", "cpu", "gil", "exception"],
default="wall",
help="Sampling mode: wall (all samples), cpu (only samples when thread is on CPU), "
"gil (only samples when thread holds the GIL). Incompatible with --async-aware",
"gil (only samples when thread holds the GIL), "
"exception (only samples when thread has an active exception). "
"Incompatible with --async-aware",
)
mode_group.add_argument(
"--async-mode",
Expand Down
9 changes: 8 additions & 1 deletion Lib/profiling/sampling/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
THREAD_STATUS_ON_CPU,
THREAD_STATUS_GIL_REQUESTED,
THREAD_STATUS_UNKNOWN,
THREAD_STATUS_HAS_EXCEPTION,
)

try:
Expand Down Expand Up @@ -170,7 +171,7 @@ def _collect_thread_status_stats(self, stack_frames):

Returns:
tuple: (aggregate_status_counts, has_gc_frame, per_thread_stats)
- aggregate_status_counts: dict with has_gil, on_cpu, etc.
- aggregate_status_counts: dict with has_gil, on_cpu, has_exception, etc.
- has_gc_frame: bool indicating if any thread has GC frames
- per_thread_stats: dict mapping thread_id to per-thread counts
"""
Expand All @@ -179,6 +180,7 @@ def _collect_thread_status_stats(self, stack_frames):
"on_cpu": 0,
"gil_requested": 0,
"unknown": 0,
"has_exception": 0,
"total": 0,
}
has_gc_frame = False
Expand All @@ -200,6 +202,8 @@ def _collect_thread_status_stats(self, stack_frames):
status_counts["gil_requested"] += 1
if status_flags & THREAD_STATUS_UNKNOWN:
status_counts["unknown"] += 1
if status_flags & THREAD_STATUS_HAS_EXCEPTION:
status_counts["has_exception"] += 1

# Track per-thread statistics
thread_id = getattr(thread_info, "thread_id", None)
Expand All @@ -210,6 +214,7 @@ def _collect_thread_status_stats(self, stack_frames):
"on_cpu": 0,
"gil_requested": 0,
"unknown": 0,
"has_exception": 0,
"total": 0,
"gc_samples": 0,
}
Expand All @@ -225,6 +230,8 @@ def _collect_thread_status_stats(self, stack_frames):
thread_stats["gil_requested"] += 1
if status_flags & THREAD_STATUS_UNKNOWN:
thread_stats["unknown"] += 1
if status_flags & THREAD_STATUS_HAS_EXCEPTION:
thread_stats["has_exception"] += 1

# Check for GC frames in this thread
frames = getattr(thread_info, "frame_info", None)
Expand Down
3 changes: 3 additions & 0 deletions Lib/profiling/sampling/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
PROFILING_MODE_CPU = 1
PROFILING_MODE_GIL = 2
PROFILING_MODE_ALL = 3 # Combines GIL + CPU checks
PROFILING_MODE_EXCEPTION = 4 # Only samples when thread has an active exception

# Sort mode constants
SORT_MODE_NSAMPLES = 0
Expand All @@ -25,10 +26,12 @@
THREAD_STATUS_ON_CPU,
THREAD_STATUS_UNKNOWN,
THREAD_STATUS_GIL_REQUESTED,
THREAD_STATUS_HAS_EXCEPTION,
)
except ImportError:
# Fallback for tests or when module is not available
THREAD_STATUS_HAS_GIL = (1 << 0)
THREAD_STATUS_ON_CPU = (1 << 1)
THREAD_STATUS_UNKNOWN = (1 << 2)
THREAD_STATUS_GIL_REQUESTED = (1 << 3)
THREAD_STATUS_HAS_EXCEPTION = (1 << 4)
16 changes: 15 additions & 1 deletion Lib/profiling/sampling/gecko_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@
from .collector import Collector
from .opcode_utils import get_opcode_info, format_opcode
try:
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED, THREAD_STATUS_HAS_EXCEPTION
except ImportError:
# Fallback if module not available (shouldn't happen in normal use)
THREAD_STATUS_HAS_GIL = (1 << 0)
THREAD_STATUS_ON_CPU = (1 << 1)
THREAD_STATUS_UNKNOWN = (1 << 2)
THREAD_STATUS_GIL_REQUESTED = (1 << 3)
THREAD_STATUS_HAS_EXCEPTION = (1 << 4)


# Categories matching Firefox Profiler expectations
Expand All @@ -28,6 +29,7 @@
{"name": "CPU", "color": "purple", "subcategories": ["Other"]},
{"name": "Code Type", "color": "red", "subcategories": ["Other"]},
{"name": "Opcodes", "color": "magenta", "subcategories": ["Other"]},
{"name": "Exception", "color": "lightblue", "subcategories": ["Other"]},
]

# Category indices
Expand All @@ -39,6 +41,7 @@
CATEGORY_CPU = 5
CATEGORY_CODE_TYPE = 6
CATEGORY_OPCODES = 7
CATEGORY_EXCEPTION = 8

# Subcategory indices
DEFAULT_SUBCATEGORY = 0
Expand Down Expand Up @@ -88,6 +91,8 @@ def __init__(self, sample_interval_usec, *, skip_idle=False, opcodes=False):
self.python_code_start = {} # Thread running Python code (has GIL)
self.native_code_start = {} # Thread running native code (on CPU without GIL)
self.gil_wait_start = {} # Thread waiting for GIL
self.exception_start = {} # Thread has an exception set
self.no_exception_start = {} # Thread has no exception set

# GC event tracking: track GC start time per thread
self.gc_start_per_thread = {} # tid -> start_time
Expand Down Expand Up @@ -204,6 +209,13 @@ def collect(self, stack_frames):
self._add_marker(tid, "Waiting for GIL", self.gil_wait_start.pop(tid),
current_time, CATEGORY_GIL)

# Track exception state (Has Exception / No Exception)
has_exception = bool(status_flags & THREAD_STATUS_HAS_EXCEPTION)
self._track_state_transition(
tid, has_exception, self.exception_start, self.no_exception_start,
"Has Exception", "No Exception", CATEGORY_EXCEPTION, current_time
)

# Track GC events by detecting <GC> frames in the stack trace
# This leverages the improved GC frame tracking from commit 336366fd7ca
# which precisely identifies the thread that initiated GC collection
Expand Down Expand Up @@ -622,6 +634,8 @@ def _finalize_markers(self):
(self.native_code_start, "Native Code", CATEGORY_CODE_TYPE),
(self.gil_wait_start, "Waiting for GIL", CATEGORY_GIL),
(self.gc_start_per_thread, "GC Collecting", CATEGORY_GC),
(self.exception_start, "Has Exception", CATEGORY_EXCEPTION),
(self.no_exception_start, "No Exception", CATEGORY_EXCEPTION),
]

for state_dict, marker_name, category in marker_states:
Expand Down
8 changes: 8 additions & 0 deletions Lib/profiling/sampling/live_collector/collector.py
Loading
Loading