gh-138122: Replace --interval with --sampling-rate by lkollar · Pull Request #143085 · python/cpython · GitHub
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 18 additions & 19 deletions Doc/library/profiling.sampling.rst
4 changes: 2 additions & 2 deletions Lib/profiling/sampling/_child_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
_CHILD_POLL_INTERVAL_SEC = 0.1

# Default timeout for waiting on child profilers
_DEFAULT_WAIT_TIMEOUT = 30.0
_DEFAULT_WAIT_TIMEOUT_SEC = 30.0

# Maximum number of child profilers to spawn (prevents resource exhaustion)
_MAX_CHILD_PROFILERS = 100
Expand Down Expand Up @@ -138,7 +138,7 @@ def spawned_profilers(self):
with self._lock:
return list(self._spawned_profilers)

def wait_for_profilers(self, timeout=_DEFAULT_WAIT_TIMEOUT):
def wait_for_profilers(self, timeout=_DEFAULT_WAIT_TIMEOUT_SEC):
"""
Wait for all spawned child profilers to complete.

Expand Down
8 changes: 4 additions & 4 deletions Lib/profiling/sampling/_sync_coordinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ def _validate_arguments(args: List[str]) -> tuple[int, str, List[str]]:

# Constants for socket communication
_MAX_RETRIES = 3
_INITIAL_RETRY_DELAY = 0.1
_SOCKET_TIMEOUT = 2.0
_INITIAL_RETRY_DELAY_SEC = 0.1
_SOCKET_TIMEOUT_SEC = 2.0
_READY_MESSAGE = b"ready"


Expand All @@ -93,14 +93,14 @@ def _signal_readiness(sync_port: int) -> None:
for attempt in range(_MAX_RETRIES):
try:
# Use context manager for automatic cleanup
with socket.create_connection(("127.0.0.1", sync_port), timeout=_SOCKET_TIMEOUT) as sock:
with socket.create_connection(("127.0.0.1", sync_port), timeout=_SOCKET_TIMEOUT_SEC) as sock:
sock.send(_READY_MESSAGE)
return
except (socket.error, OSError) as e:
last_error = e
if attempt < _MAX_RETRIES - 1:
# Exponential backoff before retry
time.sleep(_INITIAL_RETRY_DELAY * (2 ** attempt))
time.sleep(_INITIAL_RETRY_DELAY_SEC * (2 ** attempt))

# If we get here, all retries failed
raise SyncError(f"Failed to signal readiness after {_MAX_RETRIES} attempts: {last_error}") from last_error
Expand Down
98 changes: 74 additions & 24 deletions Lib/profiling/sampling/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import importlib.util
import locale
import os
import re
import selectors
import socket
import subprocess
Expand All @@ -20,6 +21,7 @@
from .binary_collector import BinaryCollector
from .binary_reader import BinaryReader
from .constants import (
MICROSECONDS_PER_SECOND,
PROFILING_MODE_ALL,
PROFILING_MODE_WALL,
PROFILING_MODE_CPU,
Expand Down Expand Up @@ -66,8 +68,8 @@ class CustomFormatter(


# Constants for socket synchronization
_SYNC_TIMEOUT = 5.0
_PROCESS_KILL_TIMEOUT = 2.0
_SYNC_TIMEOUT_SEC = 5.0
_PROCESS_KILL_TIMEOUT_SEC = 2.0
_READY_MESSAGE = b"ready"
_RECV_BUFFER_SIZE = 1024

Expand Down Expand Up @@ -116,7 +118,8 @@ def _build_child_profiler_args(args):
child_args = []

# Sampling options
child_args.extend(["-i", str(args.interval)])
hz = MICROSECONDS_PER_SECOND // args.sample_interval_usec
child_args.extend(["-r", str(hz)])
child_args.extend(["-d", str(args.duration)])

if args.all_threads:
Expand Down Expand Up @@ -239,7 +242,7 @@ def _run_with_sync(original_cmd, suppress_output=False):
sync_sock.bind(("127.0.0.1", 0)) # Let OS choose a free port
sync_port = sync_sock.getsockname()[1]
sync_sock.listen(1)
sync_sock.settimeout(_SYNC_TIMEOUT)
sync_sock.settimeout(_SYNC_TIMEOUT_SEC)

# Get current working directory to preserve it
cwd = os.getcwd()
Expand Down Expand Up @@ -268,7 +271,7 @@ def _run_with_sync(original_cmd, suppress_output=False):
process = subprocess.Popen(cmd, **popen_kwargs)

try:
_wait_for_ready_signal(sync_sock, process, _SYNC_TIMEOUT)
_wait_for_ready_signal(sync_sock, process, _SYNC_TIMEOUT_SEC)

# Close stderr pipe if we were capturing it
if process.stderr:
Expand All @@ -279,7 +282,7 @@ def _run_with_sync(original_cmd, suppress_output=False):
if process.poll() is None:
process.terminate()
try:
process.wait(timeout=_PROCESS_KILL_TIMEOUT)
process.wait(timeout=_PROCESS_KILL_TIMEOUT_SEC)
except subprocess.TimeoutExpired:
process.kill()
process.wait()
Expand All @@ -290,16 +293,64 @@ def _run_with_sync(original_cmd, suppress_output=False):
return process


_RATE_PATTERN = re.compile(r'''
^ # Start of string
( # Group 1: The numeric value
\d+ # One or more digits (integer part)
(?:\.\d+)? # Optional: decimal point followed by digits
) # Examples: "10", "0.5", "100.25"
( # Group 2: Optional unit suffix
hz # "hz" - hertz
| khz # "khz" - kilohertz
| k # "k" - shorthand for kilohertz
)? # Suffix is optional (bare number = Hz)
$ # End of string
''', re.VERBOSE | re.IGNORECASE)


def _parse_sampling_rate(rate_str: str) -> int:
"""Parse sampling rate string to microseconds."""
rate_str = rate_str.strip().lower()

match = _RATE_PATTERN.match(rate_str)
if not match:
raise argparse.ArgumentTypeError(

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Let's add a hint about spaces in the error message since "10 khz" (with space) is rejected but users might try it

f"Invalid sampling rate format: {rate_str}. "
"Expected: number followed by optional suffix (hz, khz, k) with no spaces (e.g., 10khz)"
)

number_part = match.group(1)
suffix = match.group(2) or ''

# Determine multiplier based on suffix
suffix_map = {
'hz': 1,
'khz': 1000,
'k': 1000,
}
multiplier = suffix_map.get(suffix, 1)
hz = float(number_part) * multiplier
if hz <= 0:
raise argparse.ArgumentTypeError(f"Sampling rate must be positive: {rate_str}")

interval_usec = int(MICROSECONDS_PER_SECOND / hz)
if interval_usec < 1:
raise argparse.ArgumentTypeError(f"Sampling rate too high: {rate_str}")

return interval_usec


def _add_sampling_options(parser):
"""Add sampling configuration options to a parser."""
sampling_group = parser.add_argument_group("Sampling configuration")
sampling_group.add_argument(
"-i",
"--interval",
type=int,
default=100,
metavar="MICROSECONDS",
help="sampling interval",
"-r",
"--sampling-rate",
type=_parse_sampling_rate,

@pablogsal pablogsal Dec 23, 2025

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: The argument is named sampling_rate but after parsing it stores the interval in microseconds, not the rate in Hz. This is kind of confusing in the rest of the code

default="1khz",
metavar="RATE",
dest="sample_interval_usec",
help="sampling rate (e.g., 10000, 10khz, 10k)",
)
sampling_group.add_argument(
"-d",
Expand Down Expand Up @@ -487,14 +538,13 @@ def _sort_to_mode(sort_choice):
}
return sort_map.get(sort_choice, SORT_MODE_NSAMPLES)


def _create_collector(format_type, interval, skip_idle, opcodes=False,
def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=False,
output_file=None, compression='auto'):
"""Create the appropriate collector based on format type.

Args:
format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap', 'binary')
interval: Sampling interval in microseconds
sample_interval_usec: Sampling interval in microseconds
skip_idle: Whether to skip idle samples
opcodes: Whether to collect opcode information (only used by gecko format
for creating interval markers in Firefox Profiler)
Expand All @@ -519,9 +569,9 @@ def _create_collector(format_type, interval, skip_idle, opcodes=False,
# and is the only format that uses opcodes for interval markers
if format_type == "gecko":
skip_idle = False
return collector_class(interval, skip_idle=skip_idle, opcodes=opcodes)
return collector_class(sample_interval_usec, skip_idle=skip_idle, opcodes=opcodes)

return collector_class(interval, skip_idle=skip_idle)
return collector_class(sample_interval_usec, skip_idle=skip_idle)


def _generate_output_filename(format_type, pid):
Expand Down Expand Up @@ -725,8 +775,8 @@ def _main():
# Generate flamegraph from a script
`python -m profiling.sampling run --flamegraph -o output.html script.py`

# Profile with custom interval and duration
`python -m profiling.sampling run -i 50 -d 30 script.py`
# Profile with custom rate and duration
`python -m profiling.sampling run -r 5khz -d 30 script.py`

# Save collapsed stacks to file
`python -m profiling.sampling run --collapsed -o stacks.txt script.py`
Expand Down Expand Up @@ -860,7 +910,7 @@ def _handle_attach(args):

# Create the appropriate collector
collector = _create_collector(
args.format, args.interval, skip_idle, args.opcodes,
args.format, args.sample_interval_usec, skip_idle, args.opcodes,
output_file=output_file,
compression=getattr(args, 'compression', 'auto')
)
Expand Down Expand Up @@ -938,7 +988,7 @@ def _handle_run(args):

# Create the appropriate collector
collector = _create_collector(
args.format, args.interval, skip_idle, args.opcodes,
args.format, args.sample_interval_usec, skip_idle, args.opcodes,
output_file=output_file,
compression=getattr(args, 'compression', 'auto')
)
Expand All @@ -965,7 +1015,7 @@ def _handle_run(args):
if process.poll() is None:
process.terminate()
try:
process.wait(timeout=_PROCESS_KILL_TIMEOUT)
process.wait(timeout=_PROCESS_KILL_TIMEOUT_SEC)
except subprocess.TimeoutExpired:
process.kill()
process.wait()
Expand All @@ -980,7 +1030,7 @@ def _handle_live_attach(args, pid):

# Create live collector with default settings
collector = LiveStatsCollector(
args.interval,
args.sample_interval_usec,
skip_idle=skip_idle,
sort_by="tottime", # Default initial sort
limit=20, # Default limit
Expand Down Expand Up @@ -1027,7 +1077,7 @@ def _handle_live_run(args):

# Create live collector with default settings
collector = LiveStatsCollector(
args.interval,
args.sample_interval_usec,
skip_idle=skip_idle,
sort_by="tottime", # Default initial sort
limit=20, # Default limit
Expand Down
4 changes: 4 additions & 0 deletions Lib/profiling/sampling/constants.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
"""Constants for the sampling profiler."""

# Time unit conversion constants
MICROSECONDS_PER_SECOND = 1_000_000
MILLISECONDS_PER_SECOND = 1_000

# Profiling mode constants
PROFILING_MODE_WALL = 0
PROFILING_MODE_CPU = 1
Expand Down
4 changes: 2 additions & 2 deletions Lib/profiling/sampling/live_collector/__init__.py
Loading
Loading