gh-138122: Validate base frame before caching in remote debugging fra… · python/cpython@f31e71d · GitHub
Skip to content

Commit f31e71d

Browse files
committed
gh-138122: Validate base frame before caching in remote debugging frame cache
The frame cache in the remote debugging module was storing frame chains without validating that they reached the base frame. This could happen when a frame chain was interrupted or when the process state changed during reading, resulting in incomplete stacks being cached. Subsequent samples that hit the cache would then produce flamegraphs that didn't reach the bottom of the call stack. The fix passes base_frame_addr through to process_frame_chain() which already has validation logic to ensure the frame walk terminates at the expected sentinel frame. By enabling this validation in the caching code path and tracking whether we've confirmed reaching the base frame, we now only store complete frame chains in the cache. When extending from cached data, we trust that the cached frames were already validated at storage time, maintaining the invariant that cached stacks are always complete. An integration test using deeply nested generators that oscillate the stack depth is added to verify that all sampled stacks contain the entry point function. This catches regressions where incomplete stacks might be cached and returned.
1 parent 790a46a commit f31e71d

6 files changed

Lines changed: 131 additions & 11 deletions

File tree

Lib/test/test_profiling/test_sampling_profiler/test_integration.py

Lines changed: 95 additions & 0 deletions
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix incomplete stack traces in the Tachyon profiler's frame cache when
2+
profiling code with deeply nested generators. The frame cache now validates
3+
that stack traces reach the base frame before caching, preventing broken
4+
flamegraphs. Patch by Pablo Galindo.

Modules/_remote_debugging/_remote_debugging.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -447,18 +447,21 @@ extern int frame_cache_lookup_and_extend(
447447
Py_ssize_t *num_addrs,
448448
Py_ssize_t max_addrs);
449449
// Returns: 1 = stored, 0 = not stored (graceful), -1 = error
450+
// Only stores complete stacks that reach base_frame_addr
450451
extern int frame_cache_store(
451452
RemoteUnwinderObject *unwinder,
452453
uint64_t thread_id,
453454
PyObject *frame_list,
454455
const uintptr_t *addrs,
455-
Py_ssize_t num_addrs);
456+
Py_ssize_t num_addrs,
457+
uintptr_t base_frame_addr);
456458

457459
extern int collect_frames_with_cache(
458460
RemoteUnwinderObject *unwinder,
459461
uintptr_t frame_addr,
460462
StackChunkList *chunks,
461463
PyObject *frame_info,
464+
uintptr_t base_frame_addr,
462465
uintptr_t gc_frame,
463466
uintptr_t last_profiled_frame,
464467
uint64_t thread_id);

Modules/_remote_debugging/frame_cache.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,19 +194,33 @@ frame_cache_lookup_and_extend(
194194
}
195195

196196
// Store frame list with addresses in cache
197+
// Only stores complete stacks that reach base_frame_addr (validation done internally)
197198
// Returns: 1 = stored successfully, 0 = not stored (graceful degradation), -1 = error
198199
int
199200
frame_cache_store(
200201
RemoteUnwinderObject *unwinder,
201202
uint64_t thread_id,
202203
PyObject *frame_list,
203204
const uintptr_t *addrs,
204-
Py_ssize_t num_addrs)
205+
Py_ssize_t num_addrs,
206+
uintptr_t base_frame_addr)
205207
{
206208
if (!unwinder->frame_cache || thread_id == 0) {
207209
return 0;
208210
}
209211

212+
// Validate we have a complete stack before caching.
213+
// Only cache if the last frame address matches base_frame_addr (the sentinel
214+
// at the bottom of the stack). This prevents caching incomplete stacks that
215+
// would produce broken flamegraphs.
216+
if (base_frame_addr != 0 && num_addrs > 0) {
217+
uintptr_t last_frame_addr = addrs[num_addrs - 1];
218+
if (last_frame_addr != base_frame_addr) {
219+
// Incomplete stack - don't cache (graceful degradation)
220+
return 0;
221+
}
222+
}
223+
210224
// Clamp to max frames
211225
if (num_addrs > FRAME_CACHE_MAX_FRAMES) {
212226
num_addrs = FRAME_CACHE_MAX_FRAMES;

Modules/_remote_debugging/frames.c

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,7 @@ collect_frames_with_cache(
537537
uintptr_t frame_addr,
538538
StackChunkList *chunks,
539539
PyObject *frame_info,
540+
uintptr_t base_frame_addr,
540541
uintptr_t gc_frame,
541542
uintptr_t last_profiled_frame,
542543
uint64_t thread_id)
@@ -553,7 +554,7 @@ collect_frames_with_cache(
553554
Py_ssize_t frames_before = PyList_GET_SIZE(frame_info);
554555

555556
int stopped_at_cached = 0;
556-
if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, 0, gc_frame,
557+
if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, base_frame_addr, gc_frame,
557558
last_profiled_frame, &stopped_at_cached,
558559
addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
559560
return -1;
@@ -575,23 +576,26 @@ collect_frames_with_cache(
575576
// Cache miss - continue walking from last_profiled_frame to get the rest
576577
STATS_INC(unwinder, frame_cache_misses);
577578
Py_ssize_t frames_before_walk = PyList_GET_SIZE(frame_info);
578-
if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, 0, gc_frame,
579+
if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, base_frame_addr, gc_frame,
579580
0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
580581
return -1;
581582
}
582583
STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(frame_info) - frames_before_walk);
583584
} else {
584-
// Partial cache hit
585+
// Partial cache hit - cache was validated when stored, so we trust it
585586
STATS_INC(unwinder, frame_cache_partial_hits);
586587
STATS_ADD(unwinder, frames_read_from_cache, PyList_GET_SIZE(frame_info) - frames_before_cache);
587588
}
588-
} else if (last_profiled_frame == 0) {
589-
// No cache involvement (no last_profiled_frame or cache disabled)
590-
STATS_INC(unwinder, frame_cache_misses);
589+
} else {
590+
if (last_profiled_frame == 0) {
591+
// No cache involvement (no last_profiled_frame or cache disabled)
592+
STATS_INC(unwinder, frame_cache_misses);
593+
}
591594
}
592595

593-
// Store in cache (frame_cache_store handles truncation if num_addrs > FRAME_CACHE_MAX_FRAMES)
594-
if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs) < 0) {
596+
// Store in cache - frame_cache_store validates internally that we have a
597+
// complete stack (reached base_frame_addr) before actually storing
598+
if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs, base_frame_addr) < 0) {
595599
return -1;
596600
}
597601

Modules/_remote_debugging/threads.c

Lines changed: 1 addition & 1 deletion

0 commit comments

Comments
 (0)