From ded8366d97e5bab8a22ca52fda949df74935599d Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 6 Feb 2026 21:46:09 +0000 Subject: [PATCH 1/5] gh-144438: Fix false sharing between QSBR and tlbc_index Align the QSBR thread state array to a 64-byte cache line boundary and add padding at the end of _PyThreadStateImpl. Depending on heap layout, the QSBR array could end up sharing a cache line with a thread's tlbc_index, causing QSBR quiescent state updates to contend with reads of tlbc_index in RESUME_CHECK. This is sensitive to earlier allocations during interpreter init and can appear or disappear with seemingly unrelated changes. Either change alone is sufficient to fix the specific issue, but both are worthwhile to avoid similar problems in the future. --- Include/internal/pycore_qsbr.h | 3 ++- Include/internal/pycore_tstate.h | 6 ++++++ ...2-06-21-45-52.gh-issue-144438.GI_uB1LR.rst | 2 ++ Python/qsbr.c | 19 +++++++++++++------ 4 files changed, 23 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-02-06-21-45-52.gh-issue-144438.GI_uB1LR.rst diff --git a/Include/internal/pycore_qsbr.h b/Include/internal/pycore_qsbr.h index 1f9b3fcf777493..eeca6fc472be37 100644 --- a/Include/internal/pycore_qsbr.h +++ b/Include/internal/pycore_qsbr.h @@ -83,8 +83,9 @@ struct _qsbr_shared { // Minimum observed read sequence of all QSBR thread states uint64_t rd_seq; - // Array of QSBR thread states. + // Array of QSBR thread states (aligned to 64 bytes). struct _qsbr_pad *array; + void *array_raw; // raw allocation pointer (for free) Py_ssize_t size; // Freelist of unused _qsbr_thread_states (protected by mutex) diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index 64b90710b8e664..eb2b0c84acdc7c 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -102,6 +102,12 @@ typedef struct _PyThreadStateImpl { #if _Py_TIER2 struct _PyJitTracerState *jit_tracer_state; #endif + +#ifdef Py_GIL_DISABLED + // gh-144438: Add padding to ensure that the fields above don't share a + // cache line with other allocations. + char __padding[64]; +#endif } _PyThreadStateImpl; #ifdef __cplusplus diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-06-21-45-52.gh-issue-144438.GI_uB1LR.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-06-21-45-52.gh-issue-144438.GI_uB1LR.rst new file mode 100644 index 00000000000000..1b19bbc7972d62 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-06-21-45-52.gh-issue-144438.GI_uB1LR.rst @@ -0,0 +1,2 @@ +Align the QSBR thread state array to a 64-byte cache line boundary to +avoid false sharing in the free-threaded build. diff --git a/Python/qsbr.c b/Python/qsbr.c index 6bf5b75f346690..e04ef95c3f852a 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -85,22 +85,28 @@ grow_thread_array(struct _qsbr_shared *shared) new_size = MIN_ARRAY_SIZE; } - struct _qsbr_pad *array = PyMem_RawCalloc(new_size, sizeof(*array)); - if (array == NULL) { + // Overallocate by 63 bytes so we can align to a 64-byte boundary. + // This avoids potential false sharing between the first entry and other + // allocations. 
+ size_t alloc_size = (size_t)new_size * sizeof(struct _qsbr_pad) + 63; + void *raw = PyMem_RawCalloc(1, alloc_size); + if (raw == NULL) { return -1; } + struct _qsbr_pad *array = (struct _qsbr_pad *)(((uintptr_t)raw + 63) & ~(uintptr_t)63); - struct _qsbr_pad *old = shared->array; - if (old != NULL) { + void *old_raw = shared->array_raw; + if (shared->array != NULL) { memcpy(array, shared->array, shared->size * sizeof(*array)); } shared->array = array; + shared->array_raw = raw; shared->size = new_size; shared->freelist = NULL; initialize_new_array(shared); - PyMem_RawFree(old); + PyMem_RawFree(old_raw); return 0; } @@ -257,8 +263,9 @@ void _Py_qsbr_fini(PyInterpreterState *interp) { struct _qsbr_shared *shared = &interp->qsbr; - PyMem_RawFree(shared->array); + PyMem_RawFree(shared->array_raw); shared->array = NULL; + shared->array_raw = NULL; shared->size = 0; shared->freelist = NULL; } From 866272835a8490762bb6c3ee489699cf4d343df3 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 6 Feb 2026 22:25:31 +0000 Subject: [PATCH 2/5] Use _Py_ALIGN_UP in grow_thread_array --- Python/qsbr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/qsbr.c b/Python/qsbr.c index e04ef95c3f852a..22ba77b88a944a 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -93,7 +93,7 @@ grow_thread_array(struct _qsbr_shared *shared) if (raw == NULL) { return -1; } - struct _qsbr_pad *array = (struct _qsbr_pad *)(((uintptr_t)raw + 63) & ~(uintptr_t)63); + struct _qsbr_pad *array = _Py_ALIGN_UP(raw, 64); void *old_raw = shared->array_raw; if (shared->array != NULL) { From 60daa681787efcd2dab5e5dce3578ab1bba26ab3 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Sat, 7 Feb 2026 11:36:00 -0500 Subject: [PATCH 3/5] Update Python/qsbr.c Co-authored-by: Kumar Aditya --- Python/qsbr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Python/qsbr.c b/Python/qsbr.c index 22ba77b88a944a..7bac6e664c30af 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -88,12 +88,13 @@ grow_thread_array(struct _qsbr_shared *shared) // Overallocate by 63 bytes so we can align to a 64-byte boundary. // This avoids potential false sharing between the first entry and other // allocations. 
- size_t alloc_size = (size_t)new_size * sizeof(struct _qsbr_pad) + 63; + size_t alignment = 64; + size_t alloc_size = (size_t)new_size * sizeof(struct _qsbr_pad) + alignment - 1; void *raw = PyMem_RawCalloc(1, alloc_size); if (raw == NULL) { - return -1; - } - struct _qsbr_pad *array = _Py_ALIGN_UP(raw, 64); +return -1; +} + struct _qsbr_pad *array = _Py_ALIGN_UP(raw, alignment); void *old_raw = shared->array_raw; if (shared->array != NULL) { From d29fd8ec83a51416523b05b20ca47549210587b7 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Sat, 7 Feb 2026 11:39:13 -0500 Subject: [PATCH 4/5] Fix indentation in qsbr.c --- Python/qsbr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/qsbr.c b/Python/qsbr.c index 7bac6e664c30af..e9d935bfb40d84 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -92,8 +92,8 @@ grow_thread_array(struct _qsbr_shared *shared) size_t alloc_size = (size_t)new_size * sizeof(struct _qsbr_pad) + alignment - 1; void *raw = PyMem_RawCalloc(1, alloc_size); if (raw == NULL) { -return -1; -} + return -1; + } struct _qsbr_pad *array = _Py_ALIGN_UP(raw, alignment); void *old_raw = shared->array_raw; From db5f73e261a92779192297e209bfd1f1893e9934 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Sat, 7 Feb 2026 15:54:48 -0500 Subject: [PATCH 5/5] Update Misc/NEWS.d/next/Core_and_Builtins/2026-02-06-21-45-52.gh-issue-144438.GI_uB1LR.rst Co-authored-by: Kumar Aditya --- .../2026-02-06-21-45-52.gh-issue-144438.GI_uB1LR.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-06-21-45-52.gh-issue-144438.GI_uB1LR.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-06-21-45-52.gh-issue-144438.GI_uB1LR.rst index 1b19bbc7972d62..3e33e461ae8b5a 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-06-21-45-52.gh-issue-144438.GI_uB1LR.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-06-21-45-52.gh-issue-144438.GI_uB1LR.rst @@ -1,2 +1,2 @@ Align the QSBR thread state array to a 64-byte cache line boundary to -avoid false sharing in the free-threaded build. +avoid false sharing in the :term:`free-threaded build`.
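
Note on the alignment pattern: the qsbr.c changes above over-allocate by alignment - 1 bytes and round the pointer up with _Py_ALIGN_UP, keeping the raw pointer separately (shared->array_raw) so it can still be freed. Below is a minimal standalone sketch of that pattern in plain C, for illustration only: it uses calloc/free instead of PyMem_RawCalloc/PyMem_RawFree, and the helper names align_up and alloc_cacheline_aligned are invented for this example, not part of CPython.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Round p up to the next multiple of alignment (a power of two),
 * standing in for what _Py_ALIGN_UP does in the patch. */
static void *
align_up(void *p, size_t alignment)
{
    return (void *)(((uintptr_t)p + alignment - 1) & ~(uintptr_t)(alignment - 1));
}

/* Allocate size bytes aligned to a 64-byte cache line. The raw allocation
 * pointer is returned through *raw_out so the caller can free() it later;
 * the aligned pointer must never be passed to free() directly. */
static void *
alloc_cacheline_aligned(size_t size, void **raw_out)
{
    const size_t alignment = 64;
    void *raw = calloc(1, size + alignment - 1);
    if (raw == NULL) {
        return NULL;
    }
    *raw_out = raw;
    return align_up(raw, alignment);
}

int
main(void)
{
    void *raw = NULL;
    void *aligned = alloc_cacheline_aligned(1024, &raw);
    if (aligned == NULL) {
        return 1;
    }
    /* aligned starts on a 64-byte boundary, so its first entry cannot share
     * a cache line with whatever the allocator placed just before it. */
    memset(aligned, 0, 1024);
    free(raw);
    return 0;
}

The patch follows the same split: the aligned pointer (shared->array) is what the rest of the code reads and writes, while the raw pointer (shared->array_raw) is the one passed to PyMem_RawFree in grow_thread_array and _Py_qsbr_fini.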