From 94a0e28faebdd771e161a441e56fda6dff54c817 Mon Sep 17 00:00:00 2001 From: ByteFlow Date: Sat, 16 May 2026 18:14:02 +0800 Subject: [PATCH 1/5] fix: Fix race condition in elementtree with free-threading --- Lib/test/test_xml_etree_c.py | 47 +++++++++++++++++++++++ Modules/_elementtree.c | 72 ++++++++++++++++++++++++++---------- 2 files changed, 100 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index 270b9d6da8e7b9..78c0ec62784bde 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -3,8 +3,10 @@ import struct from test import support from test.support.import_helper import import_fresh_module +from test.support import threading_helper import types import unittest +import threading cET = import_fresh_module('xml.etree.ElementTree', fresh=['_elementtree']) @@ -256,6 +258,51 @@ def test_element_with_children(self): self.check_sizeof(e, self.elementsize + self.extra + struct.calcsize('8P')) +@unittest.skipUnless(cET, 'requires _elementtree') +@threading_helper.requires_working_threading() +class TestElementTreeFreeThreading(unittest.TestCase): + def test_element_extra_race(self): + """Race len(), .attrib, and .clear() to verify fix for gh-149861.""" + root = cET.Element('root') + children = [cET.Element(f'child-{i}') for i in range(5)] + + stop_event = threading.Event() + + def reader_task(): + while not stop_event.is_set(): + # Test element_length + len(root) + # Test element_get_attrib + try: + _ = root.attrib + except AttributeError: + # In a race where clear() just ran, this is expected + # because of the PyErr_SetString we added in C. + pass + + def writer_task(): + while not stop_event.is_set(): + # Test element_add_subelement / extend + root.extend(children) + # Test clear_extra + root.clear() + + # Create a mix of readers and writers + threads = [] + for _ in range(4): + threads.append(threading.Thread(target=reader_task)) + for _ in range(2): + threads.append(threading.Thread(target=writer_task)) + + with threading_helper.start_threads(threads): + # Stress the race for a short duration. + # In CI, 0.5 to 1.0 seconds is usually enough to catch races + # without slowing down the test suite too much. + import time + time.sleep(1.0) + stop_event.set() + + def install_tests(): # Test classes should have __module__ referring to this module. diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 9e794be5c109ba..6a4367d88f6bb7 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -271,19 +271,31 @@ typedef struct { LOCAL(int) create_extra(ElementObject* self, PyObject* attrib) { - self->extra = PyMem_Malloc(sizeof(ElementObjectExtra)); - if (!self->extra) { + int res; + Py_BEGIN_CRITICAL_SECTION(self); + + if (self->extra != NULL) { + res = 0; + goto end; + } + ElementObjectExtra* extra = PyMem_Malloc(sizeof(ElementObjectExtra)); + if (!extra) { PyErr_NoMemory(); - return -1; + res = -1; + goto end; } - self->extra->attrib = Py_XNewRef(attrib); + extra->attrib = Py_XNewRef(attrib); - self->extra->length = 0; - self->extra->allocated = STATIC_CHILDREN; - self->extra->children = self->extra->_children; + extra->length = 0; + extra->allocated = STATIC_CHILDREN; + extra->children = extra->_children; + self->extra = extra; + res = 0; - return 0; + end: + Py_END_CRITICAL_SECTION(); + return res; } LOCAL(void) @@ -297,7 +309,7 @@ dealloc_extra(ElementObjectExtra *extra) Py_XDECREF(extra->attrib); for (i = 0; i < extra->length; i++) - Py_DECREF(extra->children[i]); + Py_XDECREF(extra->children[i]); if (extra->children != extra->_children) { PyMem_Free(extra->children); @@ -311,15 +323,16 @@ clear_extra(ElementObject* self) { ElementObjectExtra *myextra; - if (!self->extra) - return; + Py_BEGIN_CRITICAL_SECTION(self); /* Avoid DECREFs calling into this code again (cycles, etc.) */ myextra = self->extra; self->extra = NULL; - - dealloc_extra(myextra); + Py_END_CRITICAL_SECTION(); + if (myextra) { + dealloc_extra(myextra); + } } /* Convenience internal function to create new Element objects with the given @@ -544,6 +557,7 @@ element_add_subelement(elementtreestate *st, ElementObject *self, return -1; } + Py_BEGIN_CRITICAL_SECTION(self); if (element_resize(self, 1) < 0) return -1; @@ -551,6 +565,7 @@ element_add_subelement(elementtreestate *st, ElementObject *self, self->extra->length++; + Py_END_CRITICAL_SECTION(); return 0; } @@ -560,13 +575,24 @@ element_get_attrib(ElementObject* self) /* return borrowed reference to attrib dictionary */ /* note: this function assumes that the extra section exists */ - PyObject* res = self->extra->attrib; + PyObject *res = NULL; + Py_BEGIN_CRITICAL_SECTION(self); + if (self->extra == NULL) { + PyErr_SetString(PyExc_AttributeError, "extra section does not exist"); + goto end; + } + res = self->extra->attrib; if (!res) { /* create missing dictionary */ - res = self->extra->attrib = PyDict_New(); + res = PyDict_New(); + if (res) { + self->extra->attrib = res; + } } - + Py_XINCREF(res); + end: + Py_END_CRITICAL_SECTION(); return res; } @@ -667,6 +693,7 @@ element_gc_traverse(PyObject *op, visitproc visit, void *arg) Py_VISIT(JOIN_OBJ(self->text)); Py_VISIT(JOIN_OBJ(self->tail)); + Py_BEGIN_CRITICAL_SECTION(self); if (self->extra) { Py_ssize_t i; Py_VISIT(self->extra->attrib); @@ -674,6 +701,7 @@ element_gc_traverse(PyObject *op, visitproc visit, void *arg) for (i = 0; i < self->extra->length; ++i) Py_VISIT(self->extra->children[i]); } + Py_END_CRITICAL_SECTION(); return 0; } @@ -681,6 +709,7 @@ static int element_gc_clear(PyObject *op) { ElementObject *self = _Element_CAST(op); + Py_BEGIN_CRITICAL_SECTION(self); Py_CLEAR(self->tag); _clear_joined_ptr(&self->text); _clear_joined_ptr(&self->tail); @@ -689,6 +718,7 @@ element_gc_clear(PyObject *op) * so fully deallocate it. */ clear_extra(self); + Py_END_CRITICAL_SECTION(); return 0; } @@ -1625,10 +1655,14 @@ static Py_ssize_t element_length(PyObject *op) { ElementObject *self = _Element_CAST(op); - if (!self->extra) - return 0; + Py_ssize_t res = 0; + Py_BEGIN_CRITICAL_SECTION(self); + if (self->extra) { + res = self->extra->length; + } + Py_END_CRITICAL_SECTION(); - return self->extra->length; + return res; } /*[clinic input] From b3cad8bb914a5b281052858cb0d92b0f54a09161 Mon Sep 17 00:00:00 2001 From: ByteFlow Date: Sat, 16 May 2026 18:23:17 +0800 Subject: [PATCH 2/5] clear comments --- Lib/test/test_xml_etree_c.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index 78c0ec62784bde..dedb1f217ca8ae 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -262,7 +262,7 @@ def test_element_with_children(self): @threading_helper.requires_working_threading() class TestElementTreeFreeThreading(unittest.TestCase): def test_element_extra_race(self): - """Race len(), .attrib, and .clear() to verify fix for gh-149861.""" + #Race len(), .attrib, and .clear() to verify fix for gh-149861. root = cET.Element('root') children = [cET.Element(f'child-{i}') for i in range(5)] @@ -270,14 +270,12 @@ def test_element_extra_race(self): def reader_task(): while not stop_event.is_set(): - # Test element_length len(root) - # Test element_get_attrib try: _ = root.attrib except AttributeError: # In a race where clear() just ran, this is expected - # because of the PyErr_SetString we added in C. + # because of the PyErr_SetString is added in C. pass def writer_task(): @@ -287,7 +285,6 @@ def writer_task(): # Test clear_extra root.clear() - # Create a mix of readers and writers threads = [] for _ in range(4): threads.append(threading.Thread(target=reader_task)) @@ -295,9 +292,6 @@ def writer_task(): threads.append(threading.Thread(target=writer_task)) with threading_helper.start_threads(threads): - # Stress the race for a short duration. - # In CI, 0.5 to 1.0 seconds is usually enough to catch races - # without slowing down the test suite too much. import time time.sleep(1.0) stop_event.set() From 1dc4f3696b15ff5b03c7c8acc396a23130b2ef26 Mon Sep 17 00:00:00 2001 From: ByteFlow Date: Sat, 16 May 2026 18:50:03 +0800 Subject: [PATCH 3/5] fix --- Modules/_elementtree.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 6a4367d88f6bb7..4a28844d4c6360 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -572,7 +572,7 @@ element_add_subelement(elementtreestate *st, ElementObject *self, LOCAL(PyObject*) element_get_attrib(ElementObject* self) { - /* return borrowed reference to attrib dictionary */ + /* return new reference to attrib dictionary */ /* note: this function assumes that the extra section exists */ PyObject *res = NULL; @@ -1800,9 +1800,12 @@ _elementtree_Element_set_impl(ElementObject *self, PyObject *key, if (!attrib) return NULL; - if (PyDict_SetItem(attrib, key, value) < 0) + if (PyDict_SetItem(attrib, key, value) < 0) { + Py_DECREF(attrib); return NULL; + } + Py_DECREF(attrib); Py_RETURN_NONE; } @@ -2113,12 +2116,16 @@ element_attrib_getter(PyObject *op, void *closure) { PyObject *res; ElementObject *self = _Element_CAST(op); + Py_BEGIN_CRITICAL_SECTION(self); if (!self->extra) { if (create_extra(self, NULL) < 0) - return NULL; + goto end; } res = element_get_attrib(self); - return Py_XNewRef(res); + + end: + Py_END_CRITICAL_SECTION(); + return res; } /* macro for setter validation */ From 4e1605782a39c45bc5a62401be1c08e5b8533628 Mon Sep 17 00:00:00 2001 From: ByteFlow Date: Sat, 16 May 2026 19:09:08 +0800 Subject: [PATCH 4/5] fix CI tests --- Modules/_elementtree.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 4a28844d4c6360..d68c3ff8aea82e 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -293,7 +293,7 @@ create_extra(ElementObject* self, PyObject* attrib) self->extra = extra; res = 0; - end: + end: ; Py_END_CRITICAL_SECTION(); return res; } @@ -591,7 +591,7 @@ element_get_attrib(ElementObject* self) } } Py_XINCREF(res); - end: + end: ; Py_END_CRITICAL_SECTION(); return res; } @@ -2114,7 +2114,7 @@ element_tail_getter(PyObject *op, void *closure) static PyObject* element_attrib_getter(PyObject *op, void *closure) { - PyObject *res; + PyObject *res = NULL; ElementObject *self = _Element_CAST(op); Py_BEGIN_CRITICAL_SECTION(self); if (!self->extra) { @@ -2123,7 +2123,7 @@ element_attrib_getter(PyObject *op, void *closure) } res = element_get_attrib(self); - end: + end: ; Py_END_CRITICAL_SECTION(); return res; } From 6d8b5599ddc53558bb68497e1d4f5934d17a5c0f Mon Sep 17 00:00:00 2001 From: ByteFlow Date: Sat, 16 May 2026 19:14:07 +0800 Subject: [PATCH 5/5] add news --- .../Library/2026-05-16-19-13-58.gh-issue-149816.Ht-jC5.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2026-05-16-19-13-58.gh-issue-149816.Ht-jC5.rst diff --git a/Misc/NEWS.d/next/Library/2026-05-16-19-13-58.gh-issue-149816.Ht-jC5.rst b/Misc/NEWS.d/next/Library/2026-05-16-19-13-58.gh-issue-149816.Ht-jC5.rst new file mode 100644 index 00000000000000..3f98c33a329303 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-05-16-19-13-58.gh-issue-149816.Ht-jC5.rst @@ -0,0 +1,4 @@ +Fixes race conditions in :mod:`xml.etree.ElementTree` where the ``extra`` +pointer dereference and ``attrib`` access was unsynchronized . This prevents +potential crashes (core dumps) in the free-threading build caused by +concurrent modification of an element's internal structure.