Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/cibuildwheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ jobs:
contents: write
actions: read
with:
pure-python: false
wheel-name-pattern: 'cymem-*.whl'
secrets:
gh-token: ${{ secrets.GITHUB_TOKEN }}

2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python_version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
python_version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14", "3.14t"]
exclude:
# The 3.9/3.10 arm64 Python binaries that `setup-python` tries to use
# don't work on macOS 15 anymore, so skip those.
Expand Down
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,3 +195,28 @@ objects, but we'd still like the laziness of the `Pool`.
from cymem.cymem cimport Pool, WrapMalloc, WrapFree
cdef Pool mem = Pool(WrapMalloc(priv_malloc), WrapFree(priv_free))
```

## Thread Safety

As of version 2.0.12, `cymem.Pool` is thread-safe when used with CPython 3.13+
free-threaded builds (PEP 703). All operations on the Pool, including `alloc()`,
`free()`, and `realloc()`, can be safely called from multiple threads concurrently.

**Key guarantees:**
- Multiple threads can safely call `alloc()`, `free()`, and `realloc()` on the
same `Pool` instance.
- The Pool's internal bookkeeping (`addresses` dict and `size` accounting) is
protected from race conditions. Reading the internal state without
holding a lock on the `Pool` instance via a critical section is not
thread-safe.

**Important notes:**
- Individual Pool instances are thread-safe, but you are still responsible for
proper synchronization when accessing the memory contents themselves. Note
that holding a lock on the Pool itself is typically not the right approach:
since malloc is thread-safe, memory allocated by separate calls to `alloc`
can be safely accessed concurrently without locking. You should only
synchronize access to specific memory regions that are being shared across
threads, using fine-grained locks appropriate to your use case rather than a
coarse-grained lock on the entire `Pool`.
- Custom memory allocators need to be thread-safe themselves.
78 changes: 60 additions & 18 deletions cymem/cymem.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# cython: embedsignature=True
# cython: embedsignature=True, freethreading_compatible=True

cimport cython
from cpython.mem cimport PyMem_Malloc, PyMem_Free
from cpython.ref cimport Py_INCREF, Py_DECREF
from libc.string cimport memset
from libc.string cimport memcpy
import warnings
Expand Down Expand Up @@ -44,17 +44,29 @@ cdef class Pool:
addresses (dict): The currently allocated addresses and their sizes. Read-only.
pymalloc (PyMalloc): The allocator to use (default uses PyMem_Malloc).
pyfree (PyFree): The free to use (default uses PyMem_Free).

Thread-safety:
All public methods in this class can be safely called from multiple threads.
Testing the thread-safety of this class can be done by checking out the repository
at https://github.com/lysnikolaou/test-cymem-threadsafety and following the
instructions on the README.
"""

def __cinit__(self, PyMalloc pymalloc=Default_Malloc,
PyFree pyfree=Default_Free):
# size, addresses and refs are mutable. note that operations on
# dicts and lists are atomic.
self.size = 0
self.addresses = {}
self.refs = []

# pymalloc and pyfree are immutable.
self.pymalloc = pymalloc
self.pyfree = pyfree

def __dealloc__(self):
# No need for locking here since the methods will be unreachable
# by the time __dealloc__ is called
cdef size_t addr
if self.addresses is not None:
for addr in self.addresses:
Expand All @@ -73,28 +85,54 @@ cdef class Pool:
if p == NULL:
raise MemoryError("Error assigning %d bytes" % (number * elem_size))
memset(p, 0, number * elem_size)
self.addresses[<size_t>p] = number * elem_size
self.size += number * elem_size

# We need a critical section here so that addresses and size get
# updated atomically. If we were to acquire a critical section on self,
# mutating the dictionary would try to acquire a critical section on
# the dictionary and therefore release the critical section on self.
# Acquiring a critical section on self.addresses works because that's
# the only C API that gets called inside the block and acquiring the
# critical section on the top-most held lock does not release it.
with cython.critical_section(self.addresses):
self.addresses[<size_t>p] = number * elem_size
self.size += number * elem_size
return p

cdef void* realloc(self, void* p, size_t new_size) except NULL:
"""Resizes the memory block pointed to by p to new_size bytes, returning
a non-NULL pointer to the new block. new_size must be larger than the
original.

If p is not in the Pool or new_size is 0, a MemoryError is raised.
If p is not in the Pool or new_size isn't larger than the previous size,
a ValueError is raised.
"""
if <size_t>p not in self.addresses:
raise ValueError("Pointer %d not found in Pool %s" % (<size_t>p, self.addresses))
if new_size == 0:
raise ValueError("Realloc requires new_size > 0")
assert new_size > self.addresses[<size_t>p]
cdef void* new_ptr = self.alloc(1, new_size)
cdef size_t old_size
cdef void* new_ptr

new_ptr = self.pymalloc.malloc(new_size)
if new_ptr == NULL:
raise MemoryError("Error reallocating to %d bytes" % new_size)
memcpy(new_ptr, p, self.addresses[<size_t>p])
self.free(p)
self.addresses[<size_t>new_ptr] = new_size

# See comment in alloc on why we're acquiring a critical section on
# self.addresses instead of self.
with cython.critical_section(self.addresses):
try:
old_size = self.addresses.pop(<size_t>p)
except KeyError:
self.pyfree.free(new_ptr)
raise ValueError("Pointer %d not found in Pool %s" % (<size_t>p, self.addresses))

if old_size >= new_size:
self.addresses[<size_t>p] = old_size
self.pyfree.free(new_ptr)
raise ValueError("Realloc requires new_size > previous size")

memcpy(new_ptr, p, old_size)
memset(<char*> new_ptr + old_size, 0, new_size - old_size)
self.size += new_size - old_size
self.addresses[<size_t>new_ptr] = new_size

self.pyfree.free(p)
return new_ptr

cdef void free(self, void* p) except *:
Expand All @@ -105,10 +143,14 @@ cdef class Pool:

If p is not in Pool.addresses, a KeyError is raised.
"""
self.size -= self.addresses.pop(<size_t>p)
# See comment in alloc on why we're acquiring a critical section on
# self.addresses instead of self.
with cython.critical_section(self.addresses):
self.size -= <size_t>self.addresses.pop(<size_t>p)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would still be better to not use augmented subtraction here as I explained on call because of time-of-access vs time-of-use issue

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That sounds like something I'd like to be aware of going forward. Is there a link I could read? In a perfect world is there something Cython could support better here?

Copy link
Contributor Author

@lysnikolaou lysnikolaou Nov 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would still be better to not use augmented subtraction here as I explained on call because of time-of-access vs time-of-use issue

The cast to size_t eliminated that problem though, no?

That sounds like something I'd like to be aware of going forward.

The problem mostly relates to Cython calling PyNumber_InPlaceSubtract here, so it does the following:

  • Construct a long-object out of self.size
  • Call pop
  • Call in-place subtract
  • Save the result back into self.size

So there's a lot of stuff happening between self.size being accessed and self.size being used. In general, checking the generated C code and making sure that it avois C API calls and does the expected thing is the way to go. For this specific example, Cython maybe should have done the subtraction in C directly, since size is defined as a size_t. The problem is that self.addresses.pop returns a PyObject * that can be pretty much anything. I'll open an upstream issue with that question.

Copy link

@kumaraditya303 kumaraditya303 Nov 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cast to size_t eliminated that problem though, no?

Yes, but it seems like a Cython specific hack to me. The core issue is because of using augmented assignment, if you avoid using that then the size_t cast won't be needed as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That sounds like something I'd like to be aware of going forward.

Agreed, there's a hot debate going on internally about the general usability issue(s) with critical sections.

Is there a link I could read?

Nothing conclusive yet I think. @kumaraditya303 just drafted a summary of what the issue is (see this gist), as well as this gist about races from augmented assignments - but that will need more follow-up and discussion.

In a perfect world is there something Cython could support better here?

Almost certainly, yes.

self.pyfree.free(p)

def own_pyref(self, object py_ref):
# Calling append here is atomic, no critical section needed.
self.refs.append(py_ref)


Expand Down Expand Up @@ -142,9 +184,9 @@ cdef class Address:
raise MemoryError("Error assigning %d bytes" % number * elem_size)
memset(self.ptr, 0, number * elem_size)

property addr:
def __get__(self):
return <size_t>self.ptr
@property
def addr(self):
return <size_t>self.ptr

def __dealloc__(self):
if self.ptr != NULL:
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
[build-system]
requires = [
"setuptools",
"cython>=0.25",
"cython>=3.1",
]
build-backend = "setuptools.build_meta"

[tool.cibuildwheel]
build = "*"
skip = "pp* cp36* cp37* cp38*"
test-skip = ""
free-threaded-support = false
enable = ["cpython-freethreading"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the thread safety guarantees we're adding depend critically on details of how critical sections work and we haven't and don't really want to do careful thread safety testing on 3.13t, can you delete this? It's not required to build cp314t wheels.

But do keep the deletion of free-threaded-support = false.


archs = ["native"]

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
cython>=0.25
cython>=3.1
pytest
6 changes: 4 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ def setup_package():
version=about["__version__"],
url=about["__uri__"],
license=about["__license__"],
ext_modules=cythonize(ext_modules, language_level=2),
setup_requires=["cython>=0.25"],
ext_modules=cythonize(ext_modules),
setup_requires=["cython>=3.1"],
classifiers=[
"Environment :: Console",
"Intended Audience :: Developers",
Expand All @@ -116,6 +116,8 @@ def setup_package():
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"Programming Language :: Python :: Free Threading :: 2 - Beta",
"Topic :: Scientific/Engineering",
],
cmdclass={"build_ext": build_ext_subclass},
Expand Down