Skip to content

Commit a21fa38

Browse files
authored
[Python] Add Stable ABI (abi3) support for Python 3.12+ (#23600)
Build Python bindings against the Limited C API (`Py_LIMITED_API`, PEP 384) so that a single wheel built with Python 3.12 works on all future CPython versions. This reduces the total release artifact size.

The Limited API forbids direct access to CPython struct layouts and most convenience macros. The main impact is replacing the NumPy C API (whose headers use non-limited internals) with Python-level equivalents like `numpy.frombuffer()` and `numpy.dtype()`, and using nanobind's `type_slots()` to register buffer/sequence/mapping protocols instead of casting to `PyHeapTypeObject`.

Limitations:
- Only CPython 3.12+ is supported. Per-version wheels are still built for 3.10-3.11.
- Free-threaded builds (3.13t) are excluded: the free-threaded ABI is not yet stable.
- `Development.SABIModule` requires CMake 3.26+ (IREE minimum is 3.21), so `IREE_ENABLE_PYTHON_STABLE_ABI` is OFF by default. setup.py auto-enables it when building with CPython 3.12+. We may want to bump the minimum CMake version in the future.

The existing release workflow already builds with Python 3.12 on all platforms, so abi3 wheels are produced with no workflow changes. pkgci only builds cp311 by default (no abi3).

The LLVM submodule includes cherry-picks of llvm/llvm-project PRs for MLIR-side abi3 support.

Assisted-by: claude
1 parent fb7e890 commit a21fa38

File tree

18 files changed

+297
-140
lines changed

18 files changed

+297
-140
lines changed

CMakeLists.txt

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,13 @@ include(iree_setup_toolchain)
726726
# Otherwise, for features that just require the interpreter, find that alone.
727727
#-------------------------------------------------------------------------------
728728

729+
# Build Python bindings with Python Stable ABI (abi3) for Python 3.12+.
730+
# When enabled, a single extension module can be used across Python 3.12+.
731+
# nanobind handles the interaction with FREE_THREADED: if the interpreter
732+
# is free-threaded, STABLE_ABI is ignored and the free-threaded ABI is used.
733+
# Declared here (before first use) and also cascaded to MLIR below.
734+
option(IREE_ENABLE_PYTHON_STABLE_ABI "Build Python bindings with Stable ABI (abi3) for Python 3.12+" OFF)
735+
729736
if(IREE_BUILD_PYTHON_BINDINGS)
730737
# After CMake 3.18, we are able to limit the scope of the search to just
731738
# Development.Module. Searching for Development will fail in situations where
@@ -739,13 +746,19 @@ if(IREE_BUILD_PYTHON_BINDINGS)
739746
# See: https://reviews.llvm.org/D118148
740747
# If building Python packages, we have a hard requirement on 3.10+.
741748
find_package(Python3 3.10 COMPONENTS Interpreter Development NumPy)
742-
find_package(Python3 3.10 COMPONENTS Interpreter Development.Module NumPy REQUIRED)
749+
# Development.SABIModule is needed for Stable ABI (abi3) builds and is only
750+
# provided by CMake 3.26+. NOTE(review): it is requested together with REQUIRED, so older CMake will presumably fail to configure rather than ignore it - confirm.
751+
set(_PYTHON_SABI_COMPONENT "")
752+
if(IREE_ENABLE_PYTHON_STABLE_ABI)
753+
set(_PYTHON_SABI_COMPONENT Development.SABIModule)
754+
endif()
755+
find_package(Python3 3.10 COMPONENTS Interpreter Development.Module NumPy ${_PYTHON_SABI_COMPONENT} REQUIRED)
743756
# Some parts of the build use FindPython instead of FindPython3. Why? No
744757
# one knows, but they are different. So make sure to bootstrap this one too.
745758
# Not doing this here risks them diverging, which on multi-Python systems,
746759
# can be troublesome. Note that nanobind requires FindPython.
747760
set(Python_EXECUTABLE "${Python3_EXECUTABLE}")
748-
find_package(Python 3.10 COMPONENTS Interpreter Development.Module NumPy REQUIRED)
761+
find_package(Python 3.10 COMPONENTS Interpreter Development.Module NumPy ${_PYTHON_SABI_COMPONENT} REQUIRED)
749762
elseif(IREE_BUILD_COMPILER OR IREE_BUILD_TESTS)
750763
find_package(Python3 COMPONENTS Interpreter REQUIRED)
751764
set(Python_EXECUTABLE "${Python3_EXECUTABLE}")
@@ -870,6 +883,11 @@ endif()
870883
# MLIR/LLVM Dependency
871884
#-------------------------------------------------------------------------------
872885

886+
# Cascade IREE_ENABLE_PYTHON_STABLE_ABI to MLIR.
887+
if(IREE_ENABLE_PYTHON_STABLE_ABI)
888+
set(MLIR_ENABLE_PYTHON_STABLE_ABI ON CACHE BOOL "" FORCE)
889+
endif()
890+
873891
# Both the IREE and MLIR Python bindings require nanobind. We initialize it here
874892
# at the top level so that everything uses ours consistently.
875893
if(IREE_BUILD_PYTHON_BINDINGS)

build_tools/python_deploy/build_linux_packages.sh

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,9 @@ this_dir="$(cd $(dirname $0) && pwd)"
6565
script_name="$(basename $0)"
6666
repo_root=$(cd "${this_dir}" && find_git_dir_parent)
6767
manylinux_docker_image="${manylinux_docker_image:-$(uname -m | awk '{print ($1 == "aarch64") ? "quay.io/pypa/manylinux_2_28_aarch64" : "ghcr.io/iree-org/manylinux_x86_64@sha256:2e0246137819cf10ed84240a971f9dd75cc3eb62dc6907dfd2080ee966b3c9f4" }')}"
68-
python_versions="${override_python_versions:-cp310-cp310 cp311-cp311 cp312-cp312 cp313-cp313 cp313-cp313t}"
68+
# Python versions to build. cp312 is the abi3 build (produces a wheel
69+
# compatible with 3.12+). Per-version builds are used for <3.12 and 3.13t.
70+
python_versions="${override_python_versions:-cp310-cp310 cp311-cp311 cp312-cp312 cp313-cp313t}"
6971
output_dir="${output_dir:-${this_dir}/wheelhouse}"
7072
packages="${packages:-iree-base-runtime iree-base-compiler}"
7173
package_suffix="${package_suffix:-}"
@@ -173,8 +175,14 @@ function build_iree_compiler() {
173175
function run_audit_wheel() {
174176
local wheel_basename="$1"
175177
local python_version="$2"
178+
# For abi3 builds (cp312 non-free-threaded), the wheel tag is cp312-abi3
179+
# instead of cp312-cp312.
180+
local wheel_tag="${python_version}"
181+
if [[ "${python_version}" == "cp312-cp312" ]]; then
182+
wheel_tag="cp312-abi3"
183+
fi
176184
# Force wildcard expansion here
177-
generic_wheel="$(echo "${output_dir}/${wheel_basename}-"*"-${python_version}-linux_$(uname -m).whl")"
185+
generic_wheel="$(echo "${output_dir}/${wheel_basename}-"*"-${wheel_tag}-linux_$(uname -m).whl")"
178186
ls "${generic_wheel}"
179187
echo ":::: Auditwheel ${generic_wheel}"
180188
auditwheel repair -w "${output_dir}" "${generic_wheel}"
@@ -186,6 +194,10 @@ function clean_wheels() {
186194
local python_version="$2"
187195
echo ":::: Clean wheels ${wheel_basename} ${python_version}"
188196
rm -f -v "${output_dir}/${wheel_basename}-"*"-${python_version}-"*".whl"
197+
# Also clean abi3 wheels for cp312.
198+
if [[ "${python_version}" == "cp312-cp312" ]]; then
199+
rm -f -v "${output_dir}/${wheel_basename}-"*"-cp312-abi3-"*".whl"
200+
fi
189201
}
190202

191203
function prepare_python() {

build_tools/python_deploy/build_macos_packages.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ set -eu -o errtrace
2121

2222
this_dir="$(cd $(dirname $0) && pwd)"
2323
repo_root="$(cd $this_dir/../../ && pwd)"
24-
python_versions="${override_python_versions:-3.11}"
24+
# Python versions to build. 3.12 produces an abi3 wheel (compatible with 3.12+).
25+
# Per-version builds are used for <3.12 and free-threaded builds.
26+
python_versions="${override_python_versions:-3.11 3.12}"
2527
output_dir="${output_dir:-${this_dir}/wheelhouse}"
2628
packages="${packages:-iree-base-runtime iree-base-compiler}"
2729

build_tools/python_deploy/build_windows_packages.ps1

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99

1010
# Configure settings with script parameters.
1111
param(
12-
[array]$python_versions=@("3.11"),
12+
# Python versions to build. 3.12 produces an abi3 wheel (compatible with 3.12+).
13+
# Per-version builds are used for <3.12 and free-threaded builds.
14+
[array]$python_versions=@("3.11", "3.12"),
1315
[array]$packages=@("iree-base-runtime", "iree-base-compiler"),
1416
[System.String]$output_dir
1517
)

compiler/setup.py

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,12 @@
4141
from setuptools.command.build_py import build_py as _build_py
4242
from setuptools.command.egg_info import egg_info
4343

44+
# Detect whether we should build an abi3 (Stable ABI) wheel.
45+
# This applies to CPython 3.12+ when not in free-threaded mode.
46+
_is_abi3_build = sys.version_info >= (3, 12) and not sysconfig.get_config_var(
47+
"Py_GIL_DISABLED"
48+
)
49+
4450

4551
def check_pip_version():
4652
from packaging import version
@@ -269,6 +275,8 @@ def prepare_installation():
269275
get_env_cmake_option("IREE_TARGET_BACKEND_CUDA", "OFF"),
270276
get_env_cmake_option("IREE_ENABLE_LLD", "OFF"),
271277
]
278+
if _is_abi3_build:
279+
cmake_args.append("-DIREE_ENABLE_PYTHON_STABLE_ABI=ON")
272280
cmake_args.extend(get_cmake_version_info_args())
273281

274282
# These usually flow through the environment, but we add them explicitly
@@ -382,6 +390,24 @@ def run(self):
382390
egg_info.run(self)
383391

384392

393+
# Override bdist_wheel to produce abi3 wheel tags when applicable.
394+
_bdist_wheel_cmdclass = {}
395+
try:
396+
from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
397+
398+
class bdist_wheel(_bdist_wheel):
399+
def get_tag(self):
400+
python, abi, plat = _bdist_wheel.get_tag(self)
401+
if _is_abi3_build:
402+
python, abi = "cp312", "abi3"
403+
return python, abi, plat
404+
405+
_bdist_wheel_cmdclass = {"bdist_wheel": bdist_wheel}
406+
except ImportError:
407+
# wheel package not available (e.g., during sdist). Not an error.
408+
pass
409+
410+
385411
def generate_version_py():
386412
return f"""# Auto-generated version info.
387413
PACKAGE_SUFFIX = "{PACKAGE_SUFFIX}"
@@ -485,12 +511,15 @@ def find_git_submodule_revision(submodule_path):
485511
CMakeExtension("iree.compiler._mlir_libs._mlirGPUPasses"),
486512
CMakeExtension("iree.compiler._mlir_libs._site_initialize_0"),
487513
],
488-
cmdclass={
489-
"build": CustomBuild,
490-
"built_ext": NoopBuildExtension,
491-
"build_py": CMakeBuildPy,
492-
"egg_info": CleanEggInfo,
493-
},
514+
cmdclass=dict(
515+
{
516+
"build": CustomBuild,
517+
"built_ext": NoopBuildExtension,
518+
"build_py": CMakeBuildPy,
519+
"egg_info": CleanEggInfo,
520+
},
521+
**_bdist_wheel_cmdclass,
522+
),
494523
zip_safe=False,
495524
package_dir={
496525
# Note: Must be relative path, so we line this up with the absolute

runtime/bindings/python/CMakeLists.txt

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,13 @@ iree_select_compiler_opts(_RTTI_AND_EXCEPTION_COPTS
4242
"/GR"
4343
)
4444

45+
set(_STABLE_ABI_FLAG "")
46+
if(IREE_ENABLE_PYTHON_STABLE_ABI)
47+
set(_STABLE_ABI_FLAG STABLE_ABI)
48+
endif()
49+
4550
nanobind_add_module(iree_runtime_bindings_python_PyExtRt
46-
NB_STATIC LTO FREE_THREADED
51+
NB_STATIC LTO FREE_THREADED ${_STABLE_ABI_FLAG}
4752
"binding.h"
4853
"initialize_module.cc"
4954
"invoke.h"
@@ -87,8 +92,6 @@ target_link_libraries(iree_runtime_bindings_python_PyExtRt
8792
iree::tooling::modules
8893
iree::vm
8994
iree::vm::bytecode::module
90-
91-
Python::NumPy
9295
)
9396

9497
target_compile_options(iree_runtime_bindings_python_PyExtRt

runtime/bindings/python/binding.h

Lines changed: 41 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,15 @@ class ApiRefCounted {
5858
}
5959
void operator=(const ApiRefCounted&) = delete;
6060

61-
~ApiRefCounted() { Release(); }
61+
~ApiRefCounted() {
62+
// In stable ABI (abi3) mode, types are heap-allocated via PyType_FromSpec
63+
// and instances may be destroyed during Py_FinalizeEx after IREE's type
64+
// registry is no longer valid. Skip release when nanobind's internals
65+
// are being torn down since the process is exiting anyway.
66+
if (instance_ && py::is_alive()) {
67+
Release();
68+
}
69+
}
6270

6371
// Steals the reference to the object referenced by the given raw pointer and
6472
// returns a wrapper (transfers ownership).
@@ -130,37 +138,39 @@ inline py::object create_empty_tuple() {
130138
return py::steal(py::handle(PyTuple_New(0)));
131139
}
132140

133-
// For a bound class, binds the buffer protocol. This will result in a call
134-
// to on the CppType:
135-
// HandleBufferProtocol(Py_buffer *view, int flags)
136-
// This is a low level callback and must not raise any exceptions. If
137-
// error conditions are warranted the usual PyErr_SetString approach must be
138-
// used (and -1 returned). Return 0 on success.
141+
// Returns a nanobind::type_slots() descriptor for the buffer protocol on
142+
// CppType. CppType must implement HandleBufferProtocol(Py_buffer*, int).
143+
// The slots are only read during py::class_ construction, so the static local
144+
// array inside is fine (no persistent global state).
139145
template <typename CppType>
140-
void BindBufferProtocol(py::handle clazz) {
141-
PyBufferProcs buffer_procs;
142-
memset(&buffer_procs, 0, sizeof(buffer_procs));
143-
buffer_procs.bf_getbuffer =
144-
// It is not legal to raise exceptions from these callbacks.
145-
+[](PyObject* raw_self, Py_buffer* view, int flags) -> int {
146-
if (view == NULL) {
147-
PyErr_SetString(PyExc_ValueError, "NULL view in getbuffer");
148-
return -1;
149-
}
150-
151-
// Cast must succeed due to invariants.
152-
auto self = py::cast<CppType*>(py::handle(raw_self));
153-
154-
Py_INCREF(raw_self);
155-
view->obj = raw_self;
156-
return self->HandleBufferProtocol(view, flags);
157-
};
158-
buffer_procs.bf_releasebuffer =
159-
+[](PyObject* raw_self, Py_buffer* view) -> void {};
160-
auto heap_type = reinterpret_cast<PyHeapTypeObject*>(clazz.ptr());
161-
assert(heap_type->ht_type.tp_flags & Py_TPFLAGS_HEAPTYPE &&
162-
"must be heap type");
163-
heap_type->as_buffer = buffer_procs;
146+
py::type_slots buffer_protocol_slots() {
147+
// It is not legal to raise exceptions from buffer protocol callbacks.
148+
static const PyType_Slot slots[] = {
149+
{Py_bf_getbuffer,
150+
reinterpret_cast<void*>(
151+
+[](PyObject* raw_self, Py_buffer* view, int flags) -> int {
152+
if (!view) {
153+
PyErr_SetString(PyExc_ValueError, "NULL view in getbuffer");
154+
return -1;
155+
}
156+
157+
// Cast must succeed due to invariants.
158+
auto self = py::cast<CppType*>(py::handle(raw_self));
159+
160+
int rc = self->HandleBufferProtocol(view, flags);
161+
if (rc == 0) {
162+
Py_INCREF(raw_self);
163+
view->obj = raw_self;
164+
}
165+
return rc;
166+
})},
167+
// No-op: PyBuffer_Release handles Py_DECREF(view->obj) after calling
168+
// this callback, so the Py_INCREF in getbuffer is already balanced.
169+
{Py_bf_releasebuffer,
170+
reinterpret_cast<void*>(
171+
+[](PyObject* raw_self, Py_buffer* view) -> void {})},
172+
{0, nullptr}};
173+
return py::type_slots(slots);
164174
}
165175

166176
// Nanobind 2.0 had a backwards compatibility bug where it left out the

runtime/bindings/python/hal.cc

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,6 +1238,24 @@ int HalModuleDebugSinkTpClear(PyObject* self) {
12381238
return 0;
12391239
}
12401240

1241+
//------------------------------------------------------------------------------
1242+
// HalMappedMemory buffer protocol
1243+
//------------------------------------------------------------------------------
1244+
1245+
int HalMappedMemory::HandleBufferProtocol(Py_buffer* view, int flags) {
1246+
view->buf = mapped_memory_.contents.data;
1247+
view->len = mapped_memory_.contents.data_length;
1248+
view->readonly = 1; // Mapped with IREE_HAL_MEMORY_ACCESS_READ
1249+
view->itemsize = 1;
1250+
view->format = (char*)"B";
1251+
view->ndim = 1;
1252+
view->shape = nullptr;
1253+
view->strides = nullptr;
1254+
view->suboffsets = nullptr;
1255+
view->internal = nullptr;
1256+
return 0;
1257+
}
1258+
12411259
//------------------------------------------------------------------------------
12421260
// Bindings
12431261
//------------------------------------------------------------------------------
@@ -1344,9 +1362,7 @@ void SetupHalBindings(nanobind::module_ m) {
13441362
hal_element_type
13451363
.def_static("map_to_dtype",
13461364
[](iree_hal_element_type_t element_type) {
1347-
int typenum = numpy::ConvertHalElementTypeToNumPyTypeNum(
1348-
element_type);
1349-
return numpy::DescrNewFromType(typenum);
1365+
return numpy::DescrNewFromType(element_type);
13501366
})
13511367
.def_static("is_byte_aligned",
13521368
[](iree_hal_element_type_t element_type) {
@@ -1856,7 +1872,8 @@ void SetupHalBindings(nanobind::module_ m) {
18561872
py::arg("timeout") = py::none(), py::arg("deadline") = py::none(),
18571873
kHalWait);
18581874

1859-
py::class_<HalMappedMemory>(m, "MappedMemory")
1875+
py::class_<HalMappedMemory>(m, "MappedMemory",
1876+
buffer_protocol_slots<HalMappedMemory>())
18601877
.def(
18611878
"asarray",
18621879
[](HalMappedMemory* self, py::handle shape, py::object dtype_descr) {
@@ -1867,8 +1884,7 @@ void SetupHalBindings(nanobind::module_ m) {
18671884
for (size_t i = 0; i < rank; ++i) {
18681885
dims[i] = py::cast<intptr_t>(shape[i]);
18691886
}
1870-
int typenum = numpy::TypenumFromDescr(dtype_descr);
1871-
return numpy::SimpleNewFromData(rank, dims, typenum,
1887+
return numpy::SimpleNewFromData(rank, dims, dtype_descr,
18721888
self->mapped_memory().contents.data,
18731889
py_mapped_memory);
18741890
},

runtime/bindings/python/hal.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,9 @@ class HalMappedMemory {
286286

287287
iree_hal_buffer_mapping_t& mapped_memory() { return mapped_memory_; }
288288

289+
// Buffer protocol for PyMemoryView_FromObject (used by SimpleNewFromData).
290+
int HandleBufferProtocol(Py_buffer* view, int flags);
291+
289292
private:
290293
iree_hal_buffer_mapping_t mapped_memory_ = {{0}};
291294
iree_hal_buffer_t* buffer_ = nullptr;

runtime/bindings/python/initialize_module.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#include "./invoke.h"
1515
#include "./io.h"
1616
#include "./loop.h"
17-
#include "./numpy_interop.h"
1817
#include "./py_module.h"
1918
#include "./status_utils.h"
2019
#include "./vm.h"
@@ -33,7 +32,6 @@ namespace iree {
3332
namespace python {
3433

3534
NB_MODULE(_runtime, m) {
36-
numpy::InitializeNumPyInterop();
3735
IREE_TRACE_APP_ENTER();
3836

3937
IREE_CHECK_OK(iree_hal_register_all_available_drivers(

0 commit comments

Comments
 (0)