Skip to content

Commit 6e2bc1d

Browse files
authored
ARROW-112 Make PyArrow Binding more Robust (#95)
1 parent d937b51 commit 6e2bc1d

File tree

7 files changed

+143
-78
lines changed

7 files changed

+143
-78
lines changed

.github/workflows/release-python.yml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ concurrency:
88
group: wheels-${{ github.ref }}
99
cancel-in-progress: true
1010

11+
defaults:
12+
run:
13+
working-directory: ./bindings/python
14+
1115
jobs:
1216
build_wheels:
1317
name: Build wheel for ${{ matrix.python }}-${{ matrix.buildplat[1] }}
@@ -37,17 +41,13 @@ jobs:
3741

3842
- name: Build MacOS Py38 Wheel
3943
if: ${{ matrix.python == 'cp38' && matrix.buildplat[0] == 'macos-10.15' }}
40-
working-directory: ./bindings/python
41-
shell: bash
4244
env:
4345
CIBW_BUILD: cp38-macosx_x86_64
4446
MACOSX_DEPLOYMENT_TARGET: "10.13"
4547
run: python -m cibuildwheel --output-dir wheelhouse
4648

4749
- name: Build wheels
4850
if: ${{ matrix.python != 'cp38' || matrix.buildplat[0] != 'macos-10.15' }}
49-
working-directory: ./bindings/python
50-
shell: bash
5151
env:
5252
CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }}
5353
MACOSX_DEPLOYMENT_TARGET: "10.13"
@@ -92,7 +92,7 @@ jobs:
9292
- uses: actions/upload-artifact@v2
9393
with:
9494
name: "sdist"
95-
path: ./bindings/python/dist/*.tar.gz
95+
path: ./bindings/python/wheelhouse/*.tar.gz
9696

9797
collect-dist:
9898
runs-on: ubuntu-latest
@@ -102,6 +102,7 @@ jobs:
102102
- name: Download all workflow run artifacts
103103
uses: actions/download-artifact@v2
104104
- name: Flatten directory
105+
working-directory: .
105106
run: |
106107
find . -mindepth 2 -type f -exec mv {} . \;
107108
find . -type d -empty -delete

bindings/python/MANIFEST.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ exclude THIRD-PARTY-NOTICES
77
exclude release.sh
88
exclude addtags.py
99
exclude benchmark.py
10+
exclude repair_wheel.py
1011
exclude .flake8
1112
exclude RELEASE.rst
1213
exclude asv.conf.json

bindings/python/cibw_before_build.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,8 @@ then
1919
fi
2020

2121
# Install pyarrow with the appropriate platform.
22-
pip install --platform $platform --target $HOME/wheels --no-deps --only-binary=:all: pyarrow
22+
pip install --platform $platform --upgrade --target $HOME/wheels --no-deps --only-binary=:all: pyarrow
2323
fi
2424

25-
2625
# Build libbson with the appropriate arch.
2726
./build-libbson.sh

bindings/python/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ skip = "pp* *-manylinux_i686 *_ppc64le *_s390x *-musllinux*"
1212
before-build = "bash ./cibw_before_build.sh"
1313
build-frontend = "build"
1414
test-command = "python -c \"from pymongoarrow.lib import process_bson_stream\""
15+
repair-wheel-command = "python repair_wheel.py {dest_dir} {wheel} {delocate_archs}"
1516

1617
[tool.cibuildwheel.environment]
1718
LIBBSON_INSTALL_DIR = "./libbson"

bindings/python/release.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ then
1010
exit 1
1111
fi
1212

13+
# Clean up
14+
rm -rf dist wheelhouse build pymongoarrow/*.so pymongoarrow/*.dll pymongoarrow/*.dylib
15+
1316
# Platform-dependent actions:
1417
PYTHON=${PYTHON_BINARY:-"python"}
1518
if [ "Linux" = "$(uname -s)" ]
@@ -38,3 +41,7 @@ then
3841
$PYTHON -m pip install auditwheel
3942
$PYTHON addtags.py dist/*.whl "$PLAT" ./wheelhouse
4043
fi
44+
45+
# Repair the wheel, copying shared libraries as needed.
# Use the interpreter selected via PYTHON_BINARY (stored in $PYTHON above)
# rather than whatever "python" happens to be first on PATH, so the repair
# step runs in the same environment that built the wheel.
MACHINE=$($PYTHON -c "import platform;print(platform.machine())")
$PYTHON repair_wheel.py "./wheelhouse" dist/*.whl "$MACHINE"

bindings/python/repair_wheel.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import atexit
2+
import glob
3+
import os
4+
import sys
5+
import tempfile
6+
from subprocess import run
7+
8+
HERE = os.path.abspath(os.path.dirname(__file__))

# Expected command line: <dest_dir> <wheel> <delocate_archs>.
# Fail with a usage message instead of an unpacking ValueError.
if len(sys.argv) != 4:
    sys.exit(f"usage: {sys.argv[0]} <dest_dir> <wheel> <delocate_archs>")
wheel_dir, wheel_file, delocate_args = sys.argv[1:]
# Use forward slashes in both paths regardless of the host separator.
wheel_dir = wheel_dir.replace(os.sep, "/")
wheel_file = wheel_file.replace(os.sep, "/")

# Ensure pyarrow.
if "universal2" in wheel_file:
    # pip selects the most specific platform by default,
    # so we have to tell it to install the universal2 version.
    # See https://github.com/pypa/packaging/issues/381
    # Fall back to 10.13 when no deployment target is set in the environment.
    macos_ver = os.environ.get("MACOSX_DEPLOYMENT_TARGET", "10.13")
    macos_ver = macos_ver.replace(".", "_")
    # pyarrow is installed into a throwaway directory removed at exit.
    wheel_temp_dir = tempfile.TemporaryDirectory()
    atexit.register(wheel_temp_dir.cleanup)
    # check=True: stop here if pip fails, rather than failing later on the
    # pyarrow import with a less helpful error.
    run(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            "--platform",
            f"macosx_{macos_ver}_universal2",
            "--upgrade",
            "--target",
            wheel_temp_dir.name,
            "--only-binary=:all:",
            "pyarrow",
        ],
        check=True,
    )
    # Allow the installed pyarrow library to be imported.
    sys.path.insert(0, wheel_temp_dir.name)
else:
    run([sys.executable, "-m", "pip", "install", "pyarrow"], check=True)
import pyarrow as pa  # noqa

# Locate the libbson install tree (LIBBSON_INSTALL_DIR, or ./libbson next to
# this script) and the directory inside it that holds the shared library.
libbson = os.environ.get("LIBBSON_INSTALL_DIR", os.path.join(HERE, "libbson"))
libbson = os.path.abspath(libbson)
if os.name == "nt":
    libbson_lib = glob.glob(os.path.join(libbson, "bin"))
else:
    libbson_lib = glob.glob(os.path.join(libbson, "lib*"))
# Search path handed to the wheel repair tool: pyarrow's library directories
# followed by libbson's, joined with the platform path separator.
extra_paths = pa.get_library_dirs() + libbson_lib
extra_path = os.path.pathsep.join([a.replace(os.sep, "/") for a in extra_paths])
51+
52+
53+
def append_os_variable(name, extra_path):
    """Append ``extra_path`` to the environment variable ``name``.

    If the variable is unset or empty it is set to ``extra_path``; otherwise
    ``extra_path`` is added after the existing value, separated by
    ``os.pathsep``. The resulting value is printed for the build log.

    An empty ``extra_path`` leaves the environment untouched, so no empty
    search-path entry (which loaders treat as the current directory) is added.
    """
    current = os.environ.get(name)
    if extra_path:
        if current:
            os.environ[name] = current + os.pathsep + extra_path
        else:
            os.environ[name] = extra_path
    print(f"{name}: {os.environ.get(name, '')}")
59+
60+
61+
# Run the platform's wheel repair tool with the extra library directories on
# its search path. Every subprocess uses check=True so that a failed install
# or repair aborts the script with a non-zero exit status.
if os.name == "nt":
    append_os_variable("PATH", extra_path)
    run([sys.executable, "-m", "pip", "install", "delvewheel"], check=True)
    # Do not mangle ucrtbased.dll to avoid:
    # "RuntimeError: Unable to rename the dependencies of vcruntime140d.dll
    # because this DLL has trailing data. If this DLL was created with MinGW,
    # run the strip utility. Otherwise, include ucrtbased.dll in the
    # --no-mangle flag. In addition, if you believe that delvewheel should
    # avoid name-mangling a specific DLL by default, open an issue at
    # https://github.com/adang1345/delvewheel/issues and include this error
    # message."
    run(
        ["delvewheel", "repair", "--no-mangle", "ucrtbased.dll", "-w", wheel_dir, wheel_file],
        check=True,
    )

elif sys.platform == "darwin":
    # FIXME: We should not have to do this.
    # Put the interpreter's lib-dynload directory first on the search path,
    # when one exists under the base prefix.
    dynload_dirs = glob.glob(f"{sys.base_prefix}/lib/python*/lib-dynload")
    if dynload_dirs:
        extra_path = f"{dynload_dirs[0]}:{extra_path}"
    append_os_variable("DYLD_LIBRARY_PATH", extra_path)
    run([sys.executable, "-m", "pip", "install", "delocate"], check=True)
    run(
        [
            "delocate-wheel",
            "--require-archs",
            delocate_args,
            "-w",
            wheel_dir,
            wheel_file,
        ],
        check=True,
    )
else:
    append_os_variable("LD_LIBRARY_PATH", extra_path)
    run([sys.executable, "-m", "pip", "install", "auditwheel"], check=True)
    run(["auditwheel", "repair", "-w", wheel_dir, wheel_file], check=True)

bindings/python/setup.py

Lines changed: 33 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,6 @@
1111
BUILD_DIR = os.path.join(HERE, "pymongoarrow")
1212
IS_WIN = platform == "win32"
1313

14-
# Find and copy the binary arrow files, unless
15-
# MONGO_NO_COPY_ARROW_LIB is set (for instance in a conda build).
16-
# Wheels are meant to be self-contained, per PEP 513.
17-
# https://www.python.org/dev/peps/pep-0513/#id40
18-
# Conda has the opposite philosphy, where libraries are meant to be
19-
# shared. For instance, there is an arrow-cpp library available on conda-forge
20-
# that provides the libarrow files.
21-
COPY_LIBARROW = not os.environ.get("MONGO_NO_COPY_LIBARROW", False)
22-
2314
# Find and copy the binary libbson file, unless
2415
# MONGO_NO_COPY_LIBBSON is set (for instance in a conda build).
2516
COPY_LIBBSON = not os.environ.get("MONGO_NO_COPY_LIBBSON", False)
@@ -47,6 +38,7 @@ def append_libbson_flags(module):
4738
install_dir = os.environ.get("LIBBSON_INSTALL_DIR")
4839
if install_dir:
4940
install_dir = os.path.abspath(install_dir)
41+
5042
# Handle the copy-able library file if applicable.
5143
if COPY_LIBBSON:
5244
if platform == "darwin":
@@ -62,6 +54,14 @@ def append_libbson_flags(module):
6254
if os.path.exists(lib_file):
6355
shutil.copy(lib_file, BUILD_DIR)
6456

57+
# Ensure our Cython extension can dynamically link to libraries
58+
# - https://blog.krzyzanowskim.com/2018/12/05/rpath-what/
59+
# - https://nehckl0.medium.com/creating-relocatable-linux-executables-by-setting-rpath-with-origin-45de573a2e98
60+
if platform == "darwin":
61+
module.extra_link_args += ["-rpath", "@loader_path"]
62+
elif platform == "linux":
63+
module.extra_link_args += ["-Wl,-rpath,$ORIGIN"]
64+
6565
# Find the linkable library file, and explicitly add it to the linker if on Windows.
6666
lib_dirs = glob.glob(os.path.join(install_dir, "lib*"))
6767
if len(lib_dirs) != 1:
@@ -125,64 +125,33 @@ def append_libbson_flags(module):
125125
module.libraries.extend(libnames)
126126

127127

128-
def append_arrow_flags(module):
128+
def append_arrow_flags(ext):
129129
import numpy as np
130130
import pyarrow as pa
131131

132-
if IS_WIN:
133-
module.include_dirs.append(np.get_include())
134-
module.include_dirs.append(pa.get_include())
135-
else:
136-
module.extra_compile_args.append("-isystem" + pa.get_include())
137-
module.extra_compile_args.append("-isystem" + np.get_include())
138-
# Arrow's manylinux{2010, 2014} binaries are built with gcc < 4.8 which predates CXX11 ABI
139-
# - https://uwekorn.com/2019/09/15/how-we-build-apache-arrows-manylinux-wheels.html
140-
# - https://arrow.apache.org/docs/python/extending.html#example
141-
if "std=" not in os.environ.get("CXXFLAGS", ""):
142-
module.extra_compile_args.append("-std=c++11")
143-
module.extra_compile_args.append("-D_GLIBCXX_USE_CXX11_ABI=0")
144-
145-
# Handle the arrow library files manually.
146-
# Alternative to using pyarrow.create_library_symlinks().
147-
# You can use MONGO_LIBARROW_DIR to explicitly set the location of the
148-
# arrow libraries (for instance in a conda build).
149-
# We first check for an unmodified path to the library,
150-
# then look for a library file with a version modifier, e.g. libarrow.600.dylib.
151-
arrow_lib = os.environ.get("MONGO_LIBARROW_DIR", pa.get_library_dirs()[0])
152-
if platform == "darwin":
153-
exts = [".dylib", ".*.dylib"]
154-
elif platform == "linux":
155-
exts = [".so", ".so.*"]
156-
else:
157-
# Windows is handled differently (see below)
158-
pass
159-
160-
# Find the appropriate library file and optionally copy it locally.
161-
# Explicitly handle "parquet" library as a workaround for
162-
# https://issues.apache.org/jira/browse/ARROW-17327
163-
for name in pa.get_libraries() + ["parquet"]:
164-
if IS_WIN:
165-
if COPY_LIBARROW:
166-
lib_file = os.path.join(arrow_lib, f"{name}.dll")
167-
if not os.path.exists(lib_file):
168-
if name == "parquet":
169-
continue
170-
raise ValueError("Could not find compiled arrow library")
171-
shutil.copy(lib_file, BUILD_DIR)
172-
lib_file = os.path.join(arrow_lib, f"{name}.lib")
173-
module.extra_link_args.append(lib_file)
174-
continue
175-
176-
for ext in exts:
177-
files = glob.glob(os.path.join(arrow_lib, f"lib{name}{ext}"))
178-
if not files:
179-
continue
180-
path = files[0]
181-
if COPY_LIBARROW:
182-
shutil.copy(path, BUILD_DIR)
183-
path = os.path.join(BUILD_DIR, os.path.basename(path))
184-
module.extra_link_args.append(path)
185-
break
132+
# From https://arrow.apache.org/docs/python/integration/extending.html#example
133+
# The Numpy C headers are currently required
134+
ext.include_dirs.append(np.get_include())
135+
ext.include_dirs.append(pa.get_include())
136+
ext.libraries.extend(pa.get_libraries())
137+
ext.library_dirs.extend(pa.get_library_dirs())
138+
139+
if os.name != "nt":
140+
# On Linux and MacOS, we must run pyarrow.create_library_symlinks()
141+
# as a user with write access to the directory where pyarrow is
142+
# installed.
143+
# See https://arrow.apache.org/docs/python/integration/extending.html#building-extensions-against-pypi-wheels.
144+
pa.create_library_symlinks()
145+
146+
if os.name == "posix":
147+
ext.extra_compile_args.append("-std=c++11")
148+
149+
# Arrow's manylinux{2010, 2014} binaries are built with gcc < 4.8 which predates CXX11 ABI
150+
# - https://uwekorn.com/2019/09/15/how-we-build-apache-arrows-manylinux-wheels.html
151+
# - https://arrow.apache.org/docs/python/extending.html#example
152+
if "std=" not in os.environ.get("CXXFLAGS", ""):
153+
ext.extra_compile_args.append("-std=c++11")
154+
ext.extra_compile_args.append("-D_GLIBCXX_USE_CXX11_ABI=0")
186155

187156

188157
def get_extension_modules():
@@ -197,13 +166,6 @@ def get_extension_modules():
197166
for module in modules:
198167
append_libbson_flags(module)
199168
append_arrow_flags(module)
200-
# Ensure our Cython extension can dynamically link to libraries
201-
# - https://blog.krzyzanowskim.com/2018/12/05/rpath-what/
202-
# - https://nehckl0.medium.com/creating-relocatable-linux-executables-by-setting-rpath-with-origin-45de573a2e98
203-
if platform == "darwin":
204-
module.extra_link_args += ["-rpath", "@loader_path"]
205-
elif platform == "linux":
206-
module.extra_link_args += ["-Wl,-rpath,$ORIGIN"]
207169

208170
return modules
209171

0 commit comments

Comments
 (0)