Skip to content

Commit 3546a2e

Browse files
committed
[GR-66021]: Add pyarrow 20.0.0 patch
PullRequest: graalpython/3852
2 parents 412f431 + 2269ba4 commit 3546a2e

File tree

2 files changed

+184
-0
lines changed

2 files changed

+184
-0
lines changed

graalpython/lib-graalpython/patches/metadata.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,11 @@ version = '== 19.0.1'
510510
patch = 'pyarrow-19.0.1.patch'
511511
license = 'Apache-2.0'
512512

513+
[[pyarrow.rules]]
514+
version = '== 20.0.0'
515+
patch = 'pyarrow-20.0.0.patch'
516+
license = 'Apache-2.0'
517+
513518
[[pybind11.rules]]
514519
# Upstreamed
515520
install-priority = 0
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
diff --git a/pyarrow/error.pxi b/pyarrow/error.pxi
2+
index cbe2552..8d0d9d9 100644
3+
--- a/pyarrow/error.pxi
4+
+++ b/pyarrow/error.pxi
5+
@@ -248,7 +248,7 @@ cdef class SignalStopHandler:
6+
if exc_value.signum:
7+
# Re-emit the exact same signal. We restored the Python signal
8+
# handler above, so it should receive it.
9+
- if os.name == 'nt':
10+
+ if os.name == 'nt' or sys.implementation.name == 'graalpy':
11+
SendSignal(exc_value.signum)
12+
else:
13+
SendSignalToThread(exc_value.signum,
14+
diff --git a/pyarrow/memory.pxi b/pyarrow/memory.pxi
15+
index fdd5b99..ea611cd 100644
16+
--- a/pyarrow/memory.pxi
17+
+++ b/pyarrow/memory.pxi
18+
@@ -20,6 +20,10 @@
19+
# cython: embedsignature = True
20+
21+
22+
+cdef extern from "Python.h":
23+
+ void Py_INCREF(object)
24+
+
25+
+
26+
cdef class MemoryPool(_Weakrefable):
27+
"""
28+
Base class for memory allocation.
29+
@@ -35,6 +39,13 @@ cdef class MemoryPool(_Weakrefable):
30+
31+
cdef void init(self, CMemoryPool* pool):
32+
self.pool = pool
33+
+ # GraalPy change: pyarrow doesn't maintain python references from
34+
+ # buffers to pools, but they dereference the pointer to the pool in the
35+
+ # destructor. They just assume buffers will get GC'ed before their
36+
+ # pools. You can easily get a segfault even on CPython if you make
37+
+ # a buffer outlive its pool. Since we can't guarantee destruction
38+
+ # order, we just leak the pool.
39+
+ Py_INCREF(self)
40+
41+
def release_unused(self):
42+
"""
43+
diff --git a/pyarrow_build_backend.py b/pyarrow_build_backend.py
44+
new file mode 100644
45+
index 0000000..dc176d3
46+
--- /dev/null
47+
+++ b/pyarrow_build_backend.py
48+
@@ -0,0 +1,93 @@
49+
+import os
50+
+import re
51+
+import sys
52+
+import tarfile
53+
+import subprocess
54+
+import tempfile
55+
+import shutil
56+
+import tarfile
57+
+import urllib.request
58+
+from pathlib import Path
59+
+
60+
+VERSION = '20.0.0'
61+
+
62+
+
63+
+def build_sdist(sdist_directory, config_settings=None):
64+
+ nv = f'pyarrow-{VERSION}'
65+
+ srcdir = Path(__file__).parent
66+
+ archive_path = Path(sdist_directory) / f'{nv}.tar.gz'
67+
+
68+
+ def tarfilter(info):
69+
+ if re.match(r'\./(?:.git|venv|[^-/]+-venv|dist)', info.name):
70+
+ return None
71+
+ info.name = f'./{nv}/{info.name}'
72+
+ return info
73+
+
74+
+ with tarfile.open(archive_path, 'w:gz') as tar:
75+
+ tar.add('.', filter=tarfilter)
76+
+ return archive_path.name
77+
+
78+
+
79+
+def build_wheel(wheel_directory, config_settings=None, metadata_directory=None):
80+
+ wheel_directory = Path(wheel_directory).absolute()
81+
+ with tempfile.TemporaryDirectory() as tmpdir:
82+
+ tmpdir = Path(tmpdir).absolute()
83+
+ tarname = f'apache-arrow-{VERSION}.tar.gz'
84+
+ tarpath = tmpdir / tarname
85+
+ urllib.request.urlretrieve(f"https://github.com/apache/arrow/archive/refs/tags/{tarname}", tarpath)
86+
+ with tarfile.open(tarpath) as tar:
87+
+ tar.extractall(tmpdir)
88+
+ arrow_dir = tmpdir / f'arrow-apache-arrow-{VERSION}'
89+
+ assert arrow_dir.is_dir()
90+
+ arrow_dist = tmpdir / 'arrow-dist'
91+
+ build_dir = tmpdir / 'arrow-build'
92+
+ subprocess.check_call([
93+
+ 'cmake', '-S', str(arrow_dir / 'cpp'), '-B', str(build_dir),
94+
+ '-DCMAKE_INSTALL_LIBDIR=lib',
95+
+ f'-DCMAKE_INSTALL_PREFIX={arrow_dist}',
96+
+ '-DCMAKE_BUILD_TYPE=Release',
97+
+ '-DARROW_RPATH_ORIGIN=ON',
98+
+ '-DARROW_BUILD_TESTS=OFF',
99+
+ '-DARROW_BUILD_SHARED=ON',
100+
+ '-DARROW_BUILD_STATIC=OFF',
101+
+ # Features
102+
+ '-DARROW_COMPUTE=ON',
103+
+ '-DARROW_CSV=ON',
104+
+ '-DARROW_JSON=ON',
105+
+ '-DARROW_FILESYSTEM=ON',
106+
+ '-DARROW_DATASET=ON',
107+
+ '-DARROW_PARQUET=ON',
108+
+ '-DPARQUET_REQUIRE_ENCRYPTION=ON',
109+
+ '-DARROW_GANDIVA=ON',
110+
+ '-DARROW_WITH_BZ2=ON',
111+
+ '-DARROW_WITH_ZLIB=ON',
112+
+ '-DARROW_WITH_ZSTD=ON',
113+
+ '-DARROW_WITH_LZ4=ON',
114+
+ '-DARROW_WITH_SNAPPY=ON',
115+
+ '-DARROW_WITH_BROTLI=ON',
116+
+ ])
117+
+ subprocess.check_call([
118+
+ 'cmake', '--build', str(build_dir),
119+
+ ])
120+
+ subprocess.check_call([
121+
+ 'cmake', '--install', str(build_dir),
122+
+ ])
123+
+ env = os.environ.copy()
124+
+ env['ARROW_HOME'] = str(arrow_dist)
125+
+ env['CMAKE_PREFIX_PATH'] = str(arrow_dist)
126+
+ env['PYARROW_WITH_DATASET'] = '1'
127+
+ env['PYARROW_WITH_PARQUET'] = '1'
128+
+ env['PYARROW_WITH_PARQUET_ENCRYPTION'] = '1'
129+
+ env['PYARROW_WITH_GANDIVA'] = '1'
130+
+ env['PYARROW_BUNDLE_ARROW_CPP'] = '1'
131+
+ env['PYARROW_BUNDLE_CYTHON_CPP'] = '1'
132+
+ subprocess.run(
133+
+ [sys.executable, 'setup.py', 'bdist_wheel'],
134+
+ env=env,
135+
+ check=True,
136+
+ )
137+
+ wheels = list(Path('dist').glob('*.whl'))
138+
+ assert len(wheels) == 1, f"Expected 1 wheel, found {len(wheels)}"
139+
+ wheel = wheels[0]
140+
+ shutil.copyfile(wheel, wheel_directory / wheel.name)
141+
+ return str(wheel.name)
142+
diff --git a/pyproject.toml b/pyproject.toml
143+
index e7c95e0..abab83e 100644
144+
--- a/pyproject.toml
145+
+++ b/pyproject.toml
146+
@@ -29,7 +29,8 @@ requires = [
147+
"setuptools_scm[toml]>=8",
148+
"setuptools>=64",
149+
]
150+
-build-backend = "setuptools.build_meta"
151+
+build-backend = "pyarrow_build_backend"
152+
+backend-path = ["."]
153+
154+
[project]
155+
name = "pyarrow"
156+
diff --git a/MANIFEST.in b/MANIFEST.in
157+
index ef2043f..cb08a86 100644
158+
--- a/MANIFEST.in
159+
+++ b/MANIFEST.in
160+
@@ -1,6 +1,4 @@
161+
include README.md
162+
-include ../LICENSE.txt
163+
-include ../NOTICE.txt
164+
165+
global-include CMakeLists.txt
166+
graft pyarrow
167+
diff --git a/setup.cfg b/setup.cfg
168+
index ef2043f..cb08a86 100644
169+
--- a/setup.cfg
170+
+++ b/setup.cfg
171+
@@ -1,7 +1,6 @@
172+
[metadata]
173+
license_files =
174+
- ../LICENSE.txt
175+
- ../NOTICE.txt
176+
+ README.md
177+
178+
[build_sphinx]
179+
source-dir = doc/

0 commit comments

Comments
 (0)