Skip to content

Commit 9f6ecc6

Browse files
authored
Support newer versions of PyArrow (#34892)
* Support newer versions of PyArrow
* add context for lower bound that would let us remove the pyarrow-hotfix
* smaller jump for pyarrow
* run postcommit
* tweak dynamic work rebalancing test
* add comment explaining unit test behavior
* yapf
1 parent 00f6e6e commit 9f6ecc6

File tree

6 files changed

+20
-6
lines changed

6 files changed

+20
-6
lines changed
Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
{
22
"comment": "Modify this file in a trivial way to cause this test suite to run.",
3-
"https://github.com/apache/beam/pull/32440": "test new datastream runner for batch"
4-
"modification": 10
3+
"modification": 11
54
}
65

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
{
22
"comment": "Modify this file in a trivial way to cause this test suite to run",
3-
"modification": 1
3+
"modification": 0
44
}

sdks/python/apache_beam/io/parquetio_test.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,11 @@ def test_read_with_splitting_multiple_row_group(self):
487487
self._run_parquet_test(file_name, None, 10000, True, expected_result)
488488

489489
def test_dynamic_work_rebalancing(self):
490-
file_name = self._write_data(count=120, row_group_size=20)
490+
# This test depends on count being sufficiently large + the ratio of
491+
# count to row_group_size also being sufficiently large (but the required
492+
# ratio to pass varies for values of row_group_size and, somehow, the
493+
# version of pyarrow being tested against.)
494+
file_name = self._write_data(count=280, row_group_size=20)
491495
source = _create_parquet_source(file_name)
492496

493497
splits = [split for split in source.split(desired_bundle_size=float('inf'))]

sdks/python/setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,10 @@ def cythonize(*args, **kwargs):
145145
pyarrow_dependency = ['']
146146
else:
147147
pyarrow_dependency = [
148-
'pyarrow>=3.0.0,<17.0.0',
148+
'pyarrow>=3.0.0,<19.0.0',
149149
# NOTE(https://github.com/apache/beam/issues/29392): We can remove this
150150
# once Beam increases the pyarrow lower bound to a version that fixes CVE.
151+
# (lower bound >= 14.0.1)
151152
'pyarrow-hotfix<1'
152153
]
153154

sdks/python/test-suites/tox/py39/build.gradle

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,14 @@ toxTask "testPy39pyarrow-16", "py39-pyarrow-16", "${posargs}"
8585
test.dependsOn "testPy39pyarrow-16"
8686
postCommitPyDep.dependsOn "testPy39pyarrow-16"
8787

88+
toxTask "testPy39pyarrow-17", "py39-pyarrow-17", "${posargs}"
89+
test.dependsOn "testPy39pyarrow-17"
90+
postCommitPyDep.dependsOn "testPy39pyarrow-17"
91+
92+
toxTask "testPy39pyarrow-18", "py39-pyarrow-18", "${posargs}"
93+
test.dependsOn "testPy39pyarrow-18"
94+
postCommitPyDep.dependsOn "testPy39pyarrow-18"
95+
8896
// Create a test task for each supported minor version of pandas
8997
toxTask "testPy39pandas-14", "py39-pandas-14", "${posargs}"
9098
test.dependsOn "testPy39pandas-14"

sdks/python/tox.ini

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ extras = test
321321
commands =
322322
bash {toxinidir}/scripts/pytest_validates_runner.sh {envname} {toxinidir}/apache_beam/runners/portability/prism_runner_test.py {posargs}
323323

324-
[testenv:py{39,310}-pyarrow-{3,9,10,11,12,13,14,15,16}]
324+
[testenv:py{39,310}-pyarrow-{3,9,10,11,12,13,14,15,16,17,18}]
325325
deps =
326326
# As a courtesy to users, test against the oldest allowed version of Pyarrow.
327327
# We'd have to increase the pyarrow lower bound when Python 3.9 is deprecated.
@@ -339,6 +339,8 @@ deps =
339339
14: pyarrow>=14,<15
340340
15: pyarrow>=15,<16
341341
16: pyarrow>=16,<17
342+
17: pyarrow>=17,<18
343+
18: pyarrow>=18,<19
342344
numpy==1.26.4
343345
commands =
344346
# Log pyarrow and numpy version for debugging

0 commit comments

Comments
 (0)