Skip to content
Merged
7 changes: 0 additions & 7 deletions ci/scripts/python_wheel_windows_test.bat
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,5 @@ py -0p
@REM Validate wheel contents
%PYTHON_CMD% C:\arrow\ci\scripts\python_wheel_validate_contents.py --path C:\arrow\python\repaired_wheels || exit /B 1

@rem Download IANA Timezone Database for ORC C++
curl https://cygwin.osuosl.org/noarch/release/tzdata/tzdata-2024a-1.tar.xz --output tzdata.tar.xz || exit /B
mkdir %USERPROFILE%\Downloads\test\tzdata
arc unarchive tzdata.tar.xz %USERPROFILE%\Downloads\test\tzdata || exit /B
set TZDIR=%USERPROFILE%\Downloads\test\tzdata\usr\share\zoneinfo
dir %TZDIR%

@REM Execute unittest
%PYTHON_CMD% -m pytest -r s --pyargs pyarrow || exit /B 1
16 changes: 16 additions & 0 deletions python/pyarrow/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,22 @@
set_timezone_db_path(tzdata_set_path)


# GH-45295: For ORC on Windows, populate the TZDIR environment variable from
# the resource path of the "tzdata" package, unless TZDIR is already set.
#
# Note that this is a different kind of database than the one configured via
# `PYARROW_TZDATA_PATH` and passed to set_timezone_db_path.
if sys.platform == 'win32' and 'TZDIR' not in os.environ:
    from importlib import resources
    try:
        tzdata_root = resources.files('tzdata')
    except ModuleNotFoundError:
        # Best effort only: without the package, TZDIR is left unset.
        print(
            'Package "tzdata" not found. Not setting TZDIR environment variable.'
        )
    else:
        os.environ['TZDIR'] = os.path.join(tzdata_root, 'zoneinfo')


def pytest_addoption(parser):
# Create options to selectively enable test groups
def bool_env(name, default=None):
Expand Down
40 changes: 29 additions & 11 deletions python/pyarrow/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,20 @@ def _break_traceback_cycle_from_frame(frame):
refs = frame = this_frame = None


def _download_urllib(url, out_path):
    """
    Download ``url`` to ``out_path`` using only the standard library.

    Parameters
    ----------
    url : str
        Location to fetch; passed unchanged to ``urllib.request.urlopen``.
    out_path : str
        Destination file. It is opened in binary write mode, so an existing
        file at that path is overwritten.

    Raises
    ------
    urllib.error.URLError
        If ``urlopen`` cannot retrieve ``url`` (this includes ``HTTPError``
        for HTTP error responses).
    OSError
        If ``out_path`` cannot be opened for writing.
    """
    import shutil
    from urllib.request import urlopen

    with urlopen(url) as response, open(out_path, 'wb') as f:
        # Stream the body in chunks instead of holding the whole download in
        # memory before writing it out.
        shutil.copyfileobj(response, f)


def _download_requests(url, out_path):
    """
    Download ``url`` to ``out_path`` using the ``requests`` package.

    Parameters
    ----------
    url : str
        Location to fetch; passed unchanged to ``requests.get``.
    out_path : str
        Destination file. It is opened in binary write mode, so an existing
        file at that path is overwritten.

    Raises
    ------
    ImportError
        If ``requests`` is not installed. The import happens inside the
        function so that callers can catch this and use another downloader.
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    """
    import requests

    with requests.get(url) as response:
        # Without this check an HTTP error page would be written to
        # ``out_path`` as if it were the requested file, and the failure
        # would only show up later when that file is read.
        response.raise_for_status()
        with open(out_path, 'wb') as f:
            f.write(response.content)


def download_tzdata_on_windows():
r"""
Download and extract latest IANA timezone database into the
Expand All @@ -240,19 +254,23 @@ def download_tzdata_on_windows():

import tarfile

tzdata_url = "https://data.iana.org/time-zones/tzdata-latest.tar.gz"
tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata")
tzdata_compressed = os.path.join(tzdata_path, "tzdata.tar.gz")
tzdata_compressed_path = os.path.join(tzdata_path, "tzdata.tar.gz")
windows_zones_url = "https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml" # noqa
windows_zones_path = os.path.join(tzdata_path, "windowsZones.xml")
os.makedirs(tzdata_path, exist_ok=True)

from urllib.request import urlopen
with urlopen('https://data.iana.org/time-zones/tzdata-latest.tar.gz') as response:
with open(tzdata_compressed, 'wb') as f:
f.write(response.read())

assert os.path.exists(tzdata_compressed)
# Try to download the files with requests and then fall back to urllib. This
# works around possible issues in certain older environments (GH-45295)
try:
_download_requests(tzdata_url, tzdata_compressed_path)
_download_requests(windows_zones_url, windows_zones_path)
except ImportError:
_download_urllib(tzdata_url, tzdata_compressed_path)
_download_urllib(windows_zones_url, windows_zones_path)

tarfile.open(tzdata_compressed).extractall(tzdata_path)
assert os.path.exists(tzdata_compressed_path)
assert os.path.exists(windows_zones_path)

with urlopen('https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml') as response_zones: # noqa
with open(os.path.join(tzdata_path, "windowsZones.xml"), 'wb') as f:
f.write(response_zones.read())
tarfile.open(tzdata_compressed_path).extractall(tzdata_path)
1 change: 1 addition & 0 deletions python/requirements-wheel-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ hypothesis
pytest
pytz
pyuwsgi; sys.platform != 'win32' and python_version < '3.13'
requests; sys_platform == 'win32'
tzdata; sys_platform == 'win32'

# We generally test with the oldest numpy version that supports a given Python
Expand Down
Loading