Skip to content

Commit 9176297

Browse files
authored
Revert "SNOW-870225: remove vendored arrow (#1783)" (#1794)
1 parent ce4fdde commit 9176297

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+4575
-96
lines changed

.github/workflows/build_test.yml

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,68 @@ jobs:
328328
.coverage.py${{ env.shortver }}-lambda-ci
329329
junit.py${{ env.shortver }}-lambda-ci-dev.xml
330330
331+
test-vendoredarrow:
332+
name: Test Vendored Arrow ${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
333+
needs: build
334+
runs-on: ${{ matrix.os.image_name }}
335+
strategy:
336+
fail-fast: false
337+
matrix:
338+
os:
339+
- image_name: ubuntu-latest
340+
download_name: manylinux_x86_64
341+
- image_name: macos-latest
342+
download_name: macosx_x86_64
343+
- image_name: windows-2019
344+
download_name: win_amd64
345+
python-version: ["3.8", "3.11"]
346+
cloud-provider: [aws]
347+
steps:
348+
- uses: actions/checkout@v3
349+
- name: Set up Python
350+
uses: actions/setup-python@v4
351+
with:
352+
python-version: ${{ matrix.python-version }}
353+
- name: Display Python version
354+
run: python -c "import sys; print(sys.version)"
355+
- name: Setup parameters file
356+
shell: bash
357+
env:
358+
PARAMETERS_SECRET: ${{ secrets.PARAMETERS_SECRET }}
359+
run: |
360+
gpg --quiet --batch --yes --decrypt --passphrase="$PARAMETERS_SECRET" \
361+
.github/workflows/parameters/public/parameters_${{ matrix.cloud-provider }}.py.gpg > test/parameters.py
362+
- name: Download wheel(s)
363+
uses: actions/download-artifact@v3
364+
with:
365+
name: ${{ matrix.os.download_name }}_py${{ matrix.python-version }}
366+
path: dist
367+
- name: Show wheels downloaded
368+
run: ls -lh dist
369+
shell: bash
370+
- name: Upgrade setuptools, pip and wheel
371+
run: python -m pip install -U setuptools pip wheel
372+
- name: Install tox
373+
run: python -m pip install tox tox-external-wheels
374+
- name: Run tests
375+
run: python -m tox -e "py${PYTHON_VERSION/\./}-{extras,unit,integ,pandas,sso}-ci"
376+
env:
377+
PYTHON_VERSION: ${{ matrix.python-version }}
378+
cloud_provider: ${{ matrix.cloud-provider }}
379+
PYTEST_ADDOPTS: --color=yes --tb=short
380+
TOX_PARALLEL_NO_SPINNER: 1
381+
TEST_USING_VENDORED_ARROW: true
382+
shell: bash
383+
- name: Combine coverages
384+
run: python -m tox -e coverage --skip-missing-interpreters false
385+
shell: bash
386+
- uses: actions/upload-artifact@v3
387+
with:
388+
name: coverage_vendored_arrow_${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
389+
path: |
390+
.tox/.coverage
391+
.tox/coverage.xml
392+
331393
combine-coverage:
332394
if: ${{ success() || failure() }}
333395
name: Combine coverage

.pre-commit-config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ repos:
3636
exclude: >
3737
(?x)^(
3838
src/snowflake/connector/version.py|
39+
src/snowflake/connector/cpp|
3940
src/snowflake/connector/nanoarrow_cpp|
4041
)$
4142
args:
@@ -45,6 +46,7 @@ repos:
4546
name: insert-cpp-license
4647
files: >
4748
(?x)^(
49+
src/snowflake/connector/cpp/.*\.(cpp|hpp)|
4850
src/snowflake/connector/nanoarrow_cpp/.*\.(cpp|hpp)|
4951
)$
5052
args:

DESCRIPTION.md

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,6 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
99
# Release Notes
1010

1111

12-
- v3.5.0(TBD)
13-
14-
- In version 3.5.0, snowflake-connector-python is built purely upon the Apache Arrow nanoarrow project.
15-
- Reduced the wheel size to ~1MB and installation size to ~5MB.
16-
- Removed a hard dependency on a specific version of pyarrow.
17-
- Deprecated the usage of the following class/variable/environment variable for the sake of pure nanoarrow converter:
18-
- Deprecated class `snowflake.connector.cursor.NanoarrowUsage`.
19-
- Deprecated environment variable `NANOARROW_USAGE`.
20-
- Deprecated module variable `snowflake.connector.cursor.NANOARROW_USAGE`.
21-
2212
- v3.4.0(November 03,2023)
2313

2414
- Added support for `use_logical_type` in `write_pandas`.

MANIFEST.in

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ include src/snowflake/connector/nanoarrow_cpp/ArrowIterator/LICENSE.txt
77
recursive-include src/snowflake/connector py.typed *.py *.pyx
88
recursive-include src/snowflake/connector/vendored LICENSE*
99

10+
recursive-include src/snowflake/connector/cpp *.cpp *.hpp
11+
recursive-include src/snowflake/connector/cpp *.c *.h
12+
exclude src/snowflake/connector/arrow_iterator.cpp
13+
exclude src/snowflake/connector/cpp/ArrowIterator/arrow_iterator.cpp
14+
1015
recursive-include src/snowflake/connector/nanoarrow_cpp *.cpp *.hpp
1116
recursive-include src/snowflake/connector/nanoarrow_cpp *.c *.h
1217
exclude src/snowflake/connector/nanoarrow_cpp/ArrowIterator/nanoarrow_arrow_iterator.cpp
@@ -23,6 +28,6 @@ prune ci
2328
prune benchmark
2429
prune test
2530
prune tested_requirements
26-
prune src/snowflake/connector/nanoarrow_cpp/scripts
31+
prune src/snowflake/connector/cpp/scripts
2732
prune __pycache__
2833
prune samples

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ requires = [
55
"setuptools>=40.6.0",
66
"wheel",
77
"cython",
8+
# Must be kept in sync with the `setup_requirements` in `setup.cfg`
9+
"pyarrow>=10.0.1,<10.1.0",
810
]
911

1012
[tool.cibuildwheel]

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,6 @@ development =
9393
pytzdata
9494
pandas =
9595
pandas>=1.0.0,<2.1.0
96-
pyarrow
96+
pyarrow>=10.0.1,<10.1.0
9797
secure-local-storage =
9898
keyring!=16.1.0,<25.0.0

setup.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import os
77
import sys
88
import warnings
9+
from shutil import copy
910

1011
from setuptools import Extension, setup
1112

@@ -46,6 +47,8 @@
4647
).lower() in ("y", "yes", "t", "true", "1", "on")
4748

4849
try:
50+
import numpy
51+
import pyarrow
4952
from Cython.Build import cythonize
5053
from Cython.Distutils import build_ext
5154

@@ -58,8 +61,15 @@
5861
_ABLE_TO_COMPILE_EXTENSIONS = False
5962

6063
if _ABLE_TO_COMPILE_EXTENSIONS and not SNOWFLAKE_DISABLE_COMPILE_ARROW_EXTENSIONS:
64+
pyarrow_version = tuple(int(x) for x in pyarrow.__version__.split("."))
6165
extensions = cythonize(
6266
[
67+
# vendored arrow iterator
68+
Extension(
69+
name="snowflake.connector.arrow_iterator",
70+
sources=[os.path.join(CONNECTOR_SRC_DIR, "arrow_iterator.pyx")],
71+
),
72+
# nanoarrow iterator
6373
Extension(
6474
name="snowflake.connector.nanoarrow_arrow_iterator",
6575
sources=[
@@ -68,15 +78,124 @@
6878
language="c++",
6979
),
7080
],
81+
compile_time_env=dict(ARROW_LESS_THAN_8=pyarrow_version < (8,)),
7182
)
7283

7384
class MyBuildExt(build_ext):
85+
# Arrow shared libraries bundled with the Python connector, keyed by
# ``sys.platform``. Review these file names carefully whenever the pyarrow
# library is upgraded.
arrow_libs_to_copy = dict(
    linux=[
        "libarrow.so.1000",
        "libarrow_dataset.so.1000",
        "libarrow_python.so.1000",
        "libparquet.so.1000",
    ],
    darwin=[
        "libarrow.1000.dylib",
        "libarrow_dataset.1000.dylib",
        "libarrow_python.1000.dylib",
        "libparquet.1000.dylib",
    ],
    win32=[
        "arrow.dll",
        "arrow_dataset.dll",
        "arrow_python.dll",
        "parquet.dll",
    ],
)

# Arrow libraries handed to the linker, keyed by ``sys.platform``. Linux and
# macOS link against the same files that get bundled (independent list
# copies); Windows links against the ``.lib`` files instead of the DLLs.
arrow_libs_to_link = dict(
    linux=list(arrow_libs_to_copy["linux"]),
    darwin=list(arrow_libs_to_copy["darwin"]),
    win32=[
        "arrow.lib",
        "arrow_dataset.lib",
        "arrow_python.lib",
        "parquet.lib",
    ],
)
129+
74130
def build_extension(self, ext):
75131
if options["debug"]:
76132
ext.extra_compile_args.append("-g")
77133
ext.extra_link_args.append("-g")
78134
current_dir = os.getcwd()
79135

136+
# vendored arrow extension
137+
if ext.name == "snowflake.connector.arrow_iterator":
138+
if not os.environ.get("SF_NO_COPY_ARROW_LIB", False):
139+
self._copy_arrow_lib()
140+
CPP_SRC_DIR = os.path.join(CONNECTOR_SRC_DIR, "cpp")
141+
ARROW_ITERATOR_SRC_DIR = os.path.join(CPP_SRC_DIR, "ArrowIterator")
142+
LOGGING_SRC_DIR = os.path.join(CPP_SRC_DIR, "Logging")
143+
144+
ext.sources += [
145+
os.path.join(ARROW_ITERATOR_SRC_DIR, "CArrowIterator.cpp"),
146+
os.path.join(ARROW_ITERATOR_SRC_DIR, "CArrowChunkIterator.cpp"),
147+
os.path.join(ARROW_ITERATOR_SRC_DIR, "CArrowTableIterator.cpp"),
148+
os.path.join(ARROW_ITERATOR_SRC_DIR, "SnowflakeType.cpp"),
149+
os.path.join(ARROW_ITERATOR_SRC_DIR, "BinaryConverter.cpp"),
150+
os.path.join(ARROW_ITERATOR_SRC_DIR, "BooleanConverter.cpp"),
151+
os.path.join(ARROW_ITERATOR_SRC_DIR, "DecimalConverter.cpp"),
152+
os.path.join(ARROW_ITERATOR_SRC_DIR, "DateConverter.cpp"),
153+
os.path.join(ARROW_ITERATOR_SRC_DIR, "FloatConverter.cpp"),
154+
os.path.join(ARROW_ITERATOR_SRC_DIR, "IntConverter.cpp"),
155+
os.path.join(ARROW_ITERATOR_SRC_DIR, "StringConverter.cpp"),
156+
os.path.join(ARROW_ITERATOR_SRC_DIR, "TimeConverter.cpp"),
157+
os.path.join(ARROW_ITERATOR_SRC_DIR, "TimeStampConverter.cpp"),
158+
os.path.join(ARROW_ITERATOR_SRC_DIR, "Python", "Common.cpp"),
159+
os.path.join(ARROW_ITERATOR_SRC_DIR, "Python", "Helpers.cpp"),
160+
os.path.join(ARROW_ITERATOR_SRC_DIR, "Util", "time.cpp"),
161+
LOGGING_SRC_DIR + "/logging.cpp",
162+
]
163+
ext.include_dirs.append(ARROW_ITERATOR_SRC_DIR)
164+
ext.include_dirs.append(LOGGING_SRC_DIR)
165+
166+
if sys.platform == "win32":
167+
if not any("/std" not in s for s in ext.extra_compile_args):
168+
ext.extra_compile_args.append("/std:c++17")
169+
ext.include_dirs.append(pyarrow.get_include())
170+
ext.include_dirs.append(numpy.get_include())
171+
elif sys.platform == "linux" or sys.platform == "darwin":
172+
ext.extra_compile_args.append("-isystem" + pyarrow.get_include())
173+
ext.extra_compile_args.append("-isystem" + numpy.get_include())
174+
if "std=" not in os.environ.get("CXXFLAGS", ""):
175+
ext.extra_compile_args.append("-std=c++17")
176+
ext.extra_compile_args.append("-D_GLIBCXX_USE_CXX11_ABI=0")
177+
if (
178+
sys.platform == "darwin"
179+
and "macosx-version-min" not in os.environ.get("CXXFLAGS", "")
180+
):
181+
ext.extra_compile_args.append("-mmacosx-version-min=10.13")
182+
183+
ext.library_dirs.append(
184+
os.path.join(current_dir, self.build_lib, "snowflake", "connector")
185+
)
186+
ext.extra_link_args += self._get_arrow_lib_as_linker_input()
187+
188+
# sys.platform on Linux used to carry a version suffix (e.g. linux2, linux3).
189+
# Since Python 3.3 it is always just 'linux'.
190+
# https://docs.python.org/3/library/sys.html#sys.platform
191+
if sys.platform == "linux":
192+
ext.extra_link_args += ["-Wl,-rpath,$ORIGIN"]
193+
elif sys.platform == "darwin":
194+
# rpath,$ORIGIN only works on Linux; it does not work on darwin, so use @loader_path instead.
195+
# fyi, https://medium.com/@donblas/fun-with-rpath-otool-and-install-name-tool-e3e41ae86172
196+
ext.extra_link_args += ["-rpath", "@loader_path"]
197+
198+
# nanoarrow extension
80199
if ext.name == "snowflake.connector.nanoarrow_arrow_iterator":
81200
NANOARROW_CPP_SRC_DIR = os.path.join(CONNECTOR_SRC_DIR, "nanoarrow_cpp")
82201
NANOARROW_ARROW_ITERATOR_SRC_DIR = os.path.join(
@@ -181,6 +300,32 @@ def new__compile(obj, src: str, ext, cc_args, extra_postargs, pp_opts):
181300
finally:
182301
self.compiler._compile = original__compile
183302

303+
def _get_arrow_lib_dir(self):
    """Return the directory that holds the Arrow shared libraries.

    The ``SF_ARROW_LIBDIR`` environment variable wins whenever it is set
    (even to an empty string); otherwise the first directory reported by
    ``pyarrow.get_library_dirs()`` is returned.
    """
    override = os.environ.get("SF_ARROW_LIBDIR")
    if override is None:
        # No explicit override: fall back to what pyarrow reports.
        return pyarrow.get_library_dirs()[0]
    return override
307+
308+
def _copy_arrow_lib(self):
    """Copy this platform's Arrow shared libraries into the build tree.

    The file names come from ``self.arrow_libs_to_copy[sys.platform]`` and
    are copied from the directory returned by ``self._get_arrow_lib_dir()``
    into ``<build_lib>/snowflake/connector``, which is created if needed.

    Raises:
        KeyError: if ``sys.platform`` has no entry in ``arrow_libs_to_copy``.
    """
    libs_to_bundle = self.arrow_libs_to_copy[sys.platform]

    build_dir = os.path.join(self.build_lib, "snowflake", "connector")
    os.makedirs(build_dir, exist_ok=True)

    # The library directory does not change between iterations, so resolve
    # it once instead of re-reading the environment / pyarrow per library.
    arrow_lib_dir = self._get_arrow_lib_dir()
    for lib in libs_to_bundle:
        copy(os.path.join(arrow_lib_dir, lib), build_dir)
317+
318+
def _get_arrow_lib_as_linker_input(self):
    """Return the paths of the Arrow libraries to pass to the linker.

    Each file name in ``self.arrow_libs_to_link[sys.platform]`` is joined
    onto the directory returned by ``self._get_arrow_lib_dir()``.

    Returns:
        list[str]: one path per library, in the order they are listed.

    Raises:
        KeyError: if ``sys.platform`` has no entry in ``arrow_libs_to_link``.
        FileNotFoundError: if an expected library file does not exist.
    """
    # Resolve the directory once; it is the same for every library.
    arrow_lib_dir = self._get_arrow_lib_dir()
    linker_inputs = []

    for lib in self.arrow_libs_to_link[sys.platform]:
        source = os.path.join(arrow_lib_dir, lib)
        # Explicit check instead of ``assert``: asserts are stripped under
        # ``python -O`` and would give no hint about which file is missing.
        if not os.path.exists(source):
            raise FileNotFoundError(
                f"Arrow library required for linking not found: {source}"
            )
        linker_inputs.append(source)

    return linker_inputs
328+
184329
cmd_class = {"build_ext": MyBuildExt}
185330

186331
setup(

0 commit comments

Comments
 (0)