Skip to content

Commit ce4fdde

Browse files
authored
SNOW-870225: remove vendored arrow (#1783)
1 parent bbf8aac commit ce4fdde

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

57 files changed

+96
-4575
lines changed

.github/workflows/build_test.yml

Lines changed: 0 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -328,68 +328,6 @@ jobs:
328328
.coverage.py${{ env.shortver }}-lambda-ci
329329
junit.py${{ env.shortver }}-lambda-ci-dev.xml
330330
331-
test-vendoredarrow:
332-
name: Test Vendored Arrow ${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
333-
needs: build
334-
runs-on: ${{ matrix.os.image_name }}
335-
strategy:
336-
fail-fast: false
337-
matrix:
338-
os:
339-
- image_name: ubuntu-latest
340-
download_name: manylinux_x86_64
341-
- image_name: macos-latest
342-
download_name: macosx_x86_64
343-
- image_name: windows-2019
344-
download_name: win_amd64
345-
python-version: ["3.8", "3.11"]
346-
cloud-provider: [aws]
347-
steps:
348-
- uses: actions/checkout@v3
349-
- name: Set up Python
350-
uses: actions/setup-python@v4
351-
with:
352-
python-version: ${{ matrix.python-version }}
353-
- name: Display Python version
354-
run: python -c "import sys; print(sys.version)"
355-
- name: Setup parameters file
356-
shell: bash
357-
env:
358-
PARAMETERS_SECRET: ${{ secrets.PARAMETERS_SECRET }}
359-
run: |
360-
gpg --quiet --batch --yes --decrypt --passphrase="$PARAMETERS_SECRET" \
361-
.github/workflows/parameters/public/parameters_${{ matrix.cloud-provider }}.py.gpg > test/parameters.py
362-
- name: Download wheel(s)
363-
uses: actions/download-artifact@v3
364-
with:
365-
name: ${{ matrix.os.download_name }}_py${{ matrix.python-version }}
366-
path: dist
367-
- name: Show wheels downloaded
368-
run: ls -lh dist
369-
shell: bash
370-
- name: Upgrade setuptools, pip and wheel
371-
run: python -m pip install -U setuptools pip wheel
372-
- name: Install tox
373-
run: python -m pip install tox tox-external-wheels
374-
- name: Run tests
375-
run: python -m tox -e "py${PYTHON_VERSION/\./}-{extras,unit,integ,pandas,sso}-ci"
376-
env:
377-
PYTHON_VERSION: ${{ matrix.python-version }}
378-
cloud_provider: ${{ matrix.cloud-provider }}
379-
PYTEST_ADDOPTS: --color=yes --tb=short
380-
TOX_PARALLEL_NO_SPINNER: 1
381-
TEST_USING_VENDORED_ARROW: true
382-
shell: bash
383-
- name: Combine coverages
384-
run: python -m tox -e coverage --skip-missing-interpreters false
385-
shell: bash
386-
- uses: actions/upload-artifact@v3
387-
with:
388-
name: coverage_vendored_arrow_${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
389-
path: |
390-
.tox/.coverage
391-
.tox/coverage.xml
392-
393331
combine-coverage:
394332
if: ${{ success() || failure() }}
395333
name: Combine coverage

.pre-commit-config.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ repos:
3636
exclude: >
3737
(?x)^(
3838
src/snowflake/connector/version.py|
39-
src/snowflake/connector/cpp|
4039
src/snowflake/connector/nanoarrow_cpp|
4140
)$
4241
args:
@@ -46,7 +45,6 @@ repos:
4645
name: insert-cpp-license
4746
files: >
4847
(?x)^(
49-
src/snowflake/connector/cpp/.*\.(cpp|hpp)|
5048
src/snowflake/connector/nanoarrow_cpp/.*\.(cpp|hpp)|
5149
)$
5250
args:

DESCRIPTION.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,16 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
99
# Release Notes
1010

1111

12+
- v3.5.0(TBD)
13+
14+
- Version 3.5.0 of snowflake-connector-python is built purely upon the Apache Arrow nanoarrow project.
15+
- Reduced the wheel size to ~1MB and installation size to ~5MB.
16+
- Removed a hard dependency on a specific version of pyarrow.
17+
- Deprecated the following class, module variable, and environment variable in favor of the pure nanoarrow converter:
18+
- Deprecated class `snowflake.connector.cursor.NanoarrowUsage`.
19+
- Deprecated environment variable `NANOARROW_USAGE`.
20+
- Deprecated module variable `snowflake.connector.cursor.NANOARROW_USAGE`.
21+
1222
- v3.4.0(November 03,2023)
1323

1424
- Added support for `use_logical_type` in `write_pandas`.

MANIFEST.in

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,6 @@ include src/snowflake/connector/nanoarrow_cpp/ArrowIterator/LICENSE.txt
77
recursive-include src/snowflake/connector py.typed *.py *.pyx
88
recursive-include src/snowflake/connector/vendored LICENSE*
99

10-
recursive-include src/snowflake/connector/cpp *.cpp *.hpp
11-
recursive-include src/snowflake/connector/cpp *.c *.h
12-
exclude src/snowflake/connector/arrow_iterator.cpp
13-
exclude src/snowflake/connector/cpp/ArrowIterator/arrow_iterator.cpp
14-
1510
recursive-include src/snowflake/connector/nanoarrow_cpp *.cpp *.hpp
1611
recursive-include src/snowflake/connector/nanoarrow_cpp *.c *.h
1712
exclude src/snowflake/connector/nanoarrow_cpp/ArrowIterator/nanoarrow_arrow_iterator.cpp
@@ -28,6 +23,6 @@ prune ci
2823
prune benchmark
2924
prune test
3025
prune tested_requirements
31-
prune src/snowflake/connector/cpp/scripts
26+
prune src/snowflake/connector/nanoarrow_cpp/scripts
3227
prune __pycache__
3328
prune samples

pyproject.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@ requires = [
55
"setuptools>=40.6.0",
66
"wheel",
77
"cython",
8-
# Must be kept in sync with the `setup_requirements` in `setup.cfg`
9-
"pyarrow>=10.0.1,<10.1.0",
108
]
119

1210
[tool.cibuildwheel]

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,6 @@ development =
9393
pytzdata
9494
pandas =
9595
pandas>=1.0.0,<2.1.0
96-
pyarrow>=10.0.1,<10.1.0
96+
pyarrow
9797
secure-local-storage =
9898
keyring!=16.1.0,<25.0.0

setup.py

Lines changed: 0 additions & 145 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import os
77
import sys
88
import warnings
9-
from shutil import copy
109

1110
from setuptools import Extension, setup
1211

@@ -47,8 +46,6 @@
4746
).lower() in ("y", "yes", "t", "true", "1", "on")
4847

4948
try:
50-
import numpy
51-
import pyarrow
5249
from Cython.Build import cythonize
5350
from Cython.Distutils import build_ext
5451

@@ -61,15 +58,8 @@
6158
_ABLE_TO_COMPILE_EXTENSIONS = False
6259

6360
if _ABLE_TO_COMPILE_EXTENSIONS and not SNOWFLAKE_DISABLE_COMPILE_ARROW_EXTENSIONS:
64-
pyarrow_version = tuple(int(x) for x in pyarrow.__version__.split("."))
6561
extensions = cythonize(
6662
[
67-
# vendored arrow iterator
68-
Extension(
69-
name="snowflake.connector.arrow_iterator",
70-
sources=[os.path.join(CONNECTOR_SRC_DIR, "arrow_iterator.pyx")],
71-
),
72-
# nanoarrow iterator
7363
Extension(
7464
name="snowflake.connector.nanoarrow_arrow_iterator",
7565
sources=[
@@ -78,124 +68,15 @@
7868
language="c++",
7969
),
8070
],
81-
compile_time_env=dict(ARROW_LESS_THAN_8=pyarrow_version < (8,)),
8271
)
8372

8473
class MyBuildExt(build_ext):
85-
# list of libraries that will be bundled with python connector,
86-
# this list should be carefully examined when pyarrow lib is
87-
# upgraded
88-
arrow_libs_to_copy = {
89-
"linux": [
90-
"libarrow.so.1000",
91-
"libarrow_dataset.so.1000",
92-
"libarrow_python.so.1000",
93-
"libparquet.so.1000",
94-
],
95-
"darwin": [
96-
"libarrow.1000.dylib",
97-
"libarrow_dataset.1000.dylib",
98-
"libarrow_python.1000.dylib",
99-
"libparquet.1000.dylib",
100-
],
101-
"win32": [
102-
"arrow.dll",
103-
"arrow_dataset.dll",
104-
"arrow_python.dll",
105-
"parquet.dll",
106-
],
107-
}
108-
109-
arrow_libs_to_link = {
110-
"linux": [
111-
"libarrow.so.1000",
112-
"libarrow_dataset.so.1000",
113-
"libarrow_python.so.1000",
114-
"libparquet.so.1000",
115-
],
116-
"darwin": [
117-
"libarrow.1000.dylib",
118-
"libarrow_dataset.1000.dylib",
119-
"libarrow_python.1000.dylib",
120-
"libparquet.1000.dylib",
121-
],
122-
"win32": [
123-
"arrow.lib",
124-
"arrow_dataset.lib",
125-
"arrow_python.lib",
126-
"parquet.lib",
127-
],
128-
}
129-
13074
def build_extension(self, ext):
13175
if options["debug"]:
13276
ext.extra_compile_args.append("-g")
13377
ext.extra_link_args.append("-g")
13478
current_dir = os.getcwd()
13579

136-
# vendored arrow extension
137-
if ext.name == "snowflake.connector.arrow_iterator":
138-
if not os.environ.get("SF_NO_COPY_ARROW_LIB", False):
139-
self._copy_arrow_lib()
140-
CPP_SRC_DIR = os.path.join(CONNECTOR_SRC_DIR, "cpp")
141-
ARROW_ITERATOR_SRC_DIR = os.path.join(CPP_SRC_DIR, "ArrowIterator")
142-
LOGGING_SRC_DIR = os.path.join(CPP_SRC_DIR, "Logging")
143-
144-
ext.sources += [
145-
os.path.join(ARROW_ITERATOR_SRC_DIR, "CArrowIterator.cpp"),
146-
os.path.join(ARROW_ITERATOR_SRC_DIR, "CArrowChunkIterator.cpp"),
147-
os.path.join(ARROW_ITERATOR_SRC_DIR, "CArrowTableIterator.cpp"),
148-
os.path.join(ARROW_ITERATOR_SRC_DIR, "SnowflakeType.cpp"),
149-
os.path.join(ARROW_ITERATOR_SRC_DIR, "BinaryConverter.cpp"),
150-
os.path.join(ARROW_ITERATOR_SRC_DIR, "BooleanConverter.cpp"),
151-
os.path.join(ARROW_ITERATOR_SRC_DIR, "DecimalConverter.cpp"),
152-
os.path.join(ARROW_ITERATOR_SRC_DIR, "DateConverter.cpp"),
153-
os.path.join(ARROW_ITERATOR_SRC_DIR, "FloatConverter.cpp"),
154-
os.path.join(ARROW_ITERATOR_SRC_DIR, "IntConverter.cpp"),
155-
os.path.join(ARROW_ITERATOR_SRC_DIR, "StringConverter.cpp"),
156-
os.path.join(ARROW_ITERATOR_SRC_DIR, "TimeConverter.cpp"),
157-
os.path.join(ARROW_ITERATOR_SRC_DIR, "TimeStampConverter.cpp"),
158-
os.path.join(ARROW_ITERATOR_SRC_DIR, "Python", "Common.cpp"),
159-
os.path.join(ARROW_ITERATOR_SRC_DIR, "Python", "Helpers.cpp"),
160-
os.path.join(ARROW_ITERATOR_SRC_DIR, "Util", "time.cpp"),
161-
LOGGING_SRC_DIR + "/logging.cpp",
162-
]
163-
ext.include_dirs.append(ARROW_ITERATOR_SRC_DIR)
164-
ext.include_dirs.append(LOGGING_SRC_DIR)
165-
166-
if sys.platform == "win32":
167-
if not any("/std" not in s for s in ext.extra_compile_args):
168-
ext.extra_compile_args.append("/std:c++17")
169-
ext.include_dirs.append(pyarrow.get_include())
170-
ext.include_dirs.append(numpy.get_include())
171-
elif sys.platform == "linux" or sys.platform == "darwin":
172-
ext.extra_compile_args.append("-isystem" + pyarrow.get_include())
173-
ext.extra_compile_args.append("-isystem" + numpy.get_include())
174-
if "std=" not in os.environ.get("CXXFLAGS", ""):
175-
ext.extra_compile_args.append("-std=c++17")
176-
ext.extra_compile_args.append("-D_GLIBCXX_USE_CXX11_ABI=0")
177-
if (
178-
sys.platform == "darwin"
179-
and "macosx-version-min" not in os.environ.get("CXXFLAGS", "")
180-
):
181-
ext.extra_compile_args.append("-mmacosx-version-min=10.13")
182-
183-
ext.library_dirs.append(
184-
os.path.join(current_dir, self.build_lib, "snowflake", "connector")
185-
)
186-
ext.extra_link_args += self._get_arrow_lib_as_linker_input()
187-
188-
# sys.platform for linux used to return with version suffix, (i.e. linux2, linux3)
189-
# After version 3.3, it will always be just 'linux'
190-
# https://docs.python.org/3/library/sys.html#sys.platform
191-
if sys.platform == "linux":
192-
ext.extra_link_args += ["-Wl,-rpath,$ORIGIN"]
193-
elif sys.platform == "darwin":
194-
# rpath,$ORIGIN only work on linux, did not work on darwin. use @loader_path instead
195-
# fyi, https://medium.com/@donblas/fun-with-rpath-otool-and-install-name-tool-e3e41ae86172
196-
ext.extra_link_args += ["-rpath", "@loader_path"]
197-
198-
# nanoarrow extension
19980
if ext.name == "snowflake.connector.nanoarrow_arrow_iterator":
20081
NANOARROW_CPP_SRC_DIR = os.path.join(CONNECTOR_SRC_DIR, "nanoarrow_cpp")
20182
NANOARROW_ARROW_ITERATOR_SRC_DIR = os.path.join(
@@ -300,32 +181,6 @@ def new__compile(obj, src: str, ext, cc_args, extra_postargs, pp_opts):
300181
finally:
301182
self.compiler._compile = original__compile
302183

303-
def _get_arrow_lib_dir(self):
304-
if "SF_ARROW_LIBDIR" in os.environ:
305-
return os.environ["SF_ARROW_LIBDIR"]
306-
return pyarrow.get_library_dirs()[0]
307-
308-
def _copy_arrow_lib(self):
309-
libs_to_bundle = self.arrow_libs_to_copy[sys.platform]
310-
311-
build_dir = os.path.join(self.build_lib, "snowflake", "connector")
312-
os.makedirs(build_dir, exist_ok=True)
313-
314-
for lib in libs_to_bundle:
315-
source = f"{self._get_arrow_lib_dir()}/{lib}"
316-
copy(source, build_dir)
317-
318-
def _get_arrow_lib_as_linker_input(self):
319-
link_lib = self.arrow_libs_to_link[sys.platform]
320-
ret = []
321-
322-
for lib in link_lib:
323-
source = f"{self._get_arrow_lib_dir()}/{lib}"
324-
assert os.path.exists(source)
325-
ret.append(source)
326-
327-
return ret
328-
329184
cmd_class = {"build_ext": MyBuildExt}
330185

331186
setup(

0 commit comments

Comments
 (0)