|
6 | 6 | import os
|
7 | 7 | import sys
|
8 | 8 | import warnings
|
| 9 | +from shutil import copy |
9 | 10 |
|
10 | 11 | from setuptools import Extension, setup
|
11 | 12 |
|
|
46 | 47 | ).lower() in ("y", "yes", "t", "true", "1", "on")
|
47 | 48 |
|
48 | 49 | try:
|
| 50 | + import numpy |
| 51 | + import pyarrow |
49 | 52 | from Cython.Build import cythonize
|
50 | 53 | from Cython.Distutils import build_ext
|
51 | 54 |
|
|
58 | 61 | _ABLE_TO_COMPILE_EXTENSIONS = False
|
59 | 62 |
|
60 | 63 | if _ABLE_TO_COMPILE_EXTENSIONS and not SNOWFLAKE_DISABLE_COMPILE_ARROW_EXTENSIONS:
|
| 64 | + pyarrow_version = tuple(int(x) for x in pyarrow.__version__.split(".")) |
61 | 65 | extensions = cythonize(
|
62 | 66 | [
|
| 67 | + # vendored arrow iterator |
| 68 | + Extension( |
| 69 | + name="snowflake.connector.arrow_iterator", |
| 70 | + sources=[os.path.join(CONNECTOR_SRC_DIR, "arrow_iterator.pyx")], |
| 71 | + ), |
| 72 | + # nanoarrow iterator |
63 | 73 | Extension(
|
64 | 74 | name="snowflake.connector.nanoarrow_arrow_iterator",
|
65 | 75 | sources=[
|
|
68 | 78 | language="c++",
|
69 | 79 | ),
|
70 | 80 | ],
|
| 81 | + compile_time_env=dict(ARROW_LESS_THAN_8=pyarrow_version < (8,)), |
71 | 82 | )
|
72 | 83 |
|
73 | 84 | class MyBuildExt(build_ext):
|
| 85 | + # list of libraries that will be bundled with python connector, |
| 86 | + # this list should be carefully examined when pyarrow lib is |
| 87 | + # upgraded |
| 88 | + arrow_libs_to_copy = { |
| 89 | + "linux": [ |
| 90 | + "libarrow.so.1000", |
| 91 | + "libarrow_dataset.so.1000", |
| 92 | + "libarrow_python.so.1000", |
| 93 | + "libparquet.so.1000", |
| 94 | + ], |
| 95 | + "darwin": [ |
| 96 | + "libarrow.1000.dylib", |
| 97 | + "libarrow_dataset.1000.dylib", |
| 98 | + "libarrow_python.1000.dylib", |
| 99 | + "libparquet.1000.dylib", |
| 100 | + ], |
| 101 | + "win32": [ |
| 102 | + "arrow.dll", |
| 103 | + "arrow_dataset.dll", |
| 104 | + "arrow_python.dll", |
| 105 | + "parquet.dll", |
| 106 | + ], |
| 107 | + } |
| 108 | + |
| 109 | + arrow_libs_to_link = { |
| 110 | + "linux": [ |
| 111 | + "libarrow.so.1000", |
| 112 | + "libarrow_dataset.so.1000", |
| 113 | + "libarrow_python.so.1000", |
| 114 | + "libparquet.so.1000", |
| 115 | + ], |
| 116 | + "darwin": [ |
| 117 | + "libarrow.1000.dylib", |
| 118 | + "libarrow_dataset.1000.dylib", |
| 119 | + "libarrow_python.1000.dylib", |
| 120 | + "libparquet.1000.dylib", |
| 121 | + ], |
| 122 | + "win32": [ |
| 123 | + "arrow.lib", |
| 124 | + "arrow_dataset.lib", |
| 125 | + "arrow_python.lib", |
| 126 | + "parquet.lib", |
| 127 | + ], |
| 128 | + } |
| 129 | + |
74 | 130 | def build_extension(self, ext):
|
75 | 131 | if options["debug"]:
|
76 | 132 | ext.extra_compile_args.append("-g")
|
77 | 133 | ext.extra_link_args.append("-g")
|
78 | 134 | current_dir = os.getcwd()
|
79 | 135 |
|
| 136 | + # vendored arrow extension |
| 137 | + if ext.name == "snowflake.connector.arrow_iterator": |
| 138 | + if not os.environ.get("SF_NO_COPY_ARROW_LIB", False): |
| 139 | + self._copy_arrow_lib() |
| 140 | + CPP_SRC_DIR = os.path.join(CONNECTOR_SRC_DIR, "cpp") |
| 141 | + ARROW_ITERATOR_SRC_DIR = os.path.join(CPP_SRC_DIR, "ArrowIterator") |
| 142 | + LOGGING_SRC_DIR = os.path.join(CPP_SRC_DIR, "Logging") |
| 143 | + |
| 144 | + ext.sources += [ |
| 145 | + os.path.join(ARROW_ITERATOR_SRC_DIR, "CArrowIterator.cpp"), |
| 146 | + os.path.join(ARROW_ITERATOR_SRC_DIR, "CArrowChunkIterator.cpp"), |
| 147 | + os.path.join(ARROW_ITERATOR_SRC_DIR, "CArrowTableIterator.cpp"), |
| 148 | + os.path.join(ARROW_ITERATOR_SRC_DIR, "SnowflakeType.cpp"), |
| 149 | + os.path.join(ARROW_ITERATOR_SRC_DIR, "BinaryConverter.cpp"), |
| 150 | + os.path.join(ARROW_ITERATOR_SRC_DIR, "BooleanConverter.cpp"), |
| 151 | + os.path.join(ARROW_ITERATOR_SRC_DIR, "DecimalConverter.cpp"), |
| 152 | + os.path.join(ARROW_ITERATOR_SRC_DIR, "DateConverter.cpp"), |
| 153 | + os.path.join(ARROW_ITERATOR_SRC_DIR, "FloatConverter.cpp"), |
| 154 | + os.path.join(ARROW_ITERATOR_SRC_DIR, "IntConverter.cpp"), |
| 155 | + os.path.join(ARROW_ITERATOR_SRC_DIR, "StringConverter.cpp"), |
| 156 | + os.path.join(ARROW_ITERATOR_SRC_DIR, "TimeConverter.cpp"), |
| 157 | + os.path.join(ARROW_ITERATOR_SRC_DIR, "TimeStampConverter.cpp"), |
| 158 | + os.path.join(ARROW_ITERATOR_SRC_DIR, "Python", "Common.cpp"), |
| 159 | + os.path.join(ARROW_ITERATOR_SRC_DIR, "Python", "Helpers.cpp"), |
| 160 | + os.path.join(ARROW_ITERATOR_SRC_DIR, "Util", "time.cpp"), |
| 161 | + LOGGING_SRC_DIR + "/logging.cpp", |
| 162 | + ] |
| 163 | + ext.include_dirs.append(ARROW_ITERATOR_SRC_DIR) |
| 164 | + ext.include_dirs.append(LOGGING_SRC_DIR) |
| 165 | + |
| 166 | + if sys.platform == "win32": |
| 167 | + if not any("/std" not in s for s in ext.extra_compile_args): |
| 168 | + ext.extra_compile_args.append("/std:c++17") |
| 169 | + ext.include_dirs.append(pyarrow.get_include()) |
| 170 | + ext.include_dirs.append(numpy.get_include()) |
| 171 | + elif sys.platform == "linux" or sys.platform == "darwin": |
| 172 | + ext.extra_compile_args.append("-isystem" + pyarrow.get_include()) |
| 173 | + ext.extra_compile_args.append("-isystem" + numpy.get_include()) |
| 174 | + if "std=" not in os.environ.get("CXXFLAGS", ""): |
| 175 | + ext.extra_compile_args.append("-std=c++17") |
| 176 | + ext.extra_compile_args.append("-D_GLIBCXX_USE_CXX11_ABI=0") |
| 177 | + if ( |
| 178 | + sys.platform == "darwin" |
| 179 | + and "macosx-version-min" not in os.environ.get("CXXFLAGS", "") |
| 180 | + ): |
| 181 | + ext.extra_compile_args.append("-mmacosx-version-min=10.13") |
| 182 | + |
| 183 | + ext.library_dirs.append( |
| 184 | + os.path.join(current_dir, self.build_lib, "snowflake", "connector") |
| 185 | + ) |
| 186 | + ext.extra_link_args += self._get_arrow_lib_as_linker_input() |
| 187 | + |
| 188 | + # sys.platform for linux used to return with version suffix, (i.e. linux2, linux3) |
| 189 | + # After version 3.3, it will always be just 'linux' |
| 190 | + # https://docs.python.org/3/library/sys.html#sys.platform |
| 191 | + if sys.platform == "linux": |
| 192 | + ext.extra_link_args += ["-Wl,-rpath,$ORIGIN"] |
| 193 | + elif sys.platform == "darwin": |
| 194 | + # rpath,$ORIGIN only work on linux, did not work on darwin. use @loader_path instead |
| 195 | + # fyi, https://medium.com/@donblas/fun-with-rpath-otool-and-install-name-tool-e3e41ae86172 |
| 196 | + ext.extra_link_args += ["-rpath", "@loader_path"] |
| 197 | + |
| 198 | + # nanoarrow extension |
80 | 199 | if ext.name == "snowflake.connector.nanoarrow_arrow_iterator":
|
81 | 200 | NANOARROW_CPP_SRC_DIR = os.path.join(CONNECTOR_SRC_DIR, "nanoarrow_cpp")
|
82 | 201 | NANOARROW_ARROW_ITERATOR_SRC_DIR = os.path.join(
|
@@ -181,6 +300,32 @@ def new__compile(obj, src: str, ext, cc_args, extra_postargs, pp_opts):
|
181 | 300 | finally:
|
182 | 301 | self.compiler._compile = original__compile
|
183 | 302 |
|
def _get_arrow_lib_dir(self):
    """Return the directory that holds the Arrow shared libraries.

    The ``SF_ARROW_LIBDIR`` environment variable wins whenever it is set
    (even to an empty string); otherwise the first entry reported by
    ``pyarrow.get_library_dirs()`` is returned.
    """
    override = os.environ.get("SF_ARROW_LIBDIR")
    if override is None:
        # No explicit override: fall back to the installed pyarrow.
        return pyarrow.get_library_dirs()[0]
    return override
| 307 | + |
def _copy_arrow_lib(self):
    """Copy this platform's Arrow shared libraries into the build tree.

    The library names are taken from ``arrow_libs_to_copy[sys.platform]``
    and each file is copied into ``<build_lib>/snowflake/connector``,
    which is created first if it does not exist.
    """
    lib_names = self.arrow_libs_to_copy[sys.platform]

    destination = os.path.join(self.build_lib, "snowflake", "connector")
    os.makedirs(destination, exist_ok=True)

    for lib_name in lib_names:
        # The source directory is resolved separately for every library.
        copy(f"{self._get_arrow_lib_dir()}/{lib_name}", destination)
| 317 | + |
def _get_arrow_lib_as_linker_input(self):
    """Return the full paths of the Arrow libraries to pass to the linker.

    The names listed in ``arrow_libs_to_link[sys.platform]`` are resolved
    against the directory returned by ``_get_arrow_lib_dir()``.

    Returns:
        A list with one ``<lib dir>/<lib name>`` string per library, in
        the order the libraries are listed.

    Raises:
        KeyError: if ``sys.platform`` has no entry in ``arrow_libs_to_link``.
        FileNotFoundError: if one of the listed libraries does not exist.
    """
    lib_names = self.arrow_libs_to_link[sys.platform]
    # Resolve the directory once; it is the same for every library.
    lib_dir = self._get_arrow_lib_dir()

    linker_inputs = []
    for lib_name in lib_names:
        lib_path = f"{lib_dir}/{lib_name}"
        # Raise explicitly rather than ``assert``: assertions are stripped
        # under ``python -O`` and would not say which library is missing.
        if not os.path.exists(lib_path):
            raise FileNotFoundError(
                f"Arrow library required for linking was not found: {lib_path}"
            )
        linker_inputs.append(lib_path)

    return linker_inputs
| 328 | + |
184 | 329 | cmd_class = {"build_ext": MyBuildExt}
|
185 | 330 |
|
186 | 331 | setup(
|
|
0 commit comments