11#! /bin/bash
22
3- cd ..
3+ # Go to home
4+ cd ~
5+
6+ # Clone desired Arrow version
7+ rm -rf arrow dist pyarrow*
8+ git clone \
9+ --branch apache-arrow-0.14.0 \
10+ --single-branch \
11+ https://github.com/apache/arrow.git
12+
13+ # Install dependencies
14+ yum install -y \
15+ boost-devel \
16+ jemalloc-devel \
17+ bison \
18+ flex \
19+ autoconf \
20+ python36-devel
21+ pip install six numpy pandas cython pytest cmake wheel
22+
23+ # Build Arrow
24+ export ARROW_HOME=$(pwd)/dist
25+ export LD_LIBRARY_PATH=$(pwd)/dist/lib:$LD_LIBRARY_PATH
26+ mkdir dist
27+ mkdir arrow/cpp/build
28+ pushd arrow/cpp/build
29+ cmake \
30+ -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
31+ -DCMAKE_INSTALL_LIBDIR=lib \
32+ -DARROW_FLIGHT=OFF \
33+ -DARROW_GANDIVA=OFF \
34+ -DARROW_ORC=OFF \
35+ -DARROW_PARQUET=ON \
36+ -DARROW_PYTHON=ON \
37+ -DARROW_PLASMA=OFF \
38+ -DARROW_BUILD_TESTS=ON \
39+ ..
40+ make -j4
41+ make install
42+ popd
43+
44+ # Build Pyarrow
45+ export PYARROW_WITH_FLIGHT=0
46+ export PYARROW_WITH_GANDIVA=0
47+ export PYARROW_WITH_ORC=0
48+ export PYARROW_WITH_PARQUET=1
49+ pushd arrow/python
50+ python setup.py build_ext \
51+ --build-type=release \
52+ --bundle-arrow-cpp \
53+ bdist_wheel
54+ cp dist/pyarrow-*.whl ~
55+ popd
56+
57+ # Extracting files
58+ pip install pyarrow-*whl -t pyarrow_files
59+
60+ # Go back to AWSWRANGLER directory
61+ cd /aws-data-wrangler/
462
563# Preparing directories
664mkdir -p dist
@@ -10,15 +68,12 @@ rm -f "dist/awswrangler_layer.zip"
1068
1169# Building
1270pip install . -t ./python
13- rm -rf ./python/*gandiva*
14- rm -rf ./python/pyarrow/*gandiva*
15- rm -rf ./python/pyarrow/*flight*
16- rm -rf ./python/pyarrow/*plasma*
17- rm -rf ./python/pyarrow/*orc*
71+ rm -rf python/pyarrow*
72+ cp -r ~/pyarrow_files/pyarrow* python/
1873zip -r "awswrangler_layer.zip" ./python
1974mv "awswrangler_layer.zip" dist/
2075
21- # Cleaning up the directory again
76+ # # Cleaning up the directory again
2277rm -rf python
2378
2479cd building
0 commit comments