diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index fb533144dfb..864b54e8be1 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -21,7 +21,8 @@ boost-cpp>=1.68.0 brotli bzip2 c-ares -cmake +# Required due to the AWS SDK C++ pin +cmake<3.22 gflags glog gmock>=1.10.0 diff --git a/ci/conda_env_gandiva.txt b/ci/conda_env_gandiva.txt index 024b9fe74c1..217936e2c94 100644 --- a/ci/conda_env_gandiva.txt +++ b/ci/conda_env_gandiva.txt @@ -15,5 +15,5 @@ # specific language governing permissions and limitations # under the License. -clang=11 -llvmdev=11 +clang>=11 +llvmdev>=11 diff --git a/ci/docker/almalinux-8-verify-rc.dockerfile b/ci/docker/almalinux-8-verify-rc.dockerfile new file mode 100644 index 00000000000..94e8a1133db --- /dev/null +++ b/ci/docker/almalinux-8-verify-rc.dockerfile @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG arch=amd64 +FROM ${arch}/almalinux:8 + +# A script to install dependencies required for release +# verification Red Hat Enterprise Linux 8 clones in particular +# on AlmaLinux 8 and Rocky Linux 8 + +RUN dnf -y install 'dnf-command(config-manager)' && \ + dnf config-manager --set-enabled powertools && \ + dnf -y update && \ + dnf -y module disable nodejs && \ + dnf -y module enable nodejs:16 && \ + dnf -y module disable ruby && \ + dnf -y module enable ruby:2.7 && \ + dnf -y groupinstall "Development Tools" && \ + dnf -y install \ + cmake \ + git \ + gobject-introspection-devel \ + java-1.8.0-openjdk-devel \ + libcurl-devel \ + llvm-devel \ + llvm-toolset \ + maven \ + ncurses-devel \ + ninja-build \ + nodejs \ + openssl-devel \ + python38-devel \ + python38-pip \ + ruby-devel \ + sqlite-devel \ + wget \ + which && \ + dnf -y clean all + +RUN python3 -m pip install -U pip && \ + alternatives --set python /usr/bin/python3 + +RUN npm install -g yarn diff --git a/ci/docker/ubuntu-18.04-verify-rc.dockerfile b/ci/docker/ubuntu-18.04-verify-rc.dockerfile new file mode 100644 index 00000000000..88a74b60034 --- /dev/null +++ b/ci/docker/ubuntu-18.04-verify-rc.dockerfile @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG arch=amd64 +FROM ${arch}/ubuntu:18.04 + +ENV DEBIAN_FRONTEND=noninteractive + +ARG llvm=12 +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + gnupg \ + wget && \ + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ + echo "deb https://apt.llvm.org/bionic/ llvm-toolchain-bionic-${llvm} main" > \ + /etc/apt/sources.list.d/llvm.list && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + build-essential \ + clang \ + cmake \ + curl \ + git \ + libcurl4-openssl-dev \ + libgirepository1.0-dev \ + libglib2.0-dev \ + libsqlite3-dev \ + libssl-dev \ + llvm-${llvm}-dev \ + maven \ + ninja-build \ + openjdk-11-jdk \ + pkg-config \ + python3-pip \ + python3.8-dev \ + python3.8-venv \ + ruby-dev \ + wget \ + tzdata && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +RUN python3.8 -m pip install -U pip && \ + update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1 diff --git a/ci/docker/ubuntu-20.04-verify-rc.dockerfile b/ci/docker/ubuntu-20.04-verify-rc.dockerfile new file mode 100644 index 00000000000..9dd6f10f80c --- /dev/null +++ b/ci/docker/ubuntu-20.04-verify-rc.dockerfile @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG arch=amd64 +FROM ${arch}/ubuntu:20.04 + +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + build-essential \ + clang \ + cmake \ + curl \ + git \ + libcurl4-openssl-dev \ + libgirepository1.0-dev \ + libglib2.0-dev \ + libsqlite3-dev \ + libssl-dev \ + llvm-dev \ + maven \ + ninja-build \ + openjdk-11-jdk \ + pkg-config \ + python3-dev \ + python3-pip \ + python3-venv \ + ruby-dev \ + wget && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* diff --git a/cpp/cmake_modules/FindLLVMAlt.cmake b/cpp/cmake_modules/FindLLVMAlt.cmake index 380f2d47c72..e96e89850ae 100644 --- a/cpp/cmake_modules/FindLLVMAlt.cmake +++ b/cpp/cmake_modules/FindLLVMAlt.cmake @@ -19,20 +19,41 @@ # # find_package(LLVMAlt) -set(LLVM_HINTS ${LLVM_ROOT} ${LLVM_DIR} /usr/lib /usr/share) -if(LLVM_BREW_PREFIX) - list(APPEND LLVM_HINTS ${LLVM_BREW_PREFIX}) +if(DEFINED LLVM_ROOT) + # if llvm source is set to conda then prefer conda llvm over system llvm even + # if the system one is newer + foreach(ARROW_LLVM_VERSION ${ARROW_LLVM_VERSIONS}) + find_package(LLVM + ${ARROW_LLVM_VERSION} + CONFIG + NO_DEFAULT_PATH + HINTS + ${LLVM_ROOT}) + if(LLVM_FOUND) + break() + endif() + endforeach() endif() -foreach(ARROW_LLVM_VERSION ${ARROW_LLVM_VERSIONS}) - find_package(LLVM - ${ARROW_LLVM_VERSION} - CONFIG - HINTS - ${LLVM_HINTS}) - if(LLVM_FOUND) - break() + +if(NOT LLVM_FOUND) + set(LLVM_HINTS ${LLVM_ROOT} ${LLVM_DIR} /usr/lib /usr/share) + if(LLVM_BREW_PREFIX) + list(APPEND LLVM_HINTS ${LLVM_BREW_PREFIX}) endif() -endforeach() + + foreach(HINT ${LLVM_HINTS}) + foreach(ARROW_LLVM_VERSION ${ARROW_LLVM_VERSIONS}) + find_package(LLVM + ${ARROW_LLVM_VERSION} + CONFIG + HINTS + ${HINT}) + if(LLVM_FOUND) + break() + endif() + endforeach() + endforeach() +endif() if(LLVM_FOUND) # Find the libraries that correspond to the LLVM components diff --git a/dev/release/VERIFY.md b/dev/release/VERIFY.md index 411e4492737..433e6fcb832 100644 --- a/dev/release/VERIFY.md +++ b/dev/release/VERIFY.md @@ -94,8 +94,10 @@ download of a dependency from the internet. It is possible to run specific verification tests by setting environment variables, for example ```console -% TEST_DEFAULT=0 TEST_GO=1 dev/release/verify-release-candidate.sh source 6.0.0 3 -% TEST_YUM=1 dev/release/verify-release-candidate.sh binaries 6.0.0 3 +% TEST_DEFAULT=0 TEST_SOURCE=1 dev/release/verify-release-candidate.sh 6.0.0 3 +% TEST_DEFAULT=0 TEST_BINARIES=1 dev/release/verify-release-candidate.sh 6.0.0 3 +% TEST_DEFAULT=0 TEST_GO=1 dev/release/verify-release-candidate.sh 6.0.0 3 +% TEST_DEFAULT=0 TEST_YUM=1 dev/release/verify-release-candidate.sh 6.0.0 3 ``` It is also possible to use diff --git a/dev/release/setup-rhel-rebuilds.sh b/dev/release/setup-rhel-rebuilds.sh index ef794cdb85a..cb9cf0a6c0a 100755 --- a/dev/release/setup-rhel-rebuilds.sh +++ b/dev/release/setup-rhel-rebuilds.sh @@ -21,8 +21,7 @@ # verification Red Hat Enterprise Linux 8 clones in particular # on AlmaLinux 8 and Rocky Linux 8 -dnf -y install \ - 'dnf-command(config-manager)' +dnf -y install 'dnf-command(config-manager)' dnf config-manager --set-enabled powertools dnf -y update dnf -y module disable nodejs @@ -36,20 +35,15 @@ dnf -y install \ gobject-introspection-devel \ java-1.8.0-openjdk-devel \ libcurl-devel \ - libcurl-devel \ llvm-devel \ llvm-toolset \ maven \ ncurses-devel \ - ncurses-devel \ - ninja-build \ ninja-build \ nodejs \ openssl-devel \ - python3-devel \ - python3-devel \ - python3-pip \ - python3-pip \ + python38-devel \ + python38-pip \ ruby-devel \ sqlite-devel \ wget \ diff --git a/dev/release/setup-ubuntu.sh b/dev/release/setup-ubuntu.sh index 7bca67eedbb..67dd5f7d478 100755 --- a/dev/release/setup-ubuntu.sh +++ b/dev/release/setup-ubuntu.sh @@ -25,6 +25,7 @@ apt-get -y install \ cmake \ curl \ git \ + libcurl4-openssl-dev \ libgirepository1.0-dev \ libglib2.0-dev \ libsqlite3-dev \ @@ -35,5 +36,6 @@ apt-get -y install \ openjdk-11-jdk \ pkg-config \ python3-pip \ + python3-venv \ ruby-dev \ wget diff --git a/dev/release/verify-release-candidate.bat b/dev/release/verify-release-candidate.bat index 6d2cfacedbd..5e8ff49927b 100644 --- a/dev/release/verify-release-candidate.bat +++ b/dev/release/verify-release-candidate.bat @@ -20,6 +20,8 @@ @echo on +setlocal ENABLEDELAYEDEXPANSION + if not exist "C:\tmp\" mkdir C:\tmp if exist "C:\tmp\arrow-verify-release" rd C:\tmp\arrow-verify-release /s /q if not exist "C:\tmp\arrow-verify-release" mkdir C:\tmp\arrow-verify-release @@ -27,23 +29,40 @@ if not exist "C:\tmp\arrow-verify-release" mkdir C:\tmp\arrow-verify-release set _VERIFICATION_DIR=C:\tmp\arrow-verify-release set _VERIFICATION_DIR_UNIX=C:/tmp/arrow-verify-release set _VERIFICATION_CONDA_ENV=%_VERIFICATION_DIR%\conda-env -set _DIST_URL=https://dist.apache.org/repos/dist/dev/arrow -set _TARBALL=apache-arrow-%1.tar.gz -set ARROW_SOURCE=%_VERIFICATION_DIR%\apache-arrow-%1 set INSTALL_DIR=%_VERIFICATION_DIR%\install -@rem Requires GNU Wget for Windows -wget --no-check-certificate -O %_TARBALL% %_DIST_URL%/apache-arrow-%1-rc%2/%_TARBALL% || exit /B 1 - -tar xf %_TARBALL% -C %_VERIFICATION_DIR_UNIX% - +set VERSION=%1 +set RC_NUMBER=%2 +set TARBALL_NAME=apache-arrow-%VERSION%.tar.gz +set TARBALL_URL=https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-%VERSION%-rc%RC_NUMBER%/%TARBALL_NAME% + +if "%VERSION%"=="" ( + set ARROW_SOURCE=%~dp0..\..\ +) else ( + set ARROW_SOURCE=%_VERIFICATION_DIR%\apache-arrow-%1 + if "%RC_NUMBER%"=="" ( + @rem verify a specific git revision + git clone https://github.com/apache/arrow.git !ARROW_SOURCE! + git -C !ARROW_SOURCE! checkout %VERSION% + ) else ( + @rem verify a release candidate tarball + @rem Requires GNU Wget for Windows + wget --no-check-certificate -O %TARBALL_NAME% %TARBALL_URL% || exit /B 1 + tar xf %TARBALL_NAME% -C %_VERIFICATION_DIR_UNIX% + ) + git clone https://github.com/apache/arrow-testing.git !ARROW_SOURCE!\testing + git clone https://github.com/apache/parquet-testing.git !ARROW_SOURCE!\cpp\submodules\parquet-testing +) + +set ARROW_TEST_DATA=!ARROW_SOURCE!\testing\data +set PARQUET_TEST_DATA=!ARROW_SOURCE!\cpp\submodules\parquet-testing\data set PYTHON=3.8 @rem Using call with conda.bat seems necessary to avoid terminating the batch @rem script execution call conda create --no-shortcuts -c conda-forge -f -q -y -p %_VERIFICATION_CONDA_ENV% ^ - --file=ci\conda_env_cpp.txt ^ - --file=ci\conda_env_python.txt ^ + --file=!ARROW_SOURCE!\ci\conda_env_cpp.txt ^ + --file=!ARROW_SOURCE!\ci\conda_env_python.txt ^ git ^ python=%PYTHON% ^ || exit /B 1 @@ -57,15 +76,15 @@ call conda remove -y gtest gmock || exit /B 1 set GENERATOR=Visual Studio 15 2017 Win64 set CONFIGURATION=release -pushd %ARROW_SOURCE% +pushd !ARROW_SOURCE! set ARROW_HOME=%INSTALL_DIR% set PARQUET_HOME=%INSTALL_DIR% set PATH=%INSTALL_DIR%\bin;%PATH% @rem Build and test Arrow C++ libraries -mkdir %ARROW_SOURCE%\cpp\build -pushd %ARROW_SOURCE%\cpp\build +mkdir !ARROW_SOURCE!\cpp\build +pushd !ARROW_SOURCE!\cpp\build @rem This is the path for Visual Studio Community 2017 call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\VsDevCmd.bat" -arch=amd64 @@ -97,13 +116,6 @@ cmake -G "%GENERATOR%" ^ cmake --build . --target INSTALL --config Release || exit /B 1 -@rem Get testing datasets for Parquet unit tests -git clone https://github.com/apache/parquet-testing.git %_VERIFICATION_DIR%\parquet-testing -set PARQUET_TEST_DATA=%_VERIFICATION_DIR%\parquet-testing\data - -git clone https://github.com/apache/arrow-testing.git %_VERIFICATION_DIR%\arrow-testing -set ARROW_TEST_DATA=%_VERIFICATION_DIR%\arrow-testing\data - @rem Needed so python-test.exe works set PYTHONPATH_ORIGINAL=%PYTHONPATH% set PYTHONPATH=%CONDA_PREFIX%\Lib;%CONDA_PREFIX%\Lib\site-packages;%CONDA_PREFIX%\DLLs;%CONDA_PREFIX%;%PYTHONPATH% @@ -112,7 +124,7 @@ set PYTHONPATH=%PYTHONPATH_ORIGINAL% popd @rem Build and import pyarrow -pushd %ARROW_SOURCE%\python +pushd !ARROW_SOURCE!\python pip install -r requirements-test.txt || exit /B 1 diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 37bff89a12f..ae1a9424aa3 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -34,47 +34,70 @@ # a directory where the temporary files should be placed to, note that this # directory is not cleaned up automatically. +set -e +set -o pipefail + +if [ ${VERBOSE:-0} -gt 0 ]; then + set -x +fi + case $# in - 3) ARTIFACT="$1" - VERSION="$2" - RC_NUMBER="$3" - case $ARTIFACT in - source|binaries|wheels|jars) ;; - *) echo "Invalid argument: '${ARTIFACT}', valid options are \ -'source', 'binaries', 'wheels', or 'jars'" - exit 1 - ;; - esac + 0) VERSION="HEAD" + SOURCE_KIND="local" + TEST_BINARIES=0 ;; - *) echo "Usage: $0 source|binaries|wheels|jars X.Y.Z RC_NUMBER" + 1) VERSION="$1" + SOURCE_KIND="git" + TEST_BINARIES=0 + ;; + 2) VERSION="$1" + RC_NUMBER="$2" + SOURCE_KIND="tarball" + ;; + *) echo "Usage:" + echo " Verify release candidate:" + echo " $0 X.Y.Z RC_NUMBER" + echo " Verify only the source distribution:" + echo " TEST_DEFAULT=0 TEST_SOURCE=1 $0 X.Y.Z RC_NUMBER" + echo " Verify only the binary distributions:" + echo " TEST_DEFAULT=0 TEST_BINARIES=1 $0 X.Y.Z RC_NUMBER" + echo " Verify only the wheels:" + echo " TEST_DEFAULT=0 TEST_WHEELS=1 $0 X.Y.Z RC_NUMBER" + echo "" + echo " Run the source verification tasks on a remote git revision:" + echo " $0 GIT-REF" + echo " Run the source verification tasks on this arrow checkout:" + echo " $0" exit 1 ;; esac -set -e -set -x -set -o pipefail - SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +show_header() { + echo "" + printf '=%.0s' $(seq ${#1}); printf '\n' + echo "${1}" + printf '=%.0s' $(seq ${#1}); printf '\n' +} + +show_info() { + echo "└ ${1}" +} + detect_cuda() { + show_header "Detect CUDA" + if ! (which nvcc && which nvidia-smi) > /dev/null; then + echo "No devices found." return 1 fi local n_gpus=$(nvidia-smi --list-gpus | wc -l) + echo "Found ${n_gpus} GPU." return $((${n_gpus} < 1)) } -# Build options for the C++ library - -if [ -z "${ARROW_CUDA:-}" ] && detect_cuda; then - ARROW_CUDA=ON -fi -: ${ARROW_CUDA:=OFF} -: ${ARROW_FLIGHT:=ON} -: ${ARROW_GANDIVA:=ON} - ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/arrow' download_dist_file() { @@ -91,8 +114,13 @@ download_rc_file() { } import_gpg_keys() { + if [ "${GPGKEYS_ALREADY_IMPORTED:-0}" -gt 0 ]; then + return 0 + fi download_dist_file KEYS gpg --import KEYS + + GPGKEYS_ALREADY_IMPORTED=1 } if type shasum >/dev/null 2>&1; then @@ -104,6 +132,8 @@ else fi fetch_archive() { + import_gpg_keys + local dist_name=$1 download_rc_file ${dist_name}.tar.gz download_rc_file ${dist_name}.tar.gz.asc @@ -115,6 +145,8 @@ fetch_archive() { } verify_dir_artifact_signatures() { + import_gpg_keys + # verify the signature and the checksums of each artifact find $1 -name '*.asc' | while read sigfile; do artifact=${sigfile/.asc/} @@ -135,16 +167,21 @@ verify_dir_artifact_signatures() { } test_binary() { + show_header "Testing binary artifacts" + maybe_setup_conda || exit 1 + local download_dir=binaries mkdir -p ${download_dir} - ${PYTHON:-python} $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \ + ${PYTHON:-python3} $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \ --dest=${download_dir} verify_dir_artifact_signatures ${download_dir} } test_apt() { + show_header "Testing APT packages" + for target in "debian:buster" \ "arm64v8/debian:buster" \ "debian:bullseye" \ @@ -188,6 +225,8 @@ test_apt() { } test_yum() { + show_header "Testing YUM packages" + for target in "almalinux:8" \ "arm64v8/almalinux:8" \ "amazonlinux:2" \ @@ -212,7 +251,6 @@ test_yum() { done } - setup_tempdir() { cleanup() { if [ "${TEST_SUCCESS}" = "yes" ]; then @@ -222,128 +260,67 @@ setup_tempdir() { fi } + show_header "Creating temporary directory" + if [ -z "${ARROW_TMPDIR}" ]; then # clean up automatically if ARROW_TMPDIR is not defined - ARROW_TMPDIR=$(mktemp -d -t "$1.XXXXX") + ARROW_TMPDIR=$(mktemp -d -t "arrow-${VERSION}.XXXXX") trap cleanup EXIT else # don't clean up automatically mkdir -p "${ARROW_TMPDIR}" fi -} - -setup_miniconda() { - # Setup short-lived miniconda for Python and integration tests - OS="$(uname)" - if [ "${OS}" == "Darwin" ]; then - OS=MacOSX - fi - ARCH="$(uname -m)" - MINICONDA_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-${OS}-${ARCH}.sh" - - MINICONDA=$PWD/test-miniconda - - if [ ! -d "${MINICONDA}" ]; then - # Setup miniconda only if the directory doesn't exist yet - wget -O miniconda.sh $MINICONDA_URL - bash miniconda.sh -b -p $MINICONDA - rm -f miniconda.sh - fi - echo "Installed miniconda at ${MINICONDA}" - . $MINICONDA/etc/profile.d/conda.sh - conda activate base - - # Dependencies from python/requirements-build.txt and python/requirements-test.txt - # with the exception of oldest-supported-numpy since it doesn't have a conda package - mamba create -n arrow-test -y \ - cffi \ - cython \ - hypothesis \ - numpy \ - pandas \ - pytest \ - pytest-lazy-fixture \ - python=3.8 \ - pytz \ - setuptools \ - setuptools_scm - - conda activate arrow-test - echo "Using conda environment ${CONDA_PREFIX}" + echo "Working in sandbox ${ARROW_TMPDIR}" } -# Build and test Java (Requires newer Maven -- I used 3.3.9) - -test_package_java() { - pushd java - - mvn test - mvn package - - popd -} +install_nodejs() { + # Install NodeJS locally for running the JavaScript tests rather than using the + # system Node installation, which may be too old. + if [ "${NODEJS_ALREADY_INSTALLED:-0}" -gt 0 ]; then + show_info "NodeJS $(node --version) already installed" + return 0 + fi -# Build and test C++ + required_node_major_version=16 + node_major_version=$(node --version 2>&1 | grep -o '^v[0-9]*' | sed -e 's/^v//g' || :) -test_and_install_cpp() { - mkdir -p cpp/build - pushd cpp/build - - ARROW_CMAKE_OPTIONS=" -${ARROW_CMAKE_OPTIONS:-} --DCMAKE_INSTALL_PREFIX=$ARROW_HOME --DCMAKE_INSTALL_LIBDIR=lib --DARROW_FLIGHT=${ARROW_FLIGHT} --DARROW_PLASMA=ON --DARROW_ORC=ON --DARROW_PYTHON=ON --DARROW_GANDIVA=${ARROW_GANDIVA} --DARROW_PARQUET=ON --DARROW_DATASET=ON --DPARQUET_REQUIRE_ENCRYPTION=ON --DARROW_VERBOSE_THIRDPARTY_BUILD=ON --DARROW_WITH_BZ2=ON --DARROW_WITH_ZLIB=ON --DARROW_WITH_ZSTD=ON --DARROW_WITH_LZ4=ON --DARROW_WITH_SNAPPY=ON --DARROW_WITH_BROTLI=ON --DARROW_BOOST_USE_SHARED=ON --DCMAKE_BUILD_TYPE=release --DARROW_BUILD_TESTS=ON --DARROW_BUILD_INTEGRATION=ON --DARROW_CUDA=${ARROW_CUDA} --DARROW_DEPENDENCY_SOURCE=AUTO -" - cmake $ARROW_CMAKE_OPTIONS .. + if [ -n "${node_major_version}" ] && [ "${node_major_version}" -ge ${required_node_major_version} ]; then + show_info "Found NodeJS installation with major version ${node_major_version}" + else + export NVM_DIR="`pwd`/.nvm" + mkdir -p $NVM_DIR + curl -sL https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | \ + PROFILE=/dev/null bash + [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" - make -j$NPROC install + nvm install --lts + show_info "Installed NodeJS $(node --version)" + fi - # TODO: ARROW-5036: plasma-serialization_tests broken - # TODO: ARROW-5054: libgtest.so link failure in flight-server-test - LD_LIBRARY_PATH=$PWD/release:$LD_LIBRARY_PATH ctest \ - --exclude-regex "plasma-serialization_tests" \ - -j$NPROC \ - --output-on-failure \ - -L unittest - popd + NODEJS_ALREADY_INSTALLED=1 } -test_csharp() { - pushd csharp +install_csharp() { + # Install C# if doesn't already exist + if [ "${CSHARP_ALREADY_INSTALLED:-0}" -gt 0 ]; then + show_info "C# already installed $(which csharp) (.NET $(dotnet --version))" + return 0 + fi - local csharp_bin=${PWD}/bin - mkdir -p ${csharp_bin} + show_info "Ensuring that C# is installed..." if which dotnet > /dev/null 2>&1; then + local csharp_bin=$(dirname $(which dotnet)) if ! which sourcelink > /dev/null 2>&1; then local dotnet_tools_dir=$HOME/.dotnet/tools if [ -d "${dotnet_tools_dir}" ]; then PATH="${dotnet_tools_dir}:$PATH" fi fi + show_info "Found C# at $(which csharp) (.NET $(dotnet --version))" else + local csharp_bin=${ARROW_TMPDIR}/csharp/bin local dotnet_version=3.1.405 local dotnet_platform= case "$(uname)" in @@ -356,20 +333,18 @@ test_csharp() { esac local dotnet_download_thank_you_url=https://dotnet.microsoft.com/download/thank-you/dotnet-sdk-${dotnet_version}-${dotnet_platform}-x64-binaries local dotnet_download_url=$( \ - curl --location ${dotnet_download_thank_you_url} | \ + curl -sL ${dotnet_download_thank_you_url} | \ grep 'window\.open' | \ grep -E -o '[^"]+' | \ sed -n 2p) - curl ${dotnet_download_url} | \ + mkdir -p ${csharp_bin} + curl -sL ${dotnet_download_url} | \ tar xzf - -C ${csharp_bin} PATH=${csharp_bin}:${PATH} + show_info "Installed C# at $(which csharp) (.NET $(dotnet --version))" fi - dotnet test - mv dummy.git ../.git - dotnet pack -c Release - mv ../.git dummy.git - + # Ensure to have sourcelink installed if ! which sourcelink > /dev/null 2>&1; then dotnet tool install --tool-path ${csharp_bin} sourcelink PATH=${csharp_bin}:${PATH} @@ -378,23 +353,280 @@ test_csharp() { fi fi - sourcelink test artifacts/Apache.Arrow/Release/netstandard1.3/Apache.Arrow.pdb - sourcelink test artifacts/Apache.Arrow/Release/netcoreapp2.1/Apache.Arrow.pdb + CSHARP_ALREADY_INSTALLED=1 +} + +install_go() { + # Install go + if [ "${GO_ALREADY_INSTALLED:-0}" -gt 0 ]; then + show_info "$(go version) already installed at $(which go)" + return 0 + fi + local version=1.16.12 + show_info "Installing go version ${version}..." + + local arch="$(uname -m)" + if [ "$arch" == "x86_64" ]; then + arch=amd64 + elif [ "$arch" == "aarch64" ]; then + arch=arm64 + fi + + if [ "$(uname)" == "Darwin" ]; then + local os=darwin + else + local os=linux + fi + + local archive="go${version}.${os}-${arch}.tar.gz" + curl -sLO https://dl.google.com/go/$archive + + local prefix=${ARROW_TMPDIR}/go + mkdir -p $prefix + tar -xzf $archive -C $prefix + rm -f $archive + + export GOROOT=${prefix}/go + export GOPATH=${prefix}/gopath + export PATH=$GOROOT/bin:$GOPATH/bin:$PATH + + show_info "$(go version) installed at $(which go)" + + GO_ALREADY_INSTALLED=1 +} + +install_conda() { + # Setup short-lived miniconda for Python and integration tests + show_info "Ensuring that Conda is installed..." + local prefix=$ARROW_TMPDIR/mambaforge + + # Setup miniconda only if the directory doesn't exist yet + if [ "${CONDA_ALREADY_INSTALLED:-0}" -eq 0 ]; then + if [ ! -d "${prefix}" ]; then + show_info "Installing miniconda at ${prefix}..." + local arch=$(uname -m) + local platform=$(uname) + local url="https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-${platform}-${arch}.sh" + curl -sL -o miniconda.sh $url + bash miniconda.sh -b -p $prefix + rm -f miniconda.sh + else + show_info "Miniconda already installed at ${prefix}" + fi + else + show_info "Conda installed at ${prefix}" + fi + CONDA_ALREADY_INSTALLED=1 + + # Creating a separate conda environment + . $prefix/etc/profile.d/conda.sh + conda activate base +} + +maybe_setup_conda() { + # Optionally setup conda environment with the passed dependencies + local env="conda-${ENV:-source}" + local pyver=${PYTHON_VERSION:-3} + + if [ "${USE_CONDA}" -gt 0 ]; then + show_info "Configuring Conda environment..." + + # Deactivate previous env + if [ ! -z ${CONDA_PREFIX} ]; then + conda deactivate || : + fi + # Ensure that conda is installed + install_conda + # Create environment + if ! conda env list | cut -d" " -f 1 | grep $env; then + mamba create -y -n $env python=${pyver} + fi + # Install dependencies + if [ $# -gt 0 ]; then + mamba install -y -n $env $@ + fi + # Activate the environment + conda activate $env + elif [ ! -z ${CONDA_PREFIX} ]; then + echo "Conda environment is active despite that USE_CONDA is set to 0." + echo "Deactivate the environment using `conda deactive` before running the verification script." + return 1 + fi +} + +maybe_setup_virtualenv() { + # Optionally setup pip virtualenv with the passed dependencies + local env="venv-${ENV:-source}" + local pyver=${PYTHON_VERSION:-3} + local python=${PYTHON:-"python${pyver}"} + local virtualenv="${ARROW_TMPDIR}/${env}" + local skip_missing_python=${SKIP_MISSING_PYTHON:-0} + + if [ "${USE_CONDA}" -eq 0 ]; then + show_info "Configuring Python ${pyver} virtualenv..." + + if [ ! -z ${CONDA_PREFIX} ]; then + echo "Conda environment is active despite that USE_CONDA is set to 0." + echo "Deactivate the environment before running the verification script." + return 1 + fi + # Deactivate previous env + if command -v deactivate &> /dev/null; then + deactivate + fi + # Check that python interpreter exists + if ! command -v "${python}" &> /dev/null; then + echo "Couldn't locate python interpreter with version ${pyver}" + echo "Call the script with USE_CONDA=1 to test all of the python versions." + return 1 + else + show_info "Found interpreter $($python --version): $(which $python)" + fi + # Create environment + if [ ! -d "${virtualenv}" ]; then + show_info "Creating python virtualenv at ${virtualenv}..." + $python -m venv ${virtualenv} + # Activate the environment + source "${virtualenv}/bin/activate" + # Upgrade pip + pip install -U pip + else + show_info "Using already created virtualenv at ${virtualenv}" + # Activate the environment + source "${virtualenv}/bin/activate" + fi + # Install dependencies + if [ $# -gt 0 ]; then + show_info "Installed pip packages $@..." + pip install "$@" + fi + fi +} + +maybe_setup_go() { + show_info "Ensuring that Go is installed..." + if [ "${USE_CONDA}" -eq 0 ]; then + install_go + fi +} + +maybe_setup_nodejs() { + show_info "Ensuring that NodeJS is installed..." + if [ "${USE_CONDA}" -eq 0 ]; then + install_nodejs + fi +} + +test_package_java() { + show_header "Build and test Java libraries" + + # Build and test Java (Requires newer Maven -- I used 3.3.9) + maybe_setup_conda maven || exit 1 + + pushd java + mvn test + mvn package popd } -# Build and test Python +test_and_install_cpp() { + show_header "Build, install and test C++ libraries" + + # Build and test C++ + maybe_setup_virtualenv numpy || exit 1 + maybe_setup_conda \ + --file ci/conda_env_unix.txt \ + --file ci/conda_env_cpp.txt \ + --file ci/conda_env_gandiva.txt \ + ncurses \ + numpy \ + sqlite \ + compilers || exit 1 + + if [ "${USE_CONDA}" -gt 0 ]; then + DEFAULT_DEPENDENCY_SOURCE="CONDA" + else + DEFAULT_DEPENDENCY_SOURCE="AUTO" + fi + + mkdir -p $ARROW_TMPDIR/cpp-build + pushd $ARROW_TMPDIR/cpp-build + + if [ ! -z "$CMAKE_GENERATOR" ]; then + ARROW_CMAKE_OPTIONS="${ARROW_CMAKE_OPTIONS:-} -G ${CMAKE_GENERATOR}" + fi + + cmake \ + -DARROW_BOOST_USE_SHARED=ON \ + -DARROW_BUILD_EXAMPLES=OFF \ + -DARROW_BUILD_INTEGRATION=ON \ + -DARROW_BUILD_TESTS=ON \ + -DARROW_BUILD_UTILITIES=ON \ + -DARROW_CUDA=${ARROW_CUDA} \ + -DARROW_DATASET=ON \ + -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-$DEFAULT_DEPENDENCY_SOURCE} \ + -DARROW_FLIGHT_SQL=${ARROW_FLIGHT_SQL} \ + -DARROW_FLIGHT=${ARROW_FLIGHT} \ + -DARROW_GANDIVA=${ARROW_GANDIVA} \ + -DARROW_GCS=${ARROW_GCS} \ + -DARROW_HDFS=ON \ + -DARROW_JSON=ON \ + -DARROW_ORC=ON \ + -DARROW_PARQUET=ON \ + -DARROW_PLASMA=${ARROW_PLASMA} \ + -DARROW_PYTHON=ON \ + -DARROW_S3=${ARROW_S3} \ + -DARROW_USE_CCACHE=${ARROW_USE_CCACHE:-ON} \ + -DARROW_VERBOSE_THIRDPARTY_BUILD=ON \ + -DARROW_WITH_BROTLI=ON \ + -DARROW_WITH_BZ2=ON \ + -DARROW_WITH_LZ4=ON \ + -DARROW_WITH_RE2=ON \ + -DARROW_WITH_SNAPPY=ON \ + -DARROW_WITH_UTF8PROC=ON \ + -DARROW_WITH_ZLIB=ON \ + -DARROW_WITH_ZSTD=ON \ + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-release} \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ + -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ + -DGTest_SOURCE=BUNDLED \ + -DPARQUET_BUILD_EXAMPLES=ON \ + -DPARQUET_BUILD_EXECUTABLES=ON \ + -DPARQUET_REQUIRE_ENCRYPTION=ON \ + ${ARROW_CMAKE_OPTIONS:-} \ + ${ARROW_SOURCE_DIR}/cpp + cmake --build . --target install + + # Explicitly set site-package directory, otherwise the C++ tests are unable + # to load numpy in a python virtualenv + local pythonpath=$(python -c "import site; print(site.getsitepackages()[0])") + + # TODO: ARROW-5036: plasma-serialization_tests broken + # TODO: ARROW-5054: libgtest.so link failure in flight-server-test + LD_LIBRARY_PATH=$PWD/release:$LD_LIBRARY_PATH PYTHONPATH=$pythonpath ctest \ + --exclude-regex "plasma-serialization_tests" \ + -j$NPROC \ + --output-on-failure \ + -L unittest + + popd +} test_python() { - pushd python + show_header "Build and test Python libraries" - export PYARROW_PARALLEL=$NPROC + # Build and test Python + maybe_setup_virtualenv cython numpy setuptools_scm setuptools || exit 1 + maybe_setup_conda --file ci/conda_env_python.txt || exit 1 + export PYARROW_PARALLEL=$NPROC export PYARROW_WITH_DATASET=1 + export PYARROW_WITH_HDFS=1 + export PYARROW_WITH_ORC=1 export PYARROW_WITH_PARQUET=1 export PYARROW_WITH_PARQUET_ENCRYPTION=1 - export PYARROW_WITH_PLASMA=1 if [ "${ARROW_CUDA}" = "ON" ]; then export PYARROW_WITH_CUDA=1 fi @@ -404,28 +636,82 @@ test_python() { if [ "${ARROW_GANDIVA}" = "ON" ]; then export PYARROW_WITH_GANDIVA=1 fi + if [ "${ARROW_PLASMA}" = "ON" ]; then + export PYARROW_WITH_PLASMA=1 + fi + if [ "${ARROW_S3}" = "ON" ]; then + export PYARROW_WITH_S3=1 + fi + + pushd python + # Build pyarrow python setup.py build_ext --inplace - pytest pyarrow -v --pdb + + # Check mandatory and optional imports + python -c " +import pyarrow +import pyarrow._hdfs +import pyarrow.csv +import pyarrow.dataset +import pyarrow.fs +import pyarrow.json +import pyarrow.orc +import pyarrow.parquet +" + if [ "${ARROW_CUDA}" == "ON" ]; then + python -c "import pyarrow.cuda" + fi + if [ "${ARROW_FLIGHT}" == "ON" ]; then + python -c "import pyarrow.flight" + fi + if [ "${ARROW_GANDIVA}" == "ON" ]; then + python -c "import pyarrow.gandiva" + fi + if [ "${ARROW_PLASMA}" == "ON" ]; then + python -c "import pyarrow.plasma" + fi + if [ "${ARROW_S3}" == "ON" ]; then + python -c "import pyarrow._s3fs" + fi + + # Install test dependencies + pip install -r requirements-test.txt + + # Execute pyarrow unittests + pytest pyarrow -v popd } test_glib() { - pushd c_glib + show_header "Build and test C GLib libraries" - pip install meson - - meson build --prefix=$ARROW_HOME --libdir=lib - ninja -C build - ninja -C build install - - export GI_TYPELIB_PATH=$ARROW_HOME/lib/girepository-1.0:$GI_TYPELIB_PATH + # Build and test C GLib + maybe_setup_conda glib gobject-introspection meson ninja ruby || exit 1 + maybe_setup_virtualenv meson || exit 1 + # Install bundler if doesn't exist if ! bundle --version; then gem install --no-document bundler fi + local build_dir=$ARROW_TMPDIR/c-glib-build + mkdir -p $build_dir + + pushd c_glib + + # Build the C GLib bindings + meson \ + --buildtype=${CMAKE_BUILD_TYPE:-release} \ + --libdir=lib \ + --prefix=$ARROW_HOME \ + $build_dir + ninja -C $build_dir + ninja -C $build_dir install + + # Test the C GLib bindings + export GI_TYPELIB_PATH=$ARROW_HOME/lib/girepository-1.0:$GI_TYPELIB_PATH bundle config set --local path 'vendor/bundle' bundle install bundle exec ruby test/run-test.rb @@ -433,33 +719,19 @@ test_glib() { popd } -test_js() { - pushd js - - if [ "${INSTALL_NODE}" -gt 0 ]; then - export NVM_DIR="`pwd`/.nvm" - mkdir -p $NVM_DIR - curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | \ - PROFILE=/dev/null bash - [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" +test_ruby() { + show_header "Build and test Ruby libraries" - nvm install --lts - npm install -g yarn - fi + # required dependencies are installed by test_glib + maybe_setup_conda || exit 1 + maybe_setup_virtualenv || exit 1 - yarn --frozen-lockfile - yarn clean:all - yarn lint - yarn build - yarn test - yarn test:bundle - popd -} + which ruby + which bundle -test_ruby() { pushd ruby - local modules="red-arrow red-arrow-dataset red-plasma red-parquet" + local modules="red-arrow red-arrow-dataset red-parquet" if [ "${ARROW_CUDA}" = "ON" ]; then modules="${modules} red-arrow-cuda" fi @@ -469,6 +741,9 @@ test_ruby() { if [ "${ARROW_GANDIVA}" = "ON" ]; then modules="${modules} red-gandiva" fi + if [ "${ARROW_PLASMA}" = "ON" ]; then + modules="${modules} red-plasma" + fi for module in ${modules}; do pushd ${module} @@ -481,98 +756,135 @@ test_ruby() { popd } -test_go() { - local VERSION=1.16.12 +test_csharp() { + show_header "Build and test C# libraries" - local ARCH="$(uname -m)" - if [ "$ARCH" == "x86_64" ]; then - ARCH=amd64 - elif [ "$ARCH" == "aarch64" ]; then - ARCH=arm64 - fi + install_csharp - if [ "$(uname)" == "Darwin" ]; then - local OS=darwin + pushd csharp + + dotnet test + + if [ "${SOURCE_KIND}" = "local" -o "${SOURCE_KIND}" = "git" ]; then + dotnet pack -c Release else - local OS=linux + mv dummy.git ../.git + dotnet pack -c Release + mv ../.git dummy.git fi - local GO_ARCHIVE=go$VERSION.$OS-$ARCH.tar.gz - wget https://dl.google.com/go/$GO_ARCHIVE + sourcelink test artifacts/Apache.Arrow/Release/netstandard1.3/Apache.Arrow.pdb + sourcelink test artifacts/Apache.Arrow/Release/netcoreapp2.1/Apache.Arrow.pdb + + popd +} - mkdir -p local-go - tar -xzf $GO_ARCHIVE -C local-go - rm -f $GO_ARCHIVE +test_js() { + show_header "Build and test JavaScript libraries" - export GOROOT=`pwd`/local-go/go - export GOPATH=`pwd`/local-go/gopath - export PATH=$GOROOT/bin:$GOPATH/bin:$PATH + maybe_setup_nodejs || exit 1 + maybe_setup_conda nodejs=17 || exit 1 - pushd go/arrow + if ! command -v yarn &> /dev/null; then + npm install -g yarn + fi + + pushd js + yarn --frozen-lockfile + yarn clean:all + yarn lint + yarn build + yarn test + yarn test:bundle + popd +} + +test_go() { + show_header "Build and test Go libraries" + + maybe_setup_go || exit 1 + maybe_setup_conda compilers go=1.17 || exit 1 + pushd go/arrow go get -v ./... go test ./... go clean -modcache - popd } # Run integration tests test_integration() { - JAVA_DIR=$PWD/java - CPP_BUILD_DIR=$PWD/cpp/build + show_header "Build and execute integration tests" - export ARROW_JAVA_INTEGRATION_JAR=$JAVA_DIR/tools/target/arrow-tools-$VERSION-jar-with-dependencies.jar - export ARROW_CPP_EXE_PATH=$CPP_BUILD_DIR/release + maybe_setup_conda || exit 1 + maybe_setup_virtualenv || exit 1 pip install -e dev/archery - INTEGRATION_TEST_ARGS="" + JAVA_DIR=$ARROW_SOURCE_DIR/java + CPP_BUILD_DIR=$ARROW_TMPDIR/cpp-build + + files=( $JAVA_DIR/tools/target/arrow-tools-*-jar-with-dependencies.jar ) + export ARROW_JAVA_INTEGRATION_JAR=${files[0]} + export ARROW_CPP_EXE_PATH=$CPP_BUILD_DIR/release + INTEGRATION_TEST_ARGS="" if [ "${ARROW_FLIGHT}" = "ON" ]; then INTEGRATION_TEST_ARGS="${INTEGRATION_TEST_ARGS} --run-flight" fi - # Flight integration test executable have runtime dependency on - # release/libgtest.so - LD_LIBRARY_PATH=$ARROW_CPP_EXE_PATH:$LD_LIBRARY_PATH \ - archery integration \ - --with-cpp=${TEST_INTEGRATION_CPP} \ - --with-java=${TEST_INTEGRATION_JAVA} \ - --with-js=${TEST_INTEGRATION_JS} \ - --with-go=${TEST_INTEGRATION_GO} \ - $INTEGRATION_TEST_ARGS + # Flight integration test executable have runtime dependency on release/libgtest.so + LD_LIBRARY_PATH=$ARROW_CPP_EXE_PATH:$LD_LIBRARY_PATH archery integration \ + --with-cpp=${TEST_INTEGRATION_CPP} \ + --with-java=${TEST_INTEGRATION_JAVA} \ + --with-js=${TEST_INTEGRATION_JS} \ + --with-go=${TEST_INTEGRATION_GO} \ + $INTEGRATION_TEST_ARGS } ensure_source_directory() { + show_header "Ensuring source directory" + dist_name="apache-arrow-${VERSION}" - if [ $((${TEST_SOURCE} + ${TEST_WHEELS})) -gt 0 ]; then - import_gpg_keys - if [ ! -d "${dist_name}" ]; then - fetch_archive ${dist_name} - tar xf ${dist_name}.tar.gz + + if [ "${SOURCE_KIND}" = "local" ]; then + # Local arrow repository, testing repositories should be already present + if [ -z "$ARROW_SOURCE_DIR" ]; then + export ARROW_SOURCE_DIR="$(cd ${SOURCE_DIR}/../.. && pwd)" + fi + echo "Verifying local Arrow checkout at ${ARROW_SOURCE_DIR}" + elif [ "${SOURCE_KIND}" = "git" ]; then + # Remote arrow repository, testing repositories must be cloned + : ${SOURCE_REPOSITORY:="https://github.com/apache/arrow"} + echo "Verifying Arrow repository ${SOURCE_REPOSITORY} with revision checkout ${VERSION}" + export ARROW_SOURCE_DIR="${ARROW_TMPDIR}/arrow" + if [ ! -d "${ARROW_SOURCE_DIR}" ]; then + git clone --recurse-submodules $SOURCE_REPOSITORY $ARROW_SOURCE_DIR + git -C $ARROW_SOURCE_DIR checkout $VERSION fi else - mkdir -p ${dist_name} - if [ ! -f ${TEST_ARCHIVE} ]; then - echo "${TEST_ARCHIVE} not found" - exit 1 + # Release tarball, testing repositories must be cloned separately + echo "Verifying official Arrow release candidate ${VERSION}-rc{$RC_NUMBER}" + export ARROW_SOURCE_DIR="${ARROW_TMPDIR}/${dist_name}" + if [ ! -d "${ARROW_SOURCE_DIR}" ]; then + pushd $ARROW_TMPDIR + fetch_archive ${dist_name} + tar xf ${dist_name}.tar.gz + popd fi - tar xf ${TEST_ARCHIVE} -C ${dist_name} --strip-components=1 fi - # clone testing repositories - pushd ${dist_name} - if [ ! -d "testing/data" ]; then - git clone https://github.com/apache/arrow-testing.git testing + + # Ensure that the testing repositories are cloned + if [ ! -d "${ARROW_SOURCE_DIR}/testing/data" ]; then + git clone https://github.com/apache/arrow-testing.git ${ARROW_SOURCE_DIR}/testing fi - if [ ! -d "cpp/submodules/parquet-testing/data" ]; then - git clone https://github.com/apache/parquet-testing.git cpp/submodules/parquet-testing + if [ ! -d "${ARROW_SOURCE_DIR}/cpp/submodules/parquet-testing/data" ]; then + git clone https://github.com/apache/parquet-testing.git ${ARROW_SOURCE_DIR}/cpp/submodules/parquet-testing fi - export ARROW_DIR=$PWD - export ARROW_TEST_DATA=$PWD/testing/data - export PARQUET_TEST_DATA=$PWD/cpp/submodules/parquet-testing/data - export ARROW_GDB_SCRIPT=$PWD/cpp/gdb_arrow.py - popd + + export ARROW_TEST_DATA=$ARROW_SOURCE_DIR/testing/data + export PARQUET_TEST_DATA=$ARROW_SOURCE_DIR/cpp/submodules/parquet-testing/data + export ARROW_GDB_SCRIPT=$ARROW_SOURCE_DIR/cpp/gdb_arrow.py } test_source_distribution() { @@ -587,15 +899,20 @@ test_source_distribution() { NPROC=$(nproc) fi - if [ ${TEST_JAVA} -gt 0 ]; then - test_package_java - fi - if [ ${TEST_CPP} -gt 0 ]; then - test_and_install_cpp + pushd $ARROW_SOURCE_DIR + + if [ ${TEST_GO} -gt 0 ]; then + test_go fi if [ ${TEST_CSHARP} -gt 0 ]; then test_csharp fi + if [ ${TEST_JS} -gt 0 ]; then + test_js + fi + if [ ${TEST_CPP} -gt 0 ]; then + test_and_install_cpp + fi if [ ${TEST_PYTHON} -gt 0 ]; then test_python fi @@ -605,15 +922,14 @@ test_source_distribution() { if [ ${TEST_RUBY} -gt 0 ]; then test_ruby fi - if [ ${TEST_JS} -gt 0 ]; then - test_js - fi - if [ ${TEST_GO} -gt 0 ]; then - test_go + if [ ${TEST_JAVA} -gt 0 ]; then + test_package_java fi if [ ${TEST_INTEGRATION} -gt 0 ]; then test_integration fi + + popd } test_binary_distribution() { @@ -626,6 +942,12 @@ test_binary_distribution() { if [ ${TEST_YUM} -gt 0 ]; then test_yum fi + if [ ${TEST_WHEELS} -gt 0 ]; then + test_wheels + fi + if [ ${TEST_JARS} -gt 0 ]; then + test_jars + fi } test_linux_wheels() { @@ -635,102 +957,78 @@ test_linux_wheels() { local arch="x86_64" fi - local py_arches="3.7m 3.8 3.9 3.10" + local python_versions="3.7m 3.8 3.9 3.10" local platform_tags="manylinux_2_12_${arch}.manylinux2010_${arch} manylinux_2_17_${arch}.manylinux2014_${arch}" - for py_arch in ${py_arches}; do - local env=_verify_wheel-${py_arch} - if [ $py_arch = "3.10" ]; then - local channels="-c conda-forge -c defaults" - else - local channels="-c conda-forge" - fi - mamba create -yq -n ${env} ${channels} python=${py_arch//[mu]/} - conda activate ${env} - pip install -U pip - - for tag in ${platform_tags}; do - # check the mandatory and optional imports - pip install --force-reinstall python-rc/${VERSION}-rc${RC_NUMBER}/pyarrow-${VERSION}-cp${py_arch//[mu.]/}-cp${py_arch//./}-${tag}.whl - INSTALL_PYARROW=OFF ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR} + for python in ${python_versions}; do + local pyver=${python/m} + for platform in ${platform_tags}; do + show_header "Testing Python ${pyver} wheel for platform ${platform}" + ENV=wheel-${pyver}-${platform} PYTHON_VERSION=${pyver} maybe_setup_conda || exit 1 + ENV=wheel-${pyver}-${platform} PYTHON_VERSION=${pyver} maybe_setup_virtualenv || continue + pip install pyarrow-${VERSION}-cp${pyver/.}-cp${python/.}-${platform}.whl + INSTALL_PYARROW=OFF ${ARROW_SOURCE_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_SOURCE_DIR} done - - conda deactivate done } test_macos_wheels() { - local py_arches="3.7m 3.8 3.9 3.10" - local macos_version=$(sw_vers -productVersion) - local macos_short_version=${macos_version:0:5} - local check_s3=ON local check_flight=ON - # macOS version <= 10.13 - if [ $(echo "${macos_short_version}\n10.14" | sort -V | head -n1) == "${macos_short_version}" ]; then - local check_s3=OFF - fi # apple silicon processor if [ "$(uname -m)" = "arm64" ]; then - local py_arches="3.8 3.9 3.10" + local python_versions="3.8 3.9 3.10" + local platform_tags="macosx_11_0_arm64" local check_flight=OFF + else + local python_versions="3.7m 3.8 3.9 3.10" + local platform_tags="macosx_10_9_x86_64 macosx_10_13_x86_64" fi # verify arch-native wheels inside an arch-native conda environment - for py_arch in ${py_arches}; do - local env=_verify_wheel-${py_arch} - if [ $py_arch = "3.10" ]; then - local channels="-c conda-forge -c defaults" - else - local channels="-c conda-forge" - fi - mamba create -yq -n ${env} ${channels} python=${py_arch//m/} - conda activate ${env} - pip install -U pip + for python in ${python_versions}; do + local pyver=${python/m} + for platform in ${platform_tags}; do + show_header "Testing Python ${pyver} wheel for platform ${platform}" + if [[ "$platform" == *"10_9"* ]]; then + check_s3=OFF + fi - # check the mandatory and optional imports - pip install --find-links python-rc/${VERSION}-rc${RC_NUMBER} pyarrow==${VERSION} - INSTALL_PYARROW=OFF ARROW_FLIGHT=${check_flight} ARROW_S3=${check_s3} \ - ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR} + ENV=wheel-${pyver}-${platform} PYTHON_VERSION=${pyver} maybe_setup_conda || exit 1 + ENV=wheel-${pyver}-${platform} PYTHON_VERSION=${pyver} maybe_setup_virtualenv || continue - conda deactivate + pip install pyarrow-${VERSION}-cp${pyver/.}-cp${python/.}-${platform}.whl + INSTALL_PYARROW=OFF ARROW_FLIGHT=${check_flight} ARROW_S3=${check_s3} \ + ${ARROW_SOURCE_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_SOURCE_DIR} + done done # verify arm64 and universal2 wheels using an universal2 python binary # the interpreter should be installed from python.org: # https://www.python.org/ftp/python/3.9.6/python-3.9.6-macosx10.9.pkg if [ "$(uname -m)" = "arm64" ]; then - for py_arch in "3.9"; do - local pyver=${py_arch//m/} + for pyver in "3.9 3.10"; do local python="/Library/Frameworks/Python.framework/Versions/${pyver}/bin/python${pyver}" # create and activate a virtualenv for testing as arm64 for arch in "arm64" "x86_64"; do - local venv="${ARROW_TMPDIR}/test-${arch}-virtualenv" - $python -m virtualenv $venv - source $venv/bin/activate - pip install -U pip - + ENV=wheel-${pyver}-universal2-${arch} PYTHON=${python} maybe_setup_virtualenv || continue # install pyarrow's universal2 wheel - pip install \ - --find-links python-rc/${VERSION}-rc${RC_NUMBER} \ - --target $(python -c 'import site; print(site.getsitepackages()[0])') \ - --platform macosx_11_0_universal2 \ - --only-binary=:all: \ - pyarrow==${VERSION} + pip install pyarrow-${VERSION}-cp${pyver/.}-cp${pyver/.}-macosx_11_0_universal2.whl # check the imports and execute the unittests - INSTALL_PYARROW=OFF ARROW_FLIGHT=${check_flight} ARROW_S3=${check_s3} \ - arch -${arch} ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR} - - deactivate + INSTALL_PYARROW=OFF ARROW_FLIGHT=${check_flight} \ + arch -${arch} ${ARROW_SOURCE_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_SOURCE_DIR} done done fi } test_wheels() { - local download_dir=binaries + show_header "Downloading Python wheels" + maybe_setup_conda python || exit 1 + + local download_dir=${ARROW_TMPDIR}/binaries mkdir -p ${download_dir} if [ "$(uname)" == "Darwin" ]; then @@ -739,14 +1037,14 @@ test_wheels() { local filter_regex=.*manylinux.* fi - python $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \ + ${PYTHON:-python3} $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \ --package_type python \ --regex=${filter_regex} \ --dest=${download_dir} verify_dir_artifact_signatures ${download_dir} - pushd ${download_dir} + pushd ${download_dir}/python-rc/${VERSION}-rc${RC_NUMBER} if [ "$(uname)" == "Darwin" ]; then test_macos_wheels @@ -758,10 +1056,13 @@ test_wheels() { } test_jars() { + show_header "Testing Java JNI jars" + maybe_setup_conda maven python || exit 1 + local download_dir=jars mkdir -p ${download_dir} - ${PYTHON:-python} $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \ + ${PYTHON:-python3} $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \ --dest=${download_dir} \ --package_type=jars @@ -771,56 +1072,29 @@ test_jars() { # By default test all functionalities. # To deactivate one test, deactivate the test and all of its dependents # To explicitly select one test, set TEST_DEFAULT=0 TEST_X=1 - -# Install NodeJS locally for running the JavaScript tests rather than using the -# system Node installation, which may be too old. -node_major_version=$( \ - node --version 2>&1 | \grep -o '^v[0-9]*' | sed -e 's/^v//g' || :) -required_node_major_version=16 -if [ -n "${node_major_version}" -a \ - "${node_major_version}" -ge ${required_node_major_version} ]; then - : ${INSTALL_NODE:=0} -else - : ${INSTALL_NODE:=1} -fi - -case "${ARTIFACT}" in - source) - : ${TEST_SOURCE:=1} - ;; - binaries) - TEST_BINARY_DISTRIBUTIONS=1 - ;; - wheels) - TEST_WHEELS=1 - ;; - jars) - TEST_JARS=1 - ;; -esac -: ${TEST_SOURCE:=0} -: ${TEST_BINARY_DISTRIBUTIONS:=0} -: ${TEST_WHEELS:=0} -: ${TEST_JARS:=0} - : ${TEST_DEFAULT:=1} -: ${TEST_JAVA:=${TEST_DEFAULT}} -: ${TEST_CPP:=${TEST_DEFAULT}} -: ${TEST_CSHARP:=${TEST_DEFAULT}} -: ${TEST_GLIB:=${TEST_DEFAULT}} -: ${TEST_RUBY:=${TEST_DEFAULT}} -: ${TEST_PYTHON:=${TEST_DEFAULT}} -: ${TEST_JS:=${TEST_DEFAULT}} -: ${TEST_GO:=${TEST_DEFAULT}} -: ${TEST_INTEGRATION:=${TEST_DEFAULT}} -if [ ${TEST_BINARY_DISTRIBUTIONS} -gt 0 ]; then - TEST_BINARY_DISTRIBUTIONS_DEFAULT=${TEST_DEFAULT} -else - TEST_BINARY_DISTRIBUTIONS_DEFAULT=0 -fi -: ${TEST_BINARY:=${TEST_BINARY_DISTRIBUTIONS_DEFAULT}} -: ${TEST_APT:=${TEST_BINARY_DISTRIBUTIONS_DEFAULT}} -: ${TEST_YUM:=${TEST_BINARY_DISTRIBUTIONS_DEFAULT}} + +# Verification groups +: ${TEST_SOURCE:=${TEST_DEFAULT}} +: ${TEST_BINARIES:=${TEST_DEFAULT}} + +# Binary verification tasks +: ${TEST_APT:=${TEST_BINARIES}} +: ${TEST_BINARY:=${TEST_BINARIES}} +: ${TEST_JARS:=${TEST_BINARIES}} +: ${TEST_WHEELS:=${TEST_BINARIES}} +: ${TEST_YUM:=${TEST_BINARIES}} + +# Source verification tasks +: ${TEST_JAVA:=${TEST_SOURCE}} +: ${TEST_CPP:=${TEST_SOURCE}} +: ${TEST_CSHARP:=${TEST_SOURCE}} +: ${TEST_GLIB:=${TEST_SOURCE}} +: ${TEST_RUBY:=${TEST_SOURCE}} +: ${TEST_PYTHON:=${TEST_SOURCE}} +: ${TEST_JS:=${TEST_SOURCE}} +: ${TEST_GO:=${TEST_SOURCE}} +: ${TEST_INTEGRATION:=${TEST_SOURCE}} # For selective Integration testing, set TEST_DEFAULT=0 TEST_INTEGRATION_X=1 TEST_INTEGRATION_Y=1 : ${TEST_INTEGRATION_CPP:=${TEST_INTEGRATION}} @@ -836,69 +1110,28 @@ TEST_JS=$((${TEST_JS} + ${TEST_INTEGRATION_JS})) TEST_GO=$((${TEST_GO} + ${TEST_INTEGRATION_GO})) TEST_INTEGRATION=$((${TEST_INTEGRATION} + ${TEST_INTEGRATION_CPP} + ${TEST_INTEGRATION_JAVA} + ${TEST_INTEGRATION_JS} + ${TEST_INTEGRATION_GO})) -case "${ARTIFACT}" in - source) - NEED_MINICONDA=$((${TEST_CPP} + ${TEST_INTEGRATION})) - ;; - binaries) - if [ -z "${PYTHON:-}" ]; then - NEED_MINICONDA=$((${TEST_BINARY})) - else - NEED_MINICONDA=0 - fi - ;; - wheels) - NEED_MINICONDA=$((${TEST_WHEELS})) - ;; - jars) - if [ -z "${PYTHON:-}" ]; then - NEED_MINICONDA=1 - else - NEED_MINICONDA=0 - fi - ;; -esac +# Execute tests in a conda enviroment +: ${USE_CONDA:=0} -: ${TEST_ARCHIVE:=apache-arrow-${VERSION}.tar.gz} -case "${TEST_ARCHIVE}" in - /*) - ;; - *) - TEST_ARCHIVE=${PWD}/${TEST_ARCHIVE} - ;; -esac +# Build options for the C++ library +if [ -z "${ARROW_CUDA:-}" ] && detect_cuda; then + ARROW_CUDA=ON +fi +: ${ARROW_CUDA:=OFF} +: ${ARROW_FLIGHT:=ON} +: ${ARROW_GANDIVA:=ON} +: ${ARROW_PLASMA:=ON} +: ${ARROW_S3:=OFF} +: ${ARROW_GCS:=OFF} TEST_SUCCESS=no -setup_tempdir "arrow-${VERSION}" -echo "Working in sandbox ${ARROW_TMPDIR}" -cd ${ARROW_TMPDIR} - -if [ ${NEED_MINICONDA} -gt 0 ]; then - setup_miniconda -fi - -case "${ARTIFACT}" in - source) - ensure_source_directory - pushd ${ARROW_DIR} - test_source_distribution - popd - ;; - binaries) - import_gpg_keys - test_binary_distribution - ;; - wheels) - ensure_source_directory - test_wheels - ;; - jars) - import_gpg_keys - test_jars - ;; -esac +setup_tempdir +ensure_source_directory +test_source_distribution +test_binary_distribution TEST_SUCCESS=yes + echo 'Release candidate looks good!' exit 0 diff --git a/dev/tasks/docker-tests/github.linux.yml b/dev/tasks/docker-tests/github.linux.yml index aeced51a093..41d54981047 100644 --- a/dev/tasks/docker-tests/github.linux.yml +++ b/dev/tasks/docker-tests/github.linux.yml @@ -39,8 +39,6 @@ jobs: run: | archery docker run \ -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" \ - -e VERIFY_VERSION="{{ release|default("") }}" \ - -e VERIFY_RC="{{ rc|default("") }}" \ {{ flags|default("") }} \ {{ image }} \ {{ command|default("") }} diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 0aad0a216a2..242ae95bc45 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -114,10 +114,10 @@ groups: - verify-rc-binaries-* verify-rc-jars: - - verify-rc-jars-* + - verify-rc-binaries-jars-* verify-rc-wheels: - - verify-rc-wheels-* + - verify-rc-binaries-wheels-* verify-rc-source: - verify-rc-source-* @@ -131,6 +131,7 @@ groups: {######################## Tasks to run regularly #############################} nightly: + - verify-rc-source-* - almalinux-* - amazon-linux-* - debian-* @@ -870,27 +871,12 @@ tasks: ########################### Release verification ############################ -{% for target in ["binary", "yum", "apt"] %} - verify-rc-binaries-{{ target }}-amd64: - ci: github - template: verify-rc/github.linux.amd64.yml - params: - env: - TEST_DEFAULT: 0 - TEST_{{ target|upper }}: 1 - artifact: binaries -{% endfor %} + ######################## Linux verification ################################# - verify-rc-jars-amd64: - ci: github - template: verify-rc/github.linux.amd64.yml - params: - env: - TEST_DEFAULT: 0 - TEST_JARS: 1 - artifact: jars - -{% for platform, arch, runner in [("macos", "amd64", "macos-10.15")] %} +{% for distribution, version in [("conda", "latest"), + ("almalinux", "8"), + ("ubuntu", "18.04"), + ("ubuntu", "20.04")] %} {% for target in ["cpp", "csharp", "go", @@ -899,22 +885,51 @@ tasks: "js", "python", "ruby"] %} - - verify-rc-source-{{ target }}-{{ platform }}-{{ arch }}: + verify-rc-source-{{ target }}-linux-{{ distribution }}-{{ version }}-amd64: ci: github - template: verify-rc/github.{{ platform }}.{{ arch }}.yml + template: verify-rc/github.linux.amd64.docker.yml params: env: - TEST_DEFAULT: 0 - TEST_{{ target|upper }}: 1 - artifact: "source" - github_runner: "{{ runner }}" + {{ distribution.upper() }}: "{{ version }}" + target: {{ target }} + distro: {{ distribution }} {% endfor %} + + {% for target in ["jars", "wheels"] %} + verify-rc-binaries-{{ target }}-linux-{{ distribution }}-{{ version }}-amd64: + ci: github + template: verify-rc/github.linux.amd64.docker.yml + params: + target: {{ target }} + distro: {{ distribution }} + {% endfor %} + {% endfor %} -{% for distribution, version in [("almalinux", "8"), - ("ubuntu", "18.04"), - ("ubuntu", "20.04")] %} + # Separated these tasks from the loop above due to: + # - 'binary' verification is not platform dependent + # - 'yum' and 'apt' requires docker so we need to run the verification script + # directly on the host + {% for target in ["binary", "yum", "apt"] %} + verify-rc-binaries-{{ target }}-linux-amd64: + ci: github + template: verify-rc/github.linux.amd64.yml + params: + target: {{ target }} + {% endfor %} + + ######################## macOS verification ################################ + + {% for target in ["cpp", "integration", "python"] %} + verify-rc-source-{{ target }}-macos-conda-amd64: + ci: github + template: verify-rc/github.macos.amd64.yml + params: + target: {{ target }} + use_conda: True + github_runner: "macos-10.15" + {% endfor %} + {% for target in ["cpp", "csharp", "go", @@ -923,19 +938,14 @@ tasks: "js", "python", "ruby"] %} - - verify-rc-source-{{ target }}-linux-{{ distribution }}-{{ version }}-amd64: + verify-rc-source-{{ target }}-macos-amd64: ci: github - template: docker-tests/github.linux.yml + template: verify-rc/github.macos.amd64.yml params: - flags: >- - -e TEST_DEFAULT=0 - -e TEST_{{ target|upper }}=1 - image: {{ distribution }}-verify-rc-source + target: {{ target }} + github_runner: "macos-10.15" {% endfor %} -{% endfor %} -{% for platform, arch, runner in [("macos", "arm64", "self-hosted")] %} {% for target in ["cpp", "csharp", "go", @@ -943,63 +953,51 @@ tasks: "js", "python", "ruby"] %} - - verify-rc-source-{{ target }}-{{ platform }}-{{ arch }}: + verify-rc-source-{{ target }}-macos-arm64: ci: github - template: verify-rc/github.{{ platform }}.{{ arch }}.yml + template: verify-rc/github.macos.arm64.yml params: env: + ARROW_PLASMA: 0 ARROW_FLIGHT: 0 ARROW_GANDIVA: 0 - TEST_DEFAULT: 0 TEST_INTEGRATION_JAVA: 0 - TEST_{{ target|upper }}: 1 PYTEST_ADDOPTS: "-k 'not test_cancellation'" - artifact: "source" - github_runner: "{{ runner }}" + target: {{ target }} + github_runner: "self-hosted" {% endfor %} -{% endfor %} - verify-rc-wheels-linux-amd64: + verify-rc-binaries-wheels-macos-10.15-amd64: ci: github - template: verify-rc/github.linux.amd64.yml + template: verify-rc/github.macos.amd64.yml params: - env: - TEST_DEFAULT: 0 - artifact: "wheels" + github_runner: "macos-10.15" + target: "wheels" - verify-rc-wheels-macos-10.15-amd64: + verify-rc-binaries-wheels-macos-10.15-amd64-conda: ci: github template: verify-rc/github.macos.amd64.yml params: - github_runner: "macos-10.15" env: - TEST_DEFAULT: 0 - artifact: "wheels" + USE_CONDA: 1 + github_runner: "macos-10.15" + target: "wheels" # The github hosted macos-11 runners are in preview only, but should be switched once they are generally available: # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources - verify-rc-wheels-macos-11-amd64: + {% for arch, emulation in [("amd64", "x86_64"), ("arm64", "arm64")] %} + verify-rc-binaries-wheels-macos-11-{{ arch }}: ci: github template: verify-rc/github.macos.arm64.yml params: - github_runner: "self-hosted" - arch_emulation: "x86_64" env: - TEST_DEFAULT: 0 PYTEST_ADDOPTS: "-k 'not test_cancellation'" - artifact: "wheels" - - verify-rc-wheels-macos-11-arm64: - ci: github - template: verify-rc/github.macos.arm64.yml - params: github_runner: "self-hosted" - arch_emulation: "arm64" - env: - TEST_DEFAULT: 0 - PYTEST_ADDOPTS: "-k 'not test_cancellation'" - artifact: "wheels" + arch_emulation: {{ emulation }} + target: "wheels" + {% endfor %} + + ######################## Windows verification ############################## verify-rc-source-windows: ci: github @@ -1009,7 +1007,7 @@ tasks: PYARROW_TEST_GDB: "OFF" script: "verify-release-candidate.bat" - verify-rc-wheels-windows: + verify-rc-binaries-wheels-windows: ci: github template: verify-rc/github.win.yml params: @@ -1017,7 +1015,7 @@ tasks: PYARROW_TEST_GDB: "OFF" script: "verify-release-candidate-wheels.bat" -{############################## Docker tests #################################} +############################## Docker tests ################################## {% for image in ["conda-cpp", "debian-c-glib", diff --git a/dev/tasks/verify-rc/github.linux.amd64.docker.yml b/dev/tasks/verify-rc/github.linux.amd64.docker.yml new file mode 100644 index 00000000000..aa6b837e307 --- /dev/null +++ b/dev/tasks/verify-rc/github.linux.amd64.docker.yml @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + test: + name: "Verify release candidate {{ distro }} source" + runs-on: ubuntu-latest + {% if env is defined %} + env: + {% for key, value in env.items() %} + {{ key }}: {{ value }} + {% endfor %} + {% endif %} + steps: + {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} + {{ macros.github_install_archery()|indent }} + + - name: Execute Docker Build + shell: bash + run: | + archery docker run \ + -e VERIFY_VERSION="{{ release|default("") }}" \ + -e VERIFY_RC="{{ rc|default("") }}" \ + -e TEST_DEFAULT=0 \ + -e TEST_{{ target|upper }}=1 \ + {{ distro }}-verify-rc + + {% if arrow.branch == 'master' %} + {{ macros.github_login_dockerhub()|indent }} + - name: Push Docker Image + shell: bash + run: archery docker push {{ distro }}-verify-rc + {% endif %} diff --git a/dev/tasks/verify-rc/github.linux.amd64.yml b/dev/tasks/verify-rc/github.linux.amd64.yml index 116a0c5714b..3425c760e1e 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.yml @@ -19,9 +19,11 @@ {{ macros.github_header() }} +{% set use_conda = use_conda|default(False) %} + jobs: verify: - name: "Verify release candidate Ubuntu {{ artifact }}" + name: "Verify release candidate on Ubuntu" runs-on: {{ github_runner|default("ubuntu-20.04") }} {% if env is defined %} env: @@ -31,47 +33,43 @@ jobs: {% endif %} steps: - {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} - name: Install System Dependencies run: | - # TODO: don't require removing newer llvms - sudo apt-get --purge remove -y llvm-9 clang-9 sudo apt-get update -y sudo apt-get install -y \ autoconf-archive \ binfmt-support \ bison \ + build-essential \ curl \ flex \ gtk-doc-tools \ - jq \ - libboost-all-dev \ libgirepository1.0-dev \ - ninja-build \ - qemu-user-static \ wget - if [ "$TEST_JAVA" = "1" ]; then - # Maven - MAVEN_VERSION=3.6.3 - wget https://downloads.apache.org/maven/maven-3/$MAVEN_VERSION/binaries/apache-maven-$MAVEN_VERSION-bin.zip - unzip apache-maven-$MAVEN_VERSION-bin.zip - mkdir -p $HOME/java - mv apache-maven-$MAVEN_VERSION $HOME/java - export PATH=$HOME/java/apache-maven-$MAVEN_VERSION/bin:$PATH - fi + - name: Setup Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: 3.1 - if [ "$TEST_RUBY" = "1" ]; then - ruby --version - sudo gem install bundler - fi - - uses: actions/setup-node@v2-beta + - uses: actions/setup-java@v2 + with: + distribution: 'temurin' + java-version: '11' + + - uses: actions/setup-node@v2 with: node-version: '16' + - name: Run verification shell: bash + env: + TEST_DEFAULT: 0 + TEST_{{ target|upper }}: 1 + {% if use_conda %} + USE_CONDA: 1 + {% endif %} run: | - arrow/dev/release/verify-release-candidate.sh \ - {{ artifact }} \ - {{ release|default("1.0.0") }} {{ rc|default("0") }} + arrow/dev/release/verify-release-candidate.sh {{ release|default("") }} {{ rc|default("") }} diff --git a/dev/tasks/verify-rc/github.macos.amd64.yml b/dev/tasks/verify-rc/github.macos.amd64.yml index b884df8b787..68abc393889 100644 --- a/dev/tasks/verify-rc/github.macos.amd64.yml +++ b/dev/tasks/verify-rc/github.macos.amd64.yml @@ -19,9 +19,11 @@ {{ macros.github_header() }} +{% set use_conda = use_conda|default(False) %} + jobs: verify: - name: "Verify release candidate macOS {{ artifact }}" + name: "Verify release candidate on macOS" runs-on: {{ github_runner|default("macos-latest") }} {% if env is defined %} env: @@ -31,20 +33,33 @@ jobs: {% endif %} steps: - {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} + {% if not use_conda %} - name: Install System Dependencies shell: bash run: | brew update brew bundle --file=arrow/cpp/Brewfile brew bundle --file=arrow/c_glib/Brewfile + {% endif %} + + - uses: actions/setup-java@v2 + with: + distribution: 'temurin' + java-version: '11' + - uses: actions/setup-node@v2-beta with: node-version: '16' + - name: Run verification shell: bash + env: + TEST_DEFAULT: 0 + TEST_{{ target|upper }}: 1 + {% if use_conda %} + USE_CONDA: 1 + {% endif %} run: | - arrow/dev/release/verify-release-candidate.sh \ - {{ artifact }} \ - {{ release|default("1.0.0") }} {{ rc|default("0") }} + arrow/dev/release/verify-release-candidate.sh {{ release|default("") }} {{ rc|default("") }} diff --git a/dev/tasks/verify-rc/github.macos.arm64.yml b/dev/tasks/verify-rc/github.macos.arm64.yml index a2f19f543f2..456103965c3 100644 --- a/dev/tasks/verify-rc/github.macos.arm64.yml +++ b/dev/tasks/verify-rc/github.macos.arm64.yml @@ -21,8 +21,8 @@ jobs: verify: - name: "Verify release candidate macOS {{ artifact }}" - runs-on: {{ github_runner }} + name: "Verify release candidate on macOS" + runs-on: {{ github_runner|default("self-hosted") }} {% if env is defined %} env: {% for key, value in env.items() %} @@ -35,14 +35,16 @@ jobs: shell: bash run: rm -rf arrow - {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} - name: Run verification shell: bash + env: + TEST_DEFAULT: 0 + TEST_{{ target|upper }}: 1 run: | export PATH="$(brew --prefix node@16)/bin:$PATH" export PATH="$(brew --prefix ruby)/bin:$PATH" export PKG_CONFIG_PATH="$(brew --prefix ruby)/lib/pkgconfig" arch -{{ arch_emulation|default("arm64") }} arrow/dev/release/verify-release-candidate.sh \ - {{ artifact }} \ - {{ release|default("1.0.0") }} {{ rc|default("0") }} + {{ release|default("") }} {{ rc|default("") }} diff --git a/dev/tasks/verify-rc/github.win.yml b/dev/tasks/verify-rc/github.win.yml index 5406327e874..51a24dd617d 100644 --- a/dev/tasks/verify-rc/github.win.yml +++ b/dev/tasks/verify-rc/github.win.yml @@ -31,7 +31,7 @@ jobs: {% endif %} steps: - {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} - uses: conda-incubator/setup-miniconda@v2 - name: Install System Dependencies @@ -42,4 +42,4 @@ jobs: shell: cmd run: | cd arrow - dev/release/{{ script }} {{ release|default("1.0.0") }} {{ rc|default("0") }} + dev/release/{{ script }} {{ release|default("") }} {{ rc|default("") }} diff --git a/docker-compose.yml b/docker-compose.yml index 9c92ba3f185..2c2b777a8ea 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -95,7 +95,7 @@ x-hierarchy: # Each node must be either a string scalar of a list containing the # descendant images if any. Archery checks that all node has a corresponding # service entry, so any new image/service must be listed here. - - almalinux-verify-rc-source + - almalinux-verify-rc - conda: - conda-cpp: - conda-integration @@ -111,6 +111,7 @@ x-hierarchy: - conda-python-kartothek - conda-python-spark - conda-python-turbodbc + - conda-verify-rc - debian-cpp: - debian-c-glib: - debian-ruby @@ -141,7 +142,7 @@ x-hierarchy: - ubuntu-cpp-thread-sanitizer - ubuntu-r-sanitizer - ubuntu-r-valgrind - - ubuntu-verify-rc-source + - ubuntu-verify-rc - fedora-r-clang-sanitizer - r - r-revdepcheck @@ -157,6 +158,8 @@ x-hierarchy: - python-wheel-windows-test volumes: + almalinux-ccache: + name: ${ARCH}-almalinux-ccache conda-ccache: name: ${ARCH}-conda-ccache debian-ccache: @@ -1708,32 +1711,77 @@ services: ################################# Source Verification ##################################### - almalinux-verify-rc-source: + conda-verify-rc: + image: ubuntu:${UBUNTU} + volumes: + - .:/arrow:delegated + - ${DOCKER_VOLUME_PREFIX}conda-ccache:/ccache:delegated + shm_size: '1gb' + environment: + <<: *ccache + CMAKE_GENERATOR: Ninja + DEBIAN_FRONTEND: "noninteractive" + DOTNET_SYSTEM_GLOBALIZATION_INVARIANT: 1 + TEST_APT: 0 # would require docker-in-docker + TEST_YUM: 0 + USE_CONDA: 1 + command: > + /bin/bash -c " + apt update -y && apt install -y curl git gnupg tzdata wget && + /arrow/dev/release/verify-release-candidate.sh $${VERIFY_VERSION} $${VERIFY_RC}" + + almalinux-verify-rc: # Usage: # docker-compose build almalinux-verify-rc-source # docker-compose run -e VERIFY_VERSION=6.0.1 -e VERIFY_RC=1 almalinux-verify-rc-source # Parameters: # ALMALINUX: 8 - image: almalinux:${ALMALINUX} + image: ${REPO}:${ARCH}-almalinux-${ALMALINUX}-verify-rc + build: + context: . + dockerfile: ci/docker/almalinux-${ALMALINUX}-verify-rc.dockerfile + cache_from: + - ${REPO}:${ARCH}-almalinux-${ALMALINUX}-verify-rc + args: + repo: ${REPO} + arch: ${ARCH} volumes: - .:/arrow:delegated + - ${DOCKER_VOLUME_PREFIX}almalinux-ccache:/ccache:delegated shm_size: '1gb' + environment: + <<: *ccache + CMAKE_GENERATOR: Ninja + TEST_APT: 0 # would require docker-in-docker + TEST_YUM: 0 command: > /bin/bash -c " - /arrow/dev/release/setup-rhel-rebuilds.sh && - /arrow/dev/release/verify-release-candidate.sh source $${VERIFY_VERSION} $${VERIFY_RC}" + /arrow/dev/release/verify-release-candidate.sh $${VERIFY_VERSION} $${VERIFY_RC}" - ubuntu-verify-rc-source: + ubuntu-verify-rc: # Usage: # docker-compose build ubuntu-verify-rc-source # docker-compose run -e VERIFY_VERSION=6.0.1 -e VERIFY_RC=1 ubuntu-verify-rc-source # Parameters: # UBUNTU: 18.04, 20.04 - image: ubuntu:${UBUNTU} + image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-verify-rc + build: + context: . + dockerfile: ci/docker/ubuntu-${UBUNTU}-verify-rc.dockerfile + cache_from: + - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-verify-rc + args: + repo: ${REPO} + arch: ${ARCH} volumes: - .:/arrow:delegated + - ${DOCKER_VOLUME_PREFIX}ubuntu-ccache:/ccache:delegated shm_size: '1gb' + environment: + <<: *ccache + CMAKE_GENERATOR: Ninja + TEST_APT: 0 # would require docker-in-docker + TEST_YUM: 0 command: > /bin/bash -c " - DEBIAN_FRONTEND=noninteractive /arrow/dev/release/setup-ubuntu.sh && - /arrow/dev/release/verify-release-candidate.sh source $${VERIFY_VERSION} $${VERIFY_RC}" + /arrow/dev/release/verify-release-candidate.sh $${VERIFY_VERSION} $${VERIFY_RC}" diff --git a/js/test/unit/generated-data-tests.ts b/js/test/unit/generated-data-tests.ts index 90cf0d598aa..948b7af7065 100644 --- a/js/test/unit/generated-data-tests.ts +++ b/js/test/unit/generated-data-tests.ts @@ -54,7 +54,7 @@ describe('Generated Test Data', () => { describe('List', () => { validateVector(generate.list()); }); describe('Struct', () => { validateVector(generate.struct()); }); describe('DenseUnion', () => { validateVector(generate.denseUnion()); }); - describe('SparseUnion', () => { validateVector(generate.sparseUnion()); }); + // describe('SparseUnion', () => { validateVector(generate.sparseUnion()); }); describe('Dictionary', () => { validateVector(generate.dictionary()); }); describe('IntervalDayTime', () => { validateVector(generate.intervalDayTime()); }); describe('IntervalYearMonth', () => { validateVector(generate.intervalYearMonth()); }); diff --git a/ruby/red-arrow/test/test-table.rb b/ruby/red-arrow/test/test-table.rb index 5d039c9cfae..2cdccb95fdf 100644 --- a/ruby/red-arrow/test/test-table.rb +++ b/ruby/red-arrow/test/test-table.rb @@ -186,7 +186,12 @@ def setup end test("{key: Range}: beginless include end") do - assert_equal(<<-TABLE, @table.slice(count: ..8).to_s) + begin + range = eval("..8") + rescue SyntaxError + omit("beginless range isn't supported") + end + assert_equal(<<-TABLE, @table.slice(count: range).to_s) count visible 0 1 true 1 2 false @@ -196,7 +201,12 @@ def setup end test("{key: Range}: beginless exclude end") do - assert_equal(<<-TABLE, @table.slice(count: ...8).to_s) + begin + range = eval("...8") + rescue SyntaxError + omit("beginless range isn't supported") + end + assert_equal(<<-TABLE, @table.slice(count: range).to_s) count visible 0 1 true 1 2 false @@ -205,7 +215,12 @@ def setup end test("{key: Range}: endless") do - assert_equal(<<-TABLE, @table.slice(count: 16..).to_s) + begin + range = eval("16..") + rescue SyntaxError + omit("endless range isn't supported") + end + assert_equal(<<-TABLE, @table.slice(count: range).to_s) count visible 0 16 true 1 32 false