diff --git a/ci/conda_env_gandiva.txt b/ci/conda_env_gandiva.txt index 024b9fe74c1..217936e2c94 100644 --- a/ci/conda_env_gandiva.txt +++ b/ci/conda_env_gandiva.txt @@ -15,5 +15,5 @@ # specific language governing permissions and limitations # under the License. -clang=11 -llvmdev=11 +clang>=11 +llvmdev>=11 diff --git a/dev/archery/setup.py b/dev/archery/setup.py index ce1b97e0ae4..69f33bd9c45 100755 --- a/dev/archery/setup.py +++ b/dev/archery/setup.py @@ -18,12 +18,8 @@ import functools import operator -import sys from setuptools import setup, find_packages -if sys.version_info < (3, 7): - sys.exit('Python < 3.7 is not supported') - # For pathlib.Path compatibility jinja_req = 'jinja2>=2.11' @@ -50,7 +46,7 @@ maintainer_email='dev@arrow.apache.org', packages=find_packages(), include_package_data=True, - python_requires='>=3.7', + python_requires='>=3.6', install_requires=['click>=7'], tests_require=['pytest', 'responses'], extras_require=extras, diff --git a/dev/release/setup-ubuntu.sh b/dev/release/setup-ubuntu.sh index 7bca67eedbb..0737be9b2c3 100755 --- a/dev/release/setup-ubuntu.sh +++ b/dev/release/setup-ubuntu.sh @@ -25,6 +25,7 @@ apt-get -y install \ cmake \ curl \ git \ + libcurl4-openssl-dev \ libgirepository1.0-dev \ libglib2.0-dev \ libsqlite3-dev \ diff --git a/dev/release/verify-release-candidate.bat b/dev/release/verify-release-candidate.bat index 387eb25e1db..dfb874d722c 100644 --- a/dev/release/verify-release-candidate.bat +++ b/dev/release/verify-release-candidate.bat @@ -27,23 +27,45 @@ if not exist "C:\tmp\arrow-verify-release" mkdir C:\tmp\arrow-verify-release set _VERIFICATION_DIR=C:\tmp\arrow-verify-release set _VERIFICATION_DIR_UNIX=C:/tmp/arrow-verify-release set _VERIFICATION_CONDA_ENV=%_VERIFICATION_DIR%\conda-env -set _DIST_URL=https://dist.apache.org/repos/dist/dev/arrow -set _TARBALL=apache-arrow-%1.tar.gz set ARROW_SOURCE=%_VERIFICATION_DIR%\apache-arrow-%1 set INSTALL_DIR=%_VERIFICATION_DIR%\install -@rem Requires GNU Wget for 
Windows -wget --no-check-certificate -O %_TARBALL% %_DIST_URL%/apache-arrow-%1-rc%2/%_TARBALL% || exit /B 1 +set VERSION=%1 +set RC_NUMBER=%2 +set TARBALL_NAME=apache-arrow-%VERSION%.tar.gz +set TARBALL_URL=https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-%VERSION%-rc%RC_NUMBER%/%TARBALL_NAME% + +if "%RC_NUMBER%"=="" ( + @rem verify a specific git revision + if "%SOURCE_REPOSITORY%"=="" ( + pushd "%~dp0..\..\" + ) else ( + pushd %SOURCE_REPOSITORY% + ) + git clone . %ARROW_SOURCE% + git -C %ARROW_SOURCE% checkout %VERSION% + popd +) else ( + @rem verify a release candidate tarball + @rem Requires GNU Wget for Windows + wget --no-check-certificate -O %TARBALL_NAME% %TARBALL_URL% || exit /B 1 + tar xf %TARBALL_NAME% -C %_VERIFICATION_DIR_UNIX% +) + + @rem Get testing datasets for Parquet unit tests +git clone https://github.com/apache/parquet-testing.git %_VERIFICATION_DIR%\parquet-testing +set PARQUET_TEST_DATA=%_VERIFICATION_DIR%\parquet-testing\data -tar xf %_TARBALL% -C %_VERIFICATION_DIR_UNIX% +git clone https://github.com/apache/arrow-testing.git %_VERIFICATION_DIR%\arrow-testing +set ARROW_TEST_DATA=%_VERIFICATION_DIR%\arrow-testing\data set PYTHON=3.8 @rem Using call with conda.bat seems necessary to avoid terminating the batch @rem script execution call conda create --no-shortcuts -c conda-forge -f -q -y -p %_VERIFICATION_CONDA_ENV% ^ - --file=ci\conda_env_cpp.txt ^ - --file=ci\conda_env_python.txt ^ + --file=%ARROW_SOURCE%\ci\conda_env_cpp.txt ^ + --file=%ARROW_SOURCE%\ci\conda_env_python.txt ^ git ^ python=%PYTHON% ^ || exit /B 1 @@ -96,13 +118,6 @@ cmake -G "%GENERATOR%" ^ cmake --build .
--target INSTALL --config Release || exit /B 1 -@rem Get testing datasets for Parquet unit tests -git clone https://github.com/apache/parquet-testing.git %_VERIFICATION_DIR%\parquet-testing -set PARQUET_TEST_DATA=%_VERIFICATION_DIR%\parquet-testing\data - -git clone https://github.com/apache/arrow-testing.git %_VERIFICATION_DIR%\arrow-testing -set ARROW_TEST_DATA=%_VERIFICATION_DIR%\arrow-testing\data - @rem Needed so python-test.exe works set PYTHONPATH_ORIGINAL=%PYTHONPATH% set PYTHONPATH=%CONDA_PREFIX%\Lib;%CONDA_PREFIX%\Lib\site-packages;%CONDA_PREFIX%\DLLs;%CONDA_PREFIX%;%PYTHONPATH% diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 11cf0d9a488..e18df2f5482 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -35,9 +35,20 @@ # directory is not cleaned up automatically. case $# in + 2) ARTIFACT="$1" + VERSION="$2" + SOURCE_KIND="git" + case $ARTIFACT in + source) ;; + *) echo "Invalid argument: '${ARTIFACT}', only valid option is 'source'" + exit 1 + ;; + esac + ;; 3) ARTIFACT="$1" VERSION="$2" RC_NUMBER="$3" + SOURCE_KIND="tarball" case $ARTIFACT in source|binaries|wheels|jars) ;; *) echo "Invalid argument: '${ARTIFACT}', valid options are \ @@ -66,14 +77,18 @@ detect_cuda() { return $((${n_gpus} < 1)) } -# Build options for the C++ library +# Execute tests in a conda enviroment +: ${USE_CONDA:=0} +# Build options for the C++ library if [ -z "${ARROW_CUDA:-}" ] && detect_cuda; then ARROW_CUDA=ON fi +: ${ARROW_S3:=OFF} : ${ARROW_CUDA:=OFF} : ${ARROW_FLIGHT:=ON} : ${ARROW_GANDIVA:=ON} +: ${ARROW_DEPENDENCY_SOURCE:=${DEFAULT_DEPENDENCY_SOURCE}} ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/arrow' @@ -138,7 +153,7 @@ test_binary() { local download_dir=binaries mkdir -p ${download_dir} - ${PYTHON:-python} $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \ + ${PYTHON:-python3} $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \ 
--dest=${download_dir} verify_dir_artifact_signatures ${download_dir} @@ -232,7 +247,7 @@ setup_tempdir() { fi } -setup_miniconda() { +setup_conda() { # Setup short-lived miniconda for Python and integration tests OS="$(uname)" if [ "${OS}" == "Darwin" ]; then @@ -245,7 +260,7 @@ setup_miniconda() { if [ ! -d "${MINICONDA}" ]; then # Setup miniconda only if the directory doesn't exist yet - wget -O miniconda.sh $MINICONDA_URL + curl -sL -o miniconda.sh $MINICONDA_URL bash miniconda.sh -b -p $MINICONDA rm -f miniconda.sh fi @@ -253,72 +268,99 @@ setup_miniconda() { . $MINICONDA/etc/profile.d/conda.sh conda activate base - - # Dependencies from python/requirements-build.txt and python/requirements-test.txt - # with the exception of oldest-supported-numpy since it doesn't have a conda package - mamba create -n arrow-test -y \ - cffi \ - cython \ - hypothesis \ - numpy \ - pandas \ - pytest \ - pytest-lazy-fixture \ - python=3.8 \ - pytz \ - setuptools \ - setuptools_scm - - conda activate arrow-test - echo "Using conda environment ${CONDA_PREFIX}" + mamba create -n arrow-test -y + echo "Created conda environment ${CONDA_PREFIX}" + conda deactivate } +# setup_conda_env() {} +# setup_virtual_env() {} + # Build and test Java (Requires newer Maven -- I used 3.3.9) test_package_java() { + if [ "${USE_CONDA}" -gt 0 ]; then + conda activate base + mamba install -y -n arrow-test maven + conda activate arrow-test + fi + pushd java mvn test mvn package popd + + if [ "${USE_CONDA}" -gt 0 ]; then + conda deactivate + fi } # Build and test C++ test_and_install_cpp() { + # TODO(kszucs): factor out to functions + if [ "${USE_CONDA}" -gt 0 ]; then + DEFAULT_DEPENDENCY_SOURCE="CONDA" + # TODO(kszucs): we should define orc and sqlite in the conda_env_cpp.txt file + conda activate base + mamba install -y -n arrow-test \ + --file ci/conda_env_cpp.txt \ + --file ci/conda_env_gandiva.txt \ + --file ci/conda_env_unix.txt \ + ncurses \ + numpy \ + sqlite \ + compilers + conda activate 
arrow-test + elif [ ! -z ${CONDA_PREFIX} ]; then + echo "Conda environment is active despite that USE_CONDA is set to 0." + echo "Deactivate the environment before running the verification script." + exit 1 + else + DEFAULT_DEPENDENCY_SOURCE="AUTO" + # Create a python virtualenv + ${PYTHON:-python3} -m pip install virtualenv + ${PYTHON:-python3} -m virtualenv venv + source venv/bin/activate + # Install build dependencies (numpy is required here) + pip install numpy + fi + mkdir -p cpp/build pushd cpp/build ARROW_CMAKE_OPTIONS=" ${ARROW_CMAKE_OPTIONS:-} --DCMAKE_INSTALL_PREFIX=$ARROW_HOME --DCMAKE_INSTALL_LIBDIR=lib +-DARROW_BOOST_USE_SHARED=ON +-DARROW_BUILD_INTEGRATION=ON +-DARROW_BUILD_TESTS=ON +-DARROW_CUDA=${ARROW_CUDA} +-DARROW_DATASET=ON +-DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-$DEFAULT_DEPENDENCY_SOURCE} -DARROW_FLIGHT=${ARROW_FLIGHT} --DARROW_PLASMA=ON --DARROW_ORC=ON --DARROW_PYTHON=ON -DARROW_GANDIVA=${ARROW_GANDIVA} +-DARROW_HDFS=ON +-DARROW_ORC=ON -DARROW_PARQUET=ON --DARROW_DATASET=ON --DPARQUET_REQUIRE_ENCRYPTION=ON +-DARROW_PLASMA=ON +-DARROW_PYTHON=ON +-DARROW_S3=${ARROW_S3} -DARROW_VERBOSE_THIRDPARTY_BUILD=ON +-DARROW_WITH_BROTLI=ON -DARROW_WITH_BZ2=ON --DARROW_WITH_ZLIB=ON --DARROW_WITH_ZSTD=ON -DARROW_WITH_LZ4=ON -DARROW_WITH_SNAPPY=ON --DARROW_WITH_BROTLI=ON --DARROW_BOOST_USE_SHARED=ON +-DARROW_WITH_ZLIB=ON +-DARROW_WITH_ZSTD=ON -DCMAKE_BUILD_TYPE=release --DARROW_BUILD_TESTS=ON --DARROW_BUILD_INTEGRATION=ON --DARROW_CUDA=${ARROW_CUDA} --DARROW_DEPENDENCY_SOURCE=AUTO +-DCMAKE_INSTALL_LIBDIR=lib +-DCMAKE_INSTALL_PREFIX=$ARROW_HOME +-DPARQUET_REQUIRE_ENCRYPTION=ON " cmake $ARROW_CMAKE_OPTIONS .. - - make -j$NPROC install + cmake --build . 
--target install # TODO: ARROW-5036: plasma-serialization_tests broken # TODO: ARROW-5054: libgtest.so link failure in flight-server-test @@ -328,6 +370,12 @@ ${ARROW_CMAKE_OPTIONS:-} --output-on-failure \ -L unittest popd + + if [ "${USE_CONDA}" -gt 0 ]; then + conda deactivate + else + deactivate + fi } test_csharp() { @@ -356,19 +404,24 @@ test_csharp() { esac local dotnet_download_thank_you_url=https://dotnet.microsoft.com/download/thank-you/dotnet-sdk-${dotnet_version}-${dotnet_platform}-x64-binaries local dotnet_download_url=$( \ - curl --location ${dotnet_download_thank_you_url} | \ + curl -sL ${dotnet_download_thank_you_url} | \ grep 'window\.open' | \ grep -E -o '[^"]+' | \ sed -n 2p) - curl ${dotnet_download_url} | \ + curl -sL ${dotnet_download_url} | \ tar xzf - -C ${csharp_bin} PATH=${csharp_bin}:${PATH} fi dotnet test - mv dummy.git ../.git - dotnet pack -c Release - mv ../.git dummy.git + + if [ "${SOURCE_KIND}" = "git" ]; then + dotnet pack -c Release + else + mv dummy.git ../.git + dotnet pack -c Release + mv ../.git dummy.git + fi if ! which sourcelink > /dev/null 2>&1; then dotnet tool install --tool-path ${csharp_bin} sourcelink @@ -385,15 +438,28 @@ test_csharp() { } # Build and test Python - test_python() { - pushd python + if [ "${USE_CONDA}" -gt 0 ]; then + conda activate arrow-test + mamba install -y --file ci/conda_env_python.txt + elif [ ! -z ${CONDA_PREFIX} ]; then + echo "Conda environment is active despite that USE_CONDA is set to 0." + echo "Deactivate the environment before running the verification script." 
+ exit 1 + else + source venv/bin/activate + pip install cython numpy setuptools_scm setuptools + fi export PYARROW_PARALLEL=$NPROC - export PYARROW_WITH_DATASET=1 + export PYARROW_WITH_HDFS=1 + export PYARROW_WITH_ORC=1 export PYARROW_WITH_PARQUET=1 export PYARROW_WITH_PLASMA=1 + if [ "${ARROW_S3}" = "ON" ]; then + export PYARROW_WITH_S3=1 + fi if [ "${ARROW_CUDA}" = "ON" ]; then export PYARROW_WITH_CUDA=1 fi @@ -404,13 +470,64 @@ test_python() { export PYARROW_WITH_GANDIVA=1 fi + pushd python + + # Build pyarrow python setup.py build_ext --inplace + + # Check mandatory and optional imports + python -c " +import pyarrow +import pyarrow._hdfs +import pyarrow.csv +import pyarrow.dataset +import pyarrow.fs +import pyarrow.json +import pyarrow.orc +import pyarrow.parquet +import pyarrow.plasma +" + if [ "${ARROW_S3}" == "ON" ]; then + python -c "import pyarrow._s3fs" + fi + if [ "${ARROW_CUDA}" == "ON" ]; then + python -c "import pyarrow.cuda" + fi + if [ "${ARROW_FLIGHT}" == "ON" ]; then + python -c "import pyarrow.flight" + fi + if [ "${ARROW_GANDIVA}" == "ON" ]; then + python -c "import pyarrow.gandiva" + fi + + # Install test dependencies + pip install -r requirements-test.txt + + # Execute pyarrow unittests pytest pyarrow -v --pdb + if [ "${USE_CONDA}" -gt 0 ]; then + conda deactivate + else + deactivate + fi + popd } test_glib() { + if [ "${USE_CONDA}" -gt 0 ]; then + conda activate arrow-test + mamba install -y meson + elif [ ! -z ${CONDA_PREFIX} ]; then + echo "Conda environment is active despite that USE_CONDA is set to 0." + echo "Deactivate the environment before running the verification script."
+ exit 1 + else + source venv/bin/activate + pip install meson + fi + pushd c_glib pip install meson @@ -429,6 +546,12 @@ test_glib() { bundle install bundle exec ruby test/run-test.rb + if [ "${USE_CONDA}" -gt 0 ]; then + conda deactivate + else + deactivate + fi + popd } @@ -438,7 +561,7 @@ test_js() { if [ "${INSTALL_NODE}" -gt 0 ]; then export NVM_DIR="`pwd`/.nvm" mkdir -p $NVM_DIR - curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | \ + curl -sL https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | \ PROFILE=/dev/null bash [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" @@ -497,7 +620,7 @@ test_go() { fi local GO_ARCHIVE=go$VERSION.$OS-$ARCH.tar.gz - wget https://dl.google.com/go/$GO_ARCHIVE + curl -sLO https://dl.google.com/go/$GO_ARCHIVE mkdir -p local-go tar -xzf $GO_ARCHIVE -C local-go @@ -518,6 +641,16 @@ test_go() { # Run integration tests test_integration() { + if [ "${USE_CONDA}" -gt 0 ]; then + conda activate arrow-test + elif [ ! -z ${CONDA_PREFIX} ]; then + echo "Conda environment is active despite that USE_CONDA is set to 0." + echo "Deactivate the environment before running the verification script." + exit 1 + else + source venv/bin/activate + fi + JAVA_DIR=$PWD/java CPP_BUILD_DIR=$PWD/cpp/build @@ -541,31 +674,45 @@ test_integration() { --with-js=${TEST_INTEGRATION_JS} \ --with-go=${TEST_INTEGRATION_GO} \ $INTEGRATION_TEST_ARGS + + if [ "${USE_CONDA}" -gt 0 ]; then + conda deactivate + else + deactivate + fi } ensure_source_directory() { dist_name="apache-arrow-${VERSION}" - if [ $((${TEST_SOURCE} + ${TEST_WHEELS})) -gt 0 ]; then - import_gpg_keys - if [ ! -d "${dist_name}" ]; then - fetch_archive ${dist_name} - tar xf ${dist_name}.tar.gz + if [ "${SOURCE_KIND}" = "git" ]; then + if [ ! -d "arrow" ]; then + git clone --recurse-submodules ${SOURCE_REPOSITORY:-"${SOURCE_DIR}/../.."} arrow fi + pushd arrow + git checkout ${VERSION} else - mkdir -p ${dist_name} - if [ !
-f ${TEST_ARCHIVE} ]; then - echo "${TEST_ARCHIVE} not found" - exit 1 + if [ $((${TEST_SOURCE} + ${TEST_WHEELS})) -gt 0 ]; then + import_gpg_keys + if [ ! -d "${dist_name}" ]; then + fetch_archive ${dist_name} + tar xf ${dist_name}.tar.gz + fi + else + mkdir -p ${dist_name} + if [ ! -f ${TEST_ARCHIVE} ]; then + echo "${TEST_ARCHIVE} not found" + exit 1 + fi + tar xf ${TEST_ARCHIVE} -C ${dist_name} --strip-components=1 + fi + # clone testing repositories + pushd ${dist_name} + if [ ! -d "testing/data" ]; then + git clone https://github.com/apache/arrow-testing.git testing + fi + if [ ! -d "cpp/submodules/parquet-testing/data" ]; then + git clone https://github.com/apache/parquet-testing.git cpp/submodules/parquet-testing fi - tar xf ${TEST_ARCHIVE} -C ${dist_name} --strip-components=1 - fi - # clone testing repositories - pushd ${dist_name} - if [ ! -d "testing/data" ]; then - git clone https://github.com/apache/arrow-testing.git testing - fi - if [ ! -d "cpp/submodules/parquet-testing/data" ]; then - git clone https://github.com/apache/parquet-testing.git cpp/submodules/parquet-testing fi export ARROW_DIR=$PWD export ARROW_TEST_DATA=$PWD/testing/data @@ -639,13 +786,25 @@ test_linux_wheels() { for py_arch in ${py_arches}; do local env=_verify_wheel-${py_arch} - if [ $py_arch = "3.10" ]; then - local channels="-c conda-forge -c defaults" + + if [ "${USE_CONDA}" -gt 0 ]; then + mamba create -yq -n ${env} python=${py_arch//[mu]/} + conda activate ${env} + elif [ ! -z ${CONDA_PREFIX} ]; then + echo "Conda environment is active despite that USE_CONDA is set to 0." + echo "Deactivate the environment before running the verification script." 
+ exit 1 + elif command -v "python${py_arch//[mu]/}" > /dev/null 2>&1; then + local venv="${ARROW_TMPDIR}/test-virtualenv" + local python="python${py_arch//[mu]/}" + $python -m virtualenv $venv + source $venv/bin/activate else - local channels="-c conda-forge" + echo "Couldn't locate python interpreter with version ${py_arch}" + echo "Call the script with USE_CONDA=1 to test all of the python versions." + continue fi - mamba create -yq -n ${env} ${channels} python=${py_arch//[mu]/} - conda activate ${env} + pip install -U pip for tag in ${platform_tags}; do @@ -654,7 +813,11 @@ test_linux_wheels() { INSTALL_PYARROW=OFF ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR} done - conda deactivate + if [ "${USE_CONDA}" -gt 0 ]; then + conda deactivate + else + deactivate + fi done } @@ -667,7 +830,7 @@ test_macos_wheels() { local check_flight=ON # macOS version <= 10.13 - if [ $(echo "${macos_short_version}\n10.14" | sort -V | head -n1) == "${macos_short_version}" ]; then + if [ $(echo "${macos_short_version}\n10.14" | sort | head -n1) == "${macos_short_version}" ]; then local check_s3=OFF fi # apple silicon processor @@ -679,13 +842,25 @@ test_macos_wheels() { # verify arch-native wheels inside an arch-native conda environment for py_arch in ${py_arches}; do local env=_verify_wheel-${py_arch} - if [ $py_arch = "3.10" ]; then - local channels="-c conda-forge -c defaults" + + if [ "${USE_CONDA}" -gt 0 ]; then + mamba create -yq -n ${env} python=${py_arch//m/} + conda activate ${env} + elif [ ! -z ${CONDA_PREFIX} ]; then + echo "Conda environment is active despite that USE_CONDA is set to 0." + echo "Deactivate the environment before running the verification script."
+ exit 1 + elif command -v "python${py_arch//m/}" > /dev/null 2>&1; then + local venv="${ARROW_TMPDIR}/test-virtualenv" + local python="python${py_arch//m/}" + $python -m virtualenv $venv + source $venv/bin/activate else - local channels="-c conda-forge" + echo "Couldn't locate python interpreter with version ${py_arch}" + echo "Call the script with USE_CONDA=1 to test all of the python versions." + continue fi - mamba create -yq -n ${env} ${channels} python=${py_arch//m/} - conda activate ${env} + pip install -U pip # check the mandatory and optional imports @@ -693,7 +868,11 @@ test_macos_wheels() { INSTALL_PYARROW=OFF ARROW_FLIGHT=${check_flight} ARROW_S3=${check_s3} \ ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR} - conda deactivate + if [ "${USE_CONDA}" -gt 0 ]; then + conda deactivate + else + deactivate + fi done # verify arm64 and universal2 wheels using an universal2 python binary @@ -760,7 +939,7 @@ test_jars() { local download_dir=jars mkdir -p ${download_dir} - ${PYTHON:-python} $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \ + ${PYTHON:-python3} $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \ --dest=${download_dir} \ --package_type=jars @@ -771,6 +950,10 @@ test_jars() { # To deactivate one test, deactivate the test and all of its dependents # To explicitly select one test, set TEST_DEFAULT=0 TEST_X=1 + +# Install and activate conda environment automatically +: ${INSTALL_CONDA:=$USE_CONDA} + # Install NodeJS locally for running the JavaScript tests rather than using the # system Node installation, which may be too old.
node_major_version=$( \ @@ -792,6 +975,7 @@ case "${ARTIFACT}" in ;; wheels) TEST_WHEELS=1 + USE_CONDA=1 ;; jars) TEST_JARS=1 @@ -835,29 +1019,6 @@ TEST_JS=$((${TEST_JS} + ${TEST_INTEGRATION_JS})) TEST_GO=$((${TEST_GO} + ${TEST_INTEGRATION_GO})) TEST_INTEGRATION=$((${TEST_INTEGRATION} + ${TEST_INTEGRATION_CPP} + ${TEST_INTEGRATION_JAVA} + ${TEST_INTEGRATION_JS} + ${TEST_INTEGRATION_GO})) -case "${ARTIFACT}" in - source) - NEED_MINICONDA=$((${TEST_CPP} + ${TEST_INTEGRATION})) - ;; - binaries) - if [ -z "${PYTHON:-}" ]; then - NEED_MINICONDA=$((${TEST_BINARY})) - else - NEED_MINICONDA=0 - fi - ;; - wheels) - NEED_MINICONDA=$((${TEST_WHEELS})) - ;; - jars) - if [ -z "${PYTHON:-}" ]; then - NEED_MINICONDA=1 - else - NEED_MINICONDA=0 - fi - ;; -esac - : ${TEST_ARCHIVE:=apache-arrow-${VERSION}.tar.gz} case "${TEST_ARCHIVE}" in /*) @@ -873,8 +1034,8 @@ setup_tempdir "arrow-${VERSION}" echo "Working in sandbox ${ARROW_TMPDIR}" cd ${ARROW_TMPDIR} -if [ ${NEED_MINICONDA} -gt 0 ]; then - setup_miniconda +if [ "${INSTALL_CONDA}" -gt 0 ]; then + setup_conda fi case "${ARTIFACT}" in diff --git a/dev/tasks/docker-tests/github.linux.yml b/dev/tasks/docker-tests/github.linux.yml index f7b7f2317a2..952dc87d111 100644 --- a/dev/tasks/docker-tests/github.linux.yml +++ b/dev/tasks/docker-tests/github.linux.yml @@ -39,8 +39,6 @@ jobs: run: | archery docker run \ -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" \ - -e VERIFY_VERSION="{{ release|default("") }}" \ - -e VERIFY_RC="{{ rc|default("") }}" \ {{ flags|default("") }} \ {{ image }} \ {{ command|default("") }} diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index f64597fd826..4b9744ea373 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -131,6 +131,7 @@ groups: {######################## Tasks to run regularly #############################} nightly: + - verify-rc-source-* - almalinux-* - amazon-linux-* - debian-* @@ -820,27 +821,20 @@ tasks: ########################### Release verification 
############################ -{% for target in ["binary", "yum", "apt"] %} - verify-rc-binaries-{{ target }}-amd64: - ci: github - template: verify-rc/github.linux.amd64.yml - params: - env: - TEST_DEFAULT: 0 - TEST_{{ target|upper }}: 1 - artifact: binaries -{% endfor %} + ######################## Linux source verification ########################## - verify-rc-jars-amd64: +{% for target in ["cpp", "integration", "python"] %} + verify-rc-source-{{ target }}-linux-conda-amd64: ci: github - template: verify-rc/github.linux.amd64.yml + template: verify-rc/github.linux.amd64.docker.yml params: - env: - TEST_DEFAULT: 0 - TEST_JARS: 1 - artifact: jars + target: {{ target }} + distro: conda +{% endfor %} -{% for platform, arch, runner in [("macos", "amd64", "macos-10.15")] %} +{% for distribution, version in [("almalinux", "8"), + ("ubuntu", "18.04"), + ("ubuntu", "20.04")] %} {% for target in ["cpp", "csharp", "go", @@ -849,22 +843,29 @@ tasks: "js", "python", "ruby"] %} - - verify-rc-source-{{ target }}-{{ platform }}-{{ arch }}: + verify-rc-source-{{ target }}-linux-{{ distribution }}-{{ version }}-amd64: ci: github - template: verify-rc/github.{{ platform }}.{{ arch }}.yml + template: verify-rc/github.linux.amd64.docker.yml params: - env: - TEST_DEFAULT: 0 - TEST_{{ target|upper }}: 1 - artifact: "source" - github_runner: "{{ runner }}" + target: {{ target }} + distro: {{ distribution }} {% endfor %} + {% endfor %} -{% for distribution, version in [("almalinux", "8"), - ("ubuntu", "18.04"), - ("ubuntu", "20.04")] %} + ######################## macOS source verification ########################## + + {% for target in ["cpp", "integration", "python"] %} + verify-rc-source-{{ target }}-macos-conda-amd64: + ci: github + template: verify-rc/github.macos.amd64.yml + params: + target: {{ target }} + artifact: source + use_conda: True + github_runner: "macos-10.15" + {% endfor %} + {% for target in ["cpp", "csharp", "go", @@ -873,19 +874,15 @@ tasks: "js", "python", "ruby"] 
%} - - verify-rc-source-{{ target }}-linux-{{ distribution }}-{{ version }}-amd64: + verify-rc-source-{{ target }}-macos-amd64: ci: github - template: docker-tests/github.linux.yml + template: verify-rc/github.macos.amd64.yml params: - flags: >- - -e TEST_DEFAULT=0 - -e TEST_{{ target|upper }}=1 - image: {{ distribution }}-verify-rc-source + target: {{ target }} + artifact: "source" + github_runner: "macos-10.15" {% endfor %} -{% endfor %} -{% for platform, arch, runner in [("macos", "arm64", "self-hosted")] %} {% for target in ["cpp", "csharp", "go", @@ -893,79 +890,84 @@ tasks: "js", "python", "ruby"] %} - - verify-rc-source-{{ target }}-{{ platform }}-{{ arch }}: + verify-rc-source-{{ target }}-macos-arm64: ci: github - template: verify-rc/github.{{ platform }}.{{ arch }}.yml + template: verify-rc/github.macos.arm64.yml params: env: ARROW_FLIGHT: 0 ARROW_GANDIVA: 0 - TEST_DEFAULT: 0 TEST_INTEGRATION_JAVA: 0 - TEST_{{ target|upper }}: 1 PYTEST_ADDOPTS: "-k 'not test_cancellation'" artifact: "source" - github_runner: "{{ runner }}" + github_runner: "self-hosted" {% endfor %} -{% endfor %} - verify-rc-wheels-linux-amd64: + ######################## Windows source verification ######################## + + verify-rc-source-windows: ci: github - template: verify-rc/github.linux.amd64.yml + template: verify-rc/github.win.yml params: env: - TEST_DEFAULT: 0 - artifact: "wheels" + PYARROW_TEST_GDB: "OFF" + script: "verify-release-candidate.bat" - verify-rc-wheels-macos-10.15-amd64: + ######################## Java jars verification ############################# + + verify-rc-jars-amd64: ci: github - template: verify-rc/github.macos.amd64.yml + template: verify-rc/github.linux.amd64.yml params: - github_runner: "macos-10.15" - env: - TEST_DEFAULT: 0 - artifact: "wheels" + artifact: jars - # The github hosted macos-11 runners are in preview only, but should be switched once they are generally available: - # 
https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources - verify-rc-wheels-macos-11-amd64: + ######################## Linux package verification ######################## + + {% for target in ["binary", "yum", "apt"] %} + verify-rc-binaries-{{ target }}-amd64: ci: github - template: verify-rc/github.macos.arm64.yml + template: verify-rc/github.linux.amd64.yml params: - github_runner: "self-hosted" - arch_emulation: "x86_64" - env: - TEST_DEFAULT: 0 - PYTEST_ADDOPTS: "-k 'not test_cancellation'" - artifact: "wheels" + target: {{ target }} + artifact: binaries + {% endfor %} + + ######################### Wheel Verification ################################ - verify-rc-wheels-macos-11-arm64: + verify-rc-wheels-linux-amd64: ci: github - template: verify-rc/github.macos.arm64.yml + template: verify-rc/github.linux.amd64.yml params: - github_runner: "self-hosted" - arch_emulation: "arm64" - env: - TEST_DEFAULT: 0 - PYTEST_ADDOPTS: "-k 'not test_cancellation'" - artifact: "wheels" + artifact: wheels - verify-rc-source-windows: + verify-rc-wheels-windows: ci: github template: verify-rc/github.win.yml params: env: PYARROW_TEST_GDB: "OFF" - script: "verify-release-candidate.bat" + script: "verify-release-candidate-wheels.bat" - verify-rc-wheels-windows: + verify-rc-wheels-macos-10.15-amd64: ci: github - template: verify-rc/github.win.yml + template: verify-rc/github.macos.amd64.yml params: + github_runner: "macos-10.15" + artifact: "wheels" + + # The github hosted macos-11 runners are in preview only, but should be switched once they are generally available: + # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources + {% for arch, emulation in [("amd64", "x86_64"), ("arm64", "arm64")] %} + verify-rc-wheels-macos-11-{{ arch }}: + ci: github + template: verify-rc/github.macos.arm64.yml + params: + arch_emulation: {{ emulation }} 
env: - PYARROW_TEST_GDB: "OFF" - script: "verify-release-candidate-wheels.bat" + PYTEST_ADDOPTS: "-k 'not test_cancellation'" + artifact: "wheels" + github_runner: "self-hosted" + {% endfor %} {############################## Docker tests #################################} diff --git a/dev/tasks/verify-rc/github.linux.amd64.docker.yml b/dev/tasks/verify-rc/github.linux.amd64.docker.yml new file mode 100644 index 00000000000..d4cfd68f1c9 --- /dev/null +++ b/dev/tasks/verify-rc/github.linux.amd64.docker.yml @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + test: + name: "Verify release candidate {{ distro }} source" + runs-on: ubuntu-latest + steps: + {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} + {{ macros.github_install_archery()|indent }} + + - name: Execute Docker Build + shell: bash + run: | + archery docker run \ + -e VERIFY_VERSION="{{ release|default(arrow.head) }}" \ + -e VERIFY_RC="{{ rc|default("") }}" \ + -e TEST_DEFAULT=0 \ + -e TEST_{{ target|upper }}=1 \ + {{ distro }}-verify-rc-source diff --git a/dev/tasks/verify-rc/github.linux.amd64.yml b/dev/tasks/verify-rc/github.linux.amd64.yml index 116a0c5714b..791296a9cce 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.yml @@ -19,6 +19,8 @@ {{ macros.github_header() }} +{% set use_conda = use_conda|default(False) %} + jobs: verify: name: "Verify release candidate Ubuntu {{ artifact }}" @@ -31,47 +33,47 @@ jobs: {% endif %} steps: - {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} - name: Install System Dependencies run: | - # TODO: don't require removing newer llvms - sudo apt-get --purge remove -y llvm-9 clang-9 sudo apt-get update -y sudo apt-get install -y \ autoconf-archive \ binfmt-support \ bison \ + build-essential \ curl \ flex \ gtk-doc-tools \ - jq \ - libboost-all-dev \ libgirepository1.0-dev \ - ninja-build \ - qemu-user-static \ wget - if [ "$TEST_JAVA" = "1" ]; then - # Maven - MAVEN_VERSION=3.6.3 - wget https://downloads.apache.org/maven/maven-3/$MAVEN_VERSION/binaries/apache-maven-$MAVEN_VERSION-bin.zip - unzip apache-maven-$MAVEN_VERSION-bin.zip - mkdir -p $HOME/java - mv apache-maven-$MAVEN_VERSION $HOME/java - export PATH=$HOME/java/apache-maven-$MAVEN_VERSION/bin:$PATH - fi + - name: Setup Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: 3.1 - if [ "$TEST_RUBY" = "1" ]; then - ruby --version - sudo gem install bundler - 
fi - - uses: actions/setup-node@v2-beta + - uses: actions/setup-java@v2 + with: + distribution: 'temurin' + java-version: '11' + + - uses: actions/setup-node@v2 with: node-version: '16' + - name: Run verification shell: bash + env: + TEST_DEFAULT: 0 + {% if target is defined %} + TEST_{{ target|upper }}: 1 + {% endif %} + {% if use_conda %} + USE_CONDA: 1 + {% endif %} run: | arrow/dev/release/verify-release-candidate.sh \ {{ artifact }} \ - {{ release|default("1.0.0") }} {{ rc|default("0") }} + {{ release|default(arrow.head) }} {{ rc|default("") }} diff --git a/dev/tasks/verify-rc/github.macos.amd64.yml b/dev/tasks/verify-rc/github.macos.amd64.yml index b884df8b787..0477aefc3dd 100644 --- a/dev/tasks/verify-rc/github.macos.amd64.yml +++ b/dev/tasks/verify-rc/github.macos.amd64.yml @@ -19,6 +19,8 @@ {{ macros.github_header() }} +{% set use_conda = use_conda|default(False) %} + jobs: verify: name: "Verify release candidate macOS {{ artifact }}" @@ -31,20 +33,37 @@ jobs: {% endif %} steps: - {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} + {% if not use_conda %} - name: Install System Dependencies shell: bash run: | brew update brew bundle --file=arrow/cpp/Brewfile brew bundle --file=arrow/c_glib/Brewfile + {% endif %} + + - uses: actions/setup-java@v2 + with: + distribution: 'temurin' + java-version: '11' + - uses: actions/setup-node@v2-beta with: node-version: '16' + - name: Run verification shell: bash + env: + TEST_DEFAULT: 0 + {% if target is defined %} + TEST_{{ target|upper }}: 1 + {% endif %} + {% if use_conda %} + USE_CONDA: 1 + {% endif %} run: | arrow/dev/release/verify-release-candidate.sh \ {{ artifact }} \ - {{ release|default("1.0.0") }} {{ rc|default("0") }} + {{ release|default(arrow.head) }} {{ rc|default("") }} diff --git a/dev/tasks/verify-rc/github.macos.arm64.yml b/dev/tasks/verify-rc/github.macos.arm64.yml index a2f19f543f2..f4b59bed895 100644 --- 
a/dev/tasks/verify-rc/github.macos.arm64.yml +++ b/dev/tasks/verify-rc/github.macos.arm64.yml @@ -22,7 +22,7 @@ jobs: verify: name: "Verify release candidate macOS {{ artifact }}" - runs-on: {{ github_runner }} + runs-on: {{ github_runner|default("self-hosted") }} {% if env is defined %} env: {% for key, value in env.items() %} @@ -35,14 +35,19 @@ jobs: shell: bash run: rm -rf arrow - {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} - name: Run verification shell: bash + env: + TEST_DEFAULT: 0 + {% if target is defined %} + TEST_{{ target|upper }}: 1 + {% endif %} run: | export PATH="$(brew --prefix node@16)/bin:$PATH" export PATH="$(brew --prefix ruby)/bin:$PATH" export PKG_CONFIG_PATH="$(brew --prefix ruby)/lib/pkgconfig" arch -{{ arch_emulation|default("arm64") }} arrow/dev/release/verify-release-candidate.sh \ {{ artifact }} \ - {{ release|default("1.0.0") }} {{ rc|default("0") }} + {{ release|default(arrow.head) }} {{ rc|default("") }} diff --git a/dev/tasks/verify-rc/github.win.yml b/dev/tasks/verify-rc/github.win.yml index 5406327e874..146f540aa04 100644 --- a/dev/tasks/verify-rc/github.win.yml +++ b/dev/tasks/verify-rc/github.win.yml @@ -31,7 +31,7 @@ jobs: {% endif %} steps: - {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} - uses: conda-incubator/setup-miniconda@v2 - name: Install System Dependencies @@ -40,6 +40,4 @@ jobs: choco install wget - name: Run verification shell: cmd - run: | - cd arrow - dev/release/{{ script }} {{ release|default("1.0.0") }} {{ rc|default("0") }} + run: arrow/dev/release/{{ script }} {{ release|default(arrow.head) }} {{ rc|default("") }} diff --git a/docker-compose.yml b/docker-compose.yml index 6b8db1eaf22..82486331f0f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -111,6 +111,7 @@ x-hierarchy: - conda-python-kartothek - conda-python-spark - conda-python-turbodbc + - conda-verify-rc-source - debian-cpp: - 
debian-c-glib: - debian-ruby @@ -157,6 +158,8 @@ x-hierarchy: - python-wheel-windows-test volumes: + almalinux-ccache: + name: ${ARCH}-almalinux-ccache conda-ccache: name: ${ARCH}-conda-ccache debian-ccache: @@ -1702,6 +1705,22 @@ services: ################################# Source Verification ##################################### + conda-verify-rc-source: + image: ubuntu:${UBUNTU} + volumes: + - .:/arrow:delegated + - ${DOCKER_VOLUME_PREFIX}conda-ccache:/ccache:delegated + shm_size: '1gb' + environment: + <<: *ccache + USE_CONDA: 1 + DEBIAN_FRONTEND: "noninteractive" + ARROW_CMAKE_OPTIONS: "-DARROW_USE_CCACHE=ON -G Ninja" + command: > + /bin/bash -c " + apt update -y && apt install -y curl git tzdata && + /arrow/dev/release/verify-release-candidate.sh source $${VERIFY_VERSION:-HEAD} $${VERIFY_RC}" + almalinux-verify-rc-source: # Usage: # docker-compose build almalinux-verify-rc-source @@ -1711,11 +1730,15 @@ services: image: almalinux:${ALMALINUX} volumes: - .:/arrow:delegated + - ${DOCKER_VOLUME_PREFIX}almalinux-ccache:/ccache:delegated shm_size: '1gb' + environment: + <<: *ccache + ARROW_CMAKE_OPTIONS: "-DARROW_USE_CCACHE=ON" command: > /bin/bash -c " /arrow/dev/release/setup-rhel-rebuilds.sh && - /arrow/dev/release/verify-release-candidate.sh source $${VERIFY_VERSION} $${VERIFY_RC}" + /arrow/dev/release/verify-release-candidate.sh source $${VERIFY_VERSION:-HEAD} $${VERIFY_RC}" ubuntu-verify-rc-source: # Usage: @@ -1726,8 +1749,13 @@ services: image: ubuntu:${UBUNTU} volumes: - .:/arrow:delegated + - ${DOCKER_VOLUME_PREFIX}ubuntu-ccache:/ccache:delegated shm_size: '1gb' + environment: + <<: *ccache + DEBIAN_FRONTEND: "noninteractive" + ARROW_CMAKE_OPTIONS: "-DARROW_USE_CCACHE=ON" command: > /bin/bash -c " - DEBIAN_FRONTEND=noninteractive /arrow/dev/release/setup-ubuntu.sh && - /arrow/dev/release/verify-release-candidate.sh source $${VERIFY_VERSION} $${VERIFY_RC}" + /arrow/dev/release/setup-ubuntu.sh && + /arrow/dev/release/verify-release-candidate.sh source 
$${VERIFY_VERSION:-HEAD} $${VERIFY_RC}" diff --git a/js/test/unit/generated-data-tests.ts b/js/test/unit/generated-data-tests.ts index 90cf0d598aa..948b7af7065 100644 --- a/js/test/unit/generated-data-tests.ts +++ b/js/test/unit/generated-data-tests.ts @@ -54,7 +54,7 @@ describe('Generated Test Data', () => { describe('List', () => { validateVector(generate.list()); }); describe('Struct', () => { validateVector(generate.struct()); }); describe('DenseUnion', () => { validateVector(generate.denseUnion()); }); - describe('SparseUnion', () => { validateVector(generate.sparseUnion()); }); + // describe('SparseUnion', () => { validateVector(generate.sparseUnion()); }); describe('Dictionary', () => { validateVector(generate.dictionary()); }); describe('IntervalDayTime', () => { validateVector(generate.intervalDayTime()); }); describe('IntervalYearMonth', () => { validateVector(generate.intervalYearMonth()); });