From ea009a826e2e3819b671bffe798ec370ee361e92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 12:17:55 +0100 Subject: [PATCH 01/47] [Release] Nightly source verification tasks --- dev/release/verify-release-candidate.bat | 22 ++++++-- dev/release/verify-release-candidate.sh | 55 ++++++++++++------- dev/tasks/docker-tests/github.linux.yml | 2 - dev/tasks/tasks.yml | 8 +-- .../verify-rc/github.linux.amd64.docker.yml | 40 ++++++++++++++ dev/tasks/verify-rc/github.linux.amd64.yml | 4 +- dev/tasks/verify-rc/github.macos.amd64.yml | 4 +- dev/tasks/verify-rc/github.macos.arm64.yml | 4 +- dev/tasks/verify-rc/github.win.yml | 6 +- 9 files changed, 107 insertions(+), 38 deletions(-) create mode 100644 dev/tasks/verify-rc/github.linux.amd64.docker.yml diff --git a/dev/release/verify-release-candidate.bat b/dev/release/verify-release-candidate.bat index 387eb25e1db..d984ea85785 100644 --- a/dev/release/verify-release-candidate.bat +++ b/dev/release/verify-release-candidate.bat @@ -27,15 +27,25 @@ if not exist "C:\tmp\arrow-verify-release" mkdir C:\tmp\arrow-verify-release set _VERIFICATION_DIR=C:\tmp\arrow-verify-release set _VERIFICATION_DIR_UNIX=C:/tmp/arrow-verify-release set _VERIFICATION_CONDA_ENV=%_VERIFICATION_DIR%\conda-env -set _DIST_URL=https://dist.apache.org/repos/dist/dev/arrow -set _TARBALL=apache-arrow-%1.tar.gz set ARROW_SOURCE=%_VERIFICATION_DIR%\apache-arrow-%1 set INSTALL_DIR=%_VERIFICATION_DIR%\install -@rem Requires GNU Wget for Windows -wget --no-check-certificate -O %_TARBALL% %_DIST_URL%/apache-arrow-%1-rc%2/%_TARBALL% || exit /B 1 - -tar xf %_TARBALL% -C %_VERIFICATION_DIR_UNIX% +set VERSION=%1 +set RC_NUMBER=%2 + +if "%RC_NUMBER%"=="" ( + @rem verify a specific git revision + if "%SOURCE_REPOSITORY%"=="" set SOURCE_REPOSITORY="https://github.com/apache/arrow.git" + git clone --recurse-submodules %SOURCE_REPOSITORY% %ARROW_SOURCE% + git -C %ARROW_SOURCE% checkout %VERSION% +) else ( + @rem verify a release candidate tarball + @rem Requires GNU Wget for Windows + set TARBALL_NAME=apache-arrow-%VERSION%.tar.gz + set TARBALL_URL=https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-%VERSION%-rc%RC_NUMBER%/%TARBALL_NAME% + wget --no-check-certificate -O %TARBALL_NAME% %TARBALL_URL% || exit /B 1 + tar xf %TARBALL_NAME% -C %_VERIFICATION_DIR_UNIX% +) set PYTHON=3.8 diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 11cf0d9a488..7bd531ee11e 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -35,9 +35,20 @@ # directory is not cleaned up automatically. case $# in + 2) ARTIFACT="$1" + VERSION="$2" + SOURCE_KIND="git" + case $ARTIFACT in + source) ;; + *) echo "Invalid argument: '${ARTIFACT}', only valid option is 'source'" + exit 1 + ;; + esac + ;; 3) ARTIFACT="$1" VERSION="$2" RC_NUMBER="$3" + SOURCE_KIND="tarball" case $ARTIFACT in source|binaries|wheels|jars) ;; *) echo "Invalid argument: '${ARTIFACT}', valid options are \ @@ -545,27 +556,33 @@ test_integration() { ensure_source_directory() { dist_name="apache-arrow-${VERSION}" - if [ $((${TEST_SOURCE} + ${TEST_WHEELS})) -gt 0 ]; then - import_gpg_keys - if [ ! -d "${dist_name}" ]; then - fetch_archive ${dist_name} - tar xf ${dist_name}.tar.gz - fi + if [ "${SOURCE_KIND}" = "git" ]; then + git clone --recurse-submodules ${SOURCE_REPOSITORY:-"https://github.com/apache/arrow.git"} arrow + pushd arrow + git checkout ${VERSION} else - mkdir -p ${dist_name} - if [ ! -f ${TEST_ARCHIVE} ]; then - echo "${TEST_ARCHIVE} not found" - exit 1 + if [ $((${TEST_SOURCE} + ${TEST_WHEELS})) -gt 0 ]; then + import_gpg_keys + if [ ! -d "${dist_name}" ]; then + fetch_archive ${dist_name} + tar xf ${dist_name}.tar.gz + fi + else + mkdir -p ${dist_name} + if [ ! -f ${TEST_ARCHIVE} ]; then + echo "${TEST_ARCHIVE} not found" + exit 1 + fi + tar xf ${TEST_ARCHIVE} -C ${dist_name} --strip-components=1 + fi + # clone testing repositories + pushd ${dist_name} + if [ ! -d "testing/data" ]; then + git clone https://github.com/apache/arrow-testing.git testing + fi + if [ ! -d "cpp/submodules/parquet-testing/data" ]; then + git clone https://github.com/apache/parquet-testing.git cpp/submodules/parquet-testing fi - tar xf ${TEST_ARCHIVE} -C ${dist_name} --strip-components=1 - fi - # clone testing repositories - pushd ${dist_name} - if [ ! -d "testing/data" ]; then - git clone https://github.com/apache/arrow-testing.git testing - fi - if [ ! -d "cpp/submodules/parquet-testing/data" ]; then - git clone https://github.com/apache/parquet-testing.git cpp/submodules/parquet-testing fi export ARROW_DIR=$PWD export ARROW_TEST_DATA=$PWD/testing/data diff --git a/dev/tasks/docker-tests/github.linux.yml b/dev/tasks/docker-tests/github.linux.yml index f7b7f2317a2..952dc87d111 100644 --- a/dev/tasks/docker-tests/github.linux.yml +++ b/dev/tasks/docker-tests/github.linux.yml @@ -39,8 +39,6 @@ jobs: run: | archery docker run \ -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" \ - -e VERIFY_VERSION="{{ release|default("") }}" \ - -e VERIFY_RC="{{ rc|default("") }}" \ {{ flags|default("") }} \ {{ image }} \ {{ command|default("") }} diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index f64597fd826..8e94135d988 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -876,12 +876,10 @@ tasks: verify-rc-source-{{ target }}-linux-{{ distribution }}-{{ version }}-amd64: ci: github - template: docker-tests/github.linux.yml + template: docker-tests/github.linux.amd64.docker.yml params: - flags: >- - -e TEST_DEFAULT=0 - -e TEST_{{ target|upper }}=1 - image: {{ distribution }}-verify-rc-source + target: {{ target }} + distro: {{ distribution }} {% endfor %} {% endfor %} diff --git a/dev/tasks/verify-rc/github.linux.amd64.docker.yml b/dev/tasks/verify-rc/github.linux.amd64.docker.yml new file mode 100644 index 00000000000..71e2cdd4e0e --- /dev/null +++ b/dev/tasks/verify-rc/github.linux.amd64.docker.yml @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + test: + name: | + Docker Test {{ flags|default("") }} {{ image }} {{ command|default("") }} + runs-on: ubuntu-latest + steps: + {{ macros.github_checkout_arrow(fetch_depth=fetch_depth if fetch_depth is defined else 1)|indent }} + {{ macros.github_install_archery()|indent }} + + - name: Execute Docker Build + shell: bash + run: | + archery docker run \ + -e SOURCE_REPOSITORY="{{ arrow.repo }}" \ + -e VERIFY_VERSION="{{ release|arrow.head }}" \ + -e VERIFY_RC="{{ rc|default("") }}" \ + -e TEST_DEFAULT=0 \ + -e TEST_{{ target|upper }}=1 + {{ distro }}-verify-rc-source diff --git a/dev/tasks/verify-rc/github.linux.amd64.yml b/dev/tasks/verify-rc/github.linux.amd64.yml index 116a0c5714b..0cc7670f848 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.yml @@ -71,7 +71,9 @@ jobs: node-version: '16' - name: Run verification shell: bash + env: + SOURCE_REPOSITORY: {{ arrow.repo }} run: | arrow/dev/release/verify-release-candidate.sh \ {{ artifact }} \ - {{ release|default("1.0.0") }} {{ rc|default("0") }} + {{ release|arrow.head }} {{ rc|default("") }} diff --git a/dev/tasks/verify-rc/github.macos.amd64.yml b/dev/tasks/verify-rc/github.macos.amd64.yml index b884df8b787..b014c505e1e 100644 --- a/dev/tasks/verify-rc/github.macos.amd64.yml +++ b/dev/tasks/verify-rc/github.macos.amd64.yml @@ -44,7 +44,9 @@ jobs: node-version: '16' - name: Run verification shell: bash + env: + SOURCE_REPOSITORY: {{ arrow.repo }} run: | arrow/dev/release/verify-release-candidate.sh \ {{ artifact }} \ - {{ release|default("1.0.0") }} {{ rc|default("0") }} + {{ release|arrow.head }} {{ rc|default("") }} diff --git a/dev/tasks/verify-rc/github.macos.arm64.yml b/dev/tasks/verify-rc/github.macos.arm64.yml index a2f19f543f2..54045a2b8d1 100644 --- a/dev/tasks/verify-rc/github.macos.arm64.yml +++ b/dev/tasks/verify-rc/github.macos.arm64.yml @@ -39,10 +39,12 @@ jobs: - name: Run verification shell: bash + env: + SOURCE_REPOSITORY: {{ arrow.repo }} run: | export PATH="$(brew --prefix node@16)/bin:$PATH" export PATH="$(brew --prefix ruby)/bin:$PATH" export PKG_CONFIG_PATH="$(brew --prefix ruby)/lib/pkgconfig" arch -{{ arch_emulation|default("arm64") }} arrow/dev/release/verify-release-candidate.sh \ {{ artifact }} \ - {{ release|default("1.0.0") }} {{ rc|default("0") }} + {{ release|arrow.head }} {{ rc|default("") }} diff --git a/dev/tasks/verify-rc/github.win.yml b/dev/tasks/verify-rc/github.win.yml index 5406327e874..f8b2d513ab5 100644 --- a/dev/tasks/verify-rc/github.win.yml +++ b/dev/tasks/verify-rc/github.win.yml @@ -40,6 +40,6 @@ jobs: choco install wget - name: Run verification shell: cmd - run: | - cd arrow - dev/release/{{ script }} {{ release|default("1.0.0") }} {{ rc|default("0") }} + env: + SOURCE_REPOSITORY: {{ arrow.repo }} + run: arrow/dev/release/{{ script }} {{ release|arrow.head }} {{ rc|default("") }} From 760908ec02efe2e9bc036c0001ee7c1a95cb8d40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 12:23:02 +0100 Subject: [PATCH 02/47] Fix jinja template --- dev/tasks/tasks.yml | 2 +- dev/tasks/verify-rc/github.linux.amd64.docker.yml | 7 +++---- dev/tasks/verify-rc/github.linux.amd64.yml | 4 ++-- dev/tasks/verify-rc/github.macos.amd64.yml | 4 ++-- dev/tasks/verify-rc/github.macos.arm64.yml | 4 ++-- dev/tasks/verify-rc/github.win.yml | 4 ++-- 6 files changed, 12 insertions(+), 13 deletions(-) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 8e94135d988..e63ec9e349b 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -876,7 +876,7 @@ tasks: verify-rc-source-{{ target }}-linux-{{ distribution }}-{{ version }}-amd64: ci: github - template: docker-tests/github.linux.amd64.docker.yml + template: verify-rc/github.linux.amd64.docker.yml params: target: {{ target }} distro: {{ distribution }} diff --git a/dev/tasks/verify-rc/github.linux.amd64.docker.yml b/dev/tasks/verify-rc/github.linux.amd64.docker.yml index 71e2cdd4e0e..a6620017d3d 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.docker.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.docker.yml @@ -21,8 +21,7 @@ jobs: test: - name: | - Docker Test {{ flags|default("") }} {{ image }} {{ command|default("") }} + name: "Verify release candidate {{ distro }} source" runs-on: ubuntu-latest steps: {{ macros.github_checkout_arrow(fetch_depth=fetch_depth if fetch_depth is defined else 1)|indent }} @@ -32,8 +31,8 @@ jobs: shell: bash run: | archery docker run \ - -e SOURCE_REPOSITORY="{{ arrow.repo }}" \ - -e VERIFY_VERSION="{{ release|arrow.head }}" \ + -e SOURCE_REPOSITORY="{{ arrow.remote }}" \ + -e VERIFY_VERSION="{{ release|default(arrow.head) }}" \ -e VERIFY_RC="{{ rc|default("") }}" \ -e TEST_DEFAULT=0 \ -e TEST_{{ target|upper }}=1 diff --git a/dev/tasks/verify-rc/github.linux.amd64.yml b/dev/tasks/verify-rc/github.linux.amd64.yml index 0cc7670f848..49069fe6b9b 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.yml @@ -72,8 +72,8 @@ jobs: - name: Run verification shell: bash env: - SOURCE_REPOSITORY: {{ arrow.repo }} + SOURCE_REPOSITORY: {{ arrow.remote }} run: | arrow/dev/release/verify-release-candidate.sh \ {{ artifact }} \ - {{ release|arrow.head }} {{ rc|default("") }} + {{ release|default(arrow.head) }} {{ rc|default("") }} diff --git a/dev/tasks/verify-rc/github.macos.amd64.yml b/dev/tasks/verify-rc/github.macos.amd64.yml index b014c505e1e..335fc7af5e0 100644 --- a/dev/tasks/verify-rc/github.macos.amd64.yml +++ b/dev/tasks/verify-rc/github.macos.amd64.yml @@ -45,8 +45,8 @@ jobs: - name: Run verification shell: bash env: - SOURCE_REPOSITORY: {{ arrow.repo }} + SOURCE_REPOSITORY: {{ arrow.remote }} run: | arrow/dev/release/verify-release-candidate.sh \ {{ artifact }} \ - {{ release|arrow.head }} {{ rc|default("") }} + {{ release|default(arrow.head) }} {{ rc|default("") }} diff --git a/dev/tasks/verify-rc/github.macos.arm64.yml b/dev/tasks/verify-rc/github.macos.arm64.yml index 54045a2b8d1..90414e22a76 100644 --- a/dev/tasks/verify-rc/github.macos.arm64.yml +++ b/dev/tasks/verify-rc/github.macos.arm64.yml @@ -40,11 +40,11 @@ jobs: - name: Run verification shell: bash env: - SOURCE_REPOSITORY: {{ arrow.repo }} + SOURCE_REPOSITORY: {{ arrow.remote }} run: | export PATH="$(brew --prefix node@16)/bin:$PATH" export PATH="$(brew --prefix ruby)/bin:$PATH" export PKG_CONFIG_PATH="$(brew --prefix ruby)/lib/pkgconfig" arch -{{ arch_emulation|default("arm64") }} arrow/dev/release/verify-release-candidate.sh \ {{ artifact }} \ - {{ release|arrow.head }} {{ rc|default("") }} + {{ release|default(arrow.head) }} {{ rc|default("") }} diff --git a/dev/tasks/verify-rc/github.win.yml b/dev/tasks/verify-rc/github.win.yml index f8b2d513ab5..233b5343e6c 100644 --- a/dev/tasks/verify-rc/github.win.yml +++ b/dev/tasks/verify-rc/github.win.yml @@ -41,5 +41,5 @@ jobs: - name: Run verification shell: cmd env: - SOURCE_REPOSITORY: {{ arrow.repo }} - run: arrow/dev/release/{{ script }} {{ release|arrow.head }} {{ rc|default("") }} + SOURCE_REPOSITORY: {{ arrow.remote }} + run: arrow/dev/release/{{ script }} {{ release|default(arrow.head) }} {{ rc|default("") }} From 0349b8dd99468be0300db7711f90fcc9fc1f6325 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 12:23:58 +0100 Subject: [PATCH 03/47] Add source verification tasks to the nightly group --- dev/tasks/tasks.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index e63ec9e349b..890865e054c 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -131,6 +131,7 @@ groups: {######################## Tasks to run regularly #############################} nightly: + - verify-rc-source-* - almalinux-* - amazon-linux-* - debian-* From b5656607de39f48df1769b1b97078c8d21869d1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 12:45:50 +0100 Subject: [PATCH 04/47] Missing line break --- dev/tasks/verify-rc/github.linux.amd64.docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/verify-rc/github.linux.amd64.docker.yml b/dev/tasks/verify-rc/github.linux.amd64.docker.yml index a6620017d3d..26d6cff68a4 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.docker.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.docker.yml @@ -35,5 +35,5 @@ jobs: -e VERIFY_VERSION="{{ release|default(arrow.head) }}" \ -e VERIFY_RC="{{ rc|default("") }}" \ -e TEST_DEFAULT=0 \ - -e TEST_{{ target|upper }}=1 + -e TEST_{{ target|upper }}=1 \ {{ distro }}-verify-rc-source From 7e0899956aabc458ae5a142478db803b24072e33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 12:58:39 +0100 Subject: [PATCH 05/47] Dummy git handling for C# --- dev/release/verify-release-candidate.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 7bd531ee11e..17329e60b42 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -377,9 +377,14 @@ test_csharp() { fi dotnet test - mv dummy.git ../.git - dotnet pack -c Release - mv ../.git dummy.git + + if [ "${SOURCE_KIND}" = "git" ]; then + dotnet pack -c Release + else + mv dummy.git ../.git + dotnet pack -c Release + mv ../.git dummy.git + fi if ! which sourcelink > /dev/null 2>&1; then dotnet tool install --tool-path ${csharp_bin} sourcelink From 9693d54e7d5a7babfecd096c132dd6be02408932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 15:34:06 +0100 Subject: [PATCH 06/47] Support verifying release without conda --- dev/release/verify-release-candidate.bat | 16 +- dev/release/verify-release-candidate.sh | 122 ++++++++------ dev/tasks/tasks.yml | 152 +++++++++--------- .../verify-rc/github.linux.amd64.docker.yml | 1 - dev/tasks/verify-rc/github.linux.amd64.yml | 38 +++-- dev/tasks/verify-rc/github.macos.amd64.yml | 5 +- dev/tasks/verify-rc/github.macos.arm64.yml | 7 +- dev/tasks/verify-rc/github.win.yml | 2 - 8 files changed, 192 insertions(+), 151 deletions(-) diff --git a/dev/release/verify-release-candidate.bat b/dev/release/verify-release-candidate.bat index d984ea85785..e6b6940e213 100644 --- a/dev/release/verify-release-candidate.bat +++ b/dev/release/verify-release-candidate.bat @@ -35,7 +35,7 @@ set RC_NUMBER=%2 if "%RC_NUMBER%"=="" ( @rem verify a specific git revision - if "%SOURCE_REPOSITORY%"=="" set SOURCE_REPOSITORY="https://github.com/apache/arrow.git" + if "%SOURCE_REPOSITORY%"=="" set SOURCE_REPOSITORY="%~dp0..\..\" git clone --recurse-submodules %SOURCE_REPOSITORY% %ARROW_SOURCE% git -C %ARROW_SOURCE% checkout %VERSION% ) else ( @@ -45,6 +45,13 @@ if "%RC_NUMBER%"=="" ( set TARBALL_URL=https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-%VERSION%-rc%RC_NUMBER%/%TARBALL_NAME% wget --no-check-certificate -O %TARBALL_NAME% %TARBALL_URL% || exit /B 1 tar xf %TARBALL_NAME% -C %_VERIFICATION_DIR_UNIX% + + @rem Get testing datasets for Parquet unit tests + git clone https://github.com/apache/parquet-testing.git %_VERIFICATION_DIR%\parquet-testing + set PARQUET_TEST_DATA=%_VERIFICATION_DIR%\parquet-testing\data + + git clone https://github.com/apache/arrow-testing.git %_VERIFICATION_DIR%\arrow-testing + set ARROW_TEST_DATA=%_VERIFICATION_DIR%\arrow-testing\data ) set PYTHON=3.8 @@ -106,13 +113,6 @@ cmake -G "%GENERATOR%" ^ cmake --build . --target INSTALL --config Release || exit /B 1 -@rem Get testing datasets for Parquet unit tests -git clone https://github.com/apache/parquet-testing.git %_VERIFICATION_DIR%\parquet-testing -set PARQUET_TEST_DATA=%_VERIFICATION_DIR%\parquet-testing\data - -git clone https://github.com/apache/arrow-testing.git %_VERIFICATION_DIR%\arrow-testing -set ARROW_TEST_DATA=%_VERIFICATION_DIR%\arrow-testing\data - @rem Needed so python-test.exe works set PYTHONPATH_ORIGINAL=%PYTHONPATH% set PYTHONPATH=%CONDA_PREFIX%\Lib;%CONDA_PREFIX%\Lib\site-packages;%CONDA_PREFIX%\DLLs;%CONDA_PREFIX%;%PYTHONPATH% diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 17329e60b42..948eaca9036 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -77,14 +77,17 @@ detect_cuda() { return $((${n_gpus} < 1)) } -# Build options for the C++ library +# Execute tests in a conda enviroment +: ${USE_CONDA:=0} +# Build options for the C++ library if [ -z "${ARROW_CUDA:-}" ] && detect_cuda; then ARROW_CUDA=ON fi : ${ARROW_CUDA:=OFF} : ${ARROW_FLIGHT:=ON} : ${ARROW_GANDIVA:=ON} +: ${ARROW_DEPENDENCY_SOURCE:=${DEFAULT_DEPENDENCY_SOURCE}} ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/arrow' @@ -149,7 +152,7 @@ test_binary() { local download_dir=binaries mkdir -p ${download_dir} - ${PYTHON:-python} $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \ + ${PYTHON:-python3} $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \ --dest=${download_dir} verify_dir_artifact_signatures ${download_dir} @@ -243,7 +246,7 @@ setup_tempdir() { fi } -setup_miniconda() { +setup_conda() { # Setup short-lived miniconda for Python and integration tests OS="$(uname)" if [ "${OS}" == "Darwin" ]; then @@ -264,22 +267,7 @@ setup_miniconda() { . $MINICONDA/etc/profile.d/conda.sh conda activate base - - # Dependencies from python/requirements-build.txt and python/requirements-test.txt - # with the exception of oldest-supported-numpy since it doesn't have a conda package - mamba create -n arrow-test -y \ - cffi \ - cython \ - hypothesis \ - numpy \ - pandas \ - pytest \ - pytest-lazy-fixture \ - python=3.8 \ - pytz \ - setuptools \ - setuptools_scm - + mamba create -n arrow-test -y conda activate arrow-test echo "Using conda environment ${CONDA_PREFIX}" } @@ -298,6 +286,22 @@ test_package_java() { # Build and test C++ test_and_install_cpp() { + if [ "${USE_CONDA}" -gt 0 ]; then + DEFAULT_DEPENDENCY_SOURCE="CONDA" + mamba install -y --file ci/conda_env_cpp.txt + elif [ ! -z ${CONDA_PREFIX} ]; then + echo "Conda environment is active despite that USE_CONDA is set to 0." + echo "Deactivate the environment before running the verification script." + exit 1 + else + DEFAULT_DEPENDENCY_SOURCE="AUTO" + # Create a python virtualenv + ${PYTHON:-python3} -m venv venv + source venv/bin/activate + # Install build dependencies (numpy is required here) + pip install -r ${ARROW_DIR}/python/requirements-build.txt + fi + mkdir -p cpp/build pushd cpp/build @@ -325,7 +329,7 @@ ${ARROW_CMAKE_OPTIONS:-} -DARROW_BUILD_TESTS=ON -DARROW_BUILD_INTEGRATION=ON -DARROW_CUDA=${ARROW_CUDA} --DARROW_DEPENDENCY_SOURCE=AUTO +-DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-$DEFAULT_DEPENDENCY_SOURCE} " cmake $ARROW_CMAKE_OPTIONS .. @@ -401,12 +405,18 @@ test_csharp() { } # Build and test Python - test_python() { + if [ "${USE_CONDA}" -gt 0 ]; then + mamba install -y --file ci/conda_env_python.txt + elif [ ! -z ${CONDA_PREFIX} ]; then + echo "Conda environment is active despite that USE_CONDA is set to 0." + echo "Deactivate the environment before running the verification script." + exit 1 + fi + pushd python export PYARROW_PARALLEL=$NPROC - export PYARROW_WITH_DATASET=1 export PYARROW_WITH_PARQUET=1 export PYARROW_WITH_PLASMA=1 @@ -420,9 +430,42 @@ test_python() { export PYARROW_WITH_GANDIVA=1 fi + # Build pyarrow python setup.py build_ext --inplace + + # Check mandatory and optional imports + python -c " +import pyarrow +import pyarrow._s3 +import pyarrow._gcs +import pyarrow._hdfs +import pyarrow.csv +import pyarrow.dataset +import pyarrow.fs +import pyarrow.json +import pyarrow.orc +import pyarrow.parquet +import pyarrow.plasma +" + if [ "${PYARROW_WITH_CUDA}" == "ON" ]; then + python -c "import pyarrow.cuda" + fi + if [ "${PYARROW_WITH_FLIGHT}" == "ON" ]; then + python -c "import pyarrow.flight" + fi + if [ "${ARROW_WITH_GANDIVA}" == "ON" ]; then + python -c "import pyarrow.gandiva" + fi + + # Install test dependencies + pip install -r requirements-test.txt + + # Execute pyarrow unittests pytest pyarrow -v --pdb + # Deactivate virtualenv + deactivate + popd } @@ -562,7 +605,7 @@ test_integration() { ensure_source_directory() { dist_name="apache-arrow-${VERSION}" if [ "${SOURCE_KIND}" = "git" ]; then - git clone --recurse-submodules ${SOURCE_REPOSITORY:-"https://github.com/apache/arrow.git"} arrow + git clone --recurse-submodules ${SOURCE_REPOSITORY:-"${SOURCE_DIR}/../.."} arrow pushd arrow git checkout ${VERSION} else @@ -782,7 +825,7 @@ test_jars() { local download_dir=jars mkdir -p ${download_dir} - ${PYTHON:-python} $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \ + ${PYTHON:-python3} $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \ --dest=${download_dir} \ --package_type=jars @@ -793,6 +836,10 @@ test_jars() { # To deactivate one test, deactivate the test and all of its dependents # To explicitly select one test, set TEST_DEFAULT=0 TEST_X=1 + +# Install and activate conda environment automatically +: ${INSTALL_CONDA:=$USE_CONDA} + # Install NodeJS locally for running the JavaScript tests rather than using the # system Node installation, which may be too old. node_major_version=$( \ @@ -857,29 +904,6 @@ TEST_JS=$((${TEST_JS} + ${TEST_INTEGRATION_JS})) TEST_GO=$((${TEST_GO} + ${TEST_INTEGRATION_GO})) TEST_INTEGRATION=$((${TEST_INTEGRATION} + ${TEST_INTEGRATION_CPP} + ${TEST_INTEGRATION_JAVA} + ${TEST_INTEGRATION_JS} + ${TEST_INTEGRATION_GO})) -case "${ARTIFACT}" in - source) - NEED_MINICONDA=$((${TEST_CPP} + ${TEST_INTEGRATION})) - ;; - binaries) - if [ -z "${PYTHON:-}" ]; then - NEED_MINICONDA=$((${TEST_BINARY})) - else - NEED_MINICONDA=0 - fi - ;; - wheels) - NEED_MINICONDA=$((${TEST_WHEELS})) - ;; - jars) - if [ -z "${PYTHON:-}" ]; then - NEED_MINICONDA=1 - else - NEED_MINICONDA=0 - fi - ;; -esac - : ${TEST_ARCHIVE:=apache-arrow-${VERSION}.tar.gz} case "${TEST_ARCHIVE}" in /*) @@ -895,8 +919,8 @@ setup_tempdir "arrow-${VERSION}" echo "Working in sandbox ${ARROW_TMPDIR}" cd ${ARROW_TMPDIR} -if [ ${NEED_MINICONDA} -gt 0 ]; then - setup_miniconda +if [ "${INSTALL_CONDA}" -gt 0 ]; then + setup_conda fi case "${ARTIFACT}" in diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 890865e054c..6aaa91194e2 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -821,27 +821,21 @@ tasks: ########################### Release verification ############################ -{% for target in ["binary", "yum", "apt"] %} - verify-rc-binaries-{{ target }}-amd64: - ci: github - template: verify-rc/github.linux.amd64.yml - params: - env: - TEST_DEFAULT: 0 - TEST_{{ target|upper }}: 1 - artifact: binaries -{% endfor %} + ######################## Linux source verification ########################## - verify-rc-jars-amd64: +{% for target in ["cpp", "integration", "python"] %} + verify-rc-source-{{ target }}-linux-conda-amd64: ci: github template: verify-rc/github.linux.amd64.yml params: - env: - TEST_DEFAULT: 0 - TEST_JARS: 1 - artifact: jars + target: {{ target }} + artifact: source + use_conda: True +{% endfor %} -{% for platform, arch, runner in [("macos", "amd64", "macos-10.15")] %} +{% for distribution, version in [("almalinux", "8"), + ("ubuntu", "18.04"), + ("ubuntu", "20.04")] %} {% for target in ["cpp", "csharp", "go", @@ -850,22 +844,29 @@ tasks: "js", "python", "ruby"] %} - - verify-rc-source-{{ target }}-{{ platform }}-{{ arch }}: + verify-rc-source-{{ target }}-linux-{{ distribution }}-{{ version }}-amd64: ci: github - template: verify-rc/github.{{ platform }}.{{ arch }}.yml + template: verify-rc/github.linux.amd64.docker.yml params: - env: - TEST_DEFAULT: 0 - TEST_{{ target|upper }}: 1 - artifact: "source" - github_runner: "{{ runner }}" + target: {{ target }} + distro: {{ distribution }} {% endfor %} + {% endfor %} -{% for distribution, version in [("almalinux", "8"), - ("ubuntu", "18.04"), - ("ubuntu", "20.04")] %} + ######################## macOS source verification ########################## + + {% for target in ["cpp", "integration", "python"] %} + verify-rc-source-{{ target }}-macos-conda-amd64: + ci: github + template: verify-rc/github.macos.amd64.yml + params: + target: {{ target }} + artifact: source + use_conda: True + github_runner: "macos-10.15" + {% endfor %} + {% for target in ["cpp", "csharp", "go", @@ -874,17 +875,15 @@ tasks: "js", "python", "ruby"] %} - - verify-rc-source-{{ target }}-linux-{{ distribution }}-{{ version }}-amd64: + verify-rc-source-{{ target }}-macos-amd64: ci: github - template: verify-rc/github.linux.amd64.docker.yml + template: verify-rc/github.macos.amd64.yml params: target: {{ target }} - distro: {{ distribution }} + artifact: "source" + github_runner: "macos-10.15" {% endfor %} -{% endfor %} -{% for platform, arch, runner in [("macos", "arm64", "self-hosted")] %} {% for target in ["cpp", "csharp", "go", @@ -892,79 +891,84 @@ tasks: "js", "python", "ruby"] %} - - verify-rc-source-{{ target }}-{{ platform }}-{{ arch }}: + verify-rc-source-{{ target }}-macos-arm64: ci: github - template: verify-rc/github.{{ platform }}.{{ arch }}.yml + template: verify-rc/github.macos.arm64.yml params: env: ARROW_FLIGHT: 0 ARROW_GANDIVA: 0 - TEST_DEFAULT: 0 TEST_INTEGRATION_JAVA: 0 - TEST_{{ target|upper }}: 1 PYTEST_ADDOPTS: "-k 'not test_cancellation'" artifact: "source" - github_runner: "{{ runner }}" + github_runner: "self-hosted" {% endfor %} -{% endfor %} - verify-rc-wheels-linux-amd64: + ######################## Windows source verification ######################## + + verify-rc-source-windows: ci: github - template: verify-rc/github.linux.amd64.yml + template: verify-rc/github.win.yml params: env: - TEST_DEFAULT: 0 - artifact: "wheels" + PYARROW_TEST_GDB: "OFF" + script: "verify-release-candidate.bat" - verify-rc-wheels-macos-10.15-amd64: + ######################## Java jars verification ############################# + + verify-rc-jars-amd64: ci: github - template: verify-rc/github.macos.amd64.yml + template: verify-rc/github.linux.amd64.yml params: - github_runner: "macos-10.15" - env: - TEST_DEFAULT: 0 - artifact: "wheels" + artifact: jars - # The github hosted macos-11 runners are in preview only, but should be switched once they are generally available: - # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources - verify-rc-wheels-macos-11-amd64: + ######################## Linux package verification ######################## + + {% for target in ["binary", "yum", "apt"] %} + verify-rc-binaries-{{ target }}-amd64: ci: github - template: verify-rc/github.macos.arm64.yml + template: verify-rc/github.linux.amd64.yml params: - github_runner: "self-hosted" - arch_emulation: "x86_64" - env: - TEST_DEFAULT: 0 - PYTEST_ADDOPTS: "-k 'not test_cancellation'" - artifact: "wheels" + target: {{ target }} + artifact: binaries + {% endfor %} + + ######################### Wheel Verification ################################ - verify-rc-wheels-macos-11-arm64: + verify-rc-wheels-linux-amd64: ci: github - template: verify-rc/github.macos.arm64.yml + template: verify-rc/github.linux.amd64.yml params: - github_runner: "self-hosted" - arch_emulation: "arm64" - env: - TEST_DEFAULT: 0 - PYTEST_ADDOPTS: "-k 'not test_cancellation'" - artifact: "wheels" + artifact: wheels - verify-rc-source-windows: + verify-rc-wheels-windows: ci: github template: verify-rc/github.win.yml params: env: PYARROW_TEST_GDB: "OFF" - script: "verify-release-candidate.bat" + script: "verify-release-candidate-wheels.bat" - verify-rc-wheels-windows: + verify-rc-wheels-macos-10.15-amd64: ci: github - template: verify-rc/github.win.yml + template: verify-rc/github.macos.amd64.yml + params: + github_runner: "macos-10.15" + artifact: "wheels" + + # The github hosted macos-11 runners are in preview only, but should be switched once they are generally available: + # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources + {% for arch, emulation in [("amd64", "x86_64"), ("arm64", "arm64")] %} + verify-rc-wheels-macos-11-{{ arch }}: + ci: github + template: verify-rc/github.macos.arm64.yml params: + arch_emulation: {{ emulation }} env: - PYARROW_TEST_GDB: "OFF" - script: "verify-release-candidate-wheels.bat" + PYTEST_ADDOPTS: "-k 'not test_cancellation'" + artifact: "wheels" + github_runner: "self-hosted" + {% endfor %} {############################## Docker tests #################################} diff --git a/dev/tasks/verify-rc/github.linux.amd64.docker.yml b/dev/tasks/verify-rc/github.linux.amd64.docker.yml index 26d6cff68a4..ee3c009a8fa 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.docker.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.docker.yml @@ -31,7 +31,6 @@ jobs: shell: bash run: | archery docker run \ - -e SOURCE_REPOSITORY="{{ arrow.remote }}" \ -e VERIFY_VERSION="{{ release|default(arrow.head) }}" \ -e VERIFY_RC="{{ rc|default("") }}" \ -e TEST_DEFAULT=0 \ diff --git a/dev/tasks/verify-rc/github.linux.amd64.yml b/dev/tasks/verify-rc/github.linux.amd64.yml index 49069fe6b9b..968185383ef 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.yml @@ -33,6 +33,7 @@ jobs: steps: {{ macros.github_checkout_arrow()|indent }} + {% if not ((use_conda is defined) and use_conda) %} - name: Install System Dependencies run: | # TODO: don't require removing newer llvms @@ -51,28 +52,37 @@ jobs: ninja-build \ qemu-user-static \ wget + {% endif %} - if [ "$TEST_JAVA" = "1" ]; then - # Maven - MAVEN_VERSION=3.6.3 - wget https://downloads.apache.org/maven/maven-3/$MAVEN_VERSION/binaries/apache-maven-$MAVEN_VERSION-bin.zip - unzip apache-maven-$MAVEN_VERSION-bin.zip - mkdir -p $HOME/java - mv apache-maven-$MAVEN_VERSION $HOME/java - export PATH=$HOME/java/apache-maven-$MAVEN_VERSION/bin:$PATH - fi + {% if target is defined %} + {% if target == "java" %} + - name: Install Maven + run: | + MAVEN_VERSION=3.6.3 + wget https://downloads.apache.org/maven/maven-3/$MAVEN_VERSION/binaries/apache-maven-$MAVEN_VERSION-bin.zip + unzip apache-maven-$MAVEN_VERSION-bin.zip + mkdir -p $HOME/java + mv apache-maven-$MAVEN_VERSION $HOME/java + export PATH=$HOME/java/apache-maven-$MAVEN_VERSION/bin:$PATH + {% elif target == "ruby" %} + - name: Install Bundler + run: | + ruby --version + sudo gem install bundler + {% endif %} + {% endif %} - if [ "$TEST_RUBY" = "1" ]; then - ruby --version - sudo gem install bundler - fi - uses: actions/setup-node@v2-beta with: node-version: '16' + - name: Run verification shell: bash env: - SOURCE_REPOSITORY: {{ arrow.remote }} + TEST_DEFAULT: 0 + {% if target is defined %} + TEST_{{ target|upper }}: 1 + {% endif %} run: | arrow/dev/release/verify-release-candidate.sh \ {{ artifact }} \ diff --git a/dev/tasks/verify-rc/github.macos.amd64.yml b/dev/tasks/verify-rc/github.macos.amd64.yml index 335fc7af5e0..d443d3196c2 100644 --- a/dev/tasks/verify-rc/github.macos.amd64.yml +++ b/dev/tasks/verify-rc/github.macos.amd64.yml @@ -45,7 +45,10 @@ jobs: - name: Run verification shell: bash env: - SOURCE_REPOSITORY: {{ arrow.remote }} + TEST_DEFAULT: 0 + {% if target is defined %} + TEST_{{ target|upper }}: 1 + {% endif %} run: | arrow/dev/release/verify-release-candidate.sh \ {{ artifact }} \ diff --git a/dev/tasks/verify-rc/github.macos.arm64.yml b/dev/tasks/verify-rc/github.macos.arm64.yml index 90414e22a76..9ed9b70be7d 100644 --- a/dev/tasks/verify-rc/github.macos.arm64.yml +++ b/dev/tasks/verify-rc/github.macos.arm64.yml @@ -22,7 +22,7 @@ jobs: verify: name: "Verify release candidate macOS {{ artifact }}" - runs-on: {{ github_runner }} + runs-on: {{ github_runner|default("self-hosted") }} {% if env is defined %} env: {% for key, value in env.items() %} @@ -40,7 +40,10 @@ jobs: - name: Run verification shell: bash env: - SOURCE_REPOSITORY: {{ arrow.remote }} + TEST_DEFAULT: 0 + {% if target is defined %} + TEST_{{ target|upper }}: 1 + {% endif %} run: | export PATH="$(brew --prefix node@16)/bin:$PATH" export PATH="$(brew --prefix ruby)/bin:$PATH" diff --git a/dev/tasks/verify-rc/github.win.yml b/dev/tasks/verify-rc/github.win.yml index 233b5343e6c..60edb74a150 100644 --- a/dev/tasks/verify-rc/github.win.yml +++ b/dev/tasks/verify-rc/github.win.yml @@ -40,6 +40,4 @@ jobs: choco install wget - name: Run verification shell: cmd - env: - SOURCE_REPOSITORY: {{ arrow.remote }} run: arrow/dev/release/{{ script }} {{ release|default(arrow.head) }} {{ rc|default("") }} From 797aba7b3f3da0c34475d78c66698fdb10d30078 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 15:45:48 +0100 Subject: [PATCH 07/47] Support verifying wheels without conda --- dev/release/verify-release-candidate.sh | 34 +++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 948eaca9036..84b616fe1a0 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -704,13 +704,20 @@ test_linux_wheels() { for py_arch in ${py_arches}; do local env=_verify_wheel-${py_arch} - if [ $py_arch = "3.10" ]; then - local channels="-c conda-forge -c defaults" + + if [ "${USE_CONDA}" -gt 0 ]; then + mamba create -yq -n ${env} python=${py_arch//[mu]/} + conda activate ${env} + elif [ command -v "python${py_ver}" ]; then + local venv="${ARROW_TMPDIR}/test-virtualenv" + local python="python${py_ver}" + $python -m virtualenv $venv + source $venv/bin/activate else - local channels="-c conda-forge" + echo "Couldn't locate python interpreter with version ${py_arch}" + echo "Call the script with USE_CONDA=1 to test all of the python versions." fi - mamba create -yq -n ${env} ${channels} python=${py_arch//[mu]/} - conda activate ${env} + pip install -U pip for tag in ${platform_tags}; do @@ -744,13 +751,20 @@ test_macos_wheels() { # verify arch-native wheels inside an arch-native conda environment for py_arch in ${py_arches}; do local env=_verify_wheel-${py_arch} - if [ $py_arch = "3.10" ]; then - local channels="-c conda-forge -c defaults" + + if [ "${USE_CONDA}" -gt 0 ]; then + mamba create -yq -n ${env} python=${py_arch//m/} + conda activate ${env} + elif [ command -v "python${py_ver}" ]; then + local venv="${ARROW_TMPDIR}/test-virtualenv" + local python="python${py_ver}" + $python -m virtualenv $venv + source $venv/bin/activate else - local channels="-c conda-forge" + echo "Couldn't locate python interpreter with version ${py_arch}" + echo "Call the script with USE_CONDA=1 to test all of the python versions." fi - mamba create -yq -n ${env} ${channels} python=${py_arch//m/} - conda activate ${env} + pip install -U pip # check the mandatory and optional imports From f777590a84c1aeb3b7f769dd2b34f2f21f49d883 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 15:49:51 +0100 Subject: [PATCH 08/47] Support verifying wheels without conda --- dev/release/verify-release-candidate.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 84b616fe1a0..e954858d16b 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -708,6 +708,10 @@ test_linux_wheels() { if [ "${USE_CONDA}" -gt 0 ]; then mamba create -yq -n ${env} python=${py_arch//[mu]/} conda activate ${env} + elif [ ! -z ${CONDA_PREFIX} ]; then + echo "Conda environment is active despite that USE_CONDA is set to 0." + echo "Deactivate the environment before running the verification script." + exit 1 elif [ command -v "python${py_ver}" ]; then local venv="${ARROW_TMPDIR}/test-virtualenv" local python="python${py_ver}" @@ -716,6 +720,7 @@ test_linux_wheels() { else echo "Couldn't locate python interpreter with version ${py_arch}" echo "Call the script with USE_CONDA=1 to test all of the python versions." + continue fi pip install -U pip @@ -755,6 +760,10 @@ test_macos_wheels() { if [ "${USE_CONDA}" -gt 0 ]; then mamba create -yq -n ${env} python=${py_arch//m/} conda activate ${env} + elif [ ! -z ${CONDA_PREFIX} ]; then + echo "Conda environment is active despite that USE_CONDA is set to 0." + echo "Deactivate the environment before running the verification script." + exit 1 elif [ command -v "python${py_ver}" ]; then local venv="${ARROW_TMPDIR}/test-virtualenv" local python="python${py_ver}" @@ -763,6 +772,7 @@ test_macos_wheels() { else echo "Couldn't locate python interpreter with version ${py_arch}" echo "Call the script with USE_CONDA=1 to test all of the python versions." + continue fi pip install -U pip @@ -875,6 +885,7 @@ case "${ARTIFACT}" in ;; wheels) TEST_WHEELS=1 + USE_CONDA=1 ;; jars) TEST_JARS=1 From 8ef749b8b6e6b3c7fbc6d3208e17ccb8029c5ce0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 16:59:58 +0100 Subject: [PATCH 09/47] Use newer numpy --- dev/release/verify-release-candidate.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index e954858d16b..0db427f8a81 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -299,7 +299,7 @@ test_and_install_cpp() { ${PYTHON:-python3} -m venv venv source venv/bin/activate # Install build dependencies (numpy is required here) - pip install -r ${ARROW_DIR}/python/requirements-build.txt + pip install numpy fi mkdir -p cpp/build From afdd0c2fe0e18b34d3da8bc5b7111f87d30999a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 17:04:02 +0100 Subject: [PATCH 10/47] Use absolute path to conda env files on windows --- dev/release/verify-release-candidate.bat | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/release/verify-release-candidate.bat b/dev/release/verify-release-candidate.bat index e6b6940e213..7170a68572f 100644 --- a/dev/release/verify-release-candidate.bat +++ b/dev/release/verify-release-candidate.bat @@ -59,8 +59,8 @@ set PYTHON=3.8 @rem Using call with conda.bat seems necessary to avoid terminating the batch @rem script execution call conda create --no-shortcuts -c conda-forge -f -q -y -p %_VERIFICATION_CONDA_ENV% ^ - --file=ci\conda_env_cpp.txt ^ - --file=ci\conda_env_python.txt ^ + --file=%ARROW_SOURCE%\ci\conda_env_cpp.txt ^ + --file=%ARROW_SOURCE%\ci\conda_env_python.txt ^ git ^ python=%PYTHON% ^ || exit /B 1 From 3f473dc0f1587b9bfed7beecb75450b0e0be8198 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 17:12:44 +0100 Subject: [PATCH 11/47] Install virtualenv --- dev/release/verify-release-candidate.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 0db427f8a81..97e7c8eed41 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -296,7 +296,8 @@ test_and_install_cpp() { else DEFAULT_DEPENDENCY_SOURCE="AUTO" # Create a python virtualenv - ${PYTHON:-python3} -m venv venv + ${PYTHON:-python3} -m pip install virtualenv + ${PYTHON:-python3} -m virtualenv venv source venv/bin/activate # Install build dependencies (numpy is required here) pip install numpy @@ -744,7 +745,7 @@ test_macos_wheels() { local check_flight=ON # macOS version <= 10.13 - if [ $(echo "${macos_short_version}\n10.14" | sort -V | head -n1) == "${macos_short_version}" ]; then + if [ $(echo "${macos_short_version}\n10.14" | sort | head -n1) == "${macos_short_version}" ]; then local check_s3=OFF fi # apple silicon processor From 280b98761f8b06c91d294d809af093fdc536a5d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 17:15:38 +0100 Subject: [PATCH 12/47] Fetch full history --- dev/tasks/tasks.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 6aaa91194e2..1770881c919 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -850,6 +850,7 @@ tasks: params: target: {{ target }} distro: {{ distribution }} + fetch_depth: 0 {% endfor %} {% endfor %} @@ -882,6 +883,7 @@ tasks: target: {{ target }} artifact: "source" github_runner: "macos-10.15" + fetch_depth: 0 {% endfor %} {% for target in ["cpp", @@ -902,6 +904,7 @@ tasks: PYTEST_ADDOPTS: "-k 'not test_cancellation'" artifact: "source" github_runner: "self-hosted" + fetch_depth: 0 {% endfor %} ######################## Windows source verification ######################## From c5252192de0c0267fda2378698d9458c76d7596f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 22:02:49 +0100 Subject: [PATCH 13/47] No delayeed expansion in batch script --- dev/release/verify-release-candidate.bat | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dev/release/verify-release-candidate.bat b/dev/release/verify-release-candidate.bat index 7170a68572f..f1f7afc68bd 100644 --- a/dev/release/verify-release-candidate.bat +++ b/dev/release/verify-release-candidate.bat @@ -35,8 +35,11 @@ set RC_NUMBER=%2 if "%RC_NUMBER%"=="" ( @rem verify a specific git revision - if "%SOURCE_REPOSITORY%"=="" set SOURCE_REPOSITORY="%~dp0..\..\" - git clone --recurse-submodules %SOURCE_REPOSITORY% %ARROW_SOURCE% + if "%SOURCE_REPOSITORY%"=="" ( + git clone --recurse-submodules "%~dp0..\..\" %ARROW_SOURCE% + ) else ( + git clone --recurse-submodules %SOURCE_REPOSITORY% %ARROW_SOURCE% + ) git -C %ARROW_SOURCE% checkout %VERSION% ) else ( @rem verify a release candidate tarball From e51b07849ad38d2e749e3e532211db2d20c98a5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 22:05:55 +0100 Subject: [PATCH 14/47] Install python build dependencies --- dev/release/verify-release-candidate.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 97e7c8eed41..59ddb6234cc 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -413,10 +413,10 @@ test_python() { echo "Conda environment is active despite that USE_CONDA is set to 0." echo "Deactivate the environment before running the verification script." exit 1 + else + pip install -r python/requirements-build.txt fi - pushd python - export PYARROW_PARALLEL=$NPROC export PYARROW_WITH_DATASET=1 export PYARROW_WITH_PARQUET=1 @@ -431,6 +431,8 @@ test_python() { export PYARROW_WITH_GANDIVA=1 fi + pushd python + # Build pyarrow python setup.py build_ext --inplace From 726047307056024abf36d43041fe6b0768422b38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 22:17:10 +0100 Subject: [PATCH 15/47] Porperly set use_conda --- dev/tasks/verify-rc/github.linux.amd64.yml | 7 ++++++- dev/tasks/verify-rc/github.macos.amd64.yml | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/dev/tasks/verify-rc/github.linux.amd64.yml b/dev/tasks/verify-rc/github.linux.amd64.yml index 968185383ef..0a01e3b2ede 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.yml @@ -19,6 +19,8 @@ {{ macros.github_header() }} +{% set use_conda = use_conda|default(False) %} + jobs: verify: name: "Verify release candidate Ubuntu {{ artifact }}" @@ -33,7 +35,7 @@ jobs: steps: {{ macros.github_checkout_arrow()|indent }} - {% if not ((use_conda is defined) and use_conda) %} + {% if not use_conda %} - name: Install System Dependencies run: | # TODO: don't require removing newer llvms @@ -83,6 +85,9 @@ jobs: {% if target is defined %} TEST_{{ target|upper }}: 1 {% endif %} + {% if use_conda %} + USE_CONDA: 1 + {% endif %} run: | arrow/dev/release/verify-release-candidate.sh \ {{ artifact }} \ diff --git a/dev/tasks/verify-rc/github.macos.amd64.yml b/dev/tasks/verify-rc/github.macos.amd64.yml index d443d3196c2..215cbf06896 100644 --- a/dev/tasks/verify-rc/github.macos.amd64.yml +++ b/dev/tasks/verify-rc/github.macos.amd64.yml @@ -19,6 +19,8 @@ {{ macros.github_header() }} +{% set use_conda = use_conda|default(False) %} + jobs: verify: name: "Verify release candidate macOS {{ artifact }}" @@ -49,6 +51,9 @@ jobs: {% if target is defined %} TEST_{{ target|upper }}: 1 {% endif %} + {% if use_conda %} + USE_CONDA: 1 + {% endif %} run: | arrow/dev/release/verify-release-candidate.sh \ {{ artifact }} \ From f7ae95e95df696816a83556d0328fd2e90aa2cac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 22:27:15 +0100 Subject: [PATCH 16/47] Try to specify file:// protocol on windows --- dev/release/verify-release-candidate.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/release/verify-release-candidate.bat b/dev/release/verify-release-candidate.bat index f1f7afc68bd..b7a25220780 100644 --- a/dev/release/verify-release-candidate.bat +++ b/dev/release/verify-release-candidate.bat @@ -36,7 +36,7 @@ set RC_NUMBER=%2 if "%RC_NUMBER%"=="" ( @rem verify a specific git revision if "%SOURCE_REPOSITORY%"=="" ( - git clone --recurse-submodules "%~dp0..\..\" %ARROW_SOURCE% + git clone --recurse-submodules "file://%~dp0..\..\" %ARROW_SOURCE% ) else ( git clone --recurse-submodules %SOURCE_REPOSITORY% %ARROW_SOURCE% ) From e79c6573b0b1f517991c3c9075223a5b7b1a64a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 22:28:59 +0100 Subject: [PATCH 17/47] Missing orc --- dev/release/verify-release-candidate.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 59ddb6234cc..a8eb292da97 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -288,7 +288,8 @@ test_package_java() { test_and_install_cpp() { if [ "${USE_CONDA}" -gt 0 ]; then DEFAULT_DEPENDENCY_SOURCE="CONDA" - mamba install -y --file ci/conda_env_cpp.txt + # TODO(kszucs): we should define orc in the conda_env_cpp.txt file + mamba install -y --file ci/conda_env_cpp.txt orc elif [ ! -z ${CONDA_PREFIX} ]; then echo "Conda environment is active despite that USE_CONDA is set to 0." echo "Deactivate the environment before running the verification script." From 29bd97e9abd5183a472808ae4f45d2a649d3040d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 22:33:28 +0100 Subject: [PATCH 18/47] Set fetch-depth for every java related build --- dev/tasks/tasks.yml | 3 --- dev/tasks/verify-rc/github.linux.amd64.docker.yml | 2 +- dev/tasks/verify-rc/github.linux.amd64.yml | 2 +- dev/tasks/verify-rc/github.macos.amd64.yml | 2 +- dev/tasks/verify-rc/github.macos.arm64.yml | 2 +- dev/tasks/verify-rc/github.win.yml | 2 +- 6 files changed, 5 insertions(+), 8 deletions(-) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 1770881c919..6aaa91194e2 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -850,7 +850,6 @@ tasks: params: target: {{ target }} distro: {{ distribution }} - fetch_depth: 0 {% endfor %} {% endfor %} @@ -883,7 +882,6 @@ tasks: target: {{ target }} artifact: "source" github_runner: "macos-10.15" - fetch_depth: 0 {% endfor %} {% for target in ["cpp", @@ -904,7 +902,6 @@ tasks: PYTEST_ADDOPTS: "-k 'not test_cancellation'" artifact: "source" github_runner: "self-hosted" - fetch_depth: 0 {% endfor %} ######################## Windows source verification ######################## diff --git a/dev/tasks/verify-rc/github.linux.amd64.docker.yml b/dev/tasks/verify-rc/github.linux.amd64.docker.yml index ee3c009a8fa..d4cfd68f1c9 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.docker.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.docker.yml @@ -24,7 +24,7 @@ jobs: name: "Verify release candidate {{ distro }} source" runs-on: ubuntu-latest steps: - {{ macros.github_checkout_arrow(fetch_depth=fetch_depth if fetch_depth is defined else 1)|indent }} + {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} {{ macros.github_install_archery()|indent }} - name: Execute Docker Build diff --git a/dev/tasks/verify-rc/github.linux.amd64.yml b/dev/tasks/verify-rc/github.linux.amd64.yml index 0a01e3b2ede..fb5a30772a9 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.yml @@ -33,7 +33,7 @@ jobs: {% endif %} steps: - {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} {% if not use_conda %} - name: Install System Dependencies diff --git a/dev/tasks/verify-rc/github.macos.amd64.yml b/dev/tasks/verify-rc/github.macos.amd64.yml index 215cbf06896..e8bbee289b5 100644 --- a/dev/tasks/verify-rc/github.macos.amd64.yml +++ b/dev/tasks/verify-rc/github.macos.amd64.yml @@ -33,7 +33,7 @@ jobs: {% endif %} steps: - {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} - name: Install System Dependencies shell: bash diff --git a/dev/tasks/verify-rc/github.macos.arm64.yml b/dev/tasks/verify-rc/github.macos.arm64.yml index 9ed9b70be7d..f4b59bed895 100644 --- a/dev/tasks/verify-rc/github.macos.arm64.yml +++ b/dev/tasks/verify-rc/github.macos.arm64.yml @@ -35,7 +35,7 @@ jobs: shell: bash run: rm -rf arrow - {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} - name: Run verification shell: bash diff --git a/dev/tasks/verify-rc/github.win.yml b/dev/tasks/verify-rc/github.win.yml index 60edb74a150..146f540aa04 100644 --- a/dev/tasks/verify-rc/github.win.yml +++ b/dev/tasks/verify-rc/github.win.yml @@ -31,7 +31,7 @@ jobs: {% endif %} steps: - {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} - uses: conda-incubator/setup-miniconda@v2 - name: Install System Dependencies From ddeaaa1ab10fcffb3195afb1667251dd1df584e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 22:35:50 +0100 Subject: [PATCH 19/47] Install numpy and sqlite in the conda build --- dev/release/verify-release-candidate.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index a8eb292da97..9da2a264f39 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -288,8 +288,8 @@ test_package_java() { test_and_install_cpp() { if [ "${USE_CONDA}" -gt 0 ]; then DEFAULT_DEPENDENCY_SOURCE="CONDA" - # TODO(kszucs): we should define orc in the conda_env_cpp.txt file - mamba install -y --file ci/conda_env_cpp.txt orc + # TODO(kszucs): we should define orc and sqlite in the conda_env_cpp.txt file + mamba install -y --file ci/conda_env_cpp.txt numpy orc sqlite elif [ ! -z ${CONDA_PREFIX} ]; then echo "Conda environment is active despite that USE_CONDA is set to 0." echo "Deactivate the environment before running the verification script." From a20f197258b890b92d6e2efb9816decaaa5a5f62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 22:37:27 +0100 Subject: [PATCH 20/47] Don't install system dependencies for macos conda build --- dev/tasks/verify-rc/github.macos.amd64.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dev/tasks/verify-rc/github.macos.amd64.yml b/dev/tasks/verify-rc/github.macos.amd64.yml index e8bbee289b5..67ce49c9dd8 100644 --- a/dev/tasks/verify-rc/github.macos.amd64.yml +++ b/dev/tasks/verify-rc/github.macos.amd64.yml @@ -35,15 +35,19 @@ jobs: steps: {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} + {% if not use_conda %} - name: Install System Dependencies shell: bash run: | brew update brew bundle --file=arrow/cpp/Brewfile brew bundle --file=arrow/c_glib/Brewfile + {% endif %} + - uses: actions/setup-node@v2-beta with: node-version: '16' + - name: Run verification shell: bash env: From f1062c920fdffc88928c0f531576af399e2147cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 22:46:45 +0100 Subject: [PATCH 21/47] Path fu for windows --- dev/release/verify-release-candidate.bat | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dev/release/verify-release-candidate.bat b/dev/release/verify-release-candidate.bat index b7a25220780..c8c1f15dca8 100644 --- a/dev/release/verify-release-candidate.bat +++ b/dev/release/verify-release-candidate.bat @@ -36,11 +36,13 @@ set RC_NUMBER=%2 if "%RC_NUMBER%"=="" ( @rem verify a specific git revision if "%SOURCE_REPOSITORY%"=="" ( - git clone --recurse-submodules "file://%~dp0..\..\" %ARROW_SOURCE% + pushd "%~dp0..\..\" ) else ( - git clone --recurse-submodules %SOURCE_REPOSITORY% %ARROW_SOURCE% + pushd %SOURCE_REPOSITORY% ) + git clone --recurse-submodules . %ARROW_SOURCE% git -C %ARROW_SOURCE% checkout %VERSION% + popd ) else ( @rem verify a release candidate tarball @rem Requires GNU Wget for Windows From 07483d3d989ef0f9d956550afb0ba2833f0edee8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 22:51:55 +0100 Subject: [PATCH 22/47] Force activate arrow-test --- dev/release/verify-release-candidate.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 9da2a264f39..900a0a2f2d7 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -289,6 +289,7 @@ test_and_install_cpp() { if [ "${USE_CONDA}" -gt 0 ]; then DEFAULT_DEPENDENCY_SOURCE="CONDA" # TODO(kszucs): we should define orc and sqlite in the conda_env_cpp.txt file + conda activate arrow-test mamba install -y --file ci/conda_env_cpp.txt numpy orc sqlite elif [ ! -z ${CONDA_PREFIX} ]; then echo "Conda environment is active despite that USE_CONDA is set to 0." @@ -409,6 +410,7 @@ test_csharp() { # Build and test Python test_python() { if [ "${USE_CONDA}" -gt 0 ]; then + conda activate arrow-test mamba install -y --file ci/conda_env_python.txt elif [ ! -z ${CONDA_PREFIX} ]; then echo "Conda environment is active despite that USE_CONDA is set to 0." From c2d54311d6221369edc4ca7d62c39c7317caf068 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 28 Jan 2022 23:57:53 +0100 Subject: [PATCH 23/47] Fix parquet and testing data paths on windows --- dev/release/verify-release-candidate.bat | 14 +++++------ dev/release/verify-release-candidate.sh | 32 ++++++++++++++++++------ 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/dev/release/verify-release-candidate.bat b/dev/release/verify-release-candidate.bat index c8c1f15dca8..dfb874d722c 100644 --- a/dev/release/verify-release-candidate.bat +++ b/dev/release/verify-release-candidate.bat @@ -40,7 +40,7 @@ if "%RC_NUMBER%"=="" ( ) else ( pushd %SOURCE_REPOSITORY% ) - git clone --recurse-submodules . %ARROW_SOURCE% + git clone . %ARROW_SOURCE% git -C %ARROW_SOURCE% checkout %VERSION% popd ) else ( @@ -50,14 +50,14 @@ if "%RC_NUMBER%"=="" ( set TARBALL_URL=https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-%VERSION%-rc%RC_NUMBER%/%TARBALL_NAME% wget --no-check-certificate -O %TARBALL_NAME% %TARBALL_URL% || exit /B 1 tar xf %TARBALL_NAME% -C %_VERIFICATION_DIR_UNIX% +) - @rem Get testing datasets for Parquet unit tests - git clone https://github.com/apache/parquet-testing.git %_VERIFICATION_DIR%\parquet-testing - set PARQUET_TEST_DATA=%_VERIFICATION_DIR%\parquet-testing\data + @rem Get testing datasets for Parquet unit tests +git clone https://github.com/apache/parquet-testing.git %_VERIFICATION_DIR%\parquet-testing +set PARQUET_TEST_DATA=%_VERIFICATION_DIR%\parquet-testing\data - git clone https://github.com/apache/arrow-testing.git %_VERIFICATION_DIR%\arrow-testing - set ARROW_TEST_DATA=%_VERIFICATION_DIR%\arrow-testing\data -) +git clone https://github.com/apache/arrow-testing.git %_VERIFICATION_DIR%\arrow-testing +set ARROW_TEST_DATA=%_VERIFICATION_DIR%\arrow-testing\data set PYTHON=3.8 diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 900a0a2f2d7..25ac000e1f4 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -259,7 +259,7 @@ setup_conda() { if [ ! -d "${MINICONDA}" ]; then # Setup miniconda only if the directory doesn't exist yet - wget -O miniconda.sh $MINICONDA_URL + wget -q -O miniconda.sh $MINICONDA_URL bash miniconda.sh -b -p $MINICONDA rm -f miniconda.sh fi @@ -268,8 +268,8 @@ setup_conda() { . $MINICONDA/etc/profile.d/conda.sh conda activate base mamba create -n arrow-test -y - conda activate arrow-test - echo "Using conda environment ${CONDA_PREFIX}" + echo "Created conda environment ${CONDA_PREFIX}" + conda deactivate } # Build and test Java (Requires newer Maven -- I used 3.3.9) @@ -346,6 +346,12 @@ ${ARROW_CMAKE_OPTIONS:-} --output-on-failure \ -L unittest popd + + if [ "${USE_CONDA}" -gt 0 ]; then + conda deactivate + else + deactivate + fi } test_csharp() { @@ -417,6 +423,7 @@ test_python() { echo "Deactivate the environment before running the verification script." exit 1 else + source venv/bin/activate pip install -r python/requirements-build.txt fi @@ -469,8 +476,11 @@ import pyarrow.plasma # Execute pyarrow unittests pytest pyarrow -v --pdb - # Deactivate virtualenv - deactivate + if [ "${USE_CONDA}" -gt 0 ]; then + conda deactivate + else + deactivate + fi popd } @@ -737,7 +747,11 @@ test_linux_wheels() { INSTALL_PYARROW=OFF ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR} done - conda deactivate + if [ "${USE_CONDA}" -gt 0 ]; then + conda deactivate + else + deactivate + fi done } @@ -788,7 +802,11 @@ test_macos_wheels() { INSTALL_PYARROW=OFF ARROW_FLIGHT=${check_flight} ARROW_S3=${check_s3} \ ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR} - conda deactivate + if [ "${USE_CONDA}" -gt 0 ]; then + conda deactivate + else + deactivate + fi done # verify arm64 and universal2 wheels using an universal2 python binary From daf0d066047e0628591f40a4a79636c930f2b450 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Sat, 29 Jan 2022 09:24:06 +0100 Subject: [PATCH 24/47] Ncurses and numpy version --- dev/release/verify-release-candidate.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 25ac000e1f4..f2611044128 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -290,7 +290,13 @@ test_and_install_cpp() { DEFAULT_DEPENDENCY_SOURCE="CONDA" # TODO(kszucs): we should define orc and sqlite in the conda_env_cpp.txt file conda activate arrow-test - mamba install -y --file ci/conda_env_cpp.txt numpy orc sqlite + mamba install -y \ + --file ci/conda_env_cpp.txt \ + --file ci/conda_env_gandiva.txt \ + --file ci/conda_env_unix.txt \ + ncurses \ + numpy \ + sqlite elif [ ! -z ${CONDA_PREFIX} ]; then echo "Conda environment is active despite that USE_CONDA is set to 0." echo "Deactivate the environment before running the verification script." @@ -424,7 +430,7 @@ test_python() { exit 1 else source venv/bin/activate - pip install -r python/requirements-build.txt + pip install cython numpy setuptools_scm setuptools fi export PYARROW_PARALLEL=$NPROC From 7c66ac17df829e420d1e3619ff8b74a37d5fa7da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Sat, 29 Jan 2022 09:27:58 +0100 Subject: [PATCH 25/47] Activate env for integration testing --- dev/release/verify-release-candidate.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index f2611044128..be0b6d00f86 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -599,6 +599,16 @@ test_go() { # Run integration tests test_integration() { + if [ "${USE_CONDA}" -gt 0 ]; then + conda activat arrow-test + elif [ ! -z ${CONDA_PREFIX} ]; then + echo "Conda environment is active despite that USE_CONDA is set to 0." + echo "Deactivate the environment before running the verification script." + exit 1 + else + source venv/bin/activate + fi + JAVA_DIR=$PWD/java CPP_BUILD_DIR=$PWD/cpp/build @@ -622,6 +632,12 @@ test_integration() { --with-js=${TEST_INTEGRATION_JS} \ --with-go=${TEST_INTEGRATION_GO} \ $INTEGRATION_TEST_ARGS + + if [ "${USE_CONDA}" -gt 0 ]; then + conda deactivate + else + deactivate + fi } ensure_source_directory() { From 3a9222dd7e5436402363875975eeb7eb60d8de03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Sat, 29 Jan 2022 10:14:13 +0100 Subject: [PATCH 26/47] Update gandiva --- ci/conda_env_gandiva.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/conda_env_gandiva.txt b/ci/conda_env_gandiva.txt index 024b9fe74c1..6dab1848820 100644 --- a/ci/conda_env_gandiva.txt +++ b/ci/conda_env_gandiva.txt @@ -15,5 +15,5 @@ # specific language governing permissions and limitations # under the License. -clang=11 -llvmdev=11 +clang=12 +llvmdev=12 From 390cacf1fb737b6c6b05d79c5fb11d90b6faed52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Sat, 29 Jan 2022 11:35:59 +0100 Subject: [PATCH 27/47] Env for meson --- dev/release/verify-release-candidate.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index be0b6d00f86..73428e20735 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -286,6 +286,7 @@ test_package_java() { # Build and test C++ test_and_install_cpp() { + # TODO(kszucs): factor out to functions if [ "${USE_CONDA}" -gt 0 ]; then DEFAULT_DEPENDENCY_SOURCE="CONDA" # TODO(kszucs): we should define orc and sqlite in the conda_env_cpp.txt file @@ -494,6 +495,18 @@ import pyarrow.plasma test_glib() { pushd c_glib + if [ "${USE_CONDA}" -gt 0 ]; then + conda activate arrow-test + mamba install -y meson + elif [ ! -z ${CONDA_PREFIX} ]; then + echo "Conda environment is active despite that USE_CONDA is set to 0." + echo "Deactivate the environment before running the verification script." + exit 1 + else + source venv/bin/activate + pip install meson + fi + pip install meson meson build --prefix=$ARROW_HOME --libdir=lib @@ -510,6 +523,12 @@ test_glib() { bundle install bundle exec ruby test/run-test.rb + if [ "${USE_CONDA}" -gt 0 ]; then + conda deactivate + else + deactivate + fi + popd } From 9d62e5e92e38958359a3f600a26cd8c963e6cac6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 31 Jan 2022 14:17:41 +0100 Subject: [PATCH 28/47] Fix import --- dev/release/verify-release-candidate.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 73428e20735..ef4cd7608b3 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -295,6 +295,7 @@ test_and_install_cpp() { --file ci/conda_env_cpp.txt \ --file ci/conda_env_gandiva.txt \ --file ci/conda_env_unix.txt \ + compilers \ ncurses \ numpy \ sqlite @@ -456,9 +457,9 @@ test_python() { # Check mandatory and optional imports python -c " import pyarrow -import pyarrow._s3 import pyarrow._gcs import pyarrow._hdfs +import pyarrow._s3fs import pyarrow.csv import pyarrow.dataset import pyarrow.fs From 7a74cf6574f46191a2a4f3150584bc623a95d93a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 31 Jan 2022 14:19:15 +0100 Subject: [PATCH 29/47] Fix venv path --- dev/release/verify-release-candidate.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index ef4cd7608b3..84ea3dd03a7 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -494,8 +494,6 @@ import pyarrow.plasma } test_glib() { - pushd c_glib - if [ "${USE_CONDA}" -gt 0 ]; then conda activate arrow-test mamba install -y meson @@ -508,6 +506,8 @@ test_glib() { pip install meson fi + pushd c_glib + pip install meson meson build --prefix=$ARROW_HOME --libdir=lib From 411f2ba2fd61305b21e6c38fc1f614a5b0febe5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 21 Jan 2022 09:28:14 +0100 Subject: [PATCH 30/47] ARROW-15392: [JS] Disable flaky javascript unittest --- js/test/unit/generated-data-tests.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/test/unit/generated-data-tests.ts b/js/test/unit/generated-data-tests.ts index 90cf0d598aa..948b7af7065 100644 --- a/js/test/unit/generated-data-tests.ts +++ b/js/test/unit/generated-data-tests.ts @@ -54,7 +54,7 @@ describe('Generated Test Data', () => { describe('List', () => { validateVector(generate.list()); }); describe('Struct', () => { validateVector(generate.struct()); }); describe('DenseUnion', () => { validateVector(generate.denseUnion()); }); - describe('SparseUnion', () => { validateVector(generate.sparseUnion()); }); + // describe('SparseUnion', () => { validateVector(generate.sparseUnion()); }); describe('Dictionary', () => { validateVector(generate.dictionary()); }); describe('IntervalDayTime', () => { validateVector(generate.intervalDayTime()); }); describe('IntervalYearMonth', () => { validateVector(generate.intervalYearMonth()); }); From 7babddecd4ba481b3ed318238cc2ba8e7da8ade9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 31 Jan 2022 14:36:52 +0100 Subject: [PATCH 31/47] Activate base env before calling mamba --- dev/release/verify-release-candidate.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 84ea3dd03a7..3b2a8b3e588 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -290,8 +290,8 @@ test_and_install_cpp() { if [ "${USE_CONDA}" -gt 0 ]; then DEFAULT_DEPENDENCY_SOURCE="CONDA" # TODO(kszucs): we should define orc and sqlite in the conda_env_cpp.txt file - conda activate arrow-test - mamba install -y \ + conda activate base + mamba install -y -n arrow-test \ --file ci/conda_env_cpp.txt \ --file ci/conda_env_gandiva.txt \ --file ci/conda_env_unix.txt \ @@ -299,6 +299,7 @@ test_and_install_cpp() { ncurses \ numpy \ sqlite + conda activate arrow-test elif [ ! -z ${CONDA_PREFIX} ]; then echo "Conda environment is active despite that USE_CONDA is set to 0." echo "Deactivate the environment before running the verification script." From efe943c47a48fe2b1aaa86508dfef9510a6cee49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 31 Jan 2022 15:45:11 +0100 Subject: [PATCH 32/47] No _gcs --- dev/release/verify-release-candidate.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 3b2a8b3e588..063511f3722 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -295,7 +295,6 @@ test_and_install_cpp() { --file ci/conda_env_cpp.txt \ --file ci/conda_env_gandiva.txt \ --file ci/conda_env_unix.txt \ - compilers \ ncurses \ numpy \ sqlite @@ -458,7 +457,6 @@ test_python() { # Check mandatory and optional imports python -c " import pyarrow -import pyarrow._gcs import pyarrow._hdfs import pyarrow._s3fs import pyarrow.csv From 423b6b6163eeeb68632f6c118ccb544d278b00f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 31 Jan 2022 16:08:37 +0100 Subject: [PATCH 33/47] System deps --- dev/tasks/verify-rc/github.linux.amd64.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/dev/tasks/verify-rc/github.linux.amd64.yml b/dev/tasks/verify-rc/github.linux.amd64.yml index fb5a30772a9..6ef2ae86561 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.yml @@ -35,11 +35,8 @@ jobs: steps: {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} - {% if not use_conda %} - name: Install System Dependencies run: | - # TODO: don't require removing newer llvms - sudo apt-get --purge remove -y llvm-9 clang-9 sudo apt-get update -y sudo apt-get install -y \ autoconf-archive \ @@ -48,13 +45,8 @@ jobs: curl \ flex \ gtk-doc-tools \ - jq \ - libboost-all-dev \ libgirepository1.0-dev \ - ninja-build \ - qemu-user-static \ wget - {% endif %} {% if target is defined %} {% if target == "java" %} From 65d87bf70f592df214fb13a56d655c2d5cc70e6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 31 Jan 2022 16:15:00 +0100 Subject: [PATCH 34/47] Ruby and maven --- dev/tasks/verify-rc/github.linux.amd64.yml | 30 +++++++++------------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/dev/tasks/verify-rc/github.linux.amd64.yml b/dev/tasks/verify-rc/github.linux.amd64.yml index 6ef2ae86561..e2b6028ca2d 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.yml @@ -42,31 +42,25 @@ jobs: autoconf-archive \ binfmt-support \ bison \ + build-essential \ curl \ flex \ gtk-doc-tools \ libgirepository1.0-dev \ wget - {% if target is defined %} - {% if target == "java" %} - - name: Install Maven - run: | - MAVEN_VERSION=3.6.3 - wget https://downloads.apache.org/maven/maven-3/$MAVEN_VERSION/binaries/apache-maven-$MAVEN_VERSION-bin.zip - unzip apache-maven-$MAVEN_VERSION-bin.zip - mkdir -p $HOME/java - mv apache-maven-$MAVEN_VERSION $HOME/java - export PATH=$HOME/java/apache-maven-$MAVEN_VERSION/bin:$PATH - {% elif target == "ruby" %} - - name: Install Bundler - run: | - ruby --version - sudo gem install bundler - {% endif %} - {% endif %} + - name: Setup Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: 3.1 + + - name: Setup Maven Action + uses: s4u/setup-maven-action@v1 + with: + java-version: 8 + maven-version: 3.5.4 - - uses: actions/setup-node@v2-beta + - uses: actions/setup-node@v2 with: node-version: '16' From ee3c552896787a93b78832b561d7fb45bb20a4d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 31 Jan 2022 16:41:39 +0100 Subject: [PATCH 35/47] Fix maven action's version --- dev/tasks/verify-rc/github.linux.amd64.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/verify-rc/github.linux.amd64.yml b/dev/tasks/verify-rc/github.linux.amd64.yml index e2b6028ca2d..2cce9392a20 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.yml @@ -55,7 +55,7 @@ jobs: ruby-version: 3.1 - name: Setup Maven Action - uses: s4u/setup-maven-action@v1 + uses: s4u/setup-maven-action@v1.3.1 with: java-version: 8 maven-version: 3.5.4 From e33288980a27413650acb968237bf9dac39c56e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 31 Jan 2022 22:42:29 +0100 Subject: [PATCH 36/47] Setup java --- dev/tasks/verify-rc/github.linux.amd64.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dev/tasks/verify-rc/github.linux.amd64.yml b/dev/tasks/verify-rc/github.linux.amd64.yml index 2cce9392a20..791296a9cce 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.yml @@ -54,11 +54,10 @@ jobs: with: ruby-version: 3.1 - - name: Setup Maven Action - uses: s4u/setup-maven-action@v1.3.1 + - uses: actions/setup-java@v2 with: - java-version: 8 - maven-version: 3.5.4 + distribution: 'temurin' + java-version: '11' - uses: actions/setup-node@v2 with: From aebb2bc2eca17eee1238483b01e7df69db79a9f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 1 Feb 2022 01:22:14 +0100 Subject: [PATCH 37/47] Docker image for conda testing --- dev/release/verify-release-candidate.sh | 23 ++++++++++++++--- dev/tasks/tasks.yml | 5 ++-- docker-compose.yml | 34 ++++++++++++++++++++++--- 3 files changed, 52 insertions(+), 10 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 063511f3722..cfd7eb35dd1 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -272,15 +272,28 @@ setup_conda() { conda deactivate } +# setup_conda_env() {} +# setup_virtual_env() {} + # Build and test Java (Requires newer Maven -- I used 3.3.9) test_package_java() { + if [ "${USE_CONDA}" -gt 0 ]; then + conda activate base + mamba install -y -n arrow-test maven + conda activate arrow-test + fi + pushd java mvn test mvn package popd + + if [ "${USE_CONDA}" -gt 0 ]; then + conda deactivate + fi } # Build and test C++ @@ -297,7 +310,8 @@ test_and_install_cpp() { --file ci/conda_env_unix.txt \ ncurses \ numpy \ - sqlite + sqlite \ + compilers conda activate arrow-test elif [ ! -z ${CONDA_PREFIX} ]; then echo "Conda environment is active despite that USE_CONDA is set to 0." @@ -343,8 +357,7 @@ ${ARROW_CMAKE_OPTIONS:-} -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-$DEFAULT_DEPENDENCY_SOURCE} " cmake $ARROW_CMAKE_OPTIONS .. - - make -j$NPROC install + cmake --build . --target install # TODO: ARROW-5036: plasma-serialization_tests broken # TODO: ARROW-5054: libgtest.so link failure in flight-server-test @@ -662,7 +675,9 @@ test_integration() { ensure_source_directory() { dist_name="apache-arrow-${VERSION}" if [ "${SOURCE_KIND}" = "git" ]; then - git clone --recurse-submodules ${SOURCE_REPOSITORY:-"${SOURCE_DIR}/../.."} arrow + if [ ! -d "arrow" ]; then + git clone --recurse-submodules ${SOURCE_REPOSITORY:-"${SOURCE_DIR}/../.."} arrow + fi pushd arrow git checkout ${VERSION} else diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 6aaa91194e2..4b9744ea373 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -826,11 +826,10 @@ tasks: {% for target in ["cpp", "integration", "python"] %} verify-rc-source-{{ target }}-linux-conda-amd64: ci: github - template: verify-rc/github.linux.amd64.yml + template: verify-rc/github.linux.amd64.docker.yml params: target: {{ target }} - artifact: source - use_conda: True + distro: conda {% endfor %} {% for distribution, version in [("almalinux", "8"), diff --git a/docker-compose.yml b/docker-compose.yml index 6b8db1eaf22..1b72c910d8b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -111,6 +111,7 @@ x-hierarchy: - conda-python-kartothek - conda-python-spark - conda-python-turbodbc + - conda-verify-rc-source - debian-cpp: - debian-c-glib: - debian-ruby @@ -157,6 +158,8 @@ x-hierarchy: - python-wheel-windows-test volumes: + almalinux-ccache: + name: ${ARCH}-almalinux-ccache conda-ccache: name: ${ARCH}-conda-ccache debian-ccache: @@ -1702,6 +1705,22 @@ services: ################################# Source Verification ##################################### + conda-verify-rc-source: + image: ubuntu:${UBUNTU} + volumes: + - .:/arrow:delegated + - ${DOCKER_VOLUME_PREFIX}conda-ccache:/ccache:delegated + shm_size: '1gb' + environment: + <<: *ccache + USE_CONDA: 1 + DEBIAN_FRONTEND: "noninteractive" + ARROW_CMAKE_OPTIONS: "-DARROW_USE_CCACHE=ON -G Ninja" + command: > + /bin/bash -c " + apt update -y && apt install -y wget git tzdata && + /arrow/dev/release/verify-release-candidate.sh source $${VERIFY_VERSION:-HEAD} $${VERIFY_RC}" + almalinux-verify-rc-source: # Usage: # docker-compose build almalinux-verify-rc-source @@ -1711,11 +1730,15 @@ services: image: almalinux:${ALMALINUX} volumes: - .:/arrow:delegated + - ${DOCKER_VOLUME_PREFIX}almalinux-ccache:/ccache:delegated shm_size: '1gb' + environment: + <<: *ccache + ARROW_CMAKE_OPTIONS: "-DARROW_USE_CCACHE=ON" command: > /bin/bash -c " /arrow/dev/release/setup-rhel-rebuilds.sh && - /arrow/dev/release/verify-release-candidate.sh source $${VERIFY_VERSION} $${VERIFY_RC}" + /arrow/dev/release/verify-release-candidate.sh source $${VERIFY_VERSION:-HEAD} $${VERIFY_RC}" ubuntu-verify-rc-source: # Usage: @@ -1726,8 +1749,13 @@ services: image: ubuntu:${UBUNTU} volumes: - .:/arrow:delegated + - ${DOCKER_VOLUME_PREFIX}ubuntu-ccache:/ccache:delegated shm_size: '1gb' + environment: + <<: *ccache + DEBIAN_FRONTEND: "noninteractive" + ARROW_CMAKE_OPTIONS: "-DARROW_USE_CCACHE=ON" command: > /bin/bash -c " - DEBIAN_FRONTEND=noninteractive /arrow/dev/release/setup-ubuntu.sh && - /arrow/dev/release/verify-release-candidate.sh source $${VERIFY_VERSION} $${VERIFY_RC}" + /arrow/dev/release/setup-ubuntu.sh && + /arrow/dev/release/verify-release-candidate.sh source $${VERIFY_VERSION:-HEAD} $${VERIFY_RC}" From 95d0b6242352fbd90e852b7d882637610cc484d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 1 Feb 2022 10:19:29 +0100 Subject: [PATCH 38/47] Enable more features --- dev/release/verify-release-candidate.sh | 34 ++++++++++++++----------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index cfd7eb35dd1..298694b15ec 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -332,29 +332,30 @@ test_and_install_cpp() { ARROW_CMAKE_OPTIONS=" ${ARROW_CMAKE_OPTIONS:-} --DCMAKE_INSTALL_PREFIX=$ARROW_HOME --DCMAKE_INSTALL_LIBDIR=lib +-DARROW_BOOST_USE_SHARED=ON +-DARROW_BUILD_INTEGRATION=ON +-DARROW_BUILD_TESTS=ON +-DARROW_CUDA=${ARROW_CUDA} +-DARROW_DATASET=ON +-DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-$DEFAULT_DEPENDENCY_SOURCE} -DARROW_FLIGHT=${ARROW_FLIGHT} --DARROW_PLASMA=ON --DARROW_ORC=ON --DARROW_PYTHON=ON -DARROW_GANDIVA=${ARROW_GANDIVA} +-DARROW_HDFS=ON +-DARROW_ORC=ON -DARROW_PARQUET=ON --DARROW_DATASET=ON --DPARQUET_REQUIRE_ENCRYPTION=ON +-DARROW_PLASMA=ON +-DARROW_PYTHON=ON -DARROW_VERBOSE_THIRDPARTY_BUILD=ON +-DARROW_WITH_BROTLI=ON -DARROW_WITH_BZ2=ON --DARROW_WITH_ZLIB=ON --DARROW_WITH_ZSTD=ON -DARROW_WITH_LZ4=ON -DARROW_WITH_SNAPPY=ON --DARROW_WITH_BROTLI=ON --DARROW_BOOST_USE_SHARED=ON +-DARROW_WITH_ZLIB=ON +-DARROW_WITH_ZSTD=ON -DCMAKE_BUILD_TYPE=release --DARROW_BUILD_TESTS=ON --DARROW_BUILD_INTEGRATION=ON --DARROW_CUDA=${ARROW_CUDA} --DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-$DEFAULT_DEPENDENCY_SOURCE} +-DCMAKE_INSTALL_LIBDIR=lib +-DCMAKE_INSTALL_PREFIX=$ARROW_HOME +-DPARQUET_REQUIRE_ENCRYPTION=ON " cmake $ARROW_CMAKE_OPTIONS .. cmake --build . --target install @@ -450,8 +451,11 @@ test_python() { export PYARROW_PARALLEL=$NPROC export PYARROW_WITH_DATASET=1 + export PYARROW_WITH_HDFS=1 + export PYARROW_WITH_ORC=1 export PYARROW_WITH_PARQUET=1 export PYARROW_WITH_PLASMA=1 + export PYARROW_WITH_S3=1 if [ "${ARROW_CUDA}" = "ON" ]; then export PYARROW_WITH_CUDA=1 fi From 25fad8a37fa3b9defb17e43b9a0c26611f621e97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 1 Feb 2022 10:21:28 +0100 Subject: [PATCH 39/47] Looser gandiva deps --- ci/conda_env_gandiva.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/conda_env_gandiva.txt b/ci/conda_env_gandiva.txt index 6dab1848820..217936e2c94 100644 --- a/ci/conda_env_gandiva.txt +++ b/ci/conda_env_gandiva.txt @@ -15,5 +15,5 @@ # specific language governing permissions and limitations # under the License. -clang=12 -llvmdev=12 +clang>=11 +llvmdev>=11 From 6a2500cc8c77620743849fbc2d4e1b88c9635e51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 1 Feb 2022 10:23:52 +0100 Subject: [PATCH 40/47] Setup java macos --- dev/tasks/verify-rc/github.macos.amd64.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dev/tasks/verify-rc/github.macos.amd64.yml b/dev/tasks/verify-rc/github.macos.amd64.yml index 67ce49c9dd8..0477aefc3dd 100644 --- a/dev/tasks/verify-rc/github.macos.amd64.yml +++ b/dev/tasks/verify-rc/github.macos.amd64.yml @@ -44,6 +44,11 @@ jobs: brew bundle --file=arrow/c_glib/Brewfile {% endif %} + - uses: actions/setup-java@v2 + with: + distribution: 'temurin' + java-version: '11' + - uses: actions/setup-node@v2-beta with: node-version: '16' From 15c19f8db9bd9bbd5ce86930e69689445798e873 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 1 Feb 2022 11:19:19 +0100 Subject: [PATCH 41/47] Install curl --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 1b72c910d8b..dcfbbe8e491 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1718,7 +1718,7 @@ services: ARROW_CMAKE_OPTIONS: "-DARROW_USE_CCACHE=ON -G Ninja" command: > /bin/bash -c " - apt update -y && apt install -y wget git tzdata && + apt update -y && apt install -y curl git tzdata wget && /arrow/dev/release/verify-release-candidate.sh source $${VERIFY_VERSION:-HEAD} $${VERIFY_RC}" almalinux-verify-rc-source: From 6beb80dadfffe8794a3137a6b7ceac861b309c1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 1 Feb 2022 13:24:03 +0100 Subject: [PATCH 42/47] Enable S3 --- dev/release/verify-release-candidate.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 298694b15ec..0851b4c4edf 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -345,6 +345,7 @@ ${ARROW_CMAKE_OPTIONS:-} -DARROW_PARQUET=ON -DARROW_PLASMA=ON -DARROW_PYTHON=ON +-DARROW_S3=ON -DARROW_VERBOSE_THIRDPARTY_BUILD=ON -DARROW_WITH_BROTLI=ON -DARROW_WITH_BZ2=ON From 5f8c7da06136774e4bdbd3e8ab45a881020ff3e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 1 Feb 2022 18:11:35 +0100 Subject: [PATCH 43/47] Prefer curl over wget --- dev/release/verify-release-candidate.sh | 10 +++++----- docker-compose.yml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 0851b4c4edf..6d81f4275bf 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -259,7 +259,7 @@ setup_conda() { if [ ! -d "${MINICONDA}" ]; then # Setup miniconda only if the directory doesn't exist yet - wget -q -O miniconda.sh $MINICONDA_URL + curl -sL -o miniconda.sh $MINICONDA_URL bash miniconda.sh -b -p $MINICONDA rm -f miniconda.sh fi @@ -403,11 +403,11 @@ test_csharp() { esac local dotnet_download_thank_you_url=https://dotnet.microsoft.com/download/thank-you/dotnet-sdk-${dotnet_version}-${dotnet_platform}-x64-binaries local dotnet_download_url=$( \ - curl --location ${dotnet_download_thank_you_url} | \ + curl -sL ${dotnet_download_thank_you_url} | \ grep 'window\.open' | \ grep -E -o '[^"]+' | \ sed -n 2p) - curl ${dotnet_download_url} | \ + curl -sL ${dotnet_download_url} | \ tar xzf - -C ${csharp_bin} PATH=${csharp_bin}:${PATH} fi @@ -556,7 +556,7 @@ test_js() { if [ "${INSTALL_NODE}" -gt 0 ]; then export NVM_DIR="`pwd`/.nvm" mkdir -p $NVM_DIR - curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | \ + curl -sL https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | \ PROFILE=/dev/null bash [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" @@ -615,7 +615,7 @@ test_go() { fi local GO_ARCHIVE=go$VERSION.$OS-$ARCH.tar.gz - wget https://dl.google.com/go/$GO_ARCHIVE + curl -sLO https://dl.google.com/go/$GO_ARCHIVE mkdir -p local-go tar -xzf $GO_ARCHIVE -C local-go diff --git a/docker-compose.yml b/docker-compose.yml index dcfbbe8e491..82486331f0f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1718,7 +1718,7 @@ services: ARROW_CMAKE_OPTIONS: "-DARROW_USE_CCACHE=ON -G Ninja" command: > /bin/bash -c " - apt update -y && apt install -y curl git tzdata wget && + apt update -y && apt install -y curl git tzdata && /arrow/dev/release/verify-release-candidate.sh source $${VERIFY_VERSION:-HEAD} $${VERIFY_RC}" almalinux-verify-rc-source: From 210d752f89db6e4aed17eaee53eb78bec49c875b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 1 Feb 2022 19:32:55 +0100 Subject: [PATCH 44/47] Enable python 3.6 archery --- dev/archery/setup.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dev/archery/setup.py b/dev/archery/setup.py index ce1b97e0ae4..6e8366b7648 100755 --- a/dev/archery/setup.py +++ b/dev/archery/setup.py @@ -18,12 +18,8 @@ import functools import operator -import sys from setuptools import setup, find_packages -if sys.version_info < (3, 7): - sys.exit('Python < 3.7 is not supported') - # For pathlib.Path compatibility jinja_req = 'jinja2>=2.11' From 6af833e46884208f5d318abd8d521a03bcd4f63f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 1 Feb 2022 19:44:15 +0100 Subject: [PATCH 45/47] Enable python 3.6 archery --- dev/archery/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/archery/setup.py b/dev/archery/setup.py index 6e8366b7648..69f33bd9c45 100755 --- a/dev/archery/setup.py +++ b/dev/archery/setup.py @@ -46,7 +46,7 @@ maintainer_email='dev@arrow.apache.org', packages=find_packages(), include_package_data=True, - python_requires='>=3.7', + python_requires='>=3.6', install_requires=['click>=7'], tests_require=['pytest', 'responses'], extras_require=extras, From 879203233dfe225ce6632f71c0890bd203d69edf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 1 Feb 2022 20:30:02 +0100 Subject: [PATCH 46/47] Install libcurl-dev for bundled s3 --- dev/release/setup-ubuntu.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/release/setup-ubuntu.sh b/dev/release/setup-ubuntu.sh index 7bca67eedbb..0737be9b2c3 100755 --- a/dev/release/setup-ubuntu.sh +++ b/dev/release/setup-ubuntu.sh @@ -25,6 +25,7 @@ apt-get -y install \ cmake \ curl \ git \ + libcurl4-openssl-dev \ libgirepository1.0-dev \ libglib2.0-dev \ libsqlite3-dev \ From 5d027d1a0d09ccda9500716769e1791f8397c856 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 1 Feb 2022 21:29:26 +0100 Subject: [PATCH 47/47] Turn S3 off by default --- dev/release/verify-release-candidate.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 6d81f4275bf..e18df2f5482 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -84,6 +84,7 @@ detect_cuda() { if [ -z "${ARROW_CUDA:-}" ] && detect_cuda; then ARROW_CUDA=ON fi +: ${ARROW_S3:=OFF} : ${ARROW_CUDA:=OFF} : ${ARROW_FLIGHT:=ON} : ${ARROW_GANDIVA:=ON} @@ -345,7 +346,7 @@ ${ARROW_CMAKE_OPTIONS:-} -DARROW_PARQUET=ON -DARROW_PLASMA=ON -DARROW_PYTHON=ON --DARROW_S3=ON +-DARROW_S3=${ARROW_S3} -DARROW_VERBOSE_THIRDPARTY_BUILD=ON -DARROW_WITH_BROTLI=ON -DARROW_WITH_BZ2=ON @@ -456,7 +457,9 @@ test_python() { export PYARROW_WITH_ORC=1 export PYARROW_WITH_PARQUET=1 export PYARROW_WITH_PLASMA=1 - export PYARROW_WITH_S3=1 + if [ "${ARROW_S3}" = "ON" ]; then + export PYARROW_WITH_S3=1 + fi if [ "${ARROW_CUDA}" = "ON" ]; then export PYARROW_WITH_CUDA=1 fi @@ -476,7 +479,6 @@ test_python() { python -c " import pyarrow import pyarrow._hdfs -import pyarrow._s3fs import pyarrow.csv import pyarrow.dataset import pyarrow.fs @@ -485,6 +487,9 @@ import pyarrow.orc import pyarrow.parquet import pyarrow.plasma " + if [ "${PYARROW_WITH_S3}" == "ON" ]; then + python -c "import pyarrow._s3fs" + fi if [ "${PYARROW_WITH_CUDA}" == "ON" ]; then python -c "import pyarrow.cuda" fi