Feat: map_from_entries (#2905) #10091
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
name: Spark SQL Tests
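# Cancel any in-progress run for the same ref and workflow, so only the latest push is tested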
concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
  cancel-in-progress: true
on:
  push:
    paths-ignore:
      - "doc/**"
      - "docs/**"
      - "**.md"
      - "native/core/benches/**"
      - "native/spark-expr/benches/**"
      - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
  pull_request:
    paths-ignore:
      - "doc/**"
      - "docs/**"
      - "**.md"
      - "native/core/benches/**"
      - "native/spark-expr/benches/**"
      - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
  # manual trigger
  # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
  workflow_dispatch:
    inputs:
      collect-fallback-logs:
        description: 'Whether to collect Comet fallback reasons from Spark SQL unit test logs'
        required: false
        default: 'false'
        type: boolean
env:
  RUST_VERSION: stable
jobs:
  # Build native library once and share with all test jobs
  build-native:
    name: Build Native Library
    runs-on: ubuntu-24.04
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v6
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: ${{ env.RUST_VERSION }}
          jdk-version: 17
      - name: Restore Cargo cache
        uses: actions/cache/restore@v5
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            native/target
          key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
          restore-keys: |
            ${{ runner.os }}-cargo-ci-
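      # 'ci' is a custom Cargo profile declared in the native workspace's Cargo.toml;
      # its output lands in native/target/ci/ rather than native/target/release/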
      - name: Build native library (CI profile)
        run: |
          cd native
          cargo build --profile ci
      - name: Upload native library
        uses: actions/upload-artifact@v6
        with:
          name: native-lib-linux
          path: native/target/ci/libcomet.so
          retention-days: 1
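      # Only runs on main refresh the shared cache; PR runs restore it (via restore-keys) but never write it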
      - name: Save Cargo cache
        uses: actions/cache/save@v5
        if: github.ref == 'refs/heads/main'
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            native/target
          key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
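
  # Run the Spark SQL test suites with Comet enabled, across Spark versions and Parquet scan implementations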
  spark-sql-test:
    needs: build-native
    strategy:
      matrix:
        os: [ubuntu-24.04]
        module:
          - {name: "catalyst", args1: "catalyst/test", args2: ""}
- {name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}
- {name: "sql_core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest"}
- {name: "sql_core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest"}
- {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
- {name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
- {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
        # Test combinations:
        # - auto scan: all Spark versions (3.4, 3.5, 4.0)
        # - native_comet: Spark 3.4, 3.5
        # - native_iceberg_compat: Spark 3.5 only
        config:
          - {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'auto', scan-env: ''}
          - {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'auto', scan-env: ''}
          - {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto', scan-env: ''}
          - {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'native_comet', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_comet'}
          - {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'native_comet', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_comet'}
          - {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'native_iceberg_compat', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_iceberg_compat'}
        # Skip sql_hive-1 for Spark 4.0 due to https://github.com/apache/datafusion-comet/issues/2946
        exclude:
          - config: {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto', scan-env: ''}
            module: {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
      fail-fast: false
    name: spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }}
    runs-on: ${{ matrix.os }}
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v6
      - name: Setup Rust & Java toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: ${{ env.RUST_VERSION }}
          jdk-version: ${{ matrix.config.java }}
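      # Drop the prebuilt CI-profile library into target/release/, where the Spark build
      # presumably looks for it, since the native build is skipped below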
      - name: Download native library
        uses: actions/download-artifact@v7
        with:
          name: native-lib-linux
          path: native/target/release/
      - name: Setup Spark
        uses: ./.github/actions/setup-spark-builder
        with:
          spark-version: ${{ matrix.config.spark-full }}
          spark-short-version: ${{ matrix.config.spark-short }}
          skip-native-build: true
      - name: Run Spark tests
        run: |
          cd apache-spark
          rm -rf /root/.m2/repository/org/apache/parquet # the cached Parquet artifacts somehow require cleanup before the build
          ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ${{ matrix.config.scan-env }} ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
            build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
          if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
            # collect the unique Comet fallback reasons from all unit-test logs
            find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log
          fi
        env:
          LC_ALL: "C.UTF-8"
      - name: Upload fallback log
        if: ${{ github.event.inputs.collect-fallback-logs == 'true' }}
        uses: actions/upload-artifact@v6
        with:
          name: fallback-log-spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}-spark-${{ matrix.config.spark-full }}
          path: "**/fallback.log"
  merge-fallback-logs:
    if: ${{ github.event.inputs.collect-fallback-logs == 'true' }}
    name: merge-fallback-logs
    needs: [spark-sql-test]
    runs-on: ubuntu-24.04
    steps:
      # no 'name' filter: download every artifact from this run, each into its own subdirectory
      - name: Download fallback log artifacts
        uses: actions/download-artifact@v7
        with:
          path: fallback-logs/
      - name: Merge fallback logs
        run: |
          find ./fallback-logs/ -type f -name "fallback.log" -print0 | xargs -0 cat | sort -u > all_fallback.log
      - name: Upload merged fallback log
        uses: actions/upload-artifact@v6
        with:
          name: all-fallback-log
          path: all_fallback.log