Skip to content

Commit f120326

Browse files
committed
upmerge
2 parents af871e3 + 267ad4c commit f120326

File tree

127 files changed

+11568
-2905
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

127 files changed

+11568
-2905
lines changed

.github/actions/rust-test/action.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,5 +70,7 @@ runs:
7070
shell: bash
7171
run: |
7272
cd native
73+
# Set LD_LIBRARY_PATH to include JVM library path for tests that use JNI
74+
export LD_LIBRARY_PATH=${JAVA_HOME}/lib/server:${LD_LIBRARY_PATH}
7375
RUST_BACKTRACE=1 cargo nextest run
7476

.github/workflows/benchmark-tpcds.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,17 @@ on:
2727
- "doc/**"
2828
- "docs/**"
2929
- "**.md"
30+
- "native/core/benches/**"
31+
- "native/spark-expr/benches/**"
32+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3033
pull_request:
3134
paths-ignore:
3235
- "doc/**"
3336
- "docs/**"
3437
- "**.md"
38+
- "native/core/benches/**"
39+
- "native/spark-expr/benches/**"
40+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3541
# manual trigger
3642
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
3743
workflow_dispatch:

.github/workflows/benchmark-tpch.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,17 @@ on:
2727
- "doc/**"
2828
- "docs/**"
2929
- "**.md"
30+
- "native/core/benches/**"
31+
- "native/spark-expr/benches/**"
32+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3033
pull_request:
3134
paths-ignore:
3235
- "doc/**"
3336
- "docs/**"
3437
- "**.md"
38+
- "native/core/benches/**"
39+
- "native/spark-expr/benches/**"
40+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3541
# manual trigger
3642
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
3743
workflow_dispatch:

.github/workflows/iceberg_spark_test.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,17 @@ on:
2727
- "doc/**"
2828
- "docs/**"
2929
- "**.md"
30+
- "native/core/benches/**"
31+
- "native/spark-expr/benches/**"
32+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3033
pull_request:
3134
paths-ignore:
3235
- "doc/**"
3336
- "docs/**"
3437
- "**.md"
38+
- "native/core/benches/**"
39+
- "native/spark-expr/benches/**"
40+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3541
# manual trigger
3642
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
3743
workflow_dispatch:

.github/workflows/miri.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,17 @@ on:
2323
- "doc/**"
2424
- "docs/**"
2525
- "**.md"
26+
- "native/core/benches/**"
27+
- "native/spark-expr/benches/**"
28+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
2629
pull_request:
2730
paths-ignore:
2831
- "doc/**"
2932
- "docs/**"
3033
- "**.md"
34+
- "native/core/benches/**"
35+
- "native/spark-expr/benches/**"
36+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3137
# manual trigger
3238
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
3339
workflow_dispatch:
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Lightweight CI for benchmark-only changes - verifies compilation and linting
19+
# without running full test suites
20+
21+
name: PR Benchmark Check
22+
23+
concurrency:
24+
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
25+
cancel-in-progress: true
26+
27+
on:
28+
push:
29+
paths:
30+
- "native/core/benches/**"
31+
- "native/spark-expr/benches/**"
32+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
33+
pull_request:
34+
paths:
35+
- "native/core/benches/**"
36+
- "native/spark-expr/benches/**"
37+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
38+
workflow_dispatch:
39+
40+
env:
41+
RUST_VERSION: stable
42+
43+
jobs:
44+
benchmark-check:
45+
name: Benchmark Compile & Lint Check
46+
runs-on: ubuntu-latest
47+
container:
48+
image: amd64/rust
49+
steps:
50+
- uses: actions/checkout@v6
51+
52+
- name: Setup Rust & Java toolchain
53+
uses: ./.github/actions/setup-builder
54+
with:
55+
rust-version: ${{ env.RUST_VERSION }}
56+
jdk-version: 17
57+
58+
- name: Check Cargo fmt
59+
run: |
60+
cd native
61+
cargo fmt --all -- --check --color=never
62+
63+
- name: Check Cargo clippy
64+
run: |
65+
cd native
66+
cargo clippy --color=never --all-targets --workspace -- -D warnings
67+
68+
- name: Check benchmark compilation
69+
run: |
70+
cd native
71+
cargo check --benches
72+
73+
- name: Cache Maven dependencies
74+
uses: actions/cache@v5
75+
with:
76+
path: |
77+
~/.m2/repository
78+
/root/.m2/repository
79+
key: ${{ runner.os }}-benchmark-maven-${{ hashFiles('**/pom.xml') }}
80+
restore-keys: |
81+
${{ runner.os }}-benchmark-maven-
82+
83+
- name: Check Scala compilation and linting
84+
run: |
85+
./mvnw -B compile test-compile scalafix:scalafix -Dscalafix.mode=CHECK -Psemanticdb -DskipTests

.github/workflows/pr_build_linux.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,17 @@ on:
2727
- "doc/**"
2828
- "docs/**"
2929
- "**.md"
30+
- "native/core/benches/**"
31+
- "native/spark-expr/benches/**"
32+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3033
pull_request:
3134
paths-ignore:
3235
- "doc/**"
3336
- "docs/**"
3437
- "**.md"
38+
- "native/core/benches/**"
39+
- "native/spark-expr/benches/**"
40+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3541
# manual trigger
3642
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
3743
workflow_dispatch:
@@ -116,6 +122,7 @@ jobs:
116122
org.apache.comet.exec.CometAsyncShuffleSuite
117123
org.apache.comet.exec.DisableAQECometShuffleSuite
118124
org.apache.comet.exec.DisableAQECometAsyncShuffleSuite
125+
org.apache.spark.shuffle.sort.SpillSorterSuite
119126
- name: "parquet"
120127
value: |
121128
org.apache.comet.parquet.CometParquetWriterSuite
@@ -154,6 +161,7 @@ jobs:
154161
value: |
155162
org.apache.comet.CometExpressionSuite
156163
org.apache.comet.CometExpressionCoverageSuite
164+
org.apache.comet.CometHashExpressionSuite
157165
org.apache.comet.CometTemporalExpressionSuite
158166
org.apache.comet.CometArrayExpressionSuite
159167
org.apache.comet.CometCastSuite

.github/workflows/pr_build_macos.yml

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,17 @@ on:
2727
- "doc/**"
2828
- "docs/**"
2929
- "**.md"
30+
- "native/core/benches/**"
31+
- "native/spark-expr/benches/**"
32+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3033
pull_request:
3134
paths-ignore:
3235
- "doc/**"
3336
- "docs/**"
3437
- "**.md"
38+
- "native/core/benches/**"
39+
- "native/spark-expr/benches/**"
40+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3541
# manual trigger
3642
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
3743
workflow_dispatch:
@@ -57,11 +63,9 @@ jobs:
5763
java_version: "17"
5864
maven_opts: "-Pspark-3.5 -Pscala-2.13"
5965

60-
# TODO fails with OOM
61-
# https://github.com/apache/datafusion-comet/issues/1949
62-
# - name: "Spark 4.0, JDK 17, Scala 2.13"
63-
# java_version: "17"
64-
# maven_opts: "-Pspark-4.0 -Pscala-2.13"
66+
- name: "Spark 4.0, JDK 17, Scala 2.13"
67+
java_version: "17"
68+
maven_opts: "-Pspark-4.0 -Pscala-2.13"
6569

6670
suite:
6771
- name: "fuzz"
@@ -81,6 +85,7 @@ jobs:
8185
org.apache.comet.exec.CometAsyncShuffleSuite
8286
org.apache.comet.exec.DisableAQECometShuffleSuite
8387
org.apache.comet.exec.DisableAQECometAsyncShuffleSuite
88+
org.apache.spark.shuffle.sort.SpillSorterSuite
8489
- name: "parquet"
8590
value: |
8691
org.apache.comet.parquet.CometParquetWriterSuite
@@ -119,6 +124,7 @@ jobs:
119124
value: |
120125
org.apache.comet.CometExpressionSuite
121126
org.apache.comet.CometExpressionCoverageSuite
127+
org.apache.comet.CometHashExpressionSuite
122128
org.apache.comet.CometTemporalExpressionSuite
123129
org.apache.comet.CometArrayExpressionSuite
124130
org.apache.comet.CometCastSuite
@@ -133,6 +139,7 @@ jobs:
133139
- name: "sql"
134140
value: |
135141
org.apache.spark.sql.CometToPrettyStringSuite
142+
136143
fail-fast: false
137144
name: ${{ matrix.os }}/${{ matrix.profile.name }} [${{ matrix.suite.name }}]
138145
runs-on: ${{ matrix.os }}
@@ -145,6 +152,14 @@ jobs:
145152
jdk-version: ${{ matrix.profile.java_version }}
146153
jdk-architecture: aarch64
147154
protoc-architecture: aarch_64
155+
- name: Set thread thresholds envs for spark test on macOS
156+
# see: https://github.com/apache/datafusion-comet/issues/2965
157+
shell: bash
158+
run: |
159+
echo "SPARK_TEST_SQL_SHUFFLE_EXCHANGE_MAX_THREAD_THRESHOLD=256" >> $GITHUB_ENV
160+
echo "SPARK_TEST_SQL_RESULT_QUERY_STAGE_MAX_THREAD_THRESHOLD=256" >> $GITHUB_ENV
161+
echo "SPARK_TEST_HIVE_SHUFFLE_EXCHANGE_MAX_THREAD_THRESHOLD=48" >> $GITHUB_ENV
162+
echo "SPARK_TEST_HIVE_RESULT_QUERY_STAGE_MAX_THREAD_THRESHOLD=48" >> $GITHUB_ENV
148163
- name: Java test steps
149164
uses: ./.github/actions/java-test
150165
with:

.github/workflows/spark_sql_test.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,17 @@ on:
2727
- "doc/**"
2828
- "docs/**"
2929
- "**.md"
30+
- "native/core/benches/**"
31+
- "native/spark-expr/benches/**"
32+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3033
pull_request:
3134
paths-ignore:
3235
- "doc/**"
3336
- "docs/**"
3437
- "**.md"
38+
- "native/core/benches/**"
39+
- "native/spark-expr/benches/**"
40+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3541
# manual trigger
3642
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
3743
workflow_dispatch:

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,6 @@ apache-rat-*.jar
1818
venv
1919
dev/release/comet-rm/workdir
2020
spark/benchmarks
21+
.DS_Store
22+
comet-event-trace.json
23+
__pycache__

0 commit comments

Comments
 (0)