Skip to content

Commit 9601bba

Browse files
authored
chore: Collect fallback reasons for spark sql tests (#2313)
1 parent 70f1236 commit 9601bba

File tree

2 files changed: +78 -22 lines changed

.github/workflows/spark_sql_test.yml

Lines changed: 76 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@ on:
   # manual trigger
   # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
   workflow_dispatch:
+    inputs:
+      collect-fallback-logs:
+        description: 'Whether to collect Comet fallback reasons from spark sql unit test logs'
+        required: false
+        default: 'false'
+        type: boolean
 
 env:
   RUST_VERSION: stable
@@ -48,12 +54,12 @@ jobs:
         spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.6'}]
         module:
           - {name: "catalyst", args1: "catalyst/test", args2: ""}
-          - {name: "sql/core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}
-          - {name: "sql/core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest"}
-          - {name: "sql/core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest"}
-          - {name: "sql/hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
-          - {name: "sql/hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
-          - {name: "sql/hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
+          - {name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}
+          - {name: "sql_core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest"}
+          - {name: "sql_core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest"}
+          - {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
+          - {name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
+          - {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
       fail-fast: false
     name: spark-sql-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.java-version }}
     runs-on: ${{ matrix.os }}
@@ -75,9 +81,19 @@ jobs:
         run: |
           cd apache-spark
           rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
-          ENABLE_COMET=true build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
+          ENABLE_COMET=true ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
+            build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
+          if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
+            find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log
+          fi
         env:
           LC_ALL: "C.UTF-8"
+      - name: Upload fallback log
+        if: ${{ github.event.inputs.collect-fallback-logs == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: fallback-log-spark-sql-${{ matrix.module.name }}-${{ matrix.os }}-spark-${{ matrix.spark-version.full }}-java-${{ matrix.java-version }}
+          path: "**/fallback.log"
 
   spark-sql-native-native-comet:
     strategy:
@@ -87,12 +103,12 @@ jobs:
         spark-version: [ { short: '3.4', full: '3.4.3' }, { short: '3.5', full: '3.5.6' } ]
         module:
           - { name: "catalyst", args1: "catalyst/test", args2: "" }
-          - { name: "sql/core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest }
-          - { name: "sql/core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest" }
-          - { name: "sql/core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest" }
-          - { name: "sql/hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest" }
-          - { name: "sql/hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest" }
-          - { name: "sql/hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest" }
+          - { name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest }
+          - { name: "sql_core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest" }
+          - { name: "sql_core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest" }
+          - { name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest" }
+          - { name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest" }
+          - { name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest" }
       fail-fast: false
     name: spark-sql-native-comet-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.java-version }}
     runs-on: ${{ matrix.os }}
@@ -114,9 +130,19 @@ jobs:
         run: |
           cd apache-spark
           rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
-          ENABLE_COMET=true COMET_PARQUET_SCAN_IMPL=native_comet build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
+          ENABLE_COMET=true COMET_PARQUET_SCAN_IMPL=native_comet ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
+            build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
+          if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
+            find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log
+          fi
         env:
           LC_ALL: "C.UTF-8"
+      - name: Upload fallback log
+        if: ${{ github.event.inputs.collect-fallback-logs == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: fallback-log-spark-sql-native-comet-${{ matrix.module.name }}-${{ matrix.os }}-spark-${{ matrix.spark-version.full }}-java-${{ matrix.java-version }}
+          path: "**/fallback.log"
 
   spark-sql-native-iceberg-compat:
     strategy:
@@ -126,12 +152,12 @@ jobs:
         spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.6'}]
         module:
           - {name: "catalyst", args1: "catalyst/test", args2: ""}
-          - {name: "sql/core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}
-          - {name: "sql/core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest"}
-          - {name: "sql/core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest"}
-          - {name: "sql/hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
-          - {name: "sql/hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
-          - {name: "sql/hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
+          - {name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}
+          - {name: "sql_core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest"}
+          - {name: "sql_core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest"}
+          - {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
+          - {name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
+          - {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
       fail-fast: false
     name: spark-sql-iceberg-compat-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.java-version }}
     runs-on: ${{ matrix.os }}
@@ -153,6 +179,35 @@ jobs:
         run: |
           cd apache-spark
           rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
-          ENABLE_COMET=true COMET_PARQUET_SCAN_IMPL=native_iceberg_compat build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
+          ENABLE_COMET=true COMET_PARQUET_SCAN_IMPL=native_iceberg_compat ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
+            build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
+          if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
+            find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log
+          fi
         env:
           LC_ALL: "C.UTF-8"
+      - name: Upload fallback log
+        if: ${{ github.event.inputs.collect-fallback-logs == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: fallback-log-spark-sql-iceberg-compat-${{ matrix.module.name }}-${{ matrix.os }}-spark-${{ matrix.spark-version.full }}-java-${{ matrix.java-version }}
+          path: "**/fallback.log"
+
+  merge-fallback-logs:
+    if: ${{ github.event.inputs.collect-fallback-logs == 'true' }}
+    name: merge-fallback-logs
+    needs: [ spark-sql-auto-scan, spark-sql-native-native-comet, spark-sql-native-iceberg-compat ]
+    runs-on: ubuntu-24.04
+    steps:
+      - name: Download fallback log artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: fallback-logs/
+      - name: Merge fallback logs
+        run: |
+          find ./fallback-logs/ -type f -name "fallback.log" -print0 | xargs -0 cat | sort -u > all_fallback.log
+      - name: Upload merged fallback log
+        uses: actions/upload-artifact@v4
+        with:
+          name: all-fallback-log
+          path: all_fallback.log

common/src/main/scala/org/apache/comet/CometConf.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,8 @@ object CometConf extends ShimCometConf {
     conf("spark.comet.logFallbackReasons.enabled")
       .doc("When this setting is enabled, Comet will log warnings for all fallback reasons.")
       .booleanConf
-      .createWithDefault(false)
+      .createWithDefault(
+        sys.env.getOrElse("ENABLE_COMET_LOG_FALLBACK_REASONS", "false").toBoolean)
 
   val COMET_EXPLAIN_FALLBACK_ENABLED: ConfigEntry[Boolean] =
     conf("spark.comet.explainFallback.enabled")

0 commit comments

Comments
 (0)