Skip to content

Commit ea26629

Browse files
authored
chore: add script to regenerate golden files for plan stability tests (apache#3204)
1 parent d9ea22b commit ea26629

File tree

2 files changed

+198
-1
lines changed

2 files changed

+198
-1
lines changed

dev/regenerate-golden-files.sh

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
#!/usr/bin/env bash
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
19+
# Script to regenerate golden files for plan stability testing.
20+
# This script must be run from the root of the Comet repository.
21+
#
22+
# Usage: ./dev/regenerate-golden-files.sh [--spark-version <version>]
23+
#
24+
# Options:
25+
# --spark-version <version> Only regenerate for specified Spark version (3.4, 3.5, or 4.0)
26+
# If not specified, regenerates for all versions.
27+
#
28+
# Examples:
29+
# ./dev/regenerate-golden-files.sh # Regenerate for all Spark versions
30+
# ./dev/regenerate-golden-files.sh --spark-version 3.5 # Regenerate only for Spark 3.5
31+
32+
# Abort on the first failing command, including failures anywhere in a pipeline.
set -eo pipefail
34+
35+
# Check for JDK 17 or later (required for Spark 4.0)
36+
# Verify that $JAVA_HOME points at JDK 17 or later (required for Spark 4.0).
# Exits non-zero if JAVA_HOME is unset, the version string cannot be parsed,
# or the major version is below 17.
check_jdk_version() {
  if [ -z "$JAVA_HOME" ]; then
    echo "[ERROR] JAVA_HOME is not set"
    exit 1
  fi

  local full_version java_version
  # `java -version` prints to stderr, e.g.: openjdk version "17.0.2" 2022-01-18
  full_version=$("$JAVA_HOME/bin/java" -version 2>&1 | head -n 1 | cut -d'"' -f2)

  # Handle both modern "17.0.x" and legacy "1.8.0" formats. NOTE: the previous
  # code stripped the dots *before* testing for the "1." prefix, so legacy
  # versions were misreported as major version 1; decide on the full string.
  if [[ "$full_version" =~ ^1\. ]]; then
    java_version=$(echo "$full_version" | cut -d'.' -f2)   # 1.8.0 -> 8
  else
    java_version=$(echo "$full_version" | cut -d'.' -f1)   # 17.0.2 -> 17
  fi

  # Fail with a clear message if parsing produced garbage, instead of letting
  # the numeric comparison below emit a cryptic "integer expression" error.
  if [[ ! "$java_version" =~ ^[0-9]+$ ]]; then
    echo "[ERROR] Unable to determine JDK version from $JAVA_HOME/bin/java"
    exit 1
  fi

  if [ "$java_version" -lt 17 ]; then
    echo "[ERROR] JDK 17 or later is required for Spark 4.0 compatibility"
    echo "[ERROR] Current JDK version: $java_version"
    echo "[ERROR] Please set JAVA_HOME to point to JDK 17 or later"
    exit 1
  fi

  echo "[INFO] JDK version check passed: version $java_version"
}
59+
60+
# Check if running from repo root
61+
# Ensure we are at the Comet repository root: the top-level pom.xml plus the
# spark/ and native/ directories must all be present.
check_repo_root() {
  if [ -f "pom.xml" ] && [ -d "spark" ] && [ -d "native" ]; then
    return 0
  fi
  echo "[ERROR] This script must be run from the root of the Comet repository"
  exit 1
}
67+
68+
# Build native code
69+
# Compile the native (Rust) code with cargo. The build runs in a subshell
# (ShellCheck SC2103) so the caller's working directory is preserved even if
# the build fails partway — the original `cd native && ... && cd ..` could
# leave a failing shell inside native/.
build_native() {
  echo ""
  echo "=============================================="
  echo "[INFO] Building native code"
  echo "=============================================="
  (cd native && cargo build)
}
76+
77+
# Install Comet for a specific Spark version
78+
# Build and install the Comet jars into the local Maven repository for the
# given Spark version profile (tests skipped).
# Arguments: $1 - Spark version (e.g. "3.5"), used as the -Pspark-<version> profile.
install_for_spark_version() {
  local spark_version=$1
  echo ""
  echo "=============================================="
  echo "[INFO] Installing Comet for Spark $spark_version"
  echo "=============================================="
  # Quote the profile flag so an unexpected value can never word-split (SC2086).
  ./mvnw install -DskipTests "-Pspark-$spark_version"
}
86+
87+
# Regenerate golden files for a specific Spark version
88+
# Run both TPC-DS plan stability suites with SPARK_GENERATE_GOLDEN_FILES=1 so
# the golden files on disk are rewritten for the given Spark version.
# Arguments: $1 - Spark version (e.g. "3.4"), used as the -Pspark-<version> profile.
regenerate_golden_files() {
  local spark_version=$1

  echo ""
  echo "=============================================="
  echo "[INFO] Regenerating golden files for Spark $spark_version"
  echo "=============================================="

  # Quote the profile flag so the version can never word-split (SC2086).
  echo "[INFO] Running CometTPCDSV1_4_PlanStabilitySuite..."
  SPARK_GENERATE_GOLDEN_FILES=1 ./mvnw -pl spark \
    -Dsuites="org.apache.spark.sql.comet.CometTPCDSV1_4_PlanStabilitySuite" \
    "-Pspark-$spark_version" -nsu test

  echo "[INFO] Running CometTPCDSV2_7_PlanStabilitySuite..."
  SPARK_GENERATE_GOLDEN_FILES=1 ./mvnw -pl spark \
    -Dsuites="org.apache.spark.sql.comet.CometTPCDSV2_7_PlanStabilitySuite" \
    "-Pspark-$spark_version" -nsu test
}
106+
107+
# Main script
108+
# Main entry point: parse CLI options, validate the environment, then install
# Comet and regenerate golden files for each requested Spark version.
main() {
  local target_version=""

  # Parse command line arguments
  while [[ $# -gt 0 ]]; do
    case $1 in
      --spark-version)
        # Guard against a missing value: `shift 2` would otherwise fail and,
        # without set -e, the loop would spin forever on the same argument.
        if [[ $# -lt 2 ]]; then
          echo "[ERROR] --spark-version requires an argument"
          exit 1
        fi
        target_version="$2"
        shift 2
        ;;
      -h|--help)
        echo "Usage: $0 [--spark-version <version>]"
        echo ""
        echo "Options:"
        echo "  --spark-version <version>  Only regenerate for specified Spark version (3.4, 3.5, or 4.0)"
        echo "                             If not specified, regenerates for all versions."
        exit 0
        ;;
      *)
        echo "[ERROR] Unknown option: $1"
        echo "Use --help for usage information"
        exit 1
        ;;
    esac
  done

  # Validate target version if specified
  if [ -n "$target_version" ]; then
    if [[ ! "$target_version" =~ ^(3\.4|3\.5|4\.0)$ ]]; then
      echo "[ERROR] Invalid Spark version: $target_version"
      echo "[ERROR] Supported versions: 3.4, 3.5, 4.0"
      exit 1
    fi
  fi

  check_repo_root
  check_jdk_version

  # SPARK_HOME must point at the repo root for golden file output. Assign and
  # export separately so a failure of $(pwd) is not masked (SC2155).
  SPARK_HOME=$(pwd)
  export SPARK_HOME
  echo "[INFO] SPARK_HOME set to: $SPARK_HOME"

  # Build native code first
  build_native

  # Determine which versions to process
  local versions
  if [ -n "$target_version" ]; then
    versions=("$target_version")
  else
    versions=("3.4" "3.5" "4.0")
  fi

  # Install and regenerate for each version
  local version
  for version in "${versions[@]}"; do
    install_for_spark_version "$version"
    regenerate_golden_files "$version"
  done

  echo ""
  echo "=============================================="
  echo "[INFO] Golden file regeneration complete!"
  echo "=============================================="
  echo ""
  echo "The golden files have been updated in:"
  echo "  spark/src/test/resources/tpcds-plan-stability/"
  echo ""
  echo "Please review the changes with 'git diff' before committing."
}
177+
178+
# Script entry point: forward all command-line arguments to main().
main "$@"

docs/source/contributor-guide/development.md

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,26 @@ Other options for selecting specific suites are described in the [ScalaTest Mave
160160
## Plan Stability Testing
161161

162162
Comet has a plan stability testing framework that can be used to test the stability of the query plans generated by Comet.
163-
The plan stability testing framework is located in the `spark` module and can be run using the following commands.
163+
The plan stability testing framework is located in the `spark` module.
164+
165+
### Using the Helper Script
166+
167+
The easiest way to regenerate golden files is to use the provided script:
168+
169+
```sh
170+
# Regenerate golden files for all Spark versions
171+
./dev/regenerate-golden-files.sh
172+
173+
# Regenerate only for a specific Spark version
174+
./dev/regenerate-golden-files.sh --spark-version 3.5
175+
```
176+
177+
The script verifies that JDK 17+ is configured (required for Spark 4.0), installs Comet for each
178+
Spark version, and runs the plan stability tests with `SPARK_GENERATE_GOLDEN_FILES=1`.
179+
180+
### Manual Instructions
181+
182+
Alternatively, you can run the tests manually using the following commands.
164183

165184
First, Comet needs to be installed for each Spark version to be tested:
166185

0 commit comments

Comments
 (0)