|
| 1 | +#!/usr/bin/env bash |
| 2 | +# Licensed to the Apache Software Foundation (ASF) under one |
| 3 | +# or more contributor license agreements. See the NOTICE file |
| 4 | +# distributed with this work for additional information |
| 5 | +# regarding copyright ownership. The ASF licenses this file |
| 6 | +# to you under the Apache License, Version 2.0 (the |
| 7 | +# "License"); you may not use this file except in compliance |
| 8 | +# with the License. You may obtain a copy of the License at |
| 9 | +# |
| 10 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | +# |
| 12 | +# Unless required by applicable law or agreed to in writing, |
| 13 | +# software distributed under the License is distributed on an |
| 14 | +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 15 | +# KIND, either express or implied. See the License for the |
| 16 | +# specific language governing permissions and limitations |
| 17 | +# under the License. |
| 18 | + |
| 19 | +# Script to regenerate golden files for plan stability testing. |
| 20 | +# This script must be run from the root of the Comet repository. |
| 21 | +# |
| 22 | +# Usage: ./dev/regenerate-golden-files.sh [--spark-version <version>] |
| 23 | +# |
| 24 | +# Options: |
| 25 | +# --spark-version <version> Only regenerate for specified Spark version (3.4, 3.5, or 4.0) |
| 26 | +# If not specified, regenerates for all versions. |
| 27 | +# |
| 28 | +# Examples: |
| 29 | +# ./dev/regenerate-golden-files.sh # Regenerate for all Spark versions |
| 30 | +# ./dev/regenerate-golden-files.sh --spark-version 3.5 # Regenerate only for Spark 3.5 |
| 31 | + |
# Abort on the first failing command, and treat a pipeline as failed when any
# of its stages fails (not just the last one).
set -eo pipefail
| 34 | + |
#######################################
# Check for JDK 17 or later (required for Spark 4.0).
# Globals:   JAVA_HOME (read)
# Arguments: none
# Outputs:   one [INFO] line on success, [ERROR] lines on failure (stdout)
# Returns:   0 when the JDK is new enough; otherwise exits the shell with 1
#######################################
check_jdk_version() {
  local version_output version_string major
  # Matches the quoted token in e.g. `openjdk version "17.0.9" 2023-10-17`.
  local -r version_re='version "([^"]+)"'
  # Legacy scheme: 1.8.0_292 -> major version is the second component (8).
  local -r legacy_re='^1\.([0-9]+)'
  # Current scheme: 17, 17.0.9, 22-ea -> major version is the leading number.
  local -r modern_re='^([0-9]+)'

  if [ -z "${JAVA_HOME:-}" ]; then
    echo "[ERROR] JAVA_HOME is not set"
    exit 1
  fi

  if ! version_output=$("$JAVA_HOME/bin/java" -version 2>&1); then
    echo "[ERROR] Failed to run $JAVA_HOME/bin/java -version"
    exit 1
  fi

  # Search the whole output instead of only its first line: the JVM may print
  # notices such as "Picked up JAVA_TOOL_OPTIONS: ..." before the version line.
  if [[ ! "$version_output" =~ $version_re ]]; then
    echo "[ERROR] Unable to determine JDK version from $JAVA_HOME/bin/java -version"
    exit 1
  fi
  version_string="${BASH_REMATCH[1]}"

  # The legacy pattern must be tried on the full version string; once it is
  # truncated at the first dot, "1.8.0" and "1" are indistinguishable.
  if [[ "$version_string" =~ $legacy_re ]]; then
    major="${BASH_REMATCH[1]}"
  elif [[ "$version_string" =~ $modern_re ]]; then
    major="${BASH_REMATCH[1]}"
  else
    # Refuse to continue: a non-numeric value would make the comparison below
    # error out, and an erroring test inside `if` silently counts as "false".
    echo "[ERROR] Unable to parse JDK version: $version_string"
    exit 1
  fi

  if [ "$major" -lt 17 ]; then
    echo "[ERROR] JDK 17 or later is required for Spark 4.0 compatibility"
    echo "[ERROR] Current JDK version: $major"
    echo "[ERROR] Please set JAVA_HOME to point to JDK 17 or later"
    exit 1
  fi

  echo "[INFO] JDK version check passed: version $major"
}
| 59 | + |
# Abort unless the current directory looks like the Comet repository root,
# i.e. it contains a pom.xml file plus spark/ and native/ directories.
check_repo_root() {
  if [[ -f "pom.xml" && -d "spark" && -d "native" ]]; then
    return 0
  fi

  echo "[ERROR] This script must be run from the root of the Comet repository"
  exit 1
}
| 67 | + |
#######################################
# Build native code by running `cargo build` inside the native/ directory.
# Arguments: none
# Outputs:   a banner on stdout, followed by whatever cargo prints
# Returns:   0 on success; the failing status of `cd` or `cargo build` otherwise
#######################################
build_native() {
  echo ""
  echo "=============================================="
  echo "[INFO] Building native code"
  echo "=============================================="
  # Run in a subshell so the caller's working directory is left untouched even
  # when the build fails (a plain `cd native && ... && cd ..` would strand the
  # caller inside native/ on failure).
  (cd native && cargo build)
}
| 76 | + |
#######################################
# Install Comet for a specific Spark version by running `./mvnw install`
# (tests skipped) with the matching spark-<version> Maven profile.
# Arguments: $1 - Spark version used to select the profile, e.g. 3.5
# Outputs:   a banner on stdout, followed by whatever Maven prints
# Returns:   the exit status of ./mvnw; aborts if $1 is missing or empty
#######################################
install_for_spark_version() {
  local spark_version=${1:?install_for_spark_version requires a Spark version}

  echo ""
  echo "=============================================="
  echo "[INFO] Installing Comet for Spark $spark_version"
  echo "=============================================="
  # Quoted so the profile flag always reaches Maven as a single argument.
  ./mvnw install -DskipTests "-Pspark-${spark_version}"
}
| 86 | + |
#######################################
# Regenerate golden files for a specific Spark version by running both Comet
# TPC-DS plan stability suites with SPARK_GENERATE_GOLDEN_FILES=1.
# Arguments: $1 - Spark version used to select the Maven profile, e.g. 3.5
# Outputs:   a banner and per-suite [INFO] lines on stdout, plus Maven output
# Returns:   the exit status of the last ./mvnw run; aborts if $1 is missing
#######################################
regenerate_golden_files() {
  local spark_version=${1:?regenerate_golden_files requires a Spark version}
  local suite

  echo ""
  echo "=============================================="
  echo "[INFO] Regenerating golden files for Spark $spark_version"
  echo "=============================================="

  for suite in \
    CometTPCDSV1_4_PlanStabilitySuite \
    CometTPCDSV2_7_PlanStabilitySuite; do
    echo "[INFO] Running ${suite}..."
    # The variable is set only for this Maven invocation; the profile flag is
    # quoted so it always reaches Maven as a single argument.
    SPARK_GENERATE_GOLDEN_FILES=1 ./mvnw -pl spark \
      -Dsuites="org.apache.spark.sql.comet.${suite}" \
      "-Pspark-${spark_version}" -nsu test
  done
}
| 106 | + |
#######################################
# Main script: parse options, check the environment, build the native code,
# then install Comet and regenerate golden files for each selected Spark
# version.
# Globals:   SPARK_HOME (set to the current directory and exported)
# Arguments: [--spark-version <3.4|3.5|4.0>] [-h|--help]
# Outputs:   progress and error messages on stdout
# Returns:   exits 0 after --help, 1 on invalid usage; otherwise the status of
#            the last step
#######################################
main() {
  local target_version=""
  local version
  local -a versions

  # Parse command line arguments
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --spark-version)
        # Without this guard a missing value makes `shift 2` fail, which ends
        # the script silently under `set -e` (and would loop forever without
        # it). An explicitly empty value is rejected too, so it cannot be
        # mistaken for "all versions".
        if [[ -z "${2:-}" ]]; then
          echo "[ERROR] Option --spark-version requires a value"
          echo "Use --help for usage information"
          exit 1
        fi
        target_version="$2"
        shift 2
        ;;
      -h|--help)
        echo "Usage: $0 [--spark-version <version>]"
        echo ""
        echo "Options:"
        echo " --spark-version <version> Only regenerate for specified Spark version (3.4, 3.5, or 4.0)"
        echo " If not specified, regenerates for all versions."
        exit 0
        ;;
      *)
        echo "[ERROR] Unknown option: $1"
        echo "Use --help for usage information"
        exit 1
        ;;
    esac
  done

  # Validate target version if specified
  if [[ -n "$target_version" && ! "$target_version" =~ ^(3\.4|3\.5|4\.0)$ ]]; then
    echo "[ERROR] Invalid Spark version: $target_version"
    echo "[ERROR] Supported versions: 3.4, 3.5, 4.0"
    exit 1
  fi

  check_repo_root
  check_jdk_version

  # Set SPARK_HOME to current directory (required for golden file output).
  # Assignment and export are separate so a failing `pwd` is not masked by the
  # exit status of `export`.
  SPARK_HOME=$(pwd)
  export SPARK_HOME
  echo "[INFO] SPARK_HOME set to: $SPARK_HOME"

  # Build native code first
  build_native

  # Determine which versions to process
  if [[ -n "$target_version" ]]; then
    versions=("$target_version")
  else
    versions=("3.4" "3.5" "4.0")
  fi

  # Install and regenerate for each version
  for version in "${versions[@]}"; do
    install_for_spark_version "$version"
    regenerate_golden_files "$version"
  done

  echo ""
  echo "=============================================="
  echo "[INFO] Golden file regeneration complete!"
  echo "=============================================="
  echo ""
  echo "The golden files have been updated in:"
  echo " spark/src/test/resources/tpcds-plan-stability/"
  echo ""
  echo "Please review the changes with 'git diff' before committing."
}
| 177 | + |
# Run the script, forwarding every command-line argument to main unchanged.
main "$@"
0 commit comments