Skip to content

Commit d793744

Browse files
authored
[CORE] Use build/mvn wrapper for scheduled jobs and in Dockerfiles (#11515)
* [CORE] Use build/mvn wrapper and remove setup-helper.sh
  - Remove SETUP env var and setup-helper.sh dependency from workflows
  - Replace raw mvn commands with ./build/mvn wrapper
  - Remove manual Maven installation (MAVEN_HOME/PATH exports)
  - Remove wget from yum install since Maven download is no longer needed
  - Rename 'Setup java and maven' steps to 'Setup java'
  - Update all Dockerfiles to use ./build/mvn:
    - Dockerfile.centos7-static-build
    - Dockerfile.centos7-gcc13-static-build
    - Dockerfile.centos8-static-build
    - Dockerfile.centos8-gcc13-static-build
    - Dockerfile.centos8-dynamic-build
    - Dockerfile.centos9-static-build
    - Dockerfile.centos9-dynamic-build
    - Dockerfile.cudf (removed system maven install)
  - Remove inline Maven download/install code from dynamic-build Dockerfiles
  - Delete .github/workflows/util/setup-helper.sh (no longer needed)

* [CORE] Fix MVN_CMD path for subdirectory execution

* [CORE] Use absolute path for build/mvn in openeuler jobs

* [CORE] Inline Hadoop/HDFS setup in velox_backend_x86.yml

* [GLUTEN-11515][CORE] Fix install-resources.sh to allow sourcing for function definitions
  Move install_hadoop and setup_hdfs functions before the case statement, and wrap the case
  statement in a BASH_SOURCE check so the script can be sourced to access the functions
  without immediately executing the case statement.
1 parent 3311405 commit d793744

18 files changed

+181
-282
lines changed

.github/workflows/build_bundle_package.yml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ name: Build bundle package
1818
env:
1919
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
2020
CCACHE_DIR: "${{ github.workspace }}/.ccache"
21-
SETUP: 'bash .github/workflows/util/setup-helper.sh'
2221

2322
concurrency:
2423
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
@@ -95,16 +94,15 @@ jobs:
9594
with:
9695
name: velox-arrow-jar-centos-7-${{github.sha}}
9796
path: /root/.m2/repository/org/apache/arrow/
98-
- name: Setup java and maven
97+
- name: Setup java
9998
run: |
10099
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* && \
101100
sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* && \
102-
yum update -y && yum install -y java-1.8.0-openjdk-devel wget
103-
$SETUP install_maven
101+
yum update -y && yum install -y java-1.8.0-openjdk-devel
104102
- name: Build for Spark ${{ github.event.inputs.spark }}
105103
run: |
106104
cd $GITHUB_WORKSPACE/ && \
107-
mvn clean install -P${{ github.event.inputs.spark }} -Dhadoop.version=${{ github.event.inputs.hadoop }} -Pbackends-velox -Pceleborn -Puniffle -DskipTests -Dmaven.source.skip
105+
./build/mvn clean install -P${{ github.event.inputs.spark }} -Dhadoop.version=${{ github.event.inputs.hadoop }} -Pbackends-velox -Pceleborn -Puniffle -DskipTests -Dmaven.source.skip
108106
- name: Upload bundle package
109107
uses: actions/upload-artifact@v4
110108
with:

.github/workflows/docker_image.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ on:
2121
- main
2222
paths:
2323
- '.github/workflows/docker_image.yml'
24-
- '.github/workflows/util/install-spark-resources.sh'
24+
- '.github/workflows/util/install-resources.sh'
2525
- 'dev/docker/*'
2626
- 'dev/docker/cudf/*'
2727
schedule:

.github/workflows/flink.yml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ on:
1919
pull_request:
2020
paths:
2121
- '.github/workflows/flink.yml'
22+
- 'build/mvn'
2223
- 'gluten-flink/**'
2324

2425
env:
@@ -63,16 +64,16 @@ jobs:
6364
git clone -b gluten-0530 https://github.com/bigo-sg/velox4j.git
6465
cd velox4j && git reset --hard 288d181a1b05c47f1f17339eb498dd6375f7aec8
6566
git apply $GITHUB_WORKSPACE/gluten-flink/patches/fix-velox4j.patch
66-
mvn clean install -DskipTests -Dgpg.skip -Dspotless.skip=true
67+
$GITHUB_WORKSPACE/build/mvn clean install -DskipTests -Dgpg.skip -Dspotless.skip=true
6768
cd ..
6869
git clone https://github.com/nexmark/nexmark.git
6970
cd nexmark
70-
mvn clean install -DskipTests
71+
$GITHUB_WORKSPACE/build/mvn clean install -DskipTests
7172
- name: Build Gluten Flink
7273
run: |
7374
cd $GITHUB_WORKSPACE/gluten-flink
74-
mvn clean package -Dmaven.test.skip=true
75+
$GITHUB_WORKSPACE/build/mvn clean package -Dmaven.test.skip=true
7576
- name: Run Unit Tests
7677
run: |
7778
cd $GITHUB_WORKSPACE/gluten-flink
78-
mvn test
79+
$GITHUB_WORKSPACE/build/mvn test

.github/workflows/scala_code_format.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,6 @@ concurrency:
3737
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
3838
cancel-in-progress: true
3939

40-
env:
41-
SETUP: 'bash .github/workflows/util/setup-helper.sh'
42-
4340
jobs:
4441

4542
scala-format-check:

.github/workflows/util/install-spark-resources.sh renamed to .github/workflows/util/install-resources.sh

Lines changed: 116 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,83 @@
1616

1717
# Download Spark resources, required by some Spark UTs. The resource path should be set
1818
# for spark.test.home in mvn test.
19+
#
20+
# This file can be:
21+
# 1. Executed directly: ./install-resources.sh <spark-version> [install-dir]
22+
# 2. Sourced to use functions: source install-resources.sh; install_hadoop; setup_hdfs
1923

2024
set -e
2125

26+
# Install Hadoop binary
27+
function install_hadoop() {
28+
echo "Installing Hadoop..."
29+
30+
apt-get update -y
31+
apt-get install -y curl tar gzip
32+
33+
local HADOOP_VERSION=3.3.6
34+
curl -fsSL -o hadoop.tgz "https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz"
35+
tar -xzf hadoop.tgz --no-same-owner --no-same-permissions
36+
rm -f hadoop.tgz
37+
38+
export HADOOP_HOME="$PWD/hadoop-${HADOOP_VERSION}"
39+
export PATH="$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH"
40+
export LD_LIBRARY_PATH="$HADOOP_HOME/lib/native:$LD_LIBRARY_PATH"
41+
42+
if [ -n "$GITHUB_ENV" ]; then
43+
echo "HADOOP_HOME=$HADOOP_HOME" >> $GITHUB_ENV
44+
echo "LD_LIBRARY_PATH=$HADOOP_HOME/lib/native:$LD_LIBRARY_PATH" >> $GITHUB_ENV
45+
echo "$HADOOP_HOME/bin" >> $GITHUB_PATH
46+
fi
47+
}
48+
49+
# Setup HDFS namenode and datanode
50+
function setup_hdfs() {
51+
export HADOOP_CONF_DIR="$HADOOP_HOME/etc/hadoop"
52+
53+
cat > "$HADOOP_CONF_DIR/core-site.xml" <<'EOF'
54+
<configuration>
55+
<property>
56+
<name>fs.defaultFS</name>
57+
<value>hdfs://localhost:9000</value>
58+
</property>
59+
</configuration>
60+
EOF
61+
62+
cat > "$HADOOP_CONF_DIR/hdfs-site.xml" <<'EOF'
63+
<configuration>
64+
<property><name>dfs.replication</name><value>1</value></property>
65+
<property><name>dfs.namenode.rpc-address</name><value>localhost:9000</value></property>
66+
<property><name>dfs.namenode.http-address</name><value>localhost:9870</value></property>
67+
<property><name>dfs.datanode.address</name><value>localhost:9866</value></property>
68+
<property><name>dfs.datanode.http.address</name><value>localhost:9864</value></property>
69+
<property><name>dfs.permissions.enabled</name><value>false</value></property>
70+
</configuration>
71+
EOF
72+
73+
HDFS_TMP="${RUNNER_TEMP:-/tmp}/hdfs"
74+
mkdir -p "$HDFS_TMP/nn" "$HDFS_TMP/dn"
75+
76+
perl -0777 -i -pe 's#</configuration># <property>\n <name>dfs.namenode.name.dir</name>\n <value>file:'"$HDFS_TMP"'/nn</value>\n </property>\n <property>\n <name>dfs.datanode.data.dir</name>\n <value>file:'"$HDFS_TMP"'/dn</value>\n </property>\n</configuration>#s' \
77+
"$HADOOP_CONF_DIR/hdfs-site.xml"
78+
79+
if [ -n "${GITHUB_ENV:-}" ]; then
80+
echo "HADOOP_CONF_DIR=$HADOOP_CONF_DIR" >> "$GITHUB_ENV"
81+
echo "HADOOP_HOME=$HADOOP_HOME" >> "$GITHUB_ENV"
82+
fi
83+
84+
"$HADOOP_HOME/bin/hdfs" namenode -format -force -nonInteractive
85+
"$HADOOP_HOME/sbin/hadoop-daemon.sh" start namenode
86+
"$HADOOP_HOME/sbin/hadoop-daemon.sh" start datanode
87+
88+
for i in {1..60}; do
89+
"$HADOOP_HOME/bin/hdfs" dfs -ls / >/dev/null 2>&1 && break
90+
sleep 1
91+
done
92+
93+
"$HADOOP_HOME/bin/hdfs" dfs -ls /
94+
}
95+
2296
# Installs Spark binary and source releases with:
2397
# 1 - spark version
2498
# 2 - hadoop version
@@ -85,42 +159,45 @@ function install_spark() {
85159
rm -rf "${local_source}"
86160
}
87161

88-
INSTALL_DIR=${2:-/opt/}
89-
mkdir -p ${INSTALL_DIR}
90-
91-
case "$1" in
92-
3.3)
93-
# Spark-3.3
94-
cd ${INSTALL_DIR} && \
95-
install_spark "3.3.1" "3" "2.12"
96-
;;
97-
3.4)
98-
# Spark-3.4
99-
cd ${INSTALL_DIR} && \
100-
install_spark "3.4.4" "3" "2.12"
101-
;;
102-
3.5)
103-
# Spark-3.5
104-
cd ${INSTALL_DIR} && \
105-
install_spark "3.5.5" "3" "2.12"
106-
;;
107-
3.5-scala2.13)
108-
# Spark-3.5, scala 2.13
109-
cd ${INSTALL_DIR} && \
110-
install_spark "3.5.5" "3" "2.13"
111-
;;
112-
4.0)
113-
# Spark-4.0, scala 2.12 // using 2.12 as a hack as 4.0 does not have 2.13 suffix
114-
cd ${INSTALL_DIR} && \
115-
install_spark "4.0.1" "3" "2.12"
116-
;;
117-
4.1)
118-
# Spark-4.x, scala 2.12 // using 2.12 as a hack as 4.0 does not have 2.13 suffix
119-
cd ${INSTALL_DIR} && \
120-
install_spark "4.1.1" "3" "2.12"
121-
;;
122-
*)
123-
echo "Spark version is expected to be specified."
124-
exit 1
125-
;;
126-
esac
162+
# Only run install_spark when script is executed directly (not sourced)
163+
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
164+
INSTALL_DIR=${2:-/opt/}
165+
mkdir -p ${INSTALL_DIR}
166+
167+
case "$1" in
168+
3.3)
169+
# Spark-3.3
170+
cd ${INSTALL_DIR} && \
171+
install_spark "3.3.1" "3" "2.12"
172+
;;
173+
3.4)
174+
# Spark-3.4
175+
cd ${INSTALL_DIR} && \
176+
install_spark "3.4.4" "3" "2.12"
177+
;;
178+
3.5)
179+
# Spark-3.5
180+
cd ${INSTALL_DIR} && \
181+
install_spark "3.5.5" "3" "2.12"
182+
;;
183+
3.5-scala2.13)
184+
# Spark-3.5, scala 2.13
185+
cd ${INSTALL_DIR} && \
186+
install_spark "3.5.5" "3" "2.13"
187+
;;
188+
4.0)
189+
# Spark-4.0, scala 2.12 // using 2.12 as a hack as 4.0 does not have 2.13 suffix
190+
cd ${INSTALL_DIR} && \
191+
install_spark "4.0.1" "3" "2.12"
192+
;;
193+
4.1)
194+
# Spark-4.x, scala 2.12 // using 2.12 as a hack as 4.0 does not have 2.13 suffix
195+
cd ${INSTALL_DIR} && \
196+
install_spark "4.1.1" "3" "2.12"
197+
;;
198+
*)
199+
echo "Spark version is expected to be specified."
200+
exit 1
201+
;;
202+
esac
203+
fi

.github/workflows/util/setup-helper.sh

Lines changed: 0 additions & 131 deletions
This file was deleted.

.github/workflows/velox_backend_enhanced.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ jobs:
229229
- name: Prepare Spark Resources for Spark 3.5.5
230230
run: |
231231
rm -rf /opt/shims/spark35
232-
bash .github/workflows/util/install-spark-resources.sh 3.5
232+
bash .github/workflows/util/install-resources.sh 3.5
233233
- name: Build and Run unit test for Spark 3.5.5 (slow tests)
234234
run: |
235235
cd $GITHUB_WORKSPACE/

0 commit comments

Comments (0)