forked from EESSI/software-layer-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbuild.sh
More file actions
executable file
·319 lines (272 loc) · 14.3 KB
/
build.sh
File metadata and controls
executable file
·319 lines (272 loc) · 14.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
#!/usr/bin/env bash
#
# script to build the EESSI software layer. Intended use is that it is called
# by a (batch) job running on a compute node.
#
# This script is part of the EESSI software layer, see
# https://github.com/EESSI/software-layer.git
#
# author: Thomas Roeblitz (@trz42)
#
# license: GPLv2
#
# ASSUMPTIONs:
# - working directory has been prepared by the bot with a checkout of a
# pull request (OR by some other means)
# - the working directory contains a directory 'cfg' where the main config
# file 'job.cfg' has been deposited
# - the directory may contain any additional files referenced in job.cfg
# stop as soon as something fails
set -e

# Make sure we are referring to software-layer as working directory:
# resolve this script's real path and go two levels up (the parent of the
# directory that contains this script).
# All expansions are quoted so a checkout path containing whitespace or glob
# characters is handled correctly.
software_layer_dir=$(dirname "$(dirname "$(realpath "$0")")")

# source utils.sh and cfg_files.sh
# (fatal_error, cfg_load and cfg_get_value used in this script are not
# defined in this file -- presumably they come from these two helpers)
source "${software_layer_dir}/scripts/utils.sh"
source "${software_layer_dir}/scripts/cfg_files.sh"

# defaults
# NOTE: ':=' also assigns the default to JOB_CFG_FILE_OVERRIDE itself when it
# is unset or empty.
export JOB_CFG_FILE="${JOB_CFG_FILE_OVERRIDE:=cfg/job.cfg}"
HOST_ARCH=$(uname -m)

# check if ${JOB_CFG_FILE} exists
if [[ ! -r "${JOB_CFG_FILE}" ]]; then
    fatal_error "job config file (JOB_CFG_FILE=${JOB_CFG_FILE}) does not exist or not readable"
fi

# show the job config in the job output before parsing it
echo "bot/build.sh: showing ${JOB_CFG_FILE} from software-layer side"
cat "${JOB_CFG_FILE}"

echo "bot/build.sh: obtaining configuration settings from '${JOB_CFG_FILE}'"
cfg_load "${JOB_CFG_FILE}"
# HTTP proxy: the value returned by cfg_get_value for site_config/http_proxy
# takes precedence; when that is empty, fall back to env var $http_proxy
HTTP_PROXY=$(cfg_get_value "site_config" "http_proxy")
if [[ -z "${HTTP_PROXY}" ]]; then
    HTTP_PROXY=${http_proxy}
fi
printf '%s\n' "bot/build.sh: HTTP_PROXY='${HTTP_PROXY}'"

# HTTPS proxy: same precedence, with env var $https_proxy as fallback
HTTPS_PROXY=$(cfg_get_value "site_config" "https_proxy")
if [[ -z "${HTTPS_PROXY}" ]]; then
    HTTPS_PROXY=${https_proxy}
fi
printf '%s\n' "bot/build.sh: HTTPS_PROXY='${HTTPS_PROXY}'"

# base location for temporary storage (may be empty, see TODO)
LOCAL_TMP=$(cfg_get_value "site_config" "local_tmp")
printf '%s\n' "bot/build.sh: LOCAL_TMP='${LOCAL_TMP}'"
# TODO should local_tmp be mandatory? --> then we check here and exit if it is not provided

# optional directory that build logs of failing builds can be copied to
BUILD_LOGS_DIR=$(cfg_get_value "site_config" "build_logs_dir")
printf '%s\n' "bot/build.sh: BUILD_LOGS_DIR='${BUILD_LOGS_DIR}'"
# Create directory $1 (including missing parents) and append it to the
# comma-separated list in $SINGULARITY_BIND, (re-)exporting that variable.
# Arguments: $1 - directory to create and bind
# Globals:   SINGULARITY_BIND (read and written)
# The path is quoted throughout so directories containing whitespace work.
bind_dir_into_container() {
    local dir="$1"
    mkdir -p -- "${dir}"
    if [[ -z "${SINGULARITY_BIND}" ]]; then
        export SINGULARITY_BIND="${dir}"
    else
        export SINGULARITY_BIND="${SINGULARITY_BIND},${dir}"
    fi
}

# if $BUILD_LOGS_DIR is set, add it to $SINGULARITY_BIND so the path is available in the build container
if [[ -n "${BUILD_LOGS_DIR}" ]]; then
    bind_dir_into_container "${BUILD_LOGS_DIR}"
fi

# check if path to directory on shared filesystem is specified,
# and use it as location for source tarballs used by EasyBuild if so
SHARED_FS_PATH=$(cfg_get_value "site_config" "shared_fs_path")
echo "bot/build.sh: SHARED_FS_PATH='${SHARED_FS_PATH}'"
# if $SHARED_FS_PATH is set, add it to $SINGULARITY_BIND so the path is available in the build container
if [[ -n "${SHARED_FS_PATH}" ]]; then
    bind_dir_into_container "${SHARED_FS_PATH}"
fi

SINGULARITY_CACHEDIR=$(cfg_get_value "site_config" "container_cachedir")
echo "bot/build.sh: SINGULARITY_CACHEDIR='${SINGULARITY_CACHEDIR}'"
if [[ -n "${SINGULARITY_CACHEDIR}" ]]; then
    # make sure that separate directories are used for different CPU families
    # (HOST_ARCH holds the output of 'uname -m')
    SINGULARITY_CACHEDIR="${SINGULARITY_CACHEDIR}/${HOST_ARCH}"
    export SINGULARITY_CACHEDIR
fi
# Determine the base directory for temporary storage:
# - $TMPDIR wins when it is set and non-empty
# - otherwise ${LOCAL_TMP} is used, after expanding environment variables in it
if [[ -z "${TMPDIR}" ]]; then
    # no newline here: the 'STORAGE=' message below completes this line
    echo -n "setting \$STORAGE by replacing any var in '${LOCAL_TMP}' -> "
    # replace any env variable in ${LOCAL_TMP} with its
    # current value (e.g., a value that is local to the job)
    STORAGE=$(envsubst <<< "${LOCAL_TMP}")
else
    STORAGE="${TMPDIR}"
fi
echo "bot/build.sh: STORAGE='${STORAGE}'"

# make sure ${STORAGE} exists
# (quoted, so a storage path with whitespace is created as ONE directory)
mkdir -p "${STORAGE}"

# make sure the base tmp storage is unique
# (quoted --tmpdir, so whitespace in ${STORAGE} is not split into extra
# template arguments for mktemp)
JOB_STORAGE=$(mktemp --directory --tmpdir="${STORAGE}" bot_job_tmp_XXX)
echo "bot/build.sh: created unique base tmp storage directory at ${JOB_STORAGE}"
# comma-separated list of modules to load before running the container
LOAD_MODULES=$(cfg_get_value "site_config" "load_modules")
printf '%s\n' "bot/build.sh: LOAD_MODULES='${LOAD_MODULES}'"

# singularity/apptainer settings: CONTAINER, HOME, TMPDIR, BIND
CONTAINER=$(cfg_get_value "repository" "container")
# the current working directory becomes /eessi_bot_job inside the container
SINGULARITY_HOME="${PWD}:/eessi_bot_job"
export SINGULARITY_HOME
# container tmp files live below the unique per-job storage directory
SINGULARITY_TMPDIR="${JOB_STORAGE}/singularity_tmpdir"
export SINGULARITY_TMPDIR
mkdir -p ${SINGULARITY_TMPDIR}

# load modules if LOAD_MODULES is not empty
if [[ -z ${LOAD_MODULES} ]]; then
    printf '%s\n' "bot/build.sh: no modules to be loaded"
else
    # split the list on ',' (IFS is only changed for this one 'read')
    IFS=',' read -r -a modules <<< "$(echo "${LOAD_MODULES}")"
    for mod in "${modules[@]}"; do
        printf '%s\n' "bot/build.sh: loading module '${mod}'"
        # intentionally unquoted: whitespace around an entry is dropped
        module load ${mod}
    done
fi
# determine repository to be used from entry .repository in ${JOB_CFG_FILE}
REPOSITORY_ID=$(cfg_get_value "repository" "repo_id")
REPOSITORY_NAME=$(cfg_get_value "repository" "repo_name")
REPOSITORY_VERSION=$(cfg_get_value "repository" "repo_version")

# directory with repository configuration: value from the job config,
# falling back to 'cfg' below the current working directory
EESSI_REPOS_CFG_DIR_OVERRIDE=$(cfg_get_value "repository" "repos_cfg_dir")
if [[ -z "${EESSI_REPOS_CFG_DIR_OVERRIDE}" ]]; then
    EESSI_REPOS_CFG_DIR_OVERRIDE="${PWD}/cfg"
fi
export EESSI_REPOS_CFG_DIR_OVERRIDE
printf '%s\n' "bot/build.sh: EESSI_REPOS_CFG_DIR_OVERRIDE='${EESSI_REPOS_CFG_DIR_OVERRIDE}'"

# determine EESSI version to be used from .repository.repo_version in ${JOB_CFG_FILE}
# here, just set & export EESSI_VERSION_OVERRIDE
# next script (eessi_container.sh) makes use of it via sourcing init scripts
# (e.g., init/eessi_defaults or init/minimal_eessi_env)
EESSI_VERSION_OVERRIDE=${REPOSITORY_VERSION}
export EESSI_VERSION_OVERRIDE
printf '%s\n' "bot/build.sh: EESSI_VERSION_OVERRIDE='${EESSI_VERSION_OVERRIDE}'"

# determine CVMFS repo to be used from .repository.repo_name in ${JOB_CFG_FILE}
# here, just set EESSI_CVMFS_REPO_OVERRIDE, a bit further down
# "source init/eessi_defaults" via sourcing init/minimal_eessi_env
# Note: iff ${EESSI_DEV_PROJECT} is defined (building for dev.eessi.io), then we
#       append the project subdirectory to ${EESSI_CVMFS_REPO_OVERRIDE}
EESSI_CVMFS_REPO_OVERRIDE="/cvmfs/${REPOSITORY_NAME}"
if [[ -n "${EESSI_DEV_PROJECT}" ]]; then
    EESSI_CVMFS_REPO_OVERRIDE+="/${EESSI_DEV_PROJECT}"
fi
export EESSI_CVMFS_REPO_OVERRIDE
printf '%s\n' "bot/build.sh: EESSI_CVMFS_REPO_OVERRIDE='${EESSI_CVMFS_REPO_OVERRIDE}'"

# determine CPU architecture to be used from entry .architecture in ${JOB_CFG_FILE}
# fallbacks:
#  - ${CPU_TARGET} handed over from bot
#  - left empty to let downstream script(s) determine subdir to be used
EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(cfg_get_value "architecture" "software_subdir")
if [[ -z "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then
    EESSI_SOFTWARE_SUBDIR_OVERRIDE=${CPU_TARGET}
fi
export EESSI_SOFTWARE_SUBDIR_OVERRIDE
printf '%s\n' "bot/build.sh: EESSI_SOFTWARE_SUBDIR_OVERRIDE='${EESSI_SOFTWARE_SUBDIR_OVERRIDE}'"

# determine accelerator target (if any) from .architecture in ${JOB_CFG_FILE}
# bot job config does not include accel subdirectory, so 'accel/' is
# prepended here; without an accelerator the override is exported empty
ACCEL_OVERRIDE=$(cfg_get_value "architecture" "accelerator")
export EESSI_ACCELERATOR_TARGET_OVERRIDE="${ACCEL_OVERRIDE:+accel/${ACCEL_OVERRIDE}}"
printf '%s\n' "bot/build.sh: EESSI_ACCELERATOR_TARGET_OVERRIDE='${EESSI_ACCELERATOR_TARGET_OVERRIDE}'"
# Log the full lscpu and os-release info:
lscpu > "_bot_job${SLURM_JOB_ID}.lscpu"
cat /etc/os-release > "_bot_job${SLURM_JOB_ID}.os"

# Also: fetch CPU flags into an array, so that we can implement a hard check against a reference
# '|| true': this is informational only for now, so a missing "Flags:" line
# (grep exits non-zero) must not abort the whole job under 'set -e'
lscpu_flags_line=$(lscpu | grep "Flags:" || true)

# strip leading "Flags:" and spaces, and put result in a bash array
# (the regex has to be applied to the line captured above; the array stays
# empty when there is no such line)
lscpu_flags=()
if [[ ${lscpu_flags_line} =~ Flags:\ (.*) ]]; then
    # shellcheck disable=SC2206 -- word splitting is wanted: one element per flag
    lscpu_flags=(${BASH_REMATCH[1]})
fi

# for now, just print
echo "bot/build.sh: CPU flags=${lscpu_flags[*]}"

# TODO: an actual comparison with a reference bash array, e.g. through
# diff_result=$(diff <(printf "%s\n" "${lscpu_flags[@]}" | sort) <(printf "%s\n" "${lscpu_flags_ref[@]}" | sort))
# if [ ! -z "$diff_result" ]; then
#     echo "bot/build.sh: ERROR: difference between reported lscpu flags and reference for this ($EESSI_SOFTWARE_SUBDIR_OVERRIDE) CPU architecture. This could mean an incorrect build host was used to build for this target."
# fi
# get EESSI_OS_TYPE from .architecture.os_type in ${JOB_CFG_FILE} (default: linux)
EESSI_OS_TYPE=$(cfg_get_value "architecture" "os_type")
if [[ -z "${EESSI_OS_TYPE}" ]]; then
    EESSI_OS_TYPE=linux
fi
export EESSI_OS_TYPE
printf '%s\n' "bot/build.sh: EESSI_OS_TYPE='${EESSI_OS_TYPE}'"

# prepare arguments to eessi_container.sh common to build and tarball steps;
# the fixed options come first, optional ones are appended only when the
# corresponding setting is non-empty
declare -a COMMON_ARGS=("--verbose" "--access" "rw" "--mode" "run")
if [[ -n "${CONTAINER}" ]]; then
    COMMON_ARGS+=("--container" "${CONTAINER}")
fi
if [[ -n "${HTTP_PROXY}" ]]; then
    COMMON_ARGS+=("--http-proxy" "${HTTP_PROXY}")
fi
if [[ -n "${HTTPS_PROXY}" ]]; then
    COMMON_ARGS+=("--https-proxy" "${HTTPS_PROXY}")
fi
if [[ -n "${REPOSITORY_ID}" ]]; then
    COMMON_ARGS+=("--repository" "${REPOSITORY_ID}")
fi

# Also expose software.eessi.io when configured for dev.eessi.io
# Need software.eessi.io for the compat layer
case "${REPOSITORY_NAME}" in
    "dev.eessi.io")
        COMMON_ARGS+=("--repository" "software.eessi.io,access=ro")
        ;;
esac

# add $software_layer_dir and /dev as extra bind paths
#  - $software_layer_dir is needed because it is used as prefix for running scripts
#  - /dev is needed to access /dev/fuse
COMMON_ARGS+=("--extra-bind-paths" "${software_layer_dir},/dev")

# pass through '--contain' to avoid leaking in scripts into the container session
# note, --pass-through can be used multiple times if needed
COMMON_ARGS+=("--pass-through" "--contain")
# make sure to use the same parent dir for storing tarballs of tmp
PREVIOUS_TMP_DIR="${PWD}/previous_tmp"

# prepare directory to store tarball of tmp for build step
TARBALL_TMP_BUILD_STEP_DIR="${PREVIOUS_TMP_DIR}/build_step"
mkdir -p "${TARBALL_TMP_BUILD_STEP_DIR}"

# prepare arguments to eessi_container.sh specific to build step
declare -a BUILD_STEP_ARGS=()
BUILD_STEP_ARGS+=("--save" "${TARBALL_TMP_BUILD_STEP_DIR}")
BUILD_STEP_ARGS+=("--storage" "${STORAGE}")

# add options required to handle NVIDIA support
# (nvidia_gpu_available is not defined in this file)
if nvidia_gpu_available; then
    BUILD_STEP_ARGS+=("--nvidia" "all")
else
    BUILD_STEP_ARGS+=("--nvidia" "install")
fi

# Retain location for host injections so we don't reinstall CUDA
# (Always need to run the driver installation as available driver may change)
if [[ -n "${SHARED_FS_PATH}" ]]; then
    BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections")
fi

# prepare arguments to install_software_layer.sh (specific to build step)
# NOTE: every addition to INSTALL_SCRIPT_ARGS has to come AFTER this
#       declaration, because it resets the array to empty
declare -a INSTALL_SCRIPT_ARGS=()
if [[ ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} =~ .*/generic$ ]]; then
    INSTALL_SCRIPT_ARGS+=("--generic")
fi

# Skip CUDA installation for riscv.eessi.io
if [[ "${REPOSITORY_NAME}" == "riscv.eessi.io" ]]; then
    echo "bot/build.sh: disabling CUDA installation for RISC-V repository (${REPOSITORY_NAME})"
    INSTALL_SCRIPT_ARGS+=("--skip-cuda-install")
fi

if [[ -n "${BUILD_LOGS_DIR}" ]]; then
    INSTALL_SCRIPT_ARGS+=("--build-logs-dir" "${BUILD_LOGS_DIR}")
fi
if [[ -n "${SHARED_FS_PATH}" ]]; then
    INSTALL_SCRIPT_ARGS+=("--shared-fs-path" "${SHARED_FS_PATH}")
fi
# file (created in the current directory) collecting stdout+stderr of the build step
build_outerr=$(mktemp build.outerr.XXXX)

# announce the command line that is about to be run
echo "Executing command to build software:"
echo "$software_layer_dir/eessi_container.sh ${COMMON_ARGS[@]} ${BUILD_STEP_ARGS[@]}"
echo "                     -- $software_layer_dir/install_software_layer.sh \"${INSTALL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${build_outerr}"

# assemble the command as an array, then run it; all output is shown and
# appended to ${build_outerr}
build_step_cmd=(
    $software_layer_dir/eessi_container.sh "${COMMON_ARGS[@]}" "${BUILD_STEP_ARGS[@]}"
    -- $software_layer_dir/install_software_layer.sh "${INSTALL_SCRIPT_ARGS[@]}" "$@"
)
"${build_step_cmd[@]}" 2>&1 | tee -a ${build_outerr}

# directory to store tarball of tmp for tarball step
TARBALL_TMP_TARBALL_STEP_DIR=${PREVIOUS_TMP_DIR}/tarball_step
mkdir -p ${TARBALL_TMP_TARBALL_STEP_DIR}

# file (created in the current directory) for output of tarball step
tar_outerr=$(mktemp tar.outerr.XXXX)

# arguments to eessi_container.sh specific to tarball step
declare -a TARBALL_STEP_ARGS=("--save" "${TARBALL_TMP_TARBALL_STEP_DIR}")

# temporary directory to resume from: second space-separated field of the
# build output line(s) containing ' as tmp directory '
BUILD_TMPDIR=$(grep ' as tmp directory ' ${build_outerr} | cut -d ' ' -f 2)
TARBALL_STEP_ARGS+=("--resume" "${BUILD_TMPDIR}")
# seconds since the epoch; makes the tarball name unique
timestamp=$(date +%s)

# determine compression/extension for tarball, check in order of preference
if [[ -x "$(command -v zstd)" ]]; then
    tarball_extension="tar.zst"
elif [[ -x "$(command -v gzip)" ]]; then
    tarball_extension="tar.gz"
else
    tarball_extension="tar"
fi

# to set EESSI_VERSION we need to source init/eessi_defaults now
source "${software_layer_dir}/init/eessi_defaults"

# Note: if ${EESSI_DEV_PROJECT} is defined (building for dev.eessi.io), then we
#       append the project (subdirectory) name to the end tarball name. This is information
#       then used at the ingestion stage. If ${EESSI_DEV_PROJECT} is not defined, nothing is
#       appended
#
# Every printf argument is quoted on purpose: an unquoted empty expansion
# disappears from the argument list, which shifts all later arguments one
# conversion to the left (e.g. with an empty EESSI_DEV_PROJECT, '%b' would
# consume the timestamp and '%d' would print a stray '0').
# Slashes in the subdir / accelerator values are replaced by dashes.
if [[ -z "${EESSI_ACCELERATOR_TARGET_OVERRIDE}" ]]; then
    TARBALL=$(printf "eessi-%s-software-%s-%s-%b%d.${tarball_extension}" \
        "${EESSI_VERSION}" \
        "${EESSI_OS_TYPE}" \
        "${EESSI_SOFTWARE_SUBDIR_OVERRIDE//\//-}" \
        "${EESSI_DEV_PROJECT:+${EESSI_DEV_PROJECT}-}" \
        "${timestamp}")
else
    TARBALL=$(printf "eessi-%s-software-%s-%s-%s-%b%d.${tarball_extension}" \
        "${EESSI_VERSION}" \
        "${EESSI_OS_TYPE}" \
        "${EESSI_SOFTWARE_SUBDIR_OVERRIDE//\//-}" \
        "${EESSI_ACCELERATOR_TARGET_OVERRIDE//\//-}" \
        "${EESSI_DEV_PROJECT:+${EESSI_DEV_PROJECT}-}" \
        "${timestamp}")
fi
export TARBALL
# Export EESSI_DEV_PROJECT to use it (if needed) when making tarball
echo "bot/build.sh: EESSI_DEV_PROJECT='${EESSI_DEV_PROJECT}'"
export EESSI_DEV_PROJECT=${EESSI_DEV_PROJECT}
# value of first parameter to create_tarball.sh - TMP_IN_CONTAINER - needs to be
# synchronised with setting of TMP_IN_CONTAINER in eessi_container.sh
# TODO should we make this a configurable parameter of eessi_container.sh using
# /tmp as default?
TMP_IN_CONTAINER=/tmp
# Tarball step: run create_tarball.sh through eessi_container.sh, using the
# common arguments plus the tarball-step ones (--save / --resume).
# The two echo lines only announce the command; the command itself follows.
# - /eessi_bot_job is the container-side path that SINGULARITY_HOME maps the
#   job's working directory to, so the tarball is presumably written into the
#   job directory -- TODO confirm against eessi_container.sh
# - NOTE(review): ${EESSI_VERSION} and ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} are
#   passed unquoted, so an empty value drops that positional argument instead
#   of passing an empty string; only the accelerator override is quoted
# - stdout and stderr are shown and appended to ${tar_outerr} via tee; unless
#   a sourced script enables pipefail, the pipeline's status is that of tee
echo "Executing command to create tarball:"
echo "$software_layer_dir/eessi_container.sh ${COMMON_ARGS[@]} ${TARBALL_STEP_ARGS[@]}"
echo " -- $software_layer_dir/create_tarball.sh ${TMP_IN_CONTAINER} ${EESSI_VERSION} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} \"${EESSI_ACCELERATOR_TARGET_OVERRIDE}\" /eessi_bot_job/${TARBALL} 2>&1 | tee -a ${tar_outerr}"
$software_layer_dir/eessi_container.sh "${COMMON_ARGS[@]}" "${TARBALL_STEP_ARGS[@]}" \
-- $software_layer_dir/create_tarball.sh ${TMP_IN_CONTAINER} ${EESSI_VERSION} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} "${EESSI_ACCELERATOR_TARGET_OVERRIDE}" /eessi_bot_job/${TARBALL} 2>&1 | tee -a ${tar_outerr}
# always report success to the caller once this point is reached
exit 0