-
Notifications
You must be signed in to change notification settings - Fork 20
Expand file tree
/
Copy pathdeploy.sh
More file actions
438 lines (382 loc) · 14.7 KB
/
deploy.sh
File metadata and controls
438 lines (382 loc) · 14.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
#!/bin/bash
#########################################################################
# JupyterHub Environment and Kernel Deployment Script
#
# Purpose:
# Deploy a github repo to a new conda environment and JupyterHub kernel
# - Implements verification and logging capabilities
#
# Usage:
# bash ./deploy.sh -r <repo_name> -t <tag_name> [-c <conda_prefix>] [-j <jupyter_prefix>] [--dry-run] [--help]
#
# Requires conda, git and python on PATH (checked by check_dependencies).
##########################################################################
# Strict mode:
# -e = exit on error unless in a conditional expression
# -u = treat unset variables as an error
# -o pipefail = exit with error if any command in a pipeline fails--
# ensures error codes from upstream calls are passed through pipes
set -euo pipefail
# Configuration
# The log file name carries the script's start time (to the second). The
# path is relative, so the file lands in the directory the script is run from.
LOG_FILE="deployment_$(date +%Y%m%d_%H%M%S).log"
#######################################
# Print the command-line help text and terminate the script.
# Arguments: none ($0 is used as the program name in the text)
# Outputs:   the usage text on stdout
# Returns:   never returns; always exits with status 1
#######################################
show_usage() {
  local -a usage_lines=(
    "Usage: $0 -r <repo_name> -t <tag_name> [-c <conda_prefix>] [-j <jupyter_prefix>] [--dry-run] [--help]"
    "Deploy a GitHub repo to a new Conda environment and JupyterHub kernel."
    "Required options:"
    "-r, --repo <repo_name> GitHub repository name (e.g., MyUser/my-repo)"
    "-t, --tag <tag_name> Git tag or branch to deploy (e.g., 2025.05.1+testdeploy)"
    "Optional options:"
    "-c, --conda-prefix <conda_dir> Directory in which to install the conda environment"
    "(e.g., /bin/envs). Necessary for installing"
    "environments to a system-wide shared location."
    "-j, --jupyter-prefix <jupyter_dir> Directory in which to install the Jupyter kernel spec"
    "(e.g., /shared/local). Necessary for installing"
    "kernels to a system-wide shared location."
    "-d, --dry-run Show what would happen without making changes"
    "-h, --help Show this help message and exit"
    "Examples:"
    "$0 -r MyUser/my-repo -t 2025.05.1+testdeploy"
    "$0 -r MyUser/my-repo -c /bin/envs -j /shared/local -t 2025.05.1+testdeploy --dry-run"
  )
  # One line of help text per array element
  printf '%s\n' "${usage_lines[@]}"
  exit 1
}
#######################################
# Parse the command line into the script's global settings.
# Globals written:
#   GITHUB_REPO   - value of -r/--repo
#   DEPLOY_TAG    - value of -t/--tag
#   KERNEL_PREFIX - value of -j/--jupyter-prefix, trailing slashes removed
#   CONDA_PREFIX  - value of -c/--conda-prefix, trailing slashes removed
#   DRY_RUN       - "true" when -d/--dry-run was given, otherwise "false"
# Arguments:
#   "$@" - the script's command-line arguments
# Returns:
#   0 on success. --help, an unrecognized option, an option missing its
#   value, or a missing --repo/--tag all end in show_usage, which exits.
# NOTE(review): CONDA_PREFIX is also the variable name conda uses for the
# active environment; this function overwrites it for the rest of the
# script -- confirm that is intended.
#######################################
parse_args() {
  # Default values
  GITHUB_REPO=""
  DEPLOY_TAG=""
  KERNEL_PREFIX=""
  CONDA_PREFIX=""
  DRY_RUN=false

  while [[ $# -gt 0 ]]; do
    # Options that take a value must be followed by one. Without this
    # check "$2" is unset and `set -u` kills the script with an
    # "unbound variable" message instead of a usage error.
    case "$1" in
      -r|--repo|-j|--jupyter-prefix|-c|--conda-prefix|-t|--tag)
        if [[ $# -lt 2 ]]; then
          echo "Error: option '$1' requires a value."
          show_usage
        fi
        ;;
    esac

    case "$1" in
      -r|--repo)
        GITHUB_REPO="$2"
        shift 2
        ;;
      -j|--jupyter-prefix)
        # Remove trailing slashes
        KERNEL_PREFIX=$(printf '%s\n' "$2" | sed 's:/*$::')
        shift 2
        ;;
      -c|--conda-prefix)
        # Remove trailing slashes
        CONDA_PREFIX=$(printf '%s\n' "$2" | sed 's:/*$::')
        shift 2
        ;;
      -t|--tag)
        DEPLOY_TAG="$2"
        shift 2
        ;;
      -d|--dry-run)
        DRY_RUN=true
        log "INFO" "Dry run mode enabled - no changes will be made"
        shift
        ;;
      -h|--help)
        show_usage
        ;;
      *)
        echo "Unrecognized input: $1"
        show_usage
        ;;
    esac
  done

  # Validate required arguments
  if [[ -z "$GITHUB_REPO" || -z "$DEPLOY_TAG" ]]; then
    echo "Error: --repo and --tag are required."
    show_usage
  fi
}
#######################################
# Emit a single timestamped log line of the form
#   [YYYY-MM-DD HH:MM:SS] [LEVEL] message
# Arguments: $1 - level tag (e.g. INFO, WARNING, ERROR)
#            $2 - message text
# Outputs:   the formatted line on stdout
#######################################
log() {
  local severity=$1
  local text=$2
  printf '[%s] [%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$severity" "$text"
}
#######################################
# Start logging: from this call on, everything the script writes to stdout
# or stderr is shown on screen and also appended to $LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: none
#######################################
setup_logging() {
  # Route stdout through tee, which appends to the log file and echoes
  # to the original stdout ...
  exec > >(tee -a "$LOG_FILE")
  # ... then point stderr at that same stream
  exec 2>&1
  log "INFO" "Starting deployment script..."
}
#######################################
# Report a deployment failure through log and stop the script.
# Arguments: $1 - description of what went wrong
# Returns:   never returns; exits with status 1
#######################################
error_exit() {
  local reason=$1
  log "ERROR" "Deployment failed: ${reason}"
  exit 1
}
#######################################
# Make sure the external commands this script itself calls are available.
# NB: these are only the script's own dependencies, not the dependencies
# of the lab notebooks.
# Arguments: none
# Returns:   0 if all commands are found; otherwise calls error_exit
#######################################
check_dependencies() {
  local -a required=(conda git python)
  local tool

  log "INFO" "Checking dependencies..."
  for tool in "${required[@]}"; do
    # Only the exit status matters; discard the path that command -v prints
    if command -v "$tool" > /dev/null 2>&1; then
      continue
    fi
    error_exit "Required command not found: $tool"
  done
  log "INFO" "All dependencies are available"
}
#######################################
# Build the kernels directory path that belongs to a kernel prefix.
# Arguments: $1 - kernel prefix (may be empty)
# Outputs:   "<prefix>/share/jupyter/kernels" on stdout, or an empty line
#            when the prefix is empty
#######################################
format_kernels_dir() {
  local prefix="$1"
  # ${var:+...} expands to nothing when the prefix is empty
  printf '%s\n' "${prefix:+${prefix}/share/jupyter/kernels}"
}
#######################################
# Report whether a Jupyter kernel with the given name already exists.
# Globals:
#   KERNEL_PREFIX (read) - only used when jupyter is not on PATH
# Arguments:
#   $1 - kernel name to look for
# Outputs:
#   "1" on stdout if the kernel exists, "0" if it does not. Nothing is
#   printed on stdout when the check itself fails.
# Returns:
#   0 when the check could be performed.
#   1 when `jupyter kernelspec list` fails, or when jupyter is unavailable
#     and the kernels directory under KERNEL_PREFIX does not exist.
#######################################
kernel_exists() {
  local kernel_name=$1

  # if jupyter is available, use it to check for kernel existence since it
  # will look in all the relevant places; this is preferable because jupyter
  # DOES allow multiple kernels with the same name to exist in different
  # locations, and it silently returns the first one it finds based on its
  # internal search order, which could lead to very confusing bugs--so if
  # ANY path that jupyter checks contains a kernel of the specified name,
  # we want to refuse to make another one with the same name
  if command -v jupyter &> /dev/null; then
    local listing
    local kernel_names
    # Capture the listing on its own so a failing jupyter is detected here.
    # Callers run this function inside $(...), where bash does not apply
    # `set -e` by default, so an unchecked failure would otherwise be
    # reported as "kernel does not exist".
    if ! listing=$(jupyter kernelspec list); then
      printf '%s\n' "kernel_exists: 'jupyter kernelspec list' failed" >&2
      return 1
    fi
    # Drop the header line and keep the first column (the kernel names)
    kernel_names=$(awk 'NR > 1 { print $1 }' <<< "$listing")
    # -F = fixed string, not regex
    # -x = matches whole line (exact match)
    # -q = quiet mode, no output, just exit status
    # A here-string is used instead of `echo | grep -q` so that no pipeline
    # stage can be cut short while pipefail is on.
    if grep -Fxq -- "$kernel_name" <<< "$kernel_names"; then
      echo 1 # Kernel exists
      return 0 # Function succeeded
    fi
  else
    # If jupyter is not available, we have to fall back to the only method
    # left to us, which is to check that there's no kernel of the specified
    # name in the kernel directory the user specified--if they did specify
    # one. If they didn't, we're basically out of luck for checking and
    # will just assume the kernel doesn't exist.
    local formatted_kernel_dir=""
    local dir
    formatted_kernel_dir=$(format_kernels_dir "$KERNEL_PREFIX")
    if [[ -n "$formatted_kernel_dir" ]]; then
      if [[ ! -d "$formatted_kernel_dir" ]]; then
        # a kernel prefix was specified but it isn't a valid directory
        return 1
      fi
      # Compare the name of every directory under the kernels directory
      for dir in "$formatted_kernel_dir"/*; do
        if [[ -d "$dir" && "${dir##*/}" == "$kernel_name" ]]; then
          echo 1 # Kernel exists
          return 0 # Function succeeded
        fi
      done
    fi
  fi

  echo 0 # Kernel does not exist
  return 0 # Function succeeded
}
#######################################
# Undo the creation of the conda environment after a downstream failure,
# then abort the deployment.
# Globals:   CONDA_LOC_CMD (read) - the -n/-p arguments selecting the
#            environment; set in main before this is called
# Arguments: $1 - failure message handed to error_exit
#            $2 - environment name, used only in the log message
# Returns:   does not return normally; finishes by calling error_exit
#######################################
rollback() {
  local reason=$1
  local env_label=$2

  log "WARNING" "Removing conda environment '$env_label' due to error..."
  # A failed removal is only logged; the deployment is aborted either way
  conda env remove "${CONDA_LOC_CMD[@]}" --yes \
    || log "WARNING" "Failed to remove environment with ${CONDA_LOC_CMD[*]}"
  error_exit "$reason"
}
#######################################
# Create the conda environment, install the repo into it and register the
# Jupyter kernel. In dry-run mode only logs what would be done.
# Globals read:
#   DEPLOY_NAME, DEPLOY_TAG, GITHUB_REPO, DRY_RUN, SETUP_TEMP_DIR,
#   CONDA_LOC_CMD (set in main before this function is called)
# Globals written:
#   GITHUB_URL    - https URL of the repository
#   KERNEL_PREFIX - if empty on entry, set to the new environment's
#                   sys.prefix; non-empty for all code that runs afterwards
# Arguments: none
# Returns:
#   0 on success. Failures up to and including environment creation end
#   in error_exit; failures after the environment exists end in rollback
#   (which removes the environment before exiting).
#######################################
setup_new_environment() {
  log "INFO" "Setting up new environment '$DEPLOY_NAME'..."
  local conda_install_cmd
  local repo_install_cmd
  local kernel_install_cmd

  log "INFO" "Creating conda environment with ${CONDA_LOC_CMD[*]}"
  if [[ "$DRY_RUN" == true ]]; then
    log "INFO" "DRY RUN: Would create conda environment"
    log "INFO" "DRY RUN: Would install requirements and repo '$GITHUB_REPO'"
    log "INFO" "DRY RUN: Would install kernel '$DEPLOY_NAME'"
    return
  fi

  # Clone the repository to get requirements
  log "INFO" "Cloning repository to get requirements..."
  # Original author's note: lightweight cloning (e.g. --depth 1) that leaves
  # out full history only works for lightweight (not annotated) tags
  GITHUB_URL="https://github.com/$GITHUB_REPO"
  # Checked explicitly so a failed clone produces a deployment error message
  # instead of a bare `set -e` abort
  if ! git clone --depth 1 --branch "$DEPLOY_TAG" "$GITHUB_URL" "$SETUP_TEMP_DIR"; then
    error_exit "Failed to clone '$GITHUB_URL' at '$DEPLOY_TAG'"
  fi

  # Create new conda environment from environment.yml
  local env_yml_path
  env_yml_path="$SETUP_TEMP_DIR/environment.yml"
  if [[ ! -f "$env_yml_path" ]]; then
    error_exit "Could not find environment.yml"
  fi
  log "INFO" "Found environment.yml, installing conda environment and dependencies..."
  conda_install_cmd=(conda env create --file "$env_yml_path" "${CONDA_LOC_CMD[@]}")
  if ! "${conda_install_cmd[@]}"; then
    error_exit "Failed to install from environment.yml"
  fi

  # Failures before this point just report an error and exit;
  # after this point, we need to roll back the environment creation
  # if anything fails. Note that kernel rollback (if necessary) is handled
  # in the verify_environment function, which is called after this one.

  # Install the repo
  log "INFO" "Installing repo $GITHUB_REPO"
  repo_install_cmd=(conda run "${CONDA_LOC_CMD[@]}" pip install "git+$GITHUB_URL@$DEPLOY_TAG")
  if ! "${repo_install_cmd[@]}"; then
    # rollback's second argument is the environment name, not the repo name
    rollback "Failed to install repo" "$DEPLOY_NAME"
  fi

  # Install the kernel; send to user-specified directory iff KERNEL_PREFIX is
  # set else to new conda env
  if [[ -z "$KERNEL_PREFIX" ]]; then
    # The environment already exists here, so a failed (or empty) lookup
    # must roll it back rather than abort through `set -e`
    if ! KERNEL_PREFIX=$(conda run "${CONDA_LOC_CMD[@]}" python -c 'import sys; print(sys.prefix)') \
      || [[ -z "$KERNEL_PREFIX" ]]; then
      rollback "Failed to determine kernel prefix" "$DEPLOY_NAME"
    fi
  fi

  log "INFO" "Installing kernel $DEPLOY_NAME in $KERNEL_PREFIX ..."
  kernel_install_cmd=(conda run "${CONDA_LOC_CMD[@]}" python -m ipykernel install --name="$DEPLOY_NAME" --display-name="$DEPLOY_NAME" --prefix="$KERNEL_PREFIX")
  if ! "${kernel_install_cmd[@]}"; then
    rollback "Failed to install kernel" "$DEPLOY_NAME"
  fi
}
#######################################
# Verify a newly installed environment: the conda environment must exist,
# the kernel must be visible, and a one-cell notebook must execute with it.
# Globals read:
#   CONDA_PREFIX, CONDA_PATH (only when CONDA_PREFIX is non-empty),
#   CONDA_LOC_CMD, KERNEL_PREFIX, VERIFY_TEMP_DIR
# Arguments:
#   $1 - conda environment name
#   $2 - kernel name
# Returns:
#   0 if every check passes, 1 otherwise. When the notebook cannot be
#   executed the kernel is removed again; removing the conda environment is
#   left to the caller.
#######################################
verify_environment() {
  local env_name=$1
  local kernel_name=$2
  local exists
  local env_listing
  local env_names

  log "INFO" "Verifying environment '$env_name' and kernel '$kernel_name'..."

  # Check if environment exists; if it is a prefix-based environment,
  # we have to look at the directory directly; if it is a named environment,
  # we can use conda info --envs to check for its existence
  if [[ -n "$CONDA_PREFIX" ]]; then
    if [[ ! -d "$CONDA_PATH/conda-meta" ]]; then
      log "ERROR" "Conda environment not found at prefix: $CONDA_PATH"
      return 1
    fi
  else
    if ! env_listing=$(conda info --envs); then
      log "ERROR" "Named conda environment '$env_name' not found"
      return 1
    fi
    # First column of the listing, matched as a whole line:
    # -F = fixed string, not regex
    # -x = matches whole line (exact match)
    # -q = quiet mode, no output, just exit status
    # Here-strings are used instead of a pipeline so that grep -q stopping
    # at the first match cannot fail an upstream stage under pipefail.
    env_names=$(awk '{print $1}' <<< "$env_listing")
    if ! grep -Fxq -- "$env_name" <<< "$env_names"; then
      log "ERROR" "Named conda environment '$env_name' not found"
      return 1
    fi
  fi

  # Check if kernel we just tried to create in fact exists now
  log "INFO" "Checking if kernel '$kernel_name' exists for prefix '$KERNEL_PREFIX'..."
  # Test the assignment directly so the status check works whether or not
  # this function is running in a context where `set -e` is active
  if ! exists=$(kernel_exists "$kernel_name"); then
    log "ERROR" "Error checking kernel existence"
    return 1
  elif [[ "$exists" != 1 ]]; then
    log "ERROR" "Kernel '$kernel_name' not found"
    return 1
  fi

  # Create a temporary notebook to verify the kernel
  local temp_notebook
  temp_notebook="$VERIFY_TEMP_DIR/deploy_test.ipynb"
  cat > "$temp_notebook" << EOF
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "c59cc569-40ad-4881-acde-f4099e79edbf",
"metadata": {},
"outputs": [],
"source": [
"print('Kernel verification successful')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "$kernel_name",
"language": "python",
"name": "$kernel_name"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
EOF

  log "INFO" "Executing temporary notebook '$temp_notebook'..."
  local return_value
  # CONDA_LOC_CMD is set in the main function, before this function is called
  local notebook_cmd=(conda run "${CONDA_LOC_CMD[@]}" jupyter nbconvert --to notebook --execute --ExecutePreprocessor.timeout=60 "$temp_notebook")
  if ! "${notebook_cmd[@]}"; then
    log "ERROR" "Kernel verification failed - kernel could not execute notebook"
    log "WARNING" "Removing kernel '$kernel_name' due to error ..."
    # Best effort: report (rather than silently ignore) a failed removal
    if ! jupyter kernelspec remove -f "$kernel_name"; then
      log "WARNING" "Failed to remove kernel '$kernel_name'; it may need to be removed manually"
    fi
    return_value=1
  else
    log "INFO" "Environment and kernel verification successful"
    return_value=0
  fi
  return $return_value
}
#######################################
# Entry point: parse options, start logging, refuse to overwrite an
# existing kernel, create the environment and kernel, then verify them.
# Globals written:
#   DEPLOY_NAME     - DEPLOY_TAG with every "." and "+" replaced by "_";
#                     used as both the environment name and the kernel name
#   CONDA_LOC_CMD   - (-n NAME) for a named environment, or (-p PATH) when
#                     a conda prefix was given
#   CONDA_PATH      - CONDA_PREFIX/DEPLOY_NAME (only set with a conda prefix)
#   SETUP_TEMP_DIR, VERIFY_TEMP_DIR - temp dirs, removed by the EXIT trap
# Arguments:
#   "$@" - the script's command-line arguments
# Returns:
#   Exits 0 on success; failures exit through error_exit or rollback.
#######################################
main() {
  local exists

  parse_args "$@"
  setup_logging
  check_dependencies

  log "INFO" "Starting deployment for tag '$DEPLOY_TAG'..."
  # Replace literal periods (.) and plus signs (+) in the tag name with underscores (_)
  DEPLOY_NAME=${DEPLOY_TAG//[.+]/_}

  # Decide whether the new conda environment will be created as a named
  # environment in the default location or as a prefix-based environment
  # at the user-specified path
  CONDA_LOC_CMD=(-n "$DEPLOY_NAME")
  # Note: CONDA_PREFIX is the user-specified directory into which to add a new
  # conda environment (if they specified one); if they did, then the
  # CONDA_PATH is set to be the user-specified path plus the environment name,
  # which is the actual location into which the environment will be installed.
  if [[ -n "$CONDA_PREFIX" ]]; then
    CONDA_PATH="$CONDA_PREFIX/$DEPLOY_NAME"
    CONDA_LOC_CMD=(-p "$CONDA_PATH")
  fi

  # Check for existing kernel with the same name and error out if it exists.
  # The assignment is tested directly: as a plain statement followed by a
  # "$?" check, `set -e` would end the script on a failed check before the
  # error message could be printed.
  log "INFO" "Checking for pre-existing kernel '$DEPLOY_NAME'..."
  if ! exists=$(kernel_exists "$DEPLOY_NAME"); then
    error_exit "Error checking kernel existence"
  elif [[ "$exists" == 1 ]]; then
    error_exit "Kernel '$DEPLOY_NAME' already exists"
  fi

  # Create a temp directory to hold the setup files and ensure it is cleaned up
  # on exit, then set up the new environment
  SETUP_TEMP_DIR=$(mktemp -d)
  trap 'rm -rf "$SETUP_TEMP_DIR"' EXIT
  setup_new_environment

  # Verify the new environment and kernel
  if [[ "$DRY_RUN" == false ]]; then
    # Create a temp directory for verification files and ensure it is cleaned
    # up on exit, then verify the new environment. NOT using the same temp
    # directory as the one used for setup to ensure that the verification
    # isn't incorrectly depending on any of the setup files.
    VERIFY_TEMP_DIR=$(mktemp -d)
    trap 'rm -rf "$SETUP_TEMP_DIR"; rm -rf "$VERIFY_TEMP_DIR"' EXIT
    log "INFO" "Verifying new environment..."
    # NB: double use of "$DEPLOY_NAME" is NOT a typo :)
    if ! verify_environment "$DEPLOY_NAME" "$DEPLOY_NAME"; then
      rollback "Environment verification failed" "$DEPLOY_NAME"
    fi
  else
    log "INFO" "DRY RUN: Would verify environment '$DEPLOY_NAME' and kernel '$DEPLOY_NAME'"
  fi

  log "INFO" "Deployment successful!"
  log "INFO" "New kernel '$DEPLOY_NAME' is using conda environment '$DEPLOY_NAME'"
  log "INFO" "Log file: $LOG_FILE"
  exit 0
}
# Execute main function with all arguments.
# This is the only top-level call in the script and comes after every
# function definition, so all functions exist by the time main runs.
main "$@"