Skip to content

Commit 2ef8241

Browse files
committed
More updates to script
1 parent 0abb9ee commit 2ef8241

File tree

1 file changed

+86
-81
lines changed

1 file changed

+86
-81
lines changed
Lines changed: 86 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1,104 +1,109 @@
11
#!/bin/bash
2-
set -e
32

43
alias gcurl='curl -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json"'
54

6-
readarray -t notebooks < .cloud-build/Notebooks.txt
7-
NOTEBOOK_RUNTIME_TEMPLATE=$(<NOTEBOOK_RUNTIME_TEMPLATE)
8-
OUTPUT_URI=$(<OUTPUT_URI)
9-
SA=$(<SA)
10-
PROJECT_ID=$(<PROJECT_ID)
11-
REGION=$(<REGION)
12-
PUBSUB_TOPIC=$(<PS_TOPIC)
13-
14-
DATE=$(date +%Y-%m-%d)
15-
TIME=$(date +%H-%M-%S)
16-
TIMESTAMP=$(date "+%B %d %Y %H:%M:%S")
17-
18-
declare -A operation_map
19-
declare -a pending completed_success completed_failure
20-
21-
launch_notebook() {
22-
local path="$1"
23-
local name="${path##*/}"
24-
local display_name="${name%.ipynb}-$DATE-$TIME"
25-
26-
echo "Launching: $path"
27-
local operation_id=$(gcloud colab executions create \
28-
--display-name="$display_name" \
5+
TARGET=$(cat .cloud-build/Notebooks.txt)
6+
7+
current_date=$(date +%Y-%m-%d)
8+
current_time=$(date +%H-%M-%S)
9+
current_time_readable=$(date "+%B %d %Y %H:%M:%S")
10+
11+
NOTEBOOK_RUNTIME_TEMPLATE=$(cat NOTEBOOK_RUNTIME_TEMPLATE)
12+
OUTPUT_URI=$(cat OUTPUT_URI)
13+
SA=$(cat SA)
14+
PROJECT_ID=$(cat PROJECT_ID)
15+
REGION=$(cat REGION)
16+
PUBSUB_TOPIC=$(cat PS_TOPIC)
17+
18+
failed_count=0
19+
successful_count=0
20+
declare -a failed_notebooks
21+
declare -a successful_notebooks
22+
23+
MAX_PARALLEL_JOBS=5
24+
joblist=()
25+
26+
# Function to run a single notebook
27+
run_notebook() {
28+
local x="$1"
29+
local current_date="$2"
30+
local current_time="$3"
31+
32+
DISPLAY_NAME="${x##generative-ai/}"
33+
DISPLAY_NAME="${DISPLAY_NAME%.ipynb}-$current_date-$current_time"
34+
echo "Starting execution for ${x}"
35+
36+
OPERATION_ID=$(gcloud colab executions create \
37+
--display-name="$DISPLAY_NAME" \
2938
--notebook-runtime-template="$NOTEBOOK_RUNTIME_TEMPLATE" \
30-
--direct-content="$path" \
39+
--direct-content="$x" \
3140
--gcs-output-uri="$OUTPUT_URI" \
3241
--project="$PROJECT_ID" \
3342
--region="$REGION" \
3443
--service-account="$SA" \
3544
--execution-timeout="1h30m" \
3645
--format="value(name)")
3746

38-
local id=$(basename "$operation_id")
39-
operation_map["$path"]="$id"
40-
pending+=("$path")
41-
}
42-
43-
monitor_executions() {
44-
while [[ ${#pending[@]} -gt 0 ]]; do
45-
echo "Waiting for ${#pending[@]} notebooks..."
46-
47-
local still_pending=()
48-
49-
for path in "${pending[@]}"; do
50-
local id="${operation_map["$path"]}"
51-
52-
local status=$(gcloud colab executions describe "$id" --region="$REGION" --format="value(jobState)" 2>/dev/null || echo "JOB_STATE_FAILED")
53-
54-
case "$status" in
55-
JOB_STATE_SUCCEEDED)
56-
echo "Success: $path"
57-
completed_success+=("$path")
58-
;;
59-
JOB_STATE_FAILED | *_CANCELLED | *_UNSPECIFIED)
60-
echo "Failure: $path ($status)"
61-
completed_failure+=("$path")
62-
;;
63-
*)
64-
echo "Still running: $path ($status)"
65-
still_pending+=("$path")
66-
;;
67-
esac
68-
done
69-
70-
pending=("${still_pending[@]}")
71-
[[ ${#pending[@]} -gt 0 ]] && sleep 60
72-
done
73-
}
47+
TRUNCATED_OPERATION_ID=$(echo "$OPERATION_ID" | cut -c 67-85)
7448

75-
publish_results() {
76-
local total=${#notebooks[@]}
77-
local failed=${#completed_failure[@]}
78-
local passed=${#completed_success[@]}
49+
if ! EXECUTION_DETAILS=$(gcloud colab executions describe "$TRUNCATED_OPERATION_ID" --region="$REGION"); then
50+
echo "Error describing execution for ${x}" >&2
51+
echo "fail:$x"
52+
return
53+
fi
7954

80-
printf "%s\n" "${completed_failure[@]}" > /workspace/Failure.txt
55+
JOB_STATE=$(echo "$EXECUTION_DETAILS" | grep "jobState:" | awk '{print $2}')
56+
if [[ "$JOB_STATE" == "JOB_STATE_SUCCEEDED" ]]; then
57+
echo "success:$x"
58+
else
59+
echo "fail:$x"
60+
fi
61+
}
8162

82-
local fail_list=$(IFS=, ; echo "${completed_failure[*]}")
83-
local pass_list=$(IFS=, ; echo "${completed_success[*]}")
63+
# Parallel runner
64+
for x in $TARGET; do
65+
run_notebook "$x" "$current_date" "$current_time" > "result_$total_count.txt" 2>&1 &
8466

85-
local message="{\"total_count\":$total,\"failed_count\":$failed,\"failed_notebooks\":\"$fail_list\",\"successful_notebooks\":\"$pass_list\",\"successful_count\":$passed,\"execution_date\":\"$TIMESTAMP\"}"
67+
joblist+=($!)
68+
total_count=$((total_count + 1))
8669

87-
echo "$(date) - INFO - Publishing results..."
88-
if ! gcloud pubsub topics publish "$PUBSUB_TOPIC" --message="$message" --project="$PROJECT_ID"; then
89-
echo "$(date) - ERROR - Failed to publish to Pub/Sub topic $PUBSUB_TOPIC"
70+
# Control concurrency
71+
if [[ ${#joblist[@]} -ge $MAX_PARALLEL_JOBS ]]; then
72+
wait -n
73+
joblist=($(jobs -p)) # prune finished jobs
9074
fi
91-
}
75+
done
9276

93-
echo "--- Launching notebooks ---"
94-
for nb in "${notebooks[@]}"; do
95-
[[ -n "$nb" ]] && launch_notebook "$nb"
77+
# Wait for all remaining jobs
78+
wait
79+
80+
# Collect results
81+
for result_file in result_*.txt; do
82+
if grep -q "^success:" "$result_file"; then
83+
notebook=$(grep "^success:" "$result_file" | cut -d':' -f2-)
84+
successful_notebooks+=("$notebook")
85+
successful_count=$((successful_count + 1))
86+
elif grep -q "^fail:" "$result_file"; then
87+
notebook=$(grep "^fail:" "$result_file" | cut -d':' -f2-)
88+
failed_notebooks+=("$notebook")
89+
failed_count=$((failed_count + 1))
90+
echo "- $notebook" | tee -a /workspace/Failure.txt
91+
fi
92+
rm "$result_file"
9693
done
9794

98-
echo "--- Monitoring executions ---"
99-
monitor_executions
95+
# Summary
96+
echo "Total successful notebook executions: $successful_count"
97+
echo "Total failed notebook executions: $failed_count"
98+
99+
# Publish result to Pub/Sub
100+
failed_notebooks_str=$(IFS=', '; echo "${failed_notebooks[*]}")
101+
successful_notebooks_str=$(IFS=', '; echo "${successful_notebooks[*]}")
102+
103+
message_data="{\"total_count\":$((total_count)),\"failed_count\":$((failed_count)),\"failed_notebooks\":\"${failed_notebooks_str}\",\"successful_notebooks\":\"${successful_notebooks_str}\",\"successful_count\":$((successful_count)),\"execution_date\":\"${current_time_readable}\"}"
100104

101-
echo "--- Publishing summary ---"
102-
publish_results
105+
echo "$(date) - INFO - Publishing to Pub/Sub topic: $PUBSUB_TOPIC"
106+
gcloud pubsub topics publish "$PUBSUB_TOPIC" --message="$message_data" --project="$PROJECT_ID" || \
107+
echo "$(date) - ERROR - Failed to publish to Pub/Sub"
103108

104-
echo "Done. Success: ${#completed_success[@]}, Failures: ${#completed_failure[@]}"
109+
echo "All notebook executions completed."

0 commit comments

Comments
 (0)