|
1 | 1 | #!/bin/bash |
| 2 | +set -e |
2 | 3 |
|
3 | 4 | alias gcurl='curl -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json"' |
4 | 5 |
|
5 | | -TARGET=$(cat .cloud-build/Notebooks.txt) |
6 | | - |
7 | | -current_date=$(date +%Y-%m-%d) |
8 | | -current_time=$(date +%H-%M-%S) |
9 | | -current_time_readable=$(date "+%B %d %Y %H:%M:%S") |
10 | | - |
11 | | -NOTEBOOK_RUNTIME_TEMPLATE=$(cat NOTEBOOK_RUNTIME_TEMPLATE) |
12 | | -OUTPUT_URI=$(cat OUTPUT_URI) |
13 | | -SA=$(cat SA) |
14 | | -PROJECT_ID=$(cat PROJECT_ID) |
15 | | -REGION=$(cat REGION) |
16 | | -PUBSUB_TOPIC=$(cat PS_TOPIC) |
17 | | - |
18 | | -failed_count=0 |
19 | | -failed_notebooks=() |
20 | | -total_count=0 |
21 | | -successful_notebooks=() |
22 | | -successful_count=0 |
23 | | - |
24 | | -for x in $TARGET; do |
25 | | - total_count=$((total_count + 1)) |
26 | | - # Use the full path from the repository for display name |
27 | | - DISPLAY_NAME="${x##generative-ai/}" |
28 | | - DISPLAY_NAME="${DISPLAY_NAME%.ipynb}-$current_date-$current_time" |
29 | | - echo "Starting execution for ${x}" |
30 | | - |
31 | | - # Execute and get the operation ID |
32 | | - OPERATION_ID=$(gcloud colab executions create \ |
33 | | - --display-name="$DISPLAY_NAME" \ |
# --- Run configuration -------------------------------------------------------
# Notebook paths to execute, one per line (readarray -t strips the newlines).
readarray -t notebooks < .cloud-build/Notebooks.txt

# Each setting is read from a file of the same name in the working directory.
NOTEBOOK_RUNTIME_TEMPLATE=$(<NOTEBOOK_RUNTIME_TEMPLATE)
OUTPUT_URI=$(<OUTPUT_URI)
SA=$(<SA)
PROJECT_ID=$(<PROJECT_ID)
REGION=$(<REGION)
PUBSUB_TOPIC=$(<PS_TOPIC)

# Read the clock once and derive every timestamp from that single instant, so
# DATE, TIME and TIMESTAMP cannot disagree when the run starts right on a
# second/midnight boundary (three separate `date` calls could).
run_epoch=$(date +%s)
printf -v DATE '%(%Y-%m-%d)T' "$run_epoch"
printf -v TIME '%(%H-%M-%S)T' "$run_epoch"
printf -v TIMESTAMP '%(%B %d %Y %H:%M:%S)T' "$run_epoch"

# operation_map: notebook path -> execution ID used for polling.
# pending / completed_success / completed_failure: notebook paths by state.
# Initialised explicitly so they are set (and empty) before first use.
declare -A operation_map=()
declare -a pending=() completed_success=() completed_failure=()
| 20 | + |
#######################################
# Submit one notebook through `gcloud colab executions create` and register
# it for later polling.
# Globals:
#   DATE, TIME                      (read) suffix for the display name
#   NOTEBOOK_RUNTIME_TEMPLATE, OUTPUT_URI, PROJECT_ID, REGION, SA
#                                   (read) forwarded to gcloud
#   operation_map                   (written) notebook path -> execution ID
#   pending                         (appended) on successful submission
#   completed_failure               (appended) when submission fails
# Arguments:
#   $1 - notebook path, passed to --direct-content
# Outputs:
#   Progress line on stdout; launch errors on stderr.
# Returns:
#   0 always: a failed submission is recorded, it does not abort the run.
#######################################
launch_notebook() {
  local path="$1"
  local name="${path##*/}"
  local display_name="${name%.ipynb}-$DATE-$TIME"
  # Declared separately from the assignment below: `local var=$(cmd)` reports
  # the status of `local`, which would hide a failed gcloud call.
  local operation_id

  echo "Launching: $path"
  if ! operation_id=$(gcloud colab executions create \
    --display-name="$display_name" \
    --notebook-runtime-template="$NOTEBOOK_RUNTIME_TEMPLATE" \
    --direct-content="$path" \
    --gcs-output-uri="$OUTPUT_URI" \
    --project="$PROJECT_ID" \
    --region="$REGION" \
    --service-account="$SA" \
    --execution-timeout="1h30m" \
    --format="value(name)") || [[ -z "$operation_id" ]]; then
    # Nothing to poll: count it as failed now instead of registering an
    # empty execution ID that would only fail later in the describe call.
    echo "Failed to launch: $path" >&2
    completed_failure+=("$path")
    return 0
  fi

  # Keep only the last path segment of the returned resource name.
  operation_map["$path"]="${operation_id##*/}"
  pending+=("$path")
}
| 42 | + |
#######################################
# Poll every pending execution until each one reaches a terminal state.
# Globals:
#   pending                (read/written) notebook paths still running
#   operation_map          (read) notebook path -> execution ID
#   REGION                 (read) forwarded to gcloud
#   completed_success      (appended) paths whose job succeeded
#   completed_failure      (appended) paths whose job ended any other way
#   POLL_INTERVAL_SECONDS  (read, optional) delay between rounds, default 60
# Arguments:
#   None
# Outputs:
#   One progress line per notebook per polling round on stdout.
# Returns:
#   0 once nothing is pending.
#######################################
monitor_executions() {
  local path id status
  local -a still_pending

  while (( ${#pending[@]} > 0 )); do
    echo "Waiting for ${#pending[@]} notebooks..."
    still_pending=()

    for path in "${pending[@]}"; do
      id="${operation_map[$path]}"

      # A describe call that errors out is counted as a failed job; its
      # stderr is intentionally discarded to keep the polling log readable.
      status=$(gcloud colab executions describe "$id" \
        --region="$REGION" --format="value(jobState)" 2>/dev/null) \
        || status="JOB_STATE_FAILED"

      case "$status" in
        JOB_STATE_SUCCEEDED)
          echo "Success: $path"
          completed_success+=("$path")
          ;;
        # Every other terminal state is a failure. EXPIRED and
        # PARTIALLY_SUCCEEDED must be listed here, otherwise they would be
        # treated as "still running" and polled forever.
        JOB_STATE_FAILED | *_CANCELLED | *_UNSPECIFIED | *_EXPIRED | *_PARTIALLY_SUCCEEDED)
          echo "Failure: $path ($status)"
          completed_failure+=("$path")
          ;;
        *)
          echo "Still running: $path ($status)"
          still_pending+=("$path")
          ;;
      esac
    done

    pending=("${still_pending[@]}")
    # Plain `if`, not `[[ ... ]] && sleep`: the && form leaves status 1 on
    # the final round, which becomes this function's return value and makes
    # a caller running under `set -e` exit before results are published.
    if (( ${#pending[@]} > 0 )); then
      sleep "${POLL_INTERVAL_SECONDS:-60}"
    fi
  done

  return 0
}
| 74 | + |
#######################################
# Write the failed-notebook list to disk and publish a JSON run summary to
# Pub/Sub.
# Globals:
#   notebooks, completed_failure, completed_success  (read)
#   TIMESTAMP, PUBSUB_TOPIC, PROJECT_ID              (read)
#   FAILURE_FILE  (read, optional) output path, default /workspace/Failure.txt
# Arguments:
#   None
# Outputs:
#   The failure file (one path per line, empty when nothing failed);
#   progress on stdout; a publish error on stderr.
# Returns:
#   0 even when publishing fails (the failure is only logged).
#######################################
publish_results() {
  local total=${#notebooks[@]}
  local failed=${#completed_failure[@]}
  local passed=${#completed_success[@]}
  local failure_file="${FAILURE_FILE:-/workspace/Failure.txt}"
  local fail_list pass_list message

  # printf with an empty array would still emit one blank line, so truncate
  # the file instead when there is nothing to report.
  if (( failed > 0 )); then
    printf '%s\n' "${completed_failure[@]}" > "$failure_file"
  else
    : > "$failure_file"
  fi

  # Comma-join inside a subshell so the IFS change stays contained.
  fail_list=$(IFS=,; printf '%s' "${completed_failure[*]}")
  pass_list=$(IFS=,; printf '%s' "${completed_success[*]}")

  # Escape backslashes first, then double quotes, so a path containing
  # either cannot break the JSON string. Control characters are not handled.
  fail_list=${fail_list//\\/\\\\}
  fail_list=${fail_list//\"/\\\"}
  pass_list=${pass_list//\\/\\\\}
  pass_list=${pass_list//\"/\\\"}

  message="{\"total_count\":$total,\"failed_count\":$failed,\"failed_notebooks\":\"$fail_list\",\"successful_notebooks\":\"$pass_list\",\"successful_count\":$passed,\"execution_date\":\"$TIMESTAMP\"}"

  echo "$(date) - INFO - Publishing results..."
  if ! gcloud pubsub topics publish "$PUBSUB_TOPIC" --message="$message" --project="$PROJECT_ID"; then
    echo "$(date) - ERROR - Failed to publish to Pub/Sub topic $PUBSUB_TOPIC" >&2
  fi
}
70 | 92 |
|
# Phase 1: submit every listed notebook, skipping blank lines in the list.
echo "--- Launching notebooks ---"
for notebook_path in "${notebooks[@]}"; do
  if [[ -n "$notebook_path" ]]; then
    launch_notebook "$notebook_path"
  fi
done
72 | 97 |
|
73 | | -# Print the final list of failed notebooks |
74 | | -if [[ ${#failed_notebooks[@]} -gt 0 ]]; then |
75 | | - echo "Failed Notebooks:" |
76 | | - for notebook in "${failed_notebooks[@]}"; do |
77 | | - echo "- $notebook" | tee -a /workspace/Failure.txt |
78 | | - done |
79 | | -fi |
80 | | - |
81 | | -if [[ $failed_count -gt 0 ]]; then |
82 | | - echo "Total failed notebook executions: $failed_count" |
83 | | -fi |
84 | | - |
85 | | -if [[ $successful_count -gt 0 ]]; then |
86 | | - echo "Total successful notebook executions: $successful_count" |
87 | | -fi |
88 | | - |
89 | | -# Prep pub/sub message |
90 | | -failed_notebooks_str=$( |
91 | | - IFS=', ' |
92 | | - echo "${failed_notebooks[*]}" |
93 | | -) |
94 | | - |
95 | | -# prep notebook name for pub/sub message |
96 | | -failed_notebooks_str=$( |
97 | | - IFS=', ' |
98 | | - echo "${failed_notebooks[*]}" |
99 | | -) |
100 | | - |
101 | | -if [[ -n "$failed_notebooks_str" ]]; then |
102 | | - IFS=',' read -ra failed_notebooks_array <<<"$failed_notebooks_str" |
103 | | - trimmed_notebooks=() |
104 | | - for notebook in "${failed_notebooks_array[@]}"; do |
105 | | - trimmed_notebooks+=("$(echo -n "$notebook" | sed 's/ *$//')") |
106 | | - done |
107 | | - failed_notebooks_str=$( |
108 | | - IFS=', ' |
109 | | - echo "${trimmed_notebooks[*]}" |
110 | | - ) |
111 | | -else |
112 | | - failed_notebooks_str="" |
113 | | -fi |
114 | | - |
115 | | -successful_notebooks_str=$( |
116 | | - IFS=', ' |
117 | | - echo "${successful_notebooks[*]}" |
118 | | -) |
119 | | - |
120 | | -if [[ -n "$successful_notebooks_str" ]]; then |
121 | | - IFS=',' read -ra successful_notebooks_array <<<"$successful_notebooks_str" |
122 | | - trimmed_successful_notebooks=() |
123 | | - for notebook in "${successful_notebooks_array[@]}"; do |
124 | | - trimmed_successful_notebooks+=("$(echo -n "$notebook" | sed 's/ *$//')") |
125 | | - done |
126 | | - successful_notebooks_str=$( |
127 | | - IFS=', ' |
128 | | - echo "${trimmed_successful_notebooks[*]}" |
129 | | - ) |
130 | | -else |
131 | | - successful_notebooks_str="" |
132 | | -fi |
133 | | - |
134 | | -# Construct the message to send to pub/sub topic |
135 | | -message_data="{\"total_count\":$((total_count + 0)),\"failed_count\":$((failed_count + 0)),\"failed_notebooks\":\"${failed_notebooks_str}\",\"successful_notebooks\":\"${successful_notebooks_str}\",\"successful_count\":$((successful_count + 0)),\"execution_date\":\"${current_time_readable}\"}" |
136 | | - |
137 | | -# Publish to Pub/Sub |
138 | | -echo "$(date) - INFO - Publishing to Pub/Sub topic: $PUBSUB_TOPIC" |
139 | | -if ! gcloud pubsub topics publish "$PUBSUB_TOPIC" --message="$message_data" --project="$PROJECT_ID"; then |
140 | | - echo "$(date) - ERROR - Failed to publish to Pub/Sub topic $PUBSUB_TOPIC. Check permissions and topic configuration." |
141 | | - #exit 1 |
142 | | -fi |
143 | | - |
144 | | -echo "All notebook executions completed." |
# Phase 2: block until every submitted execution has finished.
printf '%s\n' "--- Monitoring executions ---"
monitor_executions

# Phase 3: write the failure list and send the run summary.
printf '%s\n' "--- Publishing summary ---"
publish_results

# Final one-line tally of the run.
printf '%s\n' "Done. Success: ${#completed_success[@]}, Failures: ${#completed_failure[@]}"
0 commit comments