Skip to content

Commit 326b68e

Browse files
committed
fix: CI Updates
* Brings the CI dashboard code from the `iac` repo into `ci3/dashboard`. * Attempts to fix cleanup on the makefile build path. We used `setsid`, but we think the correct approach is a background subshell, `() &`. * Fixes PRs not getting RUN_IDs. * Explicitly names the dashboard that CI runs are shown on, via CI_DASHBOARD: * `<merge queue target>` * `prs` * `nightly` * `network` * Adds a `ci dash` command so you can see your CI runs directly in the terminal.
1 parent 24c2c20 commit 326b68e

File tree

14 files changed

+1779
-244
lines changed

14 files changed

+1779
-244
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,5 @@ docs/docs/protocol-specs/public-vm/gen/
3232

3333
# for those who use Claude Code
3434
/.claude
35+
36+
__pycache__

bootstrap.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,10 +256,10 @@ function build_and_test {
256256
echo_header "build and test"
257257

258258
# Start the test engine.
259-
# setsid will put it in its own process group, which we can terminate on cleanup.
260259
rm -f $test_cmds_file
261260
touch $test_cmds_file
262-
setsid color_prefix "test-engine" "denoise test_engine_start" &
261+
# Put it in its own process group via a background subshell, so we can terminate it on cleanup.
262+
(color_prefix "test-engine" "denoise test_engine_start") &
263263
test_engine_pid=$!
264264
test_engine_pgid=$(ps -o pgid= -p $test_engine_pid)
265265

ci.sh

Lines changed: 77 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,13 @@ ci3_workflow_id=128853861
1212
function echo_cmd {
1313
local name=$1
1414
shift
15-
printf "${blue}${bold}%12s${reset}: %s\n" $name "$(echo $@ | sed 's/\.\\n/.\n /g')"
15+
printf "${blue}${bold}%21s${reset}: %s\n" $name "$(echo $@ | sed 's/\.\\n/.\n /g')"
1616
}
1717

1818
function print_usage {
1919
echo "usage: $(basename $0) <cmd>"
2020
echo
21+
echo_cmd "dash" "Display a dashboard showing CI runs for the current user."
2122
echo_cmd "fast" "Spin up an EC2 instance and run bootstrap ci-fast."
2223
echo_cmd "full" "Spin up an EC2 instance and run bootstrap ci-full."
2324
echo_cmd "full-no-test-cache" "Spin up an EC2 instance and run bootstrap ci-full-no-test-cache."
@@ -31,25 +32,16 @@ function print_usage {
3132
echo_cmd "network-teardown" "Spin up an EC2 instance to teardown a network deployment."
3233
echo_cmd "release" "Spin up an EC2 instance and run bootstrap release."
3334
echo_cmd "shell-new" "Spin up an EC2 instance, clone the repo, and drop into a shell."
34-
echo_cmd "shell-container" "Drop into a shell in the current running build instance container."
35+
echo_cmd "shell" "Drop into a shell in the current running build instance container."
3536
echo_cmd "shell-host" "Drop into a shell in the current running build host."
36-
echo_cmd "run" "Trigger a GA workflow for the current branch PR and tail logs."
37-
echo_cmd "trigger" "Trigger the GA workflow on the PR associated with the current branch."
38-
echo_cmd "rlog" "Tail the logs of the latest GA run or the given GA run ID."
39-
echo_cmd "ilog" "Tail the logs of the current running build instance."
40-
echo_cmd "dlog" "Display the log of the given denoise log ID."
41-
echo_cmd "kill" "Terminate the EC2 instance for the current branch."
37+
echo_cmd "log" "Display the log of the given log ID."
38+
echo_cmd "kill" "Terminate running EC2 instance with instance_name."
4239
echo_cmd "draft" "Mark the current PR as draft (no automatic CI runs when pushing)."
4340
echo_cmd "ready" "Mark the current PR as ready (enable automatic CI runs when pushing)."
4441
echo_cmd "pr-url" "Print the URL of the current PR associated with the branch."
45-
echo_cmd "last-run-url" "Print the URL of the last GA run for the current branch PR."
46-
echo_cmd "gh-bench" "Download CI-uploaded benchmarks for the current commit."
47-
echo_cmd "gh-deploy-bench" "Download CI-uploaded deployment benchmarks for the current commit."
48-
echo_cmd "gh-spartan-bench" "Download CI-uploaded spartan benchmarks for the current commit."
49-
echo_cmd "avm-inputs-collection" "Nightly: run e2e tests, dump AVM circuit inputs, upload to cache."
50-
echo_cmd "avm-check-circuit" "Nightly: download cached AVM inputs, run check-circuit on each."
42+
echo_cmd "avm-inputs-collection" "Run e2e tests, dump AVM circuit inputs, upload to cache."
43+
echo_cmd "avm-check-circuit" "Download cached AVM inputs, run check-circuit on each."
5144
echo_cmd "help" "Display this help message."
52-
5345
}
5446

5547
[ -n "$cmd" ] && shift
@@ -69,49 +61,45 @@ function get_latest_run_id {
6961
gh run list --workflow $ci3_workflow_id -b $BRANCH --limit 1 --json databaseId -q .[0].databaseId
7062
}
7163

72-
function tail_live_instance {
73-
get_ip_for_instance
74-
[ -z "$ip" ] && return 1;
75-
ssh -F $ci3/aws/build_instance_ssh_config -q -t -o ConnectTimeout=5 ubuntu@$ip "
76-
trap 'exit 0' SIGINT
77-
docker ps -a --filter name=aztec_build --format '{{.Names}}' | grep -q '^aztec_build$' || exit 1
78-
docker logs -f aztec_build
79-
"
80-
}
81-
82-
# Used in merge-queue, nightly, and release flows.
83-
function prep_vars {
84-
export RUN_ID=${RUN_ID:-$(date +%s%3N)}
85-
export PARENT_LOG_URL=http://ci.aztec-labs.com/$RUN_ID
86-
export DENOISE=1
87-
export DENOISE_WIDTH=32
88-
}
64+
# Jobs in the ci dashboards are grouped on a single line by RUN_ID.
65+
export RUN_ID=${RUN_ID:-$(date +%s%3N)}
66+
export PARENT_LOG_URL=http://ci.aztec-labs.com/$RUN_ID
8967

9068
case "$cmd" in
91-
"help"|"")
92-
print_usage
69+
dash)
70+
watch_ci -s next,prs --user --watch
9371
;;
94-
fast|full|full-no-test-cache|full-no-test-cache-makefile|docs|barretenberg|barretenberg-full|avm-inputs-collection|avm-check-circuit)
95-
export JOB_ID="x1-$cmd"
72+
fast|full|full-no-test-cache|full-no-test-cache-makefile|docs|barretenberg|barretenberg-full)
73+
export CI_DASHBOARD="prs"
74+
export JOB_ID="x-$cmd"
9675
bootstrap_ec2 "./bootstrap.sh ci-$cmd"
9776
;;
98-
"grind")
99-
prep_vars
100-
# Spin up ec2 instance and run the merge-queue flow.
77+
avm-inputs-collection|avm-check-circuit)
78+
export CI_DASHBOARD="nightly"
79+
export JOB_ID="x-$cmd"
80+
bootstrap_ec2 "./bootstrap.sh ci-$cmd"
81+
;;
82+
grind)
83+
# Grind a default of 5 times.
84+
export CI_DASHBOARD="local"
85+
export DENOISE=1
86+
export DENOISE_WIDTH=32
10187
run() {
10288
JOB_ID=$1 INSTANCE_POSTFIX=$1 ARCH=$2 exec denoise "bootstrap_ec2 './bootstrap.sh $3'"
10389
}
10490
export -f run
105-
seq 1 ${1:-5} | parallel --termseq 'TERM,10000' --tagstring '{= $_=~s/run (\w+).*/$1/; =}' --line-buffered 'run $USER-x{}-full amd64 ci-full-no-test-cache'
91+
seq 1 ${1:-5} | parallel --termseq 'TERM,10000' --tagstring '{= $_=~s/run (\w+).*/$1/; =}' --line-buffered \
92+
'run $USER-x{}-full amd64 ci-full-no-test-cache'
10693
;;
107-
"merge-queue")
108-
prep_vars
109-
# Spin up ec2 instance and run the merge-queue flow.
94+
merge-queue)
95+
# We perform full runs of all tests on multiple x86, and a single fast run on arm64.
96+
export CI_DASHBOARD=${TARGET_BRANCH:-local}
97+
export DENOISE=1
98+
export DENOISE_WIDTH=32
11099
run() {
111100
JOB_ID=$1 INSTANCE_POSTFIX=$1 ARCH=$2 exec denoise "bootstrap_ec2 './bootstrap.sh $3'"
112101
}
113102
export -f run
114-
# We perform two full runs of all tests on x86, and a single fast run on arm64 (allowing use of test cache).
115103
parallel --jobs 10 --termseq 'TERM,10000' --tagstring '{= $_=~s/run (\w+).*/$1/; =}' --line-buffered --halt now,fail=1 ::: \
116104
'run x1-full amd64 ci-full-no-test-cache' \
117105
'run x2-full amd64 ci-full-no-test-cache' \
@@ -123,29 +111,33 @@ case "$cmd" in
123111
##########################################
124112
# NETWORK DEPLOYMENTS WITH BENCHES/TESTS #
125113
##########################################
126-
"network-deploy")
114+
network-deploy)
127115
# Args: <scenario> <namespace> [docker_image]
128116
# If docker_image is not provided, ci-network-deploy will build and push to aztecdev.
117+
export CI_DASHBOARD="network"
129118
export JOB_ID="x-${2:?namespace is required}-network-deploy"
130119
export INSTANCE_POSTFIX="n-deploy"
131120
bootstrap_ec2 "./bootstrap.sh ci-network-deploy $*"
132121
;;
133-
"network-tests")
122+
network-tests)
134123
# Args: <scenario> <namespace>
124+
export CI_DASHBOARD="network"
135125
export JOB_ID="x-${2:?namespace is required}-network-tests"
136126
export AWS_SHUTDOWN_TIME=360 # 6 hours for network tests
137127
export INSTANCE_POSTFIX="n-tests"
138128
bootstrap_ec2 "./bootstrap.sh ci-network-tests $*"
139129
;;
140-
"network-bench")
130+
network-bench)
141131
# Args: <scenario> <namespace> [docker_image]
142132
# If docker_image is not provided, ci-network-bench will build and push to aztecdev.
133+
export CI_DASHBOARD="network"
143134
export JOB_ID="x-${2:?namespace is required}-network-bench" CPUS=16
144135
export INSTANCE_POSTFIX="n-bench"
145136
bootstrap_ec2 "./bootstrap.sh ci-network-bench $*"
146137
;;
147-
"network-teardown")
138+
network-teardown)
148139
# Args: <scenario> <namespace>
140+
export CI_DASHBOARD="network"
149141
export JOB_ID="x-${2:?namespace is required}-network-teardown"
150142
export CPUS=4
151143
export INSTANCE_POSTFIX="n-teardown"
@@ -155,9 +147,11 @@ case "$cmd" in
155147
############
156148
# RELEASES #
157149
############
158-
"release")
159-
prep_vars
150+
release)
160151
# Spin up ec2 instance and run the release flow.
152+
export CI_DASHBOARD="releases"
153+
export DENOISE=1
154+
export DENOISE_WIDTH=32
161155
run() {
162156
JOB_ID=$1 INSTANCE_POSTFIX=$1 ARCH=$2 exec denoise "bootstrap_ec2 './bootstrap.sh ci-release'"
163157
}
@@ -172,71 +166,44 @@ case "$cmd" in
172166
fi
173167
;;
174168

175-
#######################################
176-
# VARIANTS ON INTERACTIVE CI SESSIONS #
177-
#######################################
178-
"shell-new")
169+
##################
170+
# SHELL SESSIONS #
171+
##################
172+
shell-new)
179173
# Spin up ec2 instance, clone, and drop into shell.
180174
# False triggers the shell on fail.
181175
cmd="${1:-false}"
182176
exec bootstrap_ec2 "$cmd"
183177
;;
184-
"shell-container")
178+
shell-container)
185179
# Drop into a shell in the current running build instance container.
186180
get_ip_for_instance
187181
[ -z "$ip" ] && echo "No instance found: $instance_name" && exit 1
188182
[ "$#" -eq 0 ] && set -- "zsh" || true
189183
ssh -tq -F $ci3/aws/build_instance_ssh_config ubuntu@$ip \
190184
"docker start aztec_build &>/dev/null || true && docker exec -it --user aztec-dev aztec_build $@"
191185
;;
192-
"shell-host")
186+
shell-host)
193187
# Drop into a shell in the current running build host.
194188
get_ip_for_instance
195189
[ -z "$ip" ] && echo "No instance found: $instance_name" && exit 1
196190
ssh -t -F $ci3/aws/build_instance_ssh_config ubuntu@$ip
197191
;;
192+
kill)
193+
existing_instance=$(aws ec2 describe-instances \
194+
--region us-east-2 \
195+
--filters "Name=tag:Name,Values=$instance_name" \
196+
--query "Reservations[].Instances[?State.Name!='terminated'].InstanceId[]" \
197+
--output text)
198+
if [ -n "$existing_instance" ]; then
199+
aws_terminate_instance $existing_instance
200+
fi
201+
;;
198202

199203
###################
200-
# TRIGGER ci3.yml #
204+
# DISPLAYING LOGS #
201205
###################
202-
"run")
203-
# Trigger a GA workflow for current branch PR and tail logs.
204-
$0 trigger
205-
$0 rlog
206-
;;
207-
"trigger")
208-
# Trigger workflow.
209-
# We use this label trick because triggering the workflow direct doesn't associate with the PR.
210-
pr_number=$(gh pr list --head "$BRANCH" --json number --jq '.[0].number')
211-
if [ -z "$pr_number" ]; then
212-
echo "No pull request found for branch $BRANCH."
213-
exit 1
214-
fi
215-
gh pr edit "$pr_number" --remove-label "trigger-workflow" &> /dev/null
216-
gh pr edit "$pr_number" --add-label "trigger-workflow" &> /dev/null
217-
sleep 1
218-
gh pr edit "$pr_number" --remove-label "trigger-workflow" &> /dev/null
219-
run_id=$(get_latest_run_id)
220-
echo "In progress..." | redis_setexz $run_id 3600
221-
echo -e "Triggered CI for PR: $pr_number (ci rlog ${yellow}$run_id${reset})"
222-
;;
223-
"rlog")
224-
[ -z "${1:-}" ] && run_id=$(get_latest_run_id) || run_id=$1
225-
output=$(redis_getz $run_id)
226-
if [ -z "$output" ] || [ "$output" == "In progress..." ]; then
227-
# If we're in progress, tail live logs from launched instance.
228-
exec $0 ilog
229-
else
230-
echo "$output" | $PAGER
231-
fi
232-
;;
233-
"ilog")
234-
while ! tail_live_instance; do
235-
echo "Waiting on instance with name: $instance_name"
236-
sleep 10
237-
done
238-
;;
239-
"dlog")
206+
log|dlog)
240207
if [ "$CI_REDIS_AVAILABLE" -ne 1 ]; then
241208
echo "No redis available for log query."
242209
exit 1
@@ -245,17 +212,11 @@ case "$cmd" in
245212
[ ! -t 0 ] && pager=cat
246213
redis_getz $1 | $pager
247214
;;
248-
"kill")
249-
existing_instance=$(aws ec2 describe-instances \
250-
--region us-east-2 \
251-
--filters "Name=tag:Name,Values=$instance_name" \
252-
--query "Reservations[].Instances[?State.Name!='terminated'].InstanceId[]" \
253-
--output text)
254-
if [ -n "$existing_instance" ]; then
255-
aws_terminate_instance $existing_instance
256-
fi
257-
;;
258-
"draft")
215+
216+
#################
217+
# PR MANAGEMENT #
218+
#################
219+
draft)
259220
pr_number=$(gh pr list --head "$BRANCH" --json number --jq '.[0].number')
260221
if [ -n "$pr_number" ]; then
261222
gh pr ready "$pr_number" --undo
@@ -264,7 +225,7 @@ case "$cmd" in
264225
echo "No pull request found for branch $BRANCH."
265226
fi
266227
;;
267-
"ready")
228+
ready)
268229
pr_number=$(gh pr list --head "$BRANCH" --json number --jq '.[0].number')
269230
if [ -n "$pr_number" ]; then
270231
gh pr ready "$pr_number"
@@ -273,7 +234,7 @@ case "$cmd" in
273234
echo "No pull request found for branch $BRANCH."
274235
fi
275236
;;
276-
"pr-url")
237+
pr-url)
277238
# Print the current PR associated with the branch.
278239
pr_url=$(gh pr list --head "$BRANCH" --limit 1 --json url -q '.[0].url')
279240
if [ -z "$pr_url" ]; then
@@ -282,28 +243,16 @@ case "$cmd" in
282243
fi
283244
echo "$pr_url"
284245
;;
285-
"last-run-url")
286-
# Print the URL of the last GA run for the current branch PR.
287-
run_id=$(get_latest_run_id)
288-
if [ -z "$run_id" ] || [ "$run_id" == "null" ]; then
289-
echo "No recent GitHub Actions run found for branch '$BRANCH'."
290-
exit 1
291-
fi
292-
repo=$(gh repo view --json nameWithOwner -q .nameWithOwner)
293-
echo "https://github.com/$repo/actions/runs/$run_id"
294-
;;
295246

296-
###################################
297-
# DOWNLOAD CI-UPLOADED BENCHMARKS #
298-
###################################
299-
"gh-bench")
300-
cache_download bench-$(git rev-parse HEAD^{tree}).tar.gz
301-
;;
302-
"gh-deploy-bench")
303-
cache_download deploy-bench-$(git rev-parse HEAD^{tree}).tar.gz
247+
########################
248+
# BENCHMARK PROCESSING #
249+
########################
250+
gh-bench|gh-deploy-bench|gh-spartan-bench)
251+
cache_download ${cmd#gh-}-$(git rev-parse HEAD^{tree}).tar.gz
304252
;;
305-
"gh-spartan-bench")
306-
cache_download spartan-bench-$(git rev-parse HEAD^{tree}).tar.gz
253+
254+
help|"")
255+
print_usage
307256
;;
308257
*)
309258
echo "Unknown command: $cmd, see ./ci.sh help"

ci3/bootstrap_ec2

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ container_script=$(
134134
source ci3/source_redis
135135
source ci3/source_cache
136136
ci_log_id=\$(log_ci_run)
137-
export PARENT_LOG_URL=${PARENT_LOG_URL:-http://ci.aztec-labs.com/\$ci_log_id}
137+
export PARENT_LOG_URL=${PARENT_LOG_URL:-}
138138
139139
if [ -n "\$DOCKERHUB_PASSWORD" ]; then
140140
echo \$DOCKERHUB_PASSWORD | docker login -u \$DOCKERHUB_USERNAME --password-stdin
@@ -276,6 +276,7 @@ function run {
276276
-e JOB_ID=${JOB_ID:-} \
277277
-e REF_NAME=${REF_NAME:-} \
278278
-e TARGET_BRANCH=${TARGET_BRANCH:-} \
279+
-e CI_DASHBOARD=${CI_DASHBOARD:-} \
279280
-e PARENT_LOG_URL=${PARENT_LOG_URL:-} \
280281
-e NO_CACHE=${NO_CACHE:-} \
281282
-e NO_FAIL_FAST=${NO_FAIL_FAST:-} \

0 commit comments

Comments
 (0)