|
#!/bin/bash
#
# Settings for the EC2 benchmark driver.
#
#   instance_id  EC2 instance that is stopped, resized and restarted for
#                each instance type under test.
#   region       Value passed as --region to the aws ec2 calls.
#   git_branch   Branch named in the (currently commented-out) remote
#                checkout command further down.
#   ntrials      How many times the remote benchmark command is run for
#                each instance type.

instance_id="i-0daab006867136323"
region="us-east-1"
# git_branch="lums/tmp/benchmark"
git_branch="lums/tmp/gemm2.0"
ntrials=1
4 | 8 |
|
5 | | -for instance_type in c6a.4xlarge c6a.2xlarge; |
# SSH readiness probe: give up after max_nc_tries attempts, pausing
# nc_tries_sleep seconds between attempts.
max_nc_tries=12
nc_tries_sleep=8

# Public IPv4 address of the benchmark instance, used by the nc probe on
# port 22. The lookup reuses instance_id and region so it always targets
# the same instance as every other aws ec2 call in this script.
# NOTE(review): the address is resolved once, before any stop/start cycle.
# Unless the instance has an Elastic IP, EC2 assigns a new public address
# on restart, so the probe may be checking a stale address -- confirm.
instance_ip=$(aws ec2 --region "${region}" describe-instances \
  --instance-ids "${instance_id}" \
  --query 'Reservations[0].Instances[0].PublicIpAddress' \
  --output text)

# Pull in optional local helpers/settings.
# NOTE(review): check_instance_status, called later in this script, is not
# defined here and is presumably provided by this file -- confirm. If the
# file also sets AWS credentials or a profile, it would need to be sourced
# before the lookup above.
if [ -f ~/.bash_awsrc ]; then
  . ~/.bash_awsrc
fi
| 16 | + |
| 17 | +# ssh ec2 "cd feature-vector-prototype ; git commit -am \"Pause for benchmark [skip ci]\" ; git checkout ${git_branch}" |
| 18 | +# ssh ec2 "cd feature-vector-prototype/src/cmake-build-release ; make -C libtiledbvectorsearch ivf_hack" |
| 19 | + |
| 20 | +# for instance_type in c6a.4xlarge c6a.2xlarge; |
| 21 | +# for instance_type in c6a.16xlarge c6a.2xlarge; |
| 22 | +for instance_type in r6a.24xlarge c6a.16xlarge c6a.4xlarge c6a.2xlarge t3.xlarge t1.micro; |
6 | 23 | do |
| 24 | + |
| 25 | + benchname="1b-${instance_type}-10k-125MiB" |
| 26 | + bash_script="1b-c6a-16x-10k-125MiB.bash" |
| 27 | + |
| 28 | + echo "Benchmark name is ${benchname}, running script ${bash_script}" |
| 29 | + |
| 30 | + echo "Preparing to run ${instance_type}" |
7 | 31 | current_instance_type=$(aws ec2 --region ${region} describe-instances --instance-ids ${instance_id} --query 'Reservations[].Instances[].InstanceType' --output text) |
8 | 32 | state=$(aws ec2 --region ${region} describe-instances --instance-ids ${instance_id} --query 'Reservations[].Instances[].State.Name' --output text) |
9 | 33 |
|
| 34 | + echo "First stopping ${current_instance_type}" |
| 35 | + |
10 | 36 | if [[ ${state} == "running" && ${current_instance_type} == "${instance_type}" ]]; then |
11 | | - echo ${current_instance_type} is ${state} |
| 37 | + echo "${current_instance_type} is already ${state}" |
12 | 38 | else |
13 | 39 |
|
| 40 | + echo "${current_instance_type} is in state ${state}" |
14 | 41 | aws ec2 --region ${region} stop-instances --instance-ids ${instance_id} |
15 | | - aws ec2 --region ${region} stop-instances --instance-ids ${instance_id} |
16 | | - ssh ec2 "sync;sync;sync;sudo shutdown -h now" |
17 | | - ssh ec2 "sync;sync;sync;sudo shutdown -h now" |
18 | | - aws ec2 --region ${region} stop-instances --instance-ids ${instance_id} |
| 42 | + sleep 1 |
| 43 | + if nc_timeout=1 max_nc_tries=1 check_instance_status; |
| 44 | + then |
| 45 | + ssh ec2 "sync;sync;sync;sudo shutdown -h now" |
| 46 | + fi |
| 47 | + sleep 1 |
19 | 48 |
|
20 | 49 | state=$(aws ec2 --region ${region} describe-instances --instance-ids ${instance_id} --query 'Reservations[].Instances[].State.Name' --output text) |
| 50 | + |
| 51 | + # Assume instance *will* stop (eventually) |
21 | 52 | while [ "$state" != "stopped" ]; do |
22 | 53 | state=$(aws ec2 --region ${region} describe-instances --instance-ids ${instance_id} --query 'Reservations[].Instances[].State.Name' --output text) |
23 | | - echo "Instance is ${state}" |
| 54 | + echo "Instance ${current_instance_type} is ${state}" |
24 | 55 | sleep 1 # Delay for 1 second |
25 | 56 | done |
26 | 57 |
|
27 | 58 | echo "Instance is ${state}" |
28 | 59 |
|
29 | | - aws ec2 --region ${region} modify-instance-attribute --instance-id ${instance_id} --instance-type ${instance_type} |
| 60 | + # Change instance type |
| 61 | + change_msg=$(aws ec2 --region ${region} modify-instance-attribute --instance-id ${instance_id} --instance-type ${instance_type}) |
| 62 | + sleep 1 |
| 63 | + current_instance_type=$(aws ec2 --region ${region} describe-instances --instance-ids ${instance_id} --query 'Reservations[].Instances[].InstanceType' --output text) |
| 64 | + if [ "${current_instance_type}" != ${instance_type} ]; |
| 65 | + then |
| 66 | + echo "Could not change to ${instance_type} because ${change_msg}. Skipping ${instance_type}." |
| 67 | + continue |
| 68 | + fi |
| 69 | + |
30 | 70 | aws ec2 --region ${region} start-instances --instance-ids ${instance_id} |
31 | 71 |
|
32 | 72 | state=$(aws ec2 --region ${region} describe-instances --instance-ids ${instance_id} --query 'Reservations[].Instances[].State.Name' --output text) |
|
37 | 77 | done |
38 | 78 |
|
39 | 79 | echo "Instance is ${state}" |
| 80 | + sleep 30 |
40 | 81 | fi |
41 | 82 | # feature-vector-prototype/experimental/benchmarks/1b-c6a-16x-125MiB.bash |
42 | 83 | # 1b-c6a-16x-125MiB-2023-0613-1419.log |
43 | 84 |
|
44 | | - benchname="1b-${instance_type}-125MiB" |
45 | | - bash_script="1b-c6a-16x-125MiB.bash" |
46 | | - command="bash feature-vector-prototype/experimental/benchmarks/${bash_script}" |
47 | | - |
| 85 | + |
| 86 | + # Make sure remote instance is ready to accept logins |
| 87 | + nc_tries=0 |
| 88 | + |
| 89 | + while true; do |
| 90 | + if nc -G 2 -zv "${instance_ip}" 22 >/dev/null 2>&1; then |
| 91 | + echo "EC2 instance is ready for remote logins." |
| 92 | + break |
| 93 | + fi |
| 94 | + |
| 95 | + nc_tries=$((nc_tries + 1)) |
| 96 | + |
| 97 | + if [ "$nc_tries" -eq "$max_nc_tries" ]; then |
| 98 | + echo "Maximum number of tries reached. EC2 instance is not ready for remote logins." |
| 99 | + break |
| 100 | + fi |
| 101 | + |
| 102 | + echo "EC2 instance is not ready yet. Retrying in $nc_tries_sleep seconds..." |
| 103 | + sleep "$nc_tries_sleep" |
| 104 | + done |
| 105 | + |
48 | 106 | for ((i=1; i<=2; i++)) |
| 107 | + do |
| 108 | + # nuke from space, it's the only way to be sure |
| 109 | + # ssh ec2 killall -u lums |
| 110 | + ssh ec2 "kill \$(ps auxw | fgrep feature | awk '{ print \$2 }')" |
| 111 | + sleep 1 |
| 112 | + done |
| 113 | + ssh ec2 ps auxw | fgrep feature |
| 114 | + |
| 115 | + command="bash feature-vector-prototype/src/benchmarks/${bash_script}" |
| 116 | + for ((i=1; i<=${ntrials}; i++)) |
49 | 117 | do |
50 | 118 | logname="${benchname}-$(date +'%Y%m%d-%H%M%S').log" |
51 | 119 | ssh ec2 ${command} | tee ${logname} |
|
0 commit comments