Skip to content

Commit a8f5703

Browse files
committed
[Test] Fix Neuron installation and execution for trainium test.
1 parent 7f67253 commit a8f5703

File tree

2 files changed

+4
-3
lines changed

2 files changed

+4
-3
lines changed

tests/integration-tests/tests/trainium/test_trainium/test_trainium/neuron-ccl.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
cat <<'EOF' >submission-script.sh
88
#!/bin/bash
9-
set -x
9+
set -xe
1010
1111
# FIXME remove this repo once packages are publicly available
1212
TEMPORARY_ARTIFACTS_BUCKET_PATH=s3://aws-parallelcluster-beta/neuron/
@@ -52,7 +52,7 @@ TOTAL_RANK=$(($SLURM_NNODES*32))
5252
# python3 inst-sweep/genneffs_nccl.py -n <total-number-of-ranks> --all --output <output-dir>
5353
NEFF_FILE=test_nccl_64r_50allg_int8_393216/0/file.neff
5454
if [[ ! -f $NEFF_FILE ]]; then
55-
aws s3 cp ${TEMPORARY_ARTIFACTS_BUCKET_PATH}test_nccl_64r_50allg_int8_393216_0_file.neff $NEFF_FILE
55+
aws s3 cp ${TEMPORARY_ARTIFACTS_BUCKET_PATH}test_nccl_64r_50allg_int8_393216_0_file.neff $NEFF_FILE --region us-east-1
5656
fi
5757
5858
# Print eth0 ip

tests/integration-tests/tests/trainium/test_trainium/test_trainium/neuron-installation.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#!/bin/bash
2+
set -ex
23

34
# Private Repository Access
45
# I manually created a TrainiumPreviewRepository secret and a TrainiumPreviewPolicy on the 447714826191 account to permit access to the secret below
@@ -115,7 +116,7 @@ EOF
115116

116117
function main() {
117118
# Download packages from S3 --> FIXME they should be installed from the configured repository
118-
aws s3 cp ${TEMPORARY_ARTIFACTS_BUCKET_PATH} . --recursive
119+
aws s3 cp ${TEMPORARY_ARTIFACTS_BUCKET_PATH} . --recursive --region us-east-1
119120

120121
local OS="$(grep "^ID=" /etc/os-release | cut -d"=" -f 2 | xargs)"
121122
case ${OS} in

0 commit comments

Comments
 (0)