Skip to content

Commit 2602628

Browse files
committed
add ci to gpu
1 parent 0c970ca commit 2602628

File tree

3 files changed

+37
-14
lines changed

3 files changed

+37
-14
lines changed

.github/workflows/phoenix/submit.sh

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,23 @@ usage() {
66
echo "Usage: $0 [script.sh] [cpu|gpu]"
77
}
88

9-
if [ ! -z "$1" ]; then
10-
sbatch_script_contents=`cat $1`
11-
else
9+
if [ -z "$1" ] || [ -z "$2" ]; then
1210
usage
1311
exit 1
1412
fi
1513

14+
sbatch_script_contents=$(cat "$1")  # body of the script to submit; "$1" is quoted so paths containing spaces are read correctly
15+
16+
precision="${3:-}"
17+
1618
sbatch_cpu_opts="\
1719
#SBATCH -p cpu-small # partition
1820
#SBATCH --ntasks-per-node=24 # Number of cores per node required
1921
#SBATCH --mem-per-cpu=2G # Memory per core\
2022
"
2123

2224
sbatch_gpu_opts="\
23-
#SBATCH -CV100-16GB
25+
#SBATCH -C V100-16GB
2426
#SBATCH -G2\
2527
"
2628

@@ -35,25 +37,30 @@ fi
3537

3638
job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2"
3739

40+
if [ -n "$precision" ]; then
41+
job_slug="$job_slug-$precision"
42+
fi
43+
3844
sbatch <<EOT
3945
#!/bin/bash
4046
#SBATCH -Jshb-$job_slug # Job name
4147
#SBATCH --account=gts-sbryngelson3 # charge account
4248
#SBATCH -N1 # Number of nodes required
4349
$sbatch_device_opts
44-
#SBATCH -t 02:00:00 # Duration of the job (Ex: 15 mins)
50+
#SBATCH -t 02:00:00 # Duration of the job (Ex: 2 hours)
4551
#SBATCH -q embers # QOS Name
46-
#SBATCH -o$job_slug.out # Combined output and error messages file
52+
#SBATCH -o $job_slug.out # Combined output and error messages file
4753
#SBATCH -W # Do not exit until the submitted job terminates.
4854
4955
set -e
5056
set -x
5157
5258
cd "\$SLURM_SUBMIT_DIR"
53-
echo "Running in $(pwd):"
59+
echo "Running in \$(pwd):"
5460
5561
job_slug="$job_slug"
5662
job_device="$2"
63+
precision="$precision"
5764
5865
. ./mfc.sh load -c p -m $2
5966

.github/workflows/phoenix/test.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,12 @@ build_opts=""
44
if [ "$job_device" == "gpu" ]; then
55
build_opts="--gpu"
66
fi
7+
precision_flag=""  # extra ./mfc.sh flag; stays empty unless single precision is requested (no spaces around '=' in an assignment)
8+
if [ "$precision" == "single" ]; then  # '[' needs surrounding spaces; $precision is set by submit.sh in the generated job script
9+
precision_flag="--single"
10+
fi
711

8-
./mfc.sh build -j 8 $build_opts
12+
./mfc.sh build -j 8 $build_opts $precision_flag
913

1014
n_test_threads=8
1115

@@ -16,6 +20,6 @@ if [ "$job_device" == "gpu" ]; then
1620
n_test_threads=`expr $gpu_count \* 2`
1721
fi
1822

19-
./mfc.sh test --max-attempts 3 -a -j $n_test_threads $device_opts -- -c phoenix
23+
./mfc.sh test --max-attempts 3 -a -j $n_test_threads $device_opts $precision_flag -- -c phoenix
2024

2125

.github/workflows/test.yml

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,13 @@ jobs:
3636
include:
3737
- os: ubuntu
3838
mpi: no-mpi
39+
precision:
40+
debug: no-debug
41+
intel: false
42+
43+
include:
44+
- os: ubuntu
45+
mpi: mpi
3946
precision: single
4047
debug: no-debug
4148
intel: false
@@ -106,9 +113,14 @@ jobs:
106113
matrix:
107114
device: ['cpu', 'gpu']
108115
lbl: ['gt', 'frontier']
116+
precision: ['']
109117
exclude:
110118
- device: cpu
111119
lbl: frontier
120+
include:
121+
- device: gpu
122+
lbl: gt
123+
precision: single
112124
runs-on:
113125
group: phoenix
114126
labels: ${{ matrix.lbl }}
@@ -121,23 +133,23 @@ jobs:
121133

122134
- name: Build & Test
123135
if: matrix.lbl == 'gt'
124-
run: bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/test.sh ${{ matrix.device }}
136+
run: bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/test.sh ${{ matrix.device }} ${{ matrix.precision }}
125137

126138
- name: Build
127139
if: matrix.lbl == 'frontier'
128140
run: bash .github/workflows/frontier/build.sh
129141

130142
- name: Test
131143
if: matrix.lbl == 'frontier'
132-
run: bash .github/workflows/frontier/submit.sh .github/workflows/frontier/test.sh ${{matrix.device}}
144+
run: bash .github/workflows/frontier/submit.sh .github/workflows/frontier/test.sh ${{matrix.device}} ${{ matrix.precision }}
133145

134146
- name: Print Logs
135147
if: always()
136-
run: cat test-${{ matrix.device }}.out
148+
run: cat test-${{ matrix.device }}${{ matrix.precision != '' && format('-{0}', matrix.precision) || '' }}.out # submit.sh adds "-<precision>" to the log name only when precision is non-empty
137149

138150
- name: Archive Logs
139151
uses: actions/upload-artifact@v4
140152
if: always()
141153
with:
142-
name: logs-${{ strategy.job-index }}-${{ matrix.device }}
143-
path: test-${{ matrix.device }}.out
154+
name: logs-${{ strategy.job-index }}-${{ matrix.device }}-${{ matrix.precision }}
155+
path: test-${{ matrix.device }}${{ matrix.precision != '' && format('-{0}', matrix.precision) || '' }}.out # submit.sh adds "-<precision>" to the log name only when precision is non-empty

0 commit comments

Comments
 (0)