Skip to content

Commit b4f6336

Browse files
committed
Merge remote-tracking branch 'upstream/master' into MPIRefactor
2 parents bf8af42 + 2f8eef1 commit b4f6336

File tree

87 files changed

+1910
-2314
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+1910
-2314
lines changed

.github/pull_request_template.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,5 +54,5 @@ To make sure the code is performing as expected on GPU devices, I have:
5454
- [ ] Ran the code on MI200+ GPUs and ensure the new features performed as expected (the GPU results match the CPU results)
5555
- [ ] Enclosed the new feature via `nvtx` ranges so that they can be identified in profiles
5656
- [ ] Ran a Nsight Systems profile using `./mfc.sh run XXXX --gpu -t simulation --nsys`, and have attached the output file (`.nsys-rep`) and plain text results to this PR
57-
- [ ] Ran an Omniperf profile using `./mfc.sh run XXXX --gpu -t simulation --omniperf`, and have attached the output file and plain text results to this PR.
57+
- [ ] Ran a Rocprof Systems profile using `./mfc.sh run XXXX --gpu -t simulation --rsys --hip-trace`, and have attached the output file and plain text results to this PR.
5858
- [ ] Ran my code using various numbers of different GPUs (1, 2, and 8, for example) in parallel and made sure that the results scale similarly to what happens if you run without the new code/feature
Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
#!/bin/bash
22

3+
build_opts=""
4+
if [ "$1" == "gpu" ]; then
5+
build_opts="--gpu"
6+
fi
7+
38
. ./mfc.sh load -c f -m g
4-
./mfc.sh test --dry-run -j 8 --gpu
9+
./mfc.sh test --dry-run -j 8 $build_opts

.github/workflows/frontier/submit.sh

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,29 @@ else
1313
exit 1
1414
fi
1515

16+
if [ "$2" == "cpu" ]; then
17+
sbatch_device_opts="\
18+
#SBATCH -n 32 # Number of cores required"
19+
elif [ "$2" == "gpu" ]; then
20+
sbatch_device_opts="\
21+
#SBATCH -n 8 # Number of cores required"
22+
else
23+
usage
24+
exit 1
25+
fi
26+
27+
1628
job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2"
1729

1830
sbatch <<EOT
1931
#!/bin/bash
2032
#SBATCH -JMFC-$job_slug # Job name
2133
#SBATCH -A CFD154 # charge account
2234
#SBATCH -N 1 # Number of nodes required
23-
#SBATCH -n 8 # Number of cores required
35+
$sbatch_device_opts
2436
#SBATCH -t 01:59:00 # Duration of the job (Ex: 15 mins)
2537
#SBATCH -o$job_slug.out # Combined output and error messages file
38+
#SBATCH -p extended # Extended partition for shorter queues
2639
#SBATCH -q debug # Use debug QOS - only one job per user allowed in queue!
2740
#SBATCH -W # Do not exit until the submitted job terminates.
2841

.github/workflows/frontier/test.sh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,8 @@
33
gpus=`rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n' ' '`
44
ngpus=`echo "$gpus" | tr -d '[:space:]' | wc -c`
55

6-
./mfc.sh test --max-attempts 3 -j $ngpus -- -c frontier
7-
6+
if [ "$job_device" == "gpu" ]; then
7+
./mfc.sh test --max-attempts 3 -j $ngpus -- -c frontier
8+
else
9+
./mfc.sh test --max-attempts 3 -j 32 -- -c frontier
10+
fi

.github/workflows/phoenix/bench.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ if [ "$job_device" == "gpu" ]; then
99
fi
1010

1111
if [ "$job_device" == "gpu" ]; then # `[` is a command name: it must be separated from its arguments by spaces
12-
./mfc.sh bench --mem 12 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix $device_opts -n $n_ranks
12+
./mfc.sh bench --mem 12 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks
1313
else
14-
./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix $device_opts -n $n_ranks
14+
./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks
1515
fi

.github/workflows/phoenix/submit.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ sbatch <<EOT
4242
#SBATCH --account=gts-sbryngelson3 # charge account
4343
#SBATCH -N1 # Number of nodes required
4444
$sbatch_device_opts
45-
#SBATCH -t 02:00:00 # Duration of the job (Ex: 15 mins)
45+
#SBATCH -t 03:00:00 # Duration of the job (Ex: 15 mins)
4646
#SBATCH -q embers # QOS Name
4747
#SBATCH -o$job_slug.out # Combined output and error messages file
4848
#SBATCH -W # Do not exit until the submitted job terminates.

.github/workflows/test.yml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,6 @@ jobs:
9797
matrix:
9898
device: ['cpu', 'gpu']
9999
lbl: ['gt', 'frontier']
100-
exclude:
101-
- device: cpu
102-
lbl: frontier
103100
runs-on:
104101
group: phoenix
105102
labels: ${{ matrix.lbl }}
@@ -116,7 +113,7 @@ jobs:
116113

117114
- name: Build
118115
if: matrix.lbl == 'frontier'
119-
run: bash .github/workflows/frontier/build.sh
116+
run: bash .github/workflows/frontier/build.sh ${{ matrix.device }}
120117

121118
- name: Test
122119
if: matrix.lbl == 'frontier'

docs/documentation/running.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,13 +98,13 @@ Learn more about NVIDIA Nsight Compute [here](https://docs.nvidia.com/nsight-com
9898

9999

100100
#### AMD GPUs
101-
- Rocprof (ROC): `./mfc.sh run ... -t simulation --roc --hip-trace [rocprof flags]` allows one to visualize MFC's system-wide performance with [Perfetto UI](https://ui.perfetto.dev/).
101+
- Rocprof Systems (RSYS): `./mfc.sh run ... -t simulation --rsys --hip-trace [rocprof flags]` allows one to visualize MFC's system-wide performance with [Perfetto UI](https://ui.perfetto.dev/).
102102
When used, `--rsys` will run the simulation and generate files in the case directory for all targets.
103103
`results.json` can then be imported in [Perfetto's UI](https://ui.perfetto.dev/).
104104
Learn more about AMD Rocprof [here](https://rocm.docs.amd.com/projects/rocprofiler/en/docs-5.5.1/rocprof.html).
105105
It is best to run case files with few timesteps to keep the report file sizes manageable.
106-
- Omniperf (OMNI): `./mfc.sh run ... -t simulation --omni [omniperf flags]` allows one to conduct kernel-level profiling with [AMD's Omniperf](https://rocm.docs.amd.com/projects/omniperf/en/latest/index.html).
107-
When used, `--omni` will output profiling information for all subroutines, including rooflines, cache usage, register usage, and more, after the simulation is run.
106+
- Rocprof Compute (RCU): `./mfc.sh run ... -t simulation --rcu -n <name> [rocprof-compute flags]` allows one to conduct kernel-level profiling with [ROCm Compute Profiler](https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/what-is-rocprof-compute.html).
107+
When used, `--rcu` will output profiling information for all subroutines, including rooflines, cache usage, register usage, and more, after the simulation is run.
108108
Adding this argument will moderately slow down the simulation and run the MFC executable several times.
109109
For this reason, it should only be used with case files with few timesteps.
110110

misc/m_silo_proxy.f90

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ module m_silo_proxy
4444

4545
!> @brief Refer to page 28 of Silo's user guide (10/2007, v4.6) for
4646
!! information about this function
47-
function DBCREATE(pathname, lpathname, mode, target, &
47+
impure function DBCREATE(pathname, lpathname, mode, target, &
4848
fileinfo, lfileinfo, filetype, status)
4949

5050
integer :: DBCREATE
@@ -66,7 +66,7 @@ end function DBCREATE
6666

6767
!> @brief Refer to page 235 of Silo's user guide (10/2007, v4.6)
6868
!! for information about this function
69-
function DBGET2DSTRLEN()
69+
impure function DBGET2DSTRLEN()
7070

7171
integer :: DBGET2DSTRLEN
7272

@@ -79,7 +79,7 @@ end function DBGET2DSTRLEN
7979

8080
!> @brief Refer to page 234 of Silo's user guide (10/2007, v4.6)
8181
!! for information about this function
82-
function DBSET2DSTRLEN(len)
82+
impure function DBSET2DSTRLEN(len)
8383

8484
integer :: DBSET2DSTRLEN
8585
integer, intent(IN) :: len
@@ -93,7 +93,7 @@ end function DBSET2DSTRLEN
9393

9494
!> @brief Refer to page 185 of Silo's user guide (10/2007, v4.6)
9595
!! for information about this function
96-
function DBMKOPTLIST(maxopts, optlist_id)
96+
impure function DBMKOPTLIST(maxopts, optlist_id)
9797

9898
integer :: DBMKOPTLIST
9999
integer, intent(IN) :: maxopts
@@ -107,7 +107,7 @@ end function DBMKOPTLIST
107107

108108
!> @brief Refer to page 186 of Silo's user guide (10/2007, v4.6)
109109
!! for information about this function
110-
function DBADDIOPT(optlist_id, option, ivalue)
110+
impure function DBADDIOPT(optlist_id, option, ivalue)
111111

112112
integer :: DBADDIOPT
113113
integer, intent(IN) :: optlist_id
@@ -123,7 +123,7 @@ end function DBADDIOPT
123123

124124
!> @brief Refer to page 186 of Silo's user guide (10/2007, v4.6)
125125
!! for information about this function
126-
function DBADDDOPT(optlist_id, option, dvalue)
126+
impure function DBADDDOPT(optlist_id, option, dvalue)
127127

128128
integer :: DBADDDOPT
129129
integer, intent(IN) :: optlist_id
@@ -139,7 +139,7 @@ end function DBADDDOPT
139139

140140
!> @brief Refer to page 121 of Silo's user guide (10/2007, v4.6)
141141
!! for information about this function
142-
function DBPUTMMESH(dbid, name, lname, nmesh, meshnames, &
142+
impure function DBPUTMMESH(dbid, name, lname, nmesh, meshnames, &
143143
lmeshnames, meshtypes, optlist_id, status)
144144

145145
integer :: DBPUTMMESH
@@ -162,7 +162,7 @@ end function DBPUTMMESH
162162

163163
!> @brief Refer to page 189 of Silo's user guide (10/2007, v4.6)
164164
!! for information about this function
165-
function DBFREEOPTLIST(optlist_id)
165+
impure function DBFREEOPTLIST(optlist_id)
166166

167167
integer :: DBFREEOPTLIST
168168
integer, intent(IN) :: optlist_id
@@ -175,7 +175,7 @@ end function DBFREEOPTLIST
175175

176176
!> @brief Refer to page 57 of Silo's user guide (10/2007, v4.6) for
177177
!! information about this function
178-
function DBPUTQM(dbid, name, lname, xname, lxname, yname, lyname, &
178+
impure function DBPUTQM(dbid, name, lname, xname, lxname, yname, lyname, &
179179
zname, lzname, x, y, z, dims, ndims, datatype, &
180180
coordtype, optlist_id, status)
181181

@@ -208,7 +208,7 @@ end function DBPUTQM
208208

209209
!> @brief Refer to page 46 of Silo's user guide (10/2007, v4.6) for
210210
!! information about this function
211-
function DBPUTCURVE(dbid, curvename, lcurvename, xvals, yvals, &
211+
impure function DBPUTCURVE(dbid, curvename, lcurvename, xvals, yvals, &
212212
datatype, npoints, optlist_id, status)
213213

214214
integer :: DBPUTCURVE
@@ -231,7 +231,7 @@ end function DBPUTCURVE
231231

232232
!> @brief Refer to page 130 of Silo's user guide (10/2007, v4.6)
233233
!! for information about this function
234-
function DBPUTMVAR(dbid, name, lname, nvar, varnames, lvarnames, &
234+
impure function DBPUTMVAR(dbid, name, lname, nvar, varnames, lvarnames, &
235235
vartypes, optlist_id, status)
236236

237237
integer :: DBPUTMVAR
@@ -254,7 +254,7 @@ end function DBPUTMVAR
254254

255255
!> @brief Refer to page 64 of Silo's user guide (10/2007, v4.6) for
256256
!! information about this function
257-
function DBPUTQV1(dbid, name, lname, meshname, lmeshname, var, &
257+
impure function DBPUTQV1(dbid, name, lname, meshname, lmeshname, var, &
258258
dims, ndims, mixvar, mixlen, datatype, &
259259
centering, optlist_id, status)
260260

@@ -283,7 +283,7 @@ end function DBPUTQV1
283283

284284
!> @brief Refer to page 31 of Silo's user guide (10/2007, v4.6) for
285285
!! information about this function
286-
function DBCLOSE(dbid)
286+
impure function DBCLOSE(dbid)
287287

288288
integer :: DBCLOSE
289289
integer, intent(IN) :: dbid

0 commit comments

Comments
 (0)