Skip to content
Merged
Show file tree
Hide file tree
Changes from 40 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
bac5df7
Frontier Benchmarking (#453)
Jun 11, 2025
819e599
fixed frontier setup
Jun 11, 2025
17db70d
reduced frontier to bench on GPU
Malmahrouqi3 Jun 12, 2025
b28167f
updated frontier job instructions
Malmahrouqi3 Jun 12, 2025
ae04e14
fixed syntax in yaml file
Malmahrouqi3 Jun 12, 2025
8fb75b6
matrix.device syntax error addressed
Malmahrouqi3 Jun 12, 2025
57da396
another fix
Malmahrouqi3 Jun 12, 2025
0432f1f
fix
Malmahrouqi3 Jun 12, 2025
08052ea
matrix.device
Malmahrouqi3 Jun 12, 2025
b66403b
removed matrix.device - flattened
Malmahrouqi3 Jun 12, 2025
e49d55f
removed leading to syntax error
Malmahrouqi3 Jun 12, 2025
03034d7
replaced matrix.name to matrix.cluster to avoid syntax error
Malmahrouqi3 Jun 12, 2025
02688d9
Update bench.yml
Malmahrouqi3 Jun 16, 2025
c6258a6
just some ()
Malmahrouqi3 Jun 16, 2025
11fcf9e
Merge branch 'master' into frontier-CI2
Malmahrouqi3 Jun 17, 2025
4cbab63
Update bench.yml
Malmahrouqi3 Jun 17, 2025
51d34ae
Merge branch 'master' into frontier-CI2
sbryngelson Jun 18, 2025
a8268f3
Update submit-bench.sh
sbryngelson Jun 18, 2025
8dcf100
Update submit-bench.sh
Malmahrouqi3 Jun 18, 2025
bde0c17
Merge branch 'master' into frontier-CI2
sbryngelson Jun 18, 2025
def52bc
Merge branch 'master' into frontier-CI2
sbryngelson Jun 21, 2025
88fcf35
Merge branch 'master' into frontier-CI2
sbryngelson Jun 21, 2025
ea02640
Merge branch 'master' into frontier-CI2
sbryngelson Jun 21, 2025
f705d7f
Normalize line endings to UNIX format
Malmahrouqi3 Jun 22, 2025
53eec50
removed commented Phoenix part
Malmahrouqi3 Jun 22, 2025
93e0fac
removed file changes check
Malmahrouqi3 Jun 22, 2025
c7360eb
undo phoenix stuff
Malmahrouqi3 Jun 22, 2025
cc318bc
Revert "undo phoenix stuff"
Malmahrouqi3 Jun 22, 2025
5c8b925
Revert "Normalize line endings to UNIX format"
Malmahrouqi3 Jun 22, 2025
3c06357
some stuff
Malmahrouqi3 Jun 22, 2025
c6b1ecf
removed parts
Malmahrouqi3 Jun 22, 2025
3317bdb
Revert "Update submit-bench.sh"
Malmahrouqi3 Jun 22, 2025
27aa35b
undo things in test.yml
Malmahrouqi3 Jun 22, 2025
77b30e3
Merge branch 'master' into frontier-CI2
Malmahrouqi3 Jun 22, 2025
6158e12
fixing
sbryngelson Jun 23, 2025
5c6acc0
add build script
wilfonba Jun 25, 2025
32a8292
Merge branch 'master' into frontier-CI2
sbryngelson Jun 28, 2025
b842f21
Merge branch 'master' into frontier-CI2
sbryngelson Jul 3, 2025
405c39c
Update bench.yml
sbryngelson Jul 3, 2025
98437d2
Update submit-bench.sh
sbryngelson Jul 3, 2025
e7e7de8
Update bench.sh
sbryngelson Jul 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 39 additions & 11 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,40 @@ jobs:
filters: ".github/file-filter.yml"

self:
name: Georgia Tech | Phoenix (NVHPC)
name: "${{ matrix.name }} (${{ matrix.device }})"
if: ${{ github.repository == 'MFlowCode/MFC' && needs.file-changes.outputs.checkall == 'true' && (
(github.event_name == 'pull_request_review' && github.event.review.state == 'approved') ||
(github.event_name == 'pull_request' && github.event.pull_request.user.login == 'sbryngelson')
) }}
) }}
needs: file-changes
strategy:
matrix:
device: ['cpu', 'gpu']
fail-fast: false
matrix:
include:
- cluster: phoenix
name: Georgia Tech | Phoenix (NVHPC)
group: phoenix
labels: gt
flag: p
device: cpu
build_script: ""
- cluster: phoenix
name: Georgia Tech | Phoenix (NVHPC)
group: phoenix
labels: gt
flag: p
device: gpu
build_script: ""
- cluster: frontier
name: Oak Ridge | Frontier (CCE)
group: phoenix
labels: frontier
flag: f
device: gpu
build_script: "bash .github/workflows/frontier/build.sh gpu bench"
runs-on:
group: phoenix
labels: gt
group: ${{ matrix.group }}
labels: ${{ matrix.labels }}
timeout-minutes: 1400
env:
ACTIONS_RUNNER_FORCE_ACTIONS_NODE_VERSION: node16
Expand All @@ -53,15 +74,22 @@ jobs:
ref: master
path: master

# Optional build step: it runs only for matrix entries whose build_script is
# a non-empty string. The same build_script command is launched in the
# background in both the pr/ and master/ directories, so the two run
# concurrently. `wait %1 && wait %2` then waits on the first background job
# and, if it succeeded, on the second; a non-zero status from either wait
# becomes the status of the command list.
- name: Setup & Build
if: matrix.build_script != ''
run: |
(cd pr && ${{ matrix.build_script }}) &
(cd master && ${{ matrix.build_script }}) &
wait %1 && wait %2

- name: Bench (Master v. PR)
run: |
(cd pr && bash .github/workflows/phoenix/submit-bench.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}) &
(cd master && bash .github/workflows/phoenix/submit-bench.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}) &
(cd pr && bash .github/workflows/${{ matrix.cluster }}/submit-bench.sh .github/workflows/${{ matrix.cluster }}/bench.sh ${{ matrix.device }}) &
(cd master && bash .github/workflows/${{ matrix.cluster }}/submit-bench.sh .github/workflows/${{ matrix.cluster }}/bench.sh ${{ matrix.device }}) &
wait %1 && wait %2

- name: Generate & Post Comment
run: |
(cd pr && . ./mfc.sh load -c p -m g)
(cd pr && . ./mfc.sh load -c ${{ matrix.flag }} -m g)
(cd pr && ./mfc.sh bench_diff ../master/bench-${{ matrix.device }}.yaml ../pr/bench-${{ matrix.device }}.yaml)

- name: Print Logs
Expand All @@ -72,9 +100,9 @@ jobs:

- name: Archive Logs
uses: actions/upload-artifact@v4
if: always()
if: always()
with:
name: logs-${{ matrix.device }}
name: ${{ matrix.cluster }}-${{ matrix.device }}
path: |
pr/bench-${{ matrix.device }}.*
pr/build/benchmarks/*
Expand Down
16 changes: 16 additions & 0 deletions .github/workflows/frontier/bench.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash

# Runs `./mfc.sh bench` for the Frontier cluster configuration.
#
# Inputs (shell variables that must be set before this script body runs;
# submit-bench.sh defines both before inlining this file into the batch job):
#   job_device - "gpu" selects the GPU path; any other value takes the CPU path
#   job_slug   - stem of the results file, written as "$job_slug.yaml"

# Rank count used on the CPU path; the GPU path replaces it with the GPU count.
n_ranks=12

if [ "$job_device" = "gpu" ]; then
    # First column of `rocm-smi --showid`, reduced to the numeric IDs,
    # de-duplicated (adjacent repeats) and joined into one space-separated list.
    gpu_ids=$(rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n' ' ' | sed 's/ $//')
    n_ranks=$(echo "$gpu_ids" | wc -w) # one rank per detected GPU

    # Without this guard an empty rocm-smi result would silently launch the
    # benchmark with "-j 0 -n 0".
    if [ "$n_ranks" -eq 0 ]; then
        echo "Error: rocm-smi reported no GPU IDs; cannot run the GPU benchmark." >&2
        exit 1
    fi

    # $gpu_ids is deliberately left unquoted so that each ID is passed to
    # `-g` as its own argument.
    ./mfc.sh bench --mem 12 -j $n_ranks -o "$job_slug.yaml" -- -c frontier --gpu -g $gpu_ids -n $n_ranks
else
    # No device options on the CPU path; nothing is read from variables this
    # script did not set itself.
    ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c frontier -n $n_ranks
fi
10 changes: 9 additions & 1 deletion .github/workflows/frontier/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,12 @@ if [ "$1" = "gpu" ]; then
fi

. ./mfc.sh load -c f -m g
./mfc.sh test --dry-run -j 8 $build_opts

# Second argument selects what gets dry-run:
#   "bench"       - every benchmarks/<name>/case.py, with --case-optimization
#   anything else - the test suite
# $build_opts is intentionally unquoted so that an empty value adds no argument.
if [ "$2" = "bench" ]; then
    for dir in benchmarks/*/; do
        # If the glob matches nothing, bash leaves the pattern as a literal
        # string; skip it instead of passing a bogus path to mfc.sh.
        [ -d "$dir" ] || continue
        # Strip the trailing slash from the glob match to avoid "//case.py".
        ./mfc.sh run "${dir%/}/case.py" --case-optimization -j 8 --dry-run $build_opts
    done
else
    ./mfc.sh test --dry-run -j 8 $build_opts
fi
54 changes: 54 additions & 0 deletions .github/workflows/frontier/submit-bench.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/bash

# Submits a script to Slurm as a single-node batch job and, because of the
# `-W` directive, does not return until that job has terminated.
#
# Arguments:
#   $1 - path to the script whose contents are inlined into the batch job
#   $2 - "cpu" or "gpu"; selects the task count requested with `#SBATCH -n`
#
# The inlined script can rely on two variables defined just before it in the
# generated job: job_slug ("<script stem>-<device>") and job_device ($2).

set -e

usage() {
    echo "Usage: $0 [script.sh] [cpu|gpu]"
}

if [ -z "$1" ]; then
    usage >&2
    exit 1
fi

# Fail with an explicit message rather than a bare `cat` error.
if [ ! -r "$1" ]; then
    echo "Error: cannot read script '$1'." >&2
    usage >&2
    exit 1
fi

# Quoted so that paths containing spaces or glob characters are read intact.
sbatch_script_contents=$(cat "$1")

case "$2" in
    cpu)
        sbatch_device_opts="\
#SBATCH -n 32 # Number of cores required"
        ;;
    gpu)
        sbatch_device_opts="\
#SBATCH -n 8 # Number of cores required"
        ;;
    *)
        usage >&2
        exit 1
        ;;
esac

# Script file name without ".sh", every non-alphanumeric character replaced
# by "-", followed by "-<device>".
job_slug="$(basename "$1" | sed -e 's/\.sh$//' -e 's/[^a-zA-Z0-9]/-/g')-$2"

# The heredoc delimiter is unquoted, so $job_slug, $sbatch_device_opts, $2 and
# $sbatch_script_contents are expanded now, at submission time. Anything that
# must be evaluated inside the running job is escaped with a backslash.
# The #SBATCH lines must stay at the start of their lines.
sbatch <<EOT
#!/bin/bash
#SBATCH -JMFC-$job_slug # Job name
#SBATCH -A CFD154 # charge account
#SBATCH -N 1 # Number of nodes required
$sbatch_device_opts
#SBATCH -t 03:59:00 # Duration of the job (Ex: 15 mins)
#SBATCH -o$job_slug.out # Combined output and error messages file
#SBATCH -p extended # Extended partition for shorter queues
#SBATCH -W # Do not exit until the submitted job terminates.

set -e
set -x

cd "\$SLURM_SUBMIT_DIR"
echo "Running in \$(pwd):"

job_slug="$job_slug"
job_device="$2"

. ./mfc.sh load -c f -m g

$sbatch_script_contents

EOT


2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -128,4 +128,4 @@ jobs:
if: always()
with:
name: logs-${{ strategy.job-index }}-${{ matrix.device }}
path: test-${{ matrix.device }}.out
path: test-${{ matrix.device }}.out
Loading