Skip to content

Commit 37cdc6a

Browse files
Malmahrouqi3, sbryngelson, Mohammed Said Hamed Humaid Al-Mahrouqi, mohdsaid497566
authored
Test Suite Flag --rdma-mpi Implemented (#598) (#878)
Co-authored-by: mohdsaid497566 <[email protected]>
Co-authored-by: Spencer Bryngelson <[email protected]>
Co-authored-by: Mohammed Said Hamed Humaid Al-Mahrouqi <[email protected]>
Co-authored-by: mohdsaid497566 <[email protected]>
1 parent 16de11c commit 37cdc6a

File tree

7 files changed

+24
-14
lines changed

7 files changed

+24
-14
lines changed

.github/workflows/frontier/build.sh

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,5 +13,6 @@ if [ "$2" == "bench" ]; then
1313
./mfc.sh run "$dir/case.py" --case-optimization -j 8 --dry-run $build_opts
1414
done
1515
else
16-
./mfc.sh test --dry-run -j 8 $build_opts
16+
./mfc.sh test -a --dry-run --rdma-mpi --generate -j 8 $build_opts
1717
fi
18+

.github/workflows/frontier/test.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ gpus=`rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n'
44
ngpus=`echo "$gpus" | tr -d '[:space:]' | wc -c`
55

66
if [ "$job_device" = "gpu" ]; then
7-
./mfc.sh test --max-attempts 3 -j $ngpus -- -c frontier
7+
./mfc.sh test -a --rdma-mpi --max-attempts 3 -j $ngpus -- -c frontier
88
else
9-
./mfc.sh test --max-attempts 3 -j 32 -- -c frontier
9+
./mfc.sh test -a --rdma-mpi --max-attempts 3 -j 32 -- -c frontier
1010
fi

docs/documentation/testing.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ A test is considered passing when our error tolerances are met in order to maint
1616
- `--percent` (`%`) to specify a percentage of the test suite to select at random and test
1717
- `--max-attempts` (`-m`) the maximum number of attempts to make on a test before considering it failed
1818
- `--no-examples` skips the testing of cases in the examples folder
19+
- `--rdma-mpi` runs additional tests where RDMA MPI is enabled.
1920

2021
To specify a computer, pass the `-c` flag to `./mfc.sh run` like so:
2122
```shell

toolchain/mfc/args.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -83,14 +83,15 @@ def add_common_arguments(p, mask = None):
8383
test.add_argument("-l", "--list", action="store_true", help="List all available tests.")
8484
test.add_argument("-f", "--from", default=test_cases[0].get_uuid(), type=str, help="First test UUID to run.")
8585
test.add_argument("-t", "--to", default=test_cases[-1].get_uuid(), type=str, help="Last test UUID to run.")
86-
test.add_argument("-o", "--only", nargs="+", type=str, default=[], metavar="L", help="Only run tests with specified properties.")
87-
test.add_argument("-a", "--test-all", action="store_true", default=False, help="Run the Post Process Tests too.")
88-
test.add_argument("-%", "--percent", type=int, default=100, help="Percentage of tests to run.")
89-
test.add_argument("-m", "--max-attempts", type=int, default=1, help="Maximum number of attempts to run a test.")
90-
test.add_argument( "--no-build", action="store_true", default=False, help="(Testing) Do not rebuild MFC.")
91-
test.add_argument( "--no-examples", action="store_true", default=False, help="Do not test example cases." )
92-
test.add_argument("--case-optimization", action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.")
93-
test.add_argument( "--dry-run", action="store_true", default=False, help="Build and generate case files but do not run tests.")
86+
test.add_argument("-o", "--only", nargs="+", type=str, default=[], metavar="L", help="Only run tests with specified properties.")
87+
test.add_argument("-a", "--test-all", action="store_true", default=False, help="Run the Post Process Tests too.")
88+
test.add_argument("-%", "--percent", type=int, default=100, help="Percentage of tests to run.")
89+
test.add_argument("-m", "--max-attempts", type=int, default=1, help="Maximum number of attempts to run a test.")
90+
test.add_argument( "--rdma-mpi", action="store_true", default=False, help="Run tests with RDMA MPI enabled")
91+
test.add_argument( "--no-build", action="store_true", default=False, help="(Testing) Do not rebuild MFC.")
92+
test.add_argument( "--no-examples", action="store_true", default=False, help="Do not test example cases." )
93+
test.add_argument("--case-optimization", action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.")
94+
test.add_argument( "--dry-run", action="store_true", default=False, help="Build and generate case files but do not run tests.")
9495

9596
test_meg = test.add_mutually_exclusive_group()
9697
test_meg.add_argument("--generate", action="store_true", default=False, help="(Test Generation) Generate golden files.")

toolchain/mfc/test/case.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -132,7 +132,7 @@ def run(self, targets: List[Union[str, MFCTarget]], gpus: Set[int]) -> subproces
132132
filepath = f'{self.get_dirpath()}/case.py'
133133
tasks = ["-n", str(self.ppn)]
134134
jobs = ["-j", str(ARG("jobs"))] if ARG("case_optimization") else []
135-
case_optimization = ["--case-optimization"] if ARG("case_optimization") else []
135+
case_optimization = ["--case-optimization"] if ARG("case_optimization") else []
136136

137137
if self.params.get("bubbles_lagrange", 'F') == 'T':
138138
input_bubbles_lagrange(self)

toolchain/mfc/test/cases.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -346,9 +346,10 @@ def alter_3d():
346346
def alter_ppn(dimInfo):
347347
if len(dimInfo[0]) == 3:
348348
cases.append(define_case_d(stack, '2 MPI Ranks', {'m': 29, 'n': 29, 'p': 49}, ppn=2))
349+
cases.append(define_case_d(stack, '2 MPI Ranks -> RDMA MPI', {'m': 29, 'n': 29, 'p': 49, 'rdma_mpi': 'T'}, ppn=2))
349350
else:
350351
cases.append(define_case_d(stack, '2 MPI Ranks', {}, ppn=2))
351-
352+
cases.append(define_case_d(stack, '2 MPI Ranks -> RDMA MPI', {'rdma_mpi': 'T'}, ppn=2))
352353

353354
def alter_ib(dimInfo, six_eqn_model=False):
354355
for slip in [True, False]:

toolchain/mfc/test/test.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,12 @@ def __filter(cases_) -> typing.List[TestCase]:
5858
if case.ppn > 1 and not ARG("mpi"):
5959
cases.remove(case)
6060
skipped_cases.append(case)
61-
61+
62+
for case in cases[:]:
63+
if "RDMA MPI" in case.trace:
64+
cases.remove(case)
65+
skipped_cases.append(case)
66+
6267
for case in cases[:]:
6368
if ARG("single"):
6469
skip = ['low_Mach', 'Hypoelasticity', 'teno', 'Chemistry', 'Phase Change model 6'
@@ -191,6 +196,7 @@ def _handle_case(case: TestCase, devices: typing.Set[int]):
191196
return
192197

193198
cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices)
199+
194200
out_filepath = os.path.join(case.get_dirpath(), "out_pre_sim.txt")
195201

196202
common.file_write(out_filepath, cmd.stdout)

0 commit comments

Comments
 (0)