diff --git a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh index c2e1893427..af272564e8 100644 --- a/.github/workflows/frontier/build.sh +++ b/.github/workflows/frontier/build.sh @@ -13,5 +13,6 @@ if [ "$2" == "bench" ]; then ./mfc.sh run "$dir/case.py" --case-optimization -j 8 --dry-run $build_opts done else - ./mfc.sh test --dry-run -j 8 $build_opts + ./mfc.sh test -a --dry-run --rdma-mpi --generate -j 8 $build_opts fi + diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh index 57481fa949..aa977aa004 100644 --- a/.github/workflows/frontier/test.sh +++ b/.github/workflows/frontier/test.sh @@ -4,7 +4,7 @@ gpus=`rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n' ngpus=`echo "$gpus" | tr -d '[:space:]' | wc -c` if [ "$job_device" = "gpu" ]; then - ./mfc.sh test --max-attempts 3 -j $ngpus -- -c frontier + ./mfc.sh test -a --rdma-mpi --max-attempts 3 -j $ngpus -- -c frontier else - ./mfc.sh test --max-attempts 3 -j 32 -- -c frontier + ./mfc.sh test -a --rdma-mpi --max-attempts 3 -j 32 -- -c frontier fi diff --git a/docs/documentation/testing.md b/docs/documentation/testing.md index 15b1ca3618..e139c5b201 100644 --- a/docs/documentation/testing.md +++ b/docs/documentation/testing.md @@ -16,6 +16,7 @@ A test is considered passing when our error tolerances are met in order to maint - `--percent` (`%`) to specify a percentage of the test suite to select at random and test - `--max-attempts` (`-m`) the maximum number of attempts to make on a test before considering it failed - `--no-examples` skips the testing of cases in the examples folder +- `--rdma-mpi` runs additional tests where RDMA MPI is enabled. 
To specify a computer, pass the `-c` flag to `./mfc.sh run` like so: ```shell diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py index d3de6769c3..7659132bb6 100644 --- a/toolchain/mfc/args.py +++ b/toolchain/mfc/args.py @@ -83,14 +83,15 @@ def add_common_arguments(p, mask = None): test.add_argument("-l", "--list", action="store_true", help="List all available tests.") test.add_argument("-f", "--from", default=test_cases[0].get_uuid(), type=str, help="First test UUID to run.") test.add_argument("-t", "--to", default=test_cases[-1].get_uuid(), type=str, help="Last test UUID to run.") - test.add_argument("-o", "--only", nargs="+", type=str, default=[], metavar="L", help="Only run tests with specified properties.") - test.add_argument("-a", "--test-all", action="store_true", default=False, help="Run the Post Process Tests too.") - test.add_argument("-%", "--percent", type=int, default=100, help="Percentage of tests to run.") - test.add_argument("-m", "--max-attempts", type=int, default=1, help="Maximum number of attempts to run a test.") - test.add_argument( "--no-build", action="store_true", default=False, help="(Testing) Do not rebuild MFC.") - test.add_argument( "--no-examples", action="store_true", default=False, help="Do not test example cases." 
) - test.add_argument("--case-optimization", action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.") - test.add_argument( "--dry-run", action="store_true", default=False, help="Build and generate case files but do not run tests.") + test.add_argument("-o", "--only", nargs="+", type=str, default=[], metavar="L", help="Only run tests with specified properties.") + test.add_argument("-a", "--test-all", action="store_true", default=False, help="Run the Post Process Tests too.") + test.add_argument("-%", "--percent", type=int, default=100, help="Percentage of tests to run.") + test.add_argument("-m", "--max-attempts", type=int, default=1, help="Maximum number of attempts to run a test.") + test.add_argument( "--rdma-mpi", action="store_true", default=False, help="Run tests with RDMA MPI enabled") + test.add_argument( "--no-build", action="store_true", default=False, help="(Testing) Do not rebuild MFC.") + test.add_argument( "--no-examples", action="store_true", default=False, help="Do not test example cases." 
) + test.add_argument("--case-optimization", action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.") + test.add_argument( "--dry-run", action="store_true", default=False, help="Build and generate case files but do not run tests.") test_meg = test.add_mutually_exclusive_group() test_meg.add_argument("--generate", action="store_true", default=False, help="(Test Generation) Generate golden files.") diff --git a/toolchain/mfc/test/case.py b/toolchain/mfc/test/case.py index 46d9cd0fd8..80a16600de 100644 --- a/toolchain/mfc/test/case.py +++ b/toolchain/mfc/test/case.py @@ -132,7 +132,7 @@ def run(self, targets: List[Union[str, MFCTarget]], gpus: Set[int]) -> subproces filepath = f'{self.get_dirpath()}/case.py' tasks = ["-n", str(self.ppn)] jobs = ["-j", str(ARG("jobs"))] if ARG("case_optimization") else [] - case_optimization = ["--case-optimization"] if ARG("case_optimization") else [] + case_optimization = ["--case-optimization"] if ARG("case_optimization") else [] if self.params.get("bubbles_lagrange", 'F') == 'T': input_bubbles_lagrange(self) diff --git a/toolchain/mfc/test/cases.py b/toolchain/mfc/test/cases.py index 99bd686259..8c1de1acf2 100644 --- a/toolchain/mfc/test/cases.py +++ b/toolchain/mfc/test/cases.py @@ -346,9 +346,10 @@ def alter_3d(): def alter_ppn(dimInfo): if len(dimInfo[0]) == 3: cases.append(define_case_d(stack, '2 MPI Ranks', {'m': 29, 'n': 29, 'p': 49}, ppn=2)) + cases.append(define_case_d(stack, '2 MPI Ranks -> RDMA MPI', {'m': 29, 'n': 29, 'p': 49, 'rdma_mpi': 'T'}, ppn=2)) else: cases.append(define_case_d(stack, '2 MPI Ranks', {}, ppn=2)) - + cases.append(define_case_d(stack, '2 MPI Ranks -> RDMA MPI', {'rdma_mpi': 'T'}, ppn=2)) def alter_ib(dimInfo, six_eqn_model=False): for slip in [True, False]: diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py index 6e3ec8cb6e..23f54ea3c9 100644 --- a/toolchain/mfc/test/test.py +++ b/toolchain/mfc/test/test.py @@ -58,7 
+58,12 @@ def __filter(cases_) -> typing.List[TestCase]: if case.ppn > 1 and not ARG("mpi"): cases.remove(case) skipped_cases.append(case) - + + for case in cases[:]: + if "RDMA MPI" in case.trace and not ARG("rdma_mpi"): + cases.remove(case) + skipped_cases.append(case) + for case in cases[:]: if ARG("single"): skip = ['low_Mach', 'Hypoelasticity', 'teno', 'Chemistry', 'Phase Change model 6' @@ -191,6 +196,7 @@ def _handle_case(case: TestCase, devices: typing.Set[int]): return cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices) + out_filepath = os.path.join(case.get_dirpath(), "out_pre_sim.txt") common.file_write(out_filepath, cmd.stdout)