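Revert CI changes: remove single/double precision handling from the benchmark workflow, Phoenix scripts, and bench.py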

Archith Iyer · Archith Iyer · commit 2b2702ead894 · 2024-11-24T22:17:56.000-08:00
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
@@ -11,6 +11,7 @@ jobs:
     steps:
       - name: Clone
         uses: actions/checkout@v4
+
       - name: Detect Changes
         uses: dorny/paths-filter@v3
         id: changes
@@ -25,7 +26,7 @@ jobs:
       matrix:
         device: ['cpu', 'gpu']
     runs-on:
-      group: phoenix
+      group:  phoenix
       labels: gt
     timeout-minutes: 1400
     env:
@@ -36,43 +37,32 @@ jobs:
         uses: actions/checkout@v4
         with:
           path: pr
+
       - name: Clone - Master
         uses: actions/checkout@v4
         with:
           repository: MFlowCode/MFC
           ref: master
           path: master
 
-      - name: Bench (Master vs PR)
+      - name: Bench (Master v. PR)
         run: |
-          (cd pr     && bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }} double) &
-          (cd master && bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }} double) &
+          (cd pr     && bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}) &
+          (cd master && bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}) &
           wait %1 && wait %2
 
-          (cd pr     && bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }} single) &
-          wait %3
-
       - name: Generate & Post Comment
         run: |
           (cd pr && . ./mfc.sh load -c p -m g)
-          (cd pr && ./mfc.sh bench_diff ../master/bench-${{ matrix.device }}-double.yaml ../pr/bench-${{ matrix.device }}-double.yaml)
-
-      - name: Check PR Single vs Double Precision
-        run: |
-          # Compare single and double precision within the PR
-          cd pr
-          . ./mfc.sh load -c p -m ${{ matrix.device }}
-          ./mfc.sh bench_diff bench-${{ matrix.device }}-double.yaml bench-${{ matrix.device }}-single.yaml
+          (cd pr && ./mfc.sh bench_diff ../master/bench-${{ matrix.device }}.yaml ../pr/bench-${{ matrix.device }}.yaml)
 
       - name: Archive Logs
         uses: actions/upload-artifact@v4
-        if: always()
+        if:   always()
         with:
-            name: logs-${{ matrix.device }}
-            path: |
-                pr/bench-${{ matrix.device }}-*.*
-                pr/build/benchmarks/*
-                master/bench-${{ matrix.device }}-*.*
-                master/build/benchmarks/*
-        
-
+          name: logs-${{ matrix.device }}
+          path: |
+            pr/bench-${{ matrix.device }}.*
+            pr/build/benchmarks/*
+            master/bench-${{ matrix.device }}.*
+            master/build/benchmarks/*
diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh
@@ -1,32 +1,15 @@
 #!/bin/bash
 
-
-if [ -z "$job_device" ] || [ -z "$job_precision" ]; then
-    echo "Usage: $0 [cpu|gpu] [single|double]"
-    exit 1
-fi
-
 n_ranks=12
-precision_flag=""
-
-if [ "$job_precision" == "single" ]; then
-    precision_flag="--single"
-fi
 
 if [ "$job_device" == "gpu" ]; then
-    n_ranks=$(nvidia-smi -L | wc -l)
-    gpu_ids=$(seq -s ' ' 0 $(($n_ranks-1)))
+    n_ranks=$(nvidia-smi -L | wc -l)        # number of GPUs on node
+    gpu_ids=$(seq -s ' ' 0 $(($n_ranks-1))) # 0,1,2,...,gpu_count-1
     device_opts="--gpu -g $gpu_ids"
-else
-    device_opts=""
 fi
 
-mem_value=1
-if [ "$job_device" == "gpu" ]; then
-    mem_value=12
-fi
-
-./mfc.sh clean
-./mfc.sh build -j 8 -- $precision_flag
-./mfc.sh bench --mem $mem_value -j $(nproc) -o "bench-${job_device}-${job_precision}.yaml" -- $precision_flag -c phoenix $device_opts -n $n_ranks
-
+if [ "$job_device" == "gpu" ]; then  # spaces around [ ] are required; GPU runs pass --mem 12
+    ./mfc.sh bench --mem 12 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix $device_opts -n $n_ranks
+else  # any non-gpu device: pass --mem 1 (device_opts is unset here and expands to nothing)
+    ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix $device_opts -n $n_ranks
+fi
diff --git a/.github/workflows/phoenix/submit.sh b/.github/workflows/phoenix/submit.sh
@@ -33,20 +33,7 @@ else
     exit 1
 fi
 
-# Set default precision to 'double' if not provided
-
-if [ -z "$3" ]; then
-    precision="double"
-else
-    if [ "$3" != "single" ] && [ "$3" != "double" ]; then
-        usage
-        exit 1
-    fi
-    precision="$3"
-fi
-
-
-job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$precision"
+job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2"
 
 sbatch <<EOT
 #!/bin/bash
@@ -67,9 +54,9 @@ echo "Running in $(pwd):"
 
 job_slug="$job_slug"
 job_device="$2"
-job_precision="$precision"
 
 . ./mfc.sh load -c p -m $2
 
 $sbatch_script_contents
+
 EOT
diff --git a/toolchain/mfc/bench.py b/toolchain/mfc/bench.py
@@ -1,156 +1,170 @@
-import os, sys, uuid, subprocess, dataclasses, typing
+import os, sys, uuid, subprocess, dataclasses, typing, math
+
+import rich.table
+
 from .printer import cons
-from .state import ARG, CFG
-from .build import get_targets
-from .common import system, MFC_BENCH_FILEPATH, MFC_BUILD_DIR, format_list_to_string
-from .common import file_load_yaml, file_dump_yaml, create_directory
-from .common import MFCException
+from .state   import ARG, CFG
+from .build   import get_targets, DEFAULT_TARGETS, SIMULATION
+from .common  import system, MFC_BENCH_FILEPATH, MFC_BUILD_DIR, format_list_to_string
+from .common  import file_load_yaml, file_dump_yaml, create_directory
+from .common  import MFCException
+
 
 @dataclasses.dataclass
 class BenchCase:
     slug: str
     path: str
     args: typing.List[str]
 
-def bench(targets=None):
+
+def bench(targets = None):
     if targets is None:
         targets = ARG("targets")
 
-    precision = "single" if ARG("single") else "double"
-
-    additional_args = ARG("--")
-
     targets = get_targets(targets)
+
     bench_dirpath = os.path.join(MFC_BUILD_DIR, "benchmarks", str(uuid.uuid4())[:4])
     create_directory(bench_dirpath)
 
-    cons.print(f"[bold]Benchmarking {format_list_to_string(ARG('targets'), 'magenta')} in '{precision}' precision "
-               f"([magenta]{os.path.relpath(bench_dirpath)}[/magenta]):[/bold]")
+    cons.print()
+    cons.print(f"[bold]Benchmarking {format_list_to_string(ARG('targets'), 'magenta')} ([magenta]{os.path.relpath(bench_dirpath)}[/magenta]):[/bold]")
     cons.indent()
     cons.print()
 
-    CASES = [BenchCase(**case) for case in file_load_yaml(MFC_BENCH_FILEPATH)]
+    CASES = [ BenchCase(**case) for case in file_load_yaml(MFC_BENCH_FILEPATH) ]
 
     for case in CASES:
-        case.args = case.args + additional_args  
-        if precision == "single":
-            case.args.append("--single")  
+        case.args = case.args + ARG("--")
         case.path = os.path.abspath(case.path)
 
     results = {
         "metadata": {
             "invocation": sys.argv[1:],
-            "lock": dataclasses.asdict(CFG()),
-            "precision": precision
+            "lock":       dataclasses.asdict(CFG())
         },
         "cases": {},
     }
 
     for i, case in enumerate(CASES):
-        summary_filepath = os.path.join(bench_dirpath, f"{case.slug}-{precision}.yaml")
-        log_filepath = os.path.join(bench_dirpath, f"{case.slug}-{precision}.out")
+        summary_filepath = os.path.join(bench_dirpath, f"{case.slug}.yaml")
+        log_filepath     = os.path.join(bench_dirpath, f"{case.slug}.out")
 
-        cons.print(f"{str(i + 1).zfill(len(str(len(CASES))))}/{len(CASES)}: {case.slug} @ [bold]{os.path.relpath(case.path)}[/bold]")
+        cons.print(f"{str(i+1).zfill(len(str(len(CASES))))}/{len(CASES)}: {case.slug} @ [bold]{os.path.relpath(case.path)}[/bold]")  # zero-pad the index to the digit count of len(CASES)
         cons.indent()
+        cons.print()
         cons.print(f"> Log:     [bold]{os.path.relpath(log_filepath)}[/bold]")
         cons.print(f"> Summary: [bold]{os.path.relpath(summary_filepath)}[/bold]")
 
         with open(log_filepath, "w") as log_file:
-            command = [
-                "./mfc.sh", "run", case.path, "--case-optimization",
-                "--targets"
-            ] + [t.name for t in targets] + [
-                "--output-summary", summary_filepath
-            ] + case.args  
-
-            cons.print(f"Case precision: {precision}")
-            cons.print(f"Case args: {case.args}")
-            cons.print(f"Running command: {' '.join(command)}")
-
             system(
-                command,
+                ["./mfc.sh", "run", case.path, "--case-optimization"] +
+                ["--targets"] + [t.name for t in targets] +
+                ["--output-summary", summary_filepath] +
+                case.args +
+                ["--", "--gbpp", ARG('mem')],
                 stdout=log_file,
-                stderr=subprocess.STDOUT
-            )
+                stderr=subprocess.STDOUT)
 
         results["cases"][case.slug] = {
-            "description": dataclasses.asdict(case),
+            "description":    dataclasses.asdict(case),
             "output_summary": file_load_yaml(summary_filepath),
         }
-        cons.unindent()
 
     file_dump_yaml(ARG("output"), results)
+
     cons.print(f"Wrote results to [bold magenta]{os.path.relpath(ARG('output'))}[/bold magenta].")
+
     cons.unindent()
 
+
+# TODO: This function is too long and not nicely written at all. Someone should
+#       refactor it...
+# pylint: disable=too-many-branches
 def diff():
-    """
-    Compares the results between two benchmark YAML files (lhs vs rhs).
-    Checks both PR vs master and PR single vs PR double precision.
-    """
     lhs, rhs = file_load_yaml(ARG("lhs")), file_load_yaml(ARG("rhs"))
 
-    lhs_precision = lhs["metadata"].get("precision", "double")
-    rhs_precision = rhs["metadata"].get("precision", "double")
-
-    is_pr_single_vs_double = lhs_precision == "double" and rhs_precision == "single"
-
-    cons.print(f"[bold]Comparing Benchmarks: Speedups from [magenta]{os.path.relpath(ARG('lhs'))}[/magenta] to "
-               f"[magenta]{os.path.relpath(ARG('rhs'))}[/magenta][/bold]")
-
-    if lhs["metadata"] != rhs["metadata"] and not is_pr_single_vs_double:
+    cons.print(f"[bold]Comparing Benchmarks: Speedups from [magenta]{os.path.relpath(ARG('lhs'))}[/magenta] to [magenta]{os.path.relpath(ARG('rhs'))}[/magenta] are displayed below. Thus, numbers > 1 represent increases in performance.[/bold]")
+    if lhs["metadata"] != rhs["metadata"]:
         def _lock_to_str(lock):
             return ' '.join([f"{k}={v}" for k, v in lock.items()])
 
         cons.print(f"""\
 [bold yellow]Warning[/bold yellow]: Metadata in lhs and rhs are not equal.
+    This could mean that the benchmarks are not comparable (e.g. one was run on CPUs and the other on GPUs).
     lhs:
     * Invocation: [magenta]{' '.join(lhs['metadata']['invocation'])}[/magenta]
     * Modes:      {_lock_to_str(lhs['metadata']['lock'])}
-    * Precision:  {lhs_precision}
     rhs:
     * Invocation: {' '.join(rhs['metadata']['invocation'])}
     * Modes:      [magenta]{_lock_to_str(rhs['metadata']['lock'])}[/magenta]
-    * Precision:  {rhs_precision}
         """)
 
     slugs = set(lhs["cases"].keys()) & set(rhs["cases"].keys())
     if len(slugs) not in [len(lhs["cases"]), len(rhs["cases"])]:
         cons.print(f"""\
 [bold yellow]Warning[/bold yellow]: Cases in lhs and rhs are not equal.
-    Using intersection: {slugs} with {len(slugs)} elements.""")
+    * rhs cases: {', '.join(set(rhs['cases'].keys()) - slugs)}.
+    * lhs cases: {', '.join(set(lhs['cases'].keys()) - slugs)}.
+    Using intersection: {slugs} with {len(slugs)} elements.
+        """)
 
     table = rich.table.Table(show_header=True, box=rich.table.box.SIMPLE)
-    table.add_column("[bold]Case[/bold]", justify="left")
-    table.add_column("[bold]Speedup (Exec)[/bold]", justify="right")
-    table.add_column("[bold]Speedup (Grind)[/bold]", justify="right")
+    table.add_column("[bold]Case[/bold]",    justify="left")
+    table.add_column("[bold]Pre Process[/bold]", justify="right")
+    table.add_column("[bold]Simulation[/bold]", justify="right")
+    table.add_column("[bold]Post Process[/bold]", justify="right")
 
     err = 0
 
     for slug in slugs:
         lhs_summary = lhs["cases"][slug]["output_summary"]
         rhs_summary = rhs["cases"][slug]["output_summary"]
 
-        try:
-            exec_speedup = lhs_summary["exec"] / rhs_summary["exec"]
-            grind_speedup = lhs_summary["grind"] / rhs_summary["grind"]
+        speedups = ['N/A', 'N/A', 'N/A']
+
+        for i, target in enumerate(sorted(DEFAULT_TARGETS, key=lambda t: t.runOrder)):
+            if (target.name not in lhs_summary) or (target.name not in rhs_summary):
 
-            if is_pr_single_vs_double and exec_speedup < SINGLE_PRECISION_SPEEDUP_THRESHOLD:
-                cons.print(f"[bold red]Error[/bold red]: Case {slug} failed speedup requirement: "
-                           f"Exec speedup {exec_speedup:.2f} < {SINGLE_PRECISION_SPEEDUP_THRESHOLD}.")
                 err = 1
 
-            table.add_row(slug, f"{exec_speedup:.2f}x", f"{grind_speedup:.2f}x")
+                if target.name not in lhs_summary:
+                    cons.print(f"{target.name} not present in lhs_summary - Case: {slug}")
 
-        except KeyError as e:
-            table.add_row(slug, "Error", "Error")
-            cons.print(f"[bold yellow]Warning[/bold yellow]: Missing key {e} for case {slug}.")
-        except ZeroDivisionError:
-            table.add_row(slug, "Inf", "Inf")
-            cons.print(f"[bold yellow]Warning[/bold yellow]: Zero execution time in case {slug}.")
+                if target.name not in rhs_summary:
+                    cons.print(f"{target.name} not present in rhs_summary - Case: {slug}")
 
-    cons.raw.print(table)
+                continue
+
+            if not math.isfinite(lhs_summary[target.name]["exec"]) or not math.isfinite(rhs_summary[target.name]["exec"]):
+                err = 1
+                cons.print(f"lhs_summary or rhs_summary reports non-real exec time for {target.name} - Case: {slug}")
+
+            exec_time_speedup = "N/A"
+            try:
+                exec_time_speedup = f'{lhs_summary[target.name]["exec"] / rhs_summary[target.name]["exec"]:.2f}'
+            except Exception as _:
+                err = 1
+                cons.print(f"lhs_summary or rhs_summary reports non-real exec time for {target.name} - Case: {slug}")
+
+            speedups[i] = f"Exec: {exec_time_speedup}"
+
+            if target == SIMULATION:
+                grind_time_speedup = "N/A"
+                if not math.isfinite(lhs_summary[target.name]["grind"]) or not math.isfinite(rhs_summary[target.name]["grind"]):
+                    err = 1
+                    cons.print(f"lhs_summary or rhs_summary reports non-real grind time for {target.name} - Case: {slug}")
+
+                try:
+                    grind_time_speedup = f'{lhs_summary[target.name]["grind"] / rhs_summary[target.name]["grind"]:.2f}'
+                except Exception as _:
+                    err = 1
+                    cons.print(f"lhs_summary or rhs_summary reports non-real grind time for {target.name} - Case: {slug}")
 
-    if err:
-        raise MFCException("Benchmarking failed: Some cases did not meet the performance requirements.")
+                speedups[i] += f" & Grind: {grind_time_speedup}"
+
+        table.add_row(f"[magenta]{slug}[/magenta]", *speedups)
+
+    cons.raw.print(table)
 
+    if err != 0:
+        raise MFCException("Benchmarking failed")