diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index 10e8e51681..f4404ff65f 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -1,6 +1,9 @@
 name: 'Benchmark'

-on: pull_request
+on:
+  pull_request_review:
+    types: [submitted]
+  workflow_dispatch:

 jobs:
   file-changes:
@@ -20,11 +23,12 @@ jobs:

   self:
     name: Georgia Tech | Phoenix (NVHPC)
-    if: github.repository == 'MFlowCode/MFC' && needs.file-changes.outputs.checkall == 'true'
+    if: github.repository == 'MFlowCode/MFC' && needs.file-changes.outputs.checkall == 'true' && ${{ github.event.review.state == 'approved' }}
     needs: file-changes
     strategy:
       matrix:
         device: ['cpu', 'gpu']
+      fail-fast: false
     runs-on:
       group: phoenix
       labels: gt
@@ -56,6 +60,12 @@ jobs:
           (cd pr && . ./mfc.sh load -c p -m g)
           (cd pr && ./mfc.sh bench_diff ../master/bench-${{ matrix.device }}.yaml ../pr/bench-${{ matrix.device }}.yaml)

+      - name: Print Logs
+        if: always()
+        run: |
+          cat pr/bench-${{ matrix.device }}.* 2>/dev/null || true
+          cat master/bench-${{ matrix.device }}.* 2>/dev/null || true
+
       - name: Archive Logs
         uses: actions/upload-artifact@v4
         if: always()
diff --git a/toolchain/mfc/bench.py b/toolchain/mfc/bench.py
index 0eb28d8e30..eb1b120035 100644
--- a/toolchain/mfc/bench.py
+++ b/toolchain/mfc/bench.py
@@ -125,7 +125,6 @@ def diff():
             if not math.isfinite(lhs_summary[target.name]["exec"]) or not math.isfinite(rhs_summary[target.name]["exec"]):
                 err = 1
                 cons.print(f"lhs_summary or rhs_summary reports non-real exec time for {target.name} - Case: {slug}")
-
             try:
                 exec_time_value = lhs_summary[target.name]["exec"] / rhs_summary[target.name]["exec"]
                 if exec_time_value < 0.9:
@@ -139,10 +138,10 @@ def diff():
                 grind_time_value = lhs_summary[target.name]["grind"] / rhs_summary[target.name]["grind"]
                 speedups[i] += f" & Grind: {grind_time_value:.2f}"
                 if grind_time_value <0.95:
-                    raise MFCException(f"Benchmarking failed since grind time speedup for {target.name} below acceptable threshold (<0.98) - Case: {slug}")
+                    cons.print(f"[bold red]Error[/bold red]: Benchmarking failed since grind time speedup for {target.name} below acceptable threshold (<0.95) - Case: {slug}")
+                    err = 1
             except Exception as _:
-                err = 1
-                cons.print(f"lhs_summary or rhs_summary reports non-real grind time for {target.name} - Case: {slug}")
+                pass

         table.add_row(f"[magenta]{slug}[/magenta]", *speedups)