Skip to content

Commit d7e3f49

Browse files
jeffdaily and pytorchmergebot
authored and committed
[ROCm][CI] add mi355 to inductor perf test nightly (pytorch#165326)
Fixes #ISSUE_NUMBER

Pull Request resolved: pytorch#165326
Approved by: https://github.com/jeffdaily

Co-authored-by: Jeff Daily <[email protected]>
1 parent 08f09d9 commit d7e3f49

File tree

3 files changed

+159
-22
lines changed

3 files changed

+159
-22
lines changed

.github/pytorch-probot.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ ciflow_push_tags:
1515
- ciflow/inductor-micro-benchmark
1616
- ciflow/inductor-micro-benchmark-cpu-x86
1717
- ciflow/inductor-perf-compare
18-
- ciflow/inductor-perf-test-nightly-rocm
18+
- ciflow/inductor-perf-test-nightly-rocm-mi300
19+
- ciflow/inductor-perf-test-nightly-rocm-mi355
1920
- ciflow/inductor-perf-test-nightly-x86-zen
2021
- ciflow/inductor-periodic
2122
- ciflow/inductor-rocm
.github/workflows/inductor-perf-test-nightly-rocm-mi300.yml

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
name: inductor-perf-nightly-rocm-mi300
2+
3+
on:
4+
push:
5+
tags:
6+
- ciflow/inductor-perf-test-nightly-rocm-mi300/*
7+
schedule:
8+
- cron: 15 0 * * *
9+
# NB: GitHub has an upper limit of 10 inputs here, so before we can sort it
10+
# out, let's try to run torchao cudagraphs_low_precision as part of cudagraphs
11+
workflow_dispatch:
12+
inputs:
13+
training:
14+
description: Run training (on by default)?
15+
required: false
16+
type: boolean
17+
default: true
18+
inference:
19+
description: Run inference (on by default)?
20+
required: false
21+
type: boolean
22+
default: true
23+
default:
24+
description: Run inductor_default?
25+
required: false
26+
type: boolean
27+
default: false
28+
dynamic:
29+
description: Run inductor_dynamic_shapes?
30+
required: false
31+
type: boolean
32+
default: false
33+
cppwrapper:
34+
description: Run inductor_cpp_wrapper?
35+
required: false
36+
type: boolean
37+
default: false
38+
cudagraphs:
39+
description: Run inductor_cudagraphs?
40+
required: false
41+
type: boolean
42+
default: true
43+
freezing_cudagraphs:
44+
description: Run inductor_cudagraphs with freezing for inference?
45+
required: false
46+
type: boolean
47+
default: false
48+
aotinductor:
49+
description: Run aot_inductor for inference?
50+
required: false
51+
type: boolean
52+
default: false
53+
maxautotune:
54+
description: Run inductor_max_autotune?
55+
required: false
56+
type: boolean
57+
default: false
58+
benchmark_configs:
59+
description: The list of configs used the benchmark
60+
required: false
61+
type: string
62+
default: inductor_huggingface_perf_rocm_mi300,inductor_timm_perf_rocm_mi300,inductor_torchbench_perf_rocm_mi300
63+
64+
concurrency:
65+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
66+
cancel-in-progress: true
67+
68+
permissions: read-all
69+
70+
jobs:
71+
get-label-type:
72+
name: get-label-type
73+
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
74+
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
75+
with:
76+
triggering_actor: ${{ github.triggering_actor }}
77+
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
78+
curr_branch: ${{ github.head_ref || github.ref_name }}
79+
curr_ref_type: ${{ github.ref_type }}
80+
opt_out_experiments: lf
81+
82+
linux-jammy-rocm-py3_10-inductor-benchmark-build:
83+
if: github.repository_owner == 'pytorch'
84+
name: rocm-py3_10-inductor-benchmark-build
85+
uses: ./.github/workflows/_linux-build.yml
86+
with:
87+
build-environment: linux-jammy-rocm-py3_10
88+
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3-benchmarks
89+
test-matrix: |
90+
{ include: [
91+
{ config: "inductor_huggingface_perf_rocm_mi300", shard: 1, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
92+
{ config: "inductor_huggingface_perf_rocm_mi300", shard: 2, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
93+
{ config: "inductor_huggingface_perf_rocm_mi300", shard: 3, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
94+
{ config: "inductor_huggingface_perf_rocm_mi300", shard: 4, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
95+
{ config: "inductor_huggingface_perf_rocm_mi300", shard: 5, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
96+
{ config: "inductor_timm_perf_rocm_mi300", shard: 1, num_shards: 7, runner: "linux.rocm.gpu.gfx942.1" },
97+
{ config: "inductor_timm_perf_rocm_mi300", shard: 2, num_shards: 7, runner: "linux.rocm.gpu.gfx942.1" },
98+
{ config: "inductor_timm_perf_rocm_mi300", shard: 3, num_shards: 7, runner: "linux.rocm.gpu.gfx942.1" },
99+
{ config: "inductor_timm_perf_rocm_mi300", shard: 4, num_shards: 7, runner: "linux.rocm.gpu.gfx942.1" },
100+
{ config: "inductor_timm_perf_rocm_mi300", shard: 5, num_shards: 7, runner: "linux.rocm.gpu.gfx942.1" },
101+
{ config: "inductor_timm_perf_rocm_mi300", shard: 6, num_shards: 7, runner: "linux.rocm.gpu.gfx942.1" },
102+
{ config: "inductor_timm_perf_rocm_mi300", shard: 7, num_shards: 7, runner: "linux.rocm.gpu.gfx942.1" },
103+
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 1, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
104+
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 2, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
105+
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 3, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
106+
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 4, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
107+
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 5, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
108+
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 6, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
109+
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 7, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
110+
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 8, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
111+
{ config: "inductor_torchbench_perf_rocm_mi300", shard: 9, num_shards: 9, runner: "linux.rocm.gpu.gfx942.1" },
112+
]}
113+
secrets: inherit
114+
115+
linux-jammy-rocm-py3_10-inductor-benchmark-test:
116+
permissions:
117+
id-token: write
118+
contents: read
119+
name: rocm-py3_10-inductor-benchmark-test
120+
uses: ./.github/workflows/_rocm-test.yml
121+
needs: linux-jammy-rocm-py3_10-inductor-benchmark-build
122+
with:
123+
build-environment: linux-jammy-rocm-py3_10
124+
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true
125+
docker-image: ${{ needs.linux-jammy-rocm-py3_10-inductor-benchmark-build.outputs.docker-image }}
126+
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-inductor-benchmark-build.outputs.test-matrix }}
127+
timeout-minutes: 720
128+
# Disable monitor in perf tests for more investigation
129+
disable-monitor: true
130+
monitor-log-interval: 10
131+
monitor-data-collect-interval: 2
132+
secrets: inherit

.github/workflows/inductor-perf-test-nightly-rocm.yml renamed to .github/workflows/inductor-perf-test-nightly-rocm-mi355.yml

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
name: inductor-perf-nightly-rocm
1+
name: inductor-perf-nightly-rocm-mi355
22

33
on:
44
push:
55
tags:
6-
- ciflow/inductor-perf-test-nightly-rocm/*
6+
- ciflow/inductor-perf-test-nightly-rocm-mi355/*
77
schedule:
8-
- cron: 0 7 * * 0,3
8+
- cron: 15 0 * * *
99
# NB: GitHub has an upper limit of 10 inputs here, so before we can sort it
1010
# out, let's try to run torchao cudagraphs_low_precision as part of cudagraphs
1111
workflow_dispatch:
@@ -59,7 +59,7 @@ on:
5959
description: The list of configs used the benchmark
6060
required: false
6161
type: string
62-
default: inductor_huggingface_perf_rocm,inductor_timm_perf_rocm,inductor_torchbench_perf_rocm
62+
default: inductor_huggingface_perf_rocm_mi355,inductor_timm_perf_rocm_mi355,inductor_torchbench_perf_rocm_mi355
6363

6464
concurrency:
6565
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
@@ -88,23 +88,27 @@ jobs:
8888
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3-benchmarks
8989
test-matrix: |
9090
{ include: [
91-
{ config: "inductor_huggingface_perf_rocm", shard: 1, num_shards: 4, runner: "linux.rocm.gpu.gfx942.1" },
92-
{ config: "inductor_huggingface_perf_rocm", shard: 2, num_shards: 4, runner: "linux.rocm.gpu.gfx942.1" },
93-
{ config: "inductor_huggingface_perf_rocm", shard: 3, num_shards: 4, runner: "linux.rocm.gpu.gfx942.1" },
94-
{ config: "inductor_huggingface_perf_rocm", shard: 4, num_shards: 4, runner: "linux.rocm.gpu.gfx942.1" },
95-
{ config: "inductor_timm_perf_rocm", shard: 1, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
96-
{ config: "inductor_timm_perf_rocm", shard: 2, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
97-
{ config: "inductor_timm_perf_rocm", shard: 3, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
98-
{ config: "inductor_timm_perf_rocm", shard: 4, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
99-
{ config: "inductor_timm_perf_rocm", shard: 5, num_shards: 5, runner: "linux.rocm.gpu.gfx942.1" },
100-
{ config: "inductor_torchbench_perf_rocm", shard: 1, num_shards: 8, runner: "linux.rocm.gpu.gfx942.1" },
101-
{ config: "inductor_torchbench_perf_rocm", shard: 2, num_shards: 8, runner: "linux.rocm.gpu.gfx942.1" },
102-
{ config: "inductor_torchbench_perf_rocm", shard: 3, num_shards: 8, runner: "linux.rocm.gpu.gfx942.1" },
103-
{ config: "inductor_torchbench_perf_rocm", shard: 4, num_shards: 8, runner: "linux.rocm.gpu.gfx942.1" },
104-
{ config: "inductor_torchbench_perf_rocm", shard: 5, num_shards: 8, runner: "linux.rocm.gpu.gfx942.1" },
105-
{ config: "inductor_torchbench_perf_rocm", shard: 6, num_shards: 8, runner: "linux.rocm.gpu.gfx942.1" },
106-
{ config: "inductor_torchbench_perf_rocm", shard: 7, num_shards: 8, runner: "linux.rocm.gpu.gfx942.1" },
107-
{ config: "inductor_torchbench_perf_rocm", shard: 8, num_shards: 8, runner: "linux.rocm.gpu.gfx942.1" },
91+
{ config: "inductor_huggingface_perf_rocm_mi355", shard: 1, num_shards: 5, runner: "linux.rocm.gpu.mi355.2" },
92+
{ config: "inductor_huggingface_perf_rocm_mi355", shard: 2, num_shards: 5, runner: "linux.rocm.gpu.mi355.2" },
93+
{ config: "inductor_huggingface_perf_rocm_mi355", shard: 3, num_shards: 5, runner: "linux.rocm.gpu.mi355.2" },
94+
{ config: "inductor_huggingface_perf_rocm_mi355", shard: 4, num_shards: 5, runner: "linux.rocm.gpu.mi355.2" },
95+
{ config: "inductor_huggingface_perf_rocm_mi355", shard: 5, num_shards: 5, runner: "linux.rocm.gpu.mi355.2" },
96+
{ config: "inductor_timm_perf_rocm_mi355", shard: 1, num_shards: 7, runner: "linux.rocm.gpu.mi355.2" },
97+
{ config: "inductor_timm_perf_rocm_mi355", shard: 2, num_shards: 7, runner: "linux.rocm.gpu.mi355.2" },
98+
{ config: "inductor_timm_perf_rocm_mi355", shard: 3, num_shards: 7, runner: "linux.rocm.gpu.mi355.2" },
99+
{ config: "inductor_timm_perf_rocm_mi355", shard: 4, num_shards: 7, runner: "linux.rocm.gpu.mi355.2" },
100+
{ config: "inductor_timm_perf_rocm_mi355", shard: 5, num_shards: 7, runner: "linux.rocm.gpu.mi355.2" },
101+
{ config: "inductor_timm_perf_rocm_mi355", shard: 6, num_shards: 7, runner: "linux.rocm.gpu.mi355.2" },
102+
{ config: "inductor_timm_perf_rocm_mi355", shard: 7, num_shards: 7, runner: "linux.rocm.gpu.mi355.2" },
103+
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 1, num_shards: 9, runner: "linux.rocm.gpu.mi355.2" },
104+
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 2, num_shards: 9, runner: "linux.rocm.gpu.mi355.2" },
105+
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 3, num_shards: 9, runner: "linux.rocm.gpu.mi355.2" },
106+
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 4, num_shards: 9, runner: "linux.rocm.gpu.mi355.2" },
107+
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 5, num_shards: 9, runner: "linux.rocm.gpu.mi355.2" },
108+
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 6, num_shards: 9, runner: "linux.rocm.gpu.mi355.2" },
109+
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 7, num_shards: 9, runner: "linux.rocm.gpu.mi355.2" },
110+
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 8, num_shards: 9, runner: "linux.rocm.gpu.mi355.2" },
111+
{ config: "inductor_torchbench_perf_rocm_mi355", shard: 9, num_shards: 9, runner: "linux.rocm.gpu.mi355.2" },
108112
]}
109113
secrets: inherit
110114

0 commit comments

Comments
 (0)