Skip to content

Commit 9bca3c1

Browse files
jithunnair-amd authored and pytorchmergebot committed
[ROCm][CI] Expand trunk.yml coverage for ROCm (pytorch#168162)
We are expanding the test coverage of pre-submit (PR-based) trunk.yml runs for ROCm to the full list of unit tests. Consequently, we are swapping the runner labels (CSPs) used by the rocm-mi300.yml and periodic-rocm-mi300.yml workflows to balance capacity. We will be disabling the shadow workflow trunk-rocm-mi300.yml, because this PR makes it no longer necessary.
Fixes pytorch#166108
Pull Request resolved: pytorch#168162
Approved by: https://github.com/jeffdaily
1 parent 9177d6e commit 9bca3c1

File tree

3 files changed

+18
-13
lines changed

3 files changed

+18
-13
lines changed

.github/workflows/periodic-rocm-mi300.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -60,9 +60,9 @@ jobs:
6060
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
6161
test-matrix: |
6262
{ include: [
63-
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4", owners: ["module:rocm", "oncall:distributed"] },
64-
{ config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4", owners: ["module:rocm", "oncall:distributed"] },
65-
{ config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4", owners: ["module:rocm", "oncall:distributed"] },
63+
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4.b", owners: ["module:rocm", "oncall:distributed"] },
64+
{ config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4.b", owners: ["module:rocm", "oncall:distributed"] },
65+
{ config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4.b", owners: ["module:rocm", "oncall:distributed"] },
6666
]}
6767
secrets: inherit
6868

.github/workflows/rocm-mi300.yml

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -48,12 +48,12 @@ jobs:
4848
docker-image-name: ci-image:pytorch-linux-noble-rocm-n-py3
4949
test-matrix: |
5050
{ include: [
51-
{ config: "default", shard: 1, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1" },
52-
{ config: "default", shard: 2, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1" },
53-
{ config: "default", shard: 3, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1" },
54-
{ config: "default", shard: 4, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1" },
55-
{ config: "default", shard: 5, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1" },
56-
{ config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1" },
51+
{ config: "default", shard: 1, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1.b" },
52+
{ config: "default", shard: 2, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1.b" },
53+
{ config: "default", shard: 3, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1.b" },
54+
{ config: "default", shard: 4, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1.b" },
55+
{ config: "default", shard: 5, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1.b" },
56+
{ config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1.b" },
5757
]}
5858
secrets: inherit
5959

.github/workflows/trunk.yml

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -203,9 +203,15 @@ jobs:
203203
sync-tag: rocm-build
204204
test-matrix: |
205205
{ include: [
206-
{ config: "default", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.1" },
207-
{ config: "default", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.1" },
208-
{ config: "distributed", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.gfx942.4" },
206+
{ config: "default", shard: 1, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1" },
207+
{ config: "default", shard: 2, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1" },
208+
{ config: "default", shard: 3, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1" },
209+
{ config: "default", shard: 4, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1" },
210+
{ config: "default", shard: 5, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1" },
211+
{ config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.gfx942.1" },
212+
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4" },
213+
{ config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4" },
214+
{ config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4" },
209215
]}
210216
secrets: inherit
211217

@@ -223,7 +229,6 @@ jobs:
223229
build-environment: linux-jammy-rocm-py3.10
224230
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
225231
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }}
226-
tests-to-include: "test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs test_autograd inductor/test_torchinductor distributed/test_c10d_common distributed/test_c10d_nccl"
227232
secrets: inherit
228233

229234
inductor-build:

0 commit comments

Comments
 (0)