Skip to content

Commit 8c33f83

Browse files
committed
[AMDGPU] Treat XDL ops as TRANS on gfx12
1 parent a5a33d8 commit 8c33f83

File tree

4 files changed

+29
-23
lines changed

4 files changed

+29
-23
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1717
#include "SIInstrInfo.h"
1818
#include "llvm/ADT/SetVector.h"
19+
#include "llvm/CodeGen/MachineInstr.h"
1920

2021
using namespace llvm;
2122

@@ -51,12 +52,15 @@ class AMDGPUInsertDelayAlu {
5152
enum DelayType { VALU, TRANS, SALU, OTHER };
5253

5354
// Get the delay type for the specified instruction.
54-
static DelayType getDelayType(uint64_t TSFlags) {
55-
if (TSFlags & SIInstrFlags::TRANS)
55+
DelayType getDelayType(MachineInstr &MI) {
56+
if (SIInstrInfo::isTRANS(MI))
5657
return TRANS;
57-
if (TSFlags & SIInstrFlags::VALU)
58+
// XDL ops are treated the same as TRANS.
59+
if (SII->isXDL(MI))
60+
return TRANS;
61+
if (SIInstrInfo::isVALU(MI))
5862
return VALU;
59-
if (TSFlags & SIInstrFlags::SALU)
63+
if (SIInstrInfo::isSALU(MI))
6064
return SALU;
6165
return OTHER;
6266
}
@@ -343,7 +347,7 @@ class AMDGPUInsertDelayAlu {
343347
continue;
344348
}
345349

346-
DelayType Type = getDelayType(MI.getDesc().TSFlags);
350+
DelayType Type = getDelayType(MI);
347351

348352
if (instructionWaitsForVALU(MI)) {
349353
// Forget about all outstanding VALU delays.

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10262,6 +10262,8 @@ bool SIInstrInfo::isGlobalMemoryObject(const MachineInstr *MI) const {
1026210262

1026310263
bool SIInstrInfo::isXDL(const MachineInstr &MI) const {
1026410264
unsigned Opcode = MI.getOpcode();
10265+
if (AMDGPU::isGFX12(ST))
10266+
return isWMMA(MI) || isSWMMAC(MI) || isDOT(MI);
1026510267

1026610268
if (!SIInstrInfo::isMAI(MI) || isDGEMM(Opcode) ||
1026710269
Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||

llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<4 x half> %A, <8
2020
; GFX12-NEXT: s_wait_loadcnt 0x0
2121
; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[20:23], v[0:1], v[2:5], v10
2222
; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[24:27], v[0:1], v[2:5], v10 index_key:1
23-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
23+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
2424
; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[28:31], v[0:1], v[2:5], v10 index_key:2
2525
; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 index_key:3
2626
; GFX12-NEXT: global_store_b128 v[12:13], v[20:23], off
@@ -64,7 +64,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_bf16_index_key(<4 x i16> %A, <8
6464
; GFX12-NEXT: s_wait_loadcnt 0x0
6565
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf16 v[20:23], v[0:1], v[2:5], v10
6666
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf16 v[24:27], v[0:1], v[2:5], v10 index_key:1
67-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
67+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
6868
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf16 v[28:31], v[0:1], v[2:5], v10 index_key:2
6969
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 index_key:3
7070
; GFX12-NEXT: global_store_b128 v[12:13], v[20:23], off
@@ -102,7 +102,7 @@ define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_index_key(<4 x half> %A, <8
102102
; GFX12-NEXT: s_wait_loadcnt 0x0
103103
; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[8:9], v[0:1], v[2:5], v22
104104
; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[18:19], v[0:1], v[2:5], v22 index_key:1
105-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
105+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
106106
; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[20:21], v[0:1], v[2:5], v22 index_key:2
107107
; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v22 index_key:3
108108
; GFX12-NEXT: global_store_b64 v[10:11], v[8:9], off
@@ -140,7 +140,7 @@ define amdgpu_ps void @test_swmmac_bf16_16x16x32_bf16_index_key(<4 x i16> %A, <8
140140
; GFX12-NEXT: s_wait_loadcnt 0x0
141141
; GFX12-NEXT: v_swmmac_bf16_16x16x32_bf16 v[8:9], v[0:1], v[2:5], v22
142142
; GFX12-NEXT: v_swmmac_bf16_16x16x32_bf16 v[18:19], v[0:1], v[2:5], v22 index_key:1
143-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
143+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
144144
; GFX12-NEXT: v_swmmac_bf16_16x16x32_bf16 v[20:21], v[0:1], v[2:5], v22 index_key:2
145145
; GFX12-NEXT: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v22 index_key:3
146146
; GFX12-NEXT: global_store_b64 v[10:11], v[8:9], off
@@ -184,7 +184,7 @@ define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_index_key(i32 %A, <2 x i32>
184184
; GFX12-NEXT: s_wait_loadcnt 0x0
185185
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu8 v[17:20], v0, v[1:2], v7
186186
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu8 v[21:24], v0, v[1:2], v7 index_key:1
187-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
187+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
188188
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu8 v[25:28], v0, v[1:2], v7 index_key:2
189189
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 index_key:3
190190
; GFX12-NEXT: global_store_b128 v[9:10], v[17:20], off
@@ -280,7 +280,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_fp8_index_key(i32 %A, <2 x i
280280
; GFX12-NEXT: s_wait_loadcnt 0x0
281281
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_fp8 v[17:20], v0, v[1:2], v7
282282
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_fp8 v[21:24], v0, v[1:2], v7 index_key:1
283-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
283+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
284284
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_fp8 v[25:28], v0, v[1:2], v7 index_key:2
285285
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 index_key:3
286286
; GFX12-NEXT: global_store_b128 v[9:10], v[17:20], off
@@ -324,7 +324,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_bf8_index_key(i32 %A, <2 x i
324324
; GFX12-NEXT: s_wait_loadcnt 0x0
325325
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_bf8 v[17:20], v0, v[1:2], v7
326326
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_bf8 v[21:24], v0, v[1:2], v7 index_key:1
327-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
327+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
328328
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_bf8 v[25:28], v0, v[1:2], v7 index_key:2
329329
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 index_key:3
330330
; GFX12-NEXT: global_store_b128 v[9:10], v[17:20], off
@@ -368,7 +368,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_fp8_index_key(i32 %A, <2 x i
368368
; GFX12-NEXT: s_wait_loadcnt 0x0
369369
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_fp8 v[17:20], v0, v[1:2], v7
370370
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_fp8 v[21:24], v0, v[1:2], v7 index_key:1
371-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
371+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
372372
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_fp8 v[25:28], v0, v[1:2], v7 index_key:2
373373
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 index_key:3
374374
; GFX12-NEXT: global_store_b128 v[9:10], v[17:20], off
@@ -412,7 +412,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_bf8_index_key(i32 %A, <2 x i
412412
; GFX12-NEXT: s_wait_loadcnt 0x0
413413
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_bf8 v[17:20], v0, v[1:2], v7
414414
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_bf8 v[21:24], v0, v[1:2], v7 index_key:1
415-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
415+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
416416
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_bf8 v[25:28], v0, v[1:2], v7 index_key:2
417417
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 index_key:3
418418
; GFX12-NEXT: global_store_b128 v[9:10], v[17:20], off

llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<4 x half> %A, <8
2020
; GFX12-NEXT: s_wait_loadcnt 0x0
2121
; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[20:23], v[0:1], v[2:5], v10
2222
; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[24:27], v[0:1], v[2:5], v10 index_key:1
23-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
23+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
2424
; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[28:31], v[0:1], v[2:5], v10 index_key:2
2525
; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 index_key:3
2626
; GFX12-NEXT: global_store_b128 v[12:13], v[20:23], off
@@ -64,7 +64,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_bf16_index_key(<4 x i16> %A, <8
6464
; GFX12-NEXT: s_wait_loadcnt 0x0
6565
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf16 v[20:23], v[0:1], v[2:5], v10
6666
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf16 v[24:27], v[0:1], v[2:5], v10 index_key:1
67-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
67+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
6868
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf16 v[28:31], v[0:1], v[2:5], v10 index_key:2
6969
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 index_key:3
7070
; GFX12-NEXT: global_store_b128 v[12:13], v[20:23], off
@@ -102,7 +102,7 @@ define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_index_key(<4 x half> %A, <8
102102
; GFX12-NEXT: s_wait_loadcnt 0x0
103103
; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[8:9], v[0:1], v[2:5], v22
104104
; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[18:19], v[0:1], v[2:5], v22 index_key:1
105-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
105+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
106106
; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[20:21], v[0:1], v[2:5], v22 index_key:2
107107
; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v22 index_key:3
108108
; GFX12-NEXT: global_store_b64 v[10:11], v[8:9], off
@@ -140,7 +140,7 @@ define amdgpu_ps void @test_swmmac_bf16_16x16x32_bf16_index_key(<4 x i16> %A, <8
140140
; GFX12-NEXT: s_wait_loadcnt 0x0
141141
; GFX12-NEXT: v_swmmac_bf16_16x16x32_bf16 v[8:9], v[0:1], v[2:5], v22
142142
; GFX12-NEXT: v_swmmac_bf16_16x16x32_bf16 v[18:19], v[0:1], v[2:5], v22 index_key:1
143-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
143+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
144144
; GFX12-NEXT: v_swmmac_bf16_16x16x32_bf16 v[20:21], v[0:1], v[2:5], v22 index_key:2
145145
; GFX12-NEXT: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v22 index_key:3
146146
; GFX12-NEXT: global_store_b64 v[10:11], v[8:9], off
@@ -184,7 +184,7 @@ define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_index_key(i32 %A, <2 x i32>
184184
; GFX12-NEXT: s_wait_loadcnt 0x0
185185
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu8 v[17:20], v0, v[1:2], v7
186186
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu8 v[21:24], v0, v[1:2], v7 index_key:1
187-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
187+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
188188
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu8 v[25:28], v0, v[1:2], v7 index_key:2
189189
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 index_key:3
190190
; GFX12-NEXT: global_store_b128 v[9:10], v[17:20], off
@@ -280,7 +280,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_fp8_index_key(i32 %A, <2 x i
280280
; GFX12-NEXT: s_wait_loadcnt 0x0
281281
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_fp8 v[17:20], v0, v[1:2], v7
282282
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_fp8 v[21:24], v0, v[1:2], v7 index_key:1
283-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
283+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
284284
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_fp8 v[25:28], v0, v[1:2], v7 index_key:2
285285
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 index_key:3
286286
; GFX12-NEXT: global_store_b128 v[9:10], v[17:20], off
@@ -324,7 +324,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_bf8_index_key(i32 %A, <2 x i
324324
; GFX12-NEXT: s_wait_loadcnt 0x0
325325
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_bf8 v[17:20], v0, v[1:2], v7
326326
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_bf8 v[21:24], v0, v[1:2], v7 index_key:1
327-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
327+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
328328
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_bf8 v[25:28], v0, v[1:2], v7 index_key:2
329329
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 index_key:3
330330
; GFX12-NEXT: global_store_b128 v[9:10], v[17:20], off
@@ -368,7 +368,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_fp8_index_key(i32 %A, <2 x i
368368
; GFX12-NEXT: s_wait_loadcnt 0x0
369369
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_fp8 v[17:20], v0, v[1:2], v7
370370
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_fp8 v[21:24], v0, v[1:2], v7 index_key:1
371-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
371+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
372372
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_fp8 v[25:28], v0, v[1:2], v7 index_key:2
373373
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 index_key:3
374374
; GFX12-NEXT: global_store_b128 v[9:10], v[17:20], off
@@ -412,7 +412,7 @@ define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_bf8_index_key(i32 %A, <2 x i
412412
; GFX12-NEXT: s_wait_loadcnt 0x0
413413
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_bf8 v[17:20], v0, v[1:2], v7
414414
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_bf8 v[21:24], v0, v[1:2], v7 index_key:1
415-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
415+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
416416
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_bf8 v[25:28], v0, v[1:2], v7 index_key:2
417417
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 index_key:3
418418
; GFX12-NEXT: global_store_b128 v[9:10], v[17:20], off

0 commit comments

Comments
 (0)