diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 4ae2c1ed04dae..31d8bce4d0c87 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -1707,7 +1707,7 @@ multiclass WMMAInstGFX12, WMMAInstInfo { let PseudoInstr = Instr#PseudoInstrSuffix; @@ -1734,7 +1734,7 @@ multiclass SWMMACInstGFX12; } // End is_wmma_xdl = 1. -defm V_WMMA_LD_SCALE_PAIRED_B32 : VOP3PInst<"v_wmma_ld_scale_paired_b32", VOP_WMMA_LD_SCALE>; -defm V_WMMA_LD_SCALE16_PAIRED_B64 : VOP3PInst<"v_wmma_ld_scale16_paired_b64", VOP_WMMA_LD_SCALE>; +let isConvergent = 1 in { + defm V_WMMA_LD_SCALE_PAIRED_B32 : VOP3PInst<"v_wmma_ld_scale_paired_b32", VOP_WMMA_LD_SCALE>; + defm V_WMMA_LD_SCALE16_PAIRED_B64 : VOP3PInst<"v_wmma_ld_scale16_paired_b64", VOP_WMMA_LD_SCALE>; +} } // End SubtargetPredicate = isGFX125xOnly } // End WaveSizePredicate = isWave32 diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-convergent.mir b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-convergent.mir new file mode 100644 index 0000000000000..df3e780c61f46 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-convergent.mir @@ -0,0 +1,42 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx12-generic -run-pass=machine-sink %s -o - | FileCheck %s + +--- +name: wmma_test +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: wmma_test + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; CHECK-NEXT: early-clobber %3:vreg_256 = V_WMMA_F32_16X16X16_F16_w32_threeaddr 8, [[DEF]], 8, [[DEF1]], 8, 0, 0, 0, implicit $exec + ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF2]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY %3.sub1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + %0:vreg_128 = IMPLICIT_DEF + %1:vreg_128 = IMPLICIT_DEF + %2:sreg_32 = IMPLICIT_DEF + early-clobber %3:vreg_256 = V_WMMA_F32_16X16X16_F16_w32_threeaddr 8, %0:vreg_128, 8, %1:vreg_128, 8, 0, 0, 0, implicit $exec + %4:sreg_32 = SI_IF %2:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.1 + + bb.1: + %5:vreg_256 = COPY %3.sub1:vreg_256 + + bb.2: + SI_END_CF %4:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_ENDPGM 0 + +...