From f4c952a9c32eb12080c02bc29c027686dfb7f940 Mon Sep 17 00:00:00 2001 From: Paul Trojahn Date: Mon, 14 Jul 2025 11:21:00 +0200 Subject: [PATCH] [AMDGPU] Check legality of both operands before commute When trying to fold an SGPR into a DPP add, si-fold-operands correctly realizes that this is not possible and then tries to commute which mistakenly succeeds, creating a dpp add with two SGPRs. We need to check both operands if they are legal in their new position. This crashes a test in triton on gfx12: ttps://github.com/triton-lang/triton/blob/345c633787e90a7f94864de3035346eb5de1781f/python/test/unit/language/test_core.py#L2718 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6 +++-- .../test/CodeGen/AMDGPU/fold-commute-sgpr.mir | 24 +++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/fold-commute-sgpr.mir diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4c5f938831243..8e985ea6af53f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2807,12 +2807,14 @@ bool SIInstrInfo::isLegalToSwap(const MachineInstr &MI, unsigned OpIdx0, if ((int)OpIdx1 != Src0Idx && MO0->isReg()) { if (!DefinedRC1) return OpInfo1.OperandType == MCOI::OPERAND_UNKNOWN; - return isLegalRegOperand(MI, OpIdx1, *MO0); + return isLegalRegOperand(MI, OpIdx1, *MO0) && + (!MO1->isReg() || isLegalRegOperand(MI, OpIdx0, *MO1)); } if ((int)OpIdx0 != Src0Idx && MO1->isReg()) { if (!DefinedRC0) return OpInfo0.OperandType == MCOI::OPERAND_UNKNOWN; - return isLegalRegOperand(MI, OpIdx0, *MO1); + return (!MO0->isReg() || isLegalRegOperand(MI, OpIdx1, *MO0)) && + isLegalRegOperand(MI, OpIdx0, *MO1); } // No need to check 64-bit literals since swapping does not bring new diff --git a/llvm/test/CodeGen/AMDGPU/fold-commute-sgpr.mir b/llvm/test/CodeGen/AMDGPU/fold-commute-sgpr.mir new file mode 100644 index 0000000000000..c6bc248f13388 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-commute-sgpr.mir @@ -0,0 +1,24 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: fold_commute_sgprs +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: fold_commute_sgprs + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; CHECK-NEXT: [[V_ADD_NC_U16_fake16_e64_dpp:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp [[COPY1]], 0, [[COPY2]], 0, [[COPY3]], 0, 0, 280, 15, 15, 1, implicit $exec + %0:sreg_32 = COPY $sgpr0 + %1:sreg_32 = IMPLICIT_DEF + %2:vgpr_32 = COPY %1:sreg_32 + %3:vgpr_32 = COPY %0:sreg_32 + %4:sreg_32 = COPY $sgpr1 + %5:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp %2:vgpr_32, 0, %3:vgpr_32, 0, %4:sreg_32, 0, 0, 280, 15, 15, 1, implicit $exec +...