From fde9ab2695c7f9f8c3fede8678914627d675a0bc Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Thu, 2 Oct 2025 16:57:48 -0700 Subject: [PATCH 1/8] NFC: Refactor tryFoldZeroHiBits Change-Id: Ice882043dc3171eedd08049bb05ef5a046dbf94a --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 45 +++++++++++++---------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 51c56ecea2c96..93d0653b4ef54 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -245,7 +245,7 @@ class SIFoldOperandsImpl { std::optional getImmOrMaterializedImm(MachineOperand &Op) const; bool tryConstantFoldOp(MachineInstr *MI) const; bool tryFoldCndMask(MachineInstr &MI) const; - bool tryFoldZeroHighBits(MachineInstr &MI) const; + bool tryFoldArithmetic(MachineInstr &MI) const; bool foldInstOperand(MachineInstr &MI, const FoldableDef &OpToFold) const; bool foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const; @@ -1730,26 +1730,33 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const { return true; } -bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const { - if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 && - MI.getOpcode() != AMDGPU::V_AND_B32_e32) - return false; +bool SIFoldOperandsImpl::tryFoldArithmetic(MachineInstr &MI) const { + unsigned Opc = MI.getOpcode(); - std::optional Src0Imm = getImmOrMaterializedImm(MI.getOperand(1)); - if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg()) - return false; + switch (Opc) { + default: + return false; + case AMDGPU::V_AND_B32_e64: + case AMDGPU::V_AND_B32_e32: { + std::optional Src0Imm = getImmOrMaterializedImm(MI.getOperand(1)); + if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg()) + return false; - Register Src1 = MI.getOperand(2).getReg(); - MachineInstr *SrcDef = MRI->getVRegDef(Src1); - if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) - return false; + Register Src1 = MI.getOperand(2).getReg(); + MachineInstr *SrcDef = MRI->getVRegDef(Src1); + if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) + return false; - Register Dst = MI.getOperand(0).getReg(); - MRI->replaceRegWith(Dst, Src1); - if (!MI.getOperand(2).isKill()) - MRI->clearKillFlags(Src1); - MI.eraseFromParent(); - return true; + Register Dst = MI.getOperand(0).getReg(); + MRI->replaceRegWith(Dst, Src1); + if (!MI.getOperand(2).isKill()) + MRI->clearKillFlags(Src1); + MI.eraseFromParent(); + return true; + } + } + + return false; } bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI, @@ -2790,7 +2797,7 @@ bool SIFoldOperandsImpl::run(MachineFunction &MF) { for (auto &MI : make_early_inc_range(*MBB)) { Changed |= tryFoldCndMask(MI); - if (tryFoldZeroHighBits(MI)) { + if (tryFoldArithmetic(MI)) { Changed = true; continue; } From 3c1c5a4ac995ed0ce017a66bab6a734283181f5c Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Thu, 2 Oct 2025 17:21:13 -0700 Subject: [PATCH 2/8] [AMDGPU] Fold dst = v_add 0, src -> src Change-Id: I9b1162d93722f33eb5067502baf87590bf861e3c --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 32 ++++++++++++++----- .../CodeGen/AMDGPU/groupstaticsize-zero.ll | 20 ++++++++++++ 2 files changed, 44 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 93d0653b4ef54..382360150d42f 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1733,6 +1733,18 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const { bool SIFoldOperandsImpl::tryFoldArithmetic(MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); + auto replaceAndFold = [this](MachineOperand &NewOp, MachineOperand &OldOp, + MachineInstr &MI) -> bool { + if (!(NewOp.isReg() && OldOp.isReg())) + return false; + Register OldReg = OldOp.getReg(); + MRI->replaceRegWith(NewOp.getReg(), OldReg); + if (!OldOp.isKill()) + MRI->clearKillFlags(OldReg); + MI.eraseFromParent(); + return true; + }; + switch (Opc) { default: return false; @@ -1742,17 +1754,21 @@ bool SIFoldOperandsImpl::tryFoldArithmetic(MachineInstr &MI) const { if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg()) return false; - Register Src1 = MI.getOperand(2).getReg(); - MachineInstr *SrcDef = MRI->getVRegDef(Src1); + MachineOperand &Src1Op = MI.getOperand(2); + MachineInstr *SrcDef = MRI->getVRegDef(Src1Op.getReg()); if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) return false; - Register Dst = MI.getOperand(0).getReg(); - MRI->replaceRegWith(Dst, Src1); - if (!MI.getOperand(2).isKill()) - MRI->clearKillFlags(Src1); - MI.eraseFromParent(); - return true; + return replaceAndFold(MI.getOperand(0), Src1Op, MI); + } + case AMDGPU::V_ADD_U32_e64: + case AMDGPU::V_ADD_U32_e32: { + std::optional Src0Imm = + getImmOrMaterializedImm(MI.getOperand(1)); + if (!Src0Imm || *Src0Imm != 0 || !MI.getOperand(2).isReg()) + return false; + + return replaceAndFold(MI.getOperand(0), MI.getOperand(2), MI); } } diff --git a/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll b/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll new file mode 100644 index 0000000000000..e52eb8aca9f84 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN %s + +@global_smem = external addrspace(3) global [0 x i8] + +define amdgpu_kernel void @addzero() { +; GCN-LABEL: addzero: +; GCN: ; %bb.0: ; %.lr.ph +; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: v_and_b32_e32 v0, 1, v0 +; GCN-NEXT: v_mov_b32_e32 v3, v2 +; GCN-NEXT: ds_write_b64 v0, v[2:3] +; GCN-NEXT: s_endpgm +.lr.ph: + %0 = tail call i32 @llvm.amdgcn.workitem.id.x() + %1 = and i32 %0, 1 + %2 = getelementptr i8, ptr addrspace(3) @global_smem, i32 %1 + store <4 x bfloat> zeroinitializer, ptr addrspace(3) %2, align 8 + ret void +} From 69e60fb368930fd6d440ad4ff7b893b1dd5b5d1b Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Mon, 13 Oct 2025 18:00:20 -0700 Subject: [PATCH 3/8] Formatting Change-Id: If2b8c5e64be9291fb92f433542cd926be3193027 --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 39 +++++++++++------------ 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 382360150d42f..115a429ea303a 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1746,30 +1746,29 @@ bool SIFoldOperandsImpl::tryFoldArithmetic(MachineInstr &MI) const { }; switch (Opc) { - default: + default: + return false; + case AMDGPU::V_AND_B32_e64: + case AMDGPU::V_AND_B32_e32: { + std::optional Src0Imm = getImmOrMaterializedImm(MI.getOperand(1)); + if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg()) return false; - case AMDGPU::V_AND_B32_e64: - case AMDGPU::V_AND_B32_e32: { - std::optional Src0Imm = getImmOrMaterializedImm(MI.getOperand(1)); - if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg()) - return false; - MachineOperand &Src1Op = MI.getOperand(2); - MachineInstr *SrcDef = MRI->getVRegDef(Src1Op.getReg()); - if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) - return false; + MachineOperand &Src1Op = MI.getOperand(2); + MachineInstr *SrcDef = MRI->getVRegDef(Src1Op.getReg()); + if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) + return false; - return replaceAndFold(MI.getOperand(0), Src1Op, MI); - } - case AMDGPU::V_ADD_U32_e64: - case AMDGPU::V_ADD_U32_e32: { - std::optional Src0Imm = - getImmOrMaterializedImm(MI.getOperand(1)); - if (!Src0Imm || *Src0Imm != 0 || !MI.getOperand(2).isReg()) - return false; + return replaceAndFold(MI.getOperand(0), Src1Op, MI); + } + case AMDGPU::V_ADD_U32_e64: + case AMDGPU::V_ADD_U32_e32: { + std::optional Src0Imm = getImmOrMaterializedImm(MI.getOperand(1)); + if (!Src0Imm || *Src0Imm != 0 || !MI.getOperand(2).isReg()) + return false; - return replaceAndFold(MI.getOperand(0), MI.getOperand(2), MI); - } + return replaceAndFold(MI.getOperand(0), MI.getOperand(2), MI); + } } return false; From 62866dfdd5a55b6749a70f236e51e8b91aa49326 Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Tue, 14 Oct 2025 10:12:29 -0700 Subject: [PATCH 4/8] Revert "Formatting" This reverts commit 69e60fb368930fd6d440ad4ff7b893b1dd5b5d1b. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 39 ++++++++++++----------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 115a429ea303a..382360150d42f 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1746,29 +1746,30 @@ bool SIFoldOperandsImpl::tryFoldArithmetic(MachineInstr &MI) const { }; switch (Opc) { - default: - return false; - case AMDGPU::V_AND_B32_e64: - case AMDGPU::V_AND_B32_e32: { - std::optional Src0Imm = getImmOrMaterializedImm(MI.getOperand(1)); - if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg()) + default: return false; + case AMDGPU::V_AND_B32_e64: + case AMDGPU::V_AND_B32_e32: { + std::optional Src0Imm = getImmOrMaterializedImm(MI.getOperand(1)); + if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg()) + return false; - MachineOperand &Src1Op = MI.getOperand(2); - MachineInstr *SrcDef = MRI->getVRegDef(Src1Op.getReg()); - if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) - return false; + MachineOperand &Src1Op = MI.getOperand(2); + MachineInstr *SrcDef = MRI->getVRegDef(Src1Op.getReg()); + if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) + return false; - return replaceAndFold(MI.getOperand(0), Src1Op, MI); - } - case AMDGPU::V_ADD_U32_e64: - case AMDGPU::V_ADD_U32_e32: { - std::optional Src0Imm = getImmOrMaterializedImm(MI.getOperand(1)); - if (!Src0Imm || *Src0Imm != 0 || !MI.getOperand(2).isReg()) - return false; + return replaceAndFold(MI.getOperand(0), Src1Op, MI); + } + case AMDGPU::V_ADD_U32_e64: + case AMDGPU::V_ADD_U32_e32: { + std::optional Src0Imm = + getImmOrMaterializedImm(MI.getOperand(1)); + if (!Src0Imm || *Src0Imm != 0 || !MI.getOperand(2).isReg()) + return false; - return replaceAndFold(MI.getOperand(0), MI.getOperand(2), MI); - } + return replaceAndFold(MI.getOperand(0), MI.getOperand(2), MI); + } } return false; From 24501fe0db668cb007436da6f79b79027a28a766 Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Tue, 14 Oct 2025 10:12:37 -0700 Subject: [PATCH 5/8] Revert "[AMDGPU] Fold dst = v_add 0, src -> src" This reverts commit 3c1c5a4ac995ed0ce017a66bab6a734283181f5c. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 32 +++++-------------- .../CodeGen/AMDGPU/groupstaticsize-zero.ll | 20 ------------ 2 files changed, 8 insertions(+), 44 deletions(-) delete mode 100644 llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 382360150d42f..93d0653b4ef54 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1733,18 +1733,6 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const { bool SIFoldOperandsImpl::tryFoldArithmetic(MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); - auto replaceAndFold = [this](MachineOperand &NewOp, MachineOperand &OldOp, - MachineInstr &MI) -> bool { - if (!(NewOp.isReg() && OldOp.isReg())) - return false; - Register OldReg = OldOp.getReg(); - MRI->replaceRegWith(NewOp.getReg(), OldReg); - if (!OldOp.isKill()) - MRI->clearKillFlags(OldReg); - MI.eraseFromParent(); - return true; - }; - switch (Opc) { default: return false; @@ -1754,21 +1742,17 @@ bool SIFoldOperandsImpl::tryFoldArithmetic(MachineInstr &MI) const { if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg()) return false; - MachineOperand &Src1Op = MI.getOperand(2); - MachineInstr *SrcDef = MRI->getVRegDef(Src1Op.getReg()); + Register Src1 = MI.getOperand(2).getReg(); + MachineInstr *SrcDef = MRI->getVRegDef(Src1); if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) return false; - return replaceAndFold(MI.getOperand(0), Src1Op, MI); - } - case AMDGPU::V_ADD_U32_e64: - case AMDGPU::V_ADD_U32_e32: { - std::optional Src0Imm = - getImmOrMaterializedImm(MI.getOperand(1)); - if (!Src0Imm || *Src0Imm != 0 || !MI.getOperand(2).isReg()) - return false; - - return replaceAndFold(MI.getOperand(0), MI.getOperand(2), MI); + Register Dst = MI.getOperand(0).getReg(); + MRI->replaceRegWith(Dst, Src1); + if (!MI.getOperand(2).isKill()) + MRI->clearKillFlags(Src1); + MI.eraseFromParent(); + return true; } } diff --git a/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll b/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll deleted file mode 100644 index e52eb8aca9f84..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll +++ /dev/null @@ -1,20 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN %s - -@global_smem = external addrspace(3) global [0 x i8] - -define amdgpu_kernel void @addzero() { -; GCN-LABEL: addzero: -; GCN: ; %bb.0: ; %.lr.ph -; GCN-NEXT: v_mov_b32_e32 v2, 0 -; GCN-NEXT: v_and_b32_e32 v0, 1, v0 -; GCN-NEXT: v_mov_b32_e32 v3, v2 -; GCN-NEXT: ds_write_b64 v0, v[2:3] -; GCN-NEXT: s_endpgm -.lr.ph: - %0 = tail call i32 @llvm.amdgcn.workitem.id.x() - %1 = and i32 %0, 1 - %2 = getelementptr i8, ptr addrspace(3) @global_smem, i32 %1 - store <4 x bfloat> zeroinitializer, ptr addrspace(3) %2, align 8 - ret void -} From 7434565e0a73c40cb6f0340d8274e52be0f553ff Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Tue, 14 Oct 2025 10:12:44 -0700 Subject: [PATCH 6/8] Revert "NFC: Refactor tryFoldZeroHiBits" This reverts commit fde9ab2695c7f9f8c3fede8678914627d675a0bc. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 45 ++++++++++------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 93d0653b4ef54..51c56ecea2c96 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -245,7 +245,7 @@ class SIFoldOperandsImpl { std::optional getImmOrMaterializedImm(MachineOperand &Op) const; bool tryConstantFoldOp(MachineInstr *MI) const; bool tryFoldCndMask(MachineInstr &MI) const; - bool tryFoldArithmetic(MachineInstr &MI) const; + bool tryFoldZeroHighBits(MachineInstr &MI) const; bool foldInstOperand(MachineInstr &MI, const FoldableDef &OpToFold) const; bool foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const; @@ -1730,33 +1730,26 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const { return true; } -bool SIFoldOperandsImpl::tryFoldArithmetic(MachineInstr &MI) const { - unsigned Opc = MI.getOpcode(); - - switch (Opc) { - default: - return false; - case AMDGPU::V_AND_B32_e64: - case AMDGPU::V_AND_B32_e32: { - std::optional Src0Imm = getImmOrMaterializedImm(MI.getOperand(1)); - if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg()) - return false; +bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const { + if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 && + MI.getOpcode() != AMDGPU::V_AND_B32_e32) + return false; - Register Src1 = MI.getOperand(2).getReg(); - MachineInstr *SrcDef = MRI->getVRegDef(Src1); - if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) - return false; + std::optional Src0Imm = getImmOrMaterializedImm(MI.getOperand(1)); + if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg()) + return false; - Register Dst = MI.getOperand(0).getReg(); - MRI->replaceRegWith(Dst, Src1); - if (!MI.getOperand(2).isKill()) - MRI->clearKillFlags(Src1); - MI.eraseFromParent(); - return true; - } - } + Register Src1 = MI.getOperand(2).getReg(); + MachineInstr *SrcDef = MRI->getVRegDef(Src1); + if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) + return false; - return false; + Register Dst = MI.getOperand(0).getReg(); + MRI->replaceRegWith(Dst, Src1); + if (!MI.getOperand(2).isKill()) + MRI->clearKillFlags(Src1); + MI.eraseFromParent(); + return true; } bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI, @@ -2797,7 +2790,7 @@ bool SIFoldOperandsImpl::run(MachineFunction &MF) { for (auto &MI : make_early_inc_range(*MBB)) { Changed |= tryFoldCndMask(MI); - if (tryFoldArithmetic(MI)) { + if (tryFoldZeroHighBits(MI)) { Changed = true; continue; } From ec17ae2eee170a12d11272e6086e4336de0616b1 Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Tue, 14 Oct 2025 10:18:55 -0700 Subject: [PATCH 7/8] Move to tryConstantFoldOp Change-Id: I9b14559b4b5dc9c4bb383ebd517edcdc094a2e6c --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 8 ++++++++ .../CodeGen/AMDGPU/groupstaticsize-zero.ll | 20 +++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 51c56ecea2c96..eefdcf6d0d1ab 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1684,6 +1684,14 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { } } + if (Opc == AMDGPU::V_ADD_U32_e64 || Opc == AMDGPU::V_ADD_U32_e32) { + if (Src1Val == 0) { + // y = add x, 0 -> y = copy x + MI->removeOperand(Src1Idx); + mutateCopyOp(*MI, TII->get(AMDGPU::COPY)); + } + } + return false; } diff --git a/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll b/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll new file mode 100644 index 0000000000000..e52eb8aca9f84 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN %s + +@global_smem = external addrspace(3) global [0 x i8] + +define amdgpu_kernel void @addzero() { +; GCN-LABEL: addzero: +; GCN: ; %bb.0: ; %.lr.ph +; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: v_and_b32_e32 v0, 1, v0 +; GCN-NEXT: v_mov_b32_e32 v3, v2 +; GCN-NEXT: ds_write_b64 v0, v[2:3] +; GCN-NEXT: s_endpgm +.lr.ph: + %0 = tail call i32 @llvm.amdgcn.workitem.id.x() + %1 = and i32 %0, 1 + %2 = getelementptr i8, ptr addrspace(3) @global_smem, i32 %1 + store <4 x bfloat> zeroinitializer, ptr addrspace(3) %2, align 8 + ret void +} From 1b17398ed6bc6271536e7ad61acb239208238391 Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Tue, 14 Oct 2025 10:20:14 -0700 Subject: [PATCH 8/8] Return true for changed Change-Id: I8e7681f4920c9dee1bb4fb1b303c4c886c1969e3 --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index eefdcf6d0d1ab..913f49503660f 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1689,6 +1689,7 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { // y = add x, 0 -> y = copy x MI->removeOperand(Src1Idx); mutateCopyOp(*MI, TII->get(AMDGPU::COPY)); + return true; } }