
Conversation

@LU-JOHN
Contributor

@LU-JOHN LU-JOHN commented Nov 14, 2025

When shrinking s_and/s_or to bitset*, remove the leftover implicit scc def: bitset* instructions do not set scc.
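
For context, the stale def comes from mutating the instruction in place: MachineInstr::setDesc replaces the opcode descriptor but leaves the operand list untouched. A minimal sketch of the failure mode (the opcode pair and surrounding code are illustrative, not taken verbatim from the patch):

  // Shrinking rewrites the opcode in place. setDesc() swaps the MCInstrDesc
  // but keeps every operand already on MI, including implicit operands that
  // belonged to the old opcode.
  MI.setDesc(TII->get(AMDGPU::S_BITSET0_B32)); // was AMDGPU::S_AND_B32
  // MI still ends with the "implicit-def dead $scc" inherited from
  // S_AND_B32, even though S_BITSET0_B32's descriptor declares no scc def,
  // so later passes would wrongly see scc clobbered here.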

@llvmbot
Member

llvmbot commented Nov 14, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: None (LU-JOHN)

Changes

When shrinking s_and/s_or to bitset*, remove the leftover implicit scc def: bitset* instructions do not set scc.


Patch is 43.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168128.diff

7 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+11-22)
  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+15)
  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (+3)
  • (modified) llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll (+51-51)
  • (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll (+32-32)
  • (modified) llvm/test/CodeGen/AMDGPU/shrink-insts-scalar-bit-ops.mir (+2-2)
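
The core of the patch is lifting the trailing-operand cleanup that SIFoldOperands' local mutateCopyOp already performed into a reusable SIInstrInfo helper, so SIShrinkInstructions can call it too. In isolation, the trim works like this (the same logic the diff below moves into SIInstrInfo::mutateAndCleanupImplicit; it assumes leftover implicits sit at the tail of the operand list, which holds for instructions built the usual way):

  // Count the operands the *new* descriptor accounts for: explicit operands
  // plus the implicit uses and defs it declares. Anything past that index
  // is left over from the old opcode (e.g. the scc def of s_and_b32).
  const MCInstrDesc &Desc = MI.getDesc();
  unsigned NumOps = Desc.getNumOperands() + Desc.implicit_uses().size() +
                    Desc.implicit_defs().size();
  // Walk backwards so removal does not invalidate the remaining indices.
  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.removeOperand(I);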
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 438ca139757ad..18d90346d1d88 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1534,20 +1534,6 @@ static unsigned getMovOpc(bool IsScalar) {
   return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
 }
 
-static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
-  MI.setDesc(NewDesc);
-
-  // Remove any leftover implicit operands from mutating the instruction. e.g.
-  // if we replace an s_and_b32 with a copy, we don't need the implicit scc def
-  // anymore.
-  const MCInstrDesc &Desc = MI.getDesc();
-  unsigned NumOps = Desc.getNumOperands() + Desc.implicit_uses().size() +
-                    Desc.implicit_defs().size();
-
-  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
-    MI.removeOperand(I);
-}
-
 std::optional<int64_t>
 SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const {
   if (Op.isImm())
@@ -1586,7 +1572,8 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
        Opc == AMDGPU::S_NOT_B32) &&
       Src0Imm) {
     MI->getOperand(1).ChangeToImmediate(~*Src0Imm);
-    mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
+    TII->mutateAndCleanupImplicit(
+        *MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
     return true;
   }
 
@@ -1614,7 +1601,7 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
     // instruction.
     MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
     MI->removeOperand(Src1Idx);
-    mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
+    TII->mutateAndCleanupImplicit(*MI, TII->get(getMovOpc(IsSGPR)));
     return true;
   }
 
@@ -1634,11 +1621,12 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
     if (Src1Val == 0) {
       // y = or x, 0 => y = copy x
       MI->removeOperand(Src1Idx);
-      mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
+      TII->mutateAndCleanupImplicit(*MI, TII->get(AMDGPU::COPY));
     } else if (Src1Val == -1) {
       // y = or x, -1 => y = v_mov_b32 -1
       MI->removeOperand(Src1Idx);
-      mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
+      TII->mutateAndCleanupImplicit(
+          *MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
     } else
       return false;
 
@@ -1650,11 +1638,12 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
     if (Src1Val == 0) {
       // y = and x, 0 => y = v_mov_b32 0
       MI->removeOperand(Src0Idx);
-      mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
+      TII->mutateAndCleanupImplicit(
+          *MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
     } else if (Src1Val == -1) {
       // y = and x, -1 => y = copy x
       MI->removeOperand(Src1Idx);
-      mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
+      TII->mutateAndCleanupImplicit(*MI, TII->get(AMDGPU::COPY));
     } else
       return false;
 
@@ -1666,7 +1655,7 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
     if (Src1Val == 0) {
       // y = xor x, 0 => y = copy x
       MI->removeOperand(Src1Idx);
-      mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
+      TII->mutateAndCleanupImplicit(*MI, TII->get(AMDGPU::COPY));
       return true;
     }
   }
@@ -1712,7 +1701,7 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
     MI.removeOperand(Src1ModIdx);
   if (Src0ModIdx != -1)
     MI.removeOperand(Src0ModIdx);
-  mutateCopyOp(MI, NewDesc);
+  TII->mutateAndCleanupImplicit(MI, NewDesc);
   LLVM_DEBUG(dbgs() << MI);
   return true;
 }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index f5b52425e7841..7cb7f47ddb220 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3460,6 +3460,21 @@ void SIInstrInfo::removeModOperands(MachineInstr &MI) const {
   }
 }
 
+void SIInstrInfo::mutateAndCleanupImplicit(MachineInstr &MI,
+                                           const MCInstrDesc &NewDesc) const {
+  MI.setDesc(NewDesc);
+
+  // Remove any leftover implicit operands from mutating the instruction. e.g.
+  // if we replace an s_and_b32 with a copy, we don't need the implicit scc def
+  // anymore.
+  const MCInstrDesc &Desc = MI.getDesc();
+  unsigned NumOps = Desc.getNumOperands() + Desc.implicit_uses().size() +
+                    Desc.implicit_defs().size();
+
+  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
+    MI.removeOperand(I);
+}
+
 std::optional<int64_t> SIInstrInfo::extractSubregFromImm(int64_t Imm,
                                                          unsigned SubRegIndex) {
   switch (SubRegIndex) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index b12d9525a7605..c66985a19685b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -425,6 +425,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
 
   void removeModOperands(MachineInstr &MI) const;
 
+  void mutateAndCleanupImplicit(MachineInstr &MI,
+                                const MCInstrDesc &NewDesc) const;
+
   /// Return the extracted immediate value in a subregister use from a constant
   /// materialized in a super register.
   ///
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 0d81cb935069c..1b78f67e76d07 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -586,7 +586,7 @@ bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
     if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
       const bool IsUndef = SrcReg->isUndef();
       const bool IsKill = SrcReg->isKill();
-      MI.setDesc(TII->get(Opc));
+      TII->mutateAndCleanupImplicit(MI, TII->get(Opc));
       if (Opc == AMDGPU::S_BITSET0_B32 ||
           Opc == AMDGPU::S_BITSET1_B32) {
         Src0->ChangeToImmediate(NewImm);
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
index d3ebd92f0677b..402dfcb869e66 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
@@ -170170,8 +170170,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s8, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s8, s8, s7
 ; VI-NEXT:    s_add_i32 s10, s8, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[8:9], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s7, s7, s10
 ; VI-NEXT:    s_lshr_b32 s7, s7, 16
@@ -170182,8 +170182,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s8, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s8, s8, s7
 ; VI-NEXT:    s_add_i32 s10, s8, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[8:9], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s8, s7, s10
 ; VI-NEXT:    s_and_b32 s7, s16, 0xffff0000
@@ -170192,8 +170192,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s9, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[10:11], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s7, s7, s9
 ; VI-NEXT:    s_lshr_b32 s9, s7, 16
@@ -170204,8 +170204,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s8, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s8, s8, s7
 ; VI-NEXT:    s_add_i32 s10, s8, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[8:9], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s8, s7, s10
 ; VI-NEXT:    s_and_b32 s7, s19, 0xffff0000
@@ -170214,8 +170214,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s9, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[10:11], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s7, s7, s9
 ; VI-NEXT:    s_lshr_b32 s9, s7, 16
@@ -170226,8 +170226,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s9, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[10:11], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s10, s7, s9
 ; VI-NEXT:    s_and_b32 s7, s18, 0xffff0000
@@ -170236,8 +170236,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s9, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[12:13], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s7, s7, s9
 ; VI-NEXT:    s_lshr_b32 s11, s7, 16
@@ -170248,8 +170248,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_lshr_b64 s[18:19], s[10:11], 16
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[10:11], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s10, s7, s9
 ; VI-NEXT:    s_and_b32 s7, s21, 0xffff0000
@@ -170258,8 +170258,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s9, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[12:13], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s7, s7, s9
 ; VI-NEXT:    s_lshr_b32 s11, s7, 16
@@ -170270,8 +170270,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_lshr_b64 s[10:11], s[10:11], 16
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[12:13], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s12, s7, s9
 ; VI-NEXT:    s_and_b32 s7, s20, 0xffff0000
@@ -170280,8 +170280,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s9, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[14:15], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s7, s7, s9
 ; VI-NEXT:    s_lshr_b32 s13, s7, 16
@@ -170292,8 +170292,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_lshr_b64 s[20:21], s[12:13], 16
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[12:13], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s12, s7, s9
 ; VI-NEXT:    s_and_b32 s7, s23, 0xffff0000
@@ -170302,8 +170302,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s9, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[14:15], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s7, s7, s9
 ; VI-NEXT:    s_lshr_b32 s13, s7, 16
@@ -170314,8 +170314,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_lshr_b64 s[12:13], s[12:13], 16
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[14:15], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s14, s7, s9
 ; VI-NEXT:    s_and_b32 s7, s22, 0xffff0000
@@ -170324,8 +170324,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s9, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[22:23], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s7, s7, s9
 ; VI-NEXT:    s_lshr_b32 s15, s7, 16
@@ -170336,8 +170336,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_lshr_b64 s[22:23], s[14:15], 16
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[14:15], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s14, s7, s9
 ; VI-NEXT:    s_and_b32 s7, s25, 0xffff0000
@@ -170346,8 +170346,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s9, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[40:41], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s7, s7, s9
 ; VI-NEXT:    s_lshr_b32 s15, s7, 16
@@ -170358,8 +170358,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_lshr_b64 s[40:41], s[14:15], 16
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[14:15], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s14, s7, s9
 ; VI-NEXT:    s_and_b32 s7, s24, 0xffff0000
@@ -170368,8 +170368,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s9, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[24:25], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s7, s7, s9
 ; VI-NEXT:    s_lshr_b32 s15, s7, 16
@@ -170380,8 +170380,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_lshr_b64 s[24:25], s[14:15], 16
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[14:15], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s14, s7, s9
 ; VI-NEXT:    s_and_b32 s7, s27, 0xffff0000
@@ -170390,8 +170390,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s9, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[42:43], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s7, s7, s9
 ; VI-NEXT:    s_lshr_b32 s15, s7, 16
@@ -170402,8 +170402,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_lshr_b64 s[44:45], s[14:15], 16
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[14:15], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s14, s7, s9
 ; VI-NEXT:    s_and_b32 s7, s26, 0xffff0000
@@ -170412,8 +170412,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s9, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[26:27], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s7, s7, s9
 ; VI-NEXT:    s_lshr_b32 s15, s7, 16
@@ -170424,8 +170424,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_lshr_b64 s[26:27], s[14:15], 16
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[14:15], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s14, s7, s9
 ; VI-NEXT:    s_and_b32 s7, s29, 0xffff0000
@@ -170434,8 +170434,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s9, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[42:43], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s7, s7, s9
 ; VI-NEXT:    s_lshr_b32 s15, s7, 16
@@ -170446,8 +170446,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_lshr_b64 s[58:59], s[14:15], 16
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[14:15], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s14, s7, s9
 ; VI-NEXT:    s_and_b32 s7, s28, 0xffff0000
@@ -170456,8 +170456,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s9, s7, 0x10010
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[28:29], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s7, s7, s9
 ; VI-NEXT:    s_lshr_b32 s15, s7, 16
@@ -170468,8 +170468,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_add_i32 s9, s9, s7
 ; VI-NEXT:    s_lshr_b64 s[28:29], s[14:15], 16
 ; VI-NEXT:    s_addk_i32 s9, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s7, 22
 ; VI-NEXT:    s_and_b64 s[14:15], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s14, s7, s9
 ; VI-NEXT:    s_and_b32 s5, s5, 0xffff0000
@@ -170478,8 +170478,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_bfe_u32 s7, s5, 0x10010
 ; VI-NEXT:    s_add_i32 s7, s7, s5
 ; VI-NEXT:    s_addk_i32 s7, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s5, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
+; VI-NEXT:    s_bitset1_b32 s5, 22
 ; VI-NEXT:    s_and_b64 s[42:43], vcc, exec
 ; VI-NEXT:    s_cselect_b32 s5, s5, s7
 ; VI-NEXT:    s_lshr_b32 s15, s5, 16
@@ -170490,9 +170490,9 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a
 ; VI-NEXT:    s_add_i32 s7, s7, s5
 ; VI-NEXT:    s_lshr_b64 s[62:63], s[14:15], 16
 ; VI-NEXT:    s_addk_i32 s7, 0x7fff
-; VI-NEXT:    s_bitset1_b32 s5, 22
 ; VI-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
 ; VI-NEXT:    v_lshrrev_b32_e32 v24, 16, v24
+; VI-NEXT:    s_bitset1_b32 s5, 22
 ; VI-NEXT:    s_and_b64 s[14:15], vcc, exec
 ; VI-NEXT:    v_lshrrev_b64 v[23:24], 16, v[23:24]
 ; VI-NEXT:    s_cselect_b32 s14, s5, s7
@...
[truncated]

Contributor

@arsenm arsenm left a comment

It's so much cleaner to create new instructions than trying to modify in place
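
To make the contrast concrete, here is a hypothetical sketch of the build-a-new-instruction approach (Dst and BitIdx are illustrative names, the real S_BITSET0_B32 also ties its destination as a use, and this is not code from the PR). BuildMI appends the implicit defs and uses declared by the new descriptor, so the replacement can never inherit a stale scc def:

  // Create a fresh instruction before MI, then delete MI, instead of
  // mutating MI's descriptor in place.
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
          TII->get(AMDGPU::S_BITSET0_B32), Dst.getReg())
      .addImm(BitIdx);
  MI.eraseFromParent();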

@LU-JOHN LU-JOHN merged commit 9fa15ef into llvm:main Nov 15, 2025
10 checks passed
@llvm-ci
Collaborator

llvm-ci commented Nov 15, 2025

LLVM Buildbot has detected a new failure on builder llvm-nvptx-nvidia-win running on as-builder-8 while building llvm at step 7 "test-build-unified-tree-check-llvm".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/54/builds/14709

Here is the relevant piece of the build log for reference:
Step 7 (test-build-unified-tree-check-llvm) failure: test (failure)
******************** TEST 'LLVM-Unit :: Support/./SupportTests.exe/79/105' FAILED ********************
Script(shard):
--
GTEST_OUTPUT=json:C:\buildbot\as-builder-8\llvm-nvptx-nvidia-win\build\unittests\Support\.\SupportTests.exe-LLVM-Unit-4036-79-105.json GTEST_SHUFFLE=0 GTEST_TOTAL_SHARDS=105 GTEST_SHARD_INDEX=79 C:\buildbot\as-builder-8\llvm-nvptx-nvidia-win\build\unittests\Support\.\SupportTests.exe
--


Note: This is test shard 80 of 105.
[==========] Running 16 tests from 16 test suites.
[----------] Global test environment set-up.
[----------] 1 test from BinaryStreamTest
[ RUN      ] BinaryStreamTest.DropOperations
[       OK ] BinaryStreamTest.DropOperations (0 ms)
[----------] 1 test from BinaryStreamTest (0 ms total)

[----------] 1 test from CommandLineTest
[ RUN      ] CommandLineTest.TokenizeAndMarkEOLs
[       OK ] CommandLineTest.TokenizeAndMarkEOLs (0 ms)
[----------] 1 test from CommandLineTest (0 ms total)

[----------] 1 test from DataExtractorTest
[ RUN      ] DataExtractorTest.LEB128_error
[       OK ] DataExtractorTest.LEB128_error (0 ms)
[----------] 1 test from DataExtractorTest (0 ms total)

[----------] 1 test from Error
[ RUN      ] Error.ForwardToExpected
[       OK ] Error.ForwardToExpected (0 ms)
[----------] 1 test from Error (0 ms total)
...
