Skip to content

Conversation

@mbrkusanin
Copy link
Collaborator

No description provided.

@llvmbot llvmbot added backend:AMDGPU clang:frontend Language frontend issues, e.g. anything involving "Sema" llvm:ir labels Dec 3, 2025
@llvmbot
Copy link
Member

llvmbot commented Dec 3, 2025

@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-backend-amdgpu

Author: Mirko Brkušanin (mbrkusanin)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/170501.diff

13 Files Affected:

  • (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+1)
  • (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl (+15)
  • (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+6)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+14)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp (+1)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+2)
  • (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+25-4)
  • (modified) llvm/lib/Target/AMDGPU/SOPInstructions.td (+14)
  • (modified) llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync-and-module-lds.ll (+1-2)
  • (modified) llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync.ll (-1)
  • (added) llvm/test/CodeGen/AMDGPU/s-wakeup-barrier.ll (+41)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s (+12)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt (+9)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 8af6ce1528a45..2ec065716d21c 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -749,6 +749,7 @@ TARGET_BUILTIN(__builtin_amdgcn_ds_load_tr16_b128_v8bf16, "V8yV8y*3", "nc", "gfx
 
 TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")
 TARGET_BUILTIN(__builtin_amdgcn_s_monitor_sleep,  "vIs", "n", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vv*", "n", "gfx1250-insts")
 
 TARGET_BUILTIN(__builtin_amdgcn_s_wait_asynccnt, "vIUs", "n", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index e4a5fe9014c2e..b32bcdd408512 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -1489,6 +1489,21 @@ void test_s_cluster_barrier()
   __builtin_amdgcn_s_cluster_barrier();
 }
 
+// CHECK-LABEL: @test_s_wakeup_barrier(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
+// CHECK-NEXT:    store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
+// CHECK-NEXT:    call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void test_s_wakeup_barrier(void *bar)
+{
+  __builtin_amdgcn_s_wakeup_barrier(bar);
+}
+
 // CHECK-LABEL: @test_global_add_f32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index c2057ac3a14e6..cdad810f1458d 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -314,6 +314,12 @@ def int_amdgcn_s_barrier_join : ClangBuiltin<"__builtin_amdgcn_s_barrier_join">,
   Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
                                 IntrNoCallback, IntrNoFree]>;
 
+// void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) %barrier)
+// The %barrier argument must be uniform, otherwise behavior is undefined.
+def int_amdgcn_s_wakeup_barrier : ClangBuiltin<"__builtin_amdgcn_s_wakeup_barrier">,
+  Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
+                                IntrNoCallback, IntrNoFree]>;
+
 // void @llvm.amdgcn.s.barrier.wait(i16 %barrierType)
 def int_amdgcn_s_barrier_wait : ClangBuiltin<"__builtin_amdgcn_s_barrier_wait">,
   Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index c575714cf61cd..a06c0090a7b1f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2392,6 +2392,16 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
   case Intrinsic::amdgcn_s_barrier_init:
   case Intrinsic::amdgcn_s_barrier_signal_var:
     return selectNamedBarrierInit(I, IntrinsicID);
+  case Intrinsic::amdgcn_s_wakeup_barrier: {
+    if (!AMDGPU::isGFX1250(STI)) {
+      Function &F = I.getMF()->getFunction();
+      F.getContext().diagnose(
+          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",
+                                    I.getDebugLoc(), DS_Error));
+      return false;
+    }
+    return selectNamedBarrierInst(I, IntrinsicID);
+  }
   case Intrinsic::amdgcn_s_barrier_join:
   case Intrinsic::amdgcn_s_get_named_barrier_state:
     return selectNamedBarrierInst(I, IntrinsicID);
@@ -6830,6 +6840,8 @@ unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
       llvm_unreachable("not a named barrier op");
     case Intrinsic::amdgcn_s_barrier_join:
       return AMDGPU::S_BARRIER_JOIN_IMM;
+    case Intrinsic::amdgcn_s_wakeup_barrier:
+      return AMDGPU::S_WAKEUP_BARRIER_IMM;
     case Intrinsic::amdgcn_s_get_named_barrier_state:
       return AMDGPU::S_GET_BARRIER_STATE_IMM;
     };
@@ -6839,6 +6851,8 @@ unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
       llvm_unreachable("not a named barrier op");
     case Intrinsic::amdgcn_s_barrier_join:
       return AMDGPU::S_BARRIER_JOIN_M0;
+    case Intrinsic::amdgcn_s_wakeup_barrier:
+      return AMDGPU::S_WAKEUP_BARRIER_M0;
     case Intrinsic::amdgcn_s_get_named_barrier_state:
       return AMDGPU::S_GET_BARRIER_STATE_M0;
     };
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
index 8145816405915..4e16c13e30e91 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
@@ -371,6 +371,7 @@ bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
     case Intrinsic::amdgcn_s_barrier_wait:
     case Intrinsic::amdgcn_s_barrier_leave:
     case Intrinsic::amdgcn_s_get_barrier_state:
+    case Intrinsic::amdgcn_s_wakeup_barrier:
     case Intrinsic::amdgcn_wave_barrier:
     case Intrinsic::amdgcn_sched_barrier:
     case Intrinsic::amdgcn_sched_group_barrier:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index a88e4b2a2a31d..d7ad08d6bc0ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3345,6 +3345,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
       constrainOpWithReadfirstlane(B, MI, 1);
       return;
     case Intrinsic::amdgcn_s_barrier_join:
+    case Intrinsic::amdgcn_s_wakeup_barrier:
       constrainOpWithReadfirstlane(B, MI, 1);
       return;
     case Intrinsic::amdgcn_s_barrier_init:
@@ -5579,6 +5580,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
       break;
     case Intrinsic::amdgcn_s_barrier_join:
+    case Intrinsic::amdgcn_s_wakeup_barrier:
       OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
       break;
     case Intrinsic::amdgcn_s_barrier_init:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 301f2fc8dab45..c2a0f1c5bfb8d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11503,6 +11503,11 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
     auto *NewMI = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops);
     return SDValue(NewMI, 0);
   }
+  case Intrinsic::amdgcn_s_wakeup_barrier: {
+    if (!AMDGPU::isGFX1250(*Subtarget))
+      return SDValue();
+    [[fallthrough]];
+  }
   case Intrinsic::amdgcn_s_barrier_join: {
     // these three intrinsics have one operand: barrier pointer
     SDValue Chain = Op->getOperand(0);
@@ -11512,16 +11517,32 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
 
     if (isa<ConstantSDNode>(BarOp)) {
       uint64_t BarVal = cast<ConstantSDNode>(BarOp)->getZExtValue();
-      Opc = AMDGPU::S_BARRIER_JOIN_IMM;
-
+      switch (IntrinsicID) {
+      default:
+        return SDValue();
+      case Intrinsic::amdgcn_s_barrier_join:
+        Opc = AMDGPU::S_BARRIER_JOIN_IMM;
+        break;
+      case Intrinsic::amdgcn_s_wakeup_barrier:
+        Opc = AMDGPU::S_WAKEUP_BARRIER_IMM;
+        break;
+      }
       // extract the BarrierID from bits 4-9 of the immediate
       unsigned BarID = (BarVal >> 4) & 0x3F;
       SDValue K = DAG.getTargetConstant(BarID, DL, MVT::i32);
       Ops.push_back(K);
       Ops.push_back(Chain);
     } else {
-      Opc = AMDGPU::S_BARRIER_JOIN_M0;
-
+      switch (IntrinsicID) {
+      default:
+        return SDValue();
+      case Intrinsic::amdgcn_s_barrier_join:
+        Opc = AMDGPU::S_BARRIER_JOIN_M0;
+        break;
+      case Intrinsic::amdgcn_s_wakeup_barrier:
+        Opc = AMDGPU::S_WAKEUP_BARRIER_M0;
+        break;
+      }
       // extract the BarrierID from bits 4-9 of BarOp, copy to M0[5:0]
       SDValue M0Val;
       M0Val = DAG.getNode(ISD::SRL, DL, MVT::i32, BarOp,
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 1931e0be15152..8f92dfb957b1a 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -504,6 +504,12 @@ def S_BARRIER_JOIN_M0 : SOP1_Pseudo <"s_barrier_join m0", (outs), (ins),
   let isConvergent = 1;
 }
 
+def S_WAKEUP_BARRIER_M0 : SOP1_Pseudo <"s_wakeup_barrier m0", (outs), (ins),
+  "", []>{
+  let SchedRW = [WriteBarrier];
+  let isConvergent = 1;
+  let SubtargetPredicate = isGFX1250Plus;
+}
 } // End Uses = [M0]
 
 def S_BARRIER_SIGNAL_IMM : SOP1_Pseudo <"s_barrier_signal", (outs),
@@ -527,6 +533,12 @@ def S_BARRIER_JOIN_IMM : SOP1_Pseudo <"s_barrier_join", (outs),
   let isConvergent = 1;
 }
 
+def S_WAKEUP_BARRIER_IMM : SOP1_Pseudo <"s_wakeup_barrier", (outs),
+  (ins SplitBarrier:$src0), "$src0", []>{
+  let SchedRW = [WriteBarrier];
+  let isConvergent = 1;
+  let SubtargetPredicate = isGFX1250Plus;
+}
 } // End has_sdst = 0
 
 def S_GET_BARRIER_STATE_IMM : SOP1_Pseudo <"s_get_barrier_state", (outs SSrc_b32:$sdst),
@@ -2226,6 +2238,8 @@ defm S_SLEEP_VAR                  : SOP1_IMM_Real_gfx12<0x058>;
 // GFX1250
 defm S_GET_SHADER_CYCLES_U64      : SOP1_Real_gfx12<0x06>;
 defm S_ADD_PC_I64                 : SOP1_Real_gfx12<0x04b>;
+defm S_WAKEUP_BARRIER_M0          : SOP1_M0_Real_gfx12<0x057>;
+defm S_WAKEUP_BARRIER_IMM         : SOP1_IMM_Real_gfx12<0x057>;
 
 //===----------------------------------------------------------------------===//
 // SOP1 - GFX1150, GFX12
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync-and-module-lds.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync-and-module-lds.ll
index bed8fa20a5044..215fb06106e11 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync-and-module-lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync-and-module-lds.ll
@@ -112,8 +112,7 @@ attributes #2 = { nounwind readnone }
 ; CHECK: attributes #[[ATTR0]] = { nounwind }
 ; CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-lds-size"="1" }
 ; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
 ;.
 ; CHECK: [[META0]] = !{i32 8396816, i32 8396817}
 ; CHECK: [[META1]] = !{i32 8396912, i32 8396913}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync.ll
index bde6db6463cb1..74e6d83ed2d94 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-lower-exec-sync.ll
@@ -96,7 +96,6 @@ attributes #2 = { nounwind readnone }
 ;.
 ; CHECK: attributes #[[ATTR0]] = { nounwind }
 ; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind }
 ;.
 ; CHECK: [[META0]] = !{i32 8396816, i32 8396817}
 ; CHECK: [[META1]] = !{i32 8396912, i32 8396913}
diff --git a/llvm/test/CodeGen/AMDGPU/s-wakeup-barrier.ll b/llvm/test/CodeGen/AMDGPU/s-wakeup-barrier.ll
new file mode 100644
index 0000000000000..b92d38cd857f2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/s-wakeup-barrier.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1250-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1250-GISEL %s
+
+@bar = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
+
+define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
+; GFX1250-SDAG-LABEL: kernel1:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX1250-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x2c
+; GFX1250-SDAG-NEXT:    s_mov_b32 m0, 1
+; GFX1250-SDAG-NEXT:    s_wakeup_barrier m0
+; GFX1250-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT:    s_lshr_b32 s0, s0, 4
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-NEXT:    s_and_b32 m0, s0, 63
+; GFX1250-SDAG-NEXT:    s_wakeup_barrier m0
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: kernel1:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX1250-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
+; GFX1250-GISEL-NEXT:    s_wakeup_barrier 1
+; GFX1250-GISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT:    s_lshr_b32 s0, s0, 4
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-NEXT:    s_and_b32 m0, s0, 63
+; GFX1250-GISEL-NEXT:    s_wakeup_barrier m0
+; GFX1250-GISEL-NEXT:    s_endpgm
+    call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) @bar)
+    call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) %in)
+    ret void
+}
+
+
+declare void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3)) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { convergent nounwind }
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s
index cc351afd49f04..68cfdc4c01178 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s
@@ -70,3 +70,15 @@ s_get_barrier_state s3, -4
 
 s_get_barrier_state s3, m0
 // GFX1250: s_get_barrier_state s3, m0              ; encoding: [0x7d,0x50,0x83,0xbe]
+
+s_wakeup_barrier 1
+// GFX1250: s_wakeup_barrier 1                      ; encoding: [0x81,0x57,0x80,0xbe]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+s_wakeup_barrier -1
+// GFX1250: s_wakeup_barrier -1                     ; encoding: [0xc1,0x57,0x80,0xbe]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+s_wakeup_barrier m0
+// GFX1250: s_wakeup_barrier m0                     ; encoding: [0x7d,0x57,0x80,0xbe]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt
index 34a46467c6839..1490914a5f61f 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt
@@ -33,3 +33,12 @@
 
 0x7d,0x50,0x83,0xbe
 # GFX1250: s_get_barrier_state s3, m0              ; encoding: [0x7d,0x50,0x83,0xbe]
+
+0x81,0x57,0x80,0xbe
+# GFX1250: s_wakeup_barrier 1                      ; encoding: [0x81,0x57,0x80,0xbe]
+
+0xc1,0x57,0x80,0xbe
+# GFX1250: s_wakeup_barrier -1                     ; encoding: [0xc1,0x57,0x80,0xbe]
+
+0x7d,0x57,0x80,0xbe
+# GFX1250: s_wakeup_barrier m0                     ; encoding: [0x7d,0x57,0x80,0xbe]

@mbrkusanin
Copy link
Collaborator Author

This is pretty much revert of #122277 but enabled only for gfx1250

@shiltian
Copy link
Contributor

shiltian commented Dec 3, 2025

Hmm, I'm still not sure why we wanted to remove it in the first place.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AMDGPU clang:frontend Language frontend issues, e.g. anything involving "Sema" llvm:ir

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants