Skip to content

Commit 5759a3a

Browse files
authored
[AMDGPU] Add s_wakeup_barrier instruction for gfx1250 (#170501)
1 parent 804e768 commit 5759a3a

18 files changed

+160
-11
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -749,6 +749,7 @@ TARGET_BUILTIN(__builtin_amdgcn_ds_load_tr16_b128_v8bf16, "V8yV8y*3", "nc", "gfx
749749

750750
TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")
751751
TARGET_BUILTIN(__builtin_amdgcn_s_monitor_sleep, "vIs", "n", "gfx1250-insts")
752+
TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vv*", "n", "s-wakeup-barrier-inst")
752753

753754
TARGET_BUILTIN(__builtin_amdgcn_s_wait_asynccnt, "vIUs", "n", "gfx1250-insts")
754755
TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts")

clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ kernel void foo(global int *p) { *p = 1; }
2626
// CHECK-NEXT: ret void
2727
//
2828
//.
29-
// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" "uniform-work-group-size"="false" }
30-
// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" }
29+
// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" "uniform-work-group-size"="false" }
30+
// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" }
3131
// CHECK: attributes #[[ATTR2]] = { convergent nounwind }
3232
//.
3333
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}

clang/test/CodeGenOpenCL/amdgpu-features.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,8 @@
109109
// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
110110
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
111111
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
112-
// GFX1250: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
113-
// GFX1251: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
112+
// GFX1250: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
113+
// GFX1251: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
114114

115115
// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize64"
116116

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1489,6 +1489,21 @@ void test_s_cluster_barrier()
14891489
__builtin_amdgcn_s_cluster_barrier();
14901490
}
14911491

1492+
// CHECK-LABEL: @test_s_wakeup_barrier(
1493+
// CHECK-NEXT: entry:
1494+
// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1495+
// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
1496+
// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
1497+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
1498+
// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
1499+
// CHECK-NEXT: call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) [[TMP1]])
1500+
// CHECK-NEXT: ret void
1501+
//
1502+
void test_s_wakeup_barrier(void *bar)
1503+
{
1504+
__builtin_amdgcn_s_wakeup_barrier(bar);
1505+
}
1506+
14921507
// CHECK-LABEL: @test_global_add_f32(
14931508
// CHECK-NEXT: entry:
14941509
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,12 @@ def int_amdgcn_s_barrier_join : ClangBuiltin<"__builtin_amdgcn_s_barrier_join">,
314314
Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
315315
IntrNoCallback, IntrNoFree]>;
316316

317+
// void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) %barrier)
318+
// The %barrier argument must be uniform, otherwise behavior is undefined.
319+
def int_amdgcn_s_wakeup_barrier : ClangBuiltin<"__builtin_amdgcn_s_wakeup_barrier">,
320+
Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
321+
IntrNoCallback, IntrNoFree]>;
322+
317323
// void @llvm.amdgcn.s.barrier.wait(i16 %barrierType)
318324
def int_amdgcn_s_barrier_wait : ClangBuiltin<"__builtin_amdgcn_s_barrier_wait">,
319325
Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1276,6 +1276,12 @@ def FeatureSetPrioIncWgInst : SubtargetFeature<"setprio-inc-wg-inst",
12761276
"Has s_setprio_inc_wg instruction."
12771277
>;
12781278

1279+
def FeatureSWakeupBarrier : SubtargetFeature<"s-wakeup-barrier-inst",
1280+
"HasSWakeupBarrier",
1281+
"true",
1282+
"Has s_wakeup_barrier instruction."
1283+
>;
1284+
12791285
//===------------------------------------------------------------===//
12801286
// Subtarget Features (options and debugging)
12811287
//===------------------------------------------------------------===//
@@ -2200,6 +2206,7 @@ def FeatureISAVersion12_50_Common : FeatureSet<
22002206
FeaturePkAddMinMaxInsts,
22012207
FeatureLdsBarrierArriveAtomic,
22022208
FeatureSetPrioIncWgInst,
2209+
FeatureSWakeupBarrier,
22032210
Feature45BitNumRecordsBufferResource,
22042211
FeatureSupportsXNACK,
22052212
FeatureXNACK,
@@ -3076,6 +3083,9 @@ def HasLdsBarrierArriveAtomic : Predicate<"Subtarget->hasLdsBarrierArriveAtomic(
30763083
def HasSetPrioIncWgInst : Predicate<"Subtarget->hasSetPrioIncWgInst()">,
30773084
AssemblerPredicate<(all_of FeatureSetPrioIncWgInst)>;
30783085

3086+
def HasSWakeupBarrier : Predicate<"Subtarget->hasSWakeupBarrier()">,
3087+
AssemblerPredicate<(all_of FeatureSWakeupBarrier)>;
3088+
30793089
def NeedsAlignedVGPRs : Predicate<"Subtarget->needsAlignedVGPRs()">,
30803090
AssemblerPredicate<(all_of FeatureRequiresAlignedVGPRs)>;
30813091

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2392,6 +2392,16 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
23922392
case Intrinsic::amdgcn_s_barrier_init:
23932393
case Intrinsic::amdgcn_s_barrier_signal_var:
23942394
return selectNamedBarrierInit(I, IntrinsicID);
2395+
case Intrinsic::amdgcn_s_wakeup_barrier: {
2396+
if (!STI.hasSWakeupBarrier()) {
2397+
Function &F = I.getMF()->getFunction();
2398+
F.getContext().diagnose(
2399+
DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",
2400+
I.getDebugLoc(), DS_Error));
2401+
return false;
2402+
}
2403+
return selectNamedBarrierInst(I, IntrinsicID);
2404+
}
23952405
case Intrinsic::amdgcn_s_barrier_join:
23962406
case Intrinsic::amdgcn_s_get_named_barrier_state:
23972407
return selectNamedBarrierInst(I, IntrinsicID);
@@ -6832,6 +6842,8 @@ unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
68326842
llvm_unreachable("not a named barrier op");
68336843
case Intrinsic::amdgcn_s_barrier_join:
68346844
return AMDGPU::S_BARRIER_JOIN_IMM;
6845+
case Intrinsic::amdgcn_s_wakeup_barrier:
6846+
return AMDGPU::S_WAKEUP_BARRIER_IMM;
68356847
case Intrinsic::amdgcn_s_get_named_barrier_state:
68366848
return AMDGPU::S_GET_BARRIER_STATE_IMM;
68376849
};
@@ -6841,6 +6853,8 @@ unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
68416853
llvm_unreachable("not a named barrier op");
68426854
case Intrinsic::amdgcn_s_barrier_join:
68436855
return AMDGPU::S_BARRIER_JOIN_M0;
6856+
case Intrinsic::amdgcn_s_wakeup_barrier:
6857+
return AMDGPU::S_WAKEUP_BARRIER_M0;
68446858
case Intrinsic::amdgcn_s_get_named_barrier_state:
68456859
return AMDGPU::S_GET_BARRIER_STATE_M0;
68466860
};

llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,7 @@ bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
371371
case Intrinsic::amdgcn_s_barrier_wait:
372372
case Intrinsic::amdgcn_s_barrier_leave:
373373
case Intrinsic::amdgcn_s_get_barrier_state:
374+
case Intrinsic::amdgcn_s_wakeup_barrier:
374375
case Intrinsic::amdgcn_wave_barrier:
375376
case Intrinsic::amdgcn_sched_barrier:
376377
case Intrinsic::amdgcn_sched_group_barrier:

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3347,6 +3347,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
33473347
constrainOpWithReadfirstlane(B, MI, 1);
33483348
return;
33493349
case Intrinsic::amdgcn_s_barrier_join:
3350+
case Intrinsic::amdgcn_s_wakeup_barrier:
33503351
constrainOpWithReadfirstlane(B, MI, 1);
33513352
return;
33523353
case Intrinsic::amdgcn_s_barrier_init:
@@ -5581,6 +5582,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
55815582
OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
55825583
break;
55835584
case Intrinsic::amdgcn_s_barrier_join:
5585+
case Intrinsic::amdgcn_s_wakeup_barrier:
55845586
OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
55855587
break;
55865588
case Intrinsic::amdgcn_s_barrier_init:

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
289289
bool HasPointSampleAccel = false;
290290
bool HasLdsBarrierArriveAtomic = false;
291291
bool HasSetPrioIncWgInst = false;
292+
bool HasSWakeupBarrier = false;
292293

293294
bool RequiresCOV6 = false;
294295
bool UseBlockVGPROpsForCSR = false;
@@ -1612,6 +1613,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
16121613
// \returns true if target has S_SETPRIO_INC_WG instruction.
16131614
bool hasSetPrioIncWgInst() const { return HasSetPrioIncWgInst; }
16141615

1616+
// \returns true if target has S_WAKEUP_BARRIER instruction.
1617+
bool hasSWakeupBarrier() const { return HasSWakeupBarrier; }
1618+
16151619
// \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
16161620
// of sign-extending. Note that GFX1250 has not only fixed the bug but also
16171621
// extended VA to 57 bits.

0 commit comments

Comments
 (0)