Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions clang/docs/LanguageExtensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5162,6 +5162,23 @@ If no address spaces names are provided, all address spaces are fenced.
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local")
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local", "global")

__builtin_amdgcn_ballot_w{32,64}
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

``__builtin_amdgcn_ballot_w{32,64}`` returns a bitmask that contains its
boolean argument as a bit for every lane of the current wave that is currently
active (i.e., that is converged with the executing thread), and a 0 bit for
every lane that is not active.

The result is uniform, i.e. it is the same in every active thread of the wave.

__builtin_amdgcn_inverse_ballot_w{32,64}
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Given a wave-uniform bitmask, ``__builtin_amdgcn_inverse_ballot_w{32,64}(mask)``
returns the bit at the position of the current lane. It is almost equivalent to
``(mask & (1ull << lane_id)) != 0``, except that its behavior is only defined if
the given mask has the same value for all active lanes of the current wave.

ARM/AArch64 Language Extensions
-------------------------------
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,9 @@ TARGET_BUILTIN(__builtin_amdgcn_struct_ptr_buffer_load_lds, "vQbv*3IUiiiiIiIi",
TARGET_BUILTIN(__builtin_amdgcn_ballot_w32, "ZUib", "nc", "wavefrontsize32")
BUILTIN(__builtin_amdgcn_ballot_w64, "WUib", "nc")

TARGET_BUILTIN(__builtin_amdgcn_inverse_ballot_w32, "bZUi", "nc", "wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_inverse_ballot_w64, "bWUi", "nc", "wavefrontsize64")

// Deprecated intrinsics in favor of __builtin_amdgcn_ballot_{w32|w64}
BUILTIN(__builtin_amdgcn_uicmp, "WUiUiUiIi", "nc")
BUILTIN(__builtin_amdgcn_uicmpl, "WUiWUiWUiIi", "nc")
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
return Builder.CreateCall(F, { Src });
}
// Both wave-size variants of the inverse-ballot builtin share one lowering:
// the intrinsic is requested with the mask operand's type as its only
// overload type, so that type alone selects the overload that is called.
case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
// The single builtin argument is the bitmask; emit it as a scalar value.
llvm::Value *Src = EmitScalarExpr(E->getArg(0));
Function *F =
CGM.getIntrinsic(Intrinsic::amdgcn_inverse_ballot, {Src->getType()});
// The call's result is returned directly as the value of the builtin.
return Builder.CreateCall(F, {Src});
}
case AMDGPU::BI__builtin_amdgcn_tanhf:
case AMDGPU::BI__builtin_amdgcn_tanhh:
case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
Expand Down
7 changes: 7 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b)
*out = __builtin_amdgcn_ballot_w32(a == b);
}

// Verifies that the 32-bit inverse-ballot builtin is emitted as a call to the
// i32 overload of the inverse.ballot intrinsic, taking an i32 mask and
// producing an i1 that is stored through `out`.
// CHECK-LABEL: @test_inverse_ballot_wave32(
// CHECK: call i1 @llvm.amdgcn.inverse.ballot.i32(i32 %{{.+}})
void test_inverse_ballot_wave32(global bool* out, int a)
{
*out = __builtin_amdgcn_inverse_ballot_w32(a);
}

// CHECK-LABEL: @test_read_exec(
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
void test_read_exec(global uint* out) {
Expand Down
7 changes: 7 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ void test_ballot_wave64_target_attr(global ulong* out, int a, int b)
*out = __builtin_amdgcn_ballot_w64(a == b);
}

// Verifies that the 64-bit inverse-ballot builtin is emitted as a call to the
// i64 overload of the inverse.ballot intrinsic, taking an i64 mask and
// producing an i1 that is stored through `out`.
// CHECK-LABEL: @test_inverse_ballot_wave64(
// CHECK: call i1 @llvm.amdgcn.inverse.ballot.i64(i64 %{{.+}})
void test_inverse_ballot_wave64(global bool* out, ulong a)
{
*out = __builtin_amdgcn_inverse_ballot_w64(a);
}

// CHECK-LABEL: @test_read_exec(
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
void test_read_exec(global ulong* out) {
Expand Down