Skip to content

Commit 602d43c

Browse files
shiltian and rampitec authored
[Clang][AMDGPU] Add the missing builtin __builtin_amdgcn_sqrt_bf16 (#149447)
Co-authored-by: Mekhanoshin, Stanislav <[email protected]>
1 parent a9f8143 commit 602d43c

File tree

3 files changed

+21
-0
lines changed

3 files changed

+21
-0
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -673,6 +673,7 @@ TARGET_BUILTIN(__builtin_amdgcn_tanhf, "ff", "nc", "tanh-insts")
673673
TARGET_BUILTIN(__builtin_amdgcn_tanhh, "hh", "nc", "tanh-insts")
674674
TARGET_BUILTIN(__builtin_amdgcn_tanh_bf16, "yy", "nc", "bf16-trans-insts")
675675
TARGET_BUILTIN(__builtin_amdgcn_rcp_bf16, "yy", "nc", "bf16-trans-insts")
676+
TARGET_BUILTIN(__builtin_amdgcn_sqrt_bf16, "yy", "nc", "bf16-trans-insts")
676677
TARGET_BUILTIN(__builtin_amdgcn_rsq_bf16, "yy", "nc", "bf16-trans-insts")
677678
TARGET_BUILTIN(__builtin_amdgcn_log_bf16, "yy", "nc", "bf16-trans-insts")
678679
TARGET_BUILTIN(__builtin_amdgcn_exp2_bf16, "yy", "nc", "bf16-trans-insts")

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -416,6 +416,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
416416
case AMDGPU::BI__builtin_amdgcn_sqrt:
417417
case AMDGPU::BI__builtin_amdgcn_sqrtf:
418418
case AMDGPU::BI__builtin_amdgcn_sqrth:
419+
case AMDGPU::BI__builtin_amdgcn_sqrt_bf16:
419420
return emitBuiltinWithOneOverloadedType<1>(*this, E,
420421
Intrinsic::amdgcn_sqrt);
421422
case AMDGPU::BI__builtin_amdgcn_rsq:

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,25 @@ void test_rcp_bf16(global __bf16* out, __bf16 a)
119119
*out = __builtin_amdgcn_rcp_bf16(a);
120120
}
121121

122+
// CHECK-LABEL: @test_sqrt_bf16(
123+
// CHECK-NEXT: entry:
124+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
125+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
126+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
127+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
128+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
129+
// CHECK-NEXT: store bfloat [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
130+
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR_ASCAST]], align 2
131+
// CHECK-NEXT: [[TMP1:%.*]] = call bfloat @llvm.amdgcn.sqrt.bf16(bfloat [[TMP0]])
132+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
133+
// CHECK-NEXT: store bfloat [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
134+
// CHECK-NEXT: ret void
135+
//
136+
void test_sqrt_bf16(global __bf16* out, __bf16 a)
137+
{
138+
*out = __builtin_amdgcn_sqrt_bf16(a);
139+
}
140+
122141
// CHECK-LABEL: @test_rsq_bf16(
123142
// CHECK-NEXT: entry:
124143
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)

0 commit comments

Comments
 (0)