@@ -62,6 +62,44 @@ void test_cvt_f16_fp8(global half* out, int a)
6262 out [3 ] = __builtin_amdgcn_cvt_f16_fp8 (a , 3 );
6363}
6464
65+ // CHECK-LABEL: @test_cvt_f16_bf8(
66+ // CHECK-NEXT: entry:
67+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
68+ // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
69+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
70+ // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
71+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
72+ // CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
73+ // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
74+ // CHECK-NEXT: [[TMP1:%.*]] = call half @llvm.amdgcn.cvt.f16.bf8(i32 [[TMP0]], i32 0)
75+ // CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
76+ // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP2]], i64 0
77+ // CHECK-NEXT: store half [[TMP1]], ptr addrspace(1) [[ARRAYIDX]], align 2
78+ // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
79+ // CHECK-NEXT: [[TMP4:%.*]] = call half @llvm.amdgcn.cvt.f16.bf8(i32 [[TMP3]], i32 1)
80+ // CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
81+ // CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP5]], i64 1
82+ // CHECK-NEXT: store half [[TMP4]], ptr addrspace(1) [[ARRAYIDX1]], align 2
83+ // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
84+ // CHECK-NEXT: [[TMP7:%.*]] = call half @llvm.amdgcn.cvt.f16.bf8(i32 [[TMP6]], i32 2)
85+ // CHECK-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
86+ // CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP8]], i64 2
87+ // CHECK-NEXT: store half [[TMP7]], ptr addrspace(1) [[ARRAYIDX2]], align 2
88+ // CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
89+ // CHECK-NEXT: [[TMP10:%.*]] = call half @llvm.amdgcn.cvt.f16.bf8(i32 [[TMP9]], i32 3)
90+ // CHECK-NEXT: [[TMP11:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
91+ // CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP11]], i64 3
92+ // CHECK-NEXT: store half [[TMP10]], ptr addrspace(1) [[ARRAYIDX3]], align 2
93+ // CHECK-NEXT: ret void
94+ //
// Codegen test for __builtin_amdgcn_cvt_f16_bf8: calls the builtin on the same
// input `a` with each of the literal second arguments 0, 1, 2 and 3, and stores
// the k-th result to out[k].
// The FileCheck expectations preceding this function require each call to be
// emitted as @llvm.amdgcn.cvt.f16.bf8 with the matching i32 constant, in this
// exact statement order, so the four calls are intentionally written out one by
// one with literal arguments.
// NOTE(review): the second argument presumably selects which byte of `a` is
// converted -- confirm against the builtin's definition.
95+ void test_cvt_f16_bf8 (global half * out , int a )
96+ {
97+ out [0 ] = __builtin_amdgcn_cvt_f16_bf8 (a , 0 ); // expected IR: second operand i32 0
98+ out [1 ] = __builtin_amdgcn_cvt_f16_bf8 (a , 1 ); // expected IR: second operand i32 1
99+ out [2 ] = __builtin_amdgcn_cvt_f16_bf8 (a , 2 ); // expected IR: second operand i32 2
100+ out [3 ] = __builtin_amdgcn_cvt_f16_bf8 (a , 3 ); // expected IR: second operand i32 3
101+ }
102+
65103// CHECK-LABEL: @test_cvt_pk_f16_fp8(
66104// CHECK-NEXT: entry:
67105// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
0 commit comments