@@ -652,6 +652,60 @@ void test_prefetch(generic void *fptr, global void *gptr) {
652652 __builtin_amdgcn_global_prefetch (gptr , 8 );
653653}
654654
655+ // CHECK-LABEL: @test_cvt_pk_fp8_f32_e5m3(
656+ // CHECK-NEXT: entry:
657+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
658+ // CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
659+ // CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
660+ // CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4, addrspace(5)
661+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
662+ // CHECK-NEXT: [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr
663+ // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
664+ // CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
665+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
666+ // CHECK-NEXT: store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4
667+ // CHECK-NEXT: store float [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
668+ // CHECK-NEXT: store float [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
669+ // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
670+ // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR_ASCAST]], align 4
671+ // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
672+ // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cvt.pk.fp8.f32.e5m3(float [[TMP0]], float [[TMP1]], i32 [[TMP2]], i1 true)
673+ // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
674+ // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
675+ // CHECK-NEXT: ret void
676+ //
677+ void test_cvt_pk_fp8_f32_e5m3 (global int * out , int old , float a , float b ) // Codegen test for __builtin_amdgcn_cvt_pk_fp8_f32_e5m3: makes one call and stores the result through out.
678+ {
679+ * out = __builtin_amdgcn_cvt_pk_fp8_f32_e5m3 (a , b , old , true); // Operands are (a, b, old, true); the FileCheck assertions above expect the intrinsic call to take them in this order, with the last one emitted as constant i1 true.
680+ }
681+
682+ // CHECK-LABEL: @test_cvt_sr_fp8_f32_e5m3(
683+ // CHECK-NEXT: entry:
684+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
685+ // CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
686+ // CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
687+ // CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
688+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
689+ // CHECK-NEXT: [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr
690+ // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
691+ // CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
692+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
693+ // CHECK-NEXT: store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4
694+ // CHECK-NEXT: store float [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
695+ // CHECK-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
696+ // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
697+ // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
698+ // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
699+ // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f32.e5m3(float [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 3)
700+ // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
701+ // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
702+ // CHECK-NEXT: ret void
703+ //
704+ void test_cvt_sr_fp8_f32_e5m3 (global int * out , int old , float a , int b ) // Codegen test for __builtin_amdgcn_cvt_sr_fp8_f32_e5m3: makes one call and stores the result through out.
705+ {
706+ * out = __builtin_amdgcn_cvt_sr_fp8_f32_e5m3 (a , b , old , 3 ); // Operands are (a, b, old, 3); the FileCheck assertions above expect a as float, b and old as i32, and the literal emitted as constant i32 3.
707+ }
708+
655709// CHECK-LABEL: @test_cvt_f32_fp8_e5m3(
656710// CHECK-NEXT: entry:
657711// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
0 commit comments