Skip to content

Commit 1170b00

Browse files
rampiteckrishna2803
authored and committed
[AMDGPU] v_cvt_sr_pk_f16_f32 gfx1250 instruction (llvm#151482)
1 parent 3235673 commit 1170b00

16 files changed

+478
-0
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,7 @@ TARGET_BUILTIN(__builtin_amdgcn_sin_bf16, "yy", "nc", "bf16-trans-insts")
698698
TARGET_BUILTIN(__builtin_amdgcn_cos_bf16, "yy", "nc", "bf16-trans-insts")
699699

700700
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_pk_bf16_f32, "V2yffi", "nc", "bf16-cvt-insts")
701+
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_pk_f16_f32, "V2hffi", "nc", "gfx1250-insts")
701702
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
702703
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
703704
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,33 @@ void test_cvt_sr_pk_bf16_f32(global bfloat2* out, float a, float b, uint sr)
282282
*out = __builtin_amdgcn_cvt_sr_pk_bf16_f32(a, b, sr);
283283
}
284284

285+
// CHECK-LABEL: @test_cvt_sr_pk_f16_f32(
286+
// CHECK-NEXT: entry:
287+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
288+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
289+
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4, addrspace(5)
290+
// CHECK-NEXT: [[SR_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
291+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
292+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
293+
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
294+
// CHECK-NEXT: [[SR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SR_ADDR]] to ptr
295+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
296+
// CHECK-NEXT: store float [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
297+
// CHECK-NEXT: store float [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
298+
// CHECK-NEXT: store i32 [[SR:%.*]], ptr [[SR_ADDR_ASCAST]], align 4
299+
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
300+
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR_ASCAST]], align 4
301+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
302+
// CHECK-NEXT: [[TMP3:%.*]] = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float [[TMP0]], float [[TMP1]], i32 [[TMP2]])
303+
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
304+
// CHECK-NEXT: store <2 x half> [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
305+
// CHECK-NEXT: ret void
306+
//
307+
// Codegen test for __builtin_amdgcn_cvt_sr_pk_f16_f32. The CHECK lines that
// precede this function expect the builtin to become a call to
// @llvm.amdgcn.cvt.sr.pk.f16.f32 taking (float, float, i32) and returning
// <2 x half>, with that result stored through the `out` pointer.
void test_cvt_sr_pk_f16_f32(global half2* out, float a, float b, uint sr)
308+
{
309+
*out = __builtin_amdgcn_cvt_sr_pk_f16_f32(a, b, sr);
310+
}
311+
285312
// CHECK-LABEL: @test_cvt_f16_fp8(
286313
// CHECK-NEXT: entry:
287314
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,10 @@ def int_amdgcn_tanh : DefaultAttrsIntrinsic<
593593
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
594594
>;
595595

596+
// Intrinsic that takes two f32 values and an i32 and returns a <2 x half>.
// It is declared IntrNoMem and IntrSpeculatable and is bound to the Clang
// builtin __builtin_amdgcn_cvt_sr_pk_f16_f32.
// NOTE(review): "sr" presumably stands for stochastic rounding, with the i32
// operand supplying the rounding bits — confirm against the ISA documentation.
def int_amdgcn_cvt_sr_pk_f16_f32 : DefaultAttrsIntrinsic<
597+
[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]
598+
>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_f16_f32">;
599+
596600
def int_amdgcn_cvt_sr_pk_bf16_f32 : DefaultAttrsIntrinsic<
597601
[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]
598602
>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_bf16_f32">;

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4574,6 +4574,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
45744574
case Intrinsic::amdgcn_cvt_pknorm_u16:
45754575
case Intrinsic::amdgcn_cvt_pk_i16:
45764576
case Intrinsic::amdgcn_cvt_pk_u16:
4577+
case Intrinsic::amdgcn_cvt_sr_pk_f16_f32:
45774578
case Intrinsic::amdgcn_cvt_sr_pk_bf16_f32:
45784579
case Intrinsic::amdgcn_cvt_pk_f16_fp8:
45794580
case Intrinsic::amdgcn_cvt_pk_f16_bf8:

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2928,6 +2928,7 @@ def VOP_V32F32_V6I32_F32 : VOPProfile <[v32f32, v6i32, f32, untyped]>;
29282928
def VOP_V32F16_V6I32_F32 : VOPProfile <[v32f16, v6i32, f32, untyped]>;
29292929
def VOP_V32BF16_V6I32_F32 : VOPProfile <[v32bf16, v6i32, f32, untyped]>;
29302930
def VOP_V2BF16_F32_F32_I32 : VOPProfile <[v2bf16, f32, f32, i32]>;
2931+
def VOP_V2F16_F32_F32_I32 : VOPProfile <[v2f16, f32, f32, i32]>;
29312932
def VOP_V6I32_V32F16_F32 : VOPProfile<[v6i32, v32f16, f32, untyped]>;
29322933
def VOP_V6I32_V32BF16_F32 : VOPProfile<[v6i32, v32bf16, f32, untyped]>;
29332934
def VOP_V6I32_V16F32_V16F32_F32 : VOPProfile<[v6i32, v16f32, v16f32, f32]>;

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1650,6 +1650,8 @@ def VOP3_CVT_SR_F8_F16_Fake16_Profile : VOP3_Profile_Fake16<VOP3_CVT_SR_F8_F16_P
16501650

16511651
let SubtargetPredicate = isGFX1250Plus in {
16521652
let ReadsModeReg = 0 in {
1653+
defm V_CVT_SR_PK_F16_F32 : VOP3Inst<"v_cvt_sr_pk_f16_f32", VOP3_Profile<VOP_V2F16_F32_F32_I32>, int_amdgcn_cvt_sr_pk_f16_f32>;
1654+
16531655
// These instructions have non-standard use of op_sel. They are using bits 2 and 3 of opsel
16541656
// to select a byte in the vdst. Bits 0 and 1 are unused.
16551657
let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in {
@@ -2067,6 +2069,7 @@ defm V_ASHR_PK_U8_I32 : VOP3Only_Realtriple_gfx1250<0x291>;
20672069
defm V_CVT_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36d>;
20682070
defm V_CVT_SR_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36e>;
20692071
defm V_CVT_PK_F16_F32 : VOP3Only_Realtriple_gfx1250<0x36f>;
2072+
defm V_CVT_SR_PK_F16_F32 : VOP3Only_Realtriple_gfx1250<0x370>;
20702073
defm V_CVT_PK_FP8_F16_gfx1250 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x372, "v_cvt_pk_fp8_f16">;
20712074
defm V_CVT_PK_BF8_F16_gfx1250 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x373, "v_cvt_pk_bf8_f16">;
20722075
defm V_CVT_SR_FP8_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x374>;
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
4+
5+
declare <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float, float, i32) #0
6+
7+
; All three operands are plain (non-inreg) arguments. The expected output is a
; single v_cvt_sr_pk_f16_f32 reading v0, v1 and v2. The <2 x half> result is
; bitcast to float to match the function's float return type.
define amdgpu_ps float @cvt_sr_pk_f16_f32_vvv(float %src0, float %src1, i32 %src2) #1 {
8+
; GCN-LABEL: cvt_sr_pk_f16_f32_vvv:
9+
; GCN: ; %bb.0:
10+
; GCN-NEXT: v_cvt_sr_pk_f16_f32 v0, v0, v1, v2
11+
; GCN-NEXT: ; return to shader part epilog
12+
%cvt = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float %src0, float %src1, i32 %src2) #0
13+
%ret = bitcast <2 x half> %cvt to float
14+
ret float %ret
15+
}
16+
17+
; All three operands are inreg arguments. The expected output first copies s2
; into v0 with v_mov_b32, then issues v_cvt_sr_pk_f16_f32 with s0, s1 and v0 as
; its sources.
define amdgpu_ps float @cvt_sr_pk_f16_f32_sss(float inreg %src0, float inreg %src1, i32 inreg %src2) #1 {
18+
; GCN-LABEL: cvt_sr_pk_f16_f32_sss:
19+
; GCN: ; %bb.0:
20+
; GCN-NEXT: v_mov_b32_e32 v0, s2
21+
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
22+
; GCN-NEXT: v_cvt_sr_pk_f16_f32 v0, s0, s1, v0
23+
; GCN-NEXT: ; return to shader part epilog
24+
%cvt = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float %src0, float %src1, i32 %src2) #0
25+
%ret = bitcast <2 x half> %cvt to float
26+
ret float %ret
27+
}
28+
29+
; The third operand is the constant 65538. The expected output passes it
; directly as the literal 0x10002, with v0 and v1 as the first two sources.
define amdgpu_ps float @cvt_sr_pk_f16_f32_vvi(float %src0, float %src1) #1 {
30+
; GCN-LABEL: cvt_sr_pk_f16_f32_vvi:
31+
; GCN: ; %bb.0:
32+
; GCN-NEXT: v_cvt_sr_pk_f16_f32 v0, v0, v1, 0x10002
33+
; GCN-NEXT: ; return to shader part epilog
34+
%cvt = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float %src0, float %src1, i32 65538) #0
35+
%ret = bitcast <2 x half> %cvt to float
36+
ret float %ret
37+
}
38+
39+
; The first source is negated with fneg and the second goes through
; llvm.fabs.f32. The expected output has no separate instructions for these:
; they appear as the -v0 and |v1| operand modifiers on v_cvt_sr_pk_f16_f32,
; with the constant 1 as the third operand.
define amdgpu_ps float @cvt_sr_pk_f16_f32_vvi_mods(float %src0, float %src1) #1 {
40+
; GCN-LABEL: cvt_sr_pk_f16_f32_vvi_mods:
41+
; GCN: ; %bb.0:
42+
; GCN-NEXT: v_cvt_sr_pk_f16_f32 v0, -v0, |v1|, 1
43+
; GCN-NEXT: ; return to shader part epilog
44+
%s0 = fneg float %src0
45+
%s1 = call float @llvm.fabs.f32(float %src1) #0
46+
%cvt = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float %s0, float %s1, i32 1) #0
47+
%ret = bitcast <2 x half> %cvt to float
48+
ret float %ret
49+
}
50+
51+
; Both float sources are inreg arguments and the third operand is the constant
; 1. The expected output uses s0, s1 and the constant 1 directly in a single
; v_cvt_sr_pk_f16_f32, with no preceding v_mov.
define amdgpu_ps float @cvt_sr_pk_f16_f32_ssi(float inreg %src0, float inreg %src1) #1 {
52+
; GCN-LABEL: cvt_sr_pk_f16_f32_ssi:
53+
; GCN: ; %bb.0:
54+
; GCN-NEXT: v_cvt_sr_pk_f16_f32 v0, s0, s1, 1
55+
; GCN-NEXT: ; return to shader part epilog
56+
%cvt = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float %src0, float %src1, i32 1) #0
57+
%ret = bitcast <2 x half> %cvt to float
58+
ret float %ret
59+
}
60+
61+
declare float @llvm.fabs.f32(float) #0
62+
63+
attributes #0 = { nounwind readnone }
64+
attributes #1 = { nounwind }

llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,51 @@ v_cvt_pk_f16_f32 v5, src_scc, vcc_lo mul:4
512512
v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2
513513
// GFX1250: v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6f,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf]
514514

515+
v_cvt_sr_pk_f16_f32 v5, v1, v2, s3
516+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x70,0xd7,0x01,0x05,0x0e,0x00]
517+
518+
v_cvt_sr_pk_f16_f32 v5, v255, s2, s105
519+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x70,0xd7,0xff,0x05,0xa4,0x01]
520+
521+
v_cvt_sr_pk_f16_f32 v5, s1, v255, exec_hi
522+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x70,0xd7,0x01,0xfe,0xff,0x01]
523+
524+
v_cvt_sr_pk_f16_f32 v5, s105, s105, exec_lo
525+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x70,0xd7,0x69,0xd2,0xf8,0x01]
526+
527+
v_cvt_sr_pk_f16_f32 v5, vcc_lo, ttmp15, v3
528+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x70,0xd7,0x6a,0xf6,0x0c,0x04]
529+
530+
v_cvt_sr_pk_f16_f32 v5, vcc_hi, 0xaf123456, v255
531+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x70,0xd7,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
532+
533+
v_cvt_sr_pk_f16_f32 v5, -|ttmp15|, -|src_scc|, ttmp15
534+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, -|ttmp15|, -|src_scc|, ttmp15 ; encoding: [0x05,0x03,0x70,0xd7,0x7b,0xfa,0xed,0x61]
535+
536+
v_cvt_sr_pk_f16_f32 v5, m0, 0.5, m0
537+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x70,0xd7,0x7d,0xe0,0xf5,0x01]
538+
539+
v_cvt_sr_pk_f16_f32 v5, |exec_lo|, -1, vcc_hi
540+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x70,0xd7,0x7e,0x82,0xad,0x01]
541+
542+
v_cvt_sr_pk_f16_f32 v5, -|exec_hi|, null, vcc_lo
543+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, -|exec_hi|, null, vcc_lo ; encoding: [0x05,0x01,0x70,0xd7,0x7f,0xf8,0xa8,0x21]
544+
545+
v_cvt_sr_pk_f16_f32 v5, null, exec_lo, 0xaf123456
546+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x70,0xd7,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
547+
548+
v_cvt_sr_pk_f16_f32 v5, -1, -|exec_hi|, src_scc
549+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, -1, -|exec_hi|, src_scc ; encoding: [0x05,0x02,0x70,0xd7,0xc1,0xfe,0xf4,0x43]
550+
551+
v_cvt_sr_pk_f16_f32 v5, 0.5, -m0, 0.5 mul:2
552+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x70,0xd7,0xf0,0xfa,0xc0,0x4b]
553+
554+
v_cvt_sr_pk_f16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
555+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x70,0xd7,0xfd,0xd4,0x04,0x33]
556+
557+
v_cvt_sr_pk_f16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
558+
// GFX1250: v_cvt_sr_pk_f16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x70,0xd7,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
559+
515560
v_cvt_sr_bf8_f16 v1, v2, v3
516561
// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00]
517562

llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,51 @@ v_cvt_pk_f16_f32 v5, src_scc, vcc_lo mul:4
512512
v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2
513513
// GFX1250: v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6f,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf]
514514

515+
v_cvt_sr_pk_f16_f32 v5, v1, v2, s3
516+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x70,0xd7,0x01,0x05,0x0e,0x00]
517+
518+
v_cvt_sr_pk_f16_f32 v5, v255, s2, s105
519+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x70,0xd7,0xff,0x05,0xa4,0x01]
520+
521+
v_cvt_sr_pk_f16_f32 v5, s1, v255, exec_hi
522+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x70,0xd7,0x01,0xfe,0xff,0x01]
523+
524+
v_cvt_sr_pk_f16_f32 v5, s105, s105, exec_lo
525+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x70,0xd7,0x69,0xd2,0xf8,0x01]
526+
527+
v_cvt_sr_pk_f16_f32 v5, vcc_lo, ttmp15, v3
528+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x70,0xd7,0x6a,0xf6,0x0c,0x04]
529+
530+
v_cvt_sr_pk_f16_f32 v5, vcc_hi, 0xaf123456, v255
531+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x70,0xd7,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
532+
533+
v_cvt_sr_pk_f16_f32 v5, -|ttmp15|, -|src_scc|, ttmp15
534+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, -|ttmp15|, -|src_scc|, ttmp15 ; encoding: [0x05,0x03,0x70,0xd7,0x7b,0xfa,0xed,0x61]
535+
536+
v_cvt_sr_pk_f16_f32 v5, m0, 0.5, m0
537+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x70,0xd7,0x7d,0xe0,0xf5,0x01]
538+
539+
v_cvt_sr_pk_f16_f32 v5, |exec_lo|, -1, vcc_hi
540+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x70,0xd7,0x7e,0x82,0xad,0x01]
541+
542+
v_cvt_sr_pk_f16_f32 v5, -|exec_hi|, null, vcc_lo
543+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, -|exec_hi|, null, vcc_lo ; encoding: [0x05,0x01,0x70,0xd7,0x7f,0xf8,0xa8,0x21]
544+
545+
v_cvt_sr_pk_f16_f32 v5, null, exec_lo, 0xaf123456
546+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x70,0xd7,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
547+
548+
v_cvt_sr_pk_f16_f32 v5, -1, -|exec_hi|, src_scc
549+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, -1, -|exec_hi|, src_scc ; encoding: [0x05,0x02,0x70,0xd7,0xc1,0xfe,0xf4,0x43]
550+
551+
v_cvt_sr_pk_f16_f32 v5, 0.5, -m0, 0.5 mul:2
552+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x70,0xd7,0xf0,0xfa,0xc0,0x4b]
553+
554+
v_cvt_sr_pk_f16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
555+
// GFX1250: v_cvt_sr_pk_f16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x70,0xd7,0xfd,0xd4,0x04,0x33]
556+
557+
v_cvt_sr_pk_f16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
558+
// GFX1250: v_cvt_sr_pk_f16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x70,0xd7,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
559+
515560
v_cvt_sr_bf8_f16 v1, v2.l, v3
516561
// GFX1250: v_cvt_sr_bf8_f16 v1, v2.l, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00]
517562

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,54 @@ v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0
402402
// GFX1250: v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x6f,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30]
403403
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
404404

405+
v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
406+
// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
407+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
408+
409+
v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
410+
// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
411+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
412+
413+
v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_mirror
414+
// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
415+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
416+
417+
v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
418+
// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
419+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
420+
421+
v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, s105 row_shl:1
422+
// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
423+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
424+
425+
v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
426+
// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
427+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
428+
429+
v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
430+
// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
431+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
432+
433+
v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
434+
// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x70,0xd7,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
435+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
436+
437+
v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
438+
// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x70,0xd7,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
439+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
440+
441+
v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v1|, v2, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
442+
// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v1|, v2, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x70,0xd7,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01]
443+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
444+
445+
v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
446+
// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x70,0xd7,0xfa,0x04,0x16,0x52,0x01,0x60,0x09,0x13]
447+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
448+
449+
v_cvt_sr_pk_f16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
450+
// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x70,0xd7,0xfa,0xfe,0xf7,0x7b,0xff,0x6f,0x05,0x30]
451+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
452+
405453
v_cvt_sr_bf8_f16 v1, v2, v3 quad_perm:[0,1,2,3] fi:1
406454
// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
407455
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

0 commit comments

Comments
 (0)