krishna2803
diff --git a/‎clang/include/clang/Basic/BuiltinsAMDGPU.def
Lines changed: 1 addition & 0 deletions b/‎clang/include/clang/Basic/BuiltinsAMDGPU.def
Lines changed: 1 addition & 0 deletions
diff --git a/‎clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
Lines changed: 27 additions & 0 deletions b/‎clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
Lines changed: 27 additions & 0 deletions
diff --git a/‎llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Lines changed: 4 additions & 0 deletions b/‎llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Lines changed: 4 additions & 0 deletions
diff --git a/‎llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Lines changed: 1 addition & 0 deletions b/‎llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Lines changed: 1 addition & 0 deletions
diff --git a/‎llvm/lib/Target/AMDGPU/SIInstrInfo.td
Lines changed: 1 addition & 0 deletions b/‎llvm/lib/Target/AMDGPU/SIInstrInfo.td
Lines changed: 1 addition & 0 deletions
diff --git a/‎llvm/lib/Target/AMDGPU/VOP3Instructions.td
Lines changed: 3 additions & 0 deletions b/‎llvm/lib/Target/AMDGPU/VOP3Instructions.td
Lines changed: 3 additions & 0 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.f16.ll
Lines changed: 64 additions & 0 deletions b/‎llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.f16.ll
Lines changed: 64 additions & 0 deletions
diff --git a/‎llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
Lines changed: 45 additions & 0 deletions b/‎llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
Lines changed: 45 additions & 0 deletions
diff --git a/‎llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
Lines changed: 45 additions & 0 deletions b/‎llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
Lines changed: 45 additions & 0 deletions
diff --git a/‎llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
Lines changed: 48 additions & 0 deletions b/‎llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
Lines changed: 48 additions & 0 deletions
@@ -698,6 +698,7 @@ TARGET_BUILTIN(__builtin_amdgcn_sin_bf16, "yy", "nc", "bf16-trans-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cos_bf16, "yy", "nc", "bf16-trans-insts")
 
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_pk_bf16_f32, "V2yffi", "nc", "bf16-cvt-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_pk_f16_f32, "V2hffi", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
 
@@ -282,6 +282,33 @@ void test_cvt_sr_pk_bf16_f32(global bfloat2* out, float a, float b, uint sr)
   *out = __builtin_amdgcn_cvt_sr_pk_bf16_f32(a, b, sr);
 }
 
+// CHECK-LABEL: @test_cvt_sr_pk_f16_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:    [[SR_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-NEXT:    [[SR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SR_ADDR]] to ptr
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store float [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    store float [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    store i32 [[SR:%.*]], ptr [[SR_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float [[TMP0]], float [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <2 x half> [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
+// CHECK-NEXT:    ret void
+//
+void test_cvt_sr_pk_f16_f32(global half2* out, float a, float b, uint sr)
+{
+  *out = __builtin_amdgcn_cvt_sr_pk_f16_f32(a, b, sr);
+}
+
 // CHECK-LABEL: @test_cvt_f16_fp8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
 
@@ -593,6 +593,10 @@ def int_amdgcn_tanh : DefaultAttrsIntrinsic<
   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
 >;
 
+def int_amdgcn_cvt_sr_pk_f16_f32 : DefaultAttrsIntrinsic<
+  [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_f16_f32">;
+
 def int_amdgcn_cvt_sr_pk_bf16_f32 : DefaultAttrsIntrinsic<
   [llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]
 >, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_bf16_f32">;
 
@@ -4574,6 +4574,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     case Intrinsic::amdgcn_cvt_pknorm_u16:
     case Intrinsic::amdgcn_cvt_pk_i16:
     case Intrinsic::amdgcn_cvt_pk_u16:
+    case Intrinsic::amdgcn_cvt_sr_pk_f16_f32:
     case Intrinsic::amdgcn_cvt_sr_pk_bf16_f32:
     case Intrinsic::amdgcn_cvt_pk_f16_fp8:
     case Intrinsic::amdgcn_cvt_pk_f16_bf8:
 
@@ -2928,6 +2928,7 @@ def VOP_V32F32_V6I32_F32 : VOPProfile <[v32f32, v6i32, f32, untyped]>;
 def VOP_V32F16_V6I32_F32 : VOPProfile <[v32f16, v6i32, f32, untyped]>;
 def VOP_V32BF16_V6I32_F32 : VOPProfile <[v32bf16, v6i32, f32, untyped]>;
 def VOP_V2BF16_F32_F32_I32 : VOPProfile <[v2bf16, f32, f32, i32]>;
+def VOP_V2F16_F32_F32_I32 : VOPProfile <[v2f16, f32, f32, i32]>;
 def VOP_V6I32_V32F16_F32 : VOPProfile<[v6i32, v32f16, f32, untyped]>;
 def VOP_V6I32_V32BF16_F32 : VOPProfile<[v6i32, v32bf16, f32, untyped]>;
 def VOP_V6I32_V16F32_V16F32_F32 : VOPProfile<[v6i32, v16f32, v16f32, f32]>;
 
@@ -1650,6 +1650,8 @@ def VOP3_CVT_SR_F8_F16_Fake16_Profile : VOP3_Profile_Fake16<VOP3_CVT_SR_F8_F16_P
 
 let SubtargetPredicate = isGFX1250Plus in {
   let ReadsModeReg = 0 in {
+    defm V_CVT_SR_PK_F16_F32 : VOP3Inst<"v_cvt_sr_pk_f16_f32", VOP3_Profile<VOP_V2F16_F32_F32_I32>, int_amdgcn_cvt_sr_pk_f16_f32>;
+
     // These instructions have non-standard use of op_sel. They are using bits 2 and 3 of opsel
     // to select a byte in the vdst. Bits 0 and 1 are unused.
     let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in {
@@ -2067,6 +2069,7 @@ defm V_ASHR_PK_U8_I32                : VOP3Only_Realtriple_gfx1250<0x291>;
 defm V_CVT_PK_BF16_F32               : VOP3Only_Realtriple_gfx1250<0x36d>;
 defm V_CVT_SR_PK_BF16_F32            : VOP3Only_Realtriple_gfx1250<0x36e>;
 defm V_CVT_PK_F16_F32                : VOP3Only_Realtriple_gfx1250<0x36f>;
+defm V_CVT_SR_PK_F16_F32             : VOP3Only_Realtriple_gfx1250<0x370>;
 defm V_CVT_PK_FP8_F16_gfx1250        : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x372, "v_cvt_pk_fp8_f16">;
 defm V_CVT_PK_BF8_F16_gfx1250        : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x373, "v_cvt_pk_bf8_f16">;
 defm V_CVT_SR_FP8_F16                : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x374>;
 
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
+
+declare <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float, float, i32) #0
+
+define amdgpu_ps float @cvt_sr_pk_f16_f32_vvv(float %src0, float %src1, i32 %src2) #1 {
+; GCN-LABEL: cvt_sr_pk_f16_f32_vvv:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_cvt_sr_pk_f16_f32 v0, v0, v1, v2
+; GCN-NEXT:    ; return to shader part epilog
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float %src0, float %src1, i32 %src2) #0
+  %ret = bitcast <2 x half> %cvt to float
+  ret float %ret
+}
+
+define amdgpu_ps float @cvt_sr_pk_f16_f32_sss(float inreg %src0, float inreg %src1, i32 inreg %src2) #1 {
+; GCN-LABEL: cvt_sr_pk_f16_f32_sss:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_mov_b32_e32 v0, s2
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_cvt_sr_pk_f16_f32 v0, s0, s1, v0
+; GCN-NEXT:    ; return to shader part epilog
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float %src0, float %src1, i32 %src2) #0
+  %ret = bitcast <2 x half> %cvt to float
+  ret float %ret
+}
+
+define amdgpu_ps float @cvt_sr_pk_f16_f32_vvi(float %src0, float %src1) #1 {
+; GCN-LABEL: cvt_sr_pk_f16_f32_vvi:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_cvt_sr_pk_f16_f32 v0, v0, v1, 0x10002
+; GCN-NEXT:    ; return to shader part epilog
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float %src0, float %src1, i32 65538) #0
+  %ret = bitcast <2 x half> %cvt to float
+  ret float %ret
+}
+
+define amdgpu_ps float @cvt_sr_pk_f16_f32_vvi_mods(float %src0, float %src1) #1 {
+; GCN-LABEL: cvt_sr_pk_f16_f32_vvi_mods:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_cvt_sr_pk_f16_f32 v0, -v0, |v1|, 1
+; GCN-NEXT:    ; return to shader part epilog
+  %s0 = fneg float %src0
+  %s1 = call float @llvm.fabs.f32(float %src1) #0
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float %s0, float %s1, i32 1) #0
+  %ret = bitcast <2 x half> %cvt to float
+  ret float %ret
+}
+
+define amdgpu_ps float @cvt_sr_pk_f16_f32_ssi(float inreg %src0, float inreg %src1) #1 {
+; GCN-LABEL: cvt_sr_pk_f16_f32_ssi:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_cvt_sr_pk_f16_f32 v0, s0, s1, 1
+; GCN-NEXT:    ; return to shader part epilog
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float %src0, float %src1, i32 1) #0
+  %ret = bitcast <2 x half> %cvt to float
+  ret float %ret
+}
+
+declare float @llvm.fabs.f32(float) #0
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
@@ -512,6 +512,51 @@ v_cvt_pk_f16_f32 v5, src_scc, vcc_lo mul:4
 v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2
 // GFX1250: v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6f,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf]
 
+v_cvt_sr_pk_f16_f32 v5, v1, v2, s3
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, v1, v2, s3      ; encoding: [0x05,0x00,0x70,0xd7,0x01,0x05,0x0e,0x00]
+
+v_cvt_sr_pk_f16_f32 v5, v255, s2, s105
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, v255, s2, s105  ; encoding: [0x05,0x00,0x70,0xd7,0xff,0x05,0xa4,0x01]
+
+v_cvt_sr_pk_f16_f32 v5, s1, v255, exec_hi
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x70,0xd7,0x01,0xfe,0xff,0x01]
+
+v_cvt_sr_pk_f16_f32 v5, s105, s105, exec_lo
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x70,0xd7,0x69,0xd2,0xf8,0x01]
+
+v_cvt_sr_pk_f16_f32 v5, vcc_lo, ttmp15, v3
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x70,0xd7,0x6a,0xf6,0x0c,0x04]
+
+v_cvt_sr_pk_f16_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x70,0xd7,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_cvt_sr_pk_f16_f32 v5, -|ttmp15|, -|src_scc|, ttmp15
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, -|ttmp15|, -|src_scc|, ttmp15 ; encoding: [0x05,0x03,0x70,0xd7,0x7b,0xfa,0xed,0x61]
+
+v_cvt_sr_pk_f16_f32 v5, m0, 0.5, m0
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, m0, 0.5, m0     ; encoding: [0x05,0x00,0x70,0xd7,0x7d,0xe0,0xf5,0x01]
+
+v_cvt_sr_pk_f16_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x70,0xd7,0x7e,0x82,0xad,0x01]
+
+v_cvt_sr_pk_f16_f32 v5, -|exec_hi|, null, vcc_lo
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, -|exec_hi|, null, vcc_lo ; encoding: [0x05,0x01,0x70,0xd7,0x7f,0xf8,0xa8,0x21]
+
+v_cvt_sr_pk_f16_f32 v5, null, exec_lo, 0xaf123456
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x70,0xd7,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_cvt_sr_pk_f16_f32 v5, -1, -|exec_hi|, src_scc
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, -1, -|exec_hi|, src_scc ; encoding: [0x05,0x02,0x70,0xd7,0xc1,0xfe,0xf4,0x43]
+
+v_cvt_sr_pk_f16_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x70,0xd7,0xf0,0xfa,0xc0,0x4b]
+
+v_cvt_sr_pk_f16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x70,0xd7,0xfd,0xd4,0x04,0x33]
+
+v_cvt_sr_pk_f16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX1250: v_cvt_sr_pk_f16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x70,0xd7,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
 v_cvt_sr_bf8_f16 v1, v2, v3
 // GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3             ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00]
 
 
@@ -512,6 +512,51 @@ v_cvt_pk_f16_f32 v5, src_scc, vcc_lo mul:4
 v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2
 // GFX1250: v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6f,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf]
 
+v_cvt_sr_pk_f16_f32 v5, v1, v2, s3
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, v1, v2, s3      ; encoding: [0x05,0x00,0x70,0xd7,0x01,0x05,0x0e,0x00]
+
+v_cvt_sr_pk_f16_f32 v5, v255, s2, s105
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, v255, s2, s105  ; encoding: [0x05,0x00,0x70,0xd7,0xff,0x05,0xa4,0x01]
+
+v_cvt_sr_pk_f16_f32 v5, s1, v255, exec_hi
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x70,0xd7,0x01,0xfe,0xff,0x01]
+
+v_cvt_sr_pk_f16_f32 v5, s105, s105, exec_lo
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x70,0xd7,0x69,0xd2,0xf8,0x01]
+
+v_cvt_sr_pk_f16_f32 v5, vcc_lo, ttmp15, v3
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x70,0xd7,0x6a,0xf6,0x0c,0x04]
+
+v_cvt_sr_pk_f16_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x70,0xd7,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_cvt_sr_pk_f16_f32 v5, -|ttmp15|, -|src_scc|, ttmp15
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, -|ttmp15|, -|src_scc|, ttmp15 ; encoding: [0x05,0x03,0x70,0xd7,0x7b,0xfa,0xed,0x61]
+
+v_cvt_sr_pk_f16_f32 v5, m0, 0.5, m0
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, m0, 0.5, m0     ; encoding: [0x05,0x00,0x70,0xd7,0x7d,0xe0,0xf5,0x01]
+
+v_cvt_sr_pk_f16_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x70,0xd7,0x7e,0x82,0xad,0x01]
+
+v_cvt_sr_pk_f16_f32 v5, -|exec_hi|, null, vcc_lo
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, -|exec_hi|, null, vcc_lo ; encoding: [0x05,0x01,0x70,0xd7,0x7f,0xf8,0xa8,0x21]
+
+v_cvt_sr_pk_f16_f32 v5, null, exec_lo, 0xaf123456
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x70,0xd7,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_cvt_sr_pk_f16_f32 v5, -1, -|exec_hi|, src_scc
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, -1, -|exec_hi|, src_scc ; encoding: [0x05,0x02,0x70,0xd7,0xc1,0xfe,0xf4,0x43]
+
+v_cvt_sr_pk_f16_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x70,0xd7,0xf0,0xfa,0xc0,0x4b]
+
+v_cvt_sr_pk_f16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX1250: v_cvt_sr_pk_f16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x70,0xd7,0xfd,0xd4,0x04,0x33]
+
+v_cvt_sr_pk_f16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX1250: v_cvt_sr_pk_f16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x70,0xd7,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
 v_cvt_sr_bf8_f16 v1, v2.l, v3
 // GFX1250: v_cvt_sr_bf8_f16 v1, v2.l, v3           ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00]
 
 
@@ -402,6 +402,54 @@ v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0
 // GFX1250: v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x6f,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x70,0xd7,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x70,0xd7,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v1|, v2, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v1|, v2, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x70,0xd7,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x70,0xd7,0xfa,0x04,0x16,0x52,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_sr_pk_f16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x70,0xd7,0xfa,0xfe,0xf7,0x7b,0xff,0x6f,0x05,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_sr_bf8_f16 v1, v2, v3 quad_perm:[0,1,2,3] fi:1
 // GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU