diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 275d0193452a5..6a46c280f08bf 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1054,8 +1054,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, auto &FPTruncActions = getActionDefinitionsBuilder(G_FPTRUNC); if (ST.hasCvtPkF16F32Inst()) - FPTruncActions.legalFor( - {{S32, S64}, {S16, S32}, {V2S16, V2S32}, {V2S16, V2S64}}); + FPTruncActions.legalFor({{S32, S64}, {S16, S32}, {V2S16, V2S32}}); else FPTruncActions.legalFor({{S32, S64}, {S16, S32}}); FPTruncActions.scalarize(0).lower(); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index bd95bcd89e183..1cf1825915a18 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -915,10 +915,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Legal); } - if (Subtarget->hasCvtPkF16F32Inst()) { - setOperationAction(ISD::FP_ROUND, MVT::v2f16, Legal); - } - setTargetDAGCombine({ISD::ADD, ISD::UADDO_CARRY, ISD::SUB, diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll index 188b2ada64686..6ab17c91c1439 100644 --- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll @@ -654,7 +654,9 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX950-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3] ; GFX950-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] -; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v0, v0, v2 +; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX950-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GFX950-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX950-SDAG-NEXT: s_endpgm ; @@ -666,11 +668,11 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX950-GISEL-NEXT: s_mov_b32 s2, -1 ; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000 ; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX950-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v2, v[2:3] -; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] -; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v0, v0, v2 +; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5] +; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7] +; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX950-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX950-GISEL-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.fpmath.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.fpmath.ll new file mode 100644 index 0000000000000..9e9b9629931b9 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.fpmath.ll @@ -0,0 +1,213 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 < %s | FileCheck -check-prefixes=GFX950,GFX950-SAFE-SDAG %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 < %s | FileCheck -check-prefixes=GFX950,GFX950-SAFE-GISEL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX950,GFX950-UNSAFE %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX950,GFX950-UNSAFE %s + +define <2 x half> @v_test_cvt_v2f32_v2f16(<2 x float> %src) { +; GFX950-LABEL: v_test_cvt_v2f32_v2f16: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_cvt_pk_f16_f32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] + %res = fptrunc <2 x float> %src to <2 x half> + ret <2 x half> %res +} + +define <2 x half> @v_test_cvt_v2f64_v2f16(<2 x double> %src) { +; GFX950-SAFE-SDAG-LABEL: v_test_cvt_v2f64_v2f16: +; GFX950-SAFE-SDAG: ; %bb.0: +; GFX950-SAFE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SAFE-SDAG-NEXT: s_movk_i32 s0, 0x1ff +; GFX950-SAFE-SDAG-NEXT: v_and_or_b32 v0, v1, s0, v0 +; GFX950-SAFE-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX950-SAFE-SDAG-NEXT: v_lshrrev_b32_e32 v4, 8, v1 +; GFX950-SAFE-SDAG-NEXT: s_movk_i32 s1, 0xffe +; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX950-SAFE-SDAG-NEXT: v_bfe_u32 v5, v1, 20, 11 +; GFX950-SAFE-SDAG-NEXT: v_and_or_b32 v0, v4, s1, v0 +; GFX950-SAFE-SDAG-NEXT: v_sub_u32_e32 v6, 0x3f1, v5 +; GFX950-SAFE-SDAG-NEXT: v_or_b32_e32 v4, 0x1000, v0 +; GFX950-SAFE-SDAG-NEXT: v_med3_i32 v6, v6, 0, 13 +; GFX950-SAFE-SDAG-NEXT: v_lshrrev_b32_e32 v7, v6, v4 +; GFX950-SAFE-SDAG-NEXT: v_lshlrev_b32_e32 v6, v6, v7 +; GFX950-SAFE-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, v6, v4 +; GFX950-SAFE-SDAG-NEXT: v_add_u32_e32 v5, 0xfffffc10, v5 +; GFX950-SAFE-SDAG-NEXT: v_lshl_or_b32 v6, v5, 12, v0 +; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX950-SAFE-SDAG-NEXT: v_or_b32_e32 v4, v7, v4 +; GFX950-SAFE-SDAG-NEXT: v_cmp_gt_i32_e32 vcc, 1, v5 +; GFX950-SAFE-SDAG-NEXT: s_movk_i32 s2, 0x40f +; GFX950-SAFE-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc +; GFX950-SAFE-SDAG-NEXT: v_and_b32_e32 v6, 7, v4 +; GFX950-SAFE-SDAG-NEXT: v_cmp_lt_i32_e32 vcc, 5, v6 +; GFX950-SAFE-SDAG-NEXT: v_lshrrev_b32_e32 v4, 2, v4 +; GFX950-SAFE-SDAG-NEXT: s_mov_b32 s3, 0x8000 +; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GFX950-SAFE-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 3, v6 +; GFX950-SAFE-SDAG-NEXT: s_nop 1 +; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GFX950-SAFE-SDAG-NEXT: v_or_b32_e32 v6, v6, v7 +; GFX950-SAFE-SDAG-NEXT: v_add_u32_e32 v4, v4, v6 +; GFX950-SAFE-SDAG-NEXT: v_mov_b32_e32 v6, 0x7c00 +; GFX950-SAFE-SDAG-NEXT: v_cmp_gt_i32_e32 vcc, 31, v5 +; GFX950-SAFE-SDAG-NEXT: v_mov_b32_e32 v7, 0x7e00 +; GFX950-SAFE-SDAG-NEXT: s_nop 0 +; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc +; GFX950-SAFE-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX950-SAFE-SDAG-NEXT: s_nop 1 +; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v7, vcc +; GFX950-SAFE-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, s2, v5 +; GFX950-SAFE-SDAG-NEXT: s_nop 1 +; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; GFX950-SAFE-SDAG-NEXT: v_and_or_b32 v0, v1, s3, v0 +; GFX950-SAFE-SDAG-NEXT: v_and_or_b32 v1, v3, s0, v2 +; GFX950-SAFE-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GFX950-SAFE-SDAG-NEXT: v_lshrrev_b32_e32 v2, 8, v3 +; GFX950-SAFE-SDAG-NEXT: v_bfe_u32 v4, v3, 20, 11 +; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX950-SAFE-SDAG-NEXT: v_and_or_b32 v1, v2, s1, v1 +; GFX950-SAFE-SDAG-NEXT: v_sub_u32_e32 v5, 0x3f1, v4 +; GFX950-SAFE-SDAG-NEXT: v_or_b32_e32 v2, 0x1000, v1 +; GFX950-SAFE-SDAG-NEXT: v_med3_i32 v5, v5, 0, 13 +; GFX950-SAFE-SDAG-NEXT: v_lshrrev_b32_e32 v8, v5, v2 +; GFX950-SAFE-SDAG-NEXT: v_lshlrev_b32_e32 v5, v5, v8 +; GFX950-SAFE-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, v5, v2 +; GFX950-SAFE-SDAG-NEXT: v_add_u32_e32 v4, 0xfffffc10, v4 +; GFX950-SAFE-SDAG-NEXT: v_lshl_or_b32 v5, v4, 12, v1 +; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX950-SAFE-SDAG-NEXT: v_or_b32_e32 v2, v8, v2 +; GFX950-SAFE-SDAG-NEXT: v_cmp_gt_i32_e32 vcc, 1, v4 +; GFX950-SAFE-SDAG-NEXT: s_mov_b32 s0, 0x5040100 +; GFX950-SAFE-SDAG-NEXT: s_nop 0 +; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc +; GFX950-SAFE-SDAG-NEXT: v_and_b32_e32 v5, 7, v2 +; GFX950-SAFE-SDAG-NEXT: v_cmp_lt_i32_e32 vcc, 5, v5 +; GFX950-SAFE-SDAG-NEXT: v_lshrrev_b32_e32 v2, 2, v2 +; GFX950-SAFE-SDAG-NEXT: s_nop 0 +; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GFX950-SAFE-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 3, v5 +; GFX950-SAFE-SDAG-NEXT: s_nop 1 +; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX950-SAFE-SDAG-NEXT: v_or_b32_e32 v5, v5, v8 +; GFX950-SAFE-SDAG-NEXT: v_add_u32_e32 v2, v2, v5 +; GFX950-SAFE-SDAG-NEXT: v_cmp_gt_i32_e32 vcc, 31, v4 +; GFX950-SAFE-SDAG-NEXT: s_nop 1 +; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc +; GFX950-SAFE-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GFX950-SAFE-SDAG-NEXT: s_nop 1 +; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc +; GFX950-SAFE-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, s2, v4 +; GFX950-SAFE-SDAG-NEXT: s_nop 1 +; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX950-SAFE-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v3 +; GFX950-SAFE-SDAG-NEXT: v_and_or_b32 v1, v2, s3, v1 +; GFX950-SAFE-SDAG-NEXT: v_perm_b32 v0, v1, v0, s0 +; GFX950-SAFE-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SAFE-GISEL-LABEL: v_test_cvt_v2f64_v2f16: +; GFX950-SAFE-GISEL: ; %bb.0: +; GFX950-SAFE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SAFE-GISEL-NEXT: s_movk_i32 s0, 0x1ff +; GFX950-SAFE-GISEL-NEXT: v_and_or_b32 v0, v1, s0, v0 +; GFX950-SAFE-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX950-SAFE-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v1 +; GFX950-SAFE-GISEL-NEXT: s_movk_i32 s1, 0xffe +; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX950-SAFE-GISEL-NEXT: v_bfe_u32 v5, v1, 20, 11 +; GFX950-SAFE-GISEL-NEXT: v_and_or_b32 v0, v4, s1, v0 +; GFX950-SAFE-GISEL-NEXT: v_sub_u32_e32 v6, 0x3f1, v5 +; GFX950-SAFE-GISEL-NEXT: v_or_b32_e32 v4, 0x1000, v0 +; GFX950-SAFE-GISEL-NEXT: v_med3_i32 v6, v6, 0, 13 +; GFX950-SAFE-GISEL-NEXT: v_lshrrev_b32_e32 v7, v6, v4 +; GFX950-SAFE-GISEL-NEXT: v_lshlrev_b32_e32 v6, v6, v7 +; GFX950-SAFE-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, v6, v4 +; GFX950-SAFE-GISEL-NEXT: v_add_u32_e32 v5, 0xfffffc10, v5 +; GFX950-SAFE-GISEL-NEXT: v_lshl_or_b32 v6, v5, 12, v0 +; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX950-SAFE-GISEL-NEXT: v_or_b32_e32 v4, v7, v4 +; GFX950-SAFE-GISEL-NEXT: v_cmp_gt_i32_e32 vcc, 1, v5 +; GFX950-SAFE-GISEL-NEXT: s_movk_i32 s2, 0x40f +; GFX950-SAFE-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc +; GFX950-SAFE-GISEL-NEXT: v_and_b32_e32 v6, 7, v4 +; GFX950-SAFE-GISEL-NEXT: v_cmp_lt_i32_e32 vcc, 5, v6 +; GFX950-SAFE-GISEL-NEXT: v_lshrrev_b32_e32 v4, 2, v4 +; GFX950-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x8000 +; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GFX950-SAFE-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v6 +; GFX950-SAFE-GISEL-NEXT: s_nop 1 +; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GFX950-SAFE-GISEL-NEXT: v_or_b32_e32 v6, v6, v7 +; GFX950-SAFE-GISEL-NEXT: v_add_u32_e32 v4, v4, v6 +; GFX950-SAFE-GISEL-NEXT: v_mov_b32_e32 v6, 0x7c00 +; GFX950-SAFE-GISEL-NEXT: v_cmp_gt_i32_e32 vcc, 31, v5 +; GFX950-SAFE-GISEL-NEXT: v_mov_b32_e32 v7, 0x7e00 +; GFX950-SAFE-GISEL-NEXT: s_nop 0 +; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc +; GFX950-SAFE-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX950-SAFE-GISEL-NEXT: s_nop 1 +; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v7, vcc +; GFX950-SAFE-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s2, v5 +; GFX950-SAFE-GISEL-NEXT: s_nop 1 +; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; GFX950-SAFE-GISEL-NEXT: v_and_or_b32 v0, v1, s3, v0 +; GFX950-SAFE-GISEL-NEXT: v_and_or_b32 v1, v3, s0, v2 +; GFX950-SAFE-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GFX950-SAFE-GISEL-NEXT: v_lshrrev_b32_e32 v2, 8, v3 +; GFX950-SAFE-GISEL-NEXT: v_bfe_u32 v4, v3, 20, 11 +; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX950-SAFE-GISEL-NEXT: v_and_or_b32 v1, v2, s1, v1 +; GFX950-SAFE-GISEL-NEXT: v_sub_u32_e32 v5, 0x3f1, v4 +; GFX950-SAFE-GISEL-NEXT: v_or_b32_e32 v2, 0x1000, v1 +; GFX950-SAFE-GISEL-NEXT: v_med3_i32 v5, v5, 0, 13 +; GFX950-SAFE-GISEL-NEXT: v_lshrrev_b32_e32 v8, v5, v2 +; GFX950-SAFE-GISEL-NEXT: v_lshlrev_b32_e32 v5, v5, v8 +; GFX950-SAFE-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, v5, v2 +; GFX950-SAFE-GISEL-NEXT: v_add_u32_e32 v4, 0xfffffc10, v4 +; GFX950-SAFE-GISEL-NEXT: v_lshl_or_b32 v5, v4, 12, v1 +; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX950-SAFE-GISEL-NEXT: v_or_b32_e32 v2, v8, v2 +; GFX950-SAFE-GISEL-NEXT: v_cmp_gt_i32_e32 vcc, 1, v4 +; GFX950-SAFE-GISEL-NEXT: s_mov_b32 s0, 0x5040100 +; GFX950-SAFE-GISEL-NEXT: s_nop 0 +; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc +; GFX950-SAFE-GISEL-NEXT: v_and_b32_e32 v5, 7, v2 +; GFX950-SAFE-GISEL-NEXT: v_cmp_lt_i32_e32 vcc, 5, v5 +; GFX950-SAFE-GISEL-NEXT: v_lshrrev_b32_e32 v2, 2, v2 +; GFX950-SAFE-GISEL-NEXT: s_nop 0 +; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GFX950-SAFE-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v5 +; GFX950-SAFE-GISEL-NEXT: s_nop 1 +; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX950-SAFE-GISEL-NEXT: v_or_b32_e32 v5, v5, v8 +; GFX950-SAFE-GISEL-NEXT: v_add_u32_e32 v2, v2, v5 +; GFX950-SAFE-GISEL-NEXT: v_cmp_gt_i32_e32 vcc, 31, v4 +; GFX950-SAFE-GISEL-NEXT: s_nop 1 +; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc +; GFX950-SAFE-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GFX950-SAFE-GISEL-NEXT: s_nop 1 +; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc +; GFX950-SAFE-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s2, v4 +; GFX950-SAFE-GISEL-NEXT: s_nop 1 +; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX950-SAFE-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3 +; GFX950-SAFE-GISEL-NEXT: v_and_or_b32 v1, v2, s3, v1 +; GFX950-SAFE-GISEL-NEXT: v_perm_b32 v0, v1, v0, s0 +; GFX950-SAFE-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-UNSAFE-LABEL: v_test_cvt_v2f64_v2f16: +; GFX950-UNSAFE: ; %bb.0: +; GFX950-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-UNSAFE-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] +; GFX950-UNSAFE-NEXT: v_cvt_f32_f64_e32 v1, v[2:3] +; GFX950-UNSAFE-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX950-UNSAFE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX950-UNSAFE-NEXT: s_mov_b32 s0, 0x5040100 +; GFX950-UNSAFE-NEXT: v_perm_b32 v0, v1, v0, s0 +; GFX950-UNSAFE-NEXT: s_setpc_b64 s[30:31] + %res = fptrunc <2 x double> %src to <2 x half> + ret <2 x half> %res +} +