diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6805e0cb23ace..58ab99e0dcdee 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -27526,23 +27526,28 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
   if ((Opcode == ISD::MULHS || Opcode == ISD::MULHU) && !TLI.isTypeLegal(EltVT))
     return SDValue();
 
+  if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode()) {
+    // All but one element should have an undef input, which will fold to a
+    // constant or undef. Avoid splatting which would over-define potentially
+    // undefined elements.
+
+    // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
+    // build_vec ..undef, (bo X, Y), undef...
+    SmallVector<SDValue> EltsX, EltsY, EltsResult;
+    DAG.ExtractVectorElements(Src0, EltsX);
+    DAG.ExtractVectorElements(Src1, EltsY);
+
+    // Apply the operation to each pair of lanes, then rebuild the vector.
+    for (auto [X, Y] : zip(EltsX, EltsY))
+      EltsResult.push_back(DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()));
+    return DAG.getBuildVector(VT, DL, EltsResult);
+  }
+
   SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
 
-  // If all lanes but 1 are undefined, no need to splat the scalar result.
-  // TODO: Keep track of undefs and use that info in the general case.
-  if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
-      count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
-      count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
-    // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
-    // build_vec ..undef, (bo X, Y), undef...
-    SmallVector Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
-    Ops[Index0] = ScalarBO;
-    return DAG.getBuildVector(VT, DL, Ops);
-  }
-
   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
   return DAG.getSplat(VT, DL, ScalarBO);
 }
diff --git a/llvm/test/CodeGen/AMDGPU/trunc-combine.ll b/llvm/test/CodeGen/AMDGPU/trunc-combine.ll
index aa3e05fdbdb36..02e30b6c68e99 100644
--- a/llvm/test/CodeGen/AMDGPU/trunc-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/trunc-combine.ll
@@ -156,3 +156,337 @@ define <2 x i16> @trunc_v2i64_arg_to_v2i16(<2 x i64> %arg0) #0 {
   %trunc = trunc <2 x i64> %arg0 to <2 x i16>
   ret <2 x i16> %trunc
 }
+
+; Test for regression where an unnecessary v_alignbit_b32 was inserted
+; on the final result, due to losing the fact that the upper half of
+; the lhs vector was undef.
+define <2 x i16> @vector_trunc_high_bits_undef_lshr_lhs_alignbit_regression(i32 %arg0) {
+; SI-LABEL: vector_trunc_high_bits_undef_lshr_lhs_alignbit_regression:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: vector_trunc_high_bits_undef_lshr_lhs_alignbit_regression:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
+  %lshr = lshr <2 x i32> %undef.hi.elt, splat (i32 16)
+  %trunc = trunc <2 x i32> %lshr to <2 x i16>
+  ret <2 x i16> %trunc
+}
+
+; The remaining tests use the same shape (only element 0 defined, a binary
+; operator against a splat constant, then a trunc to <2 x i16>) with other
+; operators and with the splat on either side.
+define <2 x i16> @vector_trunc_high_bits_undef_lshr_rhs_alignbit_regression(i32 %arg0) {
+; SI-LABEL: vector_trunc_high_bits_undef_lshr_rhs_alignbit_regression:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_lshr_b32_e32 v0, 16, v0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: vector_trunc_high_bits_undef_lshr_rhs_alignbit_regression:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_lshrrev_b32_e64 v0, v0, 16
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
+  %lshr = lshr <2 x i32> splat (i32 16), %undef.hi.elt
+  %trunc = trunc <2 x i32> %lshr to <2 x i16>
+  ret <2 x i16> %trunc
+}
+
+define <2 x i16> @vector_trunc_high_bits_undef_ashr_lhs_alignbit_regression(i32 %arg0) {
+; SI-LABEL: vector_trunc_high_bits_undef_ashr_lhs_alignbit_regression:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: vector_trunc_high_bits_undef_ashr_lhs_alignbit_regression:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
+  %ashr = ashr <2 x i32> %undef.hi.elt, splat (i32 16)
+  %trunc = trunc <2 x i32> %ashr to <2 x i16>
+  ret <2 x i16> %trunc
+}
+
+define <2 x i16> @vector_trunc_high_bits_undef_ashr_rhs_alignbit_regression(i32 %arg0) {
+; SI-LABEL: vector_trunc_high_bits_undef_ashr_rhs_alignbit_regression:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_ashr_i32_e32 v0, -4, v0
+; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: vector_trunc_high_bits_undef_ashr_rhs_alignbit_regression:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_ashrrev_i32_e64 v0, v0, -4
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
+  %ashr = ashr <2 x i32> splat (i32 -4), %undef.hi.elt
+  %trunc = trunc <2 x i32> %ashr to <2 x i16>
+  ret <2 x i16> %trunc
+}
+
+define <2 x i16> @vector_trunc_high_bits_undef_add_lhs_alignbit_regression(i32 %arg0) {
+; SI-LABEL: vector_trunc_high_bits_undef_add_lhs_alignbit_regression:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_add_i32_e32 v0, vcc, 16, v0
+; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: vector_trunc_high_bits_undef_add_lhs_alignbit_regression:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
+  %add = add <2 x i32> %undef.hi.elt, splat (i32 16)
+  %trunc = trunc <2 x i32> %add to <2 x i16>
+  ret <2 x i16> %trunc
+}
+
+define <2 x i16> @vector_trunc_high_bits_undef_shl_rhs_alignbit_regression(i32 %arg0) {
+; SI-LABEL: vector_trunc_high_bits_undef_shl_rhs_alignbit_regression:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_lshl_b32_e32 v0, 2, v0
+; SI-NEXT:    v_and_b32_e32 v0, 0xfffe, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: vector_trunc_high_bits_undef_shl_rhs_alignbit_regression:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_lshlrev_b32_e64 v0, v0, 2
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
+  %shl = shl <2 x i32> splat (i32 2), %undef.hi.elt
+  %trunc = trunc <2 x i32> %shl to <2 x i16>
+  ret <2 x i16> %trunc
+}
+
+define <2 x i16> @vector_trunc_high_bits_undef_sub_lhs_alignbit_regression(i32 %arg0) {
+; SI-LABEL: vector_trunc_high_bits_undef_sub_lhs_alignbit_regression:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_add_i32_e32 v0, vcc, -16, v0
+; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: vector_trunc_high_bits_undef_sub_lhs_alignbit_regression:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_add_u32_e32 v0, vcc, -16, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
+  %sub = sub <2 x i32> %undef.hi.elt, splat (i32 16)
+  %trunc = trunc <2 x i32> %sub to <2 x i16>
+  ret <2 x i16> %trunc
+}
+
+define <2 x i16> @vector_trunc_high_bits_undef_or_lhs_alignbit_regression(i32 %arg0) {
+; SI-LABEL: vector_trunc_high_bits_undef_or_lhs_alignbit_regression:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_or_b32_e32 v0, 0xffff0011, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0xffff
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: vector_trunc_high_bits_undef_or_lhs_alignbit_regression:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_or_b32_e32 v0, 0xffff0011, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
+  %or = or <2 x i32> %undef.hi.elt, splat (i32 17)
+  %trunc = trunc <2 x i32> %or to <2 x i16>
+  ret <2 x i16> %trunc
+}
+
+define <2 x i16> @vector_trunc_high_bits_undef_xor_lhs_alignbit_regression(i32 %arg0) {
+; SI-LABEL: vector_trunc_high_bits_undef_xor_lhs_alignbit_regression:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_xor_b32_e32 v0, 17, v0
+; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: vector_trunc_high_bits_undef_xor_lhs_alignbit_regression:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_xor_b32_e32 v0, 17, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
+  %xor = xor <2 x i32> %undef.hi.elt, splat (i32 17)
+  %trunc = trunc <2 x i32> %xor to <2 x i16>
+  ret <2 x i16> %trunc
+}
+
+define <2 x i16> @vector_trunc_high_bits_undef_shl_lhs_alignbit_regression(i32 %arg0) {
+; SI-LABEL: vector_trunc_high_bits_undef_shl_lhs_alignbit_regression:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    v_and_b32_e32 v0, 0xfffc, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: vector_trunc_high_bits_undef_shl_lhs_alignbit_regression:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_lshlrev_b16_e32 v0, 2, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
+  %shl = shl <2 x i32> %undef.hi.elt, splat (i32 2)
+  %trunc = trunc <2 x i32> %shl to <2 x i16>
+  ret <2 x i16> %trunc
+}
+
+define <2 x i16> @vector_trunc_high_bits_undef_mul_lhs_alignbit_regression(i32 %arg0) {
+; SI-LABEL: vector_trunc_high_bits_undef_mul_lhs_alignbit_regression:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_mul_lo_u32 v0, v0, 18
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    v_and_b32_e32 v0, 0xfffe, v0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: vector_trunc_high_bits_undef_mul_lhs_alignbit_regression:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_mul_lo_u32 v0, v0, 18
+; VI-NEXT:    v_and_b32_e32 v0, 0xfffe, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
+  %mul = mul <2 x i32> %undef.hi.elt, splat (i32 18)
+  %trunc = trunc <2 x i32> %mul to <2 x i16>
+  ret <2 x i16> %trunc
+}
+
+define <2 x i16> @vector_trunc_high_bits_undef_sdiv_lhs_alignbit_regression(i32 %arg0) {
+; SI-LABEL: vector_trunc_high_bits_undef_sdiv_lhs_alignbit_regression:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s4, 0x38e38e39
+; SI-NEXT:    v_mul_hi_i32 v0, v0, s4
+; SI-NEXT:    v_lshrrev_b32_e32 v1, 31, v0
+; SI-NEXT:    v_lshrrev_b32_e32 v0, 2, v0
+; SI-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: vector_trunc_high_bits_undef_sdiv_lhs_alignbit_regression:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    s_mov_b32 s4, 0x38e38e39
+; VI-NEXT:    v_mul_hi_i32 v0, v0, s4
+; VI-NEXT:    v_lshrrev_b32_e32 v1, 31, v0
+; VI-NEXT:    v_ashrrev_i32_e32 v0, 2, v0
+; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
+  %sdiv = sdiv <2 x i32> %undef.hi.elt, splat (i32 18)
+  %trunc = trunc <2 x i32> %sdiv to <2 x i16>
+  ret <2 x i16> %trunc
+}
+
+define <2 x i16> @vector_trunc_high_bits_undef_srem_lhs_alignbit_regression(i32 %arg0) {
+; SI-LABEL: vector_trunc_high_bits_undef_srem_lhs_alignbit_regression:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s4, 0x38e38e39
+; SI-NEXT:    v_mul_hi_i32 v1, v0, s4
+; SI-NEXT:    v_lshrrev_b32_e32 v2, 31, v1
+; SI-NEXT:    v_lshrrev_b32_e32 v1, 2, v1
+; SI-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; SI-NEXT:    v_mul_lo_u32 v1, v1, 18
+; SI-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
+; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: vector_trunc_high_bits_undef_srem_lhs_alignbit_regression:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    s_mov_b32 s4, 0x38e38e39
+; VI-NEXT:    v_mul_hi_i32 v1, v0, s4
+; VI-NEXT:    v_lshrrev_b32_e32 v2, 31, v1
+; VI-NEXT:    v_ashrrev_i32_e32 v1, 2, v1
+; VI-NEXT:    v_add_u32_e32 v1, vcc, v1, v2
+; VI-NEXT:    v_mul_lo_u32 v1, v1, 18
+; VI-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
+  %srem = srem <2 x i32> %undef.hi.elt, splat (i32 18)
+  %trunc = trunc <2 x i32> %srem to <2 x i16>
+  ret <2 x i16> %trunc
+}
+
+
+define <2 x i16> @vector_trunc_high_bits_undef_udiv_lhs_alignbit_regression(i32 %arg0) {
+; SI-LABEL: vector_trunc_high_bits_undef_udiv_lhs_alignbit_regression:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s4, 0x38e38e39
+; SI-NEXT:    v_mul_hi_u32 v0, v0, s4
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    v_bfe_u32 v0, v0, 2, 16
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: vector_trunc_high_bits_undef_udiv_lhs_alignbit_regression:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    s_mov_b32 s4, 0x38e38e39
+; VI-NEXT:    v_mul_hi_u32 v0, v0, s4
+; VI-NEXT:    v_lshrrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
+  %udiv = udiv <2 x i32> %undef.hi.elt, splat (i32 18)
+  %trunc = trunc <2 x i32> %udiv to <2 x i16>
+  ret <2 x i16> %trunc
+}
+
+define <2 x i16> @vector_trunc_high_bits_undef_urem_lhs_alignbit_regression(i32 %arg0) {
+; SI-LABEL: vector_trunc_high_bits_undef_urem_lhs_alignbit_regression:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s4, 0x38e38e39
+; SI-NEXT:    v_mul_hi_u32 v1, v0, s4
+; SI-NEXT:    v_lshrrev_b32_e32 v1, 2, v1
+; SI-NEXT:    v_mul_lo_u32 v1, v1, 18
+; SI-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
+; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: vector_trunc_high_bits_undef_urem_lhs_alignbit_regression:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    s_mov_b32 s4, 0x38e38e39
+; VI-NEXT:    v_mul_hi_u32 v1, v0, s4
+; VI-NEXT:    v_lshrrev_b32_e32 v1, 2, v1
+; VI-NEXT:    v_mul_lo_u32 v1, v1, 18
+; VI-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %undef.hi.elt = insertelement <2 x i32> poison, i32 %arg0, i32 0
+  %urem = urem <2 x i32> %undef.hi.elt, splat (i32 18)
+  %trunc = trunc <2 x i32> %urem to <2 x i16>
+  ret <2 x i16> %trunc
+}