Skip to content

Commit 4223c9c

Browse files
committed
tcg/s390x: Implement TCG_TARGET_HAS_sat_vec
The unsigned saturations are handled via generic code using min/max. The signed saturations are expanded using double-sized arithmetic and a saturating pack. Since all operations are done via expansion, do not actually set TCG_TARGET_HAS_sat_vec. Signed-off-by: Richard Henderson <[email protected]>
1 parent 220db7a commit 4223c9c

File tree

2 files changed

+66
-0
lines changed

2 files changed

+66
-0
lines changed

tcg/s390x/tcg-target.c.inc

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,10 @@ typedef enum S390Opcode {
291291
VRRc_VNO = 0xe76b,
292292
VRRc_VO = 0xe76a,
293293
VRRc_VOC = 0xe76f,
294+
VRRc_VPKS = 0xe797, /* we leave the m5 cs field 0 */
294295
VRRc_VS = 0xe7f7,
296+
VRRa_VUPH = 0xe7d7,
297+
VRRa_VUPL = 0xe7d6,
295298
VRRc_VX = 0xe76d,
296299
VRRf_VLVGP = 0xe762,
297300

@@ -2800,6 +2803,16 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
28002803
}
28012804
break;
28022805

2806+
case INDEX_op_s390_vuph_vec:
2807+
tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
2808+
break;
2809+
case INDEX_op_s390_vupl_vec:
2810+
tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
2811+
break;
2812+
case INDEX_op_s390_vpks_vec:
2813+
tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
2814+
break;
2815+
28032816
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
28042817
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
28052818
default:
@@ -2842,6 +2855,9 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
28422855
return -1;
28432856
case INDEX_op_mul_vec:
28442857
return vece < MO_64;
2858+
case INDEX_op_ssadd_vec:
2859+
case INDEX_op_sssub_vec:
2860+
return vece < MO_64 ? -1 : 0;
28452861
default:
28462862
return 0;
28472863
}
@@ -2897,6 +2913,43 @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
28972913
}
28982914
}
28992915

2916+
/*
 * Expand a signed saturating add or subtract for elements narrower
 * than 64 bits.
 *
 * Each input is sign-extended into double-width elements by the two
 * unpack ops (vuph/vupl), ADD_SUB_OPC is applied at the wider element
 * size, and the two wide results are packed back into V0 with the
 * saturating pack op (vpks).
 *
 * type:        vector type used for the temporaries and all emitted ops
 * vece:        log2 element size of the inputs; must be below MO_64
 * v0:          destination vector
 * v1, v2:      source vectors
 * add_sub_opc: the plain (non-saturating) vector opcode to apply to the
 *              widened elements
 */
static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
{
    TCGv_vec hi_a = tcg_temp_new_vec(type);
    TCGv_vec hi_b = tcg_temp_new_vec(type);
    TCGv_vec lo_a = tcg_temp_new_vec(type);
    TCGv_vec lo_b = tcg_temp_new_vec(type);
    /* Element size used once the inputs have been unpacked. */
    const unsigned wide = vece + 1;

    /* There is no element size wider than MO_64 to unpack into. */
    tcg_debug_assert(vece < MO_64);

    /* Sign-extending unpack of both operands via vuph. */
    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
              tcgv_vec_arg(hi_a), tcgv_vec_arg(v1));
    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
              tcgv_vec_arg(hi_b), tcgv_vec_arg(v2));

    /* Sign-extending unpack of both operands via vupl. */
    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
              tcgv_vec_arg(lo_a), tcgv_vec_arg(v1));
    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
              tcgv_vec_arg(lo_b), tcgv_vec_arg(v2));

    /* Do the arithmetic in place on the double-width elements. */
    vec_gen_3(add_sub_opc, type, wide,
              tcgv_vec_arg(hi_a), tcgv_vec_arg(hi_a), tcgv_vec_arg(hi_b));
    vec_gen_3(add_sub_opc, type, wide,
              tcgv_vec_arg(lo_a), tcgv_vec_arg(lo_a), tcgv_vec_arg(lo_b));

    /* Narrow back to the original element size, saturating as we pack. */
    vec_gen_3(INDEX_op_s390_vpks_vec, type, wide,
              tcgv_vec_arg(v0), tcgv_vec_arg(hi_a), tcgv_vec_arg(lo_a));

    tcg_temp_free_vec(hi_a);
    tcg_temp_free_vec(hi_b);
    tcg_temp_free_vec(lo_a);
    tcg_temp_free_vec(lo_b);
}
2952+
29002953
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
29012954
TCGArg a0, ...)
29022955
{
@@ -2920,6 +2973,13 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
29202973
tcg_temp_free_vec(t0);
29212974
break;
29222975

2976+
case INDEX_op_ssadd_vec:
2977+
expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
2978+
break;
2979+
case INDEX_op_sssub_vec:
2980+
expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
2981+
break;
2982+
29232983
default:
29242984
g_assert_not_reached();
29252985
}
@@ -3080,6 +3140,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
30803140
case INDEX_op_sari_vec:
30813141
case INDEX_op_shli_vec:
30823142
case INDEX_op_shri_vec:
3143+
case INDEX_op_s390_vuph_vec:
3144+
case INDEX_op_s390_vupl_vec:
30833145
return C_O1_I1(v, v);
30843146
case INDEX_op_add_vec:
30853147
case INDEX_op_sub_vec:
@@ -3099,6 +3161,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
30993161
case INDEX_op_smin_vec:
31003162
case INDEX_op_umax_vec:
31013163
case INDEX_op_umin_vec:
3164+
case INDEX_op_s390_vpks_vec:
31023165
return C_O1_I2(v, v, v);
31033166
case INDEX_op_rotls_vec:
31043167
case INDEX_op_shls_vec:

tcg/s390x/tcg-target.opc.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,6 @@
1010
* emitted by tcg_expand_vec_op. For those familiar with GCC internals,
1111
* consider these to be UNSPEC with names.
1212
*/
13+
/*
 * Backend-private vector opcodes used by the signed saturating add/sub
 * expansion in tcg-target.c.inc:
 *   s390_vuph_vec, s390_vupl_vec - sign-extending unpack, emitted as the
 *       VUPH and VUPL instructions; one vector output, one vector input.
 *   s390_vpks_vec - pack with saturation, emitted as VPKS; one vector
 *       output, two vector inputs.
 * NOTE(review): the numeric DEF arguments presumably are the output,
 * input and constant argument counts -- confirm against tcg-opc.h.
 */
DEF(s390_vuph_vec, 1, 1, 0, IMPLVEC)
14+
DEF(s390_vupl_vec, 1, 1, 0, IMPLVEC)
15+
DEF(s390_vpks_vec, 1, 2, 0, IMPLVEC)

0 commit comments

Comments
 (0)