Skip to content

Commit 99dcd82

Browse files
committed
RISC-V: Add intrinsics support for SiFive Xsfvqmaccqoq/dod extensions.
1 parent cf261dd commit 99dcd82

16 files changed

+581
-18
lines changed

gcc/config/riscv/generic-vector-ooo.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@
6969

7070
;; Vector float multiplication and FMA.
7171
(define_insn_reservation "vec_fmul" 6
72-
(eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd,vfwmaccbf16")
72+
(eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd,vfwmaccbf16,vqmacc")
7373
"vxu_ooo_issue,vxu_ooo_alu")
7474

7575
;; Vector crypto, assumed to be a generic operation for now.

gcc/config/riscv/genrvv-type-indexer.cc

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,12 @@ main (int argc, const char **argv)
255255
fprintf (fp, " /*SHIFT*/ INVALID,\n");
256256
fprintf (fp, " /*DOUBLE_TRUNC*/ INVALID,\n");
257257
fprintf (fp, " /*QUAD_TRUNC*/ INVALID,\n");
258+
fprintf (fp, " /*QUAD_EMUL*/ INVALID,\n");
259+
fprintf (fp, " /*QUAD_EMUL_SIGNED*/ INVALID,\n");
260+
fprintf (fp, " /*QUAD_EMUL_UNSIGNED*/ INVALID,\n");
261+
fprintf (fp, " /*QUAD_FIX*/ INVALID,\n");
262+
fprintf (fp, " /*QUAD_FIX_SIGNED*/ INVALID,\n");
263+
fprintf (fp, " /*QUAD_FIX_UNSIGNED*/ INVALID,\n");
258264
fprintf (fp, " /*OCT_TRUNC*/ INVALID,\n");
259265
fprintf (fp, " /*DOUBLE_TRUNC_SCALAR*/ INVALID,\n");
260266
fprintf (fp, " /*DOUBLE_TRUNC_SIGNED*/ INVALID,\n");
@@ -266,6 +272,9 @@ main (int argc, const char **argv)
266272
fprintf (fp, " /*FLOAT*/ INVALID,\n");
267273
fprintf (fp, " /*LMUL1*/ INVALID,\n");
268274
fprintf (fp, " /*WLMUL1*/ INVALID,\n");
275+
fprintf (fp, " /*QLMUL1*/ INVALID,\n");
276+
fprintf (fp, " /*QLMUL1_SIGNED*/ INVALID,\n");
277+
fprintf (fp, " /*QLMUL1_UNSIGNED*/ INVALID,\n");
269278
for (unsigned eew : {8, 16, 32, 64})
270279
fprintf (fp, " /*EEW%d_INTERPRET*/ INVALID,\n", eew);
271280

@@ -322,6 +331,18 @@ main (int argc, const char **argv)
322331
same_ratio_eew_type (sew, lmul_log2, sew / 4, unsigned_p,
323332
false)
324333
.c_str ());
334+
fprintf (fp, " /*QUAD_EMUL*/ %s,\n",
335+
inttype (8, lmul_log2 - 1, unsigned_p).c_str ());
336+
fprintf (fp, " /*QUAD_EMUL_SIGNED*/ %s,\n",
337+
inttype (8, lmul_log2 - 1, false).c_str ());
338+
fprintf (fp, " /*QUAD_EMUL_UNSIGNED*/ %s,\n",
339+
inttype (8, lmul_log2 - 1, true).c_str ());
340+
fprintf (fp, " /*QUAD_FIX*/ %s,\n",
341+
inttype (8, lmul_log2, unsigned_p).c_str ());
342+
fprintf (fp, " /*QUAD_FIX_SIGNED*/ %s,\n",
343+
inttype (8, lmul_log2, false).c_str ());
344+
fprintf (fp, " /*QUAD_FIX_UNSIGNED*/ %s,\n",
345+
inttype (8, lmul_log2, true).c_str ());
325346
fprintf (fp, " /*OCT_TRUNC*/ %s,\n",
326347
same_ratio_eew_type (sew, lmul_log2, sew / 8, unsigned_p,
327348
false)
@@ -352,6 +373,12 @@ main (int argc, const char **argv)
352373
inttype (sew, /*lmul_log2*/ 0, unsigned_p).c_str ());
353374
fprintf (fp, " /*WLMUL1*/ %s,\n",
354375
inttype (sew * 2, /*lmul_log2*/ 0, unsigned_p).c_str ());
376+
fprintf (fp, " /*QLMUL1*/ %s,\n",
377+
inttype (8, /*lmul_log2*/ 0, unsigned_p).c_str ());
378+
fprintf (fp, " /*QLMUL1_SIGNED*/ %s,\n",
379+
inttype (8, /*lmul_log2*/ 0, false).c_str ());
380+
fprintf (fp, " /*QLMUL1_UNSIGNED*/ %s,\n",
381+
inttype (8, /*lmul_log2*/ 0, true).c_str ());
355382
for (unsigned eew : {8, 16, 32, 64})
356383
{
357384
if (eew == sew)
@@ -413,6 +440,12 @@ main (int argc, const char **argv)
413440
fprintf (fp, " /*DOUBLE_TRUNC*/ %s,\n",
414441
same_ratio_eew_type (16, lmul_log2, 8, false, true).c_str ());
415442
fprintf (fp, " /*QUAD_TRUNC*/ INVALID,\n");
443+
fprintf (fp, " /*QUAD_EMUL*/ INVALID,\n");
444+
fprintf (fp, " /*QUAD_EMUL_SIGNED*/ INVALID,\n");
445+
fprintf (fp, " /*QUAD_EMUL_UNSIGNED*/ INVALID,\n");
446+
fprintf (fp, " /*QUAD_FIX*/ INVALID,\n");
447+
fprintf (fp, " /*QUAD_FIX_SIGNED*/ INVALID,\n");
448+
fprintf (fp, " /*QUAD_FIX_UNSIGNED*/ INVALID,\n");
416449
fprintf (fp, " /*OCT_TRUNC*/ INVALID,\n");
417450
fprintf (fp, " /*DOUBLE_TRUNC_SCALAR*/ %s,\n",
418451
same_ratio_eew_type (16, lmul_log2, 8, false, true).c_str ());
@@ -430,6 +463,10 @@ main (int argc, const char **argv)
430463
bfloat16_type (/*lmul_log2*/ 0).c_str ());
431464
fprintf (fp, " /*WLMUL1*/ %s,\n",
432465
bfloat16_wide_type (/*lmul_log2*/ 0).c_str ());
466+
fprintf (fp, " /*QLMUL1*/ %s,\n",
467+
bfloat16_wide_type (/*lmul_log2*/ 0).c_str ());
468+
fprintf (fp, " /*QLMUL1_SIGNED*/ INVALID,\n");
469+
fprintf (fp, " /*QLMUL1_UNSIGNED*/ INVALID,\n");
433470
for (unsigned eew : {8, 16, 32, 64})
434471
fprintf (fp, " /*EEW%d_INTERPRET*/ INVALID,\n", eew);
435472

@@ -478,6 +515,12 @@ main (int argc, const char **argv)
478515
same_ratio_eew_type (sew, lmul_log2, sew / 2, false, true)
479516
.c_str ());
480517
fprintf (fp, " /*QUAD_TRUNC*/ INVALID,\n");
518+
fprintf (fp, " /*QUAD_EMUL*/ INVALID,\n");
519+
fprintf (fp, " /*QUAD_EMUL_SIGNED*/ INVALID,\n");
520+
fprintf (fp, " /*QUAD_EMUL_UNSIGNED*/ INVALID,\n");
521+
fprintf (fp, " /*QUAD_FIX*/ INVALID,\n");
522+
fprintf (fp, " /*QUAD_FIX_SIGNED*/ INVALID,\n");
523+
fprintf (fp, " /*QUAD_FIX_UNSIGNED*/ INVALID,\n");
481524
fprintf (fp, " /*OCT_TRUNC*/ INVALID,\n");
482525
fprintf (fp, " /*DOUBLE_TRUNC_SCALAR*/ %s,\n",
483526
same_ratio_eew_type (sew, lmul_log2, sew / 2, false, true)
@@ -501,6 +544,10 @@ main (int argc, const char **argv)
501544
floattype (sew, /*lmul_log2*/ 0).c_str ());
502545
fprintf (fp, " /*WLMUL1*/ %s,\n",
503546
floattype (sew * 2, /*lmul_log2*/ 0).c_str ());
547+
fprintf (fp, " /*QLMUL1*/ %s,\n",
548+
floattype (sew / 4, /*lmul_log2*/ 0).c_str ());
549+
fprintf (fp, " /*QLMUL1_SIGNED*/ INVALID,\n");
550+
fprintf (fp, " /*QLMUL1_UNSIGNED*/ INVALID,\n");
504551
for (unsigned eew : {8, 16, 32, 64})
505552
fprintf (fp, " /*EEW%d_INTERPRET*/ INVALID,\n", eew);
506553

gcc/config/riscv/riscv-vector-builtins-bases.cc

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -869,6 +869,105 @@ class vwmaccus : public function_base
869869
}
870870
};
871871

872+
/* Implements vqmacc.  The intrinsic has no merge operand, applies no mask
   policy and uses no mask predication; it can be overloaded only for the
   tu predication.  expand () selects the qoq pattern for OP_TYPE_4x8x4 and
   the dod pattern for OP_TYPE_2x8x2, both with SIGN_EXTEND; any other
   operand type is treated as unreachable.  */
873+
class vqmacc : public function_base
874+
{
875+
public:
876+
bool has_merge_operand_p () const override { return false; }
877+
bool apply_mask_policy_p () const override { return false; }
878+
bool use_mask_predication_p () const override { return false; }
879+
bool can_be_overloaded_p (enum predication_type_index pred) const override
880+
{
881+
return pred == PRED_TYPE_tu;
882+
}
883+
884+
rtx expand (function_expander &e) const override
885+
{
886+
if (e.op_info->op == OP_TYPE_4x8x4)
887+
return e.use_widen_ternop_insn (
888+
code_for_pred_matrix_mul_plus_qoq (SIGN_EXTEND, e.vector_mode ()));
889+
if (e.op_info->op == OP_TYPE_2x8x2)
890+
return e.use_widen_ternop_insn (
891+
code_for_pred_matrix_mul_plus_dod (SIGN_EXTEND, e.vector_mode ()));
892+
gcc_unreachable ();
893+
}
894+
};
895+
896+
/* Implements vqmaccu.  The intrinsic has no merge operand, applies no mask
   policy and uses no mask predication; it can be overloaded only for the
   tu predication.  expand () selects the qoq pattern for OP_TYPE_4x8x4 and
   the dod pattern for OP_TYPE_2x8x2, both with ZERO_EXTEND; any other
   operand type is treated as unreachable.  */
897+
class vqmaccu : public function_base
898+
{
899+
public:
900+
bool has_merge_operand_p () const override { return false; }
901+
bool apply_mask_policy_p () const override { return false; }
902+
bool use_mask_predication_p () const override { return false; }
903+
904+
bool can_be_overloaded_p (enum predication_type_index pred) const override
905+
{
906+
return pred == PRED_TYPE_tu;
907+
}
908+
909+
rtx expand (function_expander &e) const override
910+
{
911+
if (e.op_info->op == OP_TYPE_4x8x4)
912+
return e.use_widen_ternop_insn (
913+
code_for_pred_matrix_mul_plus_qoq (ZERO_EXTEND, e.vector_mode ()));
914+
if (e.op_info->op == OP_TYPE_2x8x2)
915+
return e.use_widen_ternop_insn (
916+
code_for_pred_matrix_mul_plus_dod (ZERO_EXTEND, e.vector_mode ()));
917+
gcc_unreachable ();
918+
}
919+
};
920+
921+
/* Implements vqmaccsu.  The intrinsic has no merge operand, applies no mask
   policy and uses no mask predication; it can be overloaded only for the
   tu predication.  expand () selects the plussu qoq pattern for
   OP_TYPE_4x8x4 and the plussu dod pattern for OP_TYPE_2x8x2; any other
   operand type is treated as unreachable.  */
922+
class vqmaccsu : public function_base
923+
{
924+
public:
925+
bool has_merge_operand_p () const override { return false; }
926+
bool apply_mask_policy_p () const override { return false; }
927+
bool use_mask_predication_p () const override { return false; }
928+
929+
bool can_be_overloaded_p (enum predication_type_index pred) const override
930+
{
931+
return pred == PRED_TYPE_tu;
932+
}
933+
934+
rtx expand (function_expander &e) const override
935+
{
936+
if (e.op_info->op == OP_TYPE_4x8x4)
937+
return e.use_widen_ternop_insn (
938+
code_for_pred_matrix_mul_plussu_qoq (e.vector_mode ()));
939+
if (e.op_info->op == OP_TYPE_2x8x2)
940+
return e.use_widen_ternop_insn (
941+
code_for_pred_matrix_mul_plussu_dod (e.vector_mode ()));
942+
gcc_unreachable ();
943+
}
944+
};
945+
946+
/* Implements vqmaccus.  The intrinsic has no merge operand, applies no mask
   policy and uses no mask predication; it can be overloaded only for the
   tu predication.  expand () selects the plusus qoq pattern for
   OP_TYPE_4x8x4 and the plusus dod pattern for OP_TYPE_2x8x2; any other
   operand type is treated as unreachable.  */
947+
class vqmaccus : public function_base
948+
{
949+
public:
950+
bool has_merge_operand_p () const override { return false; }
951+
bool apply_mask_policy_p () const override { return false; }
952+
bool use_mask_predication_p () const override { return false; }
953+
954+
bool can_be_overloaded_p (enum predication_type_index pred) const override
955+
{
956+
return pred == PRED_TYPE_tu;
957+
}
958+
959+
rtx expand (function_expander &e) const override
960+
{
961+
if (e.op_info->op == OP_TYPE_4x8x4)
962+
return e.use_widen_ternop_insn (
963+
code_for_pred_matrix_mul_plusus_qoq (e.vector_mode ()));
964+
if (e.op_info->op == OP_TYPE_2x8x2)
965+
return e.use_widen_ternop_insn (
966+
code_for_pred_matrix_mul_plusus_dod (e.vector_mode ()));
967+
gcc_unreachable ();
968+
}
969+
};
970+
872971
/* Implements vmand/vmnand/vmandn/vmxor/vmor/vmnor/vmorn/vmxnor */
873972
template<rtx_code CODE>
874973
class mask_logic : public function_base
@@ -2560,6 +2659,10 @@ static CONSTEXPR const vwmacc vwmacc_obj;
25602659
static CONSTEXPR const vwmaccu vwmaccu_obj;
25612660
static CONSTEXPR const vwmaccsu vwmaccsu_obj;
25622661
static CONSTEXPR const vwmaccus vwmaccus_obj;
2662+
static CONSTEXPR const vqmacc sf_vqmacc_obj;
2663+
static CONSTEXPR const vqmaccu sf_vqmaccu_obj;
2664+
static CONSTEXPR const vqmaccsu sf_vqmaccsu_obj;
2665+
static CONSTEXPR const vqmaccus sf_vqmaccus_obj;
25632666
static CONSTEXPR const binop<SS_PLUS> vsadd_obj;
25642667
static CONSTEXPR const binop<SS_MINUS> vssub_obj;
25652668
static CONSTEXPR const binop<US_PLUS> vsaddu_obj;
@@ -2890,6 +2993,10 @@ BASE (vwmacc)
28902993
BASE (vwmaccu)
28912994
BASE (vwmaccsu)
28922995
BASE (vwmaccus)
2996+
BASE (sf_vqmacc)
2997+
BASE (sf_vqmaccu)
2998+
BASE (sf_vqmaccsu)
2999+
BASE (sf_vqmaccus)
28933000
BASE (vsadd)
28943001
BASE (vssub)
28953002
BASE (vsaddu)

gcc/config/riscv/riscv-vector-builtins-bases.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,10 @@ extern const function_base *const vwmacc;
109109
extern const function_base *const vwmaccu;
110110
extern const function_base *const vwmaccsu;
111111
extern const function_base *const vwmaccus;
112+
extern const function_base *const sf_vqmacc;
113+
extern const function_base *const sf_vqmaccu;
114+
extern const function_base *const sf_vqmaccsu;
115+
extern const function_base *const sf_vqmaccus;
112116
extern const function_base *const vsadd;
113117
extern const function_base *const vssub;
114118
extern const function_base *const vsaddu;

gcc/config/riscv/riscv-vector-builtins-shapes.cc

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,6 +1287,35 @@ struct crypto_vv_no_op_type_def : public build_base
12871287
}
12881288
};
12891289

1290+
/* sf_vqmacc_def class.  get_name () builds the intrinsic name in this order:
   base name, operand-type suffix, vector type suffix (non-overloaded names
   only), predication suffix.  */
1291+
struct sf_vqmacc_def : public build_base
1292+
{
1293+
char *get_name (function_builder &b, const function_instance &instance,
1294+
bool overloaded_p) const override
1295+
{
1296+
b.append_base_name (instance.base_name);
1297+
1298+
/* Append the operand-type suffix: vop --> vop_<op>. */
1299+
b.append_name (operand_suffixes[instance.op_info->op]);
1300+
1301+
/* If an overloaded name is requested but this predication cannot be
   overloaded, stop here and return the name built so far (base name plus
   operand-type suffix).
   NOTE(review): this comment previously said nullptr is returned, but the
   code returns b.finish_name () -- confirm which is intended. */
1302+
if (overloaded_p && !instance.base->can_be_overloaded_p (instance.pred))
1303+
return b.finish_name ();
1304+
1305+
if (!overloaded_p)
1306+
{
1307+
/* vop_<op> --> vop_<op>_<type>. */
1308+
b.append_name (type_suffixes[instance.type.index].vector);
1309+
}
1310+
1311+
/* According to the SiFive vector-intrinsic-doc, the predication suffix
1312+
(e.g. "_tu") is appended to both the overloaded and non-overloaded name. */
1313+
b.append_name (predication_suffixes[instance.pred]);
1314+
1315+
return b.finish_name ();
1316+
}
1317+
};
1318+
12901319
SHAPE(vsetvl, vsetvl)
12911320
SHAPE(vsetvl, vsetvlmax)
12921321
SHAPE(loadstore, loadstore)
@@ -1321,4 +1350,5 @@ SHAPE(seg_fault_load, seg_fault_load)
13211350
SHAPE(crypto_vv, crypto_vv)
13221351
SHAPE(crypto_vi, crypto_vi)
13231352
SHAPE(crypto_vv_no_op_type, crypto_vv_no_op_type)
1353+
SHAPE (sf_vqmacc, sf_vqmacc)
13241354
} // end namespace riscv_vector

gcc/config/riscv/riscv-vector-builtins-shapes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ extern const function_shape *const seg_fault_load;
5959
extern const function_shape *const crypto_vv;
6060
extern const function_shape *const crypto_vi;
6161
extern const function_shape *const crypto_vv_no_op_type;
62+
/* SiFive vendor extension. */
63+
extern const function_shape *const sf_vqmacc;
6264
}
6365

6466
} // end namespace riscv_vector

gcc/config/riscv/riscv-vector-builtins-types.def

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,12 @@ along with GCC; see the file COPYING3. If not see
357357
#define DEF_RVV_CRYPTO_SEW64_OPS(TYPE, REQUIRE)
358358
#endif
359359

360+
/* Use the "DEF_RVV_QMACC_OPS" macro to list the signed integer types that will
361+
be iterated and registered as intrinsic functions. */
362+
#ifndef DEF_RVV_QMACC_OPS
363+
#define DEF_RVV_QMACC_OPS(TYPE, REQUIRE)
364+
#endif
365+
360366
DEF_RVV_I_OPS (vint8mf8_t, RVV_REQUIRE_MIN_VLEN_64)
361367
DEF_RVV_I_OPS (vint8mf4_t, 0)
362368
DEF_RVV_I_OPS (vint8mf2_t, 0)
@@ -1440,6 +1446,11 @@ DEF_RVV_CRYPTO_SEW64_OPS (vuint64m2_t, RVV_REQUIRE_ELEN_64)
14401446
DEF_RVV_CRYPTO_SEW64_OPS (vuint64m4_t, RVV_REQUIRE_ELEN_64)
14411447
DEF_RVV_CRYPTO_SEW64_OPS (vuint64m8_t, RVV_REQUIRE_ELEN_64)
14421448

1449+
DEF_RVV_QMACC_OPS (vint32m1_t, 0)
1450+
DEF_RVV_QMACC_OPS (vint32m2_t, 0)
1451+
DEF_RVV_QMACC_OPS (vint32m4_t, 0)
1452+
DEF_RVV_QMACC_OPS (vint32m8_t, 0)
1453+
14431454
#undef DEF_RVV_I_OPS
14441455
#undef DEF_RVV_U_OPS
14451456
#undef DEF_RVV_F_OPS
@@ -1494,3 +1505,4 @@ DEF_RVV_CRYPTO_SEW64_OPS (vuint64m8_t, RVV_REQUIRE_ELEN_64)
14941505
#undef DEF_RVV_CRYPTO_SEW32_OPS
14951506
#undef DEF_RVV_CRYPTO_SEW64_OPS
14961507
#undef DEF_RVV_F32_OPS
1508+
#undef DEF_RVV_QMACC_OPS

0 commit comments

Comments
 (0)