Skip to content

Commit 0934f19

Browse files
committed
Implement vector configuration instructions
Add support for vset{i}vl{i} instructions following the RISC-V vector extension version 1.0. Simplify the VLMAX calculation by computing vlmax = lmul * vlen / sew directly with integer arithmetic instead of converting to floating-point as described in the specification.
1 parent 1737e76 commit 0934f19

File tree

1 file changed

+143
-3
lines changed

1 file changed

+143
-3
lines changed

src/rv32_template.c

Lines changed: 143 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2995,24 +2995,164 @@ RVOP(
29952995
(rv)->V[rv_reg_zero][i] = 0; \
29962996
}
29972997

2998+
/* Number of 32-bit words in one vector register: VLEN divided by 32. */
#define VREG_U32_COUNT ((VLEN) >> 5)
2999+
/*
3000+
* Vector Configuration-Setting Instructions
3001+
*
3002+
* These instructions set the vector CSRs, specifically csr_vl and csr_vtype.
3003+
* The CSRs can only be updated using vset{i}vl{i} instructions. The current
3004+
* implementation does not support vma and vta.
3005+
*
3006+
* The value VLMAX = (LMUL * VLEN) / SEW represents the maximum number of
3007+
* elements that can be processed by a single vector instruction given the
3008+
* current SEW and LMUL.
3009+
*
3010+
* Constraints on Setting vl:
3011+
* - vl = AVL if AVL ≤ VLMAX
3012+
* - ceil(AVL / 2) ≤ vl ≤ VLMAX if AVL < 2 * VLMAX
3013+
* - vl = VLMAX if AVL ≥ 2 * VLMAX
3014+
*
3015+
* +------------+------+--------------+
3016+
* | vlmul[2:0] | LMUL | VLMAX |
3017+
* +------------+------+--------------+
3018+
* | 1 0 0 | - | - |
3019+
* | 1 0 1 | 1/8 | VLEN/SEW/8 |
3020+
* | 1 1 0 | 1/4 | VLEN/SEW/4 |
3021+
* | 1 1 1 | 1/2 | VLEN/SEW/2 |
3022+
* | 0 0 0 | 1 | VLEN/SEW |
3023+
* | 0 0 1 | 2 | 2*VLEN/SEW |
3024+
* | 0 1 0 | 4 | 4*VLEN/SEW |
3025+
* | 0 1 1 | 8 | 8*VLEN/SEW |
3026+
* +------------+------+--------------+
3027+
*
3028+
* LMUL determines how vector registers are grouped. Since VL controls the
3029+
* number of processed elements (based on SEW) and is derived from VLMAX,
3030+
* LMUL's primary role is setting VLMAX. This implementation computes VLMAX
3031+
* directly, avoiding fractional LMUL values (e.g., 1/2, 1/4, 1/8).
3032+
*
3033+
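* Mapping of rd, rs1, and AVL value effects on vl. This table applies to
* vsetvli and vsetvl only; vsetivli always takes AVL from the 5-bit
* immediate held in its rs1 field, so an immediate of 0 means AVL = 0: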
3034+
* +-----+-----+------------------+----------------------------------+
3035+
* | rd | rs1 | AVL value | Effect on vl |
3036+
* +-----+-----+------------------+----------------------------------+
3037+
* | - | !x0 | Value in x[rs1] | Normal stripmining |
3038+
* | !x0 | x0 | ~0 | Set vl to VLMAX |
3039+
* | x0 | x0 | Value in vl reg | Keep existing vl |
3040+
* +-----+-----+------------------+----------------------------------+
3041+
*
3042+
* +------------+----------+
3043+
* | vsew[2:0] | SEW |
3044+
* +------------+----------+
3045+
* | 0 0 0 | 8 |
3046+
* | 0 0 1 | 16 |
3047+
* | 0 1 0 | 32 |
3048+
* | 0 1 1 | 64 |
3049+
* | 1 X X | Reserved |
3050+
* +------------+----------+
3051+
*/
3052+
3053+
/*
 * vl_setting(vlmax_, rs1, vl): assign the new vector length to 'vl' for a
 * requested application vector length 'rs1' (AVL) and a maximum 'vlmax_'.
 *
 *   AVL <= VLMAX : vl = AVL
 *   AVL >  VLMAX : vl = VLMAX
 *
 * For VLMAX < AVL < 2 * VLMAX the specification allows any value in
 * [ceil(AVL / 2), VLMAX]; this implementation always picks VLMAX, so that
 * range needs no separate branch.
 *
 * The body is wrapped in do { } while (0) so the macro acts as a single
 * statement (safe inside an unbraced if/else). Every argument is
 * parenthesized. 'rs1' and 'vlmax_' may be evaluated more than once, so
 * they must be free of side effects.
 */
#define vl_setting(vlmax_, rs1, vl)                            \
    do {                                                       \
        (vl) = ((rs1) <= (vlmax_)) ? (rs1) : (vlmax_);         \
    } while (0)
3061+
29983062
RVOP(
29993063
vsetvli,
3000-
{ NO_IMP; },
3064+
{
3065+
uint8_t v_lmul = ir->zimm & 0b111;
3066+
uint8_t v_sew = (ir->zimm >> 3) & 0b111;
3067+
3068+
if (v_lmul == 4 || v_sew >= 4) {
3069+
/* Illegal setting */
3070+
rv->csr_vl = 0;
3071+
rv->csr_vtype = 0x80000000;
3072+
return true;
3073+
}
3074+
uint16_t vlmax = (v_lmul < 4)
3075+
? ((1 << v_lmul) * VLEN) >> (3 + v_sew)
3076+
: (VLEN >> (3 + v_sew) >> (3 - (v_lmul - 5)));
3077+
if (ir->rs1) {
3078+
vl_setting(vlmax, rv->X[ir->rs1], rv->csr_vl);
3079+
rv->csr_vtype = ir->zimm;
3080+
} else {
3081+
if (!ir->rd) {
3082+
rv->csr_vtype = ir->zimm;
3083+
} else {
3084+
rv->csr_vl = vlmax;
3085+
rv->csr_vtype = ir->zimm;
3086+
}
3087+
}
3088+
rv->X[ir->rd] = rv->csr_vl;
3089+
},
30013090
GEN({
30023091
assert; /* FIXME: Implement */
30033092
}))
30043093
RVOP(
30053094
vsetivli,
3006-
{ NO_IMP; },
3095+
{
3096+
uint8_t v_lmul = ir->zimm & 0b111;
3097+
uint8_t v_sew = (ir->zimm >> 3) & 0b111;
3098+
3099+
if (v_lmul == 4 || v_sew >= 4) {
3100+
/* Illegal setting */
3101+
rv->csr_vl = 0;
3102+
rv->csr_vtype = 0x80000000;
3103+
return true;
3104+
}
3105+
uint16_t vlmax = (v_lmul < 4)
3106+
? ((1 << v_lmul) * VLEN) >> (3 + v_sew)
3107+
: (VLEN >> (3 + v_sew) >> (3 - (v_lmul - 5)));
3108+
if (ir->rs1) {
3109+
vl_setting(vlmax, ir->rs1, rv->csr_vl);
3110+
rv->csr_vtype = ir->zimm;
3111+
} else {
3112+
if (!ir->rd) {
3113+
rv->csr_vtype = ir->zimm;
3114+
} else {
3115+
rv->csr_vl = vlmax;
3116+
rv->csr_vtype = ir->zimm;
3117+
}
3118+
}
3119+
rv->X[ir->rd] = rv->csr_vl;
3120+
},
30073121
GEN({
30083122
assert; /* FIXME: Implement */
30093123
}))
30103124
RVOP(
30113125
vsetvl,
3012-
{ NO_IMP; },
3126+
{
3127+
uint8_t v_lmul = rv->X[ir->rs2] & 0b111;
3128+
uint8_t v_sew = (rv->X[ir->rs2] >> 3) & 0b111;
3129+
3130+
if (v_lmul == 4 || v_sew >= 4) {
3131+
/* Illegal setting */
3132+
rv->csr_vl = 0;
3133+
rv->csr_vtype = 0x80000000;
3134+
return true;
3135+
}
3136+
uint16_t vlmax = (v_lmul < 4)
3137+
? ((1 << v_lmul) * VLEN) >> (3 + v_sew)
3138+
: (VLEN >> (3 + v_sew) >> (3 - (v_lmul - 5)));
3139+
if (rv->X[ir->rs1]) {
3140+
vl_setting(vlmax, rv->X[ir->rs1], rv->csr_vl);
3141+
rv->csr_vtype = rv->X[ir->rs2];
3142+
} else {
3143+
if (!ir->rd) {
3144+
rv->csr_vtype = rv->X[ir->rs2];
3145+
} else {
3146+
rv->csr_vl = vlmax;
3147+
rv->csr_vtype = rv->X[ir->rs2];
3148+
}
3149+
}
3150+
rv->X[ir->rd] = rv->csr_vl;
3151+
},
30133152
GEN({
30143153
assert; /* FIXME: Implement */
30153154
}))
3155+
#undef vl_setting
30163156

30173157
RVOP(
30183158
vle8_v,

0 commit comments

Comments
 (0)