@@ -2995,24 +2995,164 @@ RVOP(
2995
2995
(rv)->V[rv_reg_zero][i] = 0; \
2996
2996
}
2997
2997
2998
/* Number of 32-bit words needed to hold one VLEN-bit vector register. */
#define VREG_U32_COUNT ((VLEN) / 32)
2999
+ /*
3000
+ * Vector Configuration-Setting Instructions
3001
+ *
3002
+ * These instructions set the vector CSRs, specifically csr_vl and csr_vtype.
3003
+ * The CSRs can only be updated using vset{i}vl{i} instructions. The current
3004
+ * implementation does not support vma and vta.
3005
+ *
3006
+ * The value VLMAX = (LMUL * VLEN) / SEW represents the maximum number of
3007
+ * elements that can be processed by a single vector instruction given the
3008
+ * current SEW and LMUL.
3009
+ *
3010
+ * Constraints on Setting vl:
3011
+ * - vl = AVL if AVL ≤ VLMAX
3012
+ * - ceil(AVL / 2) ≤ vl ≤ VLMAX if AVL < 2 * VLMAX
3013
+ * - vl = VLMAX if AVL ≥ 2 * VLMAX
3014
+ *
3015
+ * +------------+------+--------------+
3016
+ * | vlmul[2:0] | LMUL | VLMAX |
3017
+ * +------------+------+--------------+
3018
+ * | 1 0 0 | - | - |
3019
+ * | 1 0 1 | 1/8 | VLEN/SEW/8 |
3020
+ * | 1 1 0 | 1/4 | VLEN/SEW/4 |
3021
+ * | 1 1 1 | 1/2 | VLEN/SEW/2 |
3022
+ * | 0 0 0 | 1 | VLEN/SEW |
3023
+ * | 0 0 1 | 2 | 2*VLEN/SEW |
3024
+ * | 0 1 0 | 4 | 4*VLEN/SEW |
3025
+ * | 0 1 1 | 8 | 8*VLEN/SEW |
3026
+ * +------------+------+--------------+
3027
+ *
3028
+ * LMUL determines how vector registers are grouped. Since VL controls the
3029
+ * number of processed elements (based on SEW) and is derived from VLMAX,
3030
+ * LMUL's primary role is setting VLMAX. This implementation computes VLMAX
3031
+ * directly, avoiding fractional LMUL values (e.g., 1/2, 1/4, 1/8).
3032
+ *
3033
+ * Mapping of rd, rs1, and AVL value effects on vl (vsetvli/vsetvl only):
3034
+ * +-----+-----+------------------+----------------------------------+
3035
+ * | rd | rs1 | AVL value | Effect on vl |
3036
+ * +-----+-----+------------------+----------------------------------+
3037
+ * | - | !x0 | Value in x[rs1] | Normal stripmining |
3038
+ * | !x0 | x0 | ~0 | Set vl to VLMAX |
3039
+ * | x0 | x0 | Value in vl reg | Keep existing vl |
3040
+ * +-----+-----+------------------+----------------------------------+
3041
+ *
3042
+ * +------------+----------+
3043
+ * | vsew[2:0] | SEW |
3044
+ * +------------+----------+
3045
+ * | 0 0 0 | 8 |
3046
+ * | 0 0 1 | 16 |
3047
+ * | 0 1 0 | 32 |
3048
+ * | 0 1 1 | 64 |
3049
+ * | 1 X X | Reserved |
3050
+ * +------------+----------+
3051
+ */
3052
+
3053
/*
 * vl_setting(vlmax_, rs1, vl): derive vl from the requested vector length.
 *
 *   vlmax_  VLMAX for the selected SEW/LMUL
 *   rs1     requested AVL (evaluated exactly once)
 *   vl      lvalue that receives the resulting vector length
 *
 * The result is AVL when AVL <= VLMAX and VLMAX otherwise. The RVV rules
 * permit any value in [ceil(AVL / 2), VLMAX] when VLMAX < AVL < 2 * VLMAX;
 * VLMAX is chosen there as well, so that range needs no separate branch.
 *
 * The body is wrapped in do { } while (0) so the expansion is a single
 * statement and stays correct inside an unbraced if/else.
 */
#define vl_setting(vlmax_, rs1, vl)          \
    do {                                     \
        const uint32_t avl_ = (rs1);         \
        const uint32_t cap_ = (vlmax_);      \
        (vl) = (avl_ <= cap_) ? avl_ : cap_; \
    } while (0)
3061
+
2998
3062
RVOP (
2999
3063
vsetvli ,
3000
- { NO_IMP ; },
3064
+ {
3065
+ uint8_t v_lmul = ir -> zimm & 0b111 ;
3066
+ uint8_t v_sew = (ir -> zimm >> 3 ) & 0b111 ;
3067
+
3068
+ if (v_lmul == 4 || v_sew >= 4 ) {
3069
+ /* Illegal setting */
3070
+ rv -> csr_vl = 0 ;
3071
+ rv -> csr_vtype = 0x80000000 ;
3072
+ return true;
3073
+ }
3074
+ uint16_t vlmax = (v_lmul < 4 )
3075
+ ? ((1 << v_lmul ) * VLEN ) >> (3 + v_sew )
3076
+ : (VLEN >> (3 + v_sew ) >> (3 - (v_lmul - 5 )));
3077
+ if (ir -> rs1 ) {
3078
+ vl_setting (vlmax , rv -> X [ir -> rs1 ], rv -> csr_vl );
3079
+ rv -> csr_vtype = ir -> zimm ;
3080
+ } else {
3081
+ if (!ir -> rd ) {
3082
+ rv -> csr_vtype = ir -> zimm ;
3083
+ } else {
3084
+ rv -> csr_vl = vlmax ;
3085
+ rv -> csr_vtype = ir -> zimm ;
3086
+ }
3087
+ }
3088
+ rv -> X [ir -> rd ] = rv -> csr_vl ;
3089
+ },
3001
3090
GEN ({
3002
3091
assert ; /* FIXME: Implement */
3003
3092
}))
3004
3093
RVOP (
3005
3094
vsetivli ,
3006
- { NO_IMP ; },
3095
+ {
3096
+ uint8_t v_lmul = ir -> zimm & 0b111 ;
3097
+ uint8_t v_sew = (ir -> zimm >> 3 ) & 0b111 ;
3098
+
3099
+ if (v_lmul == 4 || v_sew >= 4 ) {
3100
+ /* Illegal setting */
3101
+ rv -> csr_vl = 0 ;
3102
+ rv -> csr_vtype = 0x80000000 ;
3103
+ return true;
3104
+ }
3105
+ uint16_t vlmax = (v_lmul < 4 )
3106
+ ? ((1 << v_lmul ) * VLEN ) >> (3 + v_sew )
3107
+ : (VLEN >> (3 + v_sew ) >> (3 - (v_lmul - 5 )));
3108
+ if (ir -> rs1 ) {
3109
+ vl_setting (vlmax , ir -> rs1 , rv -> csr_vl );
3110
+ rv -> csr_vtype = ir -> zimm ;
3111
+ } else {
3112
+ if (!ir -> rd ) {
3113
+ rv -> csr_vtype = ir -> zimm ;
3114
+ } else {
3115
+ rv -> csr_vl = vlmax ;
3116
+ rv -> csr_vtype = ir -> zimm ;
3117
+ }
3118
+ }
3119
+ rv -> X [ir -> rd ] = rv -> csr_vl ;
3120
+ },
3007
3121
GEN ({
3008
3122
assert ; /* FIXME: Implement */
3009
3123
}))
3010
3124
RVOP (
3011
3125
vsetvl ,
3012
- { NO_IMP ; },
3126
+ {
3127
+ uint8_t v_lmul = rv -> X [ir -> rs2 ] & 0b111 ;
3128
+ uint8_t v_sew = (rv -> X [ir -> rs2 ] >> 3 ) & 0b111 ;
3129
+
3130
+ if (v_lmul == 4 || v_sew >= 4 ) {
3131
+ /* Illegal setting */
3132
+ rv -> csr_vl = 0 ;
3133
+ rv -> csr_vtype = 0x80000000 ;
3134
+ return true;
3135
+ }
3136
+ uint16_t vlmax = (v_lmul < 4 )
3137
+ ? ((1 << v_lmul ) * VLEN ) >> (3 + v_sew )
3138
+ : (VLEN >> (3 + v_sew ) >> (3 - (v_lmul - 5 )));
3139
+ if (rv -> X [ir -> rs1 ]) {
3140
+ vl_setting (vlmax , rv -> X [ir -> rs1 ], rv -> csr_vl );
3141
+ rv -> csr_vtype = rv -> X [ir -> rs2 ];
3142
+ } else {
3143
+ if (!ir -> rd ) {
3144
+ rv -> csr_vtype = rv -> X [ir -> rs2 ];
3145
+ } else {
3146
+ rv -> csr_vl = vlmax ;
3147
+ rv -> csr_vtype = rv -> X [ir -> rs2 ];
3148
+ }
3149
+ }
3150
+ rv -> X [ir -> rd ] = rv -> csr_vl ;
3151
+ },
3013
3152
GEN ({
3014
3153
assert ; /* FIXME: Implement */
3015
3154
}))
3155
+ #undef vl_setting
3016
3156
3017
3157
RVOP (
3018
3158
vle8_v ,
0 commit comments