Skip to content

Commit aa0d15f

Browse files
committed
Modularize vector single-width instruction
Since we are using `uint32_t * n` to emulate vector registers of length `vlen`, modularize the handling of `SEW=*` by introducing `sew_*b_handler`, which also handles LMUL. These macros allow RVOP to pass the corresponding operation through `OPT()`, which maps it to the respective handling in `VI_LOOP`, `VV_LOOP`, and `VX_LOOP`. `V*_LOOP` steps: 1. Copy the operand(s) `op1` (and `op2`). 2. Align `op1` to the right. 3. Perform the specified operation between `op1` and `op2`. 4. Mask the result to the corresponding `sew`. 5. Shift the result left to align it with the corresponding position. 6. Accumulate the result into `vd`. Note that a vector register group should use vector registers v2*n and v2*n+1 when lmul = 2, and so on. The implementation right now allows using any vector registers except those that exceed v31.
1 parent 5910e7e commit aa0d15f

File tree

1 file changed

+147
-12
lines changed

1 file changed

+147
-12
lines changed

src/rv32_template.c

Lines changed: 147 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3092,6 +3092,149 @@ RVOP(
30923092
GEN({/* no operation */}))
30933093
#undef vl_setting
30943094

3095+
/* clang-format off */
3096+
/*
 * OPT - run one element-wise vector operation at the currently selected SEW.
 *
 * Bits [5:3] of rv->csr_vtype are read as a 3-bit width code and turned into
 * an element width of (8 << code) bits; the matching sew_<width>b_handler is
 * then expanded with the arguments passed through unchanged:
 *   des     - destination vector register index
 *   op1     - first source vector register index
 *   op2     - second source: vector register index, scalar register index or
 *             immediate, depending on the loop family
 *   op      - binary operator token(s) placed between the two operands
 *   op_type - loop family prefix (VI, VV or VX)
 *
 * Width codes other than 0, 1 and 2 (8/16/32 bits) reach the default case and
 * leave the state untouched.
 *
 * The field mask is spelled 0x7 instead of the binary literal 0b111, which is
 * a compiler extension before C23.
 *
 * The expansion is a braced block, so call sites may omit the trailing ';'.
 */
#define OPT(des, op1, op2, op, op_type) {                                     \
    switch (8 << ((rv->csr_vtype >> 3) & 0x7)) {                              \
    case 8:                                                                   \
        sew_8b_handler(des, op1, op2, op, op_type);                           \
        break;                                                                \
    case 16:                                                                  \
        sew_16b_handler(des, op1, op2, op, op_type);                          \
        break;                                                                \
    case 32:                                                                  \
        sew_32b_handler(des, op1, op2, op, op_type);                          \
        break;                                                                \
    default: /* unsupported element width: do nothing */                      \
        break;                                                                \
    }                                                                         \
}
3111+
3112+
/*
 * VI_LOOP - apply `op` between every lane of one 32-bit source word and an
 * immediate, and store the packed result in the destination word.
 *
 *   des, op1 - destination / source vector register indices (j is added)
 *   op2      - immediate operand, used as written for every lane
 *   op       - binary operator token(s)
 *   SHIFT    - log2 of the lane width in bits (lane k starts at k << SHIFT)
 *   MASK     - mask covering one lane
 *   i, j     - word index inside the register / register offset in the group
 *   itr      - number of lanes to process in this word
 *
 * The source word is copied to tmp_1 before the destination word is zeroed,
 * so des and op1 may name the same register. tmp_1 is declared in the
 * caller's scope; expand this macro at most once per block.
 */
#define VI_LOOP(des, op1, op2, op, SHIFT, MASK, i, j, itr)                    \
    uint32_t tmp_1 = rv->V[(op1) + (j)][(i)];                                 \
    rv->V[(des) + (j)][(i)] = 0;                                              \
    for (uint8_t lane_ = 0; lane_ < (itr); lane_++) {                         \
        const int lane_off_ = lane_ << (SHIFT);                               \
        rv->V[(des) + (j)][(i)] +=                                            \
            (((tmp_1 >> lane_off_) op (op2)) & (MASK)) << lane_off_;          \
    }
3121+
3122+
/*
 * VI_LOOP_LEFT - handle the lanes left over in a partially filled word for
 * the vector-immediate form.
 *
 * The number of leftover lanes is rv->csr_vl % itr, where itr is the number
 * of lanes per 32-bit word (4 for 8-bit lanes, 2 for 16-bit lanes). Using
 * itr instead of a fixed 4 keeps the lane offset below 32 for every lane
 * width.
 *
 *   des, op1 - destination / source vector register indices (j is added)
 *   op2      - immediate operand
 *   op       - binary operator token(s)
 *   SHIFT    - log2 of the lane width in bits
 *   MASK     - mask covering one lane
 *   i, j     - word index inside the register / register offset in the group
 *   itr      - lanes per word
 *
 * Nothing is read or written when there are no leftover lanes, so the macro
 * is safe to expand even when (des + j) or (op1 + j) would be one past the
 * last register. Each processed lane is cleared and then set; bits of the
 * destination word above the leftover lanes keep their value.
 */
#define VI_LOOP_LEFT(des, op1, op2, op, SHIFT, MASK, i, j, itr)               \
    {                                                                         \
        const uint8_t left_n_ = (uint8_t) (rv->csr_vl % (itr));               \
        if (left_n_) {                                                        \
            assert(((des) + (j)) < 32);                                       \
            /* snapshot the source first: des may alias op1 */                \
            const uint32_t left_src_ = rv->V[(op1) + (j)][(i)];               \
            for (uint8_t left_k_ = 0; left_k_ < left_n_; left_k_++) {         \
                const uint8_t left_off_ = (uint8_t) (left_k_ << (SHIFT));     \
                rv->V[(des) + (j)][(i)] =                                     \
                    (rv->V[(des) + (j)][(i)] &                                \
                     ~((uint32_t) (MASK) << left_off_)) |                     \
                    ((uint32_t) (((left_src_ >> left_off_) op (op2)) &        \
                                 (MASK))                                      \
                     << left_off_);                                           \
            }                                                                 \
        }                                                                     \
    }
3131+
3132+
/*
 * VV_LOOP - apply `op` lane by lane between two 32-bit source words and
 * store the packed result in the destination word.
 *
 *   des, op1, op2 - destination / source vector register indices (j is added)
 *   op            - binary operator token(s)
 *   SHIFT         - log2 of the lane width in bits (lane k starts at
 *                   k << SHIFT)
 *   MASK          - mask covering one lane
 *   i, j          - word index inside the register / register offset in the
 *                   group
 *   itr           - number of lanes to process in this word
 *
 * Both source words are shifted by the same lane offset, so lane k of op1 is
 * paired with lane k of op2 (an unshifted second word would feed lane 0 of
 * op2 into every lane).
 *
 * The source words are copied to tmp_1 / tmp_2 before the destination word is
 * zeroed, so des may alias either source. tmp_1 and tmp_2 are declared in the
 * caller's scope; expand this macro at most once per block.
 */
#define VV_LOOP(des, op1, op2, op, SHIFT, MASK, i, j, itr)                    \
    uint32_t tmp_1 = rv->V[(op1) + (j)][(i)];                                 \
    uint32_t tmp_2 = rv->V[(op2) + (j)][(i)];                                 \
    rv->V[(des) + (j)][(i)] = 0;                                              \
    for (uint8_t lane_ = 0; lane_ < (itr); lane_++) {                         \
        const int lane_off_ = lane_ << (SHIFT);                               \
        rv->V[(des) + (j)][(i)] +=                                            \
            (((tmp_1 >> lane_off_) op (tmp_2 >> lane_off_)) & (MASK))         \
            << lane_off_;                                                     \
    }
3142+
3143+
/*
 * VV_LOOP_LEFT - handle the lanes left over in a partially filled word for
 * the vector-vector form.
 *
 * The number of leftover lanes is rv->csr_vl % itr, where itr is the number
 * of lanes per 32-bit word (4 for 8-bit lanes, 2 for 16-bit lanes). Using
 * itr instead of a fixed 4 keeps the lane offset below 32 for every lane
 * width.
 *
 *   des, op1, op2 - destination / source vector register indices (j is added)
 *   op            - binary operator token(s)
 *   SHIFT         - log2 of the lane width in bits
 *   MASK          - mask covering one lane
 *   i, j          - word index inside the register / register offset in the
 *                   group
 *   itr           - lanes per word
 *
 * Both source words are shifted by the same lane offset so lane k of op1 is
 * paired with lane k of op2. Nothing is read or written when there are no
 * leftover lanes. Each processed lane is cleared and then set; bits of the
 * destination word above the leftover lanes keep their value.
 */
#define VV_LOOP_LEFT(des, op1, op2, op, SHIFT, MASK, i, j, itr)               \
    {                                                                         \
        const uint8_t left_n_ = (uint8_t) (rv->csr_vl % (itr));               \
        if (left_n_) {                                                        \
            assert(((des) + (j)) < 32);                                       \
            /* snapshot both sources first: des may alias either one */       \
            const uint32_t left_a_ = rv->V[(op1) + (j)][(i)];                 \
            const uint32_t left_b_ = rv->V[(op2) + (j)][(i)];                 \
            for (uint8_t left_k_ = 0; left_k_ < left_n_; left_k_++) {         \
                const uint8_t left_off_ = (uint8_t) (left_k_ << (SHIFT));     \
                rv->V[(des) + (j)][(i)] =                                     \
                    (rv->V[(des) + (j)][(i)] &                                \
                     ~((uint32_t) (MASK) << left_off_)) |                     \
                    ((uint32_t) (((left_a_ >> left_off_) op                   \
                                  (left_b_ >> left_off_)) &                   \
                                 (MASK))                                      \
                     << left_off_);                                           \
            }                                                                 \
        }                                                                     \
    }
3153+
3154+
/*
 * VX_LOOP - apply `op` between every lane of one 32-bit source word and a
 * scalar register value, and store the packed result in the destination word.
 *
 *   des, op1 - destination / source vector register indices (j is added)
 *   op2      - index into rv->X of the scalar operand
 *   op       - binary operator token(s)
 *   SHIFT    - log2 of the lane width in bits (lane k starts at k << SHIFT)
 *   MASK     - mask covering one lane
 *   i, j     - word index inside the register / register offset in the group
 *   itr      - number of lanes to process in this word
 *
 * The scalar is used unshifted for every lane; MASK trims the result to the
 * lane width. Both operands are copied (tmp_1, tmp_2) before the destination
 * word is zeroed, so des and op1 may name the same register. tmp_1 and tmp_2
 * are declared in the caller's scope; expand this macro at most once per
 * block.
 */
#define VX_LOOP(des, op1, op2, op, SHIFT, MASK, i, j, itr)                    \
    uint32_t tmp_1 = rv->V[(op1) + (j)][(i)];                                 \
    uint32_t tmp_2 = rv->X[(op2)];                                            \
    rv->V[(des) + (j)][(i)] = 0;                                              \
    for (uint8_t lane_ = 0; lane_ < (itr); lane_++) {                         \
        const int lane_off_ = lane_ << (SHIFT);                               \
        rv->V[(des) + (j)][(i)] +=                                            \
            (((tmp_1 >> lane_off_) op (tmp_2)) & (MASK)) << lane_off_;        \
    }
3164+
3165+
/*
 * VX_LOOP_LEFT - handle the lanes left over in a partially filled word for
 * the vector-scalar form.
 *
 * The number of leftover lanes is rv->csr_vl % itr, where itr is the number
 * of lanes per 32-bit word (4 for 8-bit lanes, 2 for 16-bit lanes). Using
 * itr instead of a fixed 4 keeps the lane offset below 32 for every lane
 * width.
 *
 *   des, op1 - destination / source vector register indices (j is added)
 *   op2      - index into rv->X of the scalar operand
 *   op       - binary operator token(s)
 *   SHIFT    - log2 of the lane width in bits
 *   MASK     - mask covering one lane
 *   i, j     - word index inside the register / register offset in the group
 *   itr      - lanes per word
 *
 * Nothing is read or written when there are no leftover lanes. Each processed
 * lane is cleared and then set; bits of the destination word above the
 * leftover lanes keep their value.
 */
#define VX_LOOP_LEFT(des, op1, op2, op, SHIFT, MASK, i, j, itr)               \
    {                                                                         \
        const uint8_t left_n_ = (uint8_t) (rv->csr_vl % (itr));               \
        if (left_n_) {                                                        \
            assert(((des) + (j)) < 32);                                       \
            /* snapshot the source first: des may alias op1 */                \
            const uint32_t left_src_ = rv->V[(op1) + (j)][(i)];               \
            const uint32_t left_x_ = rv->X[(op2)];                            \
            for (uint8_t left_k_ = 0; left_k_ < left_n_; left_k_++) {         \
                const uint8_t left_off_ = (uint8_t) (left_k_ << (SHIFT));     \
                rv->V[(des) + (j)][(i)] =                                     \
                    (rv->V[(des) + (j)][(i)] &                                \
                     ~((uint32_t) (MASK) << left_off_)) |                     \
                    ((uint32_t) (((left_src_ >> left_off_) op (left_x_)) &    \
                                 (MASK))                                      \
                     << left_off_);                                           \
            }                                                                 \
        }                                                                     \
    }
3175+
3176+
/*
 * sew_8b_handler - run an element-wise operation over rv->csr_vl 8-bit
 * elements, four elements per 32-bit word.
 *
 *   des, op1, op2 - operands forwarded to the loop macro
 *   op            - binary operator token(s)
 *   op_type       - loop family prefix; op_type##_LOOP is expanded
 *
 * Full words are processed first. The word index wraps to 0 and the register
 * offset advances each time LEN words (LEN * 4 elements) have been written,
 * which is how register groups are walked; the mask test assumes LEN is a
 * power of two.
 *
 * A final partial word (csr_vl % 4 elements) is handled by the same
 * op_type##_LOOP with the leftover count as its lane count:
 *   1. the destination bits above the leftover lanes are saved,
 *   2. the loop macro reads its sources, zeroes the word and writes the
 *      leftover lanes,
 *   3. the saved bits are merged back.
 * Saving before the loop macro runs keeps the tail intact, and letting the
 * loop macro read its sources before anything is cleared keeps in-place
 * operations (des equal to a source register) correct. No register is touched
 * when there is no partial word, so a group ending exactly at v31 never
 * indexes past the register file.
 */
#define sew_8b_handler(des, op1, op2, op, op_type)                            \
    {                                                                         \
        uint8_t sew8_word_ = 0;                                               \
        uint8_t sew8_reg_ = 0;                                                \
        for (uint32_t sew8_done_ = 0; (rv->csr_vl - sew8_done_) >= 4;) {      \
            sew8_word_ %= LEN;                                                \
            assert(((des) + sew8_reg_) < 32);                                 \
            op_type##_LOOP(des, op1, op2, op, 3, 0xFF, sew8_word_,            \
                           sew8_reg_, 4);                                     \
            sew8_done_ += 4;                                                  \
            sew8_word_++;                                                     \
            if (!(sew8_done_ & ((LEN << 2) - 1))) {                           \
                sew8_reg_++;                                                  \
                sew8_word_ = 0;                                               \
            }                                                                 \
        }                                                                     \
        if (rv->csr_vl % 4) {                                                 \
            assert(((des) + sew8_reg_) < 32);                                 \
            /* bits of the tail elements, kept across the partial write */    \
            const uint32_t sew8_keep_ =                                       \
                rv->V[(des) + sew8_reg_][sew8_word_] &                        \
                (0xFFFFFFFFu << ((rv->csr_vl % 4) << 3));                     \
            op_type##_LOOP(des, op1, op2, op, 3, 0xFF, sew8_word_,            \
                           sew8_reg_, (rv->csr_vl % 4));                      \
            rv->V[(des) + sew8_reg_][sew8_word_] |= sew8_keep_;               \
        }                                                                     \
    }
3197+
3198+
/*
 * sew_16b_handler - run an element-wise operation over rv->csr_vl 16-bit
 * elements, two elements per 32-bit word.
 *
 *   des, op1, op2 - operands forwarded to the loop macro
 *   op            - binary operator token(s)
 *   op_type       - loop family prefix; op_type##_LOOP is expanded
 *
 * Full words are processed first. The word index wraps to 0 and the register
 * offset advances each time LEN words (LEN * 2 elements) have been written;
 * the mask test assumes LEN is a power of two.
 *
 * An odd csr_vl leaves one element in a final partial word. That word is
 * handled by the same op_type##_LOOP with a lane count of csr_vl % 2:
 *   1. the upper 16 bits of the destination word (the tail element) are
 *      saved - the shift is (csr_vl % 2) << 4, i.e. one 16-bit lane,
 *   2. the loop macro reads its sources, zeroes the word and writes the
 *      leftover lane,
 *   3. the saved bits are merged back.
 * Reading the sources before anything is cleared keeps in-place operations
 * (des equal to a source register) correct, and no register is touched when
 * csr_vl is even.
 */
#define sew_16b_handler(des, op1, op2, op, op_type)                           \
    {                                                                         \
        uint8_t sew16_word_ = 0;                                              \
        uint8_t sew16_reg_ = 0;                                               \
        for (uint32_t sew16_done_ = 0; (rv->csr_vl - sew16_done_) >= 2;) {    \
            sew16_word_ %= LEN;                                               \
            assert(((des) + sew16_reg_) < 32);                                \
            op_type##_LOOP(des, op1, op2, op, 4, 0xFFFF, sew16_word_,         \
                           sew16_reg_, 2);                                    \
            sew16_done_ += 2;                                                 \
            sew16_word_++;                                                    \
            if (!(sew16_done_ & ((LEN << 1) - 1))) {                          \
                sew16_reg_++;                                                 \
                sew16_word_ = 0;                                              \
            }                                                                 \
        }                                                                     \
        if (rv->csr_vl % 2) {                                                 \
            assert(((des) + sew16_reg_) < 32);                                \
            /* bits of the tail element, kept across the partial write */     \
            const uint32_t sew16_keep_ =                                      \
                rv->V[(des) + sew16_reg_][sew16_word_] &                      \
                (0xFFFFFFFFu << ((rv->csr_vl % 2) << 4));                     \
            op_type##_LOOP(des, op1, op2, op, 4, 0xFFFF, sew16_word_,         \
                           sew16_reg_, (rv->csr_vl % 2));                     \
            rv->V[(des) + sew16_reg_][sew16_word_] |= sew16_keep_;            \
        }                                                                     \
    }
3219+
3220+
/*
 * sew_32b_handler - run an element-wise operation over rv->csr_vl 32-bit
 * elements, one element per 32-bit word.
 *
 *   des, op1, op2 - operands forwarded to the loop macro
 *   op            - binary operator token(s)
 *   op_type       - loop family prefix; op_type##_LOOP is expanded
 *
 * Every element fills a whole word, so there is no partial-word tail. The
 * word index wraps to 0 and the register offset advances each time LEN
 * elements have been written; the mask test assumes LEN is a power of two.
 */
#define sew_32b_handler(des, op1, op2, op, op_type)                           \
    {                                                                         \
        uint8_t word32_ = 0;                                                  \
        uint8_t reg32_ = 0;                                                   \
        uint32_t done32_ = 0;                                                 \
        while (done32_ < rv->csr_vl) {                                        \
            word32_ %= LEN;                                                   \
            assert((des + reg32_) < 32);                                      \
            op_type##_LOOP(des, op1, op2, op, 0, 0xFFFFFFFF, word32_,         \
                           reg32_, 1);                                        \
            done32_ += 1;                                                     \
            word32_++;                                                        \
            if ((done32_ & (LEN - 1)) == 0) {                                 \
                reg32_++;                                                     \
                word32_ = 0;                                                  \
            }                                                                 \
        }                                                                     \
    }
3236+
/* clang-format on */
3237+
30953238
/*
30963239
* j sets (v*n + j)
30973240
* i sets (rv->V[ir->vd][0,1,2,3] for 128 as example)
@@ -5769,25 +5912,19 @@ RVOP(
57695912
RVOP(
57705913
vadd_vv,
57715914
{
5772-
for (int i = 0; i < 4; i++) {
5773-
rv->V[rv_reg_zero][i] = 0;
5774-
}
5915+
/* Vector-vector add: expands OPT with destination ir->vd, sources ir->vs2
 * and ir->vs1, operator '+', and the VV loop family. OPT picks the
 * sew_*b_handler from rv->csr_vtype. The lines marked '-' above are the
 * body this diff removes. */
OPT(ir->vd, ir->vs2, ir->vs1, +, VV)
57755916
},
57765917
GEN({/* no operation */}))
57775918
RVOP(
57785919
vadd_vx,
57795920
{
5780-
for (int i = 0; i < 4; i++) {
5781-
rv->V[rv_reg_zero][i] = 0;
5782-
}
5921+
/* Vector-scalar add: expands OPT with destination ir->vd, vector source
 * ir->vs2, operator '+', and the VX loop family, which reads the scalar
 * operand from rv->X[ir->rs1]. The lines marked '-' above are the body this
 * diff removes. */
OPT(ir->vd, ir->vs2, ir->rs1, +, VX)
57835922
},
57845923
GEN({/* no operation */}))
57855924
RVOP(
57865925
vadd_vi,
57875926
{
5788-
for (int i = 0; i < 4; i++) {
5789-
rv->V[rv_reg_zero][i] = 0;
5790-
}
5927+
/* Vector-immediate add: expands OPT with destination ir->vd, vector source
 * ir->vs2, operator '+', and the VI loop family, which uses ir->imm as the
 * second operand of every element. The lines marked '-' above are the body
 * this diff removes. */
OPT(ir->vd, ir->vs2, ir->imm, +, VI)
57915928
},
57925929
GEN({/* no operation */}))
57935930
RVOP(
@@ -6145,9 +6282,7 @@ RVOP(
61456282
RVOP(
61466283
vmv_v_i,
61476284
{
6148-
for (int i = 0; i < 4; i++) {
6149-
rv->V[rv_reg_zero][i] = 0;
6150-
}
6285+
OPT(ir->vd, 0, ir->imm, +, VI)
61516286
},
61526287
GEN({/* no operation */}))
61536288
RVOP(

0 commit comments

Comments
 (0)