@@ -3092,6 +3092,149 @@ RVOP(
30923092 GEN({/* no operation */}))
30933093#undef vl_setting
30943094
3095+ /* clang-format off */
3096+ #define OPT(des, op1, op2, op, op_type) { \
3097+ switch (8 << ((rv->csr_vtype >> 3) & 0b111)) { \
3098+ case 8: \
3099+ sew_8b_handler(des, op1, op2, op, op_type); \
3100+ break; \
3101+ case 16: \
3102+ sew_16b_handler(des, op1, op2, op, op_type); \
3103+ break; \
3104+ case 32: \
3105+ sew_32b_handler(des, op1, op2, op, op_type); \
3106+ break; \
3107+ default: \
3108+ break; \
3109+ } \
3110+ }
3111+
3112+ #define VI_LOOP(des, op1, op2, op, SHIFT, MASK, i, j, itr) \
3113+ uint32_t tmp_1 = rv->V[op1 + j][i]; \
3114+ rv->V[des + j][i] = 0; \
3115+ for (uint8_t ___cnt = 0; ___cnt < itr; ___cnt++) { \
3116+ rv->V[des + j][i] += \
3117+ ( \
3118+ ( (tmp_1 >> (___cnt << (SHIFT))) op (op2) ) & (MASK) \
3119+ ) << (___cnt << (SHIFT)); \
3120+ }
3121+
3122+ #define VI_LOOP_LEFT(des, op1, op2, op, SHIFT, MASK, i, j, itr) \
3123+ uint32_t tmp_1 = rv->V[op1 + j][i]; \
3124+ for (uint8_t __cnt = 0; __cnt < (rv->csr_vl % (itr)); __cnt++) { \
3125+ assert((des + j) < 32); \
3126+ rv->V[des + j][i] += \
3127+ ( \
3128+ ( (tmp_1 >> (__cnt << (SHIFT))) op (op2) ) & (MASK) \
3129+ ) << (__cnt << (SHIFT)); \
3130+ }
3131+
3132+ #define VV_LOOP(des, op1, op2, op, SHIFT, MASK, i, j, itr) \
3133+ uint32_t tmp_1 = rv->V[op1 + j][i]; \
3134+ uint32_t tmp_2 = rv->V[op2 + j][i]; \
3135+ rv->V[des + j][i] = 0; \
3136+ for (uint8_t ___cnt = 0; ___cnt < itr; ___cnt++) { \
3137+ rv->V[des + j][i] += \
3138+ ( \
3139+ ( (tmp_1 >> (___cnt << (SHIFT))) op (tmp_2 >> (___cnt << (SHIFT))) ) & (MASK) \
3140+ ) << (___cnt << (SHIFT)); \
3141+ }
3142+
3143+ #define VV_LOOP_LEFT(des, op1, op2, op, SHIFT, MASK, i, j, itr) \
3144+ uint32_t tmp_1 = rv->V[op1 + j][i]; \
3145+ uint32_t tmp_2 = rv->V[op2 + j][i]; \
3146+ for (uint8_t __cnt = 0; __cnt < (rv->csr_vl % (itr)); __cnt++) { \
3147+ assert((des + j) < 32); \
3148+ rv->V[des + j][i] += \
3149+ ( \
3150+ ( (tmp_1 >> (__cnt << (SHIFT))) op (tmp_2 >> (__cnt << (SHIFT))) ) & (MASK) \
3151+ ) << (__cnt << (SHIFT)); \
3152+ }
3153+
3154+ #define VX_LOOP(des, op1, op2, op, SHIFT, MASK, i, j, itr) \
3155+ uint32_t tmp_1 = rv->V[op1 + j][i]; \
3156+ uint32_t tmp_2 = rv->X[op2]; \
3157+ rv->V[des + j][i] = 0; \
3158+ for (uint8_t ___cnt = 0; ___cnt < itr; ___cnt++) { \
3159+ rv->V[des + j][i] += \
3160+ ( \
3161+ ( (tmp_1 >> (___cnt << (SHIFT))) op (tmp_2) ) & (MASK) \
3162+ ) << (___cnt << (SHIFT)); \
3163+ }
3164+
3165+ #define VX_LOOP_LEFT(des, op1, op2, op, SHIFT, MASK, i, j, itr) \
3166+ uint32_t tmp_1 = rv->V[op1 + j][i]; \
3167+ uint32_t tmp_2 = rv->X[op2]; \
3168+ for (uint8_t __cnt = 0; __cnt < (rv->csr_vl % (itr)); __cnt++) { \
3169+ assert((des + j) < 32); \
3170+ rv->V[des + j][i] += \
3171+ ( \
3172+ ( (tmp_1 >> (__cnt << (SHIFT))) op (tmp_2) ) & (MASK) \
3173+ ) << (__cnt << (SHIFT)); \
3174+ }
3175+
3176+ #define sew_8b_handler(des, op1, op2, op, op_type) \
3177+ { \
3178+ uint8_t __i = 0; \
3179+ uint8_t __j = 0; \
3180+ for (uint32_t __cnt = 0; (rv->csr_vl - __cnt) >= 4;) { \
3181+ __i %= LEN; \
3182+ assert((des + __j) < 32); \
3183+ op_type##_LOOP(des, op1, op2, op, 3, 0xFF, __i, __j, 4); \
3184+ __cnt += 4; \
3185+ __i++; \
3186+ if (!(__cnt & ((LEN << 2) - 1))) { \
3187+ __j++; \
3188+ __i = 0; \
3189+ } \
3190+ } \
3191+ if (rv->csr_vl % 4) { \
3192+ rv->V[des + __j][__i] &= \
3193+ (0xFFFFFFFF << ((rv->csr_vl % 4) << 3)); \
3194+ } \
3195+ op_type##_LOOP_LEFT(des, op1, op2, op, 3, 0xFF, __i, __j, 4); \
3196+ }
3197+
3198+ #define sew_16b_handler(des, op1, op2, op, op_type) \
3199+ { \
3200+ uint8_t __i = 0; \
3201+ uint8_t __j = 0; \
3202+ for (uint32_t __cnt = 0; (rv->csr_vl - __cnt) >= 2;) { \
3203+ __i %= LEN; \
3204+ assert((des + __j) < 32); \
3205+ op_type##_LOOP(des, op1, op2, op, 4, 0xFFFF, __i, __j, 2); \
3206+ __cnt += 2; \
3207+ __i++; \
3208+ if (!(__cnt & ((LEN << 1) - 1))) { \
3209+ __j++; \
3210+ __i = 0; \
3211+ } \
3212+ } \
3213+ if (rv->csr_vl % 2) { \
3214+ rv->V[des + __j][__i] &= \
3215+ (0xFFFFFFFF << ((rv->csr_vl % 2) << 4)); \
3216+ } \
3217+ op_type##_LOOP_LEFT(des, op1, op2, op, 4, 0xFFFF, __i, __j, 2); \
3218+ }
3219+
3220+ #define sew_32b_handler(des, op1, op2, op, op_type) \
3221+ { \
3222+ uint8_t __i = 0; \
3223+ uint8_t __j = 0; \
3224+ for (uint32_t __cnt = 0; rv->csr_vl > __cnt;) { \
3225+ __i %= LEN; \
3226+ assert((des + __j) < 32); \
3227+ op_type##_LOOP(des, op1, op2, op, 0, 0xFFFFFFFF, __i, __j, 1); \
3228+ __cnt += 1; \
3229+ __i++; \
3230+ if (!(__cnt & (LEN - 1))) { \
3231+ __j++; \
3232+ __i = 0; \
3233+ } \
3234+ } \
3235+ }
3236+ /* clang-format on */
3237+
30953238/*
30963239 * j sets (v*n + j)
30973240 * i sets (rv->V[ir->vd][0,1,2,3] for 128 as example)
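The helpers added above treat each 32-bit word of a vector register as a packed group of SEW-wide lanes: the *_LOOP macros extract a lane by shifting, apply the operator, mask the result back to SEW bits, and shift it into place, while the sew_*_handler macros walk csr_vl elements a word at a time and dispatch on the SEW encoded in vtype[5:3]. Below is a minimal standalone sketch of that lane arithmetic for SEW = 8 (four lanes per word); add_packed_8 is an illustrative helper, not part of the emulator.

    #include <stdint.h>
    #include <stdio.h>

    /* Element-wise add of four 8-bit lanes packed in one 32-bit word,
     * following the shift/mask pattern of VV_LOOP with SHIFT = 3 and
     * MASK = 0xFF: masking after the add stops carries from leaking
     * into the next lane. */
    static uint32_t add_packed_8(uint32_t a, uint32_t b)
    {
        uint32_t out = 0;
        for (uint8_t cnt = 0; cnt < 4; cnt++) {
            uint32_t shift = cnt << 3; /* lane index * 8 */
            out += (((a >> shift) + (b >> shift)) & 0xFF) << shift;
        }
        return out;
    }

    int main(void)
    {
        /* lanes {0x01, 0x02, 0x03, 0xFF} + {0x10, 0x20, 0x30, 0x02} */
        printf("%08x\n", add_packed_8(0xFF030201u, 0x02302010u));
        /* prints 01332211: the top lane wraps, 0xFF + 0x02 -> 0x01 */
        return 0;
    }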
@@ -5769,25 +5912,19 @@ RVOP(
57695912RVOP(
57705913 vadd_vv,
57715914 {
5772- for (int i = 0; i < 4; i++) {
5773- rv->V[rv_reg_zero][i] = 0;
5774- }
5915+ OPT(ir->vd, ir->vs2, ir->vs1, +, VV)
57755916 },
57765917 GEN({/* no operation */}))
57775918RVOP(
57785919 vadd_vx,
57795920 {
5780- for (int i = 0; i < 4; i++) {
5781- rv->V[rv_reg_zero][i] = 0;
5782- }
5921+ OPT(ir->vd, ir->vs2, ir->rs1, +, VX)
57835922 },
57845923 GEN({/* no operation */}))
57855924RVOP(
57865925 vadd_vi,
57875926 {
5788- for (int i = 0; i < 4; i++) {
5789- rv->V[rv_reg_zero][i] = 0;
5790- }
5927+ OPT(ir->vd, ir->vs2, ir->imm, +, VI)
57915928 },
57925929 GEN({/* no operation */}))
57935930RVOP(
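When csr_vl is not a multiple of the lane count, these handlers take the *_LOOP_LEFT path: the word holding the last active elements is first masked so that only the lanes about to be recomputed are cleared, while the lanes past vl keep their previous contents. A small sketch of that mask arithmetic for SEW = 8; clear_active_lanes is an illustrative name, not emulator code.

    #include <stdint.h>
    #include <stdio.h>

    /* With `left` = csr_vl % 4 active elements remaining in the current
     * word, clear only the low `left` byte lanes (they are rewritten by
     * the *_LOOP_LEFT pass) and keep the upper, tail lanes untouched,
     * mirroring `&= 0xFFFFFFFF << ((rv->csr_vl % 4) << 3)`. */
    static uint32_t clear_active_lanes(uint32_t word, uint32_t left)
    {
        return word & (0xFFFFFFFFu << (left << 3));
    }

    int main(void)
    {
        printf("%08x\n", clear_active_lanes(0xAABBCCDDu, 2)); /* aabb0000 */
        printf("%08x\n", clear_active_lanes(0xAABBCCDDu, 1)); /* aabbcc00 */
        return 0;
    }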
@@ -6145,9 +6282,7 @@ RVOP(
61456282RVOP(
61466283 vmv_v_i,
61476284 {
6148- for (int i = 0; i < 4; i++) {
6149- rv->V[rv_reg_zero][i] = 0;
6150- }
6285+ OPT(ir->vd, 0, ir->imm, +, VI)
61516286 },
61526287 GEN({/* no operation */}))
61536288RVOP(
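For reference, the __i/__j bookkeeping in the sew_*_handler macros decomposes a running element count into a word index inside the current register and a register offset within the group, which is what the "j sets (v*n + j)" comment in the first hunk describes. The sketch below reproduces that mapping for SEW = 8, assuming LEN = 4 (a 128-bit VLEN split into four 32-bit words); it is illustrative only.

    #include <stdint.h>
    #include <stdio.h>

    #define LEN 4 /* assumed: 32-bit words per vector register (VLEN = 128) */

    int main(void)
    {
        /* Same rollover as sew_8b_handler: four elements per word, __i
         * wraps and __j advances to the next register after LEN words. */
        for (uint32_t idx = 0; idx < 24; idx += 4) {
            uint32_t i = (idx / 4) % LEN;  /* word inside the register */
            uint32_t j = idx / (4 * LEN);  /* offset into the register group */
            printf("elements %2u..%2u -> v[vd + %u] word %u\n",
                   idx, idx + 3, j, i);
        }
        return 0;
    }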