@@ -553,6 +553,8 @@ class drcbe_arm64 : public drcbe_interface
553553 void op_set (a64::Assembler &a, const uml::instruction &inst);
554554 void op_mov (a64::Assembler &a, const uml::instruction &inst);
555555 void op_sext (a64::Assembler &a, const uml::instruction &inst);
556+ void op_bfxu (a64::Assembler &a, const uml::instruction &inst);
557+ void op_bfxs (a64::Assembler &a, const uml::instruction &inst);
556558 void op_roland (a64::Assembler &a, const uml::instruction &inst);
557559 void op_rolins (a64::Assembler &a, const uml::instruction &inst);
558560 template <bool CarryIn> void op_add (a64::Assembler &a, const uml::instruction &inst);
@@ -710,8 +712,10 @@ inline void drcbe_arm64::generate_one(a64::Assembler &a, const uml::instruction
710712 case uml::OP_SET: op_set (a, inst); break ; // SET dst,c
711713 case uml::OP_MOV: op_mov (a, inst); break ; // MOV dst,src[,c]
712714 case uml::OP_SEXT: op_sext (a, inst); break ; // SEXT dst,src
713- case uml::OP_ROLAND: op_roland (a, inst); break ; // ROLAND dst,src1,src2,src3
714- case uml::OP_ROLINS: op_rolins (a, inst); break ; // ROLINS dst,src1,src2,src3
715+ case uml::OP_BFXU: op_bfxu (a, inst); break ; // BFXU dst,src,shift,width
716+ case uml::OP_BFXS: op_bfxs (a, inst); break ; // BFXS dst,src,shift,width
717+ case uml::OP_ROLAND: op_roland (a, inst); break ; // ROLAND dst,src,count,mask
718+ case uml::OP_ROLINS: op_rolins (a, inst); break ; // ROLINS dst,src,count,mask
715719 case uml::OP_ADD: op_add<false >(a, inst); break ; // ADD dst,src1,src2[,f]
716720 case uml::OP_ADDC: op_add<true >(a, inst); break ; // ADDC dst,src1,src2[,f]
717721 case uml::OP_SUB: op_sub<false >(a, inst); break ; // SUB dst,src1,src2[,f]
@@ -3223,6 +3227,173 @@ void drcbe_arm64::op_sext(a64::Assembler &a, const uml::instruction &inst)
32233227 }
32243228}
32253229
// Generate ARM64 code for the UML BFXU instruction (operands: dst, src, shift, width).
//
// The emitted code takes the width-bit field of src whose least significant bit is
// at position shift and leaves it, zero-extended, in dst.  A field that runs past
// the most significant bit wraps around to the least significant bits of src.
// Shorter instruction sequences are chosen when width and/or shift are immediates.
//
// a     assembler that receives the generated instructions
// inst  UML instruction being translated; must be 4 or 8 bytes wide
3230+ void drcbe_arm64::op_bfxu (a64::Assembler &a, const uml::instruction &inst)
3231+ {
// Sanity checks: 32-bit or 64-bit operation only, no condition code.
// assert_flags presumably verifies that no flags other than S and Z are requested - confirm against its definition.
3232+ assert (inst.size () == 4 || inst.size () == 8 );
3233+ assert_no_condition (inst);
3234+ assert_flags (inst, FLAG_S | FLAG_Z);
3235+
// Wrap the four UML operands.  Presumably PTYPE_MR allows memory/register and
// PTYPE_MRI additionally allows immediates - confirm against be_parameter.
3236+ be_parameter dstp (*this , inst.param (0 ), PTYPE_MR);
3237+ be_parameter srcp (*this , inst.param (1 ), PTYPE_MRI);
3238+ be_parameter shiftp (*this , inst.param (2 ), PTYPE_MRI);
3239+ be_parameter widthp (*this , inst.param (3 ), PTYPE_MRI);
3240+
// Working registers for the result and the source value.  select_register presumably
// returns the parameter's own register when it has one and the given temporary
// otherwise - TODO confirm.
3241+ const a64::Gp output = dstp.select_register (TEMP_REG1, inst.size ());
3242+ const a64::Gp src = srcp.select_register (TEMP_REG2, inst.size ());
// When flags are requested, mask with ANDS so the masking step also sets the flags.
3243+ const a64::Inst::Id maskop = inst.flags () ? a64::Inst::kIdAnds : a64::Inst::kIdAnd ;
// Operation width in bits (32 or 64); instbits - 1 is used below to reduce shift and width immediates.
3244+ const uint64_t instbits = inst.size () * 8 ;
3245+
// Case 1: width is the immediate zero - produce zero (and flags for a zero result if requested).
// NOTE(review): only a literal zero is caught here.  A non-zero immediate that is a
// multiple of instbits reduces to width 0 in the next branch and would be passed to
// UBFX or used as an all-zero AND mask - confirm immediates are normalised before this point.
3246+ if (widthp.is_immediate_value (0 ))
3247+ {
3248+ // undefined behaviour - do something
3249+ const a64::Gp zero = select_register (a64::xzr, inst.size ());
3250+
3251+ if (inst.flags ())
3252+ a.ands (output, zero, zero);
3253+ else
3254+ a.mov (output, zero);
3255+ }
// Case 2: width is a known immediate.
3256+ else if (widthp.is_immediate ())
3257+ {
// Reduce the width to the operation size and build the mask applied after rotating
// (make_bitmask presumably yields a value with the low width bits set - confirm).
3258+ const auto width (widthp.immediate () & (instbits - 1 ));
3259+ const auto mask (util::make_bitmask<uint64_t >(width));
3260+
// Bring the source value into its working register.
3261+ mov_reg_param (a, inst.size (), src, srcp);
3262+
3263+ if (shiftp.is_immediate ())
3264+ {
// Both shift and width are known when generating code.
3265+ const auto shift (shiftp.immediate () & (instbits - 1 ));
3266+
3267+ if ((shift + width) <= instbits)
3268+ {
3269+ // contiguous bit field
// A single UBFX does the extraction; TST of the result with itself follows to produce the requested flags.
3270+ a.ubfx (output, src, shift, width);
3271+ if (inst.flags ())
3272+ a.tst (output, output);
3273+ }
3274+ else
3275+ {
3276+ // bit field wraps from LSB to MSB
// Rotate right so the field starts at bit 0, then keep only the low width bits.
3277+ a.ror (output, src, shift);
3278+ a.emit (maskop, output, output, mask);
3279+ }
3280+ }
3281+ else
3282+ {
// Shift is only known at run time: rotate by the register amount, then apply the fixed mask.
3283+ const a64::Gp shift = shiftp.select_register (TEMP_REG3, inst.size ());
3284+
3285+ mov_reg_param (a, inst.size (), shift, shiftp);
3286+
3287+ a.ror (output, src, shift);
3288+ a.emit (maskop, output, output, mask);
3289+ }
3290+ }
// Case 3: width is only known at run time.
3291+ else
3292+ {
// If width and dst are the same parameter, use TEMP_REG3 for the width rather than the
// parameter's own register, so writing output does not destroy the width before it is negated.
3293+ const a64::Gp width = (widthp != dstp) ? widthp.select_register (TEMP_REG3, inst.size ()) : select_register (TEMP_REG3, inst.size ());
3294+ const a64::Gp temp = select_register (FUNC_SCRATCH_REG, inst.size ());
3295+
// Load the operands: width, then the shift into temp (only when it is not an immediate), then the source.
3296+ mov_reg_param (a, inst.size (), width, widthp);
3297+ if (!shiftp.is_immediate ())
3298+ mov_reg_param (a, inst.size (), temp, shiftp);
3299+ mov_reg_param (a, inst.size (), src, srcp);
3300+
// temp = width + shift
3301+ if (shiftp.is_immediate ())
3302+ a.add (temp, width, shiftp.immediate () & (instbits - 1 ));
3303+ else
3304+ a.add (temp, width, temp);
// Rotating right by shift + width places the field in the most significant width bits.
3305+ a.ror (output, src, temp);
// Shifting right by -width (register shift amounts are taken modulo the register size,
// i.e. instbits - width) moves the field down to bit 0 and fills the upper bits with zeros.
3306+ a.neg (temp, width);
3307+ a.lsr (output, output, temp);
3308+ if (inst.flags ())
3309+ a.tst (output, output);
3310+ }
3311+
// Write the result from the working register to the destination parameter.
3312+ mov_param_reg (a, inst.size (), dstp, output);
3313+ }
3314+
// Generate ARM64 code for the UML BFXS instruction (operands: dst, src, shift, width).
//
// The emitted code takes the width-bit field of src whose least significant bit is
// at position shift and leaves it, sign-extended from the field's top bit, in dst.
// A field that runs past the most significant bit wraps around to the least
// significant bits of src.  Shorter sequences are chosen when width and/or shift
// are immediates.
//
// a     assembler that receives the generated instructions
// inst  UML instruction being translated; must be 4 or 8 bytes wide
3315+ void drcbe_arm64::op_bfxs (a64::Assembler &a, const uml::instruction &inst)
3316+ {
// Sanity checks: 32-bit or 64-bit operation only, no condition code.
// assert_flags presumably verifies that no flags other than S and Z are requested - confirm against its definition.
3317+ assert (inst.size () == 4 || inst.size () == 8 );
3318+ assert_no_condition (inst);
3319+ assert_flags (inst, FLAG_S | FLAG_Z);
3320+
// Wrap the four UML operands.  Presumably PTYPE_MR allows memory/register and
// PTYPE_MRI additionally allows immediates - confirm against be_parameter.
3321+ be_parameter dstp (*this , inst.param (0 ), PTYPE_MR);
3322+ be_parameter srcp (*this , inst.param (1 ), PTYPE_MRI);
3323+ be_parameter shiftp (*this , inst.param (2 ), PTYPE_MRI);
3324+ be_parameter widthp (*this , inst.param (3 ), PTYPE_MRI);
3325+
// Working registers for the result and the source value.  select_register presumably
// returns the parameter's own register when it has one and the given temporary
// otherwise - TODO confirm.
3326+ const a64::Gp output = dstp.select_register (TEMP_REG1, inst.size ());
3327+ const a64::Gp src = srcp.select_register (TEMP_REG2, inst.size ());
// Operation width in bits (32 or 64); instbits - 1 is used below to reduce shift and width immediates.
3328+ const uint64_t instbits = inst.size () * 8 ;
3329+
// Case 1: width is the immediate zero - produce zero.
// NOTE(review): only a literal zero is caught here.  A non-zero immediate that is a
// multiple of instbits reduces to width 0 in the next branch and would be passed to
// SBFX - confirm immediates are normalised before this point.
3330+ if (widthp.is_immediate_value (0 ))
3331+ {
3332+ // undefined behaviour - do something
3333+ const a64::Gp zero = select_register (a64::xzr, inst.size ());
3334+
3335+ if (inst.flags ())
3336+ a.ands (output, zero, zero);
3337+ else
3338+ a.mov (output, zero);
3339+ }
// Case 2: width is a known immediate.
3340+ else if (widthp.is_immediate ())
3341+ {
// Reduce the width to the operation size.
3342+ const auto width (widthp.immediate () & (instbits - 1 ));
3343+
// Bring the source value into its working register.
3344+ mov_reg_param (a, inst.size (), src, srcp);
3345+
3346+ if (shiftp.is_immediate ())
3347+ {
// Both shift and width are known when generating code.
3348+ const auto shift (shiftp.immediate () & (instbits - 1 ));
3349+
3350+ if ((shift + width) <= instbits)
3351+ {
3352+ // contiguous bit field
// A single SBFX extracts and sign-extends the field.
3353+ a.sbfx (output, src, shift, width);
3354+ }
3355+ else
3356+ {
3357+ // bit field wraps from LSB to MSB
// Rotate right so the field starts at bit 0, then sign-extend the low width bits.
3358+ a.ror (output, src, shift);
3359+ a.sbfx (output, output, 0 , width);
3360+ }
3361+ }
3362+ else
3363+ {
// Shift is only known at run time: rotate by the register amount, then sign-extend the low width bits.
3364+ const a64::Gp shift = shiftp.select_register (TEMP_REG3, inst.size ());
3365+
3366+ mov_reg_param (a, inst.size (), shift, shiftp);
3367+
3368+ a.ror (output, src, shift);
3369+ a.sbfx (output, output, 0 , width);
3370+ }
3371+ }
// Case 3: width is only known at run time.
3372+ else
3373+ {
// If width and dst are the same parameter, use TEMP_REG3 for the width rather than the
// parameter's own register, so writing output does not destroy the width before it is negated.
3374+ const a64::Gp width = (widthp != dstp) ? widthp.select_register (TEMP_REG3, inst.size ()) : select_register (TEMP_REG3, inst.size ());
3375+ const a64::Gp temp = select_register (FUNC_SCRATCH_REG, inst.size ());
3376+
// Load the operands: source, then the shift into temp (only when it is not an immediate), then the width.
// NOTE(review): op_bfxu loads these in the opposite order (width, shift, source) - confirm
// that neither order can overwrite an operand when parameters share a register.
3377+ mov_reg_param (a, inst.size (), src, srcp);
3378+ if (!shiftp.is_immediate ())
3379+ mov_reg_param (a, inst.size (), temp, shiftp);
3380+ mov_reg_param (a, inst.size (), width, widthp);
3381+
// temp = width + shift
3382+ if (shiftp.is_immediate ())
3383+ a.add (temp, width, shiftp.immediate () & (instbits - 1 ));
3384+ else
3385+ a.add (temp, width, temp);
// Rotating right by shift + width places the field in the most significant width bits.
3386+ a.ror (output, src, temp);
// An arithmetic shift right by -width (register shift amounts are taken modulo the register
// size, i.e. instbits - width) moves the field down to bit 0 and replicates its top bit upward.
3387+ a.neg (temp, width);
3388+ a.asr (output, output, temp);
3389+ }
3390+
// Write the result from the working register to the destination parameter.
3391+ mov_param_reg (a, inst.size (), dstp, output);
3392+
// Flags, when requested, are produced by a TST of the result register with itself;
// it is emitted after the write-back above, for every case including the zero-width one.
3393+ if (inst.flags ())
3394+ a.tst (output, output);
3395+ }
3396+
32263397void drcbe_arm64::op_roland (a64::Assembler &a, const uml::instruction &inst)
32273398{
32283399 assert (inst.size () == 4 || inst.size () == 8 );
@@ -3246,11 +3417,10 @@ void drcbe_arm64::op_roland(a64::Assembler &a, const uml::instruction &inst)
32463417 const auto pop = population_count_64 (maskp.immediate ());
32473418 const auto lz = count_leading_zeros_64 (maskp.immediate ()) & (instbits - 1 );
32483419 const auto invlamask = ~(maskp.immediate () << lz) & instmask;
3249- const bool is_right_aligned = (maskp.immediate () & (maskp.immediate () + 1 )) == 0 ;
32503420 const bool is_contiguous = (invlamask & (invlamask + 1 )) == 0 ;
32513421 const auto s = shiftp.immediate () & (instbits - 1 );
32523422
3253- if (is_right_aligned || is_contiguous)
3423+ if (is_contiguous)
32543424 {
32553425 mov_reg_param (a, inst.size (), src, srcp);
32563426 optimized = true ;
@@ -3260,25 +3430,6 @@ void drcbe_arm64::op_roland(a64::Assembler &a, const uml::instruction &inst)
32603430 {
32613431 a.mov (output, select_register (a64::xzr, inst.size ()));
32623432 }
3263- else if (is_right_aligned)
3264- {
3265- // Optimize a contiguous right-aligned mask
3266- const auto s2 = -int (s) & (instbits - 1 );
3267-
3268- if (s >= pop)
3269- {
3270- a.ubfx (output, src, s2, pop);
3271- }
3272- else if (s2 > 0 )
3273- {
3274- a.ror (output, src, s2);
3275- a.bfc (output, pop, instbits - pop);
3276- }
3277- else
3278- {
3279- a.and_ (output, src, ~maskp.immediate () & instmask);
3280- }
3281- }
32823433 else if (is_contiguous)
32833434 {
32843435 // Optimize a contiguous mask
0 commit comments