diff --git a/docs/source/techspecs/uml_instructions.rst b/docs/source/techspecs/uml_instructions.rst
index f0137215242c0..449acf55c7c0a 100644
--- a/docs/source/techspecs/uml_instructions.rst
+++ b/docs/source/techspecs/uml_instructions.rst
@@ -3502,6 +3502,94 @@ Simplification rules
 * Immediate values for the ``count`` operand are truncated to five or
   six bits for 32-bit or 64-bit operands, respectively.
 
+.. _umlinst-bfx:
+
+BFX
+~~~
+
+Extract a contiguous bit field from an integer value.
+
++---------------------------------+-----------------------------------------------+
+| Disassembly                     | Usage                                         |
++=================================+===============================================+
+| .. code-block::                 | .. code-block:: C++                           |
+|                                 |                                               |
+|     bfxu dst,src,shift,width    |     UML_BFXU(block, dst, src, shift, width);  |
+|     bfxs dst,src,shift,width    |     UML_BFXS(block, dst, src, shift, width);  |
+|     dbfxu dst,src,shift,width   |     UML_DBFXU(block, dst, src, shift, width); |
+|     dbfxs dst,src,shift,width   |     UML_DBFXS(block, dst, src, shift, width); |
++---------------------------------+-----------------------------------------------+
+
+Extracts and right-aligns a contiguous bit field from the value of
+``src``, specified by its least significant bit position and width in
+bits. The field must be narrower than the ``src`` operand, but it may
+wrap around from the most significant bit position to the least
+significant bit position. BFXU and DBFXU zero-extend an unsigned field,
+while BFXS and DBFXS sign-extend a signed field.
+
+Back-ends may be able to optimise some forms of this instruction, for
+example when the ``shift`` and ``width`` operands are both immediate
+values, as in the example below.
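+
+A quick usage sketch (``I0`` and ``I1`` here stand for any integer
+registers, and the bit positions are arbitrary), extracting the eight
+bits starting at bit 12 of ``I1`` into ``I0``:
+
+.. code-block:: C++
+
+    UML_BFXU(block, I0, I1, 12, 8);
+
+    // equivalent to the longer shift/mask sequence:
+    // UML_SHR(block, I0, I1, 12);
+    // UML_AND(block, I0, I0, 0x000000ff);
+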
+Operands
+^^^^^^^^
+
+dst (32-bit or 64-bit – memory, integer register)
+    The destination where the extracted field will be stored.
+src (32-bit or 64-bit – memory, integer register, immediate, map variable)
+    The value to extract a contiguous bit field from.
+shift (32-bit or 64-bit – memory, integer register, immediate, map variable)
+    The position of the least significant bit of the field to extract,
+    where zero is the least significant bit position, and bit numbers
+    increase toward the most significant bit position. Only the least
+    significant five bits or six bits of this operand are used,
+    depending on the instruction size.
+width (32-bit or 64-bit – memory, integer register, immediate, map variable)
+    The width of the field to extract in bits. Only the least
+    significant five bits or six bits of this operand are used,
+    depending on the instruction size. The result is undefined if the
+    width modulo the instruction size in bits is zero.
+
+Flags
+^^^^^
+
+carry (C)
+    Undefined.
+overflow (V)
+    Undefined.
+zero (Z)
+    Set if the result is zero, or cleared otherwise.
+sign (S)
+    Set to the value of the most significant bit of the result (set if
+    the result is a negative signed integer value, or cleared
+    otherwise).
+unordered (U)
+    Undefined.
+
+Simplification rules
+^^^^^^^^^^^^^^^^^^^^
+
+* Converted to :ref:`MOV <umlinst-mov>`, :ref:`AND <umlinst-and>` or
+  :ref:`OR <umlinst-or>` if the ``src``, ``shift`` and ``width``
+  operands are all immediate values, or if the ``width`` operand is the
+  immediate value zero.
+* Converted to :ref:`SHR <umlinst-shr>` or :ref:`SAR <umlinst-sar>` if
+  the ``src`` operand is not an immediate value, the ``shift`` and
+  ``width`` operands are both immediate values, and the sum of the value
+  of the ``shift`` operand and the value of the ``width`` operand is
+  equal to the instruction size in bits.
+* BFXU and DBFXU are converted to :ref:`AND <umlinst-and>` if the
+  ``shift`` operand is the immediate value zero and the ``width``
+  operand is an immediate value.
+* BFXS and DBFXS are converted to :ref:`SEXT <umlinst-sext>` if the
+  ``shift`` operand is the immediate value zero and the ``width``
+  operand is the immediate value 8, 16 or 32.
+* Immediate values for the ``src`` operand are truncated to the
+  instruction size.
+* Immediate values for the ``shift`` and ``width`` operands are
+  truncated to five or six bits for 32-bit or 64-bit operands,
+  respectively.
+
 .. _umlinst-roland:
 
 ROLAND
 ~~~~~~
@@ -3572,10 +3660,10 @@ Simplification rules
   immediate value and the ``mask`` operand is an immediate value
   containing a single contiguous left-aligned sequence of set bits of
   the appropriate length for the value of the ``count`` operand.
-* Converted to :ref:`SHR <umlinst-shr>` if the ``count`` operand is an
-  immediate value and the ``mask`` operand is an immediate value
-  containing a single contiguous right-aligned sequence of set bits of
-  the appropriate length for the value of the ``count`` operand.
+* Converted to :ref:`SHR <umlinst-shr>` or :ref:`BFX <umlinst-bfx>` if
+  the ``count`` operand is an immediate value and the ``mask`` operand
+  is an immediate value containing a single contiguous right-aligned
+  sequence of set bits.
 * Immediate values for the ``src`` and ``mask`` operands are truncated
   to the instruction size.
 * Immediate values for the ``count`` operand are truncated to five or
diff --git a/src/devices/cpu/drcbearm64.cpp b/src/devices/cpu/drcbearm64.cpp
index 39d32889dbb89..7795a73a12cb8 100644
--- a/src/devices/cpu/drcbearm64.cpp
+++ b/src/devices/cpu/drcbearm64.cpp
@@ -553,6 +553,8 @@ class drcbe_arm64 : public drcbe_interface
 	void op_set(a64::Assembler &a, const uml::instruction &inst);
 	void op_mov(a64::Assembler &a, const uml::instruction &inst);
 	void op_sext(a64::Assembler &a, const uml::instruction &inst);
+	void op_bfxu(a64::Assembler &a, const uml::instruction &inst);
+	void op_bfxs(a64::Assembler &a, const uml::instruction &inst);
 	void op_roland(a64::Assembler &a, const uml::instruction &inst);
 	void op_rolins(a64::Assembler &a, const uml::instruction &inst);
 	template <bool CarryIn> void op_add(a64::Assembler &a, const uml::instruction &inst);
@@ -710,8 +712,10 @@ inline void drcbe_arm64::generate_one(a64::Assembler &a, const uml::instruction
 	case uml::OP_SET:     op_set(a, inst);        break; // SET     dst,c
 	case uml::OP_MOV:     op_mov(a, inst);        break; // MOV     dst,src[,c]
 	case uml::OP_SEXT:    op_sext(a, inst);       break; // SEXT    dst,src
-	case uml::OP_ROLAND:  op_roland(a, inst);     break; // ROLAND  dst,src1,src2,src3
-	case uml::OP_ROLINS:  op_rolins(a, inst);     break; // ROLINS  dst,src1,src2,src3
+	case uml::OP_BFXU:    op_bfxu(a, inst);       break; // BFXU    dst,src,shift,width
+	case uml::OP_BFXS:    op_bfxs(a, inst);       break; // BFXS    dst,src,shift,width
+	case uml::OP_ROLAND:  op_roland(a, inst);     break; // ROLAND  dst,src,count,mask
+	case uml::OP_ROLINS:  op_rolins(a, inst);     break; // ROLINS  dst,src,count,mask
 	case uml::OP_ADD:     op_add<false>(a, inst); break; // ADD     dst,src1,src2[,f]
 	case uml::OP_ADDC:    op_add<true>(a, inst);  break; // ADDC    dst,src1,src2[,f]
 	case uml::OP_SUB:     op_sub<false>(a, inst); break; // SUB     dst,src1,src2[,f]
@@ -3223,6 +3227,173 @@ void drcbe_arm64::op_sext(a64::Assembler &a, const uml::instruction &inst)
 	}
 }
 
+void drcbe_arm64::op_bfxu(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_flags(inst, FLAG_S | FLAG_Z);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
+	be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
+	be_parameter shiftp(*this, inst.param(2), PTYPE_MRI);
+	be_parameter widthp(*this, inst.param(3), PTYPE_MRI);
+
+	const a64::Gp output = dstp.select_register(TEMP_REG1, inst.size());
+	const a64::Gp src = srcp.select_register(TEMP_REG2, inst.size());
+	const a64::Inst::Id maskop = inst.flags() ? a64::Inst::kIdAnds : a64::Inst::kIdAnd;
+	const uint64_t instbits = inst.size() * 8;
+
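+	// With an immediate width, the field is extracted with a single UBFX
+	// when it does not wrap, or with a rotate right by the shift amount
+	// followed by an AND mask when it wraps or the shift is variable.
+	// With a variable width, rotate right by (shift + width) to park the
+	// field at the top of the register, then shift right by
+	// (size - width) to right-align and zero-extend it.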
+	if (widthp.is_immediate_value(0))
+	{
+		// zero width is undefined behaviour - just produce zero
+		const a64::Gp zero = select_register(a64::xzr, inst.size());
+
+		if (inst.flags())
+			a.ands(output, zero, zero);
+		else
+			a.mov(output, zero);
+	}
+	else if (widthp.is_immediate())
+	{
+		const auto width(widthp.immediate() & (instbits - 1));
+		const auto mask(util::make_bitmask<uint64_t>(width));
+
+		mov_reg_param(a, inst.size(), src, srcp);
+
+		if (shiftp.is_immediate())
+		{
+			const auto shift(shiftp.immediate() & (instbits - 1));
+
+			if ((shift + width) <= instbits)
+			{
+				// contiguous bit field
+				a.ubfx(output, src, shift, width);
+				if (inst.flags())
+					a.tst(output, output);
+			}
+			else
+			{
+				// bit field wraps from LSB to MSB
+				a.ror(output, src, shift);
+				a.emit(maskop, output, output, mask);
+			}
+		}
+		else
+		{
+			const a64::Gp shift = shiftp.select_register(TEMP_REG3, inst.size());
+
+			mov_reg_param(a, inst.size(), shift, shiftp);
+
+			a.ror(output, src, shift);
+			a.emit(maskop, output, output, mask);
+		}
+	}
+	else
+	{
+		const a64::Gp width = (widthp != dstp) ? widthp.select_register(TEMP_REG3, inst.size()) : select_register(TEMP_REG3, inst.size());
+		const a64::Gp temp = select_register(FUNC_SCRATCH_REG, inst.size());
+
+		mov_reg_param(a, inst.size(), width, widthp);
+		if (!shiftp.is_immediate())
+			mov_reg_param(a, inst.size(), temp, shiftp);
+		mov_reg_param(a, inst.size(), src, srcp);
+
+		if (shiftp.is_immediate())
+			a.add(temp, width, shiftp.immediate() & (instbits - 1));
+		else
+			a.add(temp, width, temp);
+		a.ror(output, src, temp);
+		a.neg(temp, width);
+		a.lsr(output, output, temp);
+		if (inst.flags())
+			a.tst(output, output);
+	}
+
+	mov_param_reg(a, inst.size(), dstp, output);
+}
+
+void drcbe_arm64::op_bfxs(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_flags(inst, FLAG_S | FLAG_Z);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
+	be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
+	be_parameter shiftp(*this, inst.param(2), PTYPE_MRI);
+	be_parameter widthp(*this, inst.param(3), PTYPE_MRI);
+
+	const a64::Gp output = dstp.select_register(TEMP_REG1, inst.size());
+	const a64::Gp src = srcp.select_register(TEMP_REG2, inst.size());
+	const uint64_t instbits = inst.size() * 8;
+
+	if (widthp.is_immediate_value(0))
+	{
+		// zero width is undefined behaviour - just produce zero
+		const a64::Gp zero = select_register(a64::xzr, inst.size());
+
+		if (inst.flags())
+			a.ands(output, zero, zero);
+		else
+			a.mov(output, zero);
+	}
+	else if (widthp.is_immediate())
+	{
+		const auto width(widthp.immediate() & (instbits - 1));
+
+		mov_reg_param(a, inst.size(), src, srcp);
+
+		if (shiftp.is_immediate())
+		{
+			const auto shift(shiftp.immediate() & (instbits - 1));
+
+			if ((shift + width) <= instbits)
+			{
+				// contiguous bit field
+				a.sbfx(output, src, shift, width);
+			}
+			else
+			{
+				// bit field wraps from LSB to MSB
+				a.ror(output, src, shift);
+				a.sbfx(output, output, 0, width);
+			}
+		}
+		else
+		{
+			const a64::Gp shift = shiftp.select_register(TEMP_REG3, inst.size());
+
+			mov_reg_param(a, inst.size(), shift, shiftp);
+
+			a.ror(output,
src, shift); + a.sbfx(output, output, 0, width); + } + } + else + { + const a64::Gp width = (widthp != dstp) ? widthp.select_register(TEMP_REG3, inst.size()) : select_register(TEMP_REG3, inst.size()); + const a64::Gp temp = select_register(FUNC_SCRATCH_REG, inst.size()); + + mov_reg_param(a, inst.size(), src, srcp); + if (!shiftp.is_immediate()) + mov_reg_param(a, inst.size(), temp, shiftp); + mov_reg_param(a, inst.size(), width, widthp); + + if (shiftp.is_immediate()) + a.add(temp, width, shiftp.immediate() & (instbits - 1)); + else + a.add(temp, width, temp); + a.ror(output, src, temp); + a.neg(temp, width); + a.asr(output, output, temp); + } + + mov_param_reg(a, inst.size(), dstp, output); + + if (inst.flags()) + a.tst(output, output); +} + void drcbe_arm64::op_roland(a64::Assembler &a, const uml::instruction &inst) { assert(inst.size() == 4 || inst.size() == 8); @@ -3246,11 +3417,10 @@ void drcbe_arm64::op_roland(a64::Assembler &a, const uml::instruction &inst) const auto pop = population_count_64(maskp.immediate()); const auto lz = count_leading_zeros_64(maskp.immediate()) & (instbits - 1); const auto invlamask = ~(maskp.immediate() << lz) & instmask; - const bool is_right_aligned = (maskp.immediate() & (maskp.immediate() + 1)) == 0; const bool is_contiguous = (invlamask & (invlamask + 1)) == 0; const auto s = shiftp.immediate() & (instbits - 1); - if (is_right_aligned || is_contiguous) + if (is_contiguous) { mov_reg_param(a, inst.size(), src, srcp); optimized = true; @@ -3260,25 +3430,6 @@ void drcbe_arm64::op_roland(a64::Assembler &a, const uml::instruction &inst) { a.mov(output, select_register(a64::xzr, inst.size())); } - else if (is_right_aligned) - { - // Optimize a contiguous right-aligned mask - const auto s2 = -int(s) & (instbits - 1); - - if (s >= pop) - { - a.ubfx(output, src, s2, pop); - } - else if (s2 > 0) - { - a.ror(output, src, s2); - a.bfc(output, pop, instbits - pop); - } - else - { - a.and_(output, src, ~maskp.immediate() & instmask); - } - } else if (is_contiguous) { // Optimize a contiguous mask diff --git a/src/devices/cpu/drcbec.cpp b/src/devices/cpu/drcbec.cpp index fb409593bb5fe..51e3caf656a0b 100644 --- a/src/devices/cpu/drcbec.cpp +++ b/src/devices/cpu/drcbec.cpp @@ -1116,21 +1116,41 @@ int drcbe_c::execute(code_handle &entry) break; case MAKE_OPCODE_SHORT(OP_SEXT1, 4, 0): // SEXT1 dst,src - PARAM0 = (int8_t)PARAM1; + PARAM0 = int8_t(uint8_t(PARAM1)); break; case MAKE_OPCODE_SHORT(OP_SEXT1, 4, 1): - temp32 = (int8_t)PARAM1; + temp32 = int8_t(uint8_t(PARAM1)); flags = FLAGS32_NZ(temp32); PARAM0 = temp32; break; case MAKE_OPCODE_SHORT(OP_SEXT2, 4, 0): // SEXT2 dst,src - PARAM0 = (int16_t)PARAM1; + PARAM0 = int16_t(uint16_t(PARAM1)); break; case MAKE_OPCODE_SHORT(OP_SEXT2, 4, 1): - temp32 = (int16_t)PARAM1; + temp32 = int16_t(uint16_t(PARAM1)); + flags = FLAGS32_NZ(temp32); + PARAM0 = temp32; + break; + + case MAKE_OPCODE_SHORT(OP_BFXU, 4, 0): // BFXU dst,src,shift,width[,f] + PARAM0 = rotr_32(PARAM1, PARAM2 + PARAM3) >> (-int32_t(PARAM3) & 0x1f); + break; + + case MAKE_OPCODE_SHORT(OP_BFXU, 4, 1): + temp32 = rotr_32(PARAM1, PARAM2 + PARAM3) >> (-int32_t(PARAM3) & 0x1f); + flags = FLAGS32_NZ(temp32); + PARAM0 = temp32; + break; + + case MAKE_OPCODE_SHORT(OP_BFXS, 4, 0): // BFXS dst,src,shift,width[,f] + PARAM0 = uint32_t(int32_t(rotr_32(PARAM1, PARAM2 + PARAM3)) >> (-int32_t(PARAM3) & 0x1f)); + break; + + case MAKE_OPCODE_SHORT(OP_BFXS, 4, 1): + temp32 = uint32_t(int32_t(rotr_32(PARAM1, PARAM2 + PARAM3)) >> (-int32_t(PARAM3) & 0x1f)); flags = 
FLAGS32_NZ(temp32); PARAM0 = temp32; break; @@ -1767,31 +1787,51 @@ int drcbe_c::execute(code_handle &entry) break; case MAKE_OPCODE_SHORT(OP_SEXT1, 8, 0): // DSEXT dst,src,BYTE - DPARAM0 = (int8_t)PARAM1; + DPARAM0 = int8_t(uint8_t(PARAM1)); break; case MAKE_OPCODE_SHORT(OP_SEXT1, 8, 1): - temp64 = (int8_t)PARAM1; + temp64 = int8_t(uint8_t(PARAM1)); flags = FLAGS64_NZ(temp64); DPARAM0 = temp64; break; case MAKE_OPCODE_SHORT(OP_SEXT2, 8, 0): // DSEXT dst,src,WORD - DPARAM0 = (int16_t)PARAM1; + DPARAM0 = int16_t(uint16_t(PARAM1)); break; case MAKE_OPCODE_SHORT(OP_SEXT2, 8, 1): - temp64 = (int16_t)PARAM1; + temp64 = int16_t(uint16_t(PARAM1)); flags = FLAGS64_NZ(temp64); DPARAM0 = temp64; break; case MAKE_OPCODE_SHORT(OP_SEXT4, 8, 0): // DSEXT dst,src,DWORD - DPARAM0 = (int32_t)PARAM1; + DPARAM0 = int32_t(uint32_t(PARAM1)); break; case MAKE_OPCODE_SHORT(OP_SEXT4, 8, 1): - temp64 = (int32_t)PARAM1; + temp64 = int32_t(uint32_t(PARAM1)); + flags = FLAGS64_NZ(temp64); + DPARAM0 = temp64; + break; + + case MAKE_OPCODE_SHORT(OP_BFXU, 8, 0): // BFXU dst,src,shift,width[,f] + DPARAM0 = rotr_64(DPARAM1, DPARAM2 + DPARAM3) >> (-int64_t(DPARAM3) & 0x3f); + break; + + case MAKE_OPCODE_SHORT(OP_BFXU, 8, 1): + temp64 = rotr_64(DPARAM1, DPARAM2 + DPARAM3) >> (-int64_t(DPARAM3) & 0x3f); + flags = FLAGS64_NZ(temp64); + DPARAM0 = temp64; + break; + + case MAKE_OPCODE_SHORT(OP_BFXS, 8, 0): // BFXS dst,src,shift,width[,f] + DPARAM0 = uint64_t(int64_t(rotr_64(DPARAM1, DPARAM2 + DPARAM3)) >> (-int64_t(DPARAM3) & 0x3f)); + break; + + case MAKE_OPCODE_SHORT(OP_BFXS, 8, 1): + temp64 = uint64_t(int64_t(rotr_64(DPARAM1, DPARAM2 + DPARAM3)) >> (-int64_t(DPARAM3) & 0x3f)); flags = FLAGS64_NZ(temp64); DPARAM0 = temp64; break; diff --git a/src/devices/cpu/drcbex64.cpp b/src/devices/cpu/drcbex64.cpp index d14de0a0c80af..b916919aa918b 100644 --- a/src/devices/cpu/drcbex64.cpp +++ b/src/devices/cpu/drcbex64.cpp @@ -244,15 +244,15 @@ using namespace asmjit::x86; // CONSTANTS //************************************************************************** -const uint32_t PTYPE_M = 1 << parameter::PTYPE_MEMORY; -const uint32_t PTYPE_I = 1 << parameter::PTYPE_IMMEDIATE; -const uint32_t PTYPE_R = 1 << parameter::PTYPE_INT_REGISTER; -const uint32_t PTYPE_F = 1 << parameter::PTYPE_FLOAT_REGISTER; -//const uint32_t PTYPE_MI = PTYPE_M | PTYPE_I; -//const uint32_t PTYPE_RI = PTYPE_R | PTYPE_I; -const uint32_t PTYPE_MR = PTYPE_M | PTYPE_R; -const uint32_t PTYPE_MRI = PTYPE_M | PTYPE_R | PTYPE_I; -const uint32_t PTYPE_MF = PTYPE_M | PTYPE_F; +const u32 PTYPE_M = 1 << parameter::PTYPE_MEMORY; +const u32 PTYPE_I = 1 << parameter::PTYPE_IMMEDIATE; +const u32 PTYPE_R = 1 << parameter::PTYPE_INT_REGISTER; +const u32 PTYPE_F = 1 << parameter::PTYPE_FLOAT_REGISTER; +//const u32 PTYPE_MI = PTYPE_M | PTYPE_I; +//const u32 PTYPE_RI = PTYPE_R | PTYPE_I; +const u32 PTYPE_MR = PTYPE_M | PTYPE_R; +const u32 PTYPE_MRI = PTYPE_M | PTYPE_R | PTYPE_I; +const u32 PTYPE_MF = PTYPE_M | PTYPE_F; #ifdef _WIN32 @@ -283,7 +283,7 @@ const Gp::Id int_register_map[REG_I_COUNT] = #endif }; -uint32_t float_register_map[REG_F_COUNT] = +u32 float_register_map[REG_F_COUNT] = { #ifdef _WIN32 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 @@ -313,7 +313,7 @@ const CondCode condition_map[uml::COND_MAX - uml::COND_Z] = #if 0 // rounding mode mapping table -const uint8_t fprnd_map[4] = +const u8 fprnd_map[4] = { FPRND_CHOP, // ROUND_TRUNC, truncate FPRND_NEAR, // ROUND_ROUND, round @@ -323,7 +323,7 @@ const uint8_t fprnd_map[4] = #endif // size-to-mask table -//const uint64_t size_to_mask[] 
= { 0, 0xff, 0xffff, 0, 0xffffffff, 0, 0, 0, 0xffffffffffffffffU }; +//const u64 size_to_mask[] = { 0, 0xff, 0xffff, 0, 0xffffffff, 0, 0, 0, 0xffffffffffffffffU }; @@ -369,18 +369,18 @@ inline bool is_nonvolatile_register(Gp reg) class drcbe_x64 : public drcbe_interface { - using x86_entry_point_func = uint32_t (*)(uint8_t *rbpvalue, x86code *entry); + using x86_entry_point_func = u32 (*)(u8 *rbpvalue, x86code *entry); public: // construction/destruction - drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, uint32_t flags, int modes, int addrbits, int ignorebits); + drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, u32 flags, int modes, int addrbits, int ignorebits); virtual ~drcbe_x64(); // required overrides virtual void reset() override; virtual int execute(uml::code_handle &entry) override; - virtual void generate(drcuml_block &block, const uml::instruction *instlist, uint32_t numinst) override; - virtual bool hash_exists(uint32_t mode, uint32_t pc) const noexcept override; + virtual void generate(drcuml_block &block, const uml::instruction *instlist, u32 numinst) override; + virtual bool hash_exists(u32 mode, u32 pc) const noexcept override; virtual void get_info(drcbe_info &info) const noexcept override; virtual bool logging() const noexcept override { return bool(m_log); } @@ -401,12 +401,12 @@ class drcbe_x64 : public drcbe_interface }; // represents the value of a parameter - typedef uint64_t be_parameter_value; + typedef u64 be_parameter_value; // construction be_parameter() : m_type(PTYPE_NONE), m_value(0), m_coldreg(false) { } - be_parameter(uint64_t val) : m_type(PTYPE_IMMEDIATE), m_value(val), m_coldreg(false) { } - be_parameter(drcbe_x64 &drcbe, const uml::parameter ¶m, uint32_t allowed); + be_parameter(u64 val) : m_type(PTYPE_IMMEDIATE), m_value(val), m_coldreg(false) { } + be_parameter(drcbe_x64 &drcbe, const uml::parameter ¶m, u32 allowed); be_parameter(const be_parameter ¶m) = default; // creators for types that don't safely default @@ -421,9 +421,9 @@ class drcbe_x64 : public drcbe_interface // getters be_parameter_type type() const { return m_type; } - uint64_t immediate() const { assert(m_type == PTYPE_IMMEDIATE); return m_value; } - uint32_t ireg() const { assert(m_type == PTYPE_INT_REGISTER); assert(m_value < REG_MAX); return m_value; } - uint32_t freg() const { assert(m_type == PTYPE_FLOAT_REGISTER); assert(m_value < REG_MAX); return m_value; } + u64 immediate() const { assert(m_type == PTYPE_IMMEDIATE); return m_value; } + u32 ireg() const { assert(m_type == PTYPE_INT_REGISTER); assert(m_value < REG_MAX); return m_value; } + u32 freg() const { assert(m_type == PTYPE_FLOAT_REGISTER); assert(m_value < REG_MAX); return m_value; } void *memory() const { assert(m_type == PTYPE_MEMORY); return reinterpret_cast(m_value); } // type queries @@ -433,7 +433,7 @@ class drcbe_x64 : public drcbe_interface bool is_memory() const { return (m_type == PTYPE_MEMORY); } // other queries - bool is_immediate_value(uint64_t value) const { return (m_type == PTYPE_IMMEDIATE && m_value == value); } + bool is_immediate_value(u64 value) const { return (m_type == PTYPE_IMMEDIATE && m_value == value); } bool is_cold_register() const { return m_coldreg; } // helpers @@ -462,16 +462,16 @@ class drcbe_x64 : public drcbe_interface x86code * debug_log_hashjmp; // hashjmp debugging x86code * debug_log_hashjmp_fail; // hashjmp debugging - uint32_t ssemode; // saved SSE mode - uint32_t ssemodesave; // temporary location for saving - uint32_t ssecontrol[4]; // copy of 
the sse_control array + u32 ssemode; // saved SSE mode + u32 ssemodesave; // temporary location for saving + u32 ssecontrol[4]; // copy of the sse_control array float single1; // 1.0 in single-precision double double1; // 1.0 in double-precision void * stacksave; // saved stack pointer - uint8_t flagsmap[0x100]; // flags map - uint16_t flagsunmap[0x20]; // flags unmapper + u8 flagsmap[0x100]; // x86 flags to UML flags table + u16 flagsunmap[0x20]; // UML flags to x86 flags table }; // resolved memory handler functions @@ -486,12 +486,12 @@ class drcbe_x64 : public drcbe_interface }; // helpers - Mem MABS(const void *ptr, const uint32_t size = 0) const { return Mem(rbp, offset_from_rbp(ptr), size); } - bool short_immediate(int64_t immediate) const { return (int32_t)immediate == immediate; } + Mem MABS(const void *ptr, const u32 size = 0) const { return Mem(rbp, offset_from_rbp(ptr), size); } + bool short_immediate(s64 immediate) const { return s32(immediate) == immediate; } void normalize_commutative(be_parameter &inner, be_parameter &outer); void normalize_commutative(const be_parameter &dst, be_parameter &inner, be_parameter &outer); - int32_t offset_from_rbp(const void *ptr) const; - Gp get_base_register_and_offset(Assembler &a, void *target, Gp const ®, int32_t &offset); + s32 offset_from_rbp(const void *ptr) const; + Gp get_base_register_and_offset(Assembler &a, void *target, Gp const ®, s32 &offset); void smart_call_r64(Assembler &a, x86code *target, Gp const ®) const; void smart_call_m64(Assembler &a, x86code **target) const; void emit_memaccess_setup(Assembler &a, const memory_accessors &accessors, const address_space::specific_access_info::side &side) const; @@ -540,6 +540,8 @@ class drcbe_x64 : public drcbe_interface void op_set(Assembler &a, const uml::instruction &inst); void op_mov(Assembler &a, const uml::instruction &inst); void op_sext(Assembler &a, const uml::instruction &inst); + void op_bfxu(Assembler &a, const uml::instruction &inst); + void op_bfxs(Assembler &a, const uml::instruction &inst); void op_roland(Assembler &a, const uml::instruction &inst); void op_rolins(Assembler &a, const uml::instruction &inst); void op_add(Assembler &a, const uml::instruction &inst); @@ -597,7 +599,7 @@ class drcbe_x64 : public drcbe_interface // special-case move helpers void movsx_r64_p32(Assembler &a, Gp const ®, be_parameter const ¶m); - void mov_r64_imm(Assembler &a, Gp const ®, uint64_t const imm) const; + void mov_r64_imm(Assembler &a, Gp const ®, u64 const imm) const; // floating-point helpers void movss_r128_p32(Assembler &a, Vec const ®, be_parameter const ¶m); @@ -615,10 +617,12 @@ class drcbe_x64 : public drcbe_interface drc_map_variables m_map; // code map x86log_context::ptr m_log; // logging FILE * m_log_asmjit; + bool m_lzcnt; // do we have lzcnt support? + bool m_bmi; // do we have BMI support? 
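+	// m_lzcnt selects the single-instruction LZCNT path in op_lzcnt;
+	// m_bmi enables the BEXTR fast path in op_bfxu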
- uint32_t * m_absmask32; // absolute value mask (32-bit) - uint64_t * m_absmask64; // absolute value mask (32-bit) - uint8_t * m_rbpvalue; // value of RBP + u32 * m_absmask32; // absolute value mask (32-bit) + u64 * m_absmask64; // absolute value mask (32-bit) + u8 * m_rbpvalue; // value of RBP x86_entry_point_func m_entry; // entry point x86code * m_exit; // exit point @@ -683,6 +687,8 @@ inline void drcbe_x64::generate_one(Assembler &a, const uml::instruction &inst) case uml::OP_SET: op_set(a, inst); break; // SET dst,c case uml::OP_MOV: op_mov(a, inst); break; // MOV dst,src[,c] case uml::OP_SEXT: op_sext(a, inst); break; // SEXT dst,src + case uml::OP_BFXU: op_bfxu(a, inst); break; // BFXU dst,src1,src2,src3 + case uml::OP_BFXS: op_bfxs(a, inst); break; // BFXS dst,src1,src2,src3 case uml::OP_ROLAND: op_roland(a, inst); break; // ROLAND dst,src1,src2,src3 case uml::OP_ROLINS: op_rolins(a, inst); break; // ROLINS dst,src1,src2,src3 case uml::OP_ADD: op_add(a, inst); break; // ADD dst,src1,src2[,f] @@ -748,7 +754,7 @@ inline void drcbe_x64::generate_one(Assembler &a, const uml::instruction &inst) // into a reduced set //------------------------------------------------- -drcbe_x64::be_parameter::be_parameter(drcbe_x64 &drcbe, const parameter ¶m, uint32_t allowed) +drcbe_x64::be_parameter::be_parameter(drcbe_x64 &drcbe, const parameter ¶m, u32 allowed) { int regnum; @@ -879,12 +885,12 @@ inline void drcbe_x64::normalize_commutative(const be_parameter &dst, be_paramet // from rbp //------------------------------------------------- -inline int32_t drcbe_x64::offset_from_rbp(const void *ptr) const +inline s32 drcbe_x64::offset_from_rbp(const void *ptr) const { - const int64_t delta = reinterpret_cast(ptr) - m_rbpvalue; - if (int32_t(delta) != delta) + const s64 delta = reinterpret_cast(ptr) - m_rbpvalue; + if (s32(delta) != delta) throw emu_fatalerror("drcbe_x64::offset_from_rbp: delta out of range"); - return int32_t(delta); + return s32(delta); } @@ -894,9 +900,9 @@ inline int32_t drcbe_x64::offset_from_rbp(const void *ptr) const // target address //------------------------------------------------- -inline Gp drcbe_x64::get_base_register_and_offset(Assembler &a, void *target, Gp const ®, int32_t &offset) +inline Gp drcbe_x64::get_base_register_and_offset(Assembler &a, void *target, Gp const ®, s32 &offset) { - const int64_t delta = reinterpret_cast(target) - m_rbpvalue; + const s64 delta = reinterpret_cast(target) - m_rbpvalue; if (short_immediate(delta)) { offset = delta; @@ -918,7 +924,7 @@ inline Gp drcbe_x64::get_base_register_and_offset(Assembler &a, void *target, Gp inline void drcbe_x64::smart_call_r64(Assembler &a, x86code *target, Gp const ®) const { - const int64_t delta = target - (x86code *)(a.code()->base_address() + a.offset() + 5); + const s64 delta = target - (x86code *)(a.code()->base_address() + a.offset() + 5); if (short_immediate(delta)) a.call(imm(target)); // call target else @@ -936,7 +942,7 @@ inline void drcbe_x64::smart_call_r64(Assembler &a, x86code *target, Gp const &r inline void drcbe_x64::smart_call_m64(Assembler &a, x86code **target) const { - const int64_t delta = *target - (x86code *)(a.code()->base_address() + a.offset() + 5); + const s64 delta = *target - (x86code *)(a.code()->base_address() + a.offset() + 5); if (short_immediate(delta)) a.call(imm(*target)); // call *target else @@ -1001,12 +1007,14 @@ void drcbe_x64::emit_memaccess_setup(Assembler &a, const memory_accessors &acces // drcbe_x64 - constructor 
//------------------------------------------------- -drcbe_x64::drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, uint32_t flags, int modes, int addrbits, int ignorebits) +drcbe_x64::drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, u32 flags, int modes, int addrbits, int ignorebits) : drcbe_interface(drcuml, cache, device) , m_hash(cache, modes, addrbits, ignorebits) , m_map(cache, 0xaaaaaaaa5555) , m_log_asmjit(nullptr) - , m_absmask32((uint32_t *)cache.alloc_near(16*2 + 15)) + , m_lzcnt(false) + , m_bmi(false) + , m_absmask32((u32 *)cache.alloc_near(16*2 + 15)) , m_absmask64(nullptr) , m_rbpvalue(cache.near() + 0x80) , m_entry(nullptr) @@ -1015,8 +1023,13 @@ drcbe_x64::drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, u , m_endofblock(nullptr) , m_near(*(near_state *)cache.alloc_near(sizeof(m_near))) { + // check for optional CPU features + const auto &x86_features = CpuInfo::host().features().x86(); + m_lzcnt = x86_features.has_lzcnt(); + m_bmi = x86_features.has_bmi(); + // build up necessary arrays - static const uint32_t sse_control[4] = + constexpr u32 sse_control[4] = { 0xff80, // ROUND_TRUNC 0x9f80, // ROUND_ROUND @@ -1024,13 +1037,13 @@ drcbe_x64::drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, u 0xbf80 // ROUND_FLOOR }; memcpy(m_near.ssecontrol, sse_control, sizeof(m_near.ssecontrol)); - m_near.single1 = 1.0f; + m_near.single1 = 1.0F; m_near.double1 = 1.0; // create absolute value masks that are aligned to SSE boundaries - m_absmask32 = (uint32_t *)(((uintptr_t)m_absmask32 + 15) & ~15); + m_absmask32 = (u32 *)(((uintptr_t)m_absmask32 + 15) & ~15); m_absmask32[0] = m_absmask32[1] = m_absmask32[2] = m_absmask32[3] = 0x7fffffff; - m_absmask64 = (uint64_t *)&m_absmask32[4]; + m_absmask64 = (u64 *)&m_absmask32[4]; m_absmask64[0] = m_absmask64[1] = 0x7fffffffffffffffU; // get pointers to C functions we need to call @@ -1043,7 +1056,7 @@ drcbe_x64::drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, u // build the flags map for (int entry = 0; entry < std::size(m_near.flagsmap); entry++) { - uint8_t flags = 0; + u8 flags = 0; if (entry & 0x001) flags |= FLAG_C; if (entry & 0x004) flags |= FLAG_U; if (entry & 0x040) flags |= FLAG_Z; @@ -1053,7 +1066,7 @@ drcbe_x64::drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, u } for (int entry = 0; entry < std::size(m_near.flagsunmap); entry++) { - uint16_t flags = 0; + u16 flags = 0; if (entry & FLAG_C) flags |= 0x001 << 8; if (entry & FLAG_U) flags |= 0x004 << 8; if (entry & FLAG_Z) flags |= 0x040 << 8; @@ -1127,12 +1140,12 @@ size_t drcbe_x64::emit(CodeHolder &ch) throw emu_fatalerror("asmjit::CodeHolder::relocate_to_base() error %u", std::underlying_type_t(err)); } - size_t const alignment = ch.base_address() - uint64_t(m_cache.top()); + size_t const alignment = ch.base_address() - uintptr_t(m_cache.top()); size_t const code_size = ch.code_size(); // test if enough room remains in drc cache drccodeptr *cachetop = m_cache.begin_codegen(alignment + code_size); - if (cachetop == nullptr) + if (!cachetop) return 0; err = ch.copy_flattened_data(drccodeptr(ch.base_address()), code_size, CopySectionFlags::kPadTargetBuffer); @@ -1160,7 +1173,7 @@ void drcbe_x64::reset() x86code *dst = (x86code *)m_cache.top(); CodeHolder ch; - ch.init(Environment::host(), uint64_t(dst)); + ch.init(Environment::host(), u64(dst)); FileLogger logger(m_log_asmjit); if (logger.file()) @@ -1179,7 +1192,7 @@ void drcbe_x64::reset() 
a.bind(a.new_named_label("entry_point")); FuncDetail entry_point; - entry_point.init(FuncSignature::build(CallConvId::kCDecl), Environment::host()); + entry_point.init(FuncSignature::build(CallConvId::kCDecl), Environment::host()); FuncFrame frame; frame.init(entry_point); @@ -1251,7 +1264,7 @@ int drcbe_x64::execute(code_handle &entry) // drcbex64_generate - generate code //------------------------------------------------- -void drcbe_x64::generate(drcuml_block &block, const instruction *instlist, uint32_t numinst) +void drcbe_x64::generate(drcuml_block &block, const instruction *instlist, u32 numinst) { // do this here because device.debug() isn't initialised at construction time if (!m_debug_cpu_instruction_hook && (m_device.machine().debug_flags & DEBUG_FLAG_ENABLED)) @@ -1282,7 +1295,7 @@ void drcbe_x64::generate(drcuml_block &block, const instruction *instlist, uint3 x86code *dst = (x86code *)(uintptr_t(m_cache.top() + linemask) & ~linemask); CodeHolder ch; - ch.init(Environment::host(), uint64_t(dst)); + ch.init(Environment::host(), u64(dst)); ThrowableErrorHandler e; ch.set_error_handler(&e); @@ -1323,7 +1336,7 @@ void drcbe_x64::generate(drcuml_block &block, const instruction *instlist, uint3 if (inst.opcode() == OP_HANDLE) blockname = inst.param(0).handle().string(); else if (inst.opcode() == OP_HASH) - blockname = string_format("Code: mode=%d PC=%08X", (uint32_t)inst.param(0).immediate(), (offs_t)inst.param(1).immediate()); + blockname = string_format("Code: mode=%d PC=%08X", (u32)inst.param(0).immediate(), (offs_t)inst.param(1).immediate()); } // generate code @@ -1358,7 +1371,7 @@ void drcbe_x64::generate(drcuml_block &block, const instruction *instlist, uint3 // exists in the hash table //------------------------------------------------- -bool drcbe_x64::hash_exists(uint32_t mode, uint32_t pc) const noexcept +bool drcbe_x64::hash_exists(u32 mode, u32 pc) const noexcept { return m_hash.code_exists(mode, pc); } @@ -1506,7 +1519,7 @@ void drcbe_x64::shift_op_param(Assembler &a, Inst::Id const opcode, size_t opsiz if (param.is_immediate()) { - const uint32_t bitshift = param.immediate() & (opsize * 8 - 1); + const u32 bitshift = param.immediate() & (opsize * 8 - 1); if (bitshift) a.emit(opcode, dst, imm(param.immediate())); @@ -1523,7 +1536,7 @@ void drcbe_x64::shift_op_param(Assembler &a, Inst::Id const opcode, size_t opsiz if (!rotate || (update_flags & (FLAG_S | FLAG_Z))) { if (dst.is_mem()) - a.test(dst.as(), util::make_bitmask(opsize * 8)); + a.test(dst.as(), util::make_bitmask(opsize * 8)); else a.test(dst.as(), dst.as()); } @@ -1553,7 +1566,7 @@ void drcbe_x64::shift_op_param(Assembler &a, Inst::Id const opcode, size_t opsiz if (update_flags & (FLAG_S | FLAG_Z)) { if (dst.is_mem()) - a.test(dst.as(), util::make_bitmask(opsize * 8)); + a.test(dst.as(), util::make_bitmask(opsize * 8)); else a.test(dst.as(), dst.as()); } @@ -1583,7 +1596,7 @@ void drcbe_x64::shift_op_param(Assembler &a, Inst::Id const opcode, size_t opsiz a.rcl(r10b, 1); // save carry if (dst.is_mem()) - a.test(dst.as(), util::make_bitmask(opsize * 8)); + a.test(dst.as(), util::make_bitmask(opsize * 8)); else a.test(dst.as(), dst.as()); @@ -1670,10 +1683,10 @@ void drcbe_x64::movsx_r64_p32(Assembler &a, Gp const ®, be_parameter const &p { if (param.is_immediate()) { - if ((int32_t)param.immediate() >= 0) + if (s32(param.immediate()) >= 0) a.mov(reg.r32(), param.immediate()); // mov reg,param else - mov_r64_imm(a, reg, int32_t(param.immediate())); // mov reg,param + mov_r64_imm(a, reg, s32(param.immediate())); 
// mov reg,param } else if (param.is_memory()) a.movsxd(reg, MABS(param.memory())); // movsxd reg,[param] @@ -1681,7 +1694,7 @@ void drcbe_x64::movsx_r64_p32(Assembler &a, Gp const ®, be_parameter const &p a.movsxd(reg, gpd(param.ireg())); // movsxd reg,param } -void drcbe_x64::mov_r64_imm(Assembler &a, Gp const ®, uint64_t const imm) const +void drcbe_x64::mov_r64_imm(Assembler &a, Gp const ®, u64 const imm) const { if (s32(u32(imm)) == s64(imm)) { @@ -1693,7 +1706,7 @@ void drcbe_x64::mov_r64_imm(Assembler &a, Gp const ®, uint64_t const imm) con } else { - const int64_t delta = imm - (a.code()->base_address() + a.offset() + 7); + const s64 delta = imm - (a.code()->base_address() + a.offset() + 7); if (short_immediate(delta)) a.lea(reg.r64(), ptr(rip, delta)); else @@ -2036,8 +2049,8 @@ void drcbe_x64::op_hashjmp(Assembler &a, const instruction &inst) if (pcp.is_immediate()) { // a straight immediate jump is direct, though we need the PC in EAX in case of failure - uint32_t l1val = (pcp.immediate() >> m_hash.l1shift()) & m_hash.l1mask(); - uint32_t l2val = (pcp.immediate() >> m_hash.l2shift()) & m_hash.l2mask(); + u32 l1val = (pcp.immediate() >> m_hash.l1shift()) & m_hash.l1mask(); + u32 l2val = (pcp.immediate() >> m_hash.l2shift()) & m_hash.l2mask(); a.short_().lea(gpq(REG_PARAM1), ptr(nocode)); // lea rcx,[rip+nocode] a.jmp(MABS(&m_hash.base()[modep.immediate()][l1val][l2val])); // jmp hash[modep][l1val][l2val] } @@ -2064,8 +2077,8 @@ void drcbe_x64::op_hashjmp(Assembler &a, const instruction &inst) if (pcp.is_immediate()) { // fixed PC - uint32_t l1val = (pcp.immediate() >> m_hash.l1shift()) & m_hash.l1mask(); - uint32_t l2val = (pcp.immediate() >> m_hash.l2shift()) & m_hash.l2mask(); + u32 l1val = (pcp.immediate() >> m_hash.l1shift()) & m_hash.l1mask(); + u32 l2val = (pcp.immediate() >> m_hash.l2shift()) & m_hash.l2mask(); a.mov(rdx, ptr(rcx, l1val * 8)); // mov rdx,[rcx+l1val*8] a.short_().lea(gpq(REG_PARAM1), ptr(nocode)); // lea rcx,[rip+nocode] a.jmp(ptr(rdx, l2val * 8)); // jmp [l2val*8] @@ -2391,7 +2404,7 @@ void drcbe_x64::op_getflgs(Assembler &a, const instruction &inst) // pick a target register for the general case Gp dstreg = dstp.select_register(edx); - uint32_t flagmask = 0; + u32 flagmask = 0; switch (maskp.immediate()) { @@ -2567,7 +2580,7 @@ void drcbe_x64::op_setflgs(Assembler &a, const instruction &inst) if (srcp.is_immediate()) { - uint32_t const flags = m_near.flagsunmap[srcp.immediate() & FLAGS_ALL]; + u32 const flags = m_near.flagsunmap[srcp.immediate() & FLAGS_ALL]; if (!flags) a.xor_(eax, eax); else @@ -2749,7 +2762,7 @@ void drcbe_x64::op_load(Assembler &a, const instruction &inst) int size = scalesizep.size(); // determine the pointer base - int32_t baseoffs; + s32 baseoffs; const Gp basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs); // pick a target register for the general case @@ -2761,11 +2774,11 @@ void drcbe_x64::op_load(Assembler &a, const instruction &inst) ptrdiff_t const offset = baseoffs + (ptrdiff_t(s32(u32(indp.immediate()))) << scalesizep.scale()); if (size == SIZE_BYTE) - a.movzx(dstreg, byte_ptr(basereg, offset)); + a.movzx(dstreg.r32(), byte_ptr(basereg, offset)); else if (size == SIZE_WORD) - a.movzx(dstreg, word_ptr(basereg, offset)); + a.movzx(dstreg.r32(), word_ptr(basereg, offset)); else if (size == SIZE_DWORD) - a.mov(dstreg, dword_ptr(basereg, offset)); + a.mov(dstreg.r32(), dword_ptr(basereg, offset)); else if (size == SIZE_QWORD) a.mov(dstreg, ptr(basereg, offset)); } @@ -2775,11 +2788,11 @@ void 
drcbe_x64::op_load(Assembler &a, const instruction &inst) const Gp indreg = rcx; movsx_r64_p32(a, indreg, indp); if (size == SIZE_BYTE) - a.movzx(dstreg, byte_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); + a.movzx(dstreg.r32(), byte_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); else if (size == SIZE_WORD) - a.movzx(dstreg, word_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); + a.movzx(dstreg.r32(), word_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); else if (size == SIZE_DWORD) - a.mov(dstreg, dword_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); + a.mov(dstreg.r32(), dword_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); else if (size == SIZE_QWORD) a.mov(dstreg, ptr(basereg, indreg, scalesizep.scale(), baseoffs)); } @@ -2809,7 +2822,7 @@ void drcbe_x64::op_loads(Assembler &a, const instruction &inst) int size = scalesizep.size(); // determine the pointer base - int32_t baseoffs; + s32 baseoffs; const Gp basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs); // pick a target register for the general case @@ -2872,7 +2885,7 @@ void drcbe_x64::op_store(Assembler &a, const instruction &inst) int size = scalesizep.size(); // determine the pointer base - int32_t baseoffs; + s32 baseoffs; const Gp basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs); // pick a source register for the general case @@ -2996,9 +3009,9 @@ void drcbe_x64::op_read(Assembler &a, const instruction &inst) { // set default mem_mask if (accessors.specific.native_bytes <= 4) - a.mov(gpd(REG_PARAM3), make_bitmask(accessors.specific.native_bytes << 3)); + a.mov(gpd(REG_PARAM3), make_bitmask(accessors.specific.native_bytes << 3)); else - a.mov(gpq(REG_PARAM3), make_bitmask(accessors.specific.native_bytes << 3)); + a.mov(gpq(REG_PARAM3), make_bitmask(accessors.specific.native_bytes << 3)); emit_memaccess_setup(a, accessors, accessors.specific.read); // get dispatch table entry } @@ -3010,9 +3023,9 @@ void drcbe_x64::op_read(Assembler &a, const instruction &inst) a.mov(ptr(rsp, 32), gpq(int_register_map[0])); // save I0 register if ((accessors.specific.native_bytes <= 4) || (spacesizep.size() != SIZE_QWORD)) - a.mov(gpd(REG_PARAM3), imm(make_bitmask(8 << spacesizep.size()))); // set default mem_mask + a.mov(gpd(REG_PARAM3), imm(make_bitmask(8 << spacesizep.size()))); // set default mem_mask else - a.mov(gpq(REG_PARAM3), imm(make_bitmask(8 << spacesizep.size()))); // set default mem_mask + a.mov(gpq(REG_PARAM3), imm(make_bitmask(8 << spacesizep.size()))); // set default mem_mask a.mov(ecx, gpd(REG_PARAM2)); // copy address for bit offset if (accessors.has_high_bits && !accessors.mask_high_bits) @@ -3327,9 +3340,9 @@ void drcbe_x64::op_write(Assembler &a, const instruction &inst) { // set default mem_mask if (accessors.specific.native_bytes <= 4) - a.mov(gpd(REG_PARAM4), make_bitmask(accessors.specific.native_bytes << 3)); + a.mov(gpd(REG_PARAM4), make_bitmask(accessors.specific.native_bytes << 3)); else - a.mov(gpq(REG_PARAM4), make_bitmask(accessors.specific.native_bytes << 3)); + a.mov(gpq(REG_PARAM4), make_bitmask(accessors.specific.native_bytes << 3)); emit_memaccess_setup(a, accessors, accessors.specific.write); } @@ -3373,9 +3386,9 @@ void drcbe_x64::op_write(Assembler &a, const instruction &inst) } a.and_(ecx, imm((accessors.specific.native_bytes - (1 << spacesizep.size())) << 3)); // mask bit address if ((accessors.specific.native_bytes <= 4) || (spacesizep.size() != SIZE_QWORD)) - a.mov(r11d, imm(make_bitmask(8 << spacesizep.size()))); // set 
default mem_mask + a.mov(r11d, imm(make_bitmask(8 << spacesizep.size()))); // set default mem_mask else - a.mov(r11, imm(make_bitmask(8 << spacesizep.size()))); // set default mem_mask + a.mov(r11, imm(make_bitmask(8 << spacesizep.size()))); // set default mem_mask if (accessors.has_high_bits && accessors.mask_high_bits && accessors.specific.low_bits) a.mov(gpd(REG_PARAM2), r10d); // restore masked address if (accessors.specific.write.is_virtual) @@ -3739,13 +3752,12 @@ void drcbe_x64::op_sext(Assembler &a, const instruction &inst) const parameter &sizep = inst.param(2); assert(sizep.is_size()); - Gp dstreg = dstp.select_register(rax); + Gp const dstreg = dstp.select_register((inst.size() == 4) ? eax : rax); + Gp const srcreg = srcp.select_register(dstreg); if (inst.size() == 4) { // 32-bit form - dstreg = dstreg.r32(); - if (srcp.is_memory()) { if (sizep.size() == SIZE_BYTE) @@ -3757,7 +3769,6 @@ void drcbe_x64::op_sext(Assembler &a, const instruction &inst) } else { - Gp const srcreg = srcp.select_register(dstreg); mov_reg_param(a, srcreg, srcp); if (sizep.size() == SIZE_BYTE) a.movsx(dstreg, srcreg.r8()); @@ -3766,8 +3777,6 @@ void drcbe_x64::op_sext(Assembler &a, const instruction &inst) else if (sizep.size() == SIZE_DWORD) a.mov(dstreg, srcreg); } - - mov_param_reg(a, dstp, dstreg); } else if (inst.size() == 8) { @@ -3785,7 +3794,6 @@ void drcbe_x64::op_sext(Assembler &a, const instruction &inst) } else { - Gp const srcreg = srcp.select_register(dstreg); mov_reg_param(a, srcreg, srcp); if (sizep.size() == SIZE_BYTE) a.movsx(dstreg, srcreg.r8()); @@ -3796,15 +3804,283 @@ void drcbe_x64::op_sext(Assembler &a, const instruction &inst) else if (sizep.size() == SIZE_QWORD) a.mov(dstreg, srcreg); } - - mov_param_reg(a, dstp, dstreg); } + mov_param_reg(a, dstp, dstreg); + if (inst.flags() != 0) a.test(dstreg, dstreg); } +//------------------------------------------------- +// op_bfxu - process a BFXU opcode +//------------------------------------------------- + +void drcbe_x64::op_bfxu(Assembler &a, const instruction &inst) +{ + // validate instruction + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_S | FLAG_Z); + + // normalize parameters + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + be_parameter shiftp(*this, inst.param(2), PTYPE_MRI); + be_parameter widthp(*this, inst.param(3), PTYPE_MRI); + const unsigned bits = inst.size() * 8; + + if (widthp.is_immediate_value(0)) + { + // undefined behaviour - do something + if (inst.flags() || dstp.is_int_register()) + { + Gp dstreg = dstp.select_register((inst.size() == 4) ? eax : rax); + + a.xor_(dstreg, dstreg); + + mov_param_reg(a, dstp, dstreg); + } + else if (dstp.is_memory()) + { + a.mov(MABS(dstp.memory(), inst.size()), 0); + } + } + else + { + Gp dstreg; + Gp tempreg = (inst.size() == 4) ? ecx : rcx; + + if (widthp.is_immediate()) + { + const unsigned width = widthp.immediate() & (bits - 1); + + dstreg = dstp.select_register((inst.size() == 4) ? eax : rax); + + if (m_bmi && shiftp.is_immediate() && ((width + (shiftp.immediate() & (bits - 1))) <= bits)) + { + const unsigned shift = shiftp.immediate() & (bits - 1); + + if (srcp.is_immediate()) + mov_reg_param(a, dstreg, srcp); + + a.mov(tempreg.r32(), shift | (width << 8)); + if (srcp.is_immediate()) + a.bextr(dstreg, dstreg, tempreg); + else if (srcp.is_int_register()) + a.bextr(dstreg, srcp.select_register((inst.size() == 4) ? 
eax : rax), tempreg); + else if (srcp.is_memory()) + a.bextr(dstreg, MABS(srcp.memory()), tempreg); + + if (inst.flags() & FLAG_S) + a.test(dstreg, dstreg); + } + else + { + if (!shiftp.is_immediate()) + mov_reg_param(a, tempreg.r32(), shiftp); + mov_reg_param(a, dstreg, srcp); + + if (shiftp.is_immediate()) + { + const unsigned shift = shiftp.immediate() & (bits - 1); + + a.ror(dstreg, shift); + } + else + { + a.ror(dstreg, tempreg.r8()); + } + + if (!inst.flags() && (width == 8)) + { + a.movzx(dstreg.r32(), dstreg.r8()); + } + else if (!inst.flags() && (width == 16)) + { + a.movzx(dstreg.r32(), dstreg.r16()); + } + else if (!inst.flags() && (width == 32)) + { + a.mov(dstreg.r32(), dstreg.r32()); + } + else if (width <= 32) + { + if ((bits == 64) && (inst.flags() & FLAG_S) && (width == 32)) + { + a.mov(dstreg.r32(), dstreg.r32()); + a.test(dstreg, dstreg); + } + else + { + a.and_(dstreg.r32(), util::make_bitmask(width)); + } + } + else + { + a.mov(tempreg, util::make_bitmask(width)); + a.and_(dstreg, tempreg); + } + } + } + else + { + Gp widthreg = widthp.select_register((inst.size() == 4) ? edx : rdx); + dstreg = dstp.select_register((inst.size() == 4) ? eax : rax, widthp); + + if (!shiftp.is_immediate()) + mov_reg_param(a, tempreg, shiftp); + mov_reg_param(a, widthreg, widthp); + mov_reg_param(a, dstreg, srcp); + + if (shiftp.is_immediate()) + a.mov(tempreg.r32(), shiftp.immediate() & (bits - 1)); + a.add(tempreg.r32(), widthreg.r32()); + a.ror(dstreg, tempreg.r8()); + a.mov(tempreg.r32(), widthreg.r32()); + a.neg(tempreg.r32()); + a.and_(tempreg.r32(), bits - 1); + a.shr(dstreg, tempreg.r8()); + } + + mov_param_reg(a, dstp, dstreg); + } +} + + +//------------------------------------------------- +// op_bfxs - process a BFXS opcode +//------------------------------------------------- + +void drcbe_x64::op_bfxs(Assembler &a, const instruction &inst) +{ + // validate instruction + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_S | FLAG_Z); + + // normalize parameters + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + be_parameter shiftp(*this, inst.param(2), PTYPE_MRI); + be_parameter widthp(*this, inst.param(3), PTYPE_MRI); + const unsigned bits = inst.size() * 8; + + if (widthp.is_immediate_value(0)) + { + // undefined behaviour - do something + if (inst.flags() || dstp.is_int_register()) + { + Gp dstreg = dstp.select_register((inst.size() == 4) ? eax : rax); + + a.xor_(dstreg, dstreg); + + mov_param_reg(a, dstp, dstreg); + } + else if (dstp.is_memory()) + { + a.mov(MABS(dstp.memory(), inst.size()), 0); + } + } + else + { + Gp dstreg; + Gp tempreg = (inst.size() == 4) ? ecx : rcx; + + if (widthp.is_immediate()) + { + const unsigned width = widthp.immediate() & (bits - 1); + const bool use_movsx = (width == 8) || (width == 16) || (width == 32); + + if (!shiftp.is_immediate() && use_movsx) + { + dstreg = dstp.select_register((inst.size() == 4) ? eax : rax); + + mov_reg_param(a, tempreg.r32(), shiftp); + } + else + { + dstreg = dstp.select_register((inst.size() == 4) ? 
eax : rax, shiftp); + } + mov_reg_param(a, dstreg, srcp); + + if (shiftp.is_immediate()) + { + const unsigned shift = shiftp.immediate() & (bits - 1); + + if (use_movsx) + a.ror(dstreg, shift); + else + a.ror(dstreg, (width + shift) & (bits - 1)); + } + else + { + if (!use_movsx) + { + a.mov(tempreg.r32(), width); + if (shiftp.is_int_register()) + a.add(tempreg.r32(), shiftp.select_register(edx)); + else + a.add(tempreg.r32(), MABS(shiftp.memory())); + } + a.ror(dstreg, tempreg.r8()); + } + + if (!use_movsx) + { + a.sar(dstreg, -int(width) & (bits - 1)); + } + else + { + if (width == 8) + a.movsx(dstreg, dstreg.r8()); + else if (width == 16) + a.movsx(dstreg, dstreg.r16()); + else if (width == 32) + a.movsxd(dstreg, dstreg.r32()); + + if (inst.flags()) + a.test(dstreg, dstreg); + } + } + else if (shiftp.is_immediate_value(0)) + { + dstreg = dstp.select_register((inst.size() == 4) ? eax : rax); + + mov_reg_param(a, tempreg, widthp); + mov_reg_param(a, dstreg, srcp); + + a.ror(dstreg, tempreg.r8()); + a.neg(tempreg); + a.and_(tempreg, bits - 1); + a.sar(dstreg, tempreg.r8()); + } + else + { + Gp widthreg = widthp.select_register((inst.size() == 4) ? edx : rdx); + dstreg = dstp.select_register((inst.size() == 4) ? eax : rax, widthp); + + if (!shiftp.is_immediate()) + mov_reg_param(a, tempreg, shiftp); + mov_reg_param(a, widthreg, widthp); + mov_reg_param(a, dstreg, srcp); + + if (shiftp.is_immediate()) + a.mov(tempreg.r32(), shiftp.immediate() & (bits - 1)); + a.add(tempreg.r32(), widthreg.r32()); + a.ror(dstreg, tempreg.r8()); + a.mov(tempreg.r32(), widthreg.r32()); + a.neg(tempreg.r32()); + a.and_(tempreg.r32(), bits - 1); + a.sar(dstreg, tempreg.r8()); + } + + mov_param_reg(a, dstp, dstreg); + } +} + + //------------------------------------------------- // op_roland - process an ROLAND opcode //------------------------------------------------- @@ -3829,17 +4105,17 @@ void drcbe_x64::op_roland(Assembler &a, const instruction &inst) if (shiftp.is_immediate() && maskp.is_immediate()) { const unsigned shift = shiftp.immediate() & (bits - 1); - const uint64_t sizemask = util::make_bitmask(bits); - const uint64_t mask = maskp.immediate() & sizemask; + const u64 sizemask = util::make_bitmask(bits); + const u64 mask = maskp.immediate() & sizemask; mov_reg_param(a, dstreg, srcp); a.rol(dstreg, shift); if (!inst.flags() && (mask == 0x000000ff)) { - a.movzx(dstreg, dstreg.r8()); + a.movzx(dstreg.r32(), dstreg.r8()); } else if (!inst.flags() && (mask == 0x0000ffff)) { - a.movzx(dstreg, dstreg.r16()); + a.movzx(dstreg.r32(), dstreg.r16()); } else if (!inst.flags() && (mask == 0xffffffff)) { @@ -3849,10 +4125,10 @@ void drcbe_x64::op_roland(Assembler &a, const instruction &inst) { a.and_(dstreg, mask); } - else if (uint32_t(mask) == mask) + else if (u32(mask) == mask) { a.and_(dstreg, mask); // asmjit converts this to a DWORD-size operation - if (inst.flags()) + if (inst.flags() & FLAG_S) a.test(dstreg, dstreg); } else @@ -3904,7 +4180,7 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) Gp maskreg = (inst.size() == 4) ? 
edx : rdx; const unsigned bits = inst.size() * 8; - const uint64_t sizemask = util::make_bitmask(bits); + const u64 sizemask = util::make_bitmask(bits); if (shiftp.is_immediate() && (srcp.is_immediate() || maskp.is_immediate())) { @@ -3913,12 +4189,12 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) { // immediate source - uint64_t src = srcp.immediate() & sizemask; + u64 src = srcp.immediate() & sizemask; src = ((src << shift) | (src >> (bits - shift))) & sizemask; if (maskp.is_immediate()) { - const uint64_t mask = maskp.immediate() & sizemask; + const u64 mask = maskp.immediate() & sizemask; src &= mask; bool flags = false; @@ -3940,7 +4216,7 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) a.and_(dstreg, ~mask); flags = true; } - else if (uint32_t(~mask) == ~mask) + else if (u32(~mask) == ~mask) { a.and_(dstreg, ~mask); } @@ -3999,7 +4275,7 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) else { // variables source, immediate mask - const uint64_t mask = maskp.immediate() & sizemask; + const u64 mask = maskp.immediate() & sizemask; mov_reg_param(a, dstreg, dstp); @@ -4030,12 +4306,12 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) a.and_(srcreg, mask); } } - else if (mask == (util::make_bitmask(shift) & sizemask)) + else if (mask == (util::make_bitmask(shift) & sizemask)) { mov_reg_param(a, srcreg, srcp); a.shr(srcreg, bits - shift); } - else if (mask == (~util::make_bitmask(shift) & sizemask)) + else if (mask == (~util::make_bitmask(shift) & sizemask)) { mov_reg_param(a, srcreg, srcp); a.shl(srcreg, shift); @@ -4056,7 +4332,7 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) { a.mov(srcreg.r32(), srcreg.r32()); } - else if ((bits == 32) || (util::sext(mask, 32) == mask) || (uint32_t(mask) == mask)) + else if ((bits == 32) || (util::sext(mask, 32) == mask) || (u32(mask) == mask)) { a.and_(srcreg, mask); } @@ -4080,7 +4356,7 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) { a.movzx(dstreg, dstreg.r8()); } - else if ((bits == 32) || (util::sext(~mask, 32) == ~mask) || (uint32_t(~mask) == ~mask)) + else if ((bits == 32) || (util::sext(~mask, 32) == ~mask) || (u32(~mask) == ~mask)) { a.and_(dstreg, ~mask & sizemask); } @@ -4103,15 +4379,15 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) // generic case bool maskimm = maskp.is_immediate(); - uint64_t mask = 0; + u64 mask = 0; if (maskimm) { mask = maskp.immediate() & sizemask; if (bits != 32) { maskimm = - ((util::sext(mask, 32) == mask) && (uint32_t(~mask) == ~mask)) || - ((util::sext(~mask, 32) == ~mask) && (uint32_t(mask) == mask)); + ((util::sext(mask, 32) == mask) && (u32(~mask) == ~mask)) || + ((util::sext(~mask, 32) == ~mask) && (u32(mask) == mask)); } } @@ -4531,7 +4807,7 @@ void drcbe_x64::op_mulslw(Assembler &a, const instruction &inst) if (use3op) { // use 3-operand form to multiply by immediate - const int64_t imm = (inst.size() == 4) ? s32(u32(src2p.immediate())) : src2p.immediate(); + const s64 imm = (inst.size() == 4) ? 
s32(u32(src2p.immediate())) : src2p.immediate(); if (src1p.is_memory()) { a.imul(dstreg, MABS(src1p.memory(), inst.size()), imm); @@ -4976,41 +5252,40 @@ void drcbe_x64::op_lzcnt(Assembler &a, const instruction &inst) // normalize parameters be_parameter dstp(*this, inst.param(0), PTYPE_MR); be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + const unsigned bits = inst.size() * 8; - if (inst.flags()) - { - a.xor_(eax, eax); // reset status flags - a.test(eax, eax); - } + // pick a target register + Gp dstreg = dstp.select_register((inst.size() == 4) ? eax : rax); - // 32-bit form - if (inst.size() == 4) + if (srcp.is_immediate()) + mov_reg_param(a, dstreg, srcp); + + if (m_lzcnt) { - // pick a target register - Gp dstreg = dstp.select_register(eax); + if (srcp.is_immediate()) + a.lzcnt(dstreg, dstreg); + else if (srcp.is_int_register()) + a.lzcnt(dstreg, srcp.select_register((inst.size() == 4) ? eax : rax)); + else if (srcp.is_memory()) + a.lzcnt(dstreg, MABS(srcp.memory())); - mov_reg_param(a, dstreg, srcp); - a.mov(ecx, 32 ^ 31); - a.bsr(dstreg, dstreg); - a.cmovz(dstreg, ecx); - a.xor_(dstreg, 31); mov_param_reg(a, dstp, dstreg); - if (inst.flags()) + if (inst.flags() & FLAG_S) a.test(dstreg, dstreg); } - - // 64-bit form - else if (inst.size() == 8) + else { - // pick a target register - Gp dstreg = dstp.select_register(rax); + a.mov(ecx, bits ^ (bits - 1)); + if (srcp.is_immediate()) + a.bsr(dstreg, dstreg); + else if (srcp.is_int_register()) + a.bsr(dstreg, srcp.select_register((inst.size() == 4) ? eax : rax)); + else if (srcp.is_memory()) + a.bsr(dstreg, MABS(srcp.memory())); + a.cmovz(dstreg, ecx); + a.xor_(dstreg.r32(), bits - 1); - mov_reg_param(a, dstreg, srcp); - a.mov(ecx, 64 ^ 63); - a.bsr(dstreg, dstreg); - a.cmovz(dstreg, rcx); - a.xor_(dstreg, 63); mov_param_reg(a, dstp, dstreg); if (inst.flags()) @@ -5156,7 +5431,7 @@ void drcbe_x64::op_fload(Assembler &a, const instruction &inst) Vec const dstreg = dstp.select_register(REG_FSCRATCH1); // determine the pointer base - int32_t baseoffs; + s32 baseoffs; Gp const basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs); if (indp.is_immediate()) @@ -5200,7 +5475,7 @@ void drcbe_x64::op_fstore(Assembler &a, const instruction &inst) Vec const srcreg = srcp.select_register(REG_FSCRATCH1); // determine the pointer base - int32_t baseoffs; + s32 baseoffs; Gp const basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs); // 32-bit form @@ -6191,7 +6466,7 @@ std::unique_ptr make_drcbe_x64( drcuml_state &drcuml, device_t &device, drc_cache &cache, - uint32_t flags, + u32 flags, int modes, int addrbits, int ignorebits) diff --git a/src/devices/cpu/drcbex86.cpp b/src/devices/cpu/drcbex86.cpp index 88b5382746768..0350baa231c2a 100644 --- a/src/devices/cpu/drcbex86.cpp +++ b/src/devices/cpu/drcbex86.cpp @@ -546,6 +546,8 @@ class drcbe_x86 : public drcbe_interface void op_set(Assembler &a, const uml::instruction &inst); void op_mov(Assembler &a, const uml::instruction &inst); void op_sext(Assembler &a, const uml::instruction &inst); + void op_bfxu(Assembler &a, const uml::instruction &inst); + void op_bfxs(Assembler &a, const uml::instruction &inst); void op_roland(Assembler &a, const uml::instruction &inst); void op_rolins(Assembler &a, const uml::instruction &inst); void op_add(Assembler &a, const uml::instruction &inst); @@ -617,13 +619,13 @@ class drcbe_x86 : public drcbe_interface void emit_or_m64_p64(Assembler &a, Mem const &memref_lo, Mem const &memref_hi, be_parameter const ¶m, const 
diff --git a/src/devices/cpu/drcbex86.cpp b/src/devices/cpu/drcbex86.cpp index 88b5382746768..0350baa231c2a 100644 --- a/src/devices/cpu/drcbex86.cpp +++ b/src/devices/cpu/drcbex86.cpp @@ -546,6 +546,8 @@ class drcbe_x86 : public drcbe_interface void op_set(Assembler &a, const uml::instruction &inst); void op_mov(Assembler &a, const uml::instruction &inst); void op_sext(Assembler &a, const uml::instruction &inst); + void op_bfxu(Assembler &a, const uml::instruction &inst); + void op_bfxs(Assembler &a, const uml::instruction &inst); void op_roland(Assembler &a, const uml::instruction &inst); void op_rolins(Assembler &a, const uml::instruction &inst); void op_add(Assembler &a, const uml::instruction &inst); @@ -617,13 +619,13 @@ class drcbe_x86 : public drcbe_interface void emit_or_m64_p64(Assembler &a, Mem const &memref_lo, Mem const &memref_hi, be_parameter const &param, const uml::instruction &inst); void emit_xor_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const uml::instruction &inst); void emit_xor_m64_p64(Assembler &a, Mem const &memref_lo, Mem const &memref_hi, be_parameter const &param, const uml::instruction &inst); - void emit_shl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const uml::instruction &inst); - void emit_shr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const uml::instruction &inst); - void emit_sar_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const uml::instruction &inst); - void emit_rol_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const uml::instruction &inst); - void emit_ror_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const uml::instruction &inst); - void emit_rcl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const uml::instruction &inst); - void emit_rcr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const uml::instruction &inst); + void emit_shl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags); + void emit_shr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags); + void emit_sar_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags); + void emit_rol_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags); + void emit_ror_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags); + void emit_rcl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags); + void emit_rcr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags); void alu_op_param(Assembler &a, Inst::Id const opcode_lo, Inst::Id const opcode_hi, Gp const &lo, Gp const &hi, be_parameter const &param, bool const saveflags); void alu_op_param(Assembler &a, Inst::Id const opcode_lo, Inst::Id const opcode_hi, Mem const &lo, Mem const &hi, be_parameter const &param, bool const saveflags); @@ -724,6 +726,8 @@ inline void drcbe_x86::generate_one(Assembler &a, const uml::instruction &inst) case uml::OP_SET: op_set(a, inst); break; // SET dst,c case uml::OP_MOV: op_mov(a, inst); break; // MOV dst,src[,c] case uml::OP_SEXT: op_sext(a, inst); break; // SEXT dst,src + case uml::OP_BFXU: op_bfxu(a, inst); break; // BFXU dst,src1,src2,src3 + case uml::OP_BFXS: op_bfxs(a, inst); break; // BFXS dst,src1,src2,src3 case uml::OP_ROLAND: op_roland(a, inst); break; // ROLAND dst,src1,src2,src3 case uml::OP_ROLINS: op_rolins(a, inst); break; // ROLINS dst,src1,src2,src3 case uml::OP_ADD: op_add(a, inst); break; // ADD dst,src1,src2[,f] @@ -1013,8 +1017,8 @@ inline bool drcbe_x86::can_skip_upper_load(Assembler &a, uint32_t *memref, Gp co // drcbe_x86 - constructor //------------------------------------------------- -drcbe_x86::drcbe_x86(drcuml_state &drcuml, device_t &device, drc_cache &cache, uint32_t flags, int modes, int addrbits, int ignorebits) : - drcbe_interface(drcuml, cache, device) +drcbe_x86::drcbe_x86(drcuml_state &drcuml, device_t &device, drc_cache &cache, uint32_t flags, int modes, int addrbits, int ignorebits) + : drcbe_interface(drcuml, cache, device) , m_hash(cache, modes, addrbits, ignorebits) , m_map(cache, 0) , m_log_asmjit(nullptr) @@ -2022,19 +2026,19 @@ void drcbe_x86::emit_xor_m64_p64(Assembler &a, Mem const &memref_lo, Mem const & // pair of registers from a 64-bit 
parameter //------------------------------------------------- -void drcbe_x86::emit_shl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const instruction &inst) +void drcbe_x86::emit_shl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags) { - int saveflags = inst.flags() != 0; + bool const saveflags = flags != 0; if (param.is_immediate()) { int count = param.immediate() & 63; - if (!inst.flags() && count == 0) + if (!flags && count == 0) ;// skip else { while (count >= 32) { - if (inst.flags() != 0) + if (flags != 0) { a.shld(reghi, reglo, 31); // shld reghi,reglo,31 a.shl(reglo, 31); // shl reglo,31 @@ -2047,7 +2051,7 @@ void drcbe_x86::emit_shl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, count -= 32; } } - if (inst.flags() != 0 || count > 0) + if (flags != 0 || count > 0) { a.shld(reghi, reglo, count); // shld reghi,reglo,count if (saveflags && count != 0) a.pushfd(); // pushf @@ -2085,7 +2089,7 @@ void drcbe_x86::emit_shl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, a.test(ecx, 0x20); // test ecx,0x20 a.short_().jz(skip1); // jz skip1 - if (inst.flags() != 0) + if (flags != 0) { a.sub(ecx, 31); // sub ecx,31 a.shld(reghi, reglo, 31); // shld reghi,reglo,31 @@ -2136,19 +2140,19 @@ void drcbe_x86::emit_shl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, // pair of registers from a 64-bit parameter //------------------------------------------------- -void drcbe_x86::emit_shr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const instruction &inst) +void drcbe_x86::emit_shr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags) { - int saveflags = inst.flags() != 0; + bool const saveflags = flags != 0; if (param.is_immediate()) { int count = param.immediate() & 63; - if (!inst.flags() && count == 0) + if (!flags && count == 0) ;// skip else { while (count >= 32) { - if (inst.flags() != 0) + if (flags != 0) { a.shrd(reglo, reghi, 31); // shrd reglo,reghi,31 a.shr(reghi, 31); // shr reghi,31 @@ -2161,7 +2165,7 @@ void drcbe_x86::emit_shr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, count -= 32; } } - if (inst.flags() != 0 || count > 0) + if (flags != 0 || count > 0) { a.shrd(reglo, reghi, count); // shrd reglo,reghi,count if (saveflags && count != 0) a.pushfd(); // pushf @@ -2207,7 +2211,7 @@ void drcbe_x86::emit_shr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, a.test(ecx, 0x20); // test ecx,0x20 a.short_().jz(skip1); // jz skip1 - if (inst.flags() != 0) + if (flags != 0) { a.sub(ecx, 31); // sub ecx,31 a.shrd(reglo, reghi, 31); // shrd reglo,reghi,31 @@ -2266,19 +2270,19 @@ void drcbe_x86::emit_shr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, // pair of registers from a 64-bit parameter //------------------------------------------------- -void drcbe_x86::emit_sar_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const instruction &inst) +void drcbe_x86::emit_sar_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags) { - int saveflags = inst.flags() != 0; + bool const saveflags = flags != 0; if (param.is_immediate()) { int count = param.immediate() & 63; - if (!inst.flags() && count == 0) + if (!flags && count == 0) ;// skip else { while (count >= 32) { - if (inst.flags() != 0) + if (flags != 0) { a.shrd(reglo, reghi, 31); // shrd reglo,reghi,31 a.sar(reghi, 31); // sar reghi,31 @@ -2291,7 +2295,7 @@ void drcbe_x86::emit_sar_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, count -= 32; } } - if (inst.flags() 
!= 0 || count > 0) + if (flags != 0 || count > 0) { a.shrd(reglo, reghi, count); // shrd reglo,reghi,count if (saveflags && count != 0) a.pushfd(); // pushf @@ -2337,7 +2341,7 @@ void drcbe_x86::emit_sar_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, a.test(ecx, 0x20); // test ecx,0x20 a.short_().jz(skip1); // jz skip1 - if (inst.flags() != 0) + if (flags != 0) { a.sub(ecx, 31); // sub ecx,31 a.shrd(reglo, reghi, 31); // shrd reglo,reghi,31 @@ -2396,9 +2400,9 @@ void drcbe_x86::emit_sar_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, // pair of registers from a 64-bit parameter //------------------------------------------------- -void drcbe_x86::emit_rol_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const instruction &inst) +void drcbe_x86::emit_rol_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags) { - int saveflags = inst.flags() != 0; + bool const saveflags = flags != 0; Gp tempreg = esi; if ((reglo == tempreg) || (reghi == tempreg)) @@ -2412,7 +2416,7 @@ void drcbe_x86::emit_rol_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, if (param.is_immediate()) { int count = param.immediate() & 63; - if (!inst.flags() && count == 0) + if (!flags && count == 0) { // skip } @@ -2420,7 +2424,7 @@ void drcbe_x86::emit_rol_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, { while (count >= 32) { - if (inst.flags() != 0) + if (flags != 0) { a.mov(ecx, reglo); a.shld(reglo, reghi, 31); @@ -2470,7 +2474,7 @@ void drcbe_x86::emit_rol_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, a.cmp(ecx, 32); a.short_().jl(skip1); - if (inst.flags()) + if (flags) { Label const shift_loop = a.new_label(); @@ -2523,9 +2527,9 @@ void drcbe_x86::emit_rol_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, // pair of registers from a 64-bit parameter //------------------------------------------------- -void drcbe_x86::emit_ror_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const instruction &inst) +void drcbe_x86::emit_ror_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags) { - int saveflags = inst.flags() != 0; + bool const saveflags = flags != 0; Gp tempreg = esi; if ((reglo == tempreg) || (reghi == tempreg)) @@ -2539,7 +2543,7 @@ void drcbe_x86::emit_ror_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, if (param.is_immediate()) { int count = param.immediate() & 63; - if (!inst.flags() && count == 0) + if (!flags && count == 0) { // skip } @@ -2547,7 +2551,7 @@ void drcbe_x86::emit_ror_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, { while (count >= 32) { - if (inst.flags() != 0) + if (flags != 0) { a.mov(tempreg, reglo); a.shrd(reglo, reghi, 31); @@ -2599,7 +2603,7 @@ void drcbe_x86::emit_ror_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, a.short_().jl(skip1); a.bind(shift_loop); - if (inst.flags() != 0) + if (flags != 0) { a.sub(ecx, 31); a.mov(tempreg, reglo); @@ -2649,7 +2653,7 @@ void drcbe_x86::emit_ror_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, // pair of registers from a 64-bit parameter //------------------------------------------------- -void drcbe_x86::emit_rcl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const instruction &inst) +void drcbe_x86::emit_rcl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags) { Label loop = a.new_label(); Label skipall = a.new_label(); @@ -2677,9 +2681,9 @@ void drcbe_x86::emit_rcl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, a.rcl(reghi, 1); a.bind(skipall); - if (inst.flags()) + if (flags) { - if (inst.flags() & FLAG_C) + if (flags & FLAG_C) calculate_status_flags(a, reglo, FLAG_Z); else a.test(reglo, reglo);
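These rotate and shift helpers now take the UML flag mask directly instead of the whole instruction, so the BFX code paths below can request a flag-free rotate with FLAGS_NONE and leave flag generation to the final mask or shift step. As a model of the register-pair rotate they implement (a sketch under the assumption of a plain 64-bit rotate right; ``ror64_pair`` is a hypothetical name):

.. code-block:: C++

    #include <cstdint>
    #include <utility>

    // 64-bit rotate right by n on a lo/hi pair of 32-bit registers, the way
    // the shrd-based paths combine the halves: swap for counts of 32 or more,
    // then use two double-precision shifts for the remainder.
    void ror64_pair(uint32_t &lo, uint32_t &hi, unsigned n)
    {
        n &= 63;
        if (n >= 32)                // the "swap the halves" fast path
        {
            std::swap(lo, hi);
            n -= 32;
        }
        if (n)
        {
            const uint32_t newlo = (lo >> n) | (hi << (32 - n));    // shrd lo,hi,n
            const uint32_t newhi = (hi >> n) | (lo << (32 - n));    // shrd hi,lo,n
            lo = newlo;
            hi = newhi;
        }
    }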
@@ -2697,7 +2701,7 @@ // pair of registers from a 64-bit parameter //------------------------------------------------- -void drcbe_x86::emit_rcr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const instruction &inst) +void drcbe_x86::emit_rcr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags) { Label loop = a.new_label(); Label skipall = a.new_label(); @@ -2725,9 +2729,9 @@ void drcbe_x86::emit_rcr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, a.rcr(reglo, 1); a.bind(skipall); - if (inst.flags()) + if (flags) { - if (inst.flags() & FLAG_C) + if (flags & FLAG_C) calculate_status_flags(a, reglo, FLAG_Z); else a.test(reglo, reglo); @@ -4586,6 +4590,400 @@ void drcbe_x86::op_sext(Assembler &a, const instruction &inst) } +//------------------------------------------------- +// op_bfxu - process a BFXU opcode +//------------------------------------------------- + +void drcbe_x86::op_bfxu(Assembler &a, const instruction &inst) +{ + // validate instruction + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_S | FLAG_Z); + + // normalize parameters + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + be_parameter shiftp(*this, inst.param(2), PTYPE_MRI); + be_parameter widthp(*this, inst.param(3), PTYPE_MRI); + + if (inst.size() == 4) + { + // 32-bit form + if (widthp.is_immediate_value(0)) + { + // undefined behaviour - do something + if (inst.flags() || dstp.is_int_register()) + { + Gp const dstreg = dstp.select_register(eax); + + a.xor_(dstreg, dstreg); + + emit_mov_p32_r32(a, dstp, dstreg); + } + else if (dstp.is_memory()) + { + a.mov(MABS(dstp.memory(), 4), 0); + } + } + else + { + Gp dstreg; + + if (widthp.is_immediate()) + { + const unsigned width = widthp.immediate() & 31; + + dstreg = dstp.select_register(eax); + + if (!shiftp.is_immediate()) + emit_mov_r32_p32(a, ecx, shiftp); + emit_mov_r32_p32(a, dstreg, srcp); + + if (shiftp.is_immediate()) + { + const unsigned shift = shiftp.immediate() & 31; + + a.ror(dstreg, shift); + } + else + { + a.ror(dstreg, cl); + } + a.and_(dstreg, util::make_bitmask<u32>(width)); + } + else + { + Gp const widthreg = widthp.select_register(edx); + dstreg = dstp.select_register(eax, widthp); + + if (!shiftp.is_immediate()) + emit_mov_r32_p32(a, ecx, shiftp); + emit_mov_r32_p32(a, widthreg, widthp); + emit_mov_r32_p32(a, dstreg, srcp); + + if (shiftp.is_immediate()) + a.mov(ecx, shiftp.immediate() & 31); + a.add(ecx, widthreg); + a.ror(dstreg, cl); + a.mov(ecx, widthreg); + a.neg(ecx); + a.and_(ecx, 31); + a.shr(dstreg, cl); + } + + emit_mov_p32_r32(a, dstp, dstreg); + } + } + else if (inst.size() == 8) + { + // 64-bit form + if (widthp.is_immediate()) + { + be_parameter maskp(*this, uml::parameter(util::make_bitmask<u64>(widthp.immediate() & 63)), PTYPE_I); + + Gp const dstreg = dstp.select_register(eax, shiftp); + + emit_mov_r64_p64(a, dstreg, edx, srcp); + emit_ror_r64_p64(a, dstreg, edx, shiftp, FLAGS_NONE); + emit_and_r64_p64(a, dstreg, edx, maskp, inst); + emit_mov_p64_r64(a, dstp, dstreg, edx); + } + else + { + Gp const dstreg = dstp.select_register(eax, shiftp); + + // first make the mask + Label large = a.new_label(); + Label shift = a.new_label(); + + emit_mov_r32_p32(a, ecx, widthp); + 
a.mov(eax, ~u32(0)); + a.test(ecx, 0x20); + a.short_().jnz(large); + a.mov(dword_ptr(esp, 4), 0); + a.and_(ecx, 31); + a.shl(eax, cl); + a.not_(eax); + a.mov(dword_ptr(esp), eax); + a.short_().jmp(shift); + + a.bind(large); + + a.mov(dword_ptr(esp), eax); + a.and_(ecx, 31); + a.shl(eax, cl); + a.not_(eax); + a.mov(dword_ptr(esp, 4), eax); + + a.bind(shift); + + // shift the field into position + emit_mov_r64_p64(a, dstreg, edx, srcp); + emit_ror_r64_p64(a, dstreg, edx, shiftp, FLAGS_NONE); + + // apply the mask + a.and_(dstreg, dword_ptr(esp)); + if (inst.flags() & FLAG_Z) + { + a.pushfd(); + a.and_(edx, dword_ptr(esp, 8)); + emit_combine_z_flags(a); + } + else + { + a.and_(edx, dword_ptr(esp, 4)); + } + + emit_mov_p64_r64(a, dstp, dstreg, edx); + } + } +} + + +//------------------------------------------------- +// op_bfxs - process a BFXS opcode +//------------------------------------------------- + +void drcbe_x86::op_bfxs(Assembler &a, const instruction &inst) +{ + // validate instruction + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_S | FLAG_Z); + + // normalize parameters + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + be_parameter shiftp(*this, inst.param(2), PTYPE_MRI); + be_parameter widthp(*this, inst.param(3), PTYPE_MRI); + + if (inst.size() == 4) + { + // 32-bit form + if (widthp.is_immediate_value(0)) + { + // undefined behaviour - do something + if (inst.flags() || dstp.is_int_register()) + { + Gp const dstreg = dstp.select_register(eax); + + a.xor_(dstreg, dstreg); + + emit_mov_p32_r32(a, dstp, dstreg); + } + else if (dstp.is_memory()) + { + a.mov(MABS(dstp.memory(), 4), 0); + } + } + else + { + Gp dstreg; + + if (widthp.is_immediate()) + { + dstreg = dstp.select_register(eax, shiftp); + + const unsigned width = widthp.immediate() & 31; + + emit_mov_r32_p32(a, dstreg, srcp); + + if (shiftp.is_immediate()) + { + const unsigned shift = shiftp.immediate() & 31; + + a.ror(dstreg, (width + shift) & 31); + } + else + { + a.mov(ecx, width); + if (shiftp.is_int_register()) + a.add(ecx, shiftp.select_register(edx)); + else + a.add(ecx, MABS(shiftp.memory())); + a.ror(dstreg, cl); + } + a.sar(dstreg, -int(width) & 31); + } + else if (shiftp.is_immediate_value(0)) + { + dstreg = dstp.select_register(eax); + + emit_mov_r32_p32(a, ecx, widthp); + emit_mov_r32_p32(a, dstreg, srcp); + + a.ror(dstreg, cl); + a.neg(ecx); + a.and_(ecx, 31); + a.sar(dstreg, cl); + } + else + { + Gp const widthreg = widthp.select_register(edx); + dstreg = dstp.select_register(eax, widthp); + + if (!shiftp.is_immediate()) + emit_mov_r32_p32(a, ecx, shiftp); + emit_mov_r32_p32(a, widthreg, widthp); + emit_mov_r32_p32(a, dstreg, srcp); + + if (shiftp.is_immediate()) + a.mov(ecx, shiftp.immediate() & 31); + a.add(ecx, widthreg); + a.ror(dstreg, cl); + a.mov(ecx, widthreg); + a.neg(ecx); + a.and_(ecx, 31); + a.sar(dstreg, cl); + } + + emit_mov_p32_r32(a, dstp, dstreg); + } + } + else if (inst.size() == 8) + { + // 64-bit form + Gp dstreg; + + if (widthp.is_immediate()) + { + be_parameter rshiftp(*this, uml::parameter(-int64_t(widthp.immediate()) & 63), PTYPE_I); + + dstreg = dstp.select_register(eax); + + if (shiftp.is_immediate()) + { + be_parameter rotp(*this, uml::parameter((shiftp.immediate() + widthp.immediate()) & 63), PTYPE_I); + + emit_mov_r64_p64(a, dstreg, edx, srcp); + emit_ror_r64_p64(a, dstreg, edx, rotp, FLAGS_NONE); + } + else + { + Gp tempreg = esi; + if (dstreg == tempreg) + tempreg 
= edi; + if (dstreg == tempreg) + tempreg = ebp; + assert(dstreg != tempreg); + a.mov(dword_ptr(esp), tempreg); + + emit_mov_r32_p32(a, ecx, shiftp); + emit_mov_r64_p64(a, dstreg, edx, srcp); + + // if the count is at least 32, swap the halves + Label small = a.new_label(); + + a.add(ecx, widthp.immediate() & 63); + a.test(ecx, 0x20); + a.short_().jz(small); + a.xchg(dstreg, edx); + a.bind(small); + a.and_(ecx, 31); + + // do the extended rotate + reset_last_upper_lower_reg(); + a.mov(tempreg, edx); + a.shrd(edx, dstreg, cl); + a.shrd(dstreg, tempreg, cl); + + a.mov(tempreg, dword_ptr(esp)); + } + + emit_sar_r64_p64(a, dstreg, edx, rshiftp, inst.flags()); + } + else + { + dstreg = dstp.select_register(eax, widthp); + + Gp tempreg = esi; + if (dstreg == tempreg) + tempreg = edi; + if (dstreg == tempreg) + tempreg = ebp; + assert(dstreg != tempreg); + a.mov(dword_ptr(esp), tempreg); + + // calculate the required rotation + emit_mov_r32_p32(a, ecx, widthp); + if (shiftp.is_immediate_value(0)) + ; + else if (shiftp.is_immediate()) + a.add(ecx, shiftp.immediate()); + else if (shiftp.is_int_register()) + a.add(ecx, shiftp.select_register(edx)); + else if (shiftp.is_memory()) + a.add(ecx, MABS(shiftp.memory())); + + emit_mov_r64_p64(a, dstreg, edx, srcp); + + // if the count is at least 32, swap the halves + Label small_ror = a.new_label(); + + a.test(ecx, 0x20); + a.short_().jz(small_ror); + a.xchg(dstreg, edx); + a.bind(small_ror); + a.and_(ecx, 31); + + // do the extended rotate + reset_last_upper_lower_reg(); + a.mov(tempreg, edx); + a.shrd(edx, dstreg, cl); + a.shrd(dstreg, tempreg, cl); + + a.mov(tempreg, dword_ptr(esp)); + + // now do the shift + Label small_sar = a.new_label(); + Label no_shift, done; + if (inst.flags()) + { + no_shift = a.new_label(); + done = a.new_label(); + } + + emit_mov_r32_p32(a, ecx, widthp); + a.neg(ecx); + a.test(ecx, 0x20); + a.short_().jz(small_sar); + a.mov(dstreg, edx); + a.sar(edx, 31); + a.bind(small_sar); + a.and_(ecx, 31); + if (inst.flags()) + a.short_().jz(no_shift); + + a.shrd(dstreg, edx, cl); + if (inst.flags() & FLAG_Z) + a.pushfd(); + a.sar(edx, cl); + + // zero-bit shifts don't update the flags + if (inst.flags()) + { + a.short_().jmp(done); + + a.bind(no_shift); + if (inst.flags() & FLAG_Z) + { + a.test(eax, eax); + a.pushfd(); + } + a.test(edx, edx); + a.bind(done); + if (inst.flags() & FLAG_Z) + emit_combine_z_flags(a); + } + } + + emit_mov_p64_r64(a, dstp, dstreg, edx); + } +} + + //------------------------------------------------- // op_roland - process an ROLAND opcode //------------------------------------------------- @@ -4636,7 +5034,7 @@ void drcbe_x86::op_roland(Assembler &a, const instruction &inst) { // 64-bit form emit_mov_r64_p64(a, dstreg, edx, srcp); // mov edx:dstreg,srcp - emit_rol_r64_p64(a, dstreg, edx, shiftp, inst); // rol edx:dstreg,shiftp + emit_rol_r64_p64(a, dstreg, edx, shiftp, FLAGS_NONE); // rol edx:dstreg,shiftp emit_and_r64_p64(a, dstreg, edx, maskp, inst); // and edx:dstreg,maskp emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg } @@ -4696,7 +5094,7 @@ void drcbe_x86::op_rolins(Assembler &a, const instruction &inst) { // 64-bit form emit_mov_r64_p64(a, eax, edx, srcp); // mov edx:eax,srcp - emit_rol_r64_p64(a, eax, edx, shiftp, inst); // rol edx:eax,shiftp + emit_rol_r64_p64(a, eax, edx, shiftp, FLAGS_NONE); // rol edx:eax,shiftp if (maskp.is_immediate()) { a.and_(eax, maskp.immediate()); // and eax,maskp @@ -6213,7 +6611,7 @@ void drcbe_x86::op_shl(Assembler &a, const instruction &inst) { // general 
case emit_mov_r64_p64(a, dstreg, edx, src1p); - emit_shl_r64_p64(a, dstreg, edx, src2p, inst); + emit_shl_r64_p64(a, dstreg, edx, src2p, inst.flags()); } emit_mov_p64_r64(a, dstp, dstreg, edx); } @@ -6270,7 +6668,7 @@ void drcbe_x86::op_shr(Assembler &a, const instruction &inst) { // general case emit_mov_r64_p64(a, dstreg, edx, src1p); // mov edx:dstreg,[src1p] - emit_shr_r64_p64(a, dstreg, edx, src2p, inst); // shr edx:dstreg,src2p + emit_shr_r64_p64(a, dstreg, edx, src2p, inst.flags()); // shr edx:dstreg,src2p emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg } } @@ -6326,7 +6724,7 @@ void drcbe_x86::op_sar(Assembler &a, const instruction &inst) { // general case emit_mov_r64_p64(a, dstreg, edx, src1p); // mov edx:dstreg,[src1p] - emit_sar_r64_p64(a, dstreg, edx, src2p, inst); // sar edx:dstreg,src2p + emit_sar_r64_p64(a, dstreg, edx, src2p, inst.flags()); // sar edx:dstreg,src2p emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg } } @@ -6382,7 +6780,7 @@ void drcbe_x86::op_rol(Assembler &a, const instruction &inst) { // general case emit_mov_r64_p64(a, dstreg, edx, src1p); // mov edx:dstreg,[src1p] - emit_rol_r64_p64(a, dstreg, edx, src2p, inst); // rol edx:dstreg,src2p + emit_rol_r64_p64(a, dstreg, edx, src2p, inst.flags()); // rol edx:dstreg,src2p emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg } } @@ -6438,7 +6836,7 @@ void drcbe_x86::op_ror(Assembler &a, const instruction &inst) { // general case emit_mov_r64_p64(a, dstreg, edx, src1p); // mov edx:dstreg,[src1p] - emit_ror_r64_p64(a, dstreg, edx, src2p, inst); // ror edx:dstreg,src2p + emit_ror_r64_p64(a, dstreg, edx, src2p, inst.flags()); // ror edx:dstreg,src2p emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg } } @@ -6494,7 +6892,7 @@ void drcbe_x86::op_rolc(Assembler &a, const instruction &inst) { // general case emit_mov_r64_p64_keepflags(a, dstreg, edx, src1p); // mov edx:dstreg,[src1p] - emit_rcl_r64_p64(a, dstreg, edx, src2p, inst); // rcl edx:dstreg,src2p + emit_rcl_r64_p64(a, dstreg, edx, src2p, inst.flags()); // rcl edx:dstreg,src2p emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg } } @@ -6550,7 +6948,7 @@ void drcbe_x86::op_rorc(Assembler &a, const instruction &inst) { // general case emit_mov_r64_p64_keepflags(a, dstreg, edx, src1p); // mov edx:dstreg,[src1p] - emit_rcr_r64_p64(a, dstreg, edx, src2p, inst); // rcr edx:dstreg,src2p + emit_rcr_r64_p64(a, dstreg, edx, src2p, inst.flags()); // rcr edx:dstreg,src2p emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg } } diff --git a/src/devices/cpu/drcumlsh.h b/src/devices/cpu/drcumlsh.h index 2394354c81c6e..28af595fa4f24 100644 --- a/src/devices/cpu/drcumlsh.h +++ b/src/devices/cpu/drcumlsh.h @@ -70,6 +70,8 @@ #define UML_MOV(block, dst, src) do { using namespace uml; block.append().mov(dst, src); } while (0) #define UML_MOVc(block, cond, dst, src) do { using namespace uml; block.append().mov(cond, dst, src); } while (0) #define UML_SEXT(block, dst, src, size) do { using namespace uml; block.append().sext(dst, src, size); } while (0) +#define UML_BFXU(block, dst, src, shift, width) do { using namespace uml; block.append().bfxu(dst, src, shift, width); } while (0) +#define UML_BFXS(block, dst, src, shift, width) do { using namespace uml; block.append().bfxs(dst, src, shift, width); } while (0) #define UML_ROLAND(block, dst, src, shift, mask) do { using namespace uml; block.append().roland(dst, src, shift, mask); } while (0) #define UML_ROLINS(block, dst, src, shift, mask) do { using namespace uml; 
block.append().rolins(dst, src, shift, mask); } while (0) #define UML_ADD(block, dst, src1, src2) do { using namespace uml; block.append().add(dst, src1, src2); } while (0) @@ -112,6 +114,8 @@ #define UML_DMOV(block, dst, src) do { using namespace uml; block.append().dmov(dst, src); } while (0) #define UML_DMOVc(block, cond, dst, src) do { using namespace uml; block.append().dmov(cond, dst, src); } while (0) #define UML_DSEXT(block, dst, src, size) do { using namespace uml; block.append().dsext(dst, src, size); } while (0) +#define UML_DBFXU(block, dst, src, shift, width) do { using namespace uml; block.append().dbfxu(dst, src, shift, width); } while (0) +#define UML_DBFXS(block, dst, src, shift, width) do { using namespace uml; block.append().dbfxs(dst, src, shift, width); } while (0) #define UML_DROLAND(block, dst, src, shift, mask) do { using namespace uml; block.append().droland(dst, src, shift, mask); } while (0) #define UML_DROLINS(block, dst, src, shift, mask) do { using namespace uml; block.append().drolins(dst, src, shift, mask); } while (0) #define UML_DADD(block, dst, src1, src2) do { using namespace uml; block.append().dadd(dst, src1, src2); } while (0) diff --git a/src/devices/cpu/e132xs/e132xsdrc_ops.hxx b/src/devices/cpu/e132xs/e132xsdrc_ops.hxx index d4732b5dc5303..bc8675fa7c955 100644 --- a/src/devices/cpu/e132xs/e132xsdrc_ops.hxx +++ b/src/devices/cpu/e132xs/e132xsdrc_ops.hxx @@ -651,8 +651,8 @@ void hyperstone_device::generate_trap_exception_or_int(drcuml_block &block, uml: UML_MOV(block, I4, DRC_SR); // I4 = old SR UML_MOV(block, I1, I4); // I1 = SR to be updated - UML_ROLAND(block, I3, I4, 32 - FP_SHIFT, 0x7f); // I3 = old FP - UML_ROLAND(block, I2, I4, 32 - FL_SHIFT, 0xf); // I2 = old FL + UML_BFXU(block, I3, I4, FP_SHIFT, 7); // I3 = old FP + UML_BFXU(block, I2, I4, FL_SHIFT, 4); // I2 = old FL UML_MOVc(block, uml::COND_Z, I2, 16); // convert FL == 0 to 16 UML_ADD(block, I3, I3, I2); // I3 = updated FP @@ -691,7 +691,7 @@ inline void hyperstone_device::generate_logic_op(drcuml_block &block, compiler_s UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); uml::parameter src = uml::I1; if (!SrcGlobal) @@ -724,7 +724,7 @@ inline void hyperstone_device::generate_logic_op_imm(drcuml_block &block, compil UML_MOV(block, I2, DRC_SR); if (!DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if (!DstGlobal || (dst_code <= SR_REGISTER)) { @@ -758,7 +758,7 @@ void hyperstone_device::generate_software(drcuml_block &block, compiler_state &c const uint32_t dst_code = (op & 0xf0) >> 4; UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); // I3 = FP + UML_BFXU(block, I3, I2, FP_SHIFT, 7); // I3 = FP UML_ADD(block, I1, I3, src_code); UML_AND(block, I1, I1, 0x3f); @@ -771,7 +771,7 @@ void hyperstone_device::generate_software(drcuml_block &block, compiler_state &c UML_OR(block, I2, I2, 1 << ILC_SHIFT); UML_MOV(block, DRC_SR, I2); - UML_ROLAND(block, I4, I2, 32 - FL_SHIFT, 0xf); + UML_BFXU(block, I4, I2, FL_SHIFT, 4); UML_MOVc(block, uml::COND_Z, I4, 16); UML_ADD(block, I4, I4, I3); // I4 = reg @@ -843,7 +843,7 @@ void hyperstone_device::generate_chk(drcuml_block &block, compiler_state &compil UML_MOV(block, I2, DRC_SR); if (!DstGlobal || !SrcGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I0); @@ 
-914,7 +914,7 @@ void hyperstone_device::generate_movd(drcuml_block &block, compiler_state &compi } else { - UML_ROLAND(block, I2, I3, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I2, I3, FP_SHIFT, 7); UML_ADD(block, I0, I2, src_code); UML_AND(block, I0, I0, 0x3f); UML_LOAD(block, I0, (void *)m_core->local_regs, I0, SIZE_DWORD, SCALE_x4); @@ -954,11 +954,10 @@ void hyperstone_device::generate_movd(drcuml_block &block, compiler_state &compi const int pop_next = compiler.next_label(); const int done_ret = compiler.next_label(); UML_MOV(block, I0, mem(&SP)); // I0 = SP - UML_ROLAND(block, I1, I0, 30, 0x7f); // I3 = FP - SP(8..2) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I1, I0, 2, 7); // I3 = FP - SP(8..2) + UML_BFXU(block, I3, I2, FP_SHIFT, 7); UML_SUB(block, I3, I3, I1); - UML_SHL(block, I3, I3, 32 - 7); // sign-extend 7-bit number - UML_SAR(block, I3, I3, 32 - 7); + UML_BFXS(block, I3, I3, 0, 7); // sign-extend 7-bit number UML_JMPc(block, uml::COND_NS, done_ret); // nothing to pull if not negative UML_LABEL(block, pop_next); UML_SUB(block, I0, I0, 4); // pull a word @@ -986,7 +985,7 @@ void hyperstone_device::generate_movd(drcuml_block &block, compiler_state &compi } else { - UML_ROLAND(block, I0, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I0, I2, FP_SHIFT, 7); UML_ADD(block, I0, I0, dst_code); UML_AND(block, I0, I0, 0x3f); UML_STORE(block, (void *)m_core->local_regs, I0, 0, SIZE_DWORD, SCALE_x4); @@ -999,7 +998,7 @@ void hyperstone_device::generate_movd(drcuml_block &block, compiler_state &compi { UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if (SrcGlobal) { @@ -1062,7 +1061,7 @@ void hyperstone_device::generate_divsu(drcuml_block &block, compiler_state &comp } if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); generate_load_operand(block, compiler, SrcGlobal, src_code, uml::I0, uml::I0); @@ -1152,7 +1151,7 @@ void hyperstone_device::generate_xm(drcuml_block &block, compiler_state &compile UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, SrcGlobal, src_code, uml::I1, uml::I1); @@ -1188,7 +1187,7 @@ void hyperstone_device::generate_mask(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, SrcGlobal, src_code, uml::I0, uml::I0); @@ -1214,7 +1213,7 @@ void hyperstone_device::generate_sum(drcuml_block &block, compiler_state &compil UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if (SrcGlobal && (src_code == PC_REGISTER) && ((desc->flags & OPFLAG_IN_DELAY_SLOT) || !compiler.check_delay())) { @@ -1257,7 +1256,7 @@ void hyperstone_device::generate_sums(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, SrcGlobal, src_code, uml::I0, uml::I0); @@ -1296,7 +1295,7 @@ void hyperstone_device::generate_cmp(drcuml_block &block, compiler_state &compil UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - 
UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_src_addsub(block, compiler, SrcGlobal, src_code, uml::I1, uml::I1, uml::I2); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -1318,7 +1317,7 @@ void hyperstone_device::generate_mov(drcuml_block &block, compiler_state &compil UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if (DstGlobal && compiler.user_mode()) { @@ -1438,7 +1437,7 @@ void hyperstone_device::generate_add(drcuml_block &block, compiler_state &compil UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if ((desc->flags & OPFLAG_IN_DELAY_SLOT) || !compiler.check_delay()) { @@ -1514,7 +1513,7 @@ void hyperstone_device::generate_adds(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_src_addsub(block, compiler, SrcGlobal, src_code, uml::I1, uml::I1, uml::I2); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -1553,7 +1552,7 @@ void hyperstone_device::generate_cmpb(drcuml_block &block, compiler_state &compi const uint32_t dst_code = (op & 0xf0) >> 4; if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); generate_load_operand(block, compiler, SrcGlobal, src_code, uml::I1, uml::I1); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -1575,7 +1574,7 @@ void hyperstone_device::generate_subc(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_src_addsub(block, compiler, SrcGlobal,src_code, uml::I1, uml::I1, uml::I2); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -1608,7 +1607,7 @@ void hyperstone_device::generate_sub(drcuml_block &block, compiler_state &compil UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_src_addsub(block, compiler, SrcGlobal, src_code, uml::I1, uml::I1, uml::I2); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -1633,7 +1632,7 @@ void hyperstone_device::generate_subs(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_src_addsub(block, compiler, SrcGlobal, src_code, uml::I1, uml::I1, uml::I2); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -1673,7 +1672,7 @@ void hyperstone_device::generate_addc(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_src_addsub(block, compiler, SrcGlobal,src_code, uml::I1, uml::I1, uml::I2); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -1706,7 +1705,7 @@ void hyperstone_device::generate_neg(drcuml_block &block, compiler_state &compil UML_MOV(block, I2, DRC_SR); if (!SrcGlobal 
|| !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_src_addsub(block, compiler, SrcGlobal, src_code, uml::I0, uml::I0, uml::I2); @@ -1730,7 +1729,7 @@ void hyperstone_device::generate_negs(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_src_addsub(block, compiler, SrcGlobal, src_code, uml::I0, uml::I0, uml::I2); @@ -1828,7 +1827,7 @@ void hyperstone_device::generate_not(drcuml_block &block, compiler_state &compil UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, SrcGlobal, src_code, uml::I0, uml::I0); @@ -1857,7 +1856,7 @@ void hyperstone_device::generate_cmpi(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I0); @@ -1944,7 +1943,7 @@ void hyperstone_device::generate_movi(drcuml_block &block, compiler_state &compi UML_AND(block, I2, I2, ~H_MASK); UML_MOV(block, DRC_SR, I2); - UML_ROLAND(block, I2, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I2, I2, FP_SHIFT, 7); UML_ADD(block, I2, I2, dst_code); UML_AND(block, I2, I2, 0x3f); UML_STORE(block, (void *)m_core->local_regs, I2, src, SIZE_DWORD, SCALE_x4); @@ -1969,7 +1968,7 @@ void hyperstone_device::generate_addi(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if (DstGlobal && (dst_code == PC_REGISTER) && ((desc->flags & OPFLAG_IN_DELAY_SLOT) || !compiler.check_delay())) { @@ -2041,7 +2040,7 @@ void hyperstone_device::generate_cmpbi(drcuml_block &block, compiler_state &comp UML_MOV(block, I2, DRC_SR); if (!DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if (!DstGlobal || !n) generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -2167,7 +2166,7 @@ void hyperstone_device::generate_shrdi(drcuml_block &block, compiler_state &comp const uint32_t n = HiN ? 
DRC_HI_N_VALUE : DRC_LO_N_VALUE; UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, dst_code, uml::I1, uml::I4); generate_load_operand(block, compiler, LOCAL, dst_code + 1, uml::I0, uml::I3); @@ -2206,7 +2205,7 @@ void hyperstone_device::generate_shrd(drcuml_block &block, compiler_state &compi } UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, dst_code, uml::I1, uml::I4); generate_load_operand(block, compiler, LOCAL, dst_code + 1, uml::I0, uml::I5); @@ -2244,7 +2243,7 @@ void hyperstone_device::generate_shr(drcuml_block &block, compiler_state &compil const uint32_t src_code = op & 0xf; UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, src_code, uml::I1, uml::I1); generate_load_operand(block, compiler, LOCAL, dst_code, uml::I0, uml::I3); @@ -2277,7 +2276,7 @@ void hyperstone_device::generate_shri(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -2306,7 +2305,7 @@ void hyperstone_device::generate_sardi(drcuml_block &block, compiler_state &comp const uint32_t n = HiN ? DRC_HI_N_VALUE : DRC_LO_N_VALUE; UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, dst_code, uml::I1, uml::I4); generate_load_operand(block, compiler, LOCAL, dst_code + 1, uml::I0, uml::I3); @@ -2345,7 +2344,7 @@ void hyperstone_device::generate_sard(drcuml_block &block, compiler_state &compi } UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, dst_code, uml::I1, uml::I4); generate_load_operand(block, compiler, LOCAL, dst_code + 1, uml::I0, uml::I5); @@ -2383,7 +2382,7 @@ void hyperstone_device::generate_sar(drcuml_block &block, compiler_state &compil const uint32_t src_code = op & 0xf; UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, src_code, uml::I1, uml::I1); generate_load_operand(block, compiler, LOCAL, dst_code, uml::I0, uml::I3); @@ -2416,7 +2415,7 @@ void hyperstone_device::generate_sari(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -2445,7 +2444,7 @@ void hyperstone_device::generate_shldi(drcuml_block &block, compiler_state &comp const uint32_t n = HiN ? 
DRC_HI_N_VALUE : DRC_LO_N_VALUE; UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, dst_code, uml::I1, uml::I4); generate_load_operand(block, compiler, LOCAL, dst_code + 1, uml::I0, uml::I5); @@ -2499,7 +2498,7 @@ void hyperstone_device::generate_shld(drcuml_block &block, compiler_state &compi } UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, dst_code, uml::I1, uml::I5); generate_load_operand(block, compiler, LOCAL, dst_code + 1, uml::I0, uml::I6); @@ -2549,10 +2548,8 @@ void hyperstone_device::generate_shl(drcuml_block &block, compiler_state &compil const uint32_t dst_code = (op & 0xf0) >> 4; const uint32_t src_code = op & 0xf; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); - UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if (dst_code != src_code) { @@ -2602,7 +2599,7 @@ void hyperstone_device::generate_shli(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -2644,7 +2641,7 @@ void hyperstone_device::generate_testlz(drcuml_block &block, compiler_state &com const uint32_t dst_code = (op & 0xf0) >> 4; const uint32_t src_code = op & 0xf; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, src_code, uml::I0, uml::I0); @@ -2663,7 +2660,7 @@ void hyperstone_device::generate_rol(drcuml_block &block, compiler_state &compil const uint32_t src_code = op & 0xf; UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if (dst_code != src_code) { @@ -2713,7 +2710,7 @@ void hyperstone_device::generate_ldxx1(drcuml_block &block, compiler_state &comp const auto [sub_type, extra_s] = generate_get_d_code_dis(desc); if (!DstGlobal || !SrcGlobal) - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); const uml::parameter dstp = generate_load_address_ad(block, compiler, desc, DstGlobal, dst_code, uml::I0, uml::I0); @@ -2862,7 +2859,7 @@ void hyperstone_device::generate_ldxx2(drcuml_block &block, compiler_state &comp } if (!DstGlobal || !SrcGlobal) - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); generate_load_address_ns(block, compiler, desc, DstGlobal, dst_code, uml::I6, uml::I2, sub_type, extra_s); @@ -2968,7 +2965,7 @@ void hyperstone_device::generate_ldxx2(drcuml_block &block, compiler_state &comp UML_CMP(block, I6, mem(&m_core->global_regs[SP_REGISTER])); UML_JMPc(block, uml::COND_B, below_sp); - UML_ROLAND(block, I0, I6, 32 - 2, 0x3f); + UML_BFXU(block, I0, I6, 2, 6); UML_LOAD(block, I1, (void *)m_core->local_regs, I0, SIZE_DWORD, SCALE_x4); UML_JMP(block, done); @@ -3004,7 +3001,7 @@ void hyperstone_device::generate_stxx1(drcuml_block &block, compiler_state &comp const auto [sub_type, extra_s] = generate_get_d_code_dis(desc); if (!DstGlobal || !SrcGlobal) - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); const uml::parameter dstp = generate_load_address_ad(block, compiler, desc, DstGlobal, dst_code, uml::I0, 
uml::I0); @@ -3138,7 +3135,7 @@ void hyperstone_device::generate_stxx2(drcuml_block &block, compiler_state &comp } if (!DstGlobal || !SrcGlobal) - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); generate_load_address_ns(block, compiler, desc, DstGlobal, dst_code, uml::I0, uml::I6, sub_type, extra_s); @@ -3256,7 +3253,7 @@ void hyperstone_device::generate_stxx2(drcuml_block &block, compiler_state &comp UML_CMP(block, I5, mem(&SP)); UML_JMPc(block, uml::COND_B, less_than_sp); - UML_ROLAND(block, I4, I0, 30, 0x3f); + UML_BFXU(block, I4, I0, 2, 6); UML_STORE(block, (void *)m_core->local_regs, I4, I1, SIZE_DWORD, SCALE_x4); UML_JMP(block, store_done); @@ -3295,7 +3292,7 @@ void hyperstone_device::generate_mulsu(drcuml_block &block, compiler_state &comp } if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); generate_load_operand(block, compiler, SrcGlobal, src_code, uml::I0, uml::I4); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I1, uml::I6); @@ -3363,7 +3360,7 @@ void hyperstone_device::generate_mul(drcuml_block &block, compiler_state &compil } if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); generate_load_operand(block, compiler, SrcGlobal, src_code, uml::I0, uml::I1); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I1, uml::I6); @@ -3469,9 +3466,9 @@ void hyperstone_device::generate_set(drcuml_block &block, compiler_state &compil } else { - UML_ROLAND(block, I1, DRC_SR, 32 - FP_SHIFT, 0x7f); - UML_ADD(block, I2, I1, dst_code); - UML_AND(block, I3, I2, 0x3f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); + UML_ADD(block, I3, I3, dst_code); + UML_AND(block, I3, I3, 0x3f); UML_STORE(block, (void *)m_core->local_regs, I3, I0, SIZE_DWORD, SCALE_x4); } } @@ -3486,7 +3483,7 @@ void hyperstone_device::generate_ldwr(drcuml_block &block, compiler_state &compi const uint32_t src_code = op & 0xf; const uint32_t dst_code = (op & 0xf0) >> 4; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (SrcGlobal || (src_code != dst_code)) generate_load_address_rp(block, compiler, desc, dst_code, uml::I0, uml::I2, 0); @@ -3511,7 +3508,7 @@ void hyperstone_device::generate_lddr(drcuml_block &block, compiler_state &compi const uint32_t src_code = op & 0xf; const uint32_t dst_code = (op & 0xf0) >> 4; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (!SrcGlobal && (src_code != dst_code)) generate_load_address_rp(block, compiler, desc, dst_code, uml::I0, uml::I2, 0); @@ -3557,7 +3554,7 @@ void hyperstone_device::generate_ldwp(drcuml_block &block, compiler_state &compi const uint32_t src_code = op & 0xf; const uint32_t dst_code = (op & 0xf0) >> 4; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (SrcGlobal || (src_code != dst_code)) generate_load_address_rp(block, compiler, desc, dst_code, uml::I4, uml::I2, 4); @@ -3589,7 +3586,7 @@ void hyperstone_device::generate_lddp(drcuml_block &block, compiler_state &compi const uint32_t src_code = op & 0xf; const uint32_t dst_code = (op & 0xf0) >> 4; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (SrcGlobal || (src_code != dst_code)) generate_load_address_rp(block, compiler, desc, dst_code, uml::I4, uml::I2, 8); @@ -3641,7 +3638,7 @@ void 
hyperstone_device::generate_stwr(drcuml_block &block, compiler_state &compi const uint32_t src_code = op & 0xf; const uint32_t dst_code = (op & 0xf0) >> 4; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (SrcGlobal || (src_code != dst_code)) generate_load_address_rp(block, compiler, desc, dst_code, uml::I0, uml::I2, 0); @@ -3675,7 +3672,7 @@ void hyperstone_device::generate_stdr(drcuml_block &block, compiler_state &compi const uint32_t src_code = op & 0xf; const uint32_t dst_code = (op & 0xf0) >> 4; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (SrcGlobal || (src_code != dst_code)) generate_load_address_rp(block, compiler, desc, dst_code, uml::I0, uml::I2, 0); @@ -3726,7 +3723,7 @@ void hyperstone_device::generate_stwp(drcuml_block &block, compiler_state &compi const uint32_t src_code = op & 0xf; const uint32_t dst_code = (op & 0xf0) >> 4; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (SrcGlobal || (src_code != dst_code)) generate_load_address_rp(block, compiler, desc, dst_code, uml::I4, uml::I2, 4); @@ -3771,7 +3768,7 @@ void hyperstone_device::generate_stdp(drcuml_block &block, compiler_state &compi const uint32_t src_code = op & 0xf; const uint32_t dst_code = (op & 0xf0) >> 4; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (SrcGlobal || (src_code != dst_code)) generate_load_address_rp(block, compiler, desc, dst_code, uml::I4, uml::I2, 8); @@ -3955,7 +3952,7 @@ void hyperstone_device::generate_frame(drcuml_block &block, compiler_state &comp const uint32_t dst_code = (op & 0xf0) >> 4; UML_MOV(block, I2, DRC_SR); // I2 = SR - UML_ROLAND(block, I1, I2, 32 - FP_SHIFT, 0x7f); // I1 = FP -= Ls + UML_BFXU(block, I1, I2, FP_SHIFT, 7); // I1 = FP -= Ls UML_SUB(block, I1, I1, op & 0xf); UML_ROLAND(block, I0, I1, FP_SHIFT, FP_MASK); UML_OR(block, I0, I0, dst_code << FL_SHIFT); // FL = Ld @@ -3968,8 +3965,7 @@ void hyperstone_device::generate_frame(drcuml_block &block, compiler_state &comp UML_ROLAND(block, I3, I0, 30, 0x7f); UML_ADD(block, I3, I3, (64 - 10)); UML_SUB(block, I3, I3, I1); - UML_SHL(block, I3, I3, 32 - 7); // sign-extend 7-bit value - UML_SAR(block, I3, I3, 32 - 7); + UML_BFXS(block, I3, I3, 0, 7); // sign-extend 7-bit value UML_JMPc(block, uml::COND_NS, done); UML_CMP(block, I0, mem(&UB)); // check stack pointer against upper bound @@ -4030,7 +4026,7 @@ void hyperstone_device::generate_call(drcuml_block &block, compiler_state &compi if (!dst_code) dst_code = 16; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (SrcGlobal) { @@ -4111,7 +4107,7 @@ void hyperstone_device::generate_extend(drcuml_block &block, compiler_state &com const uint16_t func = m_pr16(desc->pc + 2); - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); UML_ADD(block, I2, I3, src_code); UML_AND(block, I2, I2, 0x3f); diff --git a/src/devices/cpu/uml.cpp b/src/devices/cpu/uml.cpp index 23775118d93e9..0beb9a7a00e2b 100644 --- a/src/devices/cpu/uml.cpp +++ b/src/devices/cpu/uml.cpp @@ -56,60 +56,60 @@ using namespace uml; //************************************************************************** // opcode validation condition/flag valid bitmasks -constexpr u8 OPFLAGS_NONE = FLAGS_NONE; -constexpr u8 OPFLAGS_C = FLAG_C; -constexpr u8 OPFLAGS_Z = FLAG_Z; -constexpr u8 OPFLAGS_SZ = FLAG_S | FLAG_Z; -constexpr u8 
OPFLAGS_SZC = FLAG_S | FLAG_Z | FLAG_C; -constexpr u8 OPFLAGS_SZV = FLAG_S | FLAG_Z | FLAG_V; -constexpr u8 OPFLAGS_SZVC = FLAG_S | FLAG_Z | FLAG_V | FLAG_C; -constexpr u8 OPFLAGS_UZC = FLAG_U | FLAG_Z | FLAG_C; -constexpr u8 OPFLAGS_ALL = FLAGS_ALL; -constexpr u8 OPFLAGS_P1 = 0x81; -constexpr u8 OPFLAGS_P2 = 0x82; -constexpr u8 OPFLAGS_P3 = 0x83; -constexpr u8 OPFLAGS_P4 = 0x84; +constexpr u8 OPFLAGS_NONE = FLAGS_NONE; +constexpr u8 OPFLAGS_C = FLAG_C; +constexpr u8 OPFLAGS_Z = FLAG_Z; +constexpr u8 OPFLAGS_SZ = FLAG_S | FLAG_Z; +constexpr u8 OPFLAGS_SZC = FLAG_S | FLAG_Z | FLAG_C; +constexpr u8 OPFLAGS_SZV = FLAG_S | FLAG_Z | FLAG_V; +constexpr u8 OPFLAGS_SZVC = FLAG_S | FLAG_Z | FLAG_V | FLAG_C; +constexpr u8 OPFLAGS_UZC = FLAG_U | FLAG_Z | FLAG_C; +constexpr u8 OPFLAGS_ALL = FLAGS_ALL; +constexpr u8 OPFLAGS_P1 = 0x81; +constexpr u8 OPFLAGS_P2 = 0x82; +constexpr u8 OPFLAGS_P3 = 0x83; +constexpr u8 OPFLAGS_P4 = 0x84; // parameter input/output states -#define PIO_IN 0x01 -#define PIO_OUT 0x02 -#define PIO_INOUT (PIO_IN | PIO_OUT) +constexpr u8 PIO_IN = 0x01; +constexpr u8 PIO_OUT = 0x02; +constexpr u8 PIO_INOUT = (PIO_IN | PIO_OUT); // parameter sizes -#define PSIZE_4 SIZE_DWORD -#define PSIZE_8 SIZE_QWORD -#define PSIZE_OP 0x80 -#define PSIZE_P1 0x81 -#define PSIZE_P2 0x82 -#define PSIZE_P3 0x83 -#define PSIZE_P4 0x84 +constexpr u8 PSIZE_4 = SIZE_DWORD; +constexpr u8 PSIZE_8 = SIZE_QWORD; +constexpr u8 PSIZE_OP = 0x80; +constexpr u8 PSIZE_P1 = 0x81; +constexpr u8 PSIZE_P2 = 0x82; +constexpr u8 PSIZE_P3 = 0x83; +constexpr u8 PSIZE_P4 = 0x84; // basic parameter types -#define PTYPES_NONE 0 -#define PTYPES_IMM (1 << parameter::PTYPE_IMMEDIATE) -#define PTYPES_IREG (1 << parameter::PTYPE_INT_REGISTER) -#define PTYPES_FREG (1 << parameter::PTYPE_FLOAT_REGISTER) -#define PTYPES_MVAR (1 << parameter::PTYPE_MAPVAR) -#define PTYPES_MEM (1 << parameter::PTYPE_MEMORY) -#define PTYPES_SIZE (1 << parameter::PTYPE_SIZE) -#define PTYPES_SCSIZE (1 << parameter::PTYPE_SIZE_SCALE) -#define PTYPES_SPSIZE (1 << parameter::PTYPE_SIZE_SPACE) -#define PTYPES_HANDLE (1 << parameter::PTYPE_CODE_HANDLE) -#define PTYPES_LABEL (1 << parameter::PTYPE_CODE_LABEL) -#define PTYPES_CFUNC (1 << parameter::PTYPE_C_FUNCTION) -#define PTYPES_ROUND (1 << parameter::PTYPE_ROUNDING) -#define PTYPES_STR (1 << parameter::PTYPE_STRING) +constexpr u16 PTYPES_NONE = 0; +constexpr u16 PTYPES_IMM = (1 << parameter::PTYPE_IMMEDIATE); +constexpr u16 PTYPES_IREG = (1 << parameter::PTYPE_INT_REGISTER); +constexpr u16 PTYPES_FREG = (1 << parameter::PTYPE_FLOAT_REGISTER); +constexpr u16 PTYPES_MVAR = (1 << parameter::PTYPE_MAPVAR); +constexpr u16 PTYPES_MEM = (1 << parameter::PTYPE_MEMORY); +constexpr u16 PTYPES_SIZE = (1 << parameter::PTYPE_SIZE); +constexpr u16 PTYPES_SCSIZE = (1 << parameter::PTYPE_SIZE_SCALE); +constexpr u16 PTYPES_SPSIZE = (1 << parameter::PTYPE_SIZE_SPACE); +constexpr u16 PTYPES_HANDLE = (1 << parameter::PTYPE_CODE_HANDLE); +constexpr u16 PTYPES_LABEL = (1 << parameter::PTYPE_CODE_LABEL); +constexpr u16 PTYPES_CFUNC = (1 << parameter::PTYPE_C_FUNCTION); +constexpr u16 PTYPES_ROUND = (1 << parameter::PTYPE_ROUNDING); +constexpr u16 PTYPES_STR = (1 << parameter::PTYPE_STRING); // special parameter types -#define PTYPES_PTR (PTYPES_MEM | 0x1000) -#define PTYPES_STATE (PTYPES_MEM | 0x2000) +constexpr u16 PTYPES_PTR = (PTYPES_MEM | 0x1000); +constexpr u16 PTYPES_STATE = (PTYPES_MEM | 0x2000); // combinations of types -#define PTYPES_IRM (PTYPES_IREG | PTYPES_MEM) -#define PTYPES_FRM (PTYPES_FREG | PTYPES_MEM) -#define 
PTYPES_IMV (PTYPES_IMM | PTYPES_MVAR) -#define PTYPES_IANY (PTYPES_IRM | PTYPES_IMV) -#define PTYPES_FANY (PTYPES_FRM) +constexpr u16 PTYPES_IRM = (PTYPES_IREG | PTYPES_MEM); +constexpr u16 PTYPES_FRM = (PTYPES_FREG | PTYPES_MEM); +constexpr u16 PTYPES_IMV = (PTYPES_IMM | PTYPES_MVAR); +constexpr u16 PTYPES_IANY = (PTYPES_IRM | PTYPES_IMV); +constexpr u16 PTYPES_FANY = (PTYPES_FRM); @@ -171,6 +171,8 @@ opcode_info const instruction::s_opcode_info_table[OP_MAX] = OPINFO1(SET, "!set", 4|8, true, NONE, NONE, NONE, PINFO(OUT, OP, IRM)) // Get the state of the specified condition (e.g. calling UML_SET with COND_NZ will return 0 if the condition is not met and 1 if the condition is met) OPINFO2(MOV, "!mov", 4|8, true, NONE, NONE, NONE, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY)) OPINFO3(SEXT, "!sext", 4|8, false, NONE, SZ, ALL, PINFO(OUT, OP, IRM), PINFO(IN, P3, IANY), PINFO(IN, OP, SIZE)) + OPINFO4(BFXU, "!bfxu", 4|8, false, NONE, SZ, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) + OPINFO4(BFXS, "!bfxs", 4|8, false, NONE, SZ, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) OPINFO4(ROLAND, "!roland", 4|8, false, NONE, SZ, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) // Rotate left + AND (see drcbec.cpp for implementation) OPINFO4(ROLINS, "!rolins", 4|8, false, NONE, SZ, ALL, PINFO(INOUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) // Rotate left + OR (see drcbec.cpp for implementation) OPINFO3(ADD, "!add", 4|8, false, NONE, SZVC, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) @@ -449,6 +451,114 @@ struct uml::instruction::simplify_op } } + static void bfxu(instruction &inst) + { + auto const size = inst.size(); + auto const bits = size << 3; + u64 const mask = size_mask(inst); + assert((size == 4) || (size == 8)); + + // truncate immediates to instruction size + truncate_immediate(inst, 1, mask); + truncate_immediate(inst, 2, bits - 1); + truncate_immediate(inst, 3, bits - 1); + + if (inst.param(2).is_immediate() && inst.param(3).is_immediate()) + { + auto const field = util::make_bitmask<u64>(inst.param(3).immediate()); + + if (inst.param(1).is_immediate()) + { + // constant result, convert to MOV or a logic operation + auto const rot = inst.param(2).immediate(); + + if (size == 4) + convert_to_mov_immediate(inst, rotr_32(inst.param(1).immediate(), rot) & field); + else + convert_to_mov_immediate(inst, rotr_64(inst.param(1).immediate(), rot) & field); + } + else if (inst.param(2).is_immediate_value(0)) + { + // no shift, convert to AND + inst.m_opcode = OP_AND; + inst.m_param[2] = field; + inst.m_numparams = 3; + } + else if ((inst.param(2).immediate() + inst.param(3).immediate()) == bits) + { + // equivalent to right shift + inst.m_opcode = OP_SHR; + inst.m_numparams = 3; + } + } + else if (inst.param(3).is_immediate_value(0)) + { + // undefined behaviour - just generate zero + convert_to_mov_immediate(inst, 0); + } + } + + static void bfxs(instruction &inst) + { + auto const size = inst.size(); + auto const bits = size << 3; + u64 const mask = size_mask(inst); + assert((size == 4) || (size == 8)); + + // truncate immediates to instruction size + truncate_immediate(inst, 1, mask); + truncate_immediate(inst, 2, bits - 1); + truncate_immediate(inst, 3, bits - 1); + + if (inst.param(2).is_immediate() && inst.param(3).is_immediate()) + { + if (inst.param(1).is_immediate()) + { + // constant result, convert to MOV or a logic 
+	static void bfxs(instruction &inst)
+	{
+		auto const size = inst.size();
+		auto const bits = size << 3;
+		u64 const mask = size_mask(inst);
+		assert((size == 4) || (size == 8));
+
+		// truncate immediates to instruction size
+		truncate_immediate(inst, 1, mask);
+		truncate_immediate(inst, 2, bits - 1);
+		truncate_immediate(inst, 3, bits - 1);
+
+		if (inst.param(2).is_immediate() && inst.param(3).is_immediate())
+		{
+			if (inst.param(1).is_immediate())
+			{
+				// constant result, convert to MOV or a logic operation
+				auto const rot = inst.param(2).immediate() + inst.param(3).immediate();
+				auto const shift = -s64(inst.param(3).immediate()) & (bits - 1);
+
+				if (size == 4)
+					convert_to_mov_immediate(inst, u32(s32(rotr_32(inst.param(1).immediate(), rot)) >> shift));
+				else
+					convert_to_mov_immediate(inst, u64(s64(rotr_64(inst.param(1).immediate(), rot)) >> shift));
+			}
+			else if (inst.param(2).is_immediate_value(0))
+			{
+				// no shift, convert to SEXT if possible
+				switch (inst.param(3).immediate())
+				{
+				case 8:
+					inst.m_opcode = OP_SEXT;
+					inst.m_param[2] = parameter::make_size(SIZE_BYTE);
+					inst.m_numparams = 3;
+					break;
+				case 16:
+					inst.m_opcode = OP_SEXT;
+					inst.m_param[2] = parameter::make_size(SIZE_WORD);
+					inst.m_numparams = 3;
+					break;
+				case 32:
+					inst.m_opcode = OP_SEXT;
+					inst.m_param[2] = parameter::make_size(SIZE_DWORD);
+					inst.m_numparams = 3;
+					break;
+				}
+			}
+			else if ((inst.param(2).immediate() + inst.param(3).immediate()) == bits)
+			{
+				// equivalent to right shift
+				inst.m_opcode = OP_SAR;
+				inst.m_numparams = 3;
+			}
+		}
+		else if (inst.param(3).is_immediate_value(0))
+		{
+			// undefined behaviour - just generate zero
+			convert_to_mov_immediate(inst, 0);
+		}
+	}
+
 	static void roland(instruction &inst)
 	{
 		auto const size = inst.size();
@@ -474,9 +584,9 @@ struct uml::instruction::simplify_op
 				// only mask is variable, convert to AND
 				inst.m_opcode = OP_AND;
 				if (size == 4)
-					inst.m_param[1] = parameter(rotl_32(inst.param(1).immediate(), inst.param(2).immediate()));
+					inst.m_param[1] = rotl_32(inst.param(1).immediate(), inst.param(2).immediate());
 				else
-					inst.m_param[1] = parameter(rotl_64(inst.param(1).immediate(), inst.param(2).immediate()));
+					inst.m_param[1] = rotl_64(inst.param(1).immediate(), inst.param(2).immediate());
 				inst.m_param[2] = inst.param(3);
 				inst.m_numparams = 3;
 			}
@@ -506,6 +616,13 @@ struct uml::instruction::simplify_op
 			inst.m_numparams = 3;
 			inst.m_param[2] = bits - inst.param(2).immediate();
 		}
+		else if (inst.param(2).is_immediate() && inst.param(3).is_immediate() && !(inst.param(3).immediate() & (inst.param(3).immediate() + 1)))
+		{
+			// extract right-aligned field, convert to BFXU
+			inst.m_opcode = OP_BFXU;
+			inst.m_param[2] = bits - inst.param(2).immediate();
+			inst.m_param[3] = 64 - count_leading_zeros_64(inst.param(3).immediate());
+		}
 	}
 
 	static void rolins(instruction &inst)
@@ -1322,6 +1439,8 @@ void uml::instruction::simplify()
 	case OP_SET: simplify_op::set(*this); break;
 	case OP_MOV: simplify_op::mov(*this); break;
 	case OP_SEXT: simplify_op::sext(*this); break;
+	case OP_BFXU: simplify_op::bfxu(*this); break;
+	case OP_BFXS: simplify_op::bfxs(*this); break;
 	case OP_ROLAND: simplify_op::roland(*this); break;
 	case OP_ROLINS: simplify_op::rolins(*this); break;
 	case OP_ADD: simplify_op::add(*this); break;
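The signed fold in simplify_op::bfxs above uses a rotate/arithmetic-shift pair: rotating right by shift + width parks the field's most significant bit at bit 31 (or 63), and an arithmetic right shift by size - width then replicates it as the sign. A 32-bit sketch of those semantics (illustrative only; note that right-shifting a negative signed value is only guaranteed to be arithmetic from C++20 onward):

```C++
#include <cassert>
#include <cstdint>

// BFXS reference semantics, 32-bit flavour: park the field's MSB at bit 31,
// then let an arithmetic right shift sign-extend it across the high bits.
// width must be 1..31; width % 32 == 0 is documented as undefined.
std::int32_t bfxs32(std::uint32_t src, unsigned shift, unsigned width)
{
	unsigned const rot = (shift + width) & 31;
	std::uint32_t const rotated = (src >> rot) | (src << ((32 - rot) & 31));
	return std::int32_t(rotated) >> ((32 - width) & 31);
}

int main()
{
	assert(bfxs32(0x00000600, 8, 4) == 6);  // field 0b0110 stays positive
	assert(bfxs32(0x00000E00, 8, 4) == -2); // field 0b1110 is sign-extended
	return 0;
}
```

The mask test in the ROLAND conversion, !(mask & (mask + 1)), succeeds exactly when mask is a contiguous right-aligned run of set bits (adding one carries out of the run), in which case 64 - count_leading_zeros_64(mask) recovers the field width.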
diff --git a/src/devices/cpu/uml.h b/src/devices/cpu/uml.h
index 68dbaeea65349..1431fb3b63871 100644
--- a/src/devices/cpu/uml.h
+++ b/src/devices/cpu/uml.h
@@ -29,8 +29,8 @@ struct drcuml_machine_state;
 
 // use a namespace to wrap all the UML instruction concepts so that
 // we can keep names short
-namespace uml
-{
+namespace uml {
+
 // integer registers
 constexpr int REG_I0 = 0x400;
 constexpr int REG_I_COUNT = 10;
@@ -181,6 +181,8 @@ namespace uml
 		OP_SET,    // SET     dst,c
 		OP_MOV,    // MOV     dst,src[,c]
 		OP_SEXT,   // SEXT    dst,src,size
+		OP_BFXU,   // BFXU    dst,src,shift,width
+		OP_BFXS,   // BFXS    dst,src,shift,width
 		OP_ROLAND, // ROLAND  dst,src,shift,mask
 		OP_ROLINS, // ROLINS  dst,src,shift,mask
 		OP_ADD,    // ADD     dst,src1,src2[,f]
@@ -489,6 +491,8 @@ namespace uml
 		void mov(parameter dst, parameter src1) { configure(OP_MOV, 4, dst, src1); }
 		void mov(condition_t cond, parameter dst, parameter src1) { configure(OP_MOV, 4, dst, src1, cond); }
 		void sext(parameter dst, parameter src1, operand_size size) { configure(OP_SEXT, 4, dst, src1, parameter::make_size(size)); }
+		void bfxu(parameter dst, parameter src, parameter shift, parameter width) { configure(OP_BFXU, 4, dst, src, shift, width); }
+		void bfxs(parameter dst, parameter src, parameter shift, parameter width) { configure(OP_BFXS, 4, dst, src, shift, width); }
 		void roland(parameter dst, parameter src, parameter shift, parameter mask) { configure(OP_ROLAND, 4, dst, src, shift, mask); }
 		void rolins(parameter dst, parameter src, parameter shift, parameter mask) { configure(OP_ROLINS, 4, dst, src, shift, mask); }
 		void add(parameter dst, parameter src1, parameter src2) { configure(OP_ADD, 4, dst, src1, src2); }
@@ -530,6 +534,8 @@ namespace uml
 		void dmov(parameter dst, parameter src1) { configure(OP_MOV, 8, dst, src1); }
 		void dmov(condition_t cond, parameter dst, parameter src1) { configure(OP_MOV, 8, dst, src1, cond); }
 		void dsext(parameter dst, parameter src1, operand_size size) { configure(OP_SEXT, 8, dst, src1, parameter::make_size(size)); }
+		void dbfxu(parameter dst, parameter src, parameter shift, parameter width) { configure(OP_BFXU, 8, dst, src, shift, width); }
+		void dbfxs(parameter dst, parameter src, parameter shift, parameter width) { configure(OP_BFXS, 8, dst, src, shift, width); }
 		void droland(parameter dst, parameter src, parameter shift, parameter mask) { configure(OP_ROLAND, 8, dst, src, shift, mask); }
 		void drolins(parameter dst, parameter src, parameter shift, parameter mask) { configure(OP_ROLINS, 8, dst, src, shift, mask); }
 		void dadd(parameter dst, parameter src1, parameter src2) { configure(OP_ADD, 8, dst, src1, src2); }
@@ -677,6 +683,7 @@ namespace uml
 	const parameter M7(parameter::make_mapvar(MAPVAR_M0 + 7));
 	const parameter M8(parameter::make_mapvar(MAPVAR_M0 + 8));
 	const parameter M9(parameter::make_mapvar(MAPVAR_M0 + 9));
-}
+
+} // namespace uml
 
 #endif // MAME_CPU_UML_H
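For context, this is roughly how a front-end might emit the new operations through the documented macro forms; the surrounding block, register choices, and operand values here are hypothetical:

```C++
// Hypothetical emitter fragment; 'block' is a drcuml_block being filled in,
// and I0..I4 are the usual UML integer register parameters.
UML_BFXU(block, I0, I1, 8, 12);  // I0 = zero-extended bits 8..19 of I1
UML_BFXS(block, I2, I1, 0, 8);   // shift 0, width 8: simplifier rewrites to SEXT byte
UML_DBFXU(block, I3, I4, 60, 8); // 64-bit: field wraps from bit 60 around to bit 3
```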