diff --git a/docs/source/techspecs/uml_instructions.rst b/docs/source/techspecs/uml_instructions.rst
index f0137215242c0..449acf55c7c0a 100644
--- a/docs/source/techspecs/uml_instructions.rst
+++ b/docs/source/techspecs/uml_instructions.rst
@@ -3502,6 +3502,94 @@ Simplification rules
 * Immediate values for the ``count`` operand are truncated to five or
   six bits for 32-bit or 64-bit operands, respectively.
 
+.. _umlinst-bfx:
+
+BFX
+~~~
+
+Extract a contiguous bit field from an integer value.
+
++---------------------------------+-----------------------------------------------+
+| Disassembly                     | Usage                                         |
++=================================+===============================================+
+| .. code-block::                 | .. code-block:: C++                           |
+|                                 |                                               |
+|     bfxu dst,src,shift,width    |     UML_BFXU(block, dst, src, shift, width);  |
+|     bfxs dst,src,shift,width    |     UML_BFXS(block, dst, src, shift, width);  |
+|     dbfxu dst,src,shift,width   |     UML_DBFXU(block, dst, src, shift, width); |
+|     dbfxs dst,src,shift,width   |     UML_DBFXS(block, dst, src, shift, width); |
++---------------------------------+-----------------------------------------------+
+
+Extracts and right-aligns a contiguous bit field from the value of
+``src``, specified by its least significant bit position and width in
+bits. The field must be narrower than the ``src`` operand, but it may
+wrap around from the most significant bit position to the least
+significant bit position. BFXU and DBFXU zero-extend an unsigned field,
+while BFXS and DBFXS sign-extend a signed field.
+
+Back-ends may be able to optimise some forms of this instruction, for
+example when the ``shift`` and ``width`` operands are both immediate
+values, as in the example below.
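+
+A quick usage sketch (``I0`` and ``I1`` here stand for any integer
+registers, and the bit positions are arbitrary), extracting the eight
+bits starting at bit 12 of ``I1`` into ``I0``:
+
+.. code-block:: C++
+
+    UML_BFXU(block, I0, I1, 12, 8);
+
+    // equivalent to the longer shift/mask sequence:
+    // UML_SHR(block, I0, I1, 12);
+    // UML_AND(block, I0, I0, 0x000000ff);
+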
+Operands
+^^^^^^^^
+
+dst (32-bit or 64-bit – memory, integer register)
+    The destination where the extracted field will be stored.
+src (32-bit or 64-bit – memory, integer register, immediate, map variable)
+    The value to extract a contiguous bit field from.
+shift (32-bit or 64-bit – memory, integer register, immediate, map variable)
+    The position of the least significant bit of the field to extract,
+    where zero is the least significant bit position, and bit numbers
+    increase toward the most significant bit position. Only the least
+    significant five bits or six bits of this operand are used,
+    depending on the instruction size.
+width (32-bit or 64-bit – memory, integer register, immediate, map variable)
+    The width of the field to extract in bits. Only the least
+    significant five bits or six bits of this operand are used,
+    depending on the instruction size. The result is undefined if the
+    width modulo the instruction size in bits is zero.
+
+Flags
+^^^^^
+
+carry (C)
+    Undefined.
+overflow (V)
+    Undefined.
+zero (Z)
+    Set if the result is zero, or cleared otherwise.
+sign (S)
+    Set to the value of the most significant bit of the result (set if
+    the result is a negative signed integer value, or cleared
+    otherwise).
+unordered (U)
+    Undefined.
+
+Simplification rules
+^^^^^^^^^^^^^^^^^^^^
+
+* Converted to :ref:`MOV <umlinst-mov>`, :ref:`AND <umlinst-and>` or
+  :ref:`OR <umlinst-or>` if the ``src``, ``shift`` and ``width``
+  operands are all immediate values, or if the ``width`` operand is the
+  immediate value zero.
+* Converted to :ref:`SHR <umlinst-shr>` or :ref:`SAR <umlinst-sar>` if
+  the ``src`` operand is not an immediate value, the ``shift`` and
+  ``width`` operands are both immediate values, and the sum of the value
+  of the ``shift`` operand and the value of the ``width`` operand is
+  equal to the instruction size in bits.
+* BFXU and DBFXU are converted to :ref:`AND <umlinst-and>` if the
+  ``shift`` operand is the immediate value zero and the ``width``
+  operand is an immediate value.
+* BFXS and DBFXS are converted to :ref:`SEXT <umlinst-sext>` if the
+  ``shift`` operand is the immediate value zero and the ``width``
+  operand is the immediate value 8, 16 or 32.
+* Immediate values for the ``src`` operand are truncated to the
+  instruction size.
+* Immediate values for the ``shift`` and ``width`` operands are
+  truncated to five or six bits for 32-bit or 64-bit operands,
+  respectively.
+
 .. _umlinst-roland:
 
 ROLAND
 ~~~~~~
@@ -3572,10 +3660,10 @@ Simplification rules
   immediate value and the ``mask`` operand is an immediate value
   containing a single contiguous left-aligned sequence of set bits of
   the appropriate length for the value of the ``count`` operand.
-* Converted to :ref:`SHR <umlinst-shr>` if the ``count`` operand is an
-  immediate value and the ``mask`` operand is an immediate value
-  containing a single contiguous right-aligned sequence of set bits of
-  the appropriate length for the value of the ``count`` operand.
+* Converted to :ref:`SHR <umlinst-shr>` or :ref:`BFX <umlinst-bfx>` if
+  the ``count`` operand is an immediate value and the ``mask`` operand
+  is an immediate value containing a single contiguous right-aligned
+  sequence of set bits.
 * Immediate values for the ``src`` and ``mask`` operands are truncated
   to the instruction size.
 * Immediate values for the ``count`` operand are truncated to five or
diff --git a/src/devices/cpu/drcbearm64.cpp b/src/devices/cpu/drcbearm64.cpp
index 39d32889dbb89..7795a73a12cb8 100644
--- a/src/devices/cpu/drcbearm64.cpp
+++ b/src/devices/cpu/drcbearm64.cpp
@@ -553,6 +553,8 @@ class drcbe_arm64 : public drcbe_interface
 	void op_set(a64::Assembler &a, const uml::instruction &inst);
 	void op_mov(a64::Assembler &a, const uml::instruction &inst);
 	void op_sext(a64::Assembler &a, const uml::instruction &inst);
+	void op_bfxu(a64::Assembler &a, const uml::instruction &inst);
+	void op_bfxs(a64::Assembler &a, const uml::instruction &inst);
 	void op_roland(a64::Assembler &a, const uml::instruction &inst);
 	void op_rolins(a64::Assembler &a, const uml::instruction &inst);
 	template <bool CarryIn> void op_add(a64::Assembler &a, const uml::instruction &inst);
@@ -710,8 +712,10 @@ inline void drcbe_arm64::generate_one(a64::Assembler &a, const uml::instruction
 	case uml::OP_SET:     op_set(a, inst);        break; // SET     dst,c
 	case uml::OP_MOV:     op_mov(a, inst);        break; // MOV     dst,src[,c]
 	case uml::OP_SEXT:    op_sext(a, inst);       break; // SEXT    dst,src
-	case uml::OP_ROLAND:  op_roland(a, inst);     break; // ROLAND  dst,src1,src2,src3
-	case uml::OP_ROLINS:  op_rolins(a, inst);     break; // ROLINS  dst,src1,src2,src3
+	case uml::OP_BFXU:    op_bfxu(a, inst);       break; // BFXU    dst,src,shift,width
+	case uml::OP_BFXS:    op_bfxs(a, inst);       break; // BFXS    dst,src,shift,width
+	case uml::OP_ROLAND:  op_roland(a, inst);     break; // ROLAND  dst,src,count,mask
+	case uml::OP_ROLINS:  op_rolins(a, inst);     break; // ROLINS  dst,src,count,mask
 	case uml::OP_ADD:     op_add<false>(a, inst); break; // ADD     dst,src1,src2[,f]
 	case uml::OP_ADDC:    op_add<true>(a, inst);  break; // ADDC    dst,src1,src2[,f]
 	case uml::OP_SUB:     op_sub<false>(a, inst); break; // SUB     dst,src1,src2[,f]
@@ -3223,6 +3227,173 @@ void drcbe_arm64::op_sext(a64::Assembler &a, const uml::instruction &inst)
 	}
 }
 
+void drcbe_arm64::op_bfxu(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_flags(inst, FLAG_S | FLAG_Z);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
+	be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
+	be_parameter shiftp(*this, inst.param(2), PTYPE_MRI);
+	be_parameter widthp(*this, inst.param(3), PTYPE_MRI);
+
+	const a64::Gp output = dstp.select_register(TEMP_REG1, inst.size());
+	const a64::Gp src = srcp.select_register(TEMP_REG2, inst.size());
+	const a64::Inst::Id maskop = inst.flags() ? a64::Inst::kIdAnds : a64::Inst::kIdAnd;
+	const uint64_t instbits = inst.size() * 8;
+
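+	// With an immediate width, the field is extracted with a single UBFX
+	// when it does not wrap, or with a rotate right by the shift amount
+	// followed by an AND mask when it wraps or the shift is variable.
+	// With a variable width, rotate right by (shift + width) to park the
+	// field at the top of the register, then shift right by
+	// (size - width) to right-align and zero-extend it.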
+	if (widthp.is_immediate_value(0))
+	{
+		// zero width is undefined behaviour - just produce zero
+		const a64::Gp zero = select_register(a64::xzr, inst.size());
+
+		if (inst.flags())
+			a.ands(output, zero, zero);
+		else
+			a.mov(output, zero);
+	}
+	else if (widthp.is_immediate())
+	{
+		const auto width(widthp.immediate() & (instbits - 1));
+		const auto mask(util::make_bitmask<uint64_t>(width));
+
+		mov_reg_param(a, inst.size(), src, srcp);
+
+		if (shiftp.is_immediate())
+		{
+			const auto shift(shiftp.immediate() & (instbits - 1));
+
+			if ((shift + width) <= instbits)
+			{
+				// contiguous bit field
+				a.ubfx(output, src, shift, width);
+				if (inst.flags())
+					a.tst(output, output);
+			}
+			else
+			{
+				// bit field wraps from LSB to MSB
+				a.ror(output, src, shift);
+				a.emit(maskop, output, output, mask);
+			}
+		}
+		else
+		{
+			const a64::Gp shift = shiftp.select_register(TEMP_REG3, inst.size());
+
+			mov_reg_param(a, inst.size(), shift, shiftp);
+
+			a.ror(output, src, shift);
+			a.emit(maskop, output, output, mask);
+		}
+	}
+	else
+	{
+		const a64::Gp width = (widthp != dstp) ? widthp.select_register(TEMP_REG3, inst.size()) : select_register(TEMP_REG3, inst.size());
+		const a64::Gp temp = select_register(FUNC_SCRATCH_REG, inst.size());
+
+		mov_reg_param(a, inst.size(), width, widthp);
+		if (!shiftp.is_immediate())
+			mov_reg_param(a, inst.size(), temp, shiftp);
+		mov_reg_param(a, inst.size(), src, srcp);
+
+		if (shiftp.is_immediate())
+			a.add(temp, width, shiftp.immediate() & (instbits - 1));
+		else
+			a.add(temp, width, temp);
+		a.ror(output, src, temp);
+		a.neg(temp, width);
+		a.lsr(output, output, temp);
+		if (inst.flags())
+			a.tst(output, output);
+	}
+
+	mov_param_reg(a, inst.size(), dstp, output);
+}
+
+void drcbe_arm64::op_bfxs(a64::Assembler &a, const uml::instruction &inst)
+{
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_flags(inst, FLAG_S | FLAG_Z);
+
+	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
+	be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
+	be_parameter shiftp(*this, inst.param(2), PTYPE_MRI);
+	be_parameter widthp(*this, inst.param(3), PTYPE_MRI);
+
+	const a64::Gp output = dstp.select_register(TEMP_REG1, inst.size());
+	const a64::Gp src = srcp.select_register(TEMP_REG2, inst.size());
+	const uint64_t instbits = inst.size() * 8;
+
+	if (widthp.is_immediate_value(0))
+	{
+		// zero width is undefined behaviour - just produce zero
+		const a64::Gp zero = select_register(a64::xzr, inst.size());
+
+		if (inst.flags())
+			a.ands(output, zero, zero);
+		else
+			a.mov(output, zero);
+	}
+	else if (widthp.is_immediate())
+	{
+		const auto width(widthp.immediate() & (instbits - 1));
+
+		mov_reg_param(a, inst.size(), src, srcp);
+
+		if (shiftp.is_immediate())
+		{
+			const auto shift(shiftp.immediate() & (instbits - 1));
+
+			if ((shift + width) <= instbits)
+			{
+				// contiguous bit field
+				a.sbfx(output, src, shift, width);
+			}
+			else
+			{
+				// bit field wraps from LSB to MSB
+				a.ror(output, src, shift);
+				a.sbfx(output, output, 0, width);
+			}
+		}
+		else
+		{
+			const a64::Gp shift = shiftp.select_register(TEMP_REG3, inst.size());
+
+			mov_reg_param(a, inst.size(), shift, shiftp);
+
+			a.ror(output,
src, shift); + a.sbfx(output, output, 0, width); + } + } + else + { + const a64::Gp width = (widthp != dstp) ? widthp.select_register(TEMP_REG3, inst.size()) : select_register(TEMP_REG3, inst.size()); + const a64::Gp temp = select_register(FUNC_SCRATCH_REG, inst.size()); + + mov_reg_param(a, inst.size(), src, srcp); + if (!shiftp.is_immediate()) + mov_reg_param(a, inst.size(), temp, shiftp); + mov_reg_param(a, inst.size(), width, widthp); + + if (shiftp.is_immediate()) + a.add(temp, width, shiftp.immediate() & (instbits - 1)); + else + a.add(temp, width, temp); + a.ror(output, src, temp); + a.neg(temp, width); + a.asr(output, output, temp); + } + + mov_param_reg(a, inst.size(), dstp, output); + + if (inst.flags()) + a.tst(output, output); +} + void drcbe_arm64::op_roland(a64::Assembler &a, const uml::instruction &inst) { assert(inst.size() == 4 || inst.size() == 8); @@ -3246,11 +3417,10 @@ void drcbe_arm64::op_roland(a64::Assembler &a, const uml::instruction &inst) const auto pop = population_count_64(maskp.immediate()); const auto lz = count_leading_zeros_64(maskp.immediate()) & (instbits - 1); const auto invlamask = ~(maskp.immediate() << lz) & instmask; - const bool is_right_aligned = (maskp.immediate() & (maskp.immediate() + 1)) == 0; const bool is_contiguous = (invlamask & (invlamask + 1)) == 0; const auto s = shiftp.immediate() & (instbits - 1); - if (is_right_aligned || is_contiguous) + if (is_contiguous) { mov_reg_param(a, inst.size(), src, srcp); optimized = true; @@ -3260,25 +3430,6 @@ void drcbe_arm64::op_roland(a64::Assembler &a, const uml::instruction &inst) { a.mov(output, select_register(a64::xzr, inst.size())); } - else if (is_right_aligned) - { - // Optimize a contiguous right-aligned mask - const auto s2 = -int(s) & (instbits - 1); - - if (s >= pop) - { - a.ubfx(output, src, s2, pop); - } - else if (s2 > 0) - { - a.ror(output, src, s2); - a.bfc(output, pop, instbits - pop); - } - else - { - a.and_(output, src, ~maskp.immediate() & instmask); - } - } else if (is_contiguous) { // Optimize a contiguous mask diff --git a/src/devices/cpu/drcbec.cpp b/src/devices/cpu/drcbec.cpp index fb409593bb5fe..51e3caf656a0b 100644 --- a/src/devices/cpu/drcbec.cpp +++ b/src/devices/cpu/drcbec.cpp @@ -1116,21 +1116,41 @@ int drcbe_c::execute(code_handle &entry) break; case MAKE_OPCODE_SHORT(OP_SEXT1, 4, 0): // SEXT1 dst,src - PARAM0 = (int8_t)PARAM1; + PARAM0 = int8_t(uint8_t(PARAM1)); break; case MAKE_OPCODE_SHORT(OP_SEXT1, 4, 1): - temp32 = (int8_t)PARAM1; + temp32 = int8_t(uint8_t(PARAM1)); flags = FLAGS32_NZ(temp32); PARAM0 = temp32; break; case MAKE_OPCODE_SHORT(OP_SEXT2, 4, 0): // SEXT2 dst,src - PARAM0 = (int16_t)PARAM1; + PARAM0 = int16_t(uint16_t(PARAM1)); break; case MAKE_OPCODE_SHORT(OP_SEXT2, 4, 1): - temp32 = (int16_t)PARAM1; + temp32 = int16_t(uint16_t(PARAM1)); + flags = FLAGS32_NZ(temp32); + PARAM0 = temp32; + break; + + case MAKE_OPCODE_SHORT(OP_BFXU, 4, 0): // BFXU dst,src,shift,width[,f] + PARAM0 = rotr_32(PARAM1, PARAM2 + PARAM3) >> (-int32_t(PARAM3) & 0x1f); + break; + + case MAKE_OPCODE_SHORT(OP_BFXU, 4, 1): + temp32 = rotr_32(PARAM1, PARAM2 + PARAM3) >> (-int32_t(PARAM3) & 0x1f); + flags = FLAGS32_NZ(temp32); + PARAM0 = temp32; + break; + + case MAKE_OPCODE_SHORT(OP_BFXS, 4, 0): // BFXS dst,src,shift,width[,f] + PARAM0 = uint32_t(int32_t(rotr_32(PARAM1, PARAM2 + PARAM3)) >> (-int32_t(PARAM3) & 0x1f)); + break; + + case MAKE_OPCODE_SHORT(OP_BFXS, 4, 1): + temp32 = uint32_t(int32_t(rotr_32(PARAM1, PARAM2 + PARAM3)) >> (-int32_t(PARAM3) & 0x1f)); flags = 
FLAGS32_NZ(temp32); PARAM0 = temp32; break; @@ -1767,31 +1787,51 @@ int drcbe_c::execute(code_handle &entry) break; case MAKE_OPCODE_SHORT(OP_SEXT1, 8, 0): // DSEXT dst,src,BYTE - DPARAM0 = (int8_t)PARAM1; + DPARAM0 = int8_t(uint8_t(PARAM1)); break; case MAKE_OPCODE_SHORT(OP_SEXT1, 8, 1): - temp64 = (int8_t)PARAM1; + temp64 = int8_t(uint8_t(PARAM1)); flags = FLAGS64_NZ(temp64); DPARAM0 = temp64; break; case MAKE_OPCODE_SHORT(OP_SEXT2, 8, 0): // DSEXT dst,src,WORD - DPARAM0 = (int16_t)PARAM1; + DPARAM0 = int16_t(uint16_t(PARAM1)); break; case MAKE_OPCODE_SHORT(OP_SEXT2, 8, 1): - temp64 = (int16_t)PARAM1; + temp64 = int16_t(uint16_t(PARAM1)); flags = FLAGS64_NZ(temp64); DPARAM0 = temp64; break; case MAKE_OPCODE_SHORT(OP_SEXT4, 8, 0): // DSEXT dst,src,DWORD - DPARAM0 = (int32_t)PARAM1; + DPARAM0 = int32_t(uint32_t(PARAM1)); break; case MAKE_OPCODE_SHORT(OP_SEXT4, 8, 1): - temp64 = (int32_t)PARAM1; + temp64 = int32_t(uint32_t(PARAM1)); + flags = FLAGS64_NZ(temp64); + DPARAM0 = temp64; + break; + + case MAKE_OPCODE_SHORT(OP_BFXU, 8, 0): // BFXU dst,src,shift,width[,f] + DPARAM0 = rotr_64(DPARAM1, DPARAM2 + DPARAM3) >> (-int64_t(DPARAM3) & 0x3f); + break; + + case MAKE_OPCODE_SHORT(OP_BFXU, 8, 1): + temp64 = rotr_64(DPARAM1, DPARAM2 + DPARAM3) >> (-int64_t(DPARAM3) & 0x3f); + flags = FLAGS64_NZ(temp64); + DPARAM0 = temp64; + break; + + case MAKE_OPCODE_SHORT(OP_BFXS, 8, 0): // BFXS dst,src,shift,width[,f] + DPARAM0 = uint64_t(int64_t(rotr_64(DPARAM1, DPARAM2 + DPARAM3)) >> (-int64_t(DPARAM3) & 0x3f)); + break; + + case MAKE_OPCODE_SHORT(OP_BFXS, 8, 1): + temp64 = uint64_t(int64_t(rotr_64(DPARAM1, DPARAM2 + DPARAM3)) >> (-int64_t(DPARAM3) & 0x3f)); flags = FLAGS64_NZ(temp64); DPARAM0 = temp64; break; diff --git a/src/devices/cpu/drcbex64.cpp b/src/devices/cpu/drcbex64.cpp index d14de0a0c80af..b916919aa918b 100644 --- a/src/devices/cpu/drcbex64.cpp +++ b/src/devices/cpu/drcbex64.cpp @@ -244,15 +244,15 @@ using namespace asmjit::x86; // CONSTANTS //************************************************************************** -const uint32_t PTYPE_M = 1 << parameter::PTYPE_MEMORY; -const uint32_t PTYPE_I = 1 << parameter::PTYPE_IMMEDIATE; -const uint32_t PTYPE_R = 1 << parameter::PTYPE_INT_REGISTER; -const uint32_t PTYPE_F = 1 << parameter::PTYPE_FLOAT_REGISTER; -//const uint32_t PTYPE_MI = PTYPE_M | PTYPE_I; -//const uint32_t PTYPE_RI = PTYPE_R | PTYPE_I; -const uint32_t PTYPE_MR = PTYPE_M | PTYPE_R; -const uint32_t PTYPE_MRI = PTYPE_M | PTYPE_R | PTYPE_I; -const uint32_t PTYPE_MF = PTYPE_M | PTYPE_F; +const u32 PTYPE_M = 1 << parameter::PTYPE_MEMORY; +const u32 PTYPE_I = 1 << parameter::PTYPE_IMMEDIATE; +const u32 PTYPE_R = 1 << parameter::PTYPE_INT_REGISTER; +const u32 PTYPE_F = 1 << parameter::PTYPE_FLOAT_REGISTER; +//const u32 PTYPE_MI = PTYPE_M | PTYPE_I; +//const u32 PTYPE_RI = PTYPE_R | PTYPE_I; +const u32 PTYPE_MR = PTYPE_M | PTYPE_R; +const u32 PTYPE_MRI = PTYPE_M | PTYPE_R | PTYPE_I; +const u32 PTYPE_MF = PTYPE_M | PTYPE_F; #ifdef _WIN32 @@ -283,7 +283,7 @@ const Gp::Id int_register_map[REG_I_COUNT] = #endif }; -uint32_t float_register_map[REG_F_COUNT] = +u32 float_register_map[REG_F_COUNT] = { #ifdef _WIN32 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 @@ -313,7 +313,7 @@ const CondCode condition_map[uml::COND_MAX - uml::COND_Z] = #if 0 // rounding mode mapping table -const uint8_t fprnd_map[4] = +const u8 fprnd_map[4] = { FPRND_CHOP, // ROUND_TRUNC, truncate FPRND_NEAR, // ROUND_ROUND, round @@ -323,7 +323,7 @@ const uint8_t fprnd_map[4] = #endif // size-to-mask table -//const uint64_t size_to_mask[] 
= { 0, 0xff, 0xffff, 0, 0xffffffff, 0, 0, 0, 0xffffffffffffffffU }; +//const u64 size_to_mask[] = { 0, 0xff, 0xffff, 0, 0xffffffff, 0, 0, 0, 0xffffffffffffffffU }; @@ -369,18 +369,18 @@ inline bool is_nonvolatile_register(Gp reg) class drcbe_x64 : public drcbe_interface { - using x86_entry_point_func = uint32_t (*)(uint8_t *rbpvalue, x86code *entry); + using x86_entry_point_func = u32 (*)(u8 *rbpvalue, x86code *entry); public: // construction/destruction - drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, uint32_t flags, int modes, int addrbits, int ignorebits); + drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, u32 flags, int modes, int addrbits, int ignorebits); virtual ~drcbe_x64(); // required overrides virtual void reset() override; virtual int execute(uml::code_handle &entry) override; - virtual void generate(drcuml_block &block, const uml::instruction *instlist, uint32_t numinst) override; - virtual bool hash_exists(uint32_t mode, uint32_t pc) const noexcept override; + virtual void generate(drcuml_block &block, const uml::instruction *instlist, u32 numinst) override; + virtual bool hash_exists(u32 mode, u32 pc) const noexcept override; virtual void get_info(drcbe_info &info) const noexcept override; virtual bool logging() const noexcept override { return bool(m_log); } @@ -401,12 +401,12 @@ class drcbe_x64 : public drcbe_interface }; // represents the value of a parameter - typedef uint64_t be_parameter_value; + typedef u64 be_parameter_value; // construction be_parameter() : m_type(PTYPE_NONE), m_value(0), m_coldreg(false) { } - be_parameter(uint64_t val) : m_type(PTYPE_IMMEDIATE), m_value(val), m_coldreg(false) { } - be_parameter(drcbe_x64 &drcbe, const uml::parameter ¶m, uint32_t allowed); + be_parameter(u64 val) : m_type(PTYPE_IMMEDIATE), m_value(val), m_coldreg(false) { } + be_parameter(drcbe_x64 &drcbe, const uml::parameter ¶m, u32 allowed); be_parameter(const be_parameter ¶m) = default; // creators for types that don't safely default @@ -421,9 +421,9 @@ class drcbe_x64 : public drcbe_interface // getters be_parameter_type type() const { return m_type; } - uint64_t immediate() const { assert(m_type == PTYPE_IMMEDIATE); return m_value; } - uint32_t ireg() const { assert(m_type == PTYPE_INT_REGISTER); assert(m_value < REG_MAX); return m_value; } - uint32_t freg() const { assert(m_type == PTYPE_FLOAT_REGISTER); assert(m_value < REG_MAX); return m_value; } + u64 immediate() const { assert(m_type == PTYPE_IMMEDIATE); return m_value; } + u32 ireg() const { assert(m_type == PTYPE_INT_REGISTER); assert(m_value < REG_MAX); return m_value; } + u32 freg() const { assert(m_type == PTYPE_FLOAT_REGISTER); assert(m_value < REG_MAX); return m_value; } void *memory() const { assert(m_type == PTYPE_MEMORY); return reinterpret_cast(m_value); } // type queries @@ -433,7 +433,7 @@ class drcbe_x64 : public drcbe_interface bool is_memory() const { return (m_type == PTYPE_MEMORY); } // other queries - bool is_immediate_value(uint64_t value) const { return (m_type == PTYPE_IMMEDIATE && m_value == value); } + bool is_immediate_value(u64 value) const { return (m_type == PTYPE_IMMEDIATE && m_value == value); } bool is_cold_register() const { return m_coldreg; } // helpers @@ -462,16 +462,16 @@ class drcbe_x64 : public drcbe_interface x86code * debug_log_hashjmp; // hashjmp debugging x86code * debug_log_hashjmp_fail; // hashjmp debugging - uint32_t ssemode; // saved SSE mode - uint32_t ssemodesave; // temporary location for saving - uint32_t ssecontrol[4]; // copy of 
the sse_control array + u32 ssemode; // saved SSE mode + u32 ssemodesave; // temporary location for saving + u32 ssecontrol[4]; // copy of the sse_control array float single1; // 1.0 in single-precision double double1; // 1.0 in double-precision void * stacksave; // saved stack pointer - uint8_t flagsmap[0x100]; // flags map - uint16_t flagsunmap[0x20]; // flags unmapper + u8 flagsmap[0x100]; // x86 flags to UML flags table + u16 flagsunmap[0x20]; // UML flags to x86 flags table }; // resolved memory handler functions @@ -486,12 +486,12 @@ class drcbe_x64 : public drcbe_interface }; // helpers - Mem MABS(const void *ptr, const uint32_t size = 0) const { return Mem(rbp, offset_from_rbp(ptr), size); } - bool short_immediate(int64_t immediate) const { return (int32_t)immediate == immediate; } + Mem MABS(const void *ptr, const u32 size = 0) const { return Mem(rbp, offset_from_rbp(ptr), size); } + bool short_immediate(s64 immediate) const { return s32(immediate) == immediate; } void normalize_commutative(be_parameter &inner, be_parameter &outer); void normalize_commutative(const be_parameter &dst, be_parameter &inner, be_parameter &outer); - int32_t offset_from_rbp(const void *ptr) const; - Gp get_base_register_and_offset(Assembler &a, void *target, Gp const ®, int32_t &offset); + s32 offset_from_rbp(const void *ptr) const; + Gp get_base_register_and_offset(Assembler &a, void *target, Gp const ®, s32 &offset); void smart_call_r64(Assembler &a, x86code *target, Gp const ®) const; void smart_call_m64(Assembler &a, x86code **target) const; void emit_memaccess_setup(Assembler &a, const memory_accessors &accessors, const address_space::specific_access_info::side &side) const; @@ -540,6 +540,8 @@ class drcbe_x64 : public drcbe_interface void op_set(Assembler &a, const uml::instruction &inst); void op_mov(Assembler &a, const uml::instruction &inst); void op_sext(Assembler &a, const uml::instruction &inst); + void op_bfxu(Assembler &a, const uml::instruction &inst); + void op_bfxs(Assembler &a, const uml::instruction &inst); void op_roland(Assembler &a, const uml::instruction &inst); void op_rolins(Assembler &a, const uml::instruction &inst); void op_add(Assembler &a, const uml::instruction &inst); @@ -597,7 +599,7 @@ class drcbe_x64 : public drcbe_interface // special-case move helpers void movsx_r64_p32(Assembler &a, Gp const ®, be_parameter const ¶m); - void mov_r64_imm(Assembler &a, Gp const ®, uint64_t const imm) const; + void mov_r64_imm(Assembler &a, Gp const ®, u64 const imm) const; // floating-point helpers void movss_r128_p32(Assembler &a, Vec const ®, be_parameter const ¶m); @@ -615,10 +617,12 @@ class drcbe_x64 : public drcbe_interface drc_map_variables m_map; // code map x86log_context::ptr m_log; // logging FILE * m_log_asmjit; + bool m_lzcnt; // do we have lzcnt support? + bool m_bmi; // do we have BMI support? 
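+	// m_lzcnt selects the single-instruction LZCNT path in op_lzcnt;
+	// m_bmi enables the BEXTR fast path in op_bfxu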
- uint32_t * m_absmask32; // absolute value mask (32-bit) - uint64_t * m_absmask64; // absolute value mask (32-bit) - uint8_t * m_rbpvalue; // value of RBP + u32 * m_absmask32; // absolute value mask (32-bit) + u64 * m_absmask64; // absolute value mask (32-bit) + u8 * m_rbpvalue; // value of RBP x86_entry_point_func m_entry; // entry point x86code * m_exit; // exit point @@ -683,6 +687,8 @@ inline void drcbe_x64::generate_one(Assembler &a, const uml::instruction &inst) case uml::OP_SET: op_set(a, inst); break; // SET dst,c case uml::OP_MOV: op_mov(a, inst); break; // MOV dst,src[,c] case uml::OP_SEXT: op_sext(a, inst); break; // SEXT dst,src + case uml::OP_BFXU: op_bfxu(a, inst); break; // BFXU dst,src1,src2,src3 + case uml::OP_BFXS: op_bfxs(a, inst); break; // BFXS dst,src1,src2,src3 case uml::OP_ROLAND: op_roland(a, inst); break; // ROLAND dst,src1,src2,src3 case uml::OP_ROLINS: op_rolins(a, inst); break; // ROLINS dst,src1,src2,src3 case uml::OP_ADD: op_add(a, inst); break; // ADD dst,src1,src2[,f] @@ -748,7 +754,7 @@ inline void drcbe_x64::generate_one(Assembler &a, const uml::instruction &inst) // into a reduced set //------------------------------------------------- -drcbe_x64::be_parameter::be_parameter(drcbe_x64 &drcbe, const parameter ¶m, uint32_t allowed) +drcbe_x64::be_parameter::be_parameter(drcbe_x64 &drcbe, const parameter ¶m, u32 allowed) { int regnum; @@ -879,12 +885,12 @@ inline void drcbe_x64::normalize_commutative(const be_parameter &dst, be_paramet // from rbp //------------------------------------------------- -inline int32_t drcbe_x64::offset_from_rbp(const void *ptr) const +inline s32 drcbe_x64::offset_from_rbp(const void *ptr) const { - const int64_t delta = reinterpret_cast(ptr) - m_rbpvalue; - if (int32_t(delta) != delta) + const s64 delta = reinterpret_cast(ptr) - m_rbpvalue; + if (s32(delta) != delta) throw emu_fatalerror("drcbe_x64::offset_from_rbp: delta out of range"); - return int32_t(delta); + return s32(delta); } @@ -894,9 +900,9 @@ inline int32_t drcbe_x64::offset_from_rbp(const void *ptr) const // target address //------------------------------------------------- -inline Gp drcbe_x64::get_base_register_and_offset(Assembler &a, void *target, Gp const ®, int32_t &offset) +inline Gp drcbe_x64::get_base_register_and_offset(Assembler &a, void *target, Gp const ®, s32 &offset) { - const int64_t delta = reinterpret_cast(target) - m_rbpvalue; + const s64 delta = reinterpret_cast(target) - m_rbpvalue; if (short_immediate(delta)) { offset = delta; @@ -918,7 +924,7 @@ inline Gp drcbe_x64::get_base_register_and_offset(Assembler &a, void *target, Gp inline void drcbe_x64::smart_call_r64(Assembler &a, x86code *target, Gp const ®) const { - const int64_t delta = target - (x86code *)(a.code()->base_address() + a.offset() + 5); + const s64 delta = target - (x86code *)(a.code()->base_address() + a.offset() + 5); if (short_immediate(delta)) a.call(imm(target)); // call target else @@ -936,7 +942,7 @@ inline void drcbe_x64::smart_call_r64(Assembler &a, x86code *target, Gp const &r inline void drcbe_x64::smart_call_m64(Assembler &a, x86code **target) const { - const int64_t delta = *target - (x86code *)(a.code()->base_address() + a.offset() + 5); + const s64 delta = *target - (x86code *)(a.code()->base_address() + a.offset() + 5); if (short_immediate(delta)) a.call(imm(*target)); // call *target else @@ -1001,12 +1007,14 @@ void drcbe_x64::emit_memaccess_setup(Assembler &a, const memory_accessors &acces // drcbe_x64 - constructor 
//------------------------------------------------- -drcbe_x64::drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, uint32_t flags, int modes, int addrbits, int ignorebits) +drcbe_x64::drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, u32 flags, int modes, int addrbits, int ignorebits) : drcbe_interface(drcuml, cache, device) , m_hash(cache, modes, addrbits, ignorebits) , m_map(cache, 0xaaaaaaaa5555) , m_log_asmjit(nullptr) - , m_absmask32((uint32_t *)cache.alloc_near(16*2 + 15)) + , m_lzcnt(false) + , m_bmi(false) + , m_absmask32((u32 *)cache.alloc_near(16*2 + 15)) , m_absmask64(nullptr) , m_rbpvalue(cache.near() + 0x80) , m_entry(nullptr) @@ -1015,8 +1023,13 @@ drcbe_x64::drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, u , m_endofblock(nullptr) , m_near(*(near_state *)cache.alloc_near(sizeof(m_near))) { + // check for optional CPU features + const auto &x86_features = CpuInfo::host().features().x86(); + m_lzcnt = x86_features.has_lzcnt(); + m_bmi = x86_features.has_bmi(); + // build up necessary arrays - static const uint32_t sse_control[4] = + constexpr u32 sse_control[4] = { 0xff80, // ROUND_TRUNC 0x9f80, // ROUND_ROUND @@ -1024,13 +1037,13 @@ drcbe_x64::drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, u 0xbf80 // ROUND_FLOOR }; memcpy(m_near.ssecontrol, sse_control, sizeof(m_near.ssecontrol)); - m_near.single1 = 1.0f; + m_near.single1 = 1.0F; m_near.double1 = 1.0; // create absolute value masks that are aligned to SSE boundaries - m_absmask32 = (uint32_t *)(((uintptr_t)m_absmask32 + 15) & ~15); + m_absmask32 = (u32 *)(((uintptr_t)m_absmask32 + 15) & ~15); m_absmask32[0] = m_absmask32[1] = m_absmask32[2] = m_absmask32[3] = 0x7fffffff; - m_absmask64 = (uint64_t *)&m_absmask32[4]; + m_absmask64 = (u64 *)&m_absmask32[4]; m_absmask64[0] = m_absmask64[1] = 0x7fffffffffffffffU; // get pointers to C functions we need to call @@ -1043,7 +1056,7 @@ drcbe_x64::drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, u // build the flags map for (int entry = 0; entry < std::size(m_near.flagsmap); entry++) { - uint8_t flags = 0; + u8 flags = 0; if (entry & 0x001) flags |= FLAG_C; if (entry & 0x004) flags |= FLAG_U; if (entry & 0x040) flags |= FLAG_Z; @@ -1053,7 +1066,7 @@ drcbe_x64::drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, u } for (int entry = 0; entry < std::size(m_near.flagsunmap); entry++) { - uint16_t flags = 0; + u16 flags = 0; if (entry & FLAG_C) flags |= 0x001 << 8; if (entry & FLAG_U) flags |= 0x004 << 8; if (entry & FLAG_Z) flags |= 0x040 << 8; @@ -1127,12 +1140,12 @@ size_t drcbe_x64::emit(CodeHolder &ch) throw emu_fatalerror("asmjit::CodeHolder::relocate_to_base() error %u", std::underlying_type_t(err)); } - size_t const alignment = ch.base_address() - uint64_t(m_cache.top()); + size_t const alignment = ch.base_address() - uintptr_t(m_cache.top()); size_t const code_size = ch.code_size(); // test if enough room remains in drc cache drccodeptr *cachetop = m_cache.begin_codegen(alignment + code_size); - if (cachetop == nullptr) + if (!cachetop) return 0; err = ch.copy_flattened_data(drccodeptr(ch.base_address()), code_size, CopySectionFlags::kPadTargetBuffer); @@ -1160,7 +1173,7 @@ void drcbe_x64::reset() x86code *dst = (x86code *)m_cache.top(); CodeHolder ch; - ch.init(Environment::host(), uint64_t(dst)); + ch.init(Environment::host(), u64(dst)); FileLogger logger(m_log_asmjit); if (logger.file()) @@ -1179,7 +1192,7 @@ void drcbe_x64::reset() 
a.bind(a.new_named_label("entry_point")); FuncDetail entry_point; - entry_point.init(FuncSignature::build(CallConvId::kCDecl), Environment::host()); + entry_point.init(FuncSignature::build(CallConvId::kCDecl), Environment::host()); FuncFrame frame; frame.init(entry_point); @@ -1251,7 +1264,7 @@ int drcbe_x64::execute(code_handle &entry) // drcbex64_generate - generate code //------------------------------------------------- -void drcbe_x64::generate(drcuml_block &block, const instruction *instlist, uint32_t numinst) +void drcbe_x64::generate(drcuml_block &block, const instruction *instlist, u32 numinst) { // do this here because device.debug() isn't initialised at construction time if (!m_debug_cpu_instruction_hook && (m_device.machine().debug_flags & DEBUG_FLAG_ENABLED)) @@ -1282,7 +1295,7 @@ void drcbe_x64::generate(drcuml_block &block, const instruction *instlist, uint3 x86code *dst = (x86code *)(uintptr_t(m_cache.top() + linemask) & ~linemask); CodeHolder ch; - ch.init(Environment::host(), uint64_t(dst)); + ch.init(Environment::host(), u64(dst)); ThrowableErrorHandler e; ch.set_error_handler(&e); @@ -1323,7 +1336,7 @@ void drcbe_x64::generate(drcuml_block &block, const instruction *instlist, uint3 if (inst.opcode() == OP_HANDLE) blockname = inst.param(0).handle().string(); else if (inst.opcode() == OP_HASH) - blockname = string_format("Code: mode=%d PC=%08X", (uint32_t)inst.param(0).immediate(), (offs_t)inst.param(1).immediate()); + blockname = string_format("Code: mode=%d PC=%08X", (u32)inst.param(0).immediate(), (offs_t)inst.param(1).immediate()); } // generate code @@ -1358,7 +1371,7 @@ void drcbe_x64::generate(drcuml_block &block, const instruction *instlist, uint3 // exists in the hash table //------------------------------------------------- -bool drcbe_x64::hash_exists(uint32_t mode, uint32_t pc) const noexcept +bool drcbe_x64::hash_exists(u32 mode, u32 pc) const noexcept { return m_hash.code_exists(mode, pc); } @@ -1506,7 +1519,7 @@ void drcbe_x64::shift_op_param(Assembler &a, Inst::Id const opcode, size_t opsiz if (param.is_immediate()) { - const uint32_t bitshift = param.immediate() & (opsize * 8 - 1); + const u32 bitshift = param.immediate() & (opsize * 8 - 1); if (bitshift) a.emit(opcode, dst, imm(param.immediate())); @@ -1523,7 +1536,7 @@ void drcbe_x64::shift_op_param(Assembler &a, Inst::Id const opcode, size_t opsiz if (!rotate || (update_flags & (FLAG_S | FLAG_Z))) { if (dst.is_mem()) - a.test(dst.as(), util::make_bitmask(opsize * 8)); + a.test(dst.as(), util::make_bitmask(opsize * 8)); else a.test(dst.as(), dst.as()); } @@ -1553,7 +1566,7 @@ void drcbe_x64::shift_op_param(Assembler &a, Inst::Id const opcode, size_t opsiz if (update_flags & (FLAG_S | FLAG_Z)) { if (dst.is_mem()) - a.test(dst.as(), util::make_bitmask(opsize * 8)); + a.test(dst.as(), util::make_bitmask(opsize * 8)); else a.test(dst.as(), dst.as()); } @@ -1583,7 +1596,7 @@ void drcbe_x64::shift_op_param(Assembler &a, Inst::Id const opcode, size_t opsiz a.rcl(r10b, 1); // save carry if (dst.is_mem()) - a.test(dst.as(), util::make_bitmask(opsize * 8)); + a.test(dst.as(), util::make_bitmask(opsize * 8)); else a.test(dst.as(), dst.as()); @@ -1670,10 +1683,10 @@ void drcbe_x64::movsx_r64_p32(Assembler &a, Gp const ®, be_parameter const &p { if (param.is_immediate()) { - if ((int32_t)param.immediate() >= 0) + if (s32(param.immediate()) >= 0) a.mov(reg.r32(), param.immediate()); // mov reg,param else - mov_r64_imm(a, reg, int32_t(param.immediate())); // mov reg,param + mov_r64_imm(a, reg, s32(param.immediate())); 
// mov reg,param } else if (param.is_memory()) a.movsxd(reg, MABS(param.memory())); // movsxd reg,[param] @@ -1681,7 +1694,7 @@ void drcbe_x64::movsx_r64_p32(Assembler &a, Gp const ®, be_parameter const &p a.movsxd(reg, gpd(param.ireg())); // movsxd reg,param } -void drcbe_x64::mov_r64_imm(Assembler &a, Gp const ®, uint64_t const imm) const +void drcbe_x64::mov_r64_imm(Assembler &a, Gp const ®, u64 const imm) const { if (s32(u32(imm)) == s64(imm)) { @@ -1693,7 +1706,7 @@ void drcbe_x64::mov_r64_imm(Assembler &a, Gp const ®, uint64_t const imm) con } else { - const int64_t delta = imm - (a.code()->base_address() + a.offset() + 7); + const s64 delta = imm - (a.code()->base_address() + a.offset() + 7); if (short_immediate(delta)) a.lea(reg.r64(), ptr(rip, delta)); else @@ -2036,8 +2049,8 @@ void drcbe_x64::op_hashjmp(Assembler &a, const instruction &inst) if (pcp.is_immediate()) { // a straight immediate jump is direct, though we need the PC in EAX in case of failure - uint32_t l1val = (pcp.immediate() >> m_hash.l1shift()) & m_hash.l1mask(); - uint32_t l2val = (pcp.immediate() >> m_hash.l2shift()) & m_hash.l2mask(); + u32 l1val = (pcp.immediate() >> m_hash.l1shift()) & m_hash.l1mask(); + u32 l2val = (pcp.immediate() >> m_hash.l2shift()) & m_hash.l2mask(); a.short_().lea(gpq(REG_PARAM1), ptr(nocode)); // lea rcx,[rip+nocode] a.jmp(MABS(&m_hash.base()[modep.immediate()][l1val][l2val])); // jmp hash[modep][l1val][l2val] } @@ -2064,8 +2077,8 @@ void drcbe_x64::op_hashjmp(Assembler &a, const instruction &inst) if (pcp.is_immediate()) { // fixed PC - uint32_t l1val = (pcp.immediate() >> m_hash.l1shift()) & m_hash.l1mask(); - uint32_t l2val = (pcp.immediate() >> m_hash.l2shift()) & m_hash.l2mask(); + u32 l1val = (pcp.immediate() >> m_hash.l1shift()) & m_hash.l1mask(); + u32 l2val = (pcp.immediate() >> m_hash.l2shift()) & m_hash.l2mask(); a.mov(rdx, ptr(rcx, l1val * 8)); // mov rdx,[rcx+l1val*8] a.short_().lea(gpq(REG_PARAM1), ptr(nocode)); // lea rcx,[rip+nocode] a.jmp(ptr(rdx, l2val * 8)); // jmp [l2val*8] @@ -2391,7 +2404,7 @@ void drcbe_x64::op_getflgs(Assembler &a, const instruction &inst) // pick a target register for the general case Gp dstreg = dstp.select_register(edx); - uint32_t flagmask = 0; + u32 flagmask = 0; switch (maskp.immediate()) { @@ -2567,7 +2580,7 @@ void drcbe_x64::op_setflgs(Assembler &a, const instruction &inst) if (srcp.is_immediate()) { - uint32_t const flags = m_near.flagsunmap[srcp.immediate() & FLAGS_ALL]; + u32 const flags = m_near.flagsunmap[srcp.immediate() & FLAGS_ALL]; if (!flags) a.xor_(eax, eax); else @@ -2749,7 +2762,7 @@ void drcbe_x64::op_load(Assembler &a, const instruction &inst) int size = scalesizep.size(); // determine the pointer base - int32_t baseoffs; + s32 baseoffs; const Gp basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs); // pick a target register for the general case @@ -2761,11 +2774,11 @@ void drcbe_x64::op_load(Assembler &a, const instruction &inst) ptrdiff_t const offset = baseoffs + (ptrdiff_t(s32(u32(indp.immediate()))) << scalesizep.scale()); if (size == SIZE_BYTE) - a.movzx(dstreg, byte_ptr(basereg, offset)); + a.movzx(dstreg.r32(), byte_ptr(basereg, offset)); else if (size == SIZE_WORD) - a.movzx(dstreg, word_ptr(basereg, offset)); + a.movzx(dstreg.r32(), word_ptr(basereg, offset)); else if (size == SIZE_DWORD) - a.mov(dstreg, dword_ptr(basereg, offset)); + a.mov(dstreg.r32(), dword_ptr(basereg, offset)); else if (size == SIZE_QWORD) a.mov(dstreg, ptr(basereg, offset)); } @@ -2775,11 +2788,11 @@ void 
drcbe_x64::op_load(Assembler &a, const instruction &inst) const Gp indreg = rcx; movsx_r64_p32(a, indreg, indp); if (size == SIZE_BYTE) - a.movzx(dstreg, byte_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); + a.movzx(dstreg.r32(), byte_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); else if (size == SIZE_WORD) - a.movzx(dstreg, word_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); + a.movzx(dstreg.r32(), word_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); else if (size == SIZE_DWORD) - a.mov(dstreg, dword_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); + a.mov(dstreg.r32(), dword_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); else if (size == SIZE_QWORD) a.mov(dstreg, ptr(basereg, indreg, scalesizep.scale(), baseoffs)); } @@ -2809,7 +2822,7 @@ void drcbe_x64::op_loads(Assembler &a, const instruction &inst) int size = scalesizep.size(); // determine the pointer base - int32_t baseoffs; + s32 baseoffs; const Gp basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs); // pick a target register for the general case @@ -2872,7 +2885,7 @@ void drcbe_x64::op_store(Assembler &a, const instruction &inst) int size = scalesizep.size(); // determine the pointer base - int32_t baseoffs; + s32 baseoffs; const Gp basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs); // pick a source register for the general case @@ -2996,9 +3009,9 @@ void drcbe_x64::op_read(Assembler &a, const instruction &inst) { // set default mem_mask if (accessors.specific.native_bytes <= 4) - a.mov(gpd(REG_PARAM3), make_bitmask(accessors.specific.native_bytes << 3)); + a.mov(gpd(REG_PARAM3), make_bitmask(accessors.specific.native_bytes << 3)); else - a.mov(gpq(REG_PARAM3), make_bitmask(accessors.specific.native_bytes << 3)); + a.mov(gpq(REG_PARAM3), make_bitmask(accessors.specific.native_bytes << 3)); emit_memaccess_setup(a, accessors, accessors.specific.read); // get dispatch table entry } @@ -3010,9 +3023,9 @@ void drcbe_x64::op_read(Assembler &a, const instruction &inst) a.mov(ptr(rsp, 32), gpq(int_register_map[0])); // save I0 register if ((accessors.specific.native_bytes <= 4) || (spacesizep.size() != SIZE_QWORD)) - a.mov(gpd(REG_PARAM3), imm(make_bitmask(8 << spacesizep.size()))); // set default mem_mask + a.mov(gpd(REG_PARAM3), imm(make_bitmask(8 << spacesizep.size()))); // set default mem_mask else - a.mov(gpq(REG_PARAM3), imm(make_bitmask(8 << spacesizep.size()))); // set default mem_mask + a.mov(gpq(REG_PARAM3), imm(make_bitmask(8 << spacesizep.size()))); // set default mem_mask a.mov(ecx, gpd(REG_PARAM2)); // copy address for bit offset if (accessors.has_high_bits && !accessors.mask_high_bits) @@ -3327,9 +3340,9 @@ void drcbe_x64::op_write(Assembler &a, const instruction &inst) { // set default mem_mask if (accessors.specific.native_bytes <= 4) - a.mov(gpd(REG_PARAM4), make_bitmask(accessors.specific.native_bytes << 3)); + a.mov(gpd(REG_PARAM4), make_bitmask(accessors.specific.native_bytes << 3)); else - a.mov(gpq(REG_PARAM4), make_bitmask(accessors.specific.native_bytes << 3)); + a.mov(gpq(REG_PARAM4), make_bitmask(accessors.specific.native_bytes << 3)); emit_memaccess_setup(a, accessors, accessors.specific.write); } @@ -3373,9 +3386,9 @@ void drcbe_x64::op_write(Assembler &a, const instruction &inst) } a.and_(ecx, imm((accessors.specific.native_bytes - (1 << spacesizep.size())) << 3)); // mask bit address if ((accessors.specific.native_bytes <= 4) || (spacesizep.size() != SIZE_QWORD)) - a.mov(r11d, imm(make_bitmask(8 << spacesizep.size()))); // set 
default mem_mask + a.mov(r11d, imm(make_bitmask(8 << spacesizep.size()))); // set default mem_mask else - a.mov(r11, imm(make_bitmask(8 << spacesizep.size()))); // set default mem_mask + a.mov(r11, imm(make_bitmask(8 << spacesizep.size()))); // set default mem_mask if (accessors.has_high_bits && accessors.mask_high_bits && accessors.specific.low_bits) a.mov(gpd(REG_PARAM2), r10d); // restore masked address if (accessors.specific.write.is_virtual) @@ -3739,13 +3752,12 @@ void drcbe_x64::op_sext(Assembler &a, const instruction &inst) const parameter &sizep = inst.param(2); assert(sizep.is_size()); - Gp dstreg = dstp.select_register(rax); + Gp const dstreg = dstp.select_register((inst.size() == 4) ? eax : rax); + Gp const srcreg = srcp.select_register(dstreg); if (inst.size() == 4) { // 32-bit form - dstreg = dstreg.r32(); - if (srcp.is_memory()) { if (sizep.size() == SIZE_BYTE) @@ -3757,7 +3769,6 @@ void drcbe_x64::op_sext(Assembler &a, const instruction &inst) } else { - Gp const srcreg = srcp.select_register(dstreg); mov_reg_param(a, srcreg, srcp); if (sizep.size() == SIZE_BYTE) a.movsx(dstreg, srcreg.r8()); @@ -3766,8 +3777,6 @@ void drcbe_x64::op_sext(Assembler &a, const instruction &inst) else if (sizep.size() == SIZE_DWORD) a.mov(dstreg, srcreg); } - - mov_param_reg(a, dstp, dstreg); } else if (inst.size() == 8) { @@ -3785,7 +3794,6 @@ void drcbe_x64::op_sext(Assembler &a, const instruction &inst) } else { - Gp const srcreg = srcp.select_register(dstreg); mov_reg_param(a, srcreg, srcp); if (sizep.size() == SIZE_BYTE) a.movsx(dstreg, srcreg.r8()); @@ -3796,15 +3804,283 @@ void drcbe_x64::op_sext(Assembler &a, const instruction &inst) else if (sizep.size() == SIZE_QWORD) a.mov(dstreg, srcreg); } - - mov_param_reg(a, dstp, dstreg); } + mov_param_reg(a, dstp, dstreg); + if (inst.flags() != 0) a.test(dstreg, dstreg); } +//------------------------------------------------- +// op_bfxu - process a BFXU opcode +//------------------------------------------------- + +void drcbe_x64::op_bfxu(Assembler &a, const instruction &inst) +{ + // validate instruction + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_S | FLAG_Z); + + // normalize parameters + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + be_parameter shiftp(*this, inst.param(2), PTYPE_MRI); + be_parameter widthp(*this, inst.param(3), PTYPE_MRI); + const unsigned bits = inst.size() * 8; + + if (widthp.is_immediate_value(0)) + { + // undefined behaviour - do something + if (inst.flags() || dstp.is_int_register()) + { + Gp dstreg = dstp.select_register((inst.size() == 4) ? eax : rax); + + a.xor_(dstreg, dstreg); + + mov_param_reg(a, dstp, dstreg); + } + else if (dstp.is_memory()) + { + a.mov(MABS(dstp.memory(), inst.size()), 0); + } + } + else + { + Gp dstreg; + Gp tempreg = (inst.size() == 4) ? ecx : rcx; + + if (widthp.is_immediate()) + { + const unsigned width = widthp.immediate() & (bits - 1); + + dstreg = dstp.select_register((inst.size() == 4) ? eax : rax); + + if (m_bmi && shiftp.is_immediate() && ((width + (shiftp.immediate() & (bits - 1))) <= bits)) + { + const unsigned shift = shiftp.immediate() & (bits - 1); + + if (srcp.is_immediate()) + mov_reg_param(a, dstreg, srcp); + + a.mov(tempreg.r32(), shift | (width << 8)); + if (srcp.is_immediate()) + a.bextr(dstreg, dstreg, tempreg); + else if (srcp.is_int_register()) + a.bextr(dstreg, srcp.select_register((inst.size() == 4) ? 
eax : rax), tempreg); + else if (srcp.is_memory()) + a.bextr(dstreg, MABS(srcp.memory()), tempreg); + + if (inst.flags() & FLAG_S) + a.test(dstreg, dstreg); + } + else + { + if (!shiftp.is_immediate()) + mov_reg_param(a, tempreg.r32(), shiftp); + mov_reg_param(a, dstreg, srcp); + + if (shiftp.is_immediate()) + { + const unsigned shift = shiftp.immediate() & (bits - 1); + + a.ror(dstreg, shift); + } + else + { + a.ror(dstreg, tempreg.r8()); + } + + if (!inst.flags() && (width == 8)) + { + a.movzx(dstreg.r32(), dstreg.r8()); + } + else if (!inst.flags() && (width == 16)) + { + a.movzx(dstreg.r32(), dstreg.r16()); + } + else if (!inst.flags() && (width == 32)) + { + a.mov(dstreg.r32(), dstreg.r32()); + } + else if (width <= 32) + { + if ((bits == 64) && (inst.flags() & FLAG_S) && (width == 32)) + { + a.mov(dstreg.r32(), dstreg.r32()); + a.test(dstreg, dstreg); + } + else + { + a.and_(dstreg.r32(), util::make_bitmask(width)); + } + } + else + { + a.mov(tempreg, util::make_bitmask(width)); + a.and_(dstreg, tempreg); + } + } + } + else + { + Gp widthreg = widthp.select_register((inst.size() == 4) ? edx : rdx); + dstreg = dstp.select_register((inst.size() == 4) ? eax : rax, widthp); + + if (!shiftp.is_immediate()) + mov_reg_param(a, tempreg, shiftp); + mov_reg_param(a, widthreg, widthp); + mov_reg_param(a, dstreg, srcp); + + if (shiftp.is_immediate()) + a.mov(tempreg.r32(), shiftp.immediate() & (bits - 1)); + a.add(tempreg.r32(), widthreg.r32()); + a.ror(dstreg, tempreg.r8()); + a.mov(tempreg.r32(), widthreg.r32()); + a.neg(tempreg.r32()); + a.and_(tempreg.r32(), bits - 1); + a.shr(dstreg, tempreg.r8()); + } + + mov_param_reg(a, dstp, dstreg); + } +} + + +//------------------------------------------------- +// op_bfxs - process a BFXS opcode +//------------------------------------------------- + +void drcbe_x64::op_bfxs(Assembler &a, const instruction &inst) +{ + // validate instruction + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_S | FLAG_Z); + + // normalize parameters + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + be_parameter shiftp(*this, inst.param(2), PTYPE_MRI); + be_parameter widthp(*this, inst.param(3), PTYPE_MRI); + const unsigned bits = inst.size() * 8; + + if (widthp.is_immediate_value(0)) + { + // undefined behaviour - do something + if (inst.flags() || dstp.is_int_register()) + { + Gp dstreg = dstp.select_register((inst.size() == 4) ? eax : rax); + + a.xor_(dstreg, dstreg); + + mov_param_reg(a, dstp, dstreg); + } + else if (dstp.is_memory()) + { + a.mov(MABS(dstp.memory(), inst.size()), 0); + } + } + else + { + Gp dstreg; + Gp tempreg = (inst.size() == 4) ? ecx : rcx; + + if (widthp.is_immediate()) + { + const unsigned width = widthp.immediate() & (bits - 1); + const bool use_movsx = (width == 8) || (width == 16) || (width == 32); + + if (!shiftp.is_immediate() && use_movsx) + { + dstreg = dstp.select_register((inst.size() == 4) ? eax : rax); + + mov_reg_param(a, tempreg.r32(), shiftp); + } + else + { + dstreg = dstp.select_register((inst.size() == 4) ? 
eax : rax, shiftp); + } + mov_reg_param(a, dstreg, srcp); + + if (shiftp.is_immediate()) + { + const unsigned shift = shiftp.immediate() & (bits - 1); + + if (use_movsx) + a.ror(dstreg, shift); + else + a.ror(dstreg, (width + shift) & (bits - 1)); + } + else + { + if (!use_movsx) + { + a.mov(tempreg.r32(), width); + if (shiftp.is_int_register()) + a.add(tempreg.r32(), shiftp.select_register(edx)); + else + a.add(tempreg.r32(), MABS(shiftp.memory())); + } + a.ror(dstreg, tempreg.r8()); + } + + if (!use_movsx) + { + a.sar(dstreg, -int(width) & (bits - 1)); + } + else + { + if (width == 8) + a.movsx(dstreg, dstreg.r8()); + else if (width == 16) + a.movsx(dstreg, dstreg.r16()); + else if (width == 32) + a.movsxd(dstreg, dstreg.r32()); + + if (inst.flags()) + a.test(dstreg, dstreg); + } + } + else if (shiftp.is_immediate_value(0)) + { + dstreg = dstp.select_register((inst.size() == 4) ? eax : rax); + + mov_reg_param(a, tempreg, widthp); + mov_reg_param(a, dstreg, srcp); + + a.ror(dstreg, tempreg.r8()); + a.neg(tempreg); + a.and_(tempreg, bits - 1); + a.sar(dstreg, tempreg.r8()); + } + else + { + Gp widthreg = widthp.select_register((inst.size() == 4) ? edx : rdx); + dstreg = dstp.select_register((inst.size() == 4) ? eax : rax, widthp); + + if (!shiftp.is_immediate()) + mov_reg_param(a, tempreg, shiftp); + mov_reg_param(a, widthreg, widthp); + mov_reg_param(a, dstreg, srcp); + + if (shiftp.is_immediate()) + a.mov(tempreg.r32(), shiftp.immediate() & (bits - 1)); + a.add(tempreg.r32(), widthreg.r32()); + a.ror(dstreg, tempreg.r8()); + a.mov(tempreg.r32(), widthreg.r32()); + a.neg(tempreg.r32()); + a.and_(tempreg.r32(), bits - 1); + a.sar(dstreg, tempreg.r8()); + } + + mov_param_reg(a, dstp, dstreg); + } +} + + //------------------------------------------------- // op_roland - process an ROLAND opcode //------------------------------------------------- @@ -3829,17 +4105,17 @@ void drcbe_x64::op_roland(Assembler &a, const instruction &inst) if (shiftp.is_immediate() && maskp.is_immediate()) { const unsigned shift = shiftp.immediate() & (bits - 1); - const uint64_t sizemask = util::make_bitmask(bits); - const uint64_t mask = maskp.immediate() & sizemask; + const u64 sizemask = util::make_bitmask(bits); + const u64 mask = maskp.immediate() & sizemask; mov_reg_param(a, dstreg, srcp); a.rol(dstreg, shift); if (!inst.flags() && (mask == 0x000000ff)) { - a.movzx(dstreg, dstreg.r8()); + a.movzx(dstreg.r32(), dstreg.r8()); } else if (!inst.flags() && (mask == 0x0000ffff)) { - a.movzx(dstreg, dstreg.r16()); + a.movzx(dstreg.r32(), dstreg.r16()); } else if (!inst.flags() && (mask == 0xffffffff)) { @@ -3849,10 +4125,10 @@ void drcbe_x64::op_roland(Assembler &a, const instruction &inst) { a.and_(dstreg, mask); } - else if (uint32_t(mask) == mask) + else if (u32(mask) == mask) { a.and_(dstreg, mask); // asmjit converts this to a DWORD-size operation - if (inst.flags()) + if (inst.flags() & FLAG_S) a.test(dstreg, dstreg); } else @@ -3904,7 +4180,7 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) Gp maskreg = (inst.size() == 4) ? 
edx : rdx; const unsigned bits = inst.size() * 8; - const uint64_t sizemask = util::make_bitmask(bits); + const u64 sizemask = util::make_bitmask(bits); if (shiftp.is_immediate() && (srcp.is_immediate() || maskp.is_immediate())) { @@ -3913,12 +4189,12 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) { // immediate source - uint64_t src = srcp.immediate() & sizemask; + u64 src = srcp.immediate() & sizemask; src = ((src << shift) | (src >> (bits - shift))) & sizemask; if (maskp.is_immediate()) { - const uint64_t mask = maskp.immediate() & sizemask; + const u64 mask = maskp.immediate() & sizemask; src &= mask; bool flags = false; @@ -3940,7 +4216,7 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) a.and_(dstreg, ~mask); flags = true; } - else if (uint32_t(~mask) == ~mask) + else if (u32(~mask) == ~mask) { a.and_(dstreg, ~mask); } @@ -3999,7 +4275,7 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) else { // variables source, immediate mask - const uint64_t mask = maskp.immediate() & sizemask; + const u64 mask = maskp.immediate() & sizemask; mov_reg_param(a, dstreg, dstp); @@ -4030,12 +4306,12 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) a.and_(srcreg, mask); } } - else if (mask == (util::make_bitmask(shift) & sizemask)) + else if (mask == (util::make_bitmask(shift) & sizemask)) { mov_reg_param(a, srcreg, srcp); a.shr(srcreg, bits - shift); } - else if (mask == (~util::make_bitmask(shift) & sizemask)) + else if (mask == (~util::make_bitmask(shift) & sizemask)) { mov_reg_param(a, srcreg, srcp); a.shl(srcreg, shift); @@ -4056,7 +4332,7 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) { a.mov(srcreg.r32(), srcreg.r32()); } - else if ((bits == 32) || (util::sext(mask, 32) == mask) || (uint32_t(mask) == mask)) + else if ((bits == 32) || (util::sext(mask, 32) == mask) || (u32(mask) == mask)) { a.and_(srcreg, mask); } @@ -4080,7 +4356,7 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) { a.movzx(dstreg, dstreg.r8()); } - else if ((bits == 32) || (util::sext(~mask, 32) == ~mask) || (uint32_t(~mask) == ~mask)) + else if ((bits == 32) || (util::sext(~mask, 32) == ~mask) || (u32(~mask) == ~mask)) { a.and_(dstreg, ~mask & sizemask); } @@ -4103,15 +4379,15 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) // generic case bool maskimm = maskp.is_immediate(); - uint64_t mask = 0; + u64 mask = 0; if (maskimm) { mask = maskp.immediate() & sizemask; if (bits != 32) { maskimm = - ((util::sext(mask, 32) == mask) && (uint32_t(~mask) == ~mask)) || - ((util::sext(~mask, 32) == ~mask) && (uint32_t(mask) == mask)); + ((util::sext(mask, 32) == mask) && (u32(~mask) == ~mask)) || + ((util::sext(~mask, 32) == ~mask) && (u32(mask) == mask)); } } @@ -4531,7 +4807,7 @@ void drcbe_x64::op_mulslw(Assembler &a, const instruction &inst) if (use3op) { // use 3-operand form to multiply by immediate - const int64_t imm = (inst.size() == 4) ? s32(u32(src2p.immediate())) : src2p.immediate(); + const s64 imm = (inst.size() == 4) ? 
s32(u32(src2p.immediate())) : src2p.immediate(); if (src1p.is_memory()) { a.imul(dstreg, MABS(src1p.memory(), inst.size()), imm); @@ -4976,41 +5252,40 @@ void drcbe_x64::op_lzcnt(Assembler &a, const instruction &inst) // normalize parameters be_parameter dstp(*this, inst.param(0), PTYPE_MR); be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + const unsigned bits = inst.size() * 8; - if (inst.flags()) - { - a.xor_(eax, eax); // reset status flags - a.test(eax, eax); - } + // pick a target register + Gp dstreg = dstp.select_register((inst.size() == 4) ? eax : rax); - // 32-bit form - if (inst.size() == 4) + if (srcp.is_immediate()) + mov_reg_param(a, dstreg, srcp); + + if (m_lzcnt) { - // pick a target register - Gp dstreg = dstp.select_register(eax); + if (srcp.is_immediate()) + a.lzcnt(dstreg, dstreg); + else if (srcp.is_int_register()) + a.lzcnt(dstreg, srcp.select_register((inst.size() == 4) ? eax : rax)); + else if (srcp.is_memory()) + a.lzcnt(dstreg, MABS(srcp.memory())); - mov_reg_param(a, dstreg, srcp); - a.mov(ecx, 32 ^ 31); - a.bsr(dstreg, dstreg); - a.cmovz(dstreg, ecx); - a.xor_(dstreg, 31); mov_param_reg(a, dstp, dstreg); - if (inst.flags()) + if (inst.flags() & FLAG_S) a.test(dstreg, dstreg); } - - // 64-bit form - else if (inst.size() == 8) + else { - // pick a target register - Gp dstreg = dstp.select_register(rax); + a.mov(ecx, bits ^ (bits - 1)); + if (srcp.is_immediate()) + a.bsr(dstreg, dstreg); + else if (srcp.is_int_register()) + a.bsr(dstreg, srcp.select_register((inst.size() == 4) ? eax : rax)); + else if (srcp.is_memory()) + a.bsr(dstreg, MABS(srcp.memory())); + a.cmovz(dstreg, ecx); + a.xor_(dstreg.r32(), bits - 1); - mov_reg_param(a, dstreg, srcp); - a.mov(ecx, 64 ^ 63); - a.bsr(dstreg, dstreg); - a.cmovz(dstreg, rcx); - a.xor_(dstreg, 63); mov_param_reg(a, dstp, dstreg); if (inst.flags()) @@ -5156,7 +5431,7 @@ void drcbe_x64::op_fload(Assembler &a, const instruction &inst) Vec const dstreg = dstp.select_register(REG_FSCRATCH1); // determine the pointer base - int32_t baseoffs; + s32 baseoffs; Gp const basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs); if (indp.is_immediate()) @@ -5200,7 +5475,7 @@ void drcbe_x64::op_fstore(Assembler &a, const instruction &inst) Vec const srcreg = srcp.select_register(REG_FSCRATCH1); // determine the pointer base - int32_t baseoffs; + s32 baseoffs; Gp const basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs); // 32-bit form @@ -6191,7 +6466,7 @@ std::unique_ptr make_drcbe_x64( drcuml_state &drcuml, device_t &device, drc_cache &cache, - uint32_t flags, + u32 flags, int modes, int addrbits, int ignorebits) diff --git a/src/devices/cpu/drcbex86.cpp b/src/devices/cpu/drcbex86.cpp index 88b5382746768..0350baa231c2a 100644 --- a/src/devices/cpu/drcbex86.cpp +++ b/src/devices/cpu/drcbex86.cpp @@ -546,6 +546,8 @@ class drcbe_x86 : public drcbe_interface void op_set(Assembler &a, const uml::instruction &inst); void op_mov(Assembler &a, const uml::instruction &inst); void op_sext(Assembler &a, const uml::instruction &inst); + void op_bfxu(Assembler &a, const uml::instruction &inst); + void op_bfxs(Assembler &a, const uml::instruction &inst); void op_roland(Assembler &a, const uml::instruction &inst); void op_rolins(Assembler &a, const uml::instruction &inst); void op_add(Assembler &a, const uml::instruction &inst); @@ -617,13 +619,13 @@ class drcbe_x86 : public drcbe_interface void emit_or_m64_p64(Assembler &a, Mem const &memref_lo, Mem const &memref_hi, be_parameter const ¶m, const 
diff --git a/src/devices/cpu/drcbex86.cpp b/src/devices/cpu/drcbex86.cpp index 88b5382746768..0350baa231c2a 100644 --- a/src/devices/cpu/drcbex86.cpp +++ b/src/devices/cpu/drcbex86.cpp @@ -546,6 +546,8 @@ class drcbe_x86 : public drcbe_interface void op_set(Assembler &a, const uml::instruction &inst); void op_mov(Assembler &a, const uml::instruction &inst); void op_sext(Assembler &a, const uml::instruction &inst); + void op_bfxu(Assembler &a, const uml::instruction &inst); + void op_bfxs(Assembler &a, const uml::instruction &inst); void op_roland(Assembler &a, const uml::instruction &inst); void op_rolins(Assembler &a, const uml::instruction &inst); void op_add(Assembler &a, const uml::instruction &inst); @@ -617,13 +619,13 @@ class drcbe_x86 : public drcbe_interface void emit_or_m64_p64(Assembler &a, Mem const &memref_lo, Mem const &memref_hi, be_parameter const &param, const uml::instruction &inst); void emit_xor_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const uml::instruction &inst); void emit_xor_m64_p64(Assembler &a, Mem const &memref_lo, Mem const &memref_hi, be_parameter const &param, const uml::instruction &inst); - void emit_shl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const uml::instruction &inst); - void emit_shr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const uml::instruction &inst); - void emit_sar_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const uml::instruction &inst); - void emit_rol_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const uml::instruction &inst); - void emit_ror_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const uml::instruction &inst); - void emit_rcl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const uml::instruction &inst); - void emit_rcr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const uml::instruction &inst); + void emit_shl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags); + void emit_shr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags); + void emit_sar_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags); + void emit_rol_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags); + void emit_ror_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags); + void emit_rcl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags); + void emit_rcr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags); void alu_op_param(Assembler &a, Inst::Id const opcode_lo, Inst::Id const opcode_hi, Gp const &lo, Gp const &hi, be_parameter const &param, bool const saveflags); void alu_op_param(Assembler &a, Inst::Id const opcode_lo, Inst::Id const opcode_hi, Mem const &lo, Mem const &hi, be_parameter const &param, bool const saveflags); @@ -724,6 +726,8 @@ inline void drcbe_x86::generate_one(Assembler &a, const uml::instruction &inst) case uml::OP_SET: op_set(a, inst); break; // SET dst,c case uml::OP_MOV: op_mov(a, inst); break; // MOV dst,src[,c] case uml::OP_SEXT: op_sext(a, inst); break; // SEXT dst,src + case uml::OP_BFXU: op_bfxu(a, inst); break; // BFXU dst,src1,src2,src3 + case uml::OP_BFXS: op_bfxs(a, inst); break; // BFXS dst,src1,src2,src3 case uml::OP_ROLAND: op_roland(a, inst); break; // ROLAND dst,src1,src2,src3 case uml::OP_ROLINS: op_rolins(a, inst); break; // ROLINS dst,src1,src2,src3 case uml::OP_ADD: op_add(a, inst); break; // ADD dst,src1,src2[,f] @@ -1013,8 +1017,8 @@ inline bool drcbe_x86::can_skip_upper_load(Assembler &a, uint32_t *memref, Gp co // drcbe_x86 - constructor //------------------------------------------------- -drcbe_x86::drcbe_x86(drcuml_state &drcuml, device_t &device, drc_cache &cache, uint32_t flags, int modes, int addrbits, int ignorebits) : - drcbe_interface(drcuml, cache, device) +drcbe_x86::drcbe_x86(drcuml_state &drcuml, device_t &device, drc_cache &cache, uint32_t flags, int modes, int addrbits, int ignorebits) + : drcbe_interface(drcuml, cache, device) , m_hash(cache, modes, addrbits, ignorebits) , m_map(cache, 0) , m_log_asmjit(nullptr) @@ -2022,19 +2026,19 @@ void drcbe_x86::emit_xor_m64_p64(Assembler &a, Mem const &memref_lo, Mem const & // pair of registers from a 64-bit 
parameter //------------------------------------------------- -void drcbe_x86::emit_shl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const instruction &inst) +void drcbe_x86::emit_shl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags) { - int saveflags = inst.flags() != 0; + bool const saveflags = flags != 0; if (param.is_immediate()) { int count = param.immediate() & 63; - if (!inst.flags() && count == 0) + if (!flags && count == 0) ;// skip else { while (count >= 32) { - if (inst.flags() != 0) + if (flags != 0) { a.shld(reghi, reglo, 31); // shld reghi,reglo,31 a.shl(reglo, 31); // shl reglo,31 @@ -2047,7 +2051,7 @@ void drcbe_x86::emit_shl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, count -= 32; } } - if (inst.flags() != 0 || count > 0) + if (flags != 0 || count > 0) { a.shld(reghi, reglo, count); // shld reghi,reglo,count if (saveflags && count != 0) a.pushfd(); // pushf @@ -2085,7 +2089,7 @@ void drcbe_x86::emit_shl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, a.test(ecx, 0x20); // test ecx,0x20 a.short_().jz(skip1); // jz skip1 - if (inst.flags() != 0) + if (flags != 0) { a.sub(ecx, 31); // sub ecx,31 a.shld(reghi, reglo, 31); // shld reghi,reglo,31 @@ -2136,19 +2140,19 @@ void drcbe_x86::emit_shl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, // pair of registers from a 64-bit parameter //------------------------------------------------- -void drcbe_x86::emit_shr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const instruction &inst) +void drcbe_x86::emit_shr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags) { - int saveflags = inst.flags() != 0; + bool const saveflags = flags != 0; if (param.is_immediate()) { int count = param.immediate() & 63; - if (!inst.flags() && count == 0) + if (!flags && count == 0) ;// skip else { while (count >= 32) { - if (inst.flags() != 0) + if (flags != 0) { a.shrd(reglo, reghi, 31); // shrd reglo,reghi,31 a.shr(reghi, 31); // shr reghi,31 @@ -2161,7 +2165,7 @@ void drcbe_x86::emit_shr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, count -= 32; } } - if (inst.flags() != 0 || count > 0) + if (flags != 0 || count > 0) { a.shrd(reglo, reghi, count); // shrd reglo,reghi,count if (saveflags && count != 0) a.pushfd(); // pushf @@ -2207,7 +2211,7 @@ void drcbe_x86::emit_shr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, a.test(ecx, 0x20); // test ecx,0x20 a.short_().jz(skip1); // jz skip1 - if (inst.flags() != 0) + if (flags != 0) { a.sub(ecx, 31); // sub ecx,31 a.shrd(reglo, reghi, 31); // shrd reglo,reghi,31 @@ -2266,19 +2270,19 @@ void drcbe_x86::emit_shr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, // pair of registers from a 64-bit parameter //------------------------------------------------- -void drcbe_x86::emit_sar_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const instruction &inst) +void drcbe_x86::emit_sar_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags) { - int saveflags = inst.flags() != 0; + bool const saveflags = flags != 0; if (param.is_immediate()) { int count = param.immediate() & 63; - if (!inst.flags() && count == 0) + if (!flags && count == 0) ;// skip else { while (count >= 32) { - if (inst.flags() != 0) + if (flags != 0) { a.shrd(reglo, reghi, 31); // shrd reglo,reghi,31 a.sar(reghi, 31); // sar reghi,31 @@ -2291,7 +2295,7 @@ void drcbe_x86::emit_sar_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, count -= 32; } } - if (inst.flags() 
!= 0 || count > 0) + if (flags != 0 || count > 0) { a.shrd(reglo, reghi, count); // shrd reglo,reghi,count if (saveflags && count != 0) a.pushfd(); // pushf @@ -2337,7 +2341,7 @@ void drcbe_x86::emit_sar_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, a.test(ecx, 0x20); // test ecx,0x20 a.short_().jz(skip1); // jz skip1 - if (inst.flags() != 0) + if (flags != 0) { a.sub(ecx, 31); // sub ecx,31 a.shrd(reglo, reghi, 31); // shrd reglo,reghi,31 @@ -2396,9 +2400,9 @@ void drcbe_x86::emit_sar_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, // pair of registers from a 64-bit parameter //------------------------------------------------- -void drcbe_x86::emit_rol_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const instruction &inst) +void drcbe_x86::emit_rol_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags) { - int saveflags = inst.flags() != 0; + bool const saveflags = flags != 0; Gp tempreg = esi; if ((reglo == tempreg) || (reghi == tempreg)) @@ -2412,7 +2416,7 @@ void drcbe_x86::emit_rol_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, if (param.is_immediate()) { int count = param.immediate() & 63; - if (!inst.flags() && count == 0) + if (!flags && count == 0) { // skip } @@ -2420,7 +2424,7 @@ void drcbe_x86::emit_rol_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, { while (count >= 32) { - if (inst.flags() != 0) + if (flags != 0) { a.mov(ecx, reglo); a.shld(reglo, reghi, 31); @@ -2470,7 +2474,7 @@ void drcbe_x86::emit_rol_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, a.cmp(ecx, 32); a.short_().jl(skip1); - if (inst.flags()) + if (flags) { Label const shift_loop = a.new_label(); @@ -2523,9 +2527,9 @@ void drcbe_x86::emit_rol_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, // pair of registers from a 64-bit parameter //------------------------------------------------- -void drcbe_x86::emit_ror_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const instruction &inst) +void drcbe_x86::emit_ror_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags) { - int saveflags = inst.flags() != 0; + bool const saveflags = flags != 0; Gp tempreg = esi; if ((reglo == tempreg) || (reghi == tempreg)) @@ -2539,7 +2543,7 @@ void drcbe_x86::emit_ror_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, if (param.is_immediate()) { int count = param.immediate() & 63; - if (!inst.flags() && count == 0) + if (!flags && count == 0) { // skip } @@ -2547,7 +2551,7 @@ void drcbe_x86::emit_ror_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, { while (count >= 32) { - if (inst.flags() != 0) + if (flags != 0) { a.mov(tempreg, reglo); a.shrd(reglo, reghi, 31); @@ -2599,7 +2603,7 @@ void drcbe_x86::emit_ror_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, a.short_().jl(skip1); a.bind(shift_loop); - if (inst.flags() != 0) + if (flags != 0) { a.sub(ecx, 31); a.mov(tempreg, reglo); @@ -2649,7 +2653,7 @@ void drcbe_x86::emit_ror_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, // pair of registers from a 64-bit parameter //------------------------------------------------- -void drcbe_x86::emit_rcl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const instruction &inst) +void drcbe_x86::emit_rcl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags) { Label loop = a.new_label(); Label skipall = a.new_label(); @@ -2677,9 +2681,9 @@ void drcbe_x86::emit_rcl_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, a.rcl(reghi, 1); a.bind(skipall); - if (inst.flags()) + if (flags) { - if (inst.flags() & FLAG_C) + if (flags & FLAG_C) calculate_status_flags(a, reglo, FLAG_Z); else a.test(reglo, reglo);
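These rotate and shift helpers now take the UML flag mask directly instead of the whole instruction, so the BFX code paths below can request a flag-free rotate with FLAGS_NONE and leave flag generation to the final mask or shift step. As a model of the register-pair rotate they implement (a sketch under the assumption of a plain 64-bit rotate right; ``ror64_pair`` is a hypothetical name):

.. code-block:: C++

    #include <cstdint>
    #include <utility>

    // 64-bit rotate right by n on a lo/hi pair of 32-bit registers, the way
    // the shrd-based paths combine the halves: swap for counts of 32 or more,
    // then use two double-precision shifts for the remainder.
    void ror64_pair(uint32_t &lo, uint32_t &hi, unsigned n)
    {
        n &= 63;
        if (n >= 32)                // the "swap the halves" fast path
        {
            std::swap(lo, hi);
            n -= 32;
        }
        if (n)
        {
            const uint32_t newlo = (lo >> n) | (hi << (32 - n));    // shrd lo,hi,n
            const uint32_t newhi = (hi >> n) | (lo << (32 - n));    // shrd hi,lo,n
            lo = newlo;
            hi = newhi;
        }
    }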
@@ -2697,7 +2701,7 @@ // pair of registers from a 64-bit parameter //------------------------------------------------- -void drcbe_x86::emit_rcr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, const instruction &inst) +void drcbe_x86::emit_rcr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, be_parameter const &param, u8 flags) { Label loop = a.new_label(); Label skipall = a.new_label(); @@ -2725,9 +2729,9 @@ void drcbe_x86::emit_rcr_r64_p64(Assembler &a, Gp const &reglo, Gp const &reghi, a.rcr(reglo, 1); a.bind(skipall); - if (inst.flags()) + if (flags) { - if (inst.flags() & FLAG_C) + if (flags & FLAG_C) calculate_status_flags(a, reglo, FLAG_Z); else a.test(reglo, reglo); @@ -4586,6 +4590,400 @@ void drcbe_x86::op_sext(Assembler &a, const instruction &inst) } +//------------------------------------------------- +// op_bfxu - process a BFXU opcode +//------------------------------------------------- + +void drcbe_x86::op_bfxu(Assembler &a, const instruction &inst) +{ + // validate instruction + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_S | FLAG_Z); + + // normalize parameters + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + be_parameter shiftp(*this, inst.param(2), PTYPE_MRI); + be_parameter widthp(*this, inst.param(3), PTYPE_MRI); + + if (inst.size() == 4) + { + // 32-bit form + if (widthp.is_immediate_value(0)) + { + // undefined behaviour - do something + if (inst.flags() || dstp.is_int_register()) + { + Gp const dstreg = dstp.select_register(eax); + + a.xor_(dstreg, dstreg); + + emit_mov_p32_r32(a, dstp, dstreg); + } + else if (dstp.is_memory()) + { + a.mov(MABS(dstp.memory(), 4), 0); + } + } + else + { + Gp dstreg; + + if (widthp.is_immediate()) + { + const unsigned width = widthp.immediate() & 31; + + dstreg = dstp.select_register(eax); + + if (!shiftp.is_immediate()) + emit_mov_r32_p32(a, ecx, shiftp); + emit_mov_r32_p32(a, dstreg, srcp); + + if (shiftp.is_immediate()) + { + const unsigned shift = shiftp.immediate() & 31; + + a.ror(dstreg, shift); + } + else + { + a.ror(dstreg, cl); + } + a.and_(dstreg, util::make_bitmask<u32>(width)); + } + else + { + Gp const widthreg = widthp.select_register(edx); + dstreg = dstp.select_register(eax, widthp); + + if (!shiftp.is_immediate()) + emit_mov_r32_p32(a, ecx, shiftp); + emit_mov_r32_p32(a, widthreg, widthp); + emit_mov_r32_p32(a, dstreg, srcp); + + if (shiftp.is_immediate()) + a.mov(ecx, shiftp.immediate() & 31); + a.add(ecx, widthreg); + a.ror(dstreg, cl); + a.mov(ecx, widthreg); + a.neg(ecx); + a.and_(ecx, 31); + a.shr(dstreg, cl); + } + + emit_mov_p32_r32(a, dstp, dstreg); + } + } + else if (inst.size() == 8) + { + // 64-bit form + if (widthp.is_immediate()) + { + be_parameter maskp(*this, uml::parameter(util::make_bitmask<u64>(widthp.immediate() & 63)), PTYPE_I); + + Gp const dstreg = dstp.select_register(eax, shiftp); + + emit_mov_r64_p64(a, dstreg, edx, srcp); + emit_ror_r64_p64(a, dstreg, edx, shiftp, FLAGS_NONE); + emit_and_r64_p64(a, dstreg, edx, maskp, inst); + emit_mov_p64_r64(a, dstp, dstreg, edx); + } + else + { + Gp const dstreg = dstp.select_register(eax, shiftp); + + // first make the mask + Label large = a.new_label(); + Label shift = a.new_label(); + + emit_mov_r32_p32(a, ecx, widthp); + 
a.mov(eax, ~u32(0)); + a.test(ecx, 0x20); + a.short_().jnz(large); + a.mov(dword_ptr(esp, 4), 0); + a.and_(ecx, 31); + a.shl(eax, cl); + a.not_(eax); + a.mov(dword_ptr(esp), eax); + a.short_().jmp(shift); + + a.bind(large); + + a.mov(dword_ptr(esp), eax); + a.and_(ecx, 31); + a.shl(eax, cl); + a.not_(eax); + a.mov(dword_ptr(esp, 4), eax); + + a.bind(shift); + + // shift the field into position + emit_mov_r64_p64(a, dstreg, edx, srcp); + emit_ror_r64_p64(a, dstreg, edx, shiftp, FLAGS_NONE); + + // apply the mask + a.and_(dstreg, dword_ptr(esp)); + if (inst.flags() & FLAG_Z) + { + a.pushfd(); + a.and_(edx, dword_ptr(esp, 8)); + emit_combine_z_flags(a); + } + else + { + a.and_(edx, dword_ptr(esp, 4)); + } + + emit_mov_p64_r64(a, dstp, dstreg, edx); + } + } +} + + +//------------------------------------------------- +// op_bfxs - process a BFXS opcode +//------------------------------------------------- + +void drcbe_x86::op_bfxs(Assembler &a, const instruction &inst) +{ + // validate instruction + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_S | FLAG_Z); + + // normalize parameters + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + be_parameter shiftp(*this, inst.param(2), PTYPE_MRI); + be_parameter widthp(*this, inst.param(3), PTYPE_MRI); + + if (inst.size() == 4) + { + // 32-bit form + if (widthp.is_immediate_value(0)) + { + // undefined behaviour - do something + if (inst.flags() || dstp.is_int_register()) + { + Gp const dstreg = dstp.select_register(eax); + + a.xor_(dstreg, dstreg); + + emit_mov_p32_r32(a, dstp, dstreg); + } + else if (dstp.is_memory()) + { + a.mov(MABS(dstp.memory(), 4), 0); + } + } + else + { + Gp dstreg; + + if (widthp.is_immediate()) + { + dstreg = dstp.select_register(eax, shiftp); + + const unsigned width = widthp.immediate() & 31; + + emit_mov_r32_p32(a, dstreg, srcp); + + if (shiftp.is_immediate()) + { + const unsigned shift = shiftp.immediate() & 31; + + a.ror(dstreg, (width + shift) & 31); + } + else + { + a.mov(ecx, width); + if (shiftp.is_int_register()) + a.add(ecx, shiftp.select_register(edx)); + else + a.add(ecx, MABS(shiftp.memory())); + a.ror(dstreg, cl); + } + a.sar(dstreg, -int(width) & 31); + } + else if (shiftp.is_immediate_value(0)) + { + dstreg = dstp.select_register(eax); + + emit_mov_r32_p32(a, ecx, widthp); + emit_mov_r32_p32(a, dstreg, srcp); + + a.ror(dstreg, cl); + a.neg(ecx); + a.and_(ecx, 31); + a.sar(dstreg, cl); + } + else + { + Gp const widthreg = widthp.select_register(edx); + dstreg = dstp.select_register(eax, widthp); + + if (!shiftp.is_immediate()) + emit_mov_r32_p32(a, ecx, shiftp); + emit_mov_r32_p32(a, widthreg, widthp); + emit_mov_r32_p32(a, dstreg, srcp); + + if (shiftp.is_immediate()) + a.mov(ecx, shiftp.immediate() & 31); + a.add(ecx, widthreg); + a.ror(dstreg, cl); + a.mov(ecx, widthreg); + a.neg(ecx); + a.and_(ecx, 31); + a.sar(dstreg, cl); + } + + emit_mov_p32_r32(a, dstp, dstreg); + } + } + else if (inst.size() == 8) + { + // 64-bit form + Gp dstreg; + + if (widthp.is_immediate()) + { + be_parameter rshiftp(*this, uml::parameter(-int64_t(widthp.immediate()) & 63), PTYPE_I); + + dstreg = dstp.select_register(eax); + + if (shiftp.is_immediate()) + { + be_parameter rotp(*this, uml::parameter((shiftp.immediate() + widthp.immediate()) & 63), PTYPE_I); + + emit_mov_r64_p64(a, dstreg, edx, srcp); + emit_ror_r64_p64(a, dstreg, edx, rotp, FLAGS_NONE); + } + else + { + Gp tempreg = esi; + if (dstreg == tempreg) + tempreg 
= edi; + if (dstreg == tempreg) + tempreg = ebp; + assert(dstreg != tempreg); + a.mov(dword_ptr(esp), tempreg); + + emit_mov_r32_p32(a, ecx, shiftp); + emit_mov_r64_p64(a, dstreg, edx, srcp); + + // if the count is at least 32, swap the halves + Label small = a.new_label(); + + a.add(ecx, widthp.immediate() & 63); + a.test(ecx, 0x20); + a.short_().jz(small); + a.xchg(dstreg, edx); + a.bind(small); + a.and_(ecx, 31); + + // do the extended rotate + reset_last_upper_lower_reg(); + a.mov(tempreg, edx); + a.shrd(edx, dstreg, cl); + a.shrd(dstreg, tempreg, cl); + + a.mov(tempreg, dword_ptr(esp)); + } + + emit_sar_r64_p64(a, dstreg, edx, rshiftp, inst.flags()); + } + else + { + dstreg = dstp.select_register(eax, widthp); + + Gp tempreg = esi; + if (dstreg == tempreg) + tempreg = edi; + if (dstreg == tempreg) + tempreg = ebp; + assert(dstreg != tempreg); + a.mov(dword_ptr(esp), tempreg); + + // calculate the required rotation + emit_mov_r32_p32(a, ecx, widthp); + if (shiftp.is_immediate_value(0)) + ; + else if (shiftp.is_immediate()) + a.add(ecx, shiftp.immediate()); + else if (shiftp.is_int_register()) + a.add(ecx, shiftp.select_register(edx)); + else if (shiftp.is_memory()) + a.add(ecx, MABS(shiftp.memory())); + + emit_mov_r64_p64(a, dstreg, edx, srcp); + + // if the count is at least 32, swap the halves + Label small_ror = a.new_label(); + + a.test(ecx, 0x20); + a.short_().jz(small_ror); + a.xchg(dstreg, edx); + a.bind(small_ror); + a.and_(ecx, 31); + + // do the extended rotate + reset_last_upper_lower_reg(); + a.mov(tempreg, edx); + a.shrd(edx, dstreg, cl); + a.shrd(dstreg, tempreg, cl); + + a.mov(tempreg, dword_ptr(esp)); + + // now do the shift + Label small_sar = a.new_label(); + Label no_shift, done; + if (inst.flags()) + { + no_shift = a.new_label(); + done = a.new_label(); + } + + emit_mov_r32_p32(a, ecx, widthp); + a.neg(ecx); + a.test(ecx, 0x20); + a.short_().jz(small_sar); + a.mov(dstreg, edx); + a.sar(edx, 31); + a.bind(small_sar); + a.and_(ecx, 31); + if (inst.flags()) + a.short_().jz(no_shift); + + a.shrd(dstreg, edx, cl); + if (inst.flags() & FLAG_Z) + a.pushfd(); + a.sar(edx, cl); + + // zero-bit shifts don't update the flags + if (inst.flags()) + { + a.short_().jmp(done); + + a.bind(no_shift); + if (inst.flags() & FLAG_Z) + { + a.test(eax, eax); + a.pushfd(); + } + a.test(edx, edx); + a.bind(done); + if (inst.flags() & FLAG_Z) + emit_combine_z_flags(a); + } + } + + emit_mov_p64_r64(a, dstp, dstreg, edx); + } +} + + //------------------------------------------------- // op_roland - process an ROLAND opcode //------------------------------------------------- @@ -4636,7 +5034,7 @@ void drcbe_x86::op_roland(Assembler &a, const instruction &inst) { // 64-bit form emit_mov_r64_p64(a, dstreg, edx, srcp); // mov edx:dstreg,srcp - emit_rol_r64_p64(a, dstreg, edx, shiftp, inst); // rol edx:dstreg,shiftp + emit_rol_r64_p64(a, dstreg, edx, shiftp, FLAGS_NONE); // rol edx:dstreg,shiftp emit_and_r64_p64(a, dstreg, edx, maskp, inst); // and edx:dstreg,maskp emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg } @@ -4696,7 +5094,7 @@ void drcbe_x86::op_rolins(Assembler &a, const instruction &inst) { // 64-bit form emit_mov_r64_p64(a, eax, edx, srcp); // mov edx:eax,srcp - emit_rol_r64_p64(a, eax, edx, shiftp, inst); // rol edx:eax,shiftp + emit_rol_r64_p64(a, eax, edx, shiftp, FLAGS_NONE); // rol edx:eax,shiftp if (maskp.is_immediate()) { a.and_(eax, maskp.immediate()); // and eax,maskp @@ -6213,7 +6611,7 @@ void drcbe_x86::op_shl(Assembler &a, const instruction &inst) { // general 
case emit_mov_r64_p64(a, dstreg, edx, src1p); - emit_shl_r64_p64(a, dstreg, edx, src2p, inst); + emit_shl_r64_p64(a, dstreg, edx, src2p, inst.flags()); } emit_mov_p64_r64(a, dstp, dstreg, edx); } @@ -6270,7 +6668,7 @@ void drcbe_x86::op_shr(Assembler &a, const instruction &inst) { // general case emit_mov_r64_p64(a, dstreg, edx, src1p); // mov edx:dstreg,[src1p] - emit_shr_r64_p64(a, dstreg, edx, src2p, inst); // shr edx:dstreg,src2p + emit_shr_r64_p64(a, dstreg, edx, src2p, inst.flags()); // shr edx:dstreg,src2p emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg } } @@ -6326,7 +6724,7 @@ void drcbe_x86::op_sar(Assembler &a, const instruction &inst) { // general case emit_mov_r64_p64(a, dstreg, edx, src1p); // mov edx:dstreg,[src1p] - emit_sar_r64_p64(a, dstreg, edx, src2p, inst); // sar edx:dstreg,src2p + emit_sar_r64_p64(a, dstreg, edx, src2p, inst.flags()); // sar edx:dstreg,src2p emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg } } @@ -6382,7 +6780,7 @@ void drcbe_x86::op_rol(Assembler &a, const instruction &inst) { // general case emit_mov_r64_p64(a, dstreg, edx, src1p); // mov edx:dstreg,[src1p] - emit_rol_r64_p64(a, dstreg, edx, src2p, inst); // rol edx:dstreg,src2p + emit_rol_r64_p64(a, dstreg, edx, src2p, inst.flags()); // rol edx:dstreg,src2p emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg } } @@ -6438,7 +6836,7 @@ void drcbe_x86::op_ror(Assembler &a, const instruction &inst) { // general case emit_mov_r64_p64(a, dstreg, edx, src1p); // mov edx:dstreg,[src1p] - emit_ror_r64_p64(a, dstreg, edx, src2p, inst); // ror edx:dstreg,src2p + emit_ror_r64_p64(a, dstreg, edx, src2p, inst.flags()); // ror edx:dstreg,src2p emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg } } @@ -6494,7 +6892,7 @@ void drcbe_x86::op_rolc(Assembler &a, const instruction &inst) { // general case emit_mov_r64_p64_keepflags(a, dstreg, edx, src1p); // mov edx:dstreg,[src1p] - emit_rcl_r64_p64(a, dstreg, edx, src2p, inst); // rcl edx:dstreg,src2p + emit_rcl_r64_p64(a, dstreg, edx, src2p, inst.flags()); // rcl edx:dstreg,src2p emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg } } @@ -6550,7 +6948,7 @@ void drcbe_x86::op_rorc(Assembler &a, const instruction &inst) { // general case emit_mov_r64_p64_keepflags(a, dstreg, edx, src1p); // mov edx:dstreg,[src1p] - emit_rcr_r64_p64(a, dstreg, edx, src2p, inst); // rcr edx:dstreg,src2p + emit_rcr_r64_p64(a, dstreg, edx, src2p, inst.flags()); // rcr edx:dstreg,src2p emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg } } diff --git a/src/devices/cpu/drcumlsh.h b/src/devices/cpu/drcumlsh.h index 2394354c81c6e..28af595fa4f24 100644 --- a/src/devices/cpu/drcumlsh.h +++ b/src/devices/cpu/drcumlsh.h @@ -70,6 +70,8 @@ #define UML_MOV(block, dst, src) do { using namespace uml; block.append().mov(dst, src); } while (0) #define UML_MOVc(block, cond, dst, src) do { using namespace uml; block.append().mov(cond, dst, src); } while (0) #define UML_SEXT(block, dst, src, size) do { using namespace uml; block.append().sext(dst, src, size); } while (0) +#define UML_BFXU(block, dst, src, shift, width) do { using namespace uml; block.append().bfxu(dst, src, shift, width); } while (0) +#define UML_BFXS(block, dst, src, shift, width) do { using namespace uml; block.append().bfxs(dst, src, shift, width); } while (0) #define UML_ROLAND(block, dst, src, shift, mask) do { using namespace uml; block.append().roland(dst, src, shift, mask); } while (0) #define UML_ROLINS(block, dst, src, shift, mask) do { using namespace uml; 
block.append().rolins(dst, src, shift, mask); } while (0) #define UML_ADD(block, dst, src1, src2) do { using namespace uml; block.append().add(dst, src1, src2); } while (0) @@ -112,6 +114,8 @@ #define UML_DMOV(block, dst, src) do { using namespace uml; block.append().dmov(dst, src); } while (0) #define UML_DMOVc(block, cond, dst, src) do { using namespace uml; block.append().dmov(cond, dst, src); } while (0) #define UML_DSEXT(block, dst, src, size) do { using namespace uml; block.append().dsext(dst, src, size); } while (0) +#define UML_DBFXU(block, dst, src, shift, width) do { using namespace uml; block.append().dbfxu(dst, src, shift, width); } while (0) +#define UML_DBFXS(block, dst, src, shift, width) do { using namespace uml; block.append().dbfxs(dst, src, shift, width); } while (0) #define UML_DROLAND(block, dst, src, shift, mask) do { using namespace uml; block.append().droland(dst, src, shift, mask); } while (0) #define UML_DROLINS(block, dst, src, shift, mask) do { using namespace uml; block.append().drolins(dst, src, shift, mask); } while (0) #define UML_DADD(block, dst, src1, src2) do { using namespace uml; block.append().dadd(dst, src1, src2); } while (0) diff --git a/src/devices/cpu/e132xs/e132xsdrc_ops.hxx b/src/devices/cpu/e132xs/e132xsdrc_ops.hxx index d4732b5dc5303..bc8675fa7c955 100644 --- a/src/devices/cpu/e132xs/e132xsdrc_ops.hxx +++ b/src/devices/cpu/e132xs/e132xsdrc_ops.hxx @@ -651,8 +651,8 @@ void hyperstone_device::generate_trap_exception_or_int(drcuml_block &block, uml: UML_MOV(block, I4, DRC_SR); // I4 = old SR UML_MOV(block, I1, I4); // I1 = SR to be updated - UML_ROLAND(block, I3, I4, 32 - FP_SHIFT, 0x7f); // I3 = old FP - UML_ROLAND(block, I2, I4, 32 - FL_SHIFT, 0xf); // I2 = old FL + UML_BFXU(block, I3, I4, FP_SHIFT, 7); // I3 = old FP + UML_BFXU(block, I2, I4, FL_SHIFT, 4); // I2 = old FL UML_MOVc(block, uml::COND_Z, I2, 16); // convert FL == 0 to 16 UML_ADD(block, I3, I3, I2); // I3 = updated FP @@ -691,7 +691,7 @@ inline void hyperstone_device::generate_logic_op(drcuml_block &block, compiler_s UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); uml::parameter src = uml::I1; if (!SrcGlobal) @@ -724,7 +724,7 @@ inline void hyperstone_device::generate_logic_op_imm(drcuml_block &block, compil UML_MOV(block, I2, DRC_SR); if (!DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if (!DstGlobal || (dst_code <= SR_REGISTER)) { @@ -758,7 +758,7 @@ void hyperstone_device::generate_software(drcuml_block &block, compiler_state &c const uint32_t dst_code = (op & 0xf0) >> 4; UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); // I3 = FP + UML_BFXU(block, I3, I2, FP_SHIFT, 7); // I3 = FP UML_ADD(block, I1, I3, src_code); UML_AND(block, I1, I1, 0x3f); @@ -771,7 +771,7 @@ void hyperstone_device::generate_software(drcuml_block &block, compiler_state &c UML_OR(block, I2, I2, 1 << ILC_SHIFT); UML_MOV(block, DRC_SR, I2); - UML_ROLAND(block, I4, I2, 32 - FL_SHIFT, 0xf); + UML_BFXU(block, I4, I2, FL_SHIFT, 4); UML_MOVc(block, uml::COND_Z, I4, 16); UML_ADD(block, I4, I4, I3); // I4 = reg @@ -843,7 +843,7 @@ void hyperstone_device::generate_chk(drcuml_block &block, compiler_state &compil UML_MOV(block, I2, DRC_SR); if (!DstGlobal || !SrcGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I0); @@ 
-914,7 +914,7 @@ void hyperstone_device::generate_movd(drcuml_block &block, compiler_state &compi } else { - UML_ROLAND(block, I2, I3, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I2, I3, FP_SHIFT, 7); UML_ADD(block, I0, I2, src_code); UML_AND(block, I0, I0, 0x3f); UML_LOAD(block, I0, (void *)m_core->local_regs, I0, SIZE_DWORD, SCALE_x4); @@ -954,11 +954,10 @@ void hyperstone_device::generate_movd(drcuml_block &block, compiler_state &compi const int pop_next = compiler.next_label(); const int done_ret = compiler.next_label(); UML_MOV(block, I0, mem(&SP)); // I0 = SP - UML_ROLAND(block, I1, I0, 30, 0x7f); // I3 = FP - SP(8..2) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I1, I0, 2, 7); // I3 = FP - SP(8..2) + UML_BFXU(block, I3, I2, FP_SHIFT, 7); UML_SUB(block, I3, I3, I1); - UML_SHL(block, I3, I3, 32 - 7); // sign-extend 7-bit number - UML_SAR(block, I3, I3, 32 - 7); + UML_BFXS(block, I3, I3, 0, 7); // sign-extend 7-bit number UML_JMPc(block, uml::COND_NS, done_ret); // nothing to pull if not negative UML_LABEL(block, pop_next); UML_SUB(block, I0, I0, 4); // pull a word @@ -986,7 +985,7 @@ void hyperstone_device::generate_movd(drcuml_block &block, compiler_state &compi } else { - UML_ROLAND(block, I0, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I0, I2, FP_SHIFT, 7); UML_ADD(block, I0, I0, dst_code); UML_AND(block, I0, I0, 0x3f); UML_STORE(block, (void *)m_core->local_regs, I0, 0, SIZE_DWORD, SCALE_x4); @@ -999,7 +998,7 @@ void hyperstone_device::generate_movd(drcuml_block &block, compiler_state &compi { UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if (SrcGlobal) { @@ -1062,7 +1061,7 @@ void hyperstone_device::generate_divsu(drcuml_block &block, compiler_state &comp } if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); generate_load_operand(block, compiler, SrcGlobal, src_code, uml::I0, uml::I0); @@ -1152,7 +1151,7 @@ void hyperstone_device::generate_xm(drcuml_block &block, compiler_state &compile UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, SrcGlobal, src_code, uml::I1, uml::I1); @@ -1188,7 +1187,7 @@ void hyperstone_device::generate_mask(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, SrcGlobal, src_code, uml::I0, uml::I0); @@ -1214,7 +1213,7 @@ void hyperstone_device::generate_sum(drcuml_block &block, compiler_state &compil UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if (SrcGlobal && (src_code == PC_REGISTER) && ((desc->flags & OPFLAG_IN_DELAY_SLOT) || !compiler.check_delay())) { @@ -1257,7 +1256,7 @@ void hyperstone_device::generate_sums(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, SrcGlobal, src_code, uml::I0, uml::I0); @@ -1296,7 +1295,7 @@ void hyperstone_device::generate_cmp(drcuml_block &block, compiler_state &compil UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - 
UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_src_addsub(block, compiler, SrcGlobal, src_code, uml::I1, uml::I1, uml::I2); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -1318,7 +1317,7 @@ void hyperstone_device::generate_mov(drcuml_block &block, compiler_state &compil UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if (DstGlobal && compiler.user_mode()) { @@ -1438,7 +1437,7 @@ void hyperstone_device::generate_add(drcuml_block &block, compiler_state &compil UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if ((desc->flags & OPFLAG_IN_DELAY_SLOT) || !compiler.check_delay()) { @@ -1514,7 +1513,7 @@ void hyperstone_device::generate_adds(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_src_addsub(block, compiler, SrcGlobal, src_code, uml::I1, uml::I1, uml::I2); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -1553,7 +1552,7 @@ void hyperstone_device::generate_cmpb(drcuml_block &block, compiler_state &compi const uint32_t dst_code = (op & 0xf0) >> 4; if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); generate_load_operand(block, compiler, SrcGlobal, src_code, uml::I1, uml::I1); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -1575,7 +1574,7 @@ void hyperstone_device::generate_subc(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_src_addsub(block, compiler, SrcGlobal,src_code, uml::I1, uml::I1, uml::I2); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -1608,7 +1607,7 @@ void hyperstone_device::generate_sub(drcuml_block &block, compiler_state &compil UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_src_addsub(block, compiler, SrcGlobal, src_code, uml::I1, uml::I1, uml::I2); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -1633,7 +1632,7 @@ void hyperstone_device::generate_subs(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_src_addsub(block, compiler, SrcGlobal, src_code, uml::I1, uml::I1, uml::I2); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -1673,7 +1672,7 @@ void hyperstone_device::generate_addc(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_src_addsub(block, compiler, SrcGlobal,src_code, uml::I1, uml::I1, uml::I2); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -1706,7 +1705,7 @@ void hyperstone_device::generate_neg(drcuml_block &block, compiler_state &compil UML_MOV(block, I2, DRC_SR); if (!SrcGlobal 
|| !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_src_addsub(block, compiler, SrcGlobal, src_code, uml::I0, uml::I0, uml::I2); @@ -1730,7 +1729,7 @@ void hyperstone_device::generate_negs(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_src_addsub(block, compiler, SrcGlobal, src_code, uml::I0, uml::I0, uml::I2); @@ -1828,7 +1827,7 @@ void hyperstone_device::generate_not(drcuml_block &block, compiler_state &compil UML_MOV(block, I2, DRC_SR); if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, SrcGlobal, src_code, uml::I0, uml::I0); @@ -1857,7 +1856,7 @@ void hyperstone_device::generate_cmpi(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I0); @@ -1944,7 +1943,7 @@ void hyperstone_device::generate_movi(drcuml_block &block, compiler_state &compi UML_AND(block, I2, I2, ~H_MASK); UML_MOV(block, DRC_SR, I2); - UML_ROLAND(block, I2, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I2, I2, FP_SHIFT, 7); UML_ADD(block, I2, I2, dst_code); UML_AND(block, I2, I2, 0x3f); UML_STORE(block, (void *)m_core->local_regs, I2, src, SIZE_DWORD, SCALE_x4); @@ -1969,7 +1968,7 @@ void hyperstone_device::generate_addi(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if (DstGlobal && (dst_code == PC_REGISTER) && ((desc->flags & OPFLAG_IN_DELAY_SLOT) || !compiler.check_delay())) { @@ -2041,7 +2040,7 @@ void hyperstone_device::generate_cmpbi(drcuml_block &block, compiler_state &comp UML_MOV(block, I2, DRC_SR); if (!DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if (!DstGlobal || !n) generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -2167,7 +2166,7 @@ void hyperstone_device::generate_shrdi(drcuml_block &block, compiler_state &comp const uint32_t n = HiN ? 
DRC_HI_N_VALUE : DRC_LO_N_VALUE; UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, dst_code, uml::I1, uml::I4); generate_load_operand(block, compiler, LOCAL, dst_code + 1, uml::I0, uml::I3); @@ -2206,7 +2205,7 @@ void hyperstone_device::generate_shrd(drcuml_block &block, compiler_state &compi } UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, dst_code, uml::I1, uml::I4); generate_load_operand(block, compiler, LOCAL, dst_code + 1, uml::I0, uml::I5); @@ -2244,7 +2243,7 @@ void hyperstone_device::generate_shr(drcuml_block &block, compiler_state &compil const uint32_t src_code = op & 0xf; UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, src_code, uml::I1, uml::I1); generate_load_operand(block, compiler, LOCAL, dst_code, uml::I0, uml::I3); @@ -2277,7 +2276,7 @@ void hyperstone_device::generate_shri(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -2306,7 +2305,7 @@ void hyperstone_device::generate_sardi(drcuml_block &block, compiler_state &comp const uint32_t n = HiN ? DRC_HI_N_VALUE : DRC_LO_N_VALUE; UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, dst_code, uml::I1, uml::I4); generate_load_operand(block, compiler, LOCAL, dst_code + 1, uml::I0, uml::I3); @@ -2345,7 +2344,7 @@ void hyperstone_device::generate_sard(drcuml_block &block, compiler_state &compi } UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, dst_code, uml::I1, uml::I4); generate_load_operand(block, compiler, LOCAL, dst_code + 1, uml::I0, uml::I5); @@ -2383,7 +2382,7 @@ void hyperstone_device::generate_sar(drcuml_block &block, compiler_state &compil const uint32_t src_code = op & 0xf; UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, src_code, uml::I1, uml::I1); generate_load_operand(block, compiler, LOCAL, dst_code, uml::I0, uml::I3); @@ -2416,7 +2415,7 @@ void hyperstone_device::generate_sari(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -2445,7 +2444,7 @@ void hyperstone_device::generate_shldi(drcuml_block &block, compiler_state &comp const uint32_t n = HiN ? 
DRC_HI_N_VALUE : DRC_LO_N_VALUE; UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, dst_code, uml::I1, uml::I4); generate_load_operand(block, compiler, LOCAL, dst_code + 1, uml::I0, uml::I5); @@ -2499,7 +2498,7 @@ void hyperstone_device::generate_shld(drcuml_block &block, compiler_state &compi } UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, dst_code, uml::I1, uml::I5); generate_load_operand(block, compiler, LOCAL, dst_code + 1, uml::I0, uml::I6); @@ -2549,10 +2548,8 @@ void hyperstone_device::generate_shl(drcuml_block &block, compiler_state &compil const uint32_t dst_code = (op & 0xf0) >> 4; const uint32_t src_code = op & 0xf; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); - UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if (dst_code != src_code) { @@ -2602,7 +2599,7 @@ void hyperstone_device::generate_shli(drcuml_block &block, compiler_state &compi UML_MOV(block, I2, DRC_SR); if (!DstGlobal) - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I0, uml::I3); @@ -2644,7 +2641,7 @@ void hyperstone_device::generate_testlz(drcuml_block &block, compiler_state &com const uint32_t dst_code = (op & 0xf0) >> 4; const uint32_t src_code = op & 0xf; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); generate_load_operand(block, compiler, LOCAL, src_code, uml::I0, uml::I0); @@ -2663,7 +2660,7 @@ void hyperstone_device::generate_rol(drcuml_block &block, compiler_state &compil const uint32_t src_code = op & 0xf; UML_MOV(block, I2, DRC_SR); - UML_ROLAND(block, I3, I2, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, I2, FP_SHIFT, 7); if (dst_code != src_code) { @@ -2713,7 +2710,7 @@ void hyperstone_device::generate_ldxx1(drcuml_block &block, compiler_state &comp const auto [sub_type, extra_s] = generate_get_d_code_dis(desc); if (!DstGlobal || !SrcGlobal) - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); const uml::parameter dstp = generate_load_address_ad(block, compiler, desc, DstGlobal, dst_code, uml::I0, uml::I0); @@ -2862,7 +2859,7 @@ void hyperstone_device::generate_ldxx2(drcuml_block &block, compiler_state &comp } if (!DstGlobal || !SrcGlobal) - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); generate_load_address_ns(block, compiler, desc, DstGlobal, dst_code, uml::I6, uml::I2, sub_type, extra_s); @@ -2968,7 +2965,7 @@ void hyperstone_device::generate_ldxx2(drcuml_block &block, compiler_state &comp UML_CMP(block, I6, mem(&m_core->global_regs[SP_REGISTER])); UML_JMPc(block, uml::COND_B, below_sp); - UML_ROLAND(block, I0, I6, 32 - 2, 0x3f); + UML_BFXU(block, I0, I6, 2, 6); UML_LOAD(block, I1, (void *)m_core->local_regs, I0, SIZE_DWORD, SCALE_x4); UML_JMP(block, done); @@ -3004,7 +3001,7 @@ void hyperstone_device::generate_stxx1(drcuml_block &block, compiler_state &comp const auto [sub_type, extra_s] = generate_get_d_code_dis(desc); if (!DstGlobal || !SrcGlobal) - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); const uml::parameter dstp = generate_load_address_ad(block, compiler, desc, DstGlobal, dst_code, uml::I0, 
uml::I0); @@ -3138,7 +3135,7 @@ void hyperstone_device::generate_stxx2(drcuml_block &block, compiler_state &comp } if (!DstGlobal || !SrcGlobal) - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); generate_load_address_ns(block, compiler, desc, DstGlobal, dst_code, uml::I0, uml::I6, sub_type, extra_s); @@ -3256,7 +3253,7 @@ void hyperstone_device::generate_stxx2(drcuml_block &block, compiler_state &comp UML_CMP(block, I5, mem(&SP)); UML_JMPc(block, uml::COND_B, less_than_sp); - UML_ROLAND(block, I4, I0, 30, 0x3f); + UML_BFXU(block, I4, I0, 2, 6); UML_STORE(block, (void *)m_core->local_regs, I4, I1, SIZE_DWORD, SCALE_x4); UML_JMP(block, store_done); @@ -3295,7 +3292,7 @@ void hyperstone_device::generate_mulsu(drcuml_block &block, compiler_state &comp } if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); generate_load_operand(block, compiler, SrcGlobal, src_code, uml::I0, uml::I4); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I1, uml::I6); @@ -3363,7 +3360,7 @@ void hyperstone_device::generate_mul(drcuml_block &block, compiler_state &compil } if (!SrcGlobal || !DstGlobal) - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); generate_load_operand(block, compiler, SrcGlobal, src_code, uml::I0, uml::I1); generate_load_operand(block, compiler, DstGlobal, dst_code, uml::I1, uml::I6); @@ -3469,9 +3466,9 @@ void hyperstone_device::generate_set(drcuml_block &block, compiler_state &compil } else { - UML_ROLAND(block, I1, DRC_SR, 32 - FP_SHIFT, 0x7f); - UML_ADD(block, I2, I1, dst_code); - UML_AND(block, I3, I2, 0x3f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); + UML_ADD(block, I3, I3, dst_code); + UML_AND(block, I3, I3, 0x3f); UML_STORE(block, (void *)m_core->local_regs, I3, I0, SIZE_DWORD, SCALE_x4); } } @@ -3486,7 +3483,7 @@ void hyperstone_device::generate_ldwr(drcuml_block &block, compiler_state &compi const uint32_t src_code = op & 0xf; const uint32_t dst_code = (op & 0xf0) >> 4; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (SrcGlobal || (src_code != dst_code)) generate_load_address_rp(block, compiler, desc, dst_code, uml::I0, uml::I2, 0); @@ -3511,7 +3508,7 @@ void hyperstone_device::generate_lddr(drcuml_block &block, compiler_state &compi const uint32_t src_code = op & 0xf; const uint32_t dst_code = (op & 0xf0) >> 4; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (!SrcGlobal && (src_code != dst_code)) generate_load_address_rp(block, compiler, desc, dst_code, uml::I0, uml::I2, 0); @@ -3557,7 +3554,7 @@ void hyperstone_device::generate_ldwp(drcuml_block &block, compiler_state &compi const uint32_t src_code = op & 0xf; const uint32_t dst_code = (op & 0xf0) >> 4; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (SrcGlobal || (src_code != dst_code)) generate_load_address_rp(block, compiler, desc, dst_code, uml::I4, uml::I2, 4); @@ -3589,7 +3586,7 @@ void hyperstone_device::generate_lddp(drcuml_block &block, compiler_state &compi const uint32_t src_code = op & 0xf; const uint32_t dst_code = (op & 0xf0) >> 4; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (SrcGlobal || (src_code != dst_code)) generate_load_address_rp(block, compiler, desc, dst_code, uml::I4, uml::I2, 8); @@ -3641,7 +3638,7 @@ void 
hyperstone_device::generate_stwr(drcuml_block &block, compiler_state &compi const uint32_t src_code = op & 0xf; const uint32_t dst_code = (op & 0xf0) >> 4; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (SrcGlobal || (src_code != dst_code)) generate_load_address_rp(block, compiler, desc, dst_code, uml::I0, uml::I2, 0); @@ -3675,7 +3672,7 @@ void hyperstone_device::generate_stdr(drcuml_block &block, compiler_state &compi const uint32_t src_code = op & 0xf; const uint32_t dst_code = (op & 0xf0) >> 4; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (SrcGlobal || (src_code != dst_code)) generate_load_address_rp(block, compiler, desc, dst_code, uml::I0, uml::I2, 0); @@ -3726,7 +3723,7 @@ void hyperstone_device::generate_stwp(drcuml_block &block, compiler_state &compi const uint32_t src_code = op & 0xf; const uint32_t dst_code = (op & 0xf0) >> 4; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (SrcGlobal || (src_code != dst_code)) generate_load_address_rp(block, compiler, desc, dst_code, uml::I4, uml::I2, 4); @@ -3771,7 +3768,7 @@ void hyperstone_device::generate_stdp(drcuml_block &block, compiler_state &compi const uint32_t src_code = op & 0xf; const uint32_t dst_code = (op & 0xf0) >> 4; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (SrcGlobal || (src_code != dst_code)) generate_load_address_rp(block, compiler, desc, dst_code, uml::I4, uml::I2, 8); @@ -3955,7 +3952,7 @@ void hyperstone_device::generate_frame(drcuml_block &block, compiler_state &comp const uint32_t dst_code = (op & 0xf0) >> 4; UML_MOV(block, I2, DRC_SR); // I2 = SR - UML_ROLAND(block, I1, I2, 32 - FP_SHIFT, 0x7f); // I1 = FP -= Ls + UML_BFXU(block, I1, I2, FP_SHIFT, 7); // I1 = FP -= Ls UML_SUB(block, I1, I1, op & 0xf); UML_ROLAND(block, I0, I1, FP_SHIFT, FP_MASK); UML_OR(block, I0, I0, dst_code << FL_SHIFT); // FL = Ld @@ -3968,8 +3965,7 @@ void hyperstone_device::generate_frame(drcuml_block &block, compiler_state &comp UML_ROLAND(block, I3, I0, 30, 0x7f); UML_ADD(block, I3, I3, (64 - 10)); UML_SUB(block, I3, I3, I1); - UML_SHL(block, I3, I3, 32 - 7); // sign-extend 7-bit value - UML_SAR(block, I3, I3, 32 - 7); + UML_BFXS(block, I3, I3, 0, 7); // sign-extend 7-bit value UML_JMPc(block, uml::COND_NS, done); UML_CMP(block, I0, mem(&UB)); // check stack pointer against upper bound @@ -4030,7 +4026,7 @@ void hyperstone_device::generate_call(drcuml_block &block, compiler_state &compi if (!dst_code) dst_code = 16; - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); if (SrcGlobal) { @@ -4111,7 +4107,7 @@ void hyperstone_device::generate_extend(drcuml_block &block, compiler_state &com const uint16_t func = m_pr16(desc->pc + 2); - UML_ROLAND(block, I3, DRC_SR, 32 - FP_SHIFT, 0x7f); + UML_BFXU(block, I3, DRC_SR, FP_SHIFT, 7); UML_ADD(block, I2, I3, src_code); UML_AND(block, I2, I2, 0x3f); diff --git a/src/devices/cpu/uml.cpp b/src/devices/cpu/uml.cpp index 23775118d93e9..0beb9a7a00e2b 100644 --- a/src/devices/cpu/uml.cpp +++ b/src/devices/cpu/uml.cpp @@ -56,60 +56,60 @@ using namespace uml; //************************************************************************** // opcode validation condition/flag valid bitmasks -constexpr u8 OPFLAGS_NONE = FLAGS_NONE; -constexpr u8 OPFLAGS_C = FLAG_C; -constexpr u8 OPFLAGS_Z = FLAG_Z; -constexpr u8 OPFLAGS_SZ = FLAG_S | FLAG_Z; -constexpr u8 
OPFLAGS_SZC = FLAG_S | FLAG_Z | FLAG_C; -constexpr u8 OPFLAGS_SZV = FLAG_S | FLAG_Z | FLAG_V; -constexpr u8 OPFLAGS_SZVC = FLAG_S | FLAG_Z | FLAG_V | FLAG_C; -constexpr u8 OPFLAGS_UZC = FLAG_U | FLAG_Z | FLAG_C; -constexpr u8 OPFLAGS_ALL = FLAGS_ALL; -constexpr u8 OPFLAGS_P1 = 0x81; -constexpr u8 OPFLAGS_P2 = 0x82; -constexpr u8 OPFLAGS_P3 = 0x83; -constexpr u8 OPFLAGS_P4 = 0x84; +constexpr u8 OPFLAGS_NONE = FLAGS_NONE; +constexpr u8 OPFLAGS_C = FLAG_C; +constexpr u8 OPFLAGS_Z = FLAG_Z; +constexpr u8 OPFLAGS_SZ = FLAG_S | FLAG_Z; +constexpr u8 OPFLAGS_SZC = FLAG_S | FLAG_Z | FLAG_C; +constexpr u8 OPFLAGS_SZV = FLAG_S | FLAG_Z | FLAG_V; +constexpr u8 OPFLAGS_SZVC = FLAG_S | FLAG_Z | FLAG_V | FLAG_C; +constexpr u8 OPFLAGS_UZC = FLAG_U | FLAG_Z | FLAG_C; +constexpr u8 OPFLAGS_ALL = FLAGS_ALL; +constexpr u8 OPFLAGS_P1 = 0x81; +constexpr u8 OPFLAGS_P2 = 0x82; +constexpr u8 OPFLAGS_P3 = 0x83; +constexpr u8 OPFLAGS_P4 = 0x84; // parameter input/output states -#define PIO_IN 0x01 -#define PIO_OUT 0x02 -#define PIO_INOUT (PIO_IN | PIO_OUT) +constexpr u8 PIO_IN = 0x01; +constexpr u8 PIO_OUT = 0x02; +constexpr u8 PIO_INOUT = (PIO_IN | PIO_OUT); // parameter sizes -#define PSIZE_4 SIZE_DWORD -#define PSIZE_8 SIZE_QWORD -#define PSIZE_OP 0x80 -#define PSIZE_P1 0x81 -#define PSIZE_P2 0x82 -#define PSIZE_P3 0x83 -#define PSIZE_P4 0x84 +constexpr u8 PSIZE_4 = SIZE_DWORD; +constexpr u8 PSIZE_8 = SIZE_QWORD; +constexpr u8 PSIZE_OP = 0x80; +constexpr u8 PSIZE_P1 = 0x81; +constexpr u8 PSIZE_P2 = 0x82; +constexpr u8 PSIZE_P3 = 0x83; +constexpr u8 PSIZE_P4 = 0x84; // basic parameter types -#define PTYPES_NONE 0 -#define PTYPES_IMM (1 << parameter::PTYPE_IMMEDIATE) -#define PTYPES_IREG (1 << parameter::PTYPE_INT_REGISTER) -#define PTYPES_FREG (1 << parameter::PTYPE_FLOAT_REGISTER) -#define PTYPES_MVAR (1 << parameter::PTYPE_MAPVAR) -#define PTYPES_MEM (1 << parameter::PTYPE_MEMORY) -#define PTYPES_SIZE (1 << parameter::PTYPE_SIZE) -#define PTYPES_SCSIZE (1 << parameter::PTYPE_SIZE_SCALE) -#define PTYPES_SPSIZE (1 << parameter::PTYPE_SIZE_SPACE) -#define PTYPES_HANDLE (1 << parameter::PTYPE_CODE_HANDLE) -#define PTYPES_LABEL (1 << parameter::PTYPE_CODE_LABEL) -#define PTYPES_CFUNC (1 << parameter::PTYPE_C_FUNCTION) -#define PTYPES_ROUND (1 << parameter::PTYPE_ROUNDING) -#define PTYPES_STR (1 << parameter::PTYPE_STRING) +constexpr u16 PTYPES_NONE = 0; +constexpr u16 PTYPES_IMM = (1 << parameter::PTYPE_IMMEDIATE); +constexpr u16 PTYPES_IREG = (1 << parameter::PTYPE_INT_REGISTER); +constexpr u16 PTYPES_FREG = (1 << parameter::PTYPE_FLOAT_REGISTER); +constexpr u16 PTYPES_MVAR = (1 << parameter::PTYPE_MAPVAR); +constexpr u16 PTYPES_MEM = (1 << parameter::PTYPE_MEMORY); +constexpr u16 PTYPES_SIZE = (1 << parameter::PTYPE_SIZE); +constexpr u16 PTYPES_SCSIZE = (1 << parameter::PTYPE_SIZE_SCALE); +constexpr u16 PTYPES_SPSIZE = (1 << parameter::PTYPE_SIZE_SPACE); +constexpr u16 PTYPES_HANDLE = (1 << parameter::PTYPE_CODE_HANDLE); +constexpr u16 PTYPES_LABEL = (1 << parameter::PTYPE_CODE_LABEL); +constexpr u16 PTYPES_CFUNC = (1 << parameter::PTYPE_C_FUNCTION); +constexpr u16 PTYPES_ROUND = (1 << parameter::PTYPE_ROUNDING); +constexpr u16 PTYPES_STR = (1 << parameter::PTYPE_STRING); // special parameter types -#define PTYPES_PTR (PTYPES_MEM | 0x1000) -#define PTYPES_STATE (PTYPES_MEM | 0x2000) +constexpr u16 PTYPES_PTR = (PTYPES_MEM | 0x1000); +constexpr u16 PTYPES_STATE = (PTYPES_MEM | 0x2000); // combinations of types -#define PTYPES_IRM (PTYPES_IREG | PTYPES_MEM) -#define PTYPES_FRM (PTYPES_FREG | PTYPES_MEM) -#define 
PTYPES_IMV (PTYPES_IMM | PTYPES_MVAR) -#define PTYPES_IANY (PTYPES_IRM | PTYPES_IMV) -#define PTYPES_FANY (PTYPES_FRM) +constexpr u16 PTYPES_IRM = (PTYPES_IREG | PTYPES_MEM); +constexpr u16 PTYPES_FRM = (PTYPES_FREG | PTYPES_MEM); +constexpr u16 PTYPES_IMV = (PTYPES_IMM | PTYPES_MVAR); +constexpr u16 PTYPES_IANY = (PTYPES_IRM | PTYPES_IMV); +constexpr u16 PTYPES_FANY = (PTYPES_FRM); @@ -171,6 +171,8 @@ opcode_info const instruction::s_opcode_info_table[OP_MAX] = OPINFO1(SET, "!set", 4|8, true, NONE, NONE, NONE, PINFO(OUT, OP, IRM)) // Get the state of the specified condition (e.g. calling UML_SET with COND_NZ will return 0 if the condition is not met and 1 if the condition is met) OPINFO2(MOV, "!mov", 4|8, true, NONE, NONE, NONE, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY)) OPINFO3(SEXT, "!sext", 4|8, false, NONE, SZ, ALL, PINFO(OUT, OP, IRM), PINFO(IN, P3, IANY), PINFO(IN, OP, SIZE)) + OPINFO4(BFXU, "!bfxu", 4|8, false, NONE, SZ, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) + OPINFO4(BFXS, "!bfxs", 4|8, false, NONE, SZ, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) OPINFO4(ROLAND, "!roland", 4|8, false, NONE, SZ, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) // Rotate left + AND (see drcbec.cpp for implementation) OPINFO4(ROLINS, "!rolins", 4|8, false, NONE, SZ, ALL, PINFO(INOUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) // Rotate left + OR (see drcbec.cpp for implementation) OPINFO3(ADD, "!add", 4|8, false, NONE, SZVC, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) @@ -449,6 +451,114 @@ struct uml::instruction::simplify_op } } + static void bfxu(instruction &inst) + { + auto const size = inst.size(); + auto const bits = size << 3; + u64 const mask = size_mask(inst); + assert((size == 4) || (size == 8)); + + // truncate immediates to instruction size + truncate_immediate(inst, 1, mask); + truncate_immediate(inst, 2, bits - 1); + truncate_immediate(inst, 3, bits - 1); + + if (inst.param(2).is_immediate() && inst.param(3).is_immediate()) + { + auto const field = util::make_bitmask<u64>(inst.param(3).immediate()); + + if (inst.param(1).is_immediate()) + { + // constant result, convert to MOV or a logic operation + auto const rot = inst.param(2).immediate(); + + if (size == 4) + convert_to_mov_immediate(inst, rotr_32(inst.param(1).immediate(), rot) & field); + else + convert_to_mov_immediate(inst, rotr_64(inst.param(1).immediate(), rot) & field); + } + else if (inst.param(2).is_immediate_value(0)) + { + // no shift, convert to AND + inst.m_opcode = OP_AND; + inst.m_param[2] = field; + inst.m_numparams = 3; + } + else if ((inst.param(2).immediate() + inst.param(3).immediate()) == bits) + { + // equivalent to right shift + inst.m_opcode = OP_SHR; + inst.m_numparams = 3; + } + } + else if (inst.param(3).is_immediate_value(0)) + { + // undefined behaviour - just generate zero + convert_to_mov_immediate(inst, 0); + } + } + + static void bfxs(instruction &inst) + { + auto const size = inst.size(); + auto const bits = size << 3; + u64 const mask = size_mask(inst); + assert((size == 4) || (size == 8)); + + // truncate immediates to instruction size + truncate_immediate(inst, 1, mask); + truncate_immediate(inst, 2, bits - 1); + truncate_immediate(inst, 3, bits - 1); + + if (inst.param(2).is_immediate() && inst.param(3).is_immediate()) + { + if (inst.param(1).is_immediate()) + { + // constant result, convert to MOV or a logic 
+	static void bfxs(instruction &inst)
+	{
+		auto const size = inst.size();
+		auto const bits = size << 3;
+		u64 const mask = size_mask(inst);
+		assert((size == 4) || (size == 8));
+
+		// truncate immediates to instruction size
+		truncate_immediate(inst, 1, mask);
+		truncate_immediate(inst, 2, bits - 1);
+		truncate_immediate(inst, 3, bits - 1);
+
+		if (inst.param(2).is_immediate() && inst.param(3).is_immediate())
+		{
+			if (inst.param(1).is_immediate())
+			{
+				// constant result, convert to MOV or a logic operation
+				auto const rot = inst.param(2).immediate() + inst.param(3).immediate();
+				auto const shift = -s64(inst.param(3).immediate()) & (bits - 1);
+
+				if (size == 4)
+					convert_to_mov_immediate(inst, u32(s32(rotr_32(inst.param(1).immediate(), rot)) >> shift));
+				else
+					convert_to_mov_immediate(inst, u64(s64(rotr_64(inst.param(1).immediate(), rot)) >> shift));
+			}
+			else if (inst.param(2).is_immediate_value(0))
+			{
+				// no shift, convert to SEXT if possible
+				switch (inst.param(3).immediate())
+				{
+				case 8:
+					inst.m_opcode = OP_SEXT;
+					inst.m_param[2] = parameter::make_size(SIZE_BYTE);
+					inst.m_numparams = 3;
+					break;
+				case 16:
+					inst.m_opcode = OP_SEXT;
+					inst.m_param[2] = parameter::make_size(SIZE_WORD);
+					inst.m_numparams = 3;
+					break;
+				case 32:
+					inst.m_opcode = OP_SEXT;
+					inst.m_param[2] = parameter::make_size(SIZE_DWORD);
+					inst.m_numparams = 3;
+					break;
+				}
+			}
+			else if ((inst.param(2).immediate() + inst.param(3).immediate()) == bits)
+			{
+				// equivalent to right shift
+				inst.m_opcode = OP_SAR;
+				inst.m_numparams = 3;
+			}
+		}
+		else if (inst.param(3).is_immediate_value(0))
+		{
+			// undefined behaviour - just generate zero
+			convert_to_mov_immediate(inst, 0);
+		}
+	}
+
 	static void roland(instruction &inst)
 	{
 		auto const size = inst.size();
@@ -474,9 +584,9 @@ struct uml::instruction::simplify_op
 				// only mask is variable, convert to AND
 				inst.m_opcode = OP_AND;
 				if (size == 4)
-					inst.m_param[1] = parameter(rotl_32(inst.param(1).immediate(), inst.param(2).immediate()));
+					inst.m_param[1] = rotl_32(inst.param(1).immediate(), inst.param(2).immediate());
 				else
-					inst.m_param[1] = parameter(rotl_64(inst.param(1).immediate(), inst.param(2).immediate()));
+					inst.m_param[1] = rotl_64(inst.param(1).immediate(), inst.param(2).immediate());
 				inst.m_param[2] = inst.param(3);
 				inst.m_numparams = 3;
 			}
@@ -506,6 +616,13 @@ struct uml::instruction::simplify_op
 			inst.m_numparams = 3;
 			inst.m_param[2] = bits - inst.param(2).immediate();
 		}
+		else if (inst.param(2).is_immediate() && inst.param(3).is_immediate() && !(inst.param(3).immediate() & (inst.param(3).immediate() + 1)))
+		{
+			// extract right-aligned field, convert to BFXU
+			inst.m_opcode = OP_BFXU;
+			inst.m_param[2] = bits - inst.param(2).immediate();
+			inst.m_param[3] = 64 - count_leading_zeros_64(inst.param(3).immediate());
+		}
 	}
 
 	static void rolins(instruction &inst)
@@ -1322,6 +1439,8 @@ void uml::instruction::simplify()
 	case OP_SET: simplify_op::set(*this); break;
 	case OP_MOV: simplify_op::mov(*this); break;
 	case OP_SEXT: simplify_op::sext(*this); break;
+	case OP_BFXU: simplify_op::bfxu(*this); break;
+	case OP_BFXS: simplify_op::bfxs(*this); break;
 	case OP_ROLAND: simplify_op::roland(*this); break;
 	case OP_ROLINS: simplify_op::rolins(*this); break;
 	case OP_ADD: simplify_op::add(*this); break;
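The signed fold in simplify_op::bfxs above uses a rotate/arithmetic-shift pair: rotating right by shift + width parks the field's most significant bit at bit 31 (or 63), and an arithmetic right shift by size - width then replicates it as the sign. A 32-bit sketch of those semantics (illustrative only; note that right-shifting a negative signed value is only guaranteed to be arithmetic from C++20 onward):

```C++
#include <cassert>
#include <cstdint>

// BFXS reference semantics, 32-bit flavour: park the field's MSB at bit 31,
// then let an arithmetic right shift sign-extend it across the high bits.
// width must be 1..31; width % 32 == 0 is documented as undefined.
std::int32_t bfxs32(std::uint32_t src, unsigned shift, unsigned width)
{
	unsigned const rot = (shift + width) & 31;
	std::uint32_t const rotated = (src >> rot) | (src << ((32 - rot) & 31));
	return std::int32_t(rotated) >> ((32 - width) & 31);
}

int main()
{
	assert(bfxs32(0x00000600, 8, 4) == 6);  // field 0b0110 stays positive
	assert(bfxs32(0x00000E00, 8, 4) == -2); // field 0b1110 is sign-extended
	return 0;
}
```

The mask test in the ROLAND conversion, !(mask & (mask + 1)), succeeds exactly when mask is a contiguous right-aligned run of set bits (adding one carries out of the run), in which case 64 - count_leading_zeros_64(mask) recovers the field width.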
diff --git a/src/devices/cpu/uml.h b/src/devices/cpu/uml.h
index 68dbaeea65349..1431fb3b63871 100644
--- a/src/devices/cpu/uml.h
+++ b/src/devices/cpu/uml.h
@@ -29,8 +29,8 @@ struct drcuml_machine_state;
 
 // use a namespace to wrap all the UML instruction concepts so that
 // we can keep names short
-namespace uml
-{
+namespace uml {
+
 // integer registers
 constexpr int REG_I0 = 0x400;
 constexpr int REG_I_COUNT = 10;
@@ -181,6 +181,8 @@ namespace uml
 		OP_SET,    // SET     dst,c
 		OP_MOV,    // MOV     dst,src[,c]
 		OP_SEXT,   // SEXT    dst,src,size
+		OP_BFXU,   // BFXU    dst,src,shift,width
+		OP_BFXS,   // BFXS    dst,src,shift,width
 		OP_ROLAND, // ROLAND  dst,src,shift,mask
 		OP_ROLINS, // ROLINS  dst,src,shift,mask
 		OP_ADD,    // ADD     dst,src1,src2[,f]
@@ -489,6 +491,8 @@ namespace uml
 		void mov(parameter dst, parameter src1) { configure(OP_MOV, 4, dst, src1); }
 		void mov(condition_t cond, parameter dst, parameter src1) { configure(OP_MOV, 4, dst, src1, cond); }
 		void sext(parameter dst, parameter src1, operand_size size) { configure(OP_SEXT, 4, dst, src1, parameter::make_size(size)); }
+		void bfxu(parameter dst, parameter src, parameter shift, parameter width) { configure(OP_BFXU, 4, dst, src, shift, width); }
+		void bfxs(parameter dst, parameter src, parameter shift, parameter width) { configure(OP_BFXS, 4, dst, src, shift, width); }
 		void roland(parameter dst, parameter src, parameter shift, parameter mask) { configure(OP_ROLAND, 4, dst, src, shift, mask); }
 		void rolins(parameter dst, parameter src, parameter shift, parameter mask) { configure(OP_ROLINS, 4, dst, src, shift, mask); }
 		void add(parameter dst, parameter src1, parameter src2) { configure(OP_ADD, 4, dst, src1, src2); }
@@ -530,6 +534,8 @@ namespace uml
 		void dmov(parameter dst, parameter src1) { configure(OP_MOV, 8, dst, src1); }
 		void dmov(condition_t cond, parameter dst, parameter src1) { configure(OP_MOV, 8, dst, src1, cond); }
 		void dsext(parameter dst, parameter src1, operand_size size) { configure(OP_SEXT, 8, dst, src1, parameter::make_size(size)); }
+		void dbfxu(parameter dst, parameter src, parameter shift, parameter width) { configure(OP_BFXU, 8, dst, src, shift, width); }
+		void dbfxs(parameter dst, parameter src, parameter shift, parameter width) { configure(OP_BFXS, 8, dst, src, shift, width); }
 		void droland(parameter dst, parameter src, parameter shift, parameter mask) { configure(OP_ROLAND, 8, dst, src, shift, mask); }
 		void drolins(parameter dst, parameter src, parameter shift, parameter mask) { configure(OP_ROLINS, 8, dst, src, shift, mask); }
 		void dadd(parameter dst, parameter src1, parameter src2) { configure(OP_ADD, 8, dst, src1, src2); }
@@ -677,6 +683,7 @@ namespace uml
 	const parameter M7(parameter::make_mapvar(MAPVAR_M0 + 7));
 	const parameter M8(parameter::make_mapvar(MAPVAR_M0 + 8));
 	const parameter M9(parameter::make_mapvar(MAPVAR_M0 + 9));
-}
+
+} // namespace uml
 
 #endif // MAME_CPU_UML_H
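For context, this is roughly how a front-end might emit the new operations through the documented macro forms; the surrounding block, register choices, and operand values here are hypothetical:

```C++
// Hypothetical emitter fragment; 'block' is a drcuml_block being filled in,
// and I0..I4 are the usual UML integer register parameters.
UML_BFXU(block, I0, I1, 8, 12);  // I0 = zero-extended bits 8..19 of I1
UML_BFXS(block, I2, I1, 0, 8);   // shift 0, width 8: simplifier rewrites to SEXT byte
UML_DBFXU(block, I3, I4, 60, 8); // 64-bit: field wraps from bit 60 around to bit 3
```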