lifting-bits
diff --git a/‎include/remill/Arch/AArch64/Runtime/State.h‎
Lines changed: 26 additions & 19 deletions b/‎include/remill/Arch/AArch64/Runtime/State.h‎
Lines changed: 26 additions & 19 deletions
diff --git a/‎include/remill/Arch/Runtime/Float.h‎
Lines changed: 0 additions & 4 deletions b/‎include/remill/Arch/Runtime/Float.h‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎include/remill/Arch/Runtime/Intrinsics.h‎
Lines changed: 29 additions & 11 deletions b/‎include/remill/Arch/Runtime/Intrinsics.h‎
Lines changed: 29 additions & 11 deletions
diff --git a/‎include/remill/Arch/Runtime/sysroot/cfenv‎
Lines changed: 0 additions & 134 deletions b/‎include/remill/Arch/Runtime/sysroot/cfenv‎
Lines changed: 0 additions & 134 deletions
diff --git a/‎include/remill/Arch/X86/Runtime/State.h‎
Lines changed: 26 additions & 15 deletions b/‎include/remill/Arch/X86/Runtime/State.h‎
Lines changed: 26 additions & 15 deletions
diff --git a/‎lib/Arch/AArch64/Semantics/BINARY.cpp‎
Lines changed: 7 additions & 14 deletions b/‎lib/Arch/AArch64/Semantics/BINARY.cpp‎
Lines changed: 7 additions & 14 deletions
@@ -167,35 +167,38 @@ union NZCV {
 
 static_assert(8 == sizeof(NZCV), "Invalid packing of `union NZCV`.");
 
-#if COMPILING_WITH_GCC
-using FPURoundingMode = uint64_t;
-using FPUFlushToZeroMode = uint64_t;
-using FPUDefaultNaNMode = uint64_t;
-using FPUHalfPrecisionMode = uint64_t;
-#else
-
 enum FPURoundingMode : uint64_t {
-  kFPURoundToNearestEven,  // RN (round nearest).
-  kFPURoundUpInf,  // RP (round toward plus infinity).
-  kFPURoundDownNegInf,  // RM (round toward minus infinity).
-  kFPURoundToZero  // RZ (round toward zero).
+  kFPURoundToNearestEven = 0,  // RN (round nearest).
+  kFPURoundUpInf = 1,  // RP (round toward plus infinity).
+  kFPURoundDownNegInf = 2,  // RM (round toward minus infinity).
+  kFPURoundToZero = 3, // RZ (round toward zero).
 };
 
 enum FPUFlushToZeroMode : uint64_t {
-  kFlushToZeroDisabled,
-  kFlushToZeroEnabled
+  kFlushToZeroDisabled = 0,
+  kFlushToZeroEnabled = 1,
 };
 
 enum FPUDefaultNaNMode : uint64_t {
-  kPropagateOriginalNaN,
-  kPropagateDefaultNaN
+  kPropagateOriginalNaN = 0,
+  kPropagateDefaultNaN = 1,
 };
 
 enum FPUHalfPrecisionMode : uint64_t {
-  kIEEEHalfPrecisionMode,
-  kAlternativeHalfPrecisionMode
+  kIEEEHalfPrecisionMode = 0,
+  kAlternativeHalfPrecisionMode = 1,
+};
+
+// AArch64 FPSR cumulative exception flags
+enum FPUExceptionFlag : uint16_t {
+  kFPUExceptionInvalid   = (1 << 0),  // FPSR.ioc, bit 0 - Invalid Operation (FE_INVALID)
+  kFPUExceptionDivByZero = (1 << 1),  // FPSR.dzc, bit 1 - Divide by Zero (FE_DIVBYZERO)
+  kFPUExceptionOverflow  = (1 << 2),  // FPSR.ofc, bit 2 - Overflow (FE_OVERFLOW)
+  kFPUExceptionUnderflow = (1 << 3),  // FPSR.ufc, bit 3 - Underflow (FE_UNDERFLOW)
+  kFPUExceptionPrecision = (1 << 4),  // FPSR.ixc, bit 4 - Inexact/Precision (FE_INEXACT)
+  kFPUExceptionDenormal  = (1 << 7),  // FPSR.idc, bit 7 - Input Denormal (no standard FE_ equivalent)
+  kFPUExceptionAll       = 0x9F       // All exception flags (bits 0-4, 7)
 };
-#endif
 
 // Floating point control register. Really, this is a 32-bit register, but
 // it is accessed 64-bit register instructions: `mrs <Xt>, fpcr`.
@@ -216,6 +219,8 @@ static_assert(sizeof(FPCR) == 8, "Invalid packing of `union FPCR`.");
 
 // Floating point status register. Really, this is a 32-bit register, but
 // it is accessed 64-bit register instructions: `mrs <Xt>, fpsr`.
+// NOTE: This register is not updated directly; its fields are mirrored in
+// the `SR` struct.
 union FPSR {
   uint64_t flat;
   struct {
@@ -265,8 +270,10 @@ struct alignas(8) SR final {
   uint8_t idc;  // Input denormal (cumulative).
   uint8_t _10;
   uint8_t ioc;  // Invalid operation (cumulative).
+  uint8_t _11;
+  uint8_t dzc;  // Divide by zero (cumulative).
 
-  uint8_t _padding[6];
+  uint8_t _padding[4];
 } __attribute__((packed));
 
 static_assert(56 == sizeof(SR), "Invalid packing of `struct SR`.");
 
@@ -37,10 +37,6 @@
 #  define _RC_CHOP 0x00000300  //     chop
 #endif
 
-#if __has_include(<cfenv>)
-#  include <cfenv>
-#endif
-
 #include "Math.h"
 
 // macOS does not have this flag
 
@@ -257,17 +257,35 @@ __remill_compare_exchange_memory_128(Memory *, addr_t addr, uint128_t &expected,
 [[gnu::used]] extern Memory *__remill_fetch_and_nand_64(Memory *, addr_t addr,
                                                         uint64_t &value);
 
-// Read and modify the floating point exception state of the (virtual) machine
-// that is executing the actual floating point operations.
-//
-//      auto old = __remill_fpu_exception_test_and_clear(0, FE_ALL_EXCEPT);
-//      auto y = ...;
-//      auto res = x op y;
-//      auto flags = __remill_fpu_exception_test_and_clear(FE_ALL_EXCEPT, 0);
-//
-// These flags are also subject to optimizations
-[[gnu::used]] extern int __remill_fpu_exception_test_and_clear(int read_mask,
-                                                               int clear_mask);
+// Read current floating point exception flags.
+// Uses architecture-specific FPUExceptionFlag values that are mapped to
+// cfenv flags. Typically implemented via std::fetestexcept.
+// NOTE: You need to use BarrierReorder around this to avoid reordering bugs.
+[[gnu::used]] extern int32_t __remill_fpu_exception_test(int32_t read_mask);
+
+// Clear floating point exception flags.
+// Uses architecture-specific FPUExceptionFlag values that are mapped to
+// cfenv flags. Typically implemented via std::feclearexcept.
+// NOTE: You need to use BarrierReorder around this to avoid reordering bugs.
+[[gnu::used]] extern void __remill_fpu_exception_clear(int32_t clear_mask);
+
+// Raise floating point exception flags.
+// Uses architecture-specific FPUExceptionFlag values that are mapped to
+// cfenv flags. Typically implemented via std::feraiseexcept.
+// NOTE: You need to use BarrierReorder around this to avoid reordering bugs.
+[[gnu::used]] extern void __remill_fpu_exception_raise(int32_t except_mask);
+
+// Set the floating point rounding mode. Takes an architecture-specific value
+// (x86 `FPURoundingControl`, AArch64 `FPURoundingMode`) that is mapped to a
+// cfenv rounding mode. Typically implemented via std::fesetround.
+// NOTE: You need to use BarrierReorder around this to avoid reordering bugs.
+[[gnu::used]] extern void __remill_fpu_set_rounding(int32_t round_mode);
+
+// Get the current floating point rounding mode as an architecture-specific
+// value (x86 `FPURoundingControl`, AArch64 `FPURoundingMode`) mapped from the
+// cfenv rounding mode. Typically implemented via std::fegetround.
+// NOTE: You need to use BarrierReorder around this to avoid reordering bugs.
+[[gnu::used]] extern int32_t __remill_fpu_get_rounding();
 
 // Read/write to I/O ports.
 [[gnu::used]] extern uint8_t __remill_read_io_port_8(Memory *, addr_t);
 
@@ -126,26 +126,34 @@ static_assert(2 == sizeof(FPUStatusWord),
               "Invalid structure packing of `FPUFlags`.");
 
 enum FPUPrecisionControl : uint16_t {
-  kPrecisionSingle,
-  kPrecisionReserved,
-  kPrecisionDouble,
-  kPrecisionExtended
+  kPrecisionSingle = 0,
+  kPrecisionReserved = 1,
+  kPrecisionDouble = 2,
+  kPrecisionExtended = 3,
 };
 
 enum FPURoundingControl : uint16_t {
-  kFPURoundToNearestEven,
-  kFPURoundDownNegInf,
-  kFPURoundUpInf,
-  kFPURoundToZero
+  kFPURoundToNearestEven = 0,
+  kFPURoundDownNegInf = 1,
+  kFPURoundUpInf = 2,
+  kFPURoundToZero = 3,
 };
 
-enum FPUInfinityControl : uint16_t { kInfinityProjective, kInfinityAffine };
+enum FPUInfinityControl : uint16_t {
+  kInfinityProjective = 0,
+  kInfinityAffine = 1,
+};
 
-#ifndef __clang__
-#  define FPUPrecisionControl uint16_t
-#  define FPURoundingControl uint16_t
-#  define FPUInfinityControl uint16_t
-#endif
+enum FPUExceptionFlag : uint16_t {
+  kFPUExceptionInvalid   = (1 << 0),  // FSW.ie, bit 0 - Invalid Operation (FE_INVALID)
+  kFPUExceptionDenormal  = (1 << 1),  // FSW.de, bit 1 - Denormal Operand (FE_DENORMAL)
+  kFPUExceptionDivByZero = (1 << 2),  // FSW.ze, bit 2 - Zero Divide (FE_DIVBYZERO)
+  kFPUExceptionOverflow  = (1 << 3),  // FSW.oe, bit 3 - Overflow (FE_OVERFLOW)
+  kFPUExceptionUnderflow = (1 << 4),  // FSW.ue, bit 4 - Underflow (FE_UNDERFLOW)
+  kFPUExceptionPrecision = (1 << 5),  // FSW.pe, bit 5 - Precision/Inexact (FE_INEXACT)
+  kFPUExceptionStackFault = (1 << 6), // FSW.sf, bit 6 - Stack Fault (no FE_ equivalent, x87-specific)
+  kFPUExceptionAll       = 0x7F       // All exception flags (bits 0-6)
+};
 
 union FPUControlWord final {
   uint16_t flat;
@@ -369,7 +377,10 @@ struct FPUStatusFlags final {
   uint8_t _9;
   uint8_t ie;  // Invalid operation.
 
-  uint8_t _padding[4];
+  uint8_t _10;
+  uint8_t sf; // Stack fault.
+
+  uint8_t _padding[2];
 } __attribute__((packed));
 
 static_assert(24 == sizeof(FPUStatusFlags),
 
@@ -272,17 +272,14 @@ DEF_SEM(FMADD_S, V128W dst, V32 src1, V32 src2, V32 src3) {
 
   auto old_underflow = state.sr.ufc;
 
-  auto zero = __remill_fpu_exception_test_and_clear(0, FE_ALL_EXCEPT);
+  __remill_fpu_exception_clear(kFPUExceptionAll);
   BarrierReorder();
   auto prod = FMul32(factor1, factor2);
-  BarrierReorder();
-  auto except_mul = __remill_fpu_exception_test_and_clear(FE_ALL_EXCEPT, zero);
-  BarrierReorder();
   auto res = FAdd32(prod, add);
   BarrierReorder();
-  auto except_add =
-      __remill_fpu_exception_test_and_clear(FE_ALL_EXCEPT, except_mul);
-  SetFPSRStatusFlags(state, except_add);
+  auto new_except = __remill_fpu_exception_test(kFPUExceptionAll);
+  BarrierReorder();
+  SetFPSRStatusFlags(state, new_except);
 
   // Sets underflow for 0x3fffffff, 0x1 but native doesn't.
   if (state.sr.ufc && !old_underflow) {
@@ -302,17 +299,13 @@ DEF_SEM(FMADD_D, V128W dst, V64 src1, V64 src2, V64 src3) {
 
   auto old_underflow = state.sr.ufc;
 
-  auto zero = __remill_fpu_exception_test_and_clear(0, FE_ALL_EXCEPT);
+  __remill_fpu_exception_clear(kFPUExceptionAll);
   BarrierReorder();
   auto prod = FMul64(factor1, factor2);
-  BarrierReorder();
-  auto except_mul = __remill_fpu_exception_test_and_clear(FE_ALL_EXCEPT, zero);
-  BarrierReorder();
   auto res = FAdd64(prod, add);
   BarrierReorder();
-  auto except_add =
-      __remill_fpu_exception_test_and_clear(FE_ALL_EXCEPT, except_mul);
-  SetFPSRStatusFlags(state, except_add);
+  auto except_new = __remill_fpu_exception_test(kFPUExceptionAll);
+  SetFPSRStatusFlags(state, except_new);
 
   // Sets underflow for test case (0x3fffffffffffffff, 0x1) but native doesn't.
   if (state.sr.ufc && !old_underflow) {