Skip to content

Commit 05e5de1

Browse files
committed
Replace cfenv dependency with new remill FPU intrinsics
1 parent 63ad1d5 commit 05e5de1

File tree

13 files changed

+325
-274
lines changed

13 files changed

+325
-274
lines changed

include/remill/Arch/AArch64/Runtime/State.h

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -167,35 +167,38 @@ union NZCV {
167167

168168
static_assert(8 == sizeof(NZCV), "Invalid packing of `union NZCV`.");
169169

170-
#if COMPILING_WITH_GCC
171-
using FPURoundingMode = uint64_t;
172-
using FPUFlushToZeroMode = uint64_t;
173-
using FPUDefaultNaNMode = uint64_t;
174-
using FPUHalfPrecisionMode = uint64_t;
175-
#else
176-
177170
enum FPURoundingMode : uint64_t {
178-
kFPURoundToNearestEven, // RN (round nearest).
179-
kFPURoundUpInf, // RP (round toward plus infinity).
180-
kFPURoundDownNegInf, // RM (round toward minus infinity).
181-
kFPURoundToZero // RZ (round toward zero).
171+
kFPURoundToNearestEven = 0, // RN (round nearest).
172+
kFPURoundUpInf = 1, // RP (round toward plus infinity).
173+
kFPURoundDownNegInf = 2, // RM (round toward minus infinity).
174+
kFPURoundToZero = 3, // RZ (round toward zero).
182175
};
183176

184177
enum FPUFlushToZeroMode : uint64_t {
185-
kFlushToZeroDisabled,
186-
kFlushToZeroEnabled
178+
kFlushToZeroDisabled = 0,
179+
kFlushToZeroEnabled = 1,
187180
};
188181

189182
enum FPUDefaultNaNMode : uint64_t {
190-
kPropagateOriginalNaN,
191-
kPropagateDefaultNaN
183+
kPropagateOriginalNaN = 0,
184+
kPropagateDefaultNaN = 1,
192185
};
193186

194187
enum FPUHalfPrecisionMode : uint64_t {
195-
kIEEEHalfPrecisionMode,
196-
kAlternativeHalfPrecisionMode
188+
kIEEEHalfPrecisionMode = 0,
189+
kAlternativeHalfPrecisionMode = 1,
190+
};
191+
192+
// AArch64 FPSR cumulative exception flags
193+
enum FPUExceptionFlag : uint16_t {
194+
kFPUExceptionInvalid = (1 << 0), // FPSR.ioc, bit 0 - Invalid Operation (FE_INVALID)
195+
kFPUExceptionDivByZero = (1 << 1), // FPSR.dzc, bit 1 - Divide by Zero (FE_DIVBYZERO)
196+
kFPUExceptionOverflow = (1 << 2), // FPSR.ofc, bit 2 - Overflow (FE_OVERFLOW)
197+
kFPUExceptionUnderflow = (1 << 3), // FPSR.ufc, bit 3 - Underflow (FE_UNDERFLOW)
198+
kFPUExceptionPrecision = (1 << 4), // FPSR.ixc, bit 4 - Inexact/Precision (FE_INEXACT)
199+
kFPUExceptionDenormal = (1 << 7), // FPSR.idc, bit 7 - Input Denormal (no standard FE_ equivalent)
200+
kFPUExceptionAll = 0x9F // All exception flags (bits 0-4, 7)
197201
};
198-
#endif
199202

200203
// Floating point control register. Really, this is a 32-bit register, but
201204
// it is accessed via 64-bit register instructions: `mrs <Xt>, fpcr`.
@@ -216,6 +219,8 @@ static_assert(sizeof(FPCR) == 8, "Invalid packing of `union FPCR`.");
216219

217220
// Floating point status register. Really, this is a 32-bit register, but
218221
// it is accessed via 64-bit register instructions: `mrs <Xt>, fpsr`.
222+
// NOTE: This register is not updated directly; its fields are mirrored in
223+
// the SR register.
219224
union FPSR {
220225
uint64_t flat;
221226
struct {
@@ -265,8 +270,10 @@ struct alignas(8) SR final {
265270
uint8_t idc; // Input denormal (cumulative).
266271
uint8_t _10;
267272
uint8_t ioc; // Invalid operation (cumulative).
273+
uint8_t _11;
274+
uint8_t dzc; // Divide by zero (cumulative).
268275

269-
uint8_t _padding[6];
276+
uint8_t _padding[4];
270277
} __attribute__((packed));
271278

272279
static_assert(56 == sizeof(SR), "Invalid packing of `struct SR`.");

include/remill/Arch/Runtime/Float.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,6 @@
3737
# define _RC_CHOP 0x00000300 // chop
3838
#endif
3939

40-
#if __has_include(<cfenv>)
41-
# include <cfenv>
42-
#endif
43-
4440
#include "Math.h"
4541

4642
// macOS does not have this flag

include/remill/Arch/Runtime/Intrinsics.h

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -257,17 +257,35 @@ __remill_compare_exchange_memory_128(Memory *, addr_t addr, uint128_t &expected,
257257
[[gnu::used]] extern Memory *__remill_fetch_and_nand_64(Memory *, addr_t addr,
258258
uint64_t &value);
259259

260-
// Read and modify the floating point exception state of the (virtual) machine
261-
// that is executing the actual floating point operations.
262-
//
263-
// auto old = __remill_fpu_exception_test_and_clear(0, FE_ALL_EXCEPT);
264-
// auto y = ...;
265-
// auto res = x op y;
266-
// auto flags = __remill_fpu_exception_test_and_clear(FE_ALL_EXCEPT, 0);
267-
//
268-
// These flags are also subject to optimizations
269-
[[gnu::used]] extern int __remill_fpu_exception_test_and_clear(int read_mask,
270-
int clear_mask);
260+
// Read current floating point exception flags.
261+
// Uses architecture-specific FPUExceptionFlag values that are mapped to
262+
// cfenv flags. Typically implemented via std::fetestexcept.
263+
// NOTE: You need to use BarrierReorder around this to avoid reordering bugs.
264+
[[gnu::used]] extern int32_t __remill_fpu_exception_test(int32_t read_mask);
265+
266+
// Clear floating point exception flags.
267+
// Uses architecture-specific FPUExceptionFlag values that are mapped to
268+
// cfenv flags. Typically implemented via std::feclearexcept.
269+
// NOTE: You need to use BarrierReorder around this to avoid reordering bugs.
270+
[[gnu::used]] extern void __remill_fpu_exception_clear(int32_t clear_mask);
271+
272+
// Raise floating point exception flags.
273+
// Uses architecture-specific FPUExceptionFlag values that are mapped to
274+
// cfenv flags. Typically implemented via std::feraiseexcept.
275+
// NOTE: You need to use BarrierReorder around this to avoid reordering bugs.
276+
[[gnu::used]] extern void __remill_fpu_exception_raise(int32_t except_mask);
277+
278+
// Set the floating point rounding mode.
279+
// Uses architecture-specific rounding values (x86 FPURoundingControl, AArch64 FPURoundingMode) that are mapped to
280+
// cfenv rounding modes. Typically implemented via std::fesetround.
281+
// NOTE: You need to use BarrierReorder around this to avoid reordering bugs.
282+
[[gnu::used]] extern void __remill_fpu_set_rounding(int32_t round_mode);
283+
284+
// Get the current floating point rounding mode.
285+
// Returns architecture-specific rounding values (x86 FPURoundingControl, AArch64 FPURoundingMode) mapped from
286+
// cfenv rounding modes. Typically implemented via std::fegetround.
287+
// NOTE: You need to use BarrierReorder around this to avoid reordering bugs.
288+
[[gnu::used]] extern int32_t __remill_fpu_get_rounding();
271289

272290
// Read/write to I/O ports.
273291
[[gnu::used]] extern uint8_t __remill_read_io_port_8(Memory *, addr_t);

include/remill/Arch/Runtime/sysroot/cfenv

Lines changed: 0 additions & 134 deletions
This file was deleted.

include/remill/Arch/X86/Runtime/State.h

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -126,26 +126,34 @@ static_assert(2 == sizeof(FPUStatusWord),
126126
"Invalid structure packing of `FPUFlags`.");
127127

128128
enum FPUPrecisionControl : uint16_t {
129-
kPrecisionSingle,
130-
kPrecisionReserved,
131-
kPrecisionDouble,
132-
kPrecisionExtended
129+
kPrecisionSingle = 0,
130+
kPrecisionReserved = 1,
131+
kPrecisionDouble = 2,
132+
kPrecisionExtended = 3,
133133
};
134134

135135
enum FPURoundingControl : uint16_t {
136-
kFPURoundToNearestEven,
137-
kFPURoundDownNegInf,
138-
kFPURoundUpInf,
139-
kFPURoundToZero
136+
kFPURoundToNearestEven = 0,
137+
kFPURoundDownNegInf = 1,
138+
kFPURoundUpInf = 2,
139+
kFPURoundToZero = 3,
140140
};
141141

142-
enum FPUInfinityControl : uint16_t { kInfinityProjective, kInfinityAffine };
142+
enum FPUInfinityControl : uint16_t {
143+
kInfinityProjective = 0,
144+
kInfinityAffine = 1,
145+
};
143146

144-
#ifndef __clang__
145-
# define FPUPrecisionControl uint16_t
146-
# define FPURoundingControl uint16_t
147-
# define FPUInfinityControl uint16_t
148-
#endif
147+
enum FPUExceptionFlag : uint16_t {
148+
kFPUExceptionInvalid = (1 << 0), // FSW.ie, bit 0 - Invalid Operation (FE_INVALID)
149+
kFPUExceptionDenormal = (1 << 1), // FSW.de, bit 1 - Denormal Operand (FE_DENORMAL)
150+
kFPUExceptionDivByZero = (1 << 2), // FSW.ze, bit 2 - Zero Divide (FE_DIVBYZERO)
151+
kFPUExceptionOverflow = (1 << 3), // FSW.oe, bit 3 - Overflow (FE_OVERFLOW)
152+
kFPUExceptionUnderflow = (1 << 4), // FSW.ue, bit 4 - Underflow (FE_UNDERFLOW)
153+
kFPUExceptionPrecision = (1 << 5), // FSW.pe, bit 5 - Precision/Inexact (FE_INEXACT)
154+
kFPUExceptionStackFault = (1 << 6), // FSW.sf, bit 6 - Stack Fault (no FE_ equivalent, x87-specific)
155+
kFPUExceptionAll = 0x7F // All exception flags (bits 0-6)
156+
};
149157

150158
union FPUControlWord final {
151159
uint16_t flat;
@@ -369,7 +377,10 @@ struct FPUStatusFlags final {
369377
uint8_t _9;
370378
uint8_t ie; // Invalid operation.
371379

372-
uint8_t _padding[4];
380+
uint8_t _10;
381+
uint8_t sf; // Stack fault.
382+
383+
uint8_t _padding[2];
373384
} __attribute__((packed));
374385

375386
static_assert(24 == sizeof(FPUStatusFlags),

lib/Arch/AArch64/Semantics/BINARY.cpp

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -272,17 +272,14 @@ DEF_SEM(FMADD_S, V128W dst, V32 src1, V32 src2, V32 src3) {
272272

273273
auto old_underflow = state.sr.ufc;
274274

275-
auto zero = __remill_fpu_exception_test_and_clear(0, FE_ALL_EXCEPT);
275+
__remill_fpu_exception_clear(kFPUExceptionAll);
276276
BarrierReorder();
277277
auto prod = FMul32(factor1, factor2);
278-
BarrierReorder();
279-
auto except_mul = __remill_fpu_exception_test_and_clear(FE_ALL_EXCEPT, zero);
280-
BarrierReorder();
281278
auto res = FAdd32(prod, add);
282279
BarrierReorder();
283-
auto except_add =
284-
__remill_fpu_exception_test_and_clear(FE_ALL_EXCEPT, except_mul);
285-
SetFPSRStatusFlags(state, except_add);
280+
auto new_except = __remill_fpu_exception_test(kFPUExceptionAll);
281+
BarrierReorder();
282+
SetFPSRStatusFlags(state, new_except);
286283

287284
// Sets underflow for 0x3fffffff, 0x1 but native doesn't.
288285
if (state.sr.ufc && !old_underflow) {
@@ -302,17 +299,13 @@ DEF_SEM(FMADD_D, V128W dst, V64 src1, V64 src2, V64 src3) {
302299

303300
auto old_underflow = state.sr.ufc;
304301

305-
auto zero = __remill_fpu_exception_test_and_clear(0, FE_ALL_EXCEPT);
302+
__remill_fpu_exception_clear(kFPUExceptionAll);
306303
BarrierReorder();
307304
auto prod = FMul64(factor1, factor2);
308-
BarrierReorder();
309-
auto except_mul = __remill_fpu_exception_test_and_clear(FE_ALL_EXCEPT, zero);
310-
BarrierReorder();
311305
auto res = FAdd64(prod, add);
312306
BarrierReorder();
313-
auto except_add =
314-
__remill_fpu_exception_test_and_clear(FE_ALL_EXCEPT, except_mul);
315-
SetFPSRStatusFlags(state, except_add);
307+
auto except_new = __remill_fpu_exception_test(kFPUExceptionAll);
308+
SetFPSRStatusFlags(state, except_new);
316309

317310
// Sets underflow for test case (0x3fffffffffffffff, 0x1) but native doesn't.
318311
if (state.sr.ufc && !old_underflow) {

0 commit comments

Comments
 (0)