Add support for implicit-ST0 memory FADD; adapt SSE scalar sqrt semantics to XED 2025 operands

wizardengineer · wizardengineer · commit d84ad7042461 · 2025-11-25T21:59:28.000Z
diff --git a/lib/Arch/X86/Semantics/SSE.cpp b/lib/Arch/X86/Semantics/SSE.cpp
@@ -1638,7 +1638,7 @@ IF_AVX(DEF_ISEL(VMOVDDUP_XMMdq_XMMq) = MOVDDUP<VV128W, V128>;)
 namespace {
 
 template <typename D, typename S1>
-DEF_SEM(SQRTSS, D dst, S1 src1) {
+DEF_SEM(SQRTSS, D dst, D _nop_read, S1 src1) {
 
   // Extract a "single-precision" (32-bit) float from [31:0] of src1 vector:
   auto src_float = FExtractV32(FReadV32(src1), 0);
@@ -1654,7 +1654,7 @@ DEF_SEM(SQRTSS, D dst, S1 src1) {
 }
 
 template <typename D, typename S1>
-DEF_SEM(RSQRTSS, D dst, S1 src1) {
+DEF_SEM(RSQRTSS, D dst, D _nop_read, S1 src1) {
 
   // Extract a "single-precision" (32-bit) float from [31:0] of src1 vector:
   auto src_float = FExtractV32(FReadV32(src1), 0);
@@ -1756,8 +1756,11 @@ DEF_HELPER(SquareRoot64, float64_t src_float)->float64_t {
   return square_root;
 }
 
+// NOTE(xed2025): XED 2025+ marks SSE scalar operations as reading the destination
+// register because they preserve its upper bits. The `_nop_read` parameter (here and
+// in SQRTSS/RSQRTSS above) absorbs that extra operand; it is unused since we read `dst`.
 template <typename D, typename S1>
-DEF_SEM(SQRTSD, D dst, S1 src1) {
+DEF_SEM(SQRTSD, D dst, D _nop_read, S1 src1) {
 
   // Extract a "double-precision" (64-bit) float from [63:0] of src1 vector:
   auto src_float = FExtractV64(FReadV64(src1), 0);
diff --git a/lib/Arch/X86/Semantics/X87.cpp b/lib/Arch/X86/Semantics/X87.cpp
@@ -558,6 +558,20 @@ DEF_FPU_SEM(FADDmem, RF80W dst, RF80W src1, T src2) {
   return FADD(memory, state, dst, src1, src2, pc, fop);
 }
 
+template <typename T>  // T: type of the memory source operand (instantiated below with MF32 / MF64).
+DEF_FPU_SEM(FADDmem_ST0_implicit, T src) {  // ST0 := ST0 + src; ST0 is both the implicit source and destination.
+  SetFPUIpOp();  // presumably records the FPU last-instruction pointer/opcode — confirm against the macro.
+  SetFPUDp(src);  // presumably records src's address as the FPU data pointer — confirm against the macro.
+  auto st0 = Read(X87_ST0);
+  auto result = CheckedFloatBinOp(state, FAdd80, st0, Float80(Read(src)));  // src is passed through Float80() before the add.
+  Write(X87_ST0, result);
+
+  state.sw.c0 = UUndefined8();  // C0, C2 and C3 are assigned undefined values; C1 is not assigned in this body.
+  state.sw.c2 = UUndefined8();
+  state.sw.c3 = UUndefined8();
+  return memory;
+}
+
 template <typename T>
 DEF_FPU_SEM(FADDP, RF80W dst, RF80W src1, T src2) {
   memory = FADD<T>(memory, state, dst, src1, src2, pc, fop);
@@ -584,6 +598,9 @@ DEF_ISEL(FADDP_X87_ST0) = FADDP<RF80W>;
 DEF_ISEL(FIADD_ST0_MEMmem32int) = FIADD<M32>;
 DEF_ISEL(FIADD_ST0_MEMmem16int) = FIADD<M16>;
 
+DEF_ISEL(FADD_MEMmem32real) = FADDmem_ST0_implicit<MF32>;  // MF32 memory-operand form; ST0 is implicit.
+DEF_ISEL(FADD_MEMm64real) = FADDmem_ST0_implicit<MF64>;  // MF64 form. NOTE(review): "MEMm64" vs "MEMmem32" spelling differs — confirm both match XED's iform names.
+
 namespace {
 
 template <typename T>
diff --git a/lib/BC/InstructionLifter.cpp b/lib/BC/InstructionLifter.cpp
@@ -593,17 +593,23 @@ llvm::Value *InstructionLifter::LiftRegisterOperand(Instruction &inst,
     auto arg_size = data_layout.getTypeAllocSizeInBits(arg_type);
 
     if (val_size < arg_size) {
-      // Because of using the latest version of Intex XED we support (which is currently v2025.06.08),
-      // it reports XMM/YMM registers as vectors instead of integers. When remills tries to extend/truncate
-      // these values we'll bitcast those vectors into integers 
+      // NOTE(xed2025): XED 2025 reports XMM/YMM/ZMM registers as LLVM vector types
+      // (e.g., <4 x float>) instead of integers. When remill needs to zero-extend
+      // these values to a larger integer type, we must first bitcast the vector
+      // to an integer of the same bit width, then perform the extension.
       if (arg_type->isIntegerTy()) {
         if (val_type->isVectorTy()) {
+          // Vector types can be directly bitcast to integers of the same size.
           auto int_type = llvm::Type::getIntNTy(module->getContext(), val_size);
           val = new llvm::BitCastInst(val, int_type, llvm::Twine::createNull(), block);
 
           val_type = int_type;
         } else if (val_type->isArrayTy()) {
-          // Arrays cannot be bitcast directly. Store to memory, bitcast pointer, then load.
+          // NOTE(xed2025): Some register types in remill's State structure are
+          // represented as arrays (e.g., X87 FPU stack entries as [10 x i8]).
+          // LLVM does not allow direct bitcast of array types to integers.
+          // Workaround: store array to stack, bitcast the pointer to int*, then load.
+          // This satisfies the type system. NOTE(review): the alloca is appended to `block`; LLVM only promotes entry-block allocas, so confirm it is optimized away.
           auto int_type = llvm::Type::getIntNTy(module->getContext(), val_size);
           auto temp_alloca = new llvm::AllocaInst(val_type, 0, llvm::Twine::createNull(), block);
           new llvm::StoreInst(val, temp_alloca, block);
@@ -633,14 +639,18 @@ llvm::Value *InstructionLifter::LiftRegisterOperand(Instruction &inst,
       }
 
     } else if (val_size > arg_size) {
+      // NOTE(xed2025): Same type conversion issue as above, but for truncation.
+      // XED 2025 may report registers as vectors/arrays that need conversion
+      // to integers before we can truncate them to the smaller argument size.
       if (arg_type->isIntegerTy()) {
         if (val_type->isVectorTy()) {
+          // Vector types can be directly bitcast to integers of the same size.
           auto int_type = llvm::Type::getIntNTy(module->getContext(), val_size);
           val = new llvm::BitCastInst(val, int_type, llvm::Twine::createNull(), block);
 
           val_type = int_type;
         } else if (val_type->isArrayTy()) {
-          // Arrays cannot be bitcast directly. Store to memory, bitcast pointer, then load.
+          // Array types require store-bitcast-load pattern (see comment above).
           auto int_type = llvm::Type::getIntNTy(module->getContext(), val_size);
           auto temp_alloca = new llvm::AllocaInst(val_type, 0, llvm::Twine::createNull(), block);
           new llvm::StoreInst(val, temp_alloca, block);