From fbd8b65936dd54e9ae16910d456b3fad306e6f8f Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert@gmail.com>
Date: Sun, 23 Nov 2025 18:03:52 -0500
Subject: [PATCH 1/5] Updated XED

---
 README.md              | 2 +-
 dependencies/xed.cmake | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 4454fe557..f97185bb7 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,7 @@ Remill uses the following dependencies:
 | [Google Test](https://github.com/google/googletest) | v1.17.0 |
 | [LLVM](http://llvm.org/) | 15+ |
 | [Clang](http://clang.llvm.org/) | 15+ |
-| [Intel XED](https://github.com/intelxed/xed) | v2022.04.17 |
+| [Intel XED](https://github.com/intelxed/xed) | v2025.06.08 |
 | [Python](https://www.python.org/) | 3+ |
 
 ## Getting and Building the Code
diff --git a/dependencies/xed.cmake b/dependencies/xed.cmake
index cbc1582f3..69877bcb0 100644
--- a/dependencies/xed.cmake
+++ b/dependencies/xed.cmake
@@ -52,7 +52,7 @@ ExternalProject_Add(mbuild
     GIT_REPOSITORY
         "https://github.com/intelxed/mbuild"
     GIT_TAG
-        "v2022.04.17"
+        "v2024.11.04"
     GIT_PROGRESS
         ON
     CONFIGURE_COMMAND
@@ -69,7 +69,7 @@ ExternalProject_Add(xed
     GIT_REPOSITORY
         "https://github.com/intelxed/xed"
     GIT_TAG
-        "v2022.04.17"
+        "v2025.06.08"
     GIT_PROGRESS
         ON
     CMAKE_CACHE_ARGS

From 4393ce6c6b5ecd11fd89e262d956395c08456699 Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert@gmail.com>
Date: Mon, 24 Nov 2025 00:45:50 -0500
Subject: [PATCH 2/5] Fix for Test error with latest XED

---
 lib/Arch/X86/Arch.cpp        |  8 +++++---
 lib/BC/InstructionLifter.cpp | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/lib/Arch/X86/Arch.cpp b/lib/Arch/X86/Arch.cpp
index 1fd767272..08ba8ae38 100644
--- a/lib/Arch/X86/Arch.cpp
+++ b/lib/Arch/X86/Arch.cpp
@@ -835,7 +835,8 @@ static bool IsAVX512(xed_isa_set_enum_t isa_set, xed_category_enum_t category) {
     case XED_ISA_SET_AVX512BW_128N:
     case XED_ISA_SET_AVX512BW_256:
     case XED_ISA_SET_AVX512BW_512:
-    case XED_ISA_SET_AVX512BW_KOP:
+    case XED_ISA_SET_AVX512BW_KOPD:
+    case XED_ISA_SET_AVX512BW_KOPQ:
     case XED_ISA_SET_AVX512CD_128:
     case XED_ISA_SET_AVX512CD_256:
     case XED_ISA_SET_AVX512CD_512:
@@ -843,7 +844,8 @@ static bool IsAVX512(xed_isa_set_enum_t isa_set, xed_category_enum_t category) {
     case XED_ISA_SET_AVX512DQ_128N:
     case XED_ISA_SET_AVX512DQ_256:
     case XED_ISA_SET_AVX512DQ_512:
-    case XED_ISA_SET_AVX512DQ_KOP:
+    case XED_ISA_SET_AVX512DQ_KOPB:
+    case XED_ISA_SET_AVX512DQ_KOPW:
     case XED_ISA_SET_AVX512DQ_SCALAR:
     case XED_ISA_SET_AVX512ER_512:
     case XED_ISA_SET_AVX512ER_SCALAR:
@@ -851,7 +853,7 @@ static bool IsAVX512(xed_isa_set_enum_t isa_set, xed_category_enum_t category) {
     case XED_ISA_SET_AVX512F_128N:
     case XED_ISA_SET_AVX512F_256:
     case XED_ISA_SET_AVX512F_512:
-    case XED_ISA_SET_AVX512F_KOP:
+    case XED_ISA_SET_AVX512F_KOPW:
     case XED_ISA_SET_AVX512F_SCALAR:
     case XED_ISA_SET_AVX512PF_512:
     case XED_ISA_SET_AVX512_4FMAPS_512:
diff --git a/lib/BC/InstructionLifter.cpp b/lib/BC/InstructionLifter.cpp
index b0b9f5549..761b5609d 100644
--- a/lib/BC/InstructionLifter.cpp
+++ b/lib/BC/InstructionLifter.cpp
@@ -593,7 +593,25 @@ llvm::Value *InstructionLifter::LiftRegisterOperand(Instruction &inst,
     auto arg_size = data_layout.getTypeAllocSizeInBits(arg_type);
 
     if (val_size < arg_size) {
+      // Because of using the latest version of Intex XED we support (which is currently v2025.06.08),
+      // it reports XMM/YMM registers as vectors instead of integers. When remills tries to extend/truncate
+      // these values we'll bitcast those vectors into integers 
       if (arg_type->isIntegerTy()) {
+        if (val_type->isVectorTy()) {
+          auto int_type = llvm::Type::getIntNTy(module->getContext(), val_size);
+          val = new llvm::BitCastInst(val, int_type, llvm::Twine::createNull(), block);
+
+          val_type = int_type;
+        } else if (val_type->isArrayTy()) {
+          // Arrays cannot be bitcast directly. Store to memory, bitcast pointer, then load.
+          auto int_type = llvm::Type::getIntNTy(module->getContext(), val_size);
+          auto temp_alloca = new llvm::AllocaInst(val_type, 0, llvm::Twine::createNull(), block);
+          new llvm::StoreInst(val, temp_alloca, block);
+          auto int_ptr = new llvm::BitCastInst(temp_alloca, llvm::PointerType::get(int_type, 0),
+                                                llvm::Twine::createNull(), block);
+          val = new llvm::LoadInst(int_type, int_ptr, llvm::Twine::createNull(), block);
+          val_type = int_type;
+        }
         CHECK(val_type->isIntegerTy())
             << "Expected " << arch_reg.name << " to be an integral type ("
             << "val_type: " << LLVMThingToString(val_type) << ", "
@@ -616,6 +634,22 @@ llvm::Value *InstructionLifter::LiftRegisterOperand(Instruction &inst,
 
     } else if (val_size > arg_size) {
       if (arg_type->isIntegerTy()) {
+        if (val_type->isVectorTy()) {
+          auto int_type = llvm::Type::getIntNTy(module->getContext(), val_size);
+          val = new llvm::BitCastInst(val, int_type, llvm::Twine::createNull(), block);
+
+          val_type = int_type;
+        } else if (val_type->isArrayTy()) {
+          // Arrays cannot be bitcast directly. Store to memory, bitcast pointer, then load.
+          auto int_type = llvm::Type::getIntNTy(module->getContext(), val_size);
+          auto temp_alloca = new llvm::AllocaInst(val_type, 0, llvm::Twine::createNull(), block);
+          new llvm::StoreInst(val, temp_alloca, block);
+          auto int_ptr = new llvm::BitCastInst(temp_alloca, llvm::PointerType::get(int_type, 0),
+                                                llvm::Twine::createNull(), block);
+          val = new llvm::LoadInst(int_type, int_ptr, llvm::Twine::createNull(), block);
+          val_type = int_type;
+        }
+
         CHECK(val_type->isIntegerTy())
             << "Expected " << arch_reg.name << " to be an integral type ("
             << "val_type: " << LLVMThingToString(val_type) << ", "

From 730db70ea369df1a51a5a7ec0de92495db458666 Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert@gmail.com>
Date: Mon, 24 Nov 2025 21:34:01 +0000
Subject: [PATCH 3/5] Resolved LZCNT & TZCNT test failures with XED 2025

---
 lib/Arch/X86/Arch.cpp | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/lib/Arch/X86/Arch.cpp b/lib/Arch/X86/Arch.cpp
index 08ba8ae38..7e636c013 100644
--- a/lib/Arch/X86/Arch.cpp
+++ b/lib/Arch/X86/Arch.cpp
@@ -359,6 +359,19 @@ static bool DecodeXED(xed_decoded_inst_t *xedd, const xed_state_t *mode,
   auto bytes = reinterpret_cast<const uint8_t *>(inst_bytes.data());
   xed_decoded_inst_zero_set_mode(xedd, mode);
   xed_decoded_inst_set_input_chip(xedd, XED_CHIP_INVALID);
+
+  // Enable LZCNT/TZCNT instructions (required for XED v2025+)
+  // This follows the deprecation note introduced by the following XED
+  // commit: https://github.com/intelxed/xed/commit/1bdc793f5f64cf207f6776f4c0e442e39fa47903
+  // - Backward compatibility for decoder initialization of several ISA features has
+  // been deprecated. Previously default-on features like `P4` (PAUSE), `LZCNT`
+  // (replacing BSR), and `TZCNT` (replacing BSF) are now disabled by default unless
+  // explicitly enabled by users through the raw XED setter APIs or the
+  // chip/chip-features APIs.
+   xed3_operand_set_lzcnt(xedd, 1);
+   xed3_operand_set_tzcnt(xedd, 1);
+   xed3_operand_set_p4(xedd, 1);  // Enable PAUSE as well
+
   auto err = xed_decode(xedd, bytes, static_cast<uint32_t>(num_bytes));
 
   if (XED_ERROR_NONE != err) {

From 2d24f1fb9baf83279a72d1266bfa37fbab13a88a Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert@gmail.com>
Date: Tue, 25 Nov 2025 01:27:47 +0000
Subject: [PATCH 4/5] Resolved issues with MULX

---
 lib/Arch/X86/Semantics/BINARY.cpp | 8 ++++----
 lib/Arch/X86/Semantics/ROTATE.cpp | 8 ++++----
 tests/X86/BINARY/MULX.S           | 4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/lib/Arch/X86/Semantics/BINARY.cpp b/lib/Arch/X86/Semantics/BINARY.cpp
index f27a3fa02..f29c84578 100644
--- a/lib/Arch/X86/Semantics/BINARY.cpp
+++ b/lib/Arch/X86/Semantics/BINARY.cpp
@@ -400,10 +400,10 @@ DEF_ISEL(MUL_GPRv_16) = MULax<R16>;
 DEF_ISEL(MUL_GPRv_32) = MULeax<R32>;
 IF_64BIT(DEF_ISEL(MUL_GPRv_64) = MULrax<R64>;)
 
-DEF_ISEL(MULX_VGPR32d_VGPR32d_VGPR32d) = MULX<R32W, R32>;
-DEF_ISEL(MULX_VGPR32d_VGPR32d_MEMd) = MULX<R32W, M32>;
-IF_64BIT(DEF_ISEL(MULX_VGPR64q_VGPR64q_VGPR64q) = MULX<R64W, R64>;)
-IF_64BIT(DEF_ISEL(MULX_VGPR64q_VGPR64q_MEMq) = MULX<R64W, M64>;)
+DEF_ISEL(MULX_GPR32d_GPR32d_GPR32d) = MULX<R32W, R32>;
+DEF_ISEL(MULX_GPR32d_GPR32d_MEMd) = MULX<R32W, M32>;
+IF_64BIT(DEF_ISEL(MULX_GPR64q_GPR64q_GPR64q) = MULX<R64W, R64>;)
+IF_64BIT(DEF_ISEL(MULX_GPR64q_GPR64q_MEMq) = MULX<R64W, M64>;)
 
 DEF_ISEL(MULPS_XMMps_MEMps) = MULPS<V128W, V128, MV128>;
 DEF_ISEL(MULPS_XMMps_XMMps) = MULPS<V128W, V128, V128>;
diff --git a/lib/Arch/X86/Semantics/ROTATE.cpp b/lib/Arch/X86/Semantics/ROTATE.cpp
index 85714e650..bfcee5d53 100644
--- a/lib/Arch/X86/Semantics/ROTATE.cpp
+++ b/lib/Arch/X86/Semantics/ROTATE.cpp
@@ -118,10 +118,10 @@ DEF_ISEL(ROR_GPR8_CL) = ROR<R8W, R8, R8>;
 DEF_ISEL_MnW_Mn_Rn(ROR_MEMv_CL, ROR);
 DEF_ISEL_RnW_Rn_Rn(ROR_GPRv_CL, ROR);
 
-DEF_ISEL(RORX_VGPR32d_VGPR32d_IMMb) = RORX<R32W, R32, I8>;
-DEF_ISEL(RORX_VGPR32d_MEMd_IMMb) = RORX<R32W, M32, I8>;
-DEF_ISEL(RORX_VGPR64q_VGPR64q_IMMb) = RORX<R64W, R64, I8>;
-DEF_ISEL(RORX_VGPR64q_MEMq_IMMb) = RORX<R64W, M64, I8>;
+DEF_ISEL(RORX_GPR32d_GPR32d_IMMb) = RORX<R32W, R32, I8>;
+DEF_ISEL(RORX_GPR32d_MEMd_IMMb) = RORX<R32W, M32, I8>;
+DEF_ISEL(RORX_GPR64q_GPR64q_IMMb) = RORX<R64W, R64, I8>;
+DEF_ISEL(RORX_GPR64q_MEMq_IMMb) = RORX<R64W, M64, I8>;
 
 namespace {
 
diff --git a/tests/X86/BINARY/MULX.S b/tests/X86/BINARY/MULX.S
index 5aff71be4..66640c0e7 100644
--- a/tests/X86/BINARY/MULX.S
+++ b/tests/X86/BINARY/MULX.S
@@ -23,7 +23,7 @@ TEST_INPUTS(
     0xFFFFFFFF, 0xFFFF,
     0xFFFFFFFF, 0xFFFFFFFF)
 
-    mov ecx, ARG1_32
+    mov edx, ARG1_32
     mov ebx, ARG2_32
 
     mulx edx, eax, ebx
@@ -40,7 +40,7 @@ TEST_INPUTS(
     0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF,
     0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF)
 
-    mov rcx, ARG1_64
+    mov rdx, ARG1_64
     mov rbx, ARG2_64
 
     mulx rdx, rax, rbx

From 14f1352d68ea20bb83a9bb191c0fb55d5667d2da Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert@gmail.com>
Date: Tue, 25 Nov 2025 20:26:47 +0000
Subject: [PATCH 5/5] Added support for FADD

---
 lib/Arch/X86/Semantics/SSE.cpp |  6 +++---
 lib/Arch/X86/Semantics/X87.cpp | 17 +++++++++++++++++
 lib/BC/InstructionLifter.cpp   | 20 +++++++++++++++-----
 3 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/lib/Arch/X86/Semantics/SSE.cpp b/lib/Arch/X86/Semantics/SSE.cpp
index dfbaa657a..2113476f3 100644
--- a/lib/Arch/X86/Semantics/SSE.cpp
+++ b/lib/Arch/X86/Semantics/SSE.cpp
@@ -1638,7 +1638,7 @@ IF_AVX(DEF_ISEL(VMOVDDUP_XMMdq_XMMq) = MOVDDUP<VV128W, V128>;)
 namespace {
 
 template <typename D, typename S1>
-DEF_SEM(SQRTSS, D dst, S1 src1) {
+DEF_SEM(SQRTSS, D dst, D _nop_read, S1 src1) {
 
   // Extract a "single-precision" (32-bit) float from [31:0] of src1 vector:
   auto src_float = FExtractV32(FReadV32(src1), 0);
@@ -1654,7 +1654,7 @@ DEF_SEM(SQRTSS, D dst, S1 src1) {
 }
 
 template <typename D, typename S1>
-DEF_SEM(RSQRTSS, D dst, S1 src1) {
+DEF_SEM(RSQRTSS, D dst, D _nop_read, S1 src1) {
 
   // Extract a "single-precision" (32-bit) float from [31:0] of src1 vector:
   auto src_float = FExtractV32(FReadV32(src1), 0);
@@ -1757,7 +1757,7 @@ DEF_HELPER(SquareRoot64, float64_t src_float)->float64_t {
 }
 
 template <typename D, typename S1>
-DEF_SEM(SQRTSD, D dst, S1 src1) {
+DEF_SEM(SQRTSD, D dst, D _nop_read, S1 src1) {
 
   // Extract a "double-precision" (64-bit) float from [63:0] of src1 vector:
   auto src_float = FExtractV64(FReadV64(src1), 0);
diff --git a/lib/Arch/X86/Semantics/X87.cpp b/lib/Arch/X86/Semantics/X87.cpp
index 8fe9304f3..84af252c2 100644
--- a/lib/Arch/X86/Semantics/X87.cpp
+++ b/lib/Arch/X86/Semantics/X87.cpp
@@ -558,6 +558,20 @@ DEF_FPU_SEM(FADDmem, RF80W dst, RF80W src1, T src2) {
   return FADD(memory, state, dst, src1, src2, pc, fop);
 }
 
+template <typename T>
+DEF_FPU_SEM(FADDmem_ST0_implicit, T src) {  // FADD with a single memory operand: ST0 = ST0 + src.
+  SetFPUIpOp();  // NOTE(review): presumably records the FPU instruction pointer/opcode -- confirm.
+  SetFPUDp(src);  // NOTE(review): presumably records `src` as the FPU data pointer -- confirm.
+  auto st0 = Read(X87_ST0);  // ST0 is read directly because it is not passed in as an operand.
+  auto result = CheckedFloatBinOp(state, FAdd80, st0, Float80(Read(src)));  // `src` is read and passed through Float80() before the add.
+  Write(X87_ST0, result);  // The sum is written back to ST0.
+
+  state.sw.c0 = UUndefined8();  // state.sw.c0, c2 and c3 are assigned undefined values after the add.
+  state.sw.c2 = UUndefined8();
+  state.sw.c3 = UUndefined8();
+  return memory;
+}
+
 template <typename T>
 DEF_FPU_SEM(FADDP, RF80W dst, RF80W src1, T src2) {
   memory = FADD<T>(memory, state, dst, src1, src2, pc, fop);
@@ -584,6 +598,9 @@ DEF_ISEL(FADDP_X87_ST0) = FADDP<RF80W>;
 DEF_ISEL(FIADD_ST0_MEMmem32int) = FIADD<M32>;
 DEF_ISEL(FIADD_ST0_MEMmem16int) = FIADD<M16>;
 
+DEF_ISEL(FADD_MEMmem32real) = FADDmem_ST0_implicit<MF32>;
+DEF_ISEL(FADD_MEMm64real) = FADDmem_ST0_implicit<MF64>;
+
 namespace {
 
 template <typename T>
diff --git a/lib/BC/InstructionLifter.cpp b/lib/BC/InstructionLifter.cpp
index 761b5609d..551c0bd80 100644
--- a/lib/BC/InstructionLifter.cpp
+++ b/lib/BC/InstructionLifter.cpp
@@ -593,17 +593,23 @@ llvm::Value *InstructionLifter::LiftRegisterOperand(Instruction &inst,
     auto arg_size = data_layout.getTypeAllocSizeInBits(arg_type);
 
     if (val_size < arg_size) {
-      // Because of using the latest version of Intex XED we support (which is currently v2025.06.08),
-      // it reports XMM/YMM registers as vectors instead of integers. When remills tries to extend/truncate
-      // these values we'll bitcast those vectors into integers 
+      // NOTE(xed2025): XED 2025 reports XMM/YMM/ZMM registers as LLVM vector types
+      // (e.g., <4 x float>) instead of integers. When remill needs to zero-extend
+      // these values to a larger integer type, we must first bitcast the vector
+      // to an integer of the same bit width, then perform the extension.
       if (arg_type->isIntegerTy()) {
         if (val_type->isVectorTy()) {
+          // Vector types can be directly bitcast to integers of the same size.
           auto int_type = llvm::Type::getIntNTy(module->getContext(), val_size);
           val = new llvm::BitCastInst(val, int_type, llvm::Twine::createNull(), block);
 
           val_type = int_type;
         } else if (val_type->isArrayTy()) {
-          // Arrays cannot be bitcast directly. Store to memory, bitcast pointer, then load.
+          // NOTE(xed2025): Some register types in remill's State structure are
+          // represented as arrays (e.g., X87 FPU stack entries as [10 x i8]).
+          // LLVM does not allow direct bitcast of array types to integers.
+          // Workaround: store array to stack, bitcast the pointer to int*, then load.
+          // This gets optimized away by LLVM but satisfies the type system.
           auto int_type = llvm::Type::getIntNTy(module->getContext(), val_size);
           auto temp_alloca = new llvm::AllocaInst(val_type, 0, llvm::Twine::createNull(), block);
           new llvm::StoreInst(val, temp_alloca, block);
@@ -633,14 +639,18 @@ llvm::Value *InstructionLifter::LiftRegisterOperand(Instruction &inst,
       }
 
     } else if (val_size > arg_size) {
+      // NOTE(xed2025): Same type conversion issue as above, but for truncation.
+      // XED 2025 may report registers as vectors/arrays that need conversion
+      // to integers before we can truncate them to the smaller argument size.
       if (arg_type->isIntegerTy()) {
         if (val_type->isVectorTy()) {
+          // Vector types can be directly bitcast to integers of the same size.
           auto int_type = llvm::Type::getIntNTy(module->getContext(), val_size);
           val = new llvm::BitCastInst(val, int_type, llvm::Twine::createNull(), block);
 
           val_type = int_type;
         } else if (val_type->isArrayTy()) {
-          // Arrays cannot be bitcast directly. Store to memory, bitcast pointer, then load.
+          // Array types require store-bitcast-load pattern (see comment above).
           auto int_type = llvm::Type::getIntNTy(module->getContext(), val_size);
           auto temp_alloca = new llvm::AllocaInst(val_type, 0, llvm::Twine::createNull(), block);
           new llvm::StoreInst(val, temp_alloca, block);