diff --git a/README.md b/README.md index 4454fe557..f97185bb7 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ Remill uses the following dependencies: | [Google Test](https://github.com/google/googletest) | v1.17.0 | | [LLVM](http://llvm.org/) | 15+ | | [Clang](http://clang.llvm.org/) | 15+ | -| [Intel XED](https://github.com/intelxed/xed) | v2022.04.17 | +| [Intel XED](https://github.com/intelxed/xed) | v2025.06.08 | | [Python](https://www.python.org/) | 3+ | ## Getting and Building the Code diff --git a/dependencies/xed.cmake b/dependencies/xed.cmake index cbc1582f3..69877bcb0 100644 --- a/dependencies/xed.cmake +++ b/dependencies/xed.cmake @@ -52,7 +52,7 @@ ExternalProject_Add(mbuild GIT_REPOSITORY "https://github.com/intelxed/mbuild" GIT_TAG - "v2022.04.17" + "v2024.11.04" GIT_PROGRESS ON CONFIGURE_COMMAND @@ -69,7 +69,7 @@ ExternalProject_Add(xed GIT_REPOSITORY "https://github.com/intelxed/xed" GIT_TAG - "v2022.04.17" + "v2025.06.08" GIT_PROGRESS ON CMAKE_CACHE_ARGS diff --git a/lib/Arch/X86/Arch.cpp b/lib/Arch/X86/Arch.cpp index 1fd767272..7e636c013 100644 --- a/lib/Arch/X86/Arch.cpp +++ b/lib/Arch/X86/Arch.cpp @@ -359,6 +359,19 @@ static bool DecodeXED(xed_decoded_inst_t *xedd, const xed_state_t *mode, auto bytes = reinterpret_cast(inst_bytes.data()); xed_decoded_inst_zero_set_mode(xedd, mode); xed_decoded_inst_set_input_chip(xedd, XED_CHIP_INVALID); + + // Enable LZCNT/TZCNT instructions (required for XED v2025+) + // in reference to the change made in this + // commit: https://github.com/intelxed/xed/commit/1bdc793f5f64cf207f6776f4c0e442e39fa47903 + // - Backward compatibility for decoder initialization of several ISA features has + // been deprecated. Previously default-on features like `P4` (PAUSE), `LZCNT` + // (replacing BSR), and `TZCNT` (replacing BSF) are now disabled by default unless + // explicitly enabled by users through the raw XED setter APIs or the + // chip/chip-features APIs. 
+ xed3_operand_set_lzcnt(xedd, 1); + xed3_operand_set_tzcnt(xedd, 1); + xed3_operand_set_p4(xedd, 1); // Enable PAUSE as well + auto err = xed_decode(xedd, bytes, static_cast(num_bytes)); if (XED_ERROR_NONE != err) { @@ -835,7 +848,8 @@ static bool IsAVX512(xed_isa_set_enum_t isa_set, xed_category_enum_t category) { case XED_ISA_SET_AVX512BW_128N: case XED_ISA_SET_AVX512BW_256: case XED_ISA_SET_AVX512BW_512: - case XED_ISA_SET_AVX512BW_KOP: + case XED_ISA_SET_AVX512BW_KOPD: + case XED_ISA_SET_AVX512BW_KOPQ: case XED_ISA_SET_AVX512CD_128: case XED_ISA_SET_AVX512CD_256: case XED_ISA_SET_AVX512CD_512: @@ -843,7 +857,8 @@ static bool IsAVX512(xed_isa_set_enum_t isa_set, xed_category_enum_t category) { case XED_ISA_SET_AVX512DQ_128N: case XED_ISA_SET_AVX512DQ_256: case XED_ISA_SET_AVX512DQ_512: - case XED_ISA_SET_AVX512DQ_KOP: + case XED_ISA_SET_AVX512DQ_KOPB: + case XED_ISA_SET_AVX512DQ_KOPW: case XED_ISA_SET_AVX512DQ_SCALAR: case XED_ISA_SET_AVX512ER_512: case XED_ISA_SET_AVX512ER_SCALAR: @@ -851,7 +866,7 @@ static bool IsAVX512(xed_isa_set_enum_t isa_set, xed_category_enum_t category) { case XED_ISA_SET_AVX512F_128N: case XED_ISA_SET_AVX512F_256: case XED_ISA_SET_AVX512F_512: - case XED_ISA_SET_AVX512F_KOP: + case XED_ISA_SET_AVX512F_KOPW: case XED_ISA_SET_AVX512F_SCALAR: case XED_ISA_SET_AVX512PF_512: case XED_ISA_SET_AVX512_4FMAPS_512: diff --git a/lib/Arch/X86/Semantics/BINARY.cpp b/lib/Arch/X86/Semantics/BINARY.cpp index f27a3fa02..f29c84578 100644 --- a/lib/Arch/X86/Semantics/BINARY.cpp +++ b/lib/Arch/X86/Semantics/BINARY.cpp @@ -400,10 +400,10 @@ DEF_ISEL(MUL_GPRv_16) = MULax; DEF_ISEL(MUL_GPRv_32) = MULeax; IF_64BIT(DEF_ISEL(MUL_GPRv_64) = MULrax;) -DEF_ISEL(MULX_VGPR32d_VGPR32d_VGPR32d) = MULX; -DEF_ISEL(MULX_VGPR32d_VGPR32d_MEMd) = MULX; -IF_64BIT(DEF_ISEL(MULX_VGPR64q_VGPR64q_VGPR64q) = MULX;) -IF_64BIT(DEF_ISEL(MULX_VGPR64q_VGPR64q_MEMq) = MULX;) +DEF_ISEL(MULX_GPR32d_GPR32d_GPR32d) = MULX; +DEF_ISEL(MULX_GPR32d_GPR32d_MEMd) = MULX; 
+IF_64BIT(DEF_ISEL(MULX_GPR64q_GPR64q_GPR64q) = MULX;) +IF_64BIT(DEF_ISEL(MULX_GPR64q_GPR64q_MEMq) = MULX;) DEF_ISEL(MULPS_XMMps_MEMps) = MULPS; DEF_ISEL(MULPS_XMMps_XMMps) = MULPS; diff --git a/lib/Arch/X86/Semantics/ROTATE.cpp b/lib/Arch/X86/Semantics/ROTATE.cpp index 85714e650..bfcee5d53 100644 --- a/lib/Arch/X86/Semantics/ROTATE.cpp +++ b/lib/Arch/X86/Semantics/ROTATE.cpp @@ -118,10 +118,10 @@ DEF_ISEL(ROR_GPR8_CL) = ROR; DEF_ISEL_MnW_Mn_Rn(ROR_MEMv_CL, ROR); DEF_ISEL_RnW_Rn_Rn(ROR_GPRv_CL, ROR); -DEF_ISEL(RORX_VGPR32d_VGPR32d_IMMb) = RORX; -DEF_ISEL(RORX_VGPR32d_MEMd_IMMb) = RORX; -DEF_ISEL(RORX_VGPR64q_VGPR64q_IMMb) = RORX; -DEF_ISEL(RORX_VGPR64q_MEMq_IMMb) = RORX; +DEF_ISEL(RORX_GPR32d_GPR32d_IMMb) = RORX; +DEF_ISEL(RORX_GPR32d_MEMd_IMMb) = RORX; +DEF_ISEL(RORX_GPR64q_GPR64q_IMMb) = RORX; +DEF_ISEL(RORX_GPR64q_MEMq_IMMb) = RORX; namespace { diff --git a/lib/Arch/X86/Semantics/SSE.cpp b/lib/Arch/X86/Semantics/SSE.cpp index dfbaa657a..2113476f3 100644 --- a/lib/Arch/X86/Semantics/SSE.cpp +++ b/lib/Arch/X86/Semantics/SSE.cpp @@ -1638,7 +1638,7 @@ IF_AVX(DEF_ISEL(VMOVDDUP_XMMdq_XMMq) = MOVDDUP;) namespace { template -DEF_SEM(SQRTSS, D dst, S1 src1) { +DEF_SEM(SQRTSS, D dst, D _nop_read, S1 src1) { // Extract a "single-precision" (32-bit) float from [31:0] of src1 vector: auto src_float = FExtractV32(FReadV32(src1), 0); @@ -1654,7 +1654,7 @@ DEF_SEM(SQRTSS, D dst, S1 src1) { } template -DEF_SEM(RSQRTSS, D dst, S1 src1) { +DEF_SEM(RSQRTSS, D dst, D _nop_read, S1 src1) { // Extract a "single-precision" (32-bit) float from [31:0] of src1 vector: auto src_float = FExtractV32(FReadV32(src1), 0); @@ -1757,7 +1757,7 @@ DEF_HELPER(SquareRoot64, float64_t src_float)->float64_t { } template -DEF_SEM(SQRTSD, D dst, S1 src1) { +DEF_SEM(SQRTSD, D dst, D _nop_read, S1 src1) { // Extract a "double-precision" (64-bit) float from [63:0] of src1 vector: auto src_float = FExtractV64(FReadV64(src1), 0); diff --git a/lib/Arch/X86/Semantics/X87.cpp b/lib/Arch/X86/Semantics/X87.cpp 
index 8fe9304f3..84af252c2 100644 --- a/lib/Arch/X86/Semantics/X87.cpp +++ b/lib/Arch/X86/Semantics/X87.cpp @@ -558,6 +558,20 @@ DEF_FPU_SEM(FADDmem, RF80W dst, RF80W src1, T src2) { return FADD(memory, state, dst, src1, src2, pc, fop); } +template +DEF_FPU_SEM(FADDmem_ST0_implicit, T src) { + SetFPUIpOp(); + SetFPUDp(src); + auto st0 = Read(X87_ST0); + auto result = CheckedFloatBinOp(state, FAdd80, st0, Float80(Read(src))); + Write(X87_ST0, result); + + state.sw.c0 = UUndefined8(); + state.sw.c2 = UUndefined8(); + state.sw.c3 = UUndefined8(); + return memory; +} + template DEF_FPU_SEM(FADDP, RF80W dst, RF80W src1, T src2) { memory = FADD(memory, state, dst, src1, src2, pc, fop); @@ -584,6 +598,9 @@ DEF_ISEL(FADDP_X87_ST0) = FADDP; DEF_ISEL(FIADD_ST0_MEMmem32int) = FIADD; DEF_ISEL(FIADD_ST0_MEMmem16int) = FIADD; +DEF_ISEL(FADD_MEMmem32real) = FADDmem_ST0_implicit; +DEF_ISEL(FADD_MEMm64real) = FADDmem_ST0_implicit; + namespace { template diff --git a/lib/BC/InstructionLifter.cpp b/lib/BC/InstructionLifter.cpp index b0b9f5549..551c0bd80 100644 --- a/lib/BC/InstructionLifter.cpp +++ b/lib/BC/InstructionLifter.cpp @@ -593,7 +593,31 @@ llvm::Value *InstructionLifter::LiftRegisterOperand(Instruction &inst, auto arg_size = data_layout.getTypeAllocSizeInBits(arg_type); if (val_size < arg_size) { + // NOTE(xed2025): XED 2025 reports XMM/YMM/ZMM registers as LLVM vector types + // (e.g., <4 x float>) instead of integers. When remill needs to zero-extend + // these values to a larger integer type, we must first bitcast the vector + // to an integer of the same bit width, then perform the extension. if (arg_type->isIntegerTy()) { + if (val_type->isVectorTy()) { + // Vector types can be directly bitcast to integers of the same size. 
+ auto int_type = llvm::Type::getIntNTy(module->getContext(), val_size); + val = new llvm::BitCastInst(val, int_type, llvm::Twine::createNull(), block); + + val_type = int_type; + } else if (val_type->isArrayTy()) { + // NOTE(xed2025): Some register types in remill's State structure are + // represented as arrays (e.g., X87 FPU stack entries as [10 x i8]). + // LLVM does not allow direct bitcast of array types to integers. + // Workaround: store array to stack, bitcast the pointer to int*, then load. + // This gets optimized away by LLVM but satisfies the type system. + auto int_type = llvm::Type::getIntNTy(module->getContext(), val_size); + auto temp_alloca = new llvm::AllocaInst(val_type, 0, llvm::Twine::createNull(), block); + new llvm::StoreInst(val, temp_alloca, block); + auto int_ptr = new llvm::BitCastInst(temp_alloca, llvm::PointerType::get(int_type, 0), + llvm::Twine::createNull(), block); + val = new llvm::LoadInst(int_type, int_ptr, llvm::Twine::createNull(), block); + val_type = int_type; + } CHECK(val_type->isIntegerTy()) << "Expected " << arch_reg.name << " to be an integral type (" << "val_type: " << LLVMThingToString(val_type) << ", " @@ -615,7 +639,27 @@ llvm::Value *InstructionLifter::LiftRegisterOperand(Instruction &inst, } } else if (val_size > arg_size) { + // NOTE(xed2025): Same type conversion issue as above, but for truncation. + // XED 2025 may report registers as vectors/arrays that need conversion + // to integers before we can truncate them to the smaller argument size. if (arg_type->isIntegerTy()) { + if (val_type->isVectorTy()) { + // Vector types can be directly bitcast to integers of the same size. + auto int_type = llvm::Type::getIntNTy(module->getContext(), val_size); + val = new llvm::BitCastInst(val, int_type, llvm::Twine::createNull(), block); + + val_type = int_type; + } else if (val_type->isArrayTy()) { + // Array types require store-bitcast-load pattern (see comment above). 
+ auto int_type = llvm::Type::getIntNTy(module->getContext(), val_size); + auto temp_alloca = new llvm::AllocaInst(val_type, 0, llvm::Twine::createNull(), block); + new llvm::StoreInst(val, temp_alloca, block); + auto int_ptr = new llvm::BitCastInst(temp_alloca, llvm::PointerType::get(int_type, 0), + llvm::Twine::createNull(), block); + val = new llvm::LoadInst(int_type, int_ptr, llvm::Twine::createNull(), block); + val_type = int_type; + } + CHECK(val_type->isIntegerTy()) << "Expected " << arch_reg.name << " to be an integral type (" << "val_type: " << LLVMThingToString(val_type) << ", " diff --git a/tests/X86/BINARY/MULX.S b/tests/X86/BINARY/MULX.S index 5aff71be4..66640c0e7 100644 --- a/tests/X86/BINARY/MULX.S +++ b/tests/X86/BINARY/MULX.S @@ -23,7 +23,7 @@ TEST_INPUTS( 0xFFFFFFFF, 0xFFFF, 0xFFFFFFFF, 0xFFFFFFFF) - mov ecx, ARG1_32 + mov edx, ARG1_32 mov ebx, ARG2_32 mulx edx, eax, ebx @@ -40,7 +40,7 @@ TEST_INPUTS( 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF) - mov rcx, ARG1_64 + mov rdx, ARG1_64 mov rbx, ARG2_64 mulx rdx, rax, rbx