From 91ec25a74e71a002199b105bbe34b37ddf14e797 Mon Sep 17 00:00:00 2001 From: kyle-elliott-tob Date: Mon, 24 Nov 2025 17:23:53 -0500 Subject: [PATCH 1/7] Initial BMI1 support - Implement ANDN, BEXTR, BLSI, BLSMSK, and BLSR with tests - Move TZCNT to be with the rest of the BMI instructions - Remove erroneous TZCNT r8 rm8 instruction --- lib/Arch/X86/Runtime/CMakeLists.txt | 1 + lib/Arch/X86/Semantics/BITBYTE.cpp | 14 -- lib/Arch/X86/Semantics/BMI.cpp | 143 +++++++++++++++++++ tests/X86/BMI/ANDN.S | 122 ++++++++++++++++ tests/X86/BMI/BEXTR.S | 165 ++++++++++++++++++++++ tests/X86/BMI/BLSI.S | 183 ++++++++++++++++++++++++ tests/X86/BMI/BLSMSK.S | 209 +++++++++++++++++++++++++++ tests/X86/BMI/BLSR.S | 212 ++++++++++++++++++++++++++++ tests/X86/{BITBYTE => BMI}/TZCNT.S | 0 tests/X86/Tests.S | 8 +- 10 files changed, 1042 insertions(+), 15 deletions(-) create mode 100644 lib/Arch/X86/Semantics/BMI.cpp create mode 100644 tests/X86/BMI/ANDN.S create mode 100644 tests/X86/BMI/BEXTR.S create mode 100644 tests/X86/BMI/BLSI.S create mode 100644 tests/X86/BMI/BLSMSK.S create mode 100644 tests/X86/BMI/BLSR.S rename tests/X86/{BITBYTE => BMI}/TZCNT.S (100%) diff --git a/lib/Arch/X86/Runtime/CMakeLists.txt b/lib/Arch/X86/Runtime/CMakeLists.txt index 15fd4cdca..a84356e60 100644 --- a/lib/Arch/X86/Runtime/CMakeLists.txt +++ b/lib/Arch/X86/Runtime/CMakeLists.txt @@ -88,6 +88,7 @@ function(add_runtime_helper target_name address_bit_size enable_avx enable_avx51 "${REMILL_LIB_DIR}/Arch/X86/Semantics/X87.cpp" "${REMILL_LIB_DIR}/Arch/X86/Semantics/COND_BR.cpp" "${REMILL_LIB_DIR}/Arch/X86/Semantics/INTERRUPT.cpp" + "${REMILL_LIB_DIR}/Arch/X86/Semantics/BMI.cpp" ) endfunction() diff --git a/lib/Arch/X86/Semantics/BITBYTE.cpp b/lib/Arch/X86/Semantics/BITBYTE.cpp index ffea5d274..9df549888 100644 --- a/lib/Arch/X86/Semantics/BITBYTE.cpp +++ b/lib/Arch/X86/Semantics/BITBYTE.cpp @@ -301,17 +301,6 @@ DEF_SEM(BSWAP_64, R64W dst, R64 src) { } #endif // 64 == ADDRESS_SIZE_BITS -template 
-DEF_SEM(TZCNT, D dst, S src) { - auto val = Read(src); - auto count = CountTrailingZeros(val); - ClearArithFlags(); - Write(FLAG_ZF, UCmpEq(UAnd(val, 1), 1)); - Write(FLAG_CF, ZeroFlag(val)); - WriteZExt(dst, Select(FLAG_CF, BitSizeOf(src), count)); - return memory; -} - template DEF_SEM(LZCNT, D dst, S src) { auto val = Read(src); @@ -329,9 +318,6 @@ DEF_ISEL(BSWAP_GPRv_16) = BSWAP_16; DEF_ISEL(BSWAP_GPRv_32) = BSWAP_32; IF_64BIT(DEF_ISEL(BSWAP_GPRv_64) = BSWAP_64;) -DEF_ISEL_RnW_Mn(TZCNT_GPRv_MEMv, TZCNT); -DEF_ISEL_RnW_Rn(TZCNT_GPRv_GPRv, TZCNT); - DEF_ISEL_RnW_Mn(LZCNT_GPRv_MEMv, LZCNT); DEF_ISEL_RnW_Rn(LZCNT_GPRv_GPRv, LZCNT); diff --git a/lib/Arch/X86/Semantics/BMI.cpp b/lib/Arch/X86/Semantics/BMI.cpp new file mode 100644 index 000000000..7af5e3b03 --- /dev/null +++ b/lib/Arch/X86/Semantics/BMI.cpp @@ -0,0 +1,143 @@ +/* +* Copyright (c) 2025 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +namespace { + +template +ALWAYS_INLINE void SetFlagsBMI(State &state, T lhs, T rhs, T res) { + state.aflag.cf = false; + state.aflag.pf = __remill_undefined_8(); + state.aflag.zf = ZeroFlag(res, lhs, rhs); + state.aflag.sf = SignFlag(res, lhs, rhs); + state.aflag.of = false; + state.aflag.af = __remill_undefined_8(); +} + +template +DEF_SEM(ANDN, D dst, S1 src1, S2 src2) { + auto lhs = Read(src1); + auto rhs = Read(src2); + auto res = UAnd(UNot(lhs), rhs); + WriteZExt(dst, res); + SetFlagsBMI(state, lhs, rhs, res); + return memory; +} + +template +DEF_SEM(BEXTR, D dst, S1 src1, S2 src2) { + auto source = Read(src1); + auto control = Read(src2); + + // Extract start position from bits [7:0] + auto start = ZExtTo(UAnd(control, Literal(0xFF))); + // Extract length from bits [15:8] + auto length = ZExtTo(UAnd(UShr(control, Literal(8)), Literal(0xFF))); + + // Constrain start and length to operand size to avoid undefined behavior + start = URem(start, BitSizeOf(src1)); + length = URem(length, BitSizeOf(src1)); + + // Extract bits: (source >> start) & ((1 << length) - 1) + auto shifted = UShr(source, start); + auto mask = USub(UShl(Literal(1), length), Literal(1)); + auto result = UAnd(shifted, mask); + + WriteZExt(dst, result); + + // Set flags according to Intel specification + Write(FLAG_ZF, ZeroFlag(result, source, control)); + Write(FLAG_OF, false); + Write(FLAG_CF, false); + Write(FLAG_AF, __remill_undefined_8()); + Write(FLAG_SF, __remill_undefined_8()); + Write(FLAG_PF, __remill_undefined_8()); + + return memory; +} + +template +DEF_SEM(BLSI, D dst, S src) { + auto val = Read(src); + auto res = UAnd(UNeg(val), val); + WriteZExt(dst, res); + SetFlagsBMI(state, val, val, res); + Write(FLAG_CF, ZeroFlag(res, val, val)); + return memory; +} + +template +DEF_SEM(BLSMSK, D dst, S src) { + auto val = Read(src); + auto res = UXor(USub(val, Literal(1)), val); + WriteZExt(dst, res); + SetFlagsBMI(state, val, val, res); + Write(FLAG_CF, UCmpEq(val, 0)); + 
Write(FLAG_ZF, false); + return memory; +} + +template +DEF_SEM(BLSR, D dst, S src) { + auto val = Read(src); + auto res = UAnd(USub(val, Literal(1)), val); + WriteZExt(dst, res); + SetFlagsBMI(state, val, val, res); + Write(FLAG_CF, UCmpEq(val, 0)); + return memory; +} + +template +DEF_SEM(TZCNT, D dst, S src) { + auto val = Read(src); + auto count = CountTrailingZeros(val); + ClearArithFlags(); + Write(FLAG_ZF, UCmpEq(UAnd(val, Literal(1)), 1)); + Write(FLAG_CF, ZeroFlag(val)); + WriteZExt(dst, Select(FLAG_CF, BitSizeOf(src), count)); + return memory; +} + +DEF_ISEL(ANDN_GPRv_GPRv_GPRv_32) = ANDN; +DEF_ISEL(ANDN_GPRv_GPRv_MEMv_32) = ANDN; +IF_64BIT(DEF_ISEL(ANDN_GPRv_GPRv_GPRv_64) = ANDN;) +IF_64BIT(DEF_ISEL(ANDN_GPRv_GPRv_MEMv_64) = ANDN;) + +DEF_ISEL(BEXTR_VGPR32d_VGPR32d_VGPR32d) = BEXTR; +DEF_ISEL(BEXTR_VGPR32d_MEMd_VGPR32d) = BEXTR; +IF_64BIT(DEF_ISEL(BEXTR_VGPR64q_VGPR64q_VGPR64q) = BEXTR;) +IF_64BIT(DEF_ISEL(BEXTR_VGPR64q_MEMq_VGPR64q) = BEXTR;) + +DEF_ISEL(BLSI_GPRv_GPRv_32) = BLSI; +DEF_ISEL(BLSI_GPRv_MEMv_32) = BLSI; +IF_64BIT(DEF_ISEL(BLSI_GPRv_GPRv_64) = BLSI;) +IF_64BIT(DEF_ISEL(BLSI_GPRv_MEMv_64) = BLSI;) + +DEF_ISEL(BLSMSK_GPRv_GPRv_32) = BLSMSK; +DEF_ISEL(BLSMSK_GPRv_MEMv_32) = BLSMSK; +IF_64BIT(DEF_ISEL(BLSMSK_GPRv_GPRv_64) = BLSMSK;) +IF_64BIT(DEF_ISEL(BLSMSK_GPRv_MEMv_64) = BLSMSK;) + +DEF_ISEL(BLSR_GPRv_GPRv_32) = BLSR; +DEF_ISEL(BLSR_GPRv_MEMv_32) = BLSR; +IF_64BIT(DEF_ISEL(BLSR_GPRv_GPRv_64) = BLSR;) +IF_64BIT(DEF_ISEL(BLSR_GPRv_MEMv_64) = BLSR;) + +DEF_ISEL(TZCNT_GPRv_GPRv_16) = TZCNT; +DEF_ISEL(TZCNT_GPRv_MEMv_16) = TZCNT; +DEF_ISEL(TZCNT_GPRv_GPRv_32) = TZCNT; +DEF_ISEL(TZCNT_GPRv_GPRv_32) = TZCNT; +IF_64BIT(DEF_ISEL(TZCNT_GPRv_GPRv_64) = TZCNT;) +IF_64BIT(DEF_ISEL(TZCNT_GPRv_MEMv_64) = TZCNT;) diff --git a/tests/X86/BMI/ANDN.S b/tests/X86/BMI/ANDN.S new file mode 100644 index 000000000..e507f02c7 --- /dev/null +++ b/tests/X86/BMI/ANDN.S @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2025 Trail of Bits, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ANDN - Logical AND NOT + * Format: ANDN r32, r32, r/m32 + * Operation: dest = (~src1) & src2 + * Flags: SF, ZF set; CF, OF cleared; AF, PF undefined + */ + +TEST_BEGIN(ANDNr32r32r32_basic, 2) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 0x00000000, 0x00000000, // (~0) & 0 = 0xFFFFFFFF & 0 = 0 + 0x00000000, 0xFFFFFFFF, // (~0) & 0xFFFFFFFF = 0xFFFFFFFF (all bits set) + 0xFFFFFFFF, 0x00000000, // (~0xFFFFFFFF) & 0 = 0 (zero result, ZF=1) + 0xFFFFFFFF, 0xFFFFFFFF, // (~0xFFFFFFFF) & 0xFFFFFFFF = 0 (ZF=1) + 0xAAAAAAAA, 0x55555555, // (~0xAAAAAAAA) & 0x55555555 = 0x55555555 & 0x55555555 + 0x12345678, 0x9ABCDEF0, // Mixed bits + 0x80000000, 0x80000000, // Sign bit: (~0x80000000) & 0x80000000 = 0 + 0x7FFFFFFF, 0xFFFFFFFF) // (~0x7FFFFFFF) & 0xFFFFFFFF = 0x80000000 (SF=1) + + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz andn_nop1 + + // ANDN supported, proceed with test: + mov eax, ARG1_32 + mov edx, ARG2_32 + andn ecx, eax, edx + +andn_nop1: nop + +TEST_END + +TEST_BEGIN_64(ANDNr64r64r64_basic, 2) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0x0000000000000000, // Zero result + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, // Zero result + 0xAAAAAAAAAAAAAAAA, 0x5555555555555555, + 0x123456789ABCDEF0, 0xFEDCBA9876543210, + 
0x8000000000000000, 0x8000000000000000, // Sign bit + 0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF) // Result: 0x8000000000000000 (SF=1) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz andn_nop2 + + mov rax, ARG1_64 + mov rdx, ARG2_64 + andn rcx, rax, rdx + +andn_nop2: nop + +TEST_END_64 + +TEST_BEGIN(ANDNr32r32m32, 2) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 0xFFFFFFFF, 0x12345678, + 0x00000000, 0xABCDEF01, + 0x55555555, 0xAAAAAAAA) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz andn_nop3 + + push ARG2_64 + mov eax, ARG1_32 + andn ecx, eax, DWORD PTR [rsp] + add rsp, 8 + +andn_nop3: nop + +TEST_END + +TEST_BEGIN_64(ANDNr64r64m64, 2) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 0xFFFFFFFFFFFFFFFF, 0x123456789ABCDEF0, + 0x0000000000000000, 0xFEDCBA9876543210, + 0xAAAAAAAAAAAAAAAA, 0x5555555555555555) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz andn_nop4 + + push ARG2_64 + mov rax, ARG1_64 + andn rcx, rax, QWORD PTR [rsp] + add rsp, 8 + +andn_nop4: nop + +TEST_END_64 diff --git a/tests/X86/BMI/BEXTR.S b/tests/X86/BMI/BEXTR.S new file mode 100644 index 000000000..f8daab8ce --- /dev/null +++ b/tests/X86/BMI/BEXTR.S @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2025 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* BEXTR - Bit Field Extract + * Format: BEXTR r32, r/m32, r32 + * Control operand format: [15:8] = length, [7:0] = start + */ + +TEST_BEGIN(BEXTRr32r32r32_basic, 2) +TEST_IGNORE_FLAGS(AF SF PF) +TEST_INPUTS( + 0xFFFFFFFF, 0x0800, // Extract 8 bits starting at position 0 + 0x12345678, 0x0808, // Extract 8 bits starting at position 8 + 0xABCDEF01, 0x1000, // Extract 16 bits starting at position 0 + 0xDEADBEEF, 0x0810, // Extract 8 bits starting at position 16 + 0x87654321, 0x2000, // Extract 32 bits starting at position 0 + 0xFFFFFFFF, 0x0000, // Extract 0 bits (length=0) + 0x12345678, 0x0100, // Extract 1 bit at position 0 + 0xAAAAAAAA, 0x0401) // Extract 4 bits starting at position 1 + + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz bextr_nop1 + + // BEXTR supported, proceed with test: + mov eax, ARG1_32 + mov edx, ARG2_32 + bextr ecx, eax, edx + +bextr_nop1: nop + +TEST_END + +TEST_BEGIN(BEXTRr32r32r32_zero, 2) +TEST_IGNORE_FLAGS(AF SF PF) +TEST_INPUTS( + 0x00000000, 0x0800, // Extract from zero source + 0xFFFFFFFF, 0x081F, // Extract 8 bits at position 31 (should be zero) + 0x12345678, 0x0820) // Extract 8 bits at position 32+ (should be zero) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz bextr_nop2 + + mov eax, ARG1_32 + mov edx, ARG2_32 + bextr ecx, eax, edx + +bextr_nop2: nop + +TEST_END + +TEST_BEGIN_64(BEXTRr64r64r64_basic, 2) +TEST_IGNORE_FLAGS(AF SF PF) +TEST_INPUTS( + 0xFFFFFFFFFFFFFFFF, 0x0800, // Extract 8 bits starting at position 0 + 0x123456789ABCDEF0, 0x0808, // Extract 8 bits starting at position 8 + 0xDEADBEEFCAFEBABE, 0x1000, // Extract 16 bits starting at position 0 + 0x0123456789ABCDEF, 0x0820, // Extract 8 bits starting at position 32 + 0xFFFFFFFF00000000, 0x2020, // Extract 32 bits starting at position 32 + 0xAAAAAAAAAAAAAAAA, 0x4000, // Extract 64 bits starting at position 0 + 
0x123456789ABCDEF0, 0x0000, // Extract 0 bits (length=0) + 0x8000000000000000, 0x013F) // Extract 1 bit at position 63 + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz bextr_nop3 + + mov rax, ARG1_64 + mov rdx, ARG2_64 + bextr rcx, rax, rdx + +bextr_nop3: nop + +TEST_END_64 + +TEST_BEGIN_64(BEXTRr64r64r64_zero, 2) +TEST_IGNORE_FLAGS(AF SF PF) +TEST_INPUTS( + 0x0000000000000000, 0x0800, // Extract from zero source + 0xFFFFFFFFFFFFFFFF, 0x083F, // Extract 8 bits at position 63 (should be 1 bit) + 0x123456789ABCDEF0, 0x0840) // Extract 8 bits at position 64+ (should be zero) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz bextr_nop4 + + mov rax, ARG1_64 + mov rdx, ARG2_64 + bextr rcx, rax, rdx + +bextr_nop4: nop + +TEST_END_64 + +TEST_BEGIN(BEXTRr32m32r32, 2) +TEST_IGNORE_FLAGS(AF SF PF) +TEST_INPUTS( + 0xDEADBEEF, 0x0800, + 0x12345678, 0x0C10, + 0xFFFFFFFF, 0x1000) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz bextr_nop5 + + push ARG1_64 + mov edx, ARG2_32 + bextr ecx, DWORD PTR [rsp], edx + add rsp, 8 + +bextr_nop5: nop + +TEST_END + +TEST_BEGIN_64(BEXTRr64m64r64, 2) +TEST_IGNORE_FLAGS(AF SF PF) +TEST_INPUTS( + 0xDEADBEEFCAFEBABE, 0x0800, + 0x123456789ABCDEF0, 0x1010, + 0xFFFFFFFFFFFFFFFF, 0x2000) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz bextr_nop6 + + push ARG1_64 + mov rdx, ARG2_64 + bextr rcx, QWORD PTR [rsp], rdx + add rsp, 8 + +bextr_nop6: nop + +TEST_END_64 diff --git a/tests/X86/BMI/BLSI.S b/tests/X86/BMI/BLSI.S new file mode 100644 index 000000000..32e5b0840 --- /dev/null +++ b/tests/X86/BMI/BLSI.S @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2025 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* BLSI - Extract Lowest Set Isolated Bit + * Format: BLSI r32, r/m32 + * Operation: dest = (-src) & src + * Flags: CF set if src!=0; ZF, SF set based on result; OF cleared; AF, PF undefined + */ + +TEST_BEGIN(BLSIr32r32_zero, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS(0) + + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsi_nop1 + + // BLSI with zero input: result=0, CF=0, ZF=1 + mov eax, ARG1_32 + blsi edx, eax + +blsi_nop1: nop + +TEST_END + +TEST_BEGIN(BLSIr32r32_basic, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 1, // Result: 1, CF=1 + 2, // Result: 2, CF=1 + 3, // Result: 1 (lowest bit), CF=1 + 0x80000000, // MSB set: result=0x80000000 (SF=1), CF=1 + 0x80000001, // MSB and LSB: result=1, CF=1 + 0xFFFFFFFF, // All bits: result=1 (lowest), CF=1 + 0xFFFFFFFE, // All but LSB: result=2, CF=1 + 0x12345678, // Result: 8 (bit 3), CF=1 + 0xAAAAAAAA, // Alternating bits: result=2, CF=1 + 0x55555555) // Alternating bits: result=1, CF=1 + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsi_nop2 + + mov eax, ARG1_32 + blsi edx, eax + +blsi_nop2: nop + +TEST_END + +TEST_BEGIN(BLSIr32r32_powers_of_two, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 0x00000001, // 2^0 + 0x00000002, // 2^1 + 0x00000004, // 2^2 + 0x00000100, // 2^8 + 0x00010000, // 2^16 + 0x40000000, // 2^30 + 0x80000000) // 2^31 (sign bit) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 
+ jz blsi_nop3 + + mov eax, ARG1_32 + blsi edx, eax + +blsi_nop3: nop + +TEST_END + +TEST_BEGIN_64(BLSIr64r64_zero, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS(0) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsi_nop4 + + mov rax, ARG1_64 + blsi rdx, rax + +blsi_nop4: nop + +TEST_END_64 + +TEST_BEGIN_64(BLSIr64r64_basic, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 1, + 2, + 3, + 0x8000000000000000, // MSB: result=0x8000000000000000 (SF=1) + 0x8000000000000001, // MSB and LSB: result=1 + 0xFFFFFFFFFFFFFFFF, // All bits: result=1 + 0xFFFFFFFFFFFFFFFE, // All but LSB: result=2 + 0x123456789ABCDEF0, // Result: 0x10 (bit 4) + 0xAAAAAAAAAAAAAAAA, // Result: 2 + 0x5555555555555555) // Result: 1 + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsi_nop5 + + mov rax, ARG1_64 + blsi rdx, rax + +blsi_nop5: nop + +TEST_END_64 + +TEST_BEGIN(BLSIr32m32, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 0, + 0xFFFFFFFF, + 0x12345678) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsi_nop6 + + push ARG1_64 + blsi edx, DWORD PTR [rsp] + add rsp, 8 + +blsi_nop6: nop + +TEST_END + +TEST_BEGIN_64(BLSIr64m64, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 0, + 0xFFFFFFFFFFFFFFFF, + 0x123456789ABCDEF0) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsi_nop7 + + push ARG1_64 + blsi rdx, QWORD PTR [rsp] + add rsp, 8 + +blsi_nop7: nop + +TEST_END_64 diff --git a/tests/X86/BMI/BLSMSK.S b/tests/X86/BMI/BLSMSK.S new file mode 100644 index 000000000..d619105e5 --- /dev/null +++ b/tests/X86/BMI/BLSMSK.S @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2025 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* BLSMSK - Get Mask Up to Lowest Set Bit + * Format: BLSMSK r32, r/m32 + * Operation: dest = (src - 1) ^ src + * Flags: CF set if src==0; ZF cleared (always 0); SF set based on result; OF cleared; AF, PF undefined + */ + +TEST_BEGIN(BLSMSKr32r32_zero, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS(0) + + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsmsk_nop1 + + // BLSMSK with zero: result=0xFFFFFFFF, CF=1, ZF=0 + mov eax, ARG1_32 + blsmsk edx, eax + +blsmsk_nop1: nop + +TEST_END + +TEST_BEGIN(BLSMSKr32r32_basic, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 1, // (0) ^ 1 = 1, CF=0 + 2, // (1) ^ 2 = 3, CF=0 + 3, // (2) ^ 3 = 1, CF=0 + 4, // (3) ^ 4 = 7, CF=0 + 0x80000000, // Result: 0x7FFFFFFF ^ 0x80000000 = 0xFFFFFFFF (SF=1), CF=0 + 0xFFFFFFFF, // (0xFFFFFFFE) ^ 0xFFFFFFFF = 1, CF=0 + 0xFFFFFFFE, // Result: 3, CF=0 + 0x12345678, // Result based on bit 3 (LSB at position 3) + 0xAAAAAAAA, // Alternating bits, LSB at position 1 + 0x55555555) // Alternating bits, LSB at position 0 + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsmsk_nop2 + + mov eax, ARG1_32 + blsmsk edx, eax + +blsmsk_nop2: nop + +TEST_END + +TEST_BEGIN(BLSMSKr32r32_powers_of_two, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 0x00000001, // Result: 1 + 0x00000002, // Result: 3 + 0x00000004, // Result: 7 + 0x00000008, // Result: 15 + 0x00000100, // Result: 0x1FF + 0x00010000, // Result: 0x1FFFF + 0x40000000, // Result: 0x7FFFFFFF +
0x80000000) // Result: 0xFFFFFFFF (all bits, SF=1) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsmsk_nop3 + + mov eax, ARG1_32 + blsmsk edx, eax + +blsmsk_nop3: nop + +TEST_END + +TEST_BEGIN_64(BLSMSKr64r64_zero, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS(0) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsmsk_nop4 + + // Result: 0xFFFFFFFFFFFFFFFF, CF=1 + mov rax, ARG1_64 + blsmsk rdx, rax + +blsmsk_nop4: nop + +TEST_END_64 + +TEST_BEGIN_64(BLSMSKr64r64_basic, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 1, + 2, + 3, + 0x8000000000000000, // Result: 0xFFFFFFFFFFFFFFFF (SF=1) + 0xFFFFFFFFFFFFFFFF, // Result: 1 + 0xFFFFFFFFFFFFFFFE, // Result: 3 + 0x123456789ABCDEF0, // Result based on bit 4 + 0xAAAAAAAAAAAAAAAA, // Result: 3 + 0x5555555555555555) // Result: 1 + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsmsk_nop5 + + mov rax, ARG1_64 + blsmsk rdx, rax + +blsmsk_nop5: nop + +TEST_END_64 + +TEST_BEGIN_64(BLSMSKr64r64_powers_of_two, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 0x0000000000000001, // Result: 1 + 0x0000000000000002, // Result: 3 + 0x0000000000000100, // Result: 0x1FF + 0x0000000000010000, // Result: 0x1FFFF + 0x0000000100000000, // Result: 0x1FFFFFFFF + 0x4000000000000000, // Result: 0x3FFFFFFFFFFFFFFF + 0x8000000000000000) // Result: 0xFFFFFFFFFFFFFFFF (SF=1) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsmsk_nop6 + + mov rax, ARG1_64 + blsmsk rdx, rax + +blsmsk_nop6: nop + +TEST_END_64 + +TEST_BEGIN(BLSMSKr32m32, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 0, + 0x00000001, + 0x12345678) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsmsk_nop7 + + push ARG1_64 + blsmsk edx, DWORD PTR [rsp] + add rsp, 8 + +blsmsk_nop7: nop + +TEST_END +
+TEST_BEGIN_64(BLSMSKr64m64, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 0, + 0x0000000000000001, + 0x123456789ABCDEF0) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsmsk_nop8 + + push ARG1_64 + blsmsk rdx, QWORD PTR [rsp] + add rsp, 8 + +blsmsk_nop8: nop + +TEST_END_64 diff --git a/tests/X86/BMI/BLSR.S b/tests/X86/BMI/BLSR.S new file mode 100644 index 000000000..0f33ab2b5 --- /dev/null +++ b/tests/X86/BMI/BLSR.S @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2025 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* BLSR - Reset Lowest Set Bit + * Format: BLSR r32, r/m32 + * Operation: dest = (src - 1) & src + * Flags: CF set if src==0; ZF, SF set based on result; OF cleared; AF, PF undefined + */ + +TEST_BEGIN(BLSRr32r32_zero, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS(0) + + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsr_nop1 + + // BLSR with zero: result=0, CF=1, ZF=1 + mov eax, ARG1_32 + blsr edx, eax + +blsr_nop1: nop + +TEST_END + +TEST_BEGIN(BLSRr32r32_single_bit, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 1, // Result: 0, CF=0, ZF=1 + 2, // Result: 0, CF=0, ZF=1 + 4, // Result: 0, CF=0, ZF=1 + 8, // Result: 0, CF=0, ZF=1 + 0x00000100, // Result: 0, CF=0, ZF=1 + 0x00010000, // Result: 0, CF=0, ZF=1 + 0x40000000, // Result: 0, CF=0, ZF=1 + 0x80000000) // Result: 0, CF=0, ZF=1 (MSB) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsr_nop2 + + mov eax, ARG1_32 + blsr edx, eax + +blsr_nop2: nop + +TEST_END + +TEST_BEGIN(BLSRr32r32_multiple_bits, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 3, // (2) & 3 = 2, CF=0 + 5, // (4) & 5 = 4, CF=0 + 7, // (6) & 7 = 6, CF=0 + 0xFFFFFFFF, // (0xFFFFFFFE) & 0xFFFFFFFF = 0xFFFFFFFE (SF=1), CF=0 + 0xFFFFFFFE, // Result: 0xFFFFFFFC (SF=1), CF=0 + 0x12345678, // Clear bit 3 (LSB) + 0xAAAAAAAA, // Clear bit 1, result: 0xAAAAAAA8 (SF=1) + 0x55555555, // Clear bit 0, result: 0x55555554 + 0x80000001, // Clear bit 0, result: 0x80000000 (SF=1) + 0x7FFFFFFF) // Clear bit 0, result: 0x7FFFFFFE + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsr_nop3 + + mov eax, ARG1_32 + blsr edx, eax + +blsr_nop3: nop + +TEST_END + +TEST_BEGIN_64(BLSRr64r64_zero, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS(0) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsr_nop4 + + // Result: 0, 
CF=1, ZF=1 + mov rax, ARG1_64 + blsr rdx, rax + +blsr_nop4: nop + +TEST_END_64 + +TEST_BEGIN_64(BLSRr64r64_single_bit, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 1, + 2, + 0x0000000000000100, + 0x0000000000010000, + 0x0000000100000000, + 0x4000000000000000, + 0x8000000000000000) // MSB + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsr_nop5 + + mov rax, ARG1_64 + blsr rdx, rax + +blsr_nop5: nop + +TEST_END_64 + +TEST_BEGIN_64(BLSRr64r64_multiple_bits, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 3, + 5, + 7, + 0xFFFFFFFFFFFFFFFF, // Result: 0xFFFFFFFFFFFFFFFE (SF=1) + 0xFFFFFFFFFFFFFFFE, // Result: 0xFFFFFFFFFFFFFFFC (SF=1) + 0x123456789ABCDEF0, // Clear bit 4 + 0xAAAAAAAAAAAAAAAA, // Clear bit 1, result: 0xAAAAAAAAAAAAAAA8 (SF=1) + 0x5555555555555555, // Clear bit 0, result: 0x5555555555555554 + 0x8000000000000001, // Clear bit 0, result: 0x8000000000000000 (SF=1) + 0x7FFFFFFFFFFFFFFF) // Clear bit 0, result: 0x7FFFFFFFFFFFFFFE + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsr_nop6 + + mov rax, ARG1_64 + blsr rdx, rax + +blsr_nop6: nop + +TEST_END_64 + +TEST_BEGIN(BLSRr32m32, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 0, + 1, + 0xFFFFFFFF, + 0x12345678) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsr_nop7 + + push ARG1_64 + blsr edx, DWORD PTR [rsp] + add rsp, 8 + +blsr_nop7: nop + +TEST_END + +TEST_BEGIN_64(BLSRr64m64, 1) +TEST_IGNORE_FLAGS(AF PF) +TEST_INPUTS( + 0, + 1, + 0xFFFFFFFFFFFFFFFF, + 0x123456789ABCDEF0) + + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsr_nop8 + + push ARG1_64 + blsr rdx, QWORD PTR [rsp] + add rsp, 8 + +blsr_nop8: nop + +TEST_END_64 diff --git a/tests/X86/BITBYTE/TZCNT.S b/tests/X86/BMI/TZCNT.S similarity index 100% rename from tests/X86/BITBYTE/TZCNT.S rename to tests/X86/BMI/TZCNT.S diff 
--git a/tests/X86/Tests.S b/tests/X86/Tests.S index a00be4892..b881753fe 100644 --- a/tests/X86/Tests.S +++ b/tests/X86/Tests.S @@ -402,7 +402,13 @@ SYMBOL(__x86_test_table_begin): #include "tests/X86/BITBYTE/BTS.S" #include "tests/X86/BITBYTE/LZCNT.S" #include "tests/X86/BITBYTE/SETcc.S" -#include "tests/X86/BITBYTE/TZCNT.S" + +#include "tests/X86/BMI/ANDN.S" +#include "tests/X86/BMI/BEXTR.S" +#include "tests/X86/BMI/BLSI.S" +#include "tests/X86/BMI/BLSMSK.S" +#include "tests/X86/BMI/BLSR.S" +#include "tests/X86/BMI/TZCNT.S" #include "tests/X86/CMOV/CMOVB.S" #include "tests/X86/CMOV/CMOVBE.S" From c70641e4b9628e86e03bfa0a996ae793d9b914c4 Mon Sep 17 00:00:00 2001 From: kyle-elliott-tob Date: Mon, 24 Nov 2025 18:25:10 -0500 Subject: [PATCH 2/7] Fix ISEL naming for BMI - Fixed BLSI CF flag - Fixed TZCNT duplicate on R32W/R32 to be R32W/M32 --- lib/Arch/X86/Semantics/BMI.cpp | 36 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/lib/Arch/X86/Semantics/BMI.cpp b/lib/Arch/X86/Semantics/BMI.cpp index 7af5e3b03..c8aafd93d 100644 --- a/lib/Arch/X86/Semantics/BMI.cpp +++ b/lib/Arch/X86/Semantics/BMI.cpp @@ -74,7 +74,7 @@ DEF_SEM(BLSI, D dst, S src) { auto res = UAnd(UNeg(val), val); WriteZExt(dst, res); SetFlagsBMI(state, val, val, res); - Write(FLAG_CF, ZeroFlag(res, val, val)); + Write(FLAG_CF, UCmpNeq(val, 0)); return memory; } @@ -110,34 +110,34 @@ DEF_SEM(TZCNT, D dst, S src) { return memory; } -DEF_ISEL(ANDN_GPRv_GPRv_GPRv_32) = ANDN; -DEF_ISEL(ANDN_GPRv_GPRv_MEMv_32) = ANDN; -IF_64BIT(DEF_ISEL(ANDN_GPRv_GPRv_GPRv_64) = ANDN;) -IF_64BIT(DEF_ISEL(ANDN_GPRv_GPRv_MEMv_64) = ANDN;) +DEF_ISEL(ANDN_VGPR32d_VGPR32d_VGPR32d) = ANDN; +DEF_ISEL(ANDN_VGPR32d_VGPR32d_MEMd) = ANDN; +IF_64BIT(DEF_ISEL(ANDN_VGPR64q_VGPR64q_VGPR64q) = ANDN;) +IF_64BIT(DEF_ISEL(ANDN_VGPR64q_VGPR64q_MEMq) = ANDN;) DEF_ISEL(BEXTR_VGPR32d_VGPR32d_VGPR32d) = BEXTR; DEF_ISEL(BEXTR_VGPR32d_MEMd_VGPR32d) = BEXTR; IF_64BIT(DEF_ISEL(BEXTR_VGPR64q_VGPR64q_VGPR64q) 
= BEXTR;) IF_64BIT(DEF_ISEL(BEXTR_VGPR64q_MEMq_VGPR64q) = BEXTR;) -DEF_ISEL(BLSI_GPRv_GPRv_32) = BLSI; -DEF_ISEL(BLSI_GPRv_MEMv_32) = BLSI; -IF_64BIT(DEF_ISEL(BLSI_GPRv_GPRv_64) = BLSI;) -IF_64BIT(DEF_ISEL(BLSI_GPRv_MEMv_64) = BLSI;) +DEF_ISEL(BLSI_VGPR32d_VGPR32d) = BLSI; +DEF_ISEL(BLSI_VGPR32d_MEMd) = BLSI; +IF_64BIT(DEF_ISEL(BLSI_VGPR64q_VGPR64q) = BLSI;) +IF_64BIT(DEF_ISEL(BLSI_VGPR64q_MEMq) = BLSI;) -DEF_ISEL(BLSMSK_GPRv_GPRv_32) = BLSMSK; -DEF_ISEL(BLSMSK_GPRv_MEMv_32) = BLSMSK; -IF_64BIT(DEF_ISEL(BLSMSK_GPRv_GPRv_64) = BLSMSK;) -IF_64BIT(DEF_ISEL(BLSMSK_GPRv_MEMv_64) = BLSMSK;) +DEF_ISEL(BLSMSK_VGPR32d_VGPR32d) = BLSMSK; +DEF_ISEL(BLSMSK_VGPR32d_MEMd) = BLSMSK; +IF_64BIT(DEF_ISEL(BLSMSK_VGPR64q_VGPR64q) = BLSMSK;) +IF_64BIT(DEF_ISEL(BLSMSK_VGPR64q_MEMq) = BLSMSK;) -DEF_ISEL(BLSR_GPRv_GPRv_32) = BLSR; -DEF_ISEL(BLSR_GPRv_MEMv_32) = BLSR; -IF_64BIT(DEF_ISEL(BLSR_GPRv_GPRv_64) = BLSR;) -IF_64BIT(DEF_ISEL(BLSR_GPRv_MEMv_64) = BLSR;) +DEF_ISEL(BLSR_VGPR32d_VGPR32d) = BLSR; +DEF_ISEL(BLSR_VGPR32d_MEMd) = BLSR; +IF_64BIT(DEF_ISEL(BLSR_VGPR64q_VGPR64q) = BLSR;) +IF_64BIT(DEF_ISEL(BLSR_VGPR64q_MEMq) = BLSR;) DEF_ISEL(TZCNT_GPRv_GPRv_16) = TZCNT; DEF_ISEL(TZCNT_GPRv_MEMv_16) = TZCNT; DEF_ISEL(TZCNT_GPRv_GPRv_32) = TZCNT; -DEF_ISEL(TZCNT_GPRv_GPRv_32) = TZCNT; +DEF_ISEL(TZCNT_GPRv_MEMv_32) = TZCNT; IF_64BIT(DEF_ISEL(TZCNT_GPRv_GPRv_64) = TZCNT;) IF_64BIT(DEF_ISEL(TZCNT_GPRv_MEMv_64) = TZCNT;) From bb1180a970c5bc9d490c5584116ed55872003094 Mon Sep 17 00:00:00 2001 From: kyle-elliott-tob Date: Tue, 25 Nov 2025 08:10:43 -0500 Subject: [PATCH 3/7] fixup: Missing closing brace --- lib/Arch/X86/Semantics/BMI.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/Arch/X86/Semantics/BMI.cpp b/lib/Arch/X86/Semantics/BMI.cpp index c8aafd93d..2d2c108d6 100644 --- a/lib/Arch/X86/Semantics/BMI.cpp +++ b/lib/Arch/X86/Semantics/BMI.cpp @@ -110,6 +110,8 @@ DEF_SEM(TZCNT, D dst, S src) { return memory; } +} // namespace + DEF_ISEL(ANDN_VGPR32d_VGPR32d_VGPR32d) = ANDN; 
DEF_ISEL(ANDN_VGPR32d_VGPR32d_MEMd) = ANDN; IF_64BIT(DEF_ISEL(ANDN_VGPR64q_VGPR64q_VGPR64q) = ANDN;) From 1262140a252817e783fc5c61dc25ad3dac79bb13 Mon Sep 17 00:00:00 2001 From: kyle-elliott-tob Date: Tue, 25 Nov 2025 09:35:07 -0500 Subject: [PATCH 4/7] fixup: Remove CPUID check for BMI1 - Not a great solution, but I think the value is leaking over into the test cases - Should be fine, handlers are setup to catch unsupported instructions --- tests/X86/BMI/ANDN.S | 37 --------------------- tests/X86/BMI/BEXTR.S | 55 ------------------------------- tests/X86/BMI/BLSI.S | 64 ------------------------------------ tests/X86/BMI/BLSMSK.S | 74 ------------------------------------------ tests/X86/BMI/BLSR.S | 74 ------------------------------------------ tests/X86/BMI/TZCNT.S | 30 ----------------- 6 files changed, 334 deletions(-) diff --git a/tests/X86/BMI/ANDN.S b/tests/X86/BMI/ANDN.S index e507f02c7..91e774813 100644 --- a/tests/X86/BMI/ANDN.S +++ b/tests/X86/BMI/ANDN.S @@ -32,20 +32,10 @@ TEST_INPUTS( 0x80000000, 0x80000000, // Sign bit: (~0x80000000) & 0x80000000 = 0 0x7FFFFFFF, 0xFFFFFFFF) // (~0x7FFFFFFF) & 0xFFFFFFFF = 0x80000000 (SF=1) - // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz andn_nop1 - - // ANDN supported, proceed with test: mov eax, ARG1_32 mov edx, ARG2_32 andn ecx, eax, edx -andn_nop1: nop - TEST_END TEST_BEGIN_64(ANDNr64r64r64_basic, 2) @@ -60,19 +50,10 @@ TEST_INPUTS( 0x8000000000000000, 0x8000000000000000, // Sign bit 0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF) // Result: 0x8000000000000000 (SF=1) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz andn_nop2 - mov rax, ARG1_64 mov rdx, ARG2_64 andn rcx, rax, rdx -andn_nop2: nop - TEST_END_64 TEST_BEGIN(ANDNr32r32m32, 2) @@ -82,20 +63,11 @@ TEST_INPUTS( 0x00000000, 0xABCDEF01, 0x55555555, 0xAAAAAAAA) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov 
ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz andn_nop3 - push ARG2_64 mov eax, ARG1_32 andn ecx, eax, DWORD PTR [rsp] add rsp, 8 -andn_nop3: nop - TEST_END TEST_BEGIN_64(ANDNr64r64m64, 2) @@ -105,18 +77,9 @@ TEST_INPUTS( 0x0000000000000000, 0xFEDCBA9876543210, 0xAAAAAAAAAAAAAAAA, 0x5555555555555555) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz andn_nop4 - push ARG2_64 mov rax, ARG1_64 andn rcx, rax, QWORD PTR [rsp] add rsp, 8 -andn_nop4: nop - TEST_END_64 diff --git a/tests/X86/BMI/BEXTR.S b/tests/X86/BMI/BEXTR.S index f8daab8ce..baa0b7eb4 100644 --- a/tests/X86/BMI/BEXTR.S +++ b/tests/X86/BMI/BEXTR.S @@ -31,20 +31,10 @@ TEST_INPUTS( 0x12345678, 0x0100, // Extract 1 bit at position 0 0xAAAAAAAA, 0x0401) // Extract 4 bits starting at position 1 - // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz bextr_nop1 - - // BEXTR supported, proceed with test: mov eax, ARG1_32 mov edx, ARG2_32 bextr ecx, eax, edx -bextr_nop1: nop - TEST_END TEST_BEGIN(BEXTRr32r32r32_zero, 2) @@ -54,19 +44,10 @@ TEST_INPUTS( 0xFFFFFFFF, 0x081F, // Extract 8 bits at position 31 (should be zero) 0x12345678, 0x0820) // Extract 8 bits at position 32+ (should be zero) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz bextr_nop2 - mov eax, ARG1_32 mov edx, ARG2_32 bextr ecx, eax, edx -bextr_nop2: nop - TEST_END TEST_BEGIN_64(BEXTRr64r64r64_basic, 2) @@ -81,19 +62,10 @@ TEST_INPUTS( 0x123456789ABCDEF0, 0x0000, // Extract 0 bits (length=0) 0x8000000000000000, 0x013F) // Extract 1 bit at position 63 - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz bextr_nop3 - mov rax, ARG1_64 mov rdx, ARG2_64 bextr rcx, rax, rdx -bextr_nop3: nop - TEST_END_64 TEST_BEGIN_64(BEXTRr64r64r64_zero, 2) @@ -103,19 +75,10 @@ TEST_INPUTS( 0xFFFFFFFFFFFFFFFF, 0x083F, // Extract 8 
bits at position 63 (should be 1 bit) 0x123456789ABCDEF0, 0x0840) // Extract 8 bits at position 64+ (should be zero) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz bextr_nop4 - mov rax, ARG1_64 mov rdx, ARG2_64 bextr rcx, rax, rdx -bextr_nop4: nop - TEST_END_64 TEST_BEGIN(BEXTRr32m32r32, 2) @@ -125,20 +88,11 @@ TEST_INPUTS( 0x12345678, 0x0C10, 0xFFFFFFFF, 0x1000) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz bextr_nop5 - push ARG1_64 mov edx, ARG2_32 bextr ecx, DWORD PTR [rsp], edx add rsp, 8 -bextr_nop5: nop - TEST_END TEST_BEGIN_64(BEXTRr64m64r64, 2) @@ -148,18 +102,9 @@ TEST_INPUTS( 0x123456789ABCDEF0, 0x1010, 0xFFFFFFFFFFFFFFFF, 0x2000) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz bextr_nop6 - push ARG1_64 mov rdx, ARG2_64 bextr rcx, QWORD PTR [rsp], rdx add rsp, 8 -bextr_nop6: nop - TEST_END_64 diff --git a/tests/X86/BMI/BLSI.S b/tests/X86/BMI/BLSI.S index 32e5b0840..41b17d681 100644 --- a/tests/X86/BMI/BLSI.S +++ b/tests/X86/BMI/BLSI.S @@ -24,19 +24,9 @@ TEST_BEGIN(BLSIr32r32_zero, 1) TEST_IGNORE_FLAGS(AF PF) TEST_INPUTS(0) - // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsi_nop1 - - // BLSI with zero input: result=0, CF=0, ZF=1 mov eax, ARG1_32 blsi edx, eax -blsi_nop1: nop - TEST_END TEST_BEGIN(BLSIr32r32_basic, 1) @@ -53,18 +43,9 @@ TEST_INPUTS( 0xAAAAAAAA, // Alternating bits: result=2, CF=1 0x55555555) // Alternating bits: result=1, CF=1 - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsi_nop2 - mov eax, ARG1_32 blsi edx, eax -blsi_nop2: nop - TEST_END TEST_BEGIN(BLSIr32r32_powers_of_two, 1) @@ -78,36 +59,18 @@ TEST_INPUTS( 0x40000000, // 2^30 0x80000000) // 2^31 (sign bit) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov 
ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsi_nop3 - mov eax, ARG1_32 blsi edx, eax -blsi_nop3: nop - TEST_END TEST_BEGIN_64(BLSIr64r64_zero, 1) TEST_IGNORE_FLAGS(AF PF) TEST_INPUTS(0) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsi_nop4 - mov rax, ARG1_64 blsi rdx, rax -blsi_nop4: nop - TEST_END_64 TEST_BEGIN_64(BLSIr64r64_basic, 1) @@ -124,18 +87,9 @@ TEST_INPUTS( 0xAAAAAAAAAAAAAAAA, // Result: 2 0x5555555555555555) // Result: 1 - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsi_nop5 - mov rax, ARG1_64 blsi rdx, rax -blsi_nop5: nop - TEST_END_64 TEST_BEGIN(BLSIr32m32, 1) @@ -145,19 +99,10 @@ TEST_INPUTS( 0xFFFFFFFF, 0x12345678) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsi_nop6 - push ARG1_64 blsi edx, DWORD PTR [rsp] add rsp, 8 -blsi_nop6: nop - TEST_END TEST_BEGIN_64(BLSIr64m64, 1) @@ -167,17 +112,8 @@ TEST_INPUTS( 0xFFFFFFFFFFFFFFFF, 0x123456789ABCDEF0) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsi_nop7 - push ARG1_64 blsi rdx, QWORD PTR [rsp] add rsp, 8 -blsi_nop7: nop - TEST_END_64 diff --git a/tests/X86/BMI/BLSMSK.S b/tests/X86/BMI/BLSMSK.S index d619105e5..6110c26d5 100644 --- a/tests/X86/BMI/BLSMSK.S +++ b/tests/X86/BMI/BLSMSK.S @@ -24,19 +24,9 @@ TEST_BEGIN(BLSMSKr32r32_zero, 1) TEST_IGNORE_FLAGS(AF PF) TEST_INPUTS(0) - // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsmsk_nop1 - - // BLSMSK with zero: result=0xFFFFFFFF, CF=1, ZF=0 mov eax, ARG1_32 blsmsk edx, eax -blsmsk_nop1: nop - TEST_END TEST_BEGIN(BLSMSKr32r32_basic, 1) @@ -53,18 +43,9 @@ TEST_INPUTS( 0xAAAAAAAA, // Alternating bits, LSB at position 1 0x55555555) // Alternating bits, LSB at position 0 - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 
0x0 - cpuid - and ebx, 0x00000008 - jz blsmsk_nop2 - mov eax, ARG1_32 blsmsk edx, eax -blsmsk_nop2: nop - TEST_END TEST_BEGIN(BLSMSKr32r32_powers_of_two, 1) @@ -79,37 +60,18 @@ TEST_INPUTS( 0x40000000, // Result: 0x7FFFFFFF 0x80000000) // Result: 0xFFFFFFFF (all bits, SF=1) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsmsk_nop3 - mov eax, ARG1_32 blsmsk edx, eax -blsmsk_nop3: nop - TEST_END TEST_BEGIN_64(BLSMSKr64r64_zero, 1) TEST_IGNORE_FLAGS(AF PF) TEST_INPUTS(0) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsmsk_nop4 - - // Result: 0xFFFFFFFFFFFFFFFF, CF=1 mov rax, ARG1_64 blsmsk rdx, rax -blsmsk_nop4: nop - TEST_END_64 TEST_BEGIN_64(BLSMSKr64r64_basic, 1) @@ -125,18 +87,9 @@ TEST_INPUTS( 0xAAAAAAAAAAAAAAAA, // Result: 3 0x5555555555555555) // Result: 1 - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsmsk_nop5 - mov rax, ARG1_64 blsmsk rdx, rax -blsmsk_nop5: nop - TEST_END_64 TEST_BEGIN_64(BLSMSKr64r64_powers_of_two, 1) @@ -150,18 +103,9 @@ TEST_INPUTS( 0x4000000000000000, // Result: 0x3FFFFFFFFFFFFFFF 0x8000000000000000) // Result: 0xFFFFFFFFFFFFFFFF (SF=1) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsmsk_nop6 - mov rax, ARG1_64 blsmsk rdx, rax -blsmsk_nop6: nop - TEST_END_64 TEST_BEGIN(BLSMSKr32m32, 1) @@ -171,19 +115,10 @@ TEST_INPUTS( 0x00000001, 0x12345678) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsmsk_nop7 - push ARG1_64 blsmsk edx, DWORD PTR [rsp] add rsp, 8 -blsmsk_nop7: nop - TEST_END TEST_BEGIN_64(BLSMSKr64m64, 1) @@ -193,17 +128,8 @@ TEST_INPUTS( 0x0000000000000001, 0x123456789ABCDEF0) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsmsk_nop8 - push ARG1_64 blsmsk rdx, QWORD PTR [rsp] add rsp, 
8 -blsmsk_nop8: nop - TEST_END_64 diff --git a/tests/X86/BMI/BLSR.S b/tests/X86/BMI/BLSR.S index 0f33ab2b5..34f9d0fc6 100644 --- a/tests/X86/BMI/BLSR.S +++ b/tests/X86/BMI/BLSR.S @@ -24,19 +24,9 @@ TEST_BEGIN(BLSRr32r32_zero, 1) TEST_IGNORE_FLAGS(AF PF) TEST_INPUTS(0) - // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsr_nop1 - - // BLSR with zero: result=0, CF=1, ZF=1 mov eax, ARG1_32 blsr edx, eax -blsr_nop1: nop - TEST_END TEST_BEGIN(BLSRr32r32_single_bit, 1) @@ -51,18 +41,9 @@ TEST_INPUTS( 0x40000000, // Result: 0, CF=0, ZF=1 0x80000000) // Result: 0, CF=0, ZF=1 (MSB) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsr_nop2 - mov eax, ARG1_32 blsr edx, eax -blsr_nop2: nop - TEST_END TEST_BEGIN(BLSRr32r32_multiple_bits, 1) @@ -79,37 +60,18 @@ TEST_INPUTS( 0x80000001, // Clear bit 0, result: 0x80000000 (SF=1) 0x7FFFFFFF) // Clear bit 0, result: 0x7FFFFFFE - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsr_nop3 - mov eax, ARG1_32 blsr edx, eax -blsr_nop3: nop - TEST_END TEST_BEGIN_64(BLSRr64r64_zero, 1) TEST_IGNORE_FLAGS(AF PF) TEST_INPUTS(0) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsr_nop4 - - // Result: 0, CF=1, ZF=1 mov rax, ARG1_64 blsr rdx, rax -blsr_nop4: nop - TEST_END_64 TEST_BEGIN_64(BLSRr64r64_single_bit, 1) @@ -123,18 +85,9 @@ TEST_INPUTS( 0x4000000000000000, 0x8000000000000000) // MSB - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsr_nop5 - mov rax, ARG1_64 blsr rdx, rax -blsr_nop5: nop - TEST_END_64 TEST_BEGIN_64(BLSRr64r64_multiple_bits, 1) @@ -151,18 +104,9 @@ TEST_INPUTS( 0x8000000000000001, // Clear bit 0, result: 0x8000000000000000 (SF=1) 0x7FFFFFFFFFFFFFFF) // Clear bit 0, result: 0x7FFFFFFFFFFFFFFE - // Check CPU for 
BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsr_nop6 - mov rax, ARG1_64 blsr rdx, rax -blsr_nop6: nop - TEST_END_64 TEST_BEGIN(BLSRr32m32, 1) @@ -173,19 +117,10 @@ TEST_INPUTS( 0xFFFFFFFF, 0x12345678) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsr_nop7 - push ARG1_64 blsr edx, DWORD PTR [rsp] add rsp, 8 -blsr_nop7: nop - TEST_END TEST_BEGIN_64(BLSRr64m64, 1) @@ -196,17 +131,8 @@ TEST_INPUTS( 0xFFFFFFFFFFFFFFFF, 0x123456789ABCDEF0) - // Check CPU for BMI1 support - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz blsr_nop8 - push ARG1_64 blsr rdx, QWORD PTR [rsp] add rsp, 8 -blsr_nop8: nop - TEST_END_64 diff --git a/tests/X86/BMI/TZCNT.S b/tests/X86/BMI/TZCNT.S index b137c7c71..9094de316 100644 --- a/tests/X86/BMI/TZCNT.S +++ b/tests/X86/BMI/TZCNT.S @@ -25,19 +25,9 @@ TEST_INPUTS( 0x0080, 0x0800) - // check CPU for TZCNT support first. (cpuid fn 0x00000007, bit 3 of ebx) - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz tzcnt_nop1 - - // TZCNT supported, proceed with test: mov eax, ARG1_32 tzcnt dx, ax -tzcnt_nop1: nop - TEST_END TEST_BEGIN(TZCNTr32r32, 1) @@ -51,19 +41,9 @@ TEST_INPUTS( 0x00000080, 0x08000000) - // check CPU for TZCNT support first. (cpuid fn 0x00000007, bit 3 of ebx) - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz tzcnt_nop2 - - // TZCNT supported, proceed with test: mov eax, ARG1_32 tzcnt edx, eax -tzcnt_nop2: nop - TEST_END TEST_BEGIN_64(TZCNTr64r64, 1) @@ -77,17 +57,7 @@ TEST_INPUTS( 0x0000000000000080, 0x0800000000000000) - // check CPU for TZCNT support first. 
(cpuid fn 0x00000007, bit 3 of ebx) - mov eax, 0x00000007 - mov ecx, 0x0 - cpuid - and ebx, 0x00000008 - jz tzcnt_nop3 - - // TZCNT supported, proceed with test: mov rax, ARG1_64 tzcnt rdx, rax -tzcnt_nop3: nop - TEST_END_64 From 72186e85c6748f0ed9d7cda8f64b52358298ffdf Mon Sep 17 00:00:00 2001 From: kyle-elliott-tob Date: Tue, 25 Nov 2025 13:36:58 -0500 Subject: [PATCH 5/7] fixup: Insert AVX feature flags for VEX encoded BMI1 instructions --- tests/X86/BMI/ANDN.S | 4 ++++ tests/X86/BMI/BEXTR.S | 4 ++++ tests/X86/BMI/BLSI.S | 4 ++++ tests/X86/BMI/BLSMSK.S | 4 ++++ tests/X86/BMI/BLSR.S | 4 ++++ 5 files changed, 20 insertions(+) diff --git a/tests/X86/BMI/ANDN.S b/tests/X86/BMI/ANDN.S index 91e774813..5dcf307de 100644 --- a/tests/X86/BMI/ANDN.S +++ b/tests/X86/BMI/ANDN.S @@ -20,6 +20,8 @@ * Flags: SF, ZF set; CF, OF cleared; AF, PF undefined */ +#if HAS_FEATURE_AVX + TEST_BEGIN(ANDNr32r32r32_basic, 2) TEST_IGNORE_FLAGS(AF PF) TEST_INPUTS( @@ -83,3 +85,5 @@ TEST_INPUTS( add rsp, 8 TEST_END_64 + +#endif // HAS_FEATURE_AVX diff --git a/tests/X86/BMI/BEXTR.S b/tests/X86/BMI/BEXTR.S index baa0b7eb4..b5c98c764 100644 --- a/tests/X86/BMI/BEXTR.S +++ b/tests/X86/BMI/BEXTR.S @@ -19,6 +19,8 @@ * Control operand format: [15:8] = length, [7:0] = start */ +#if HAS_FEATURE_AVX + TEST_BEGIN(BEXTRr32r32r32_basic, 2) TEST_IGNORE_FLAGS(AF SF PF) TEST_INPUTS( @@ -108,3 +110,5 @@ TEST_INPUTS( add rsp, 8 TEST_END_64 + +#endif // HAS_FEATURE_AVX diff --git a/tests/X86/BMI/BLSI.S b/tests/X86/BMI/BLSI.S index 41b17d681..3a8629583 100644 --- a/tests/X86/BMI/BLSI.S +++ b/tests/X86/BMI/BLSI.S @@ -20,6 +20,8 @@ * Flags: CF set if src!=0; ZF, SF set based on result; OF cleared; AF, PF undefined */ +#if HAS_FEATURE_AVX + TEST_BEGIN(BLSIr32r32_zero, 1) TEST_IGNORE_FLAGS(AF PF) TEST_INPUTS(0) @@ -117,3 +119,5 @@ TEST_INPUTS( add rsp, 8 TEST_END_64 + +#endif // HAS_FEATURE_AVX diff --git a/tests/X86/BMI/BLSMSK.S b/tests/X86/BMI/BLSMSK.S index 6110c26d5..c298d39ff 100644 --- a/tests/X86/BMI/BLSMSK.S 
+++ b/tests/X86/BMI/BLSMSK.S @@ -20,6 +20,8 @@ * Flags: CF set if src==0; ZF cleared (always 0); SF, set based on result; OF cleared; AF, PF undefined */ +#if HAS_FEATURE_AVX + TEST_BEGIN(BLSMSKr32r32_zero, 1) TEST_IGNORE_FLAGS(AF PF) TEST_INPUTS(0) @@ -133,3 +135,5 @@ TEST_INPUTS( add rsp, 8 TEST_END_64 + +#endif // HAS_FEATURE_AVX diff --git a/tests/X86/BMI/BLSR.S b/tests/X86/BMI/BLSR.S index 34f9d0fc6..7ae5e7fb0 100644 --- a/tests/X86/BMI/BLSR.S +++ b/tests/X86/BMI/BLSR.S @@ -20,6 +20,8 @@ * Flags: CF set if src==0; ZF, SF set based on result; OF cleared; AF, PF undefined */ +#if HAS_FEATURE_AVX + TEST_BEGIN(BLSRr32r32_zero, 1) TEST_IGNORE_FLAGS(AF PF) TEST_INPUTS(0) @@ -136,3 +138,5 @@ TEST_INPUTS( add rsp, 8 TEST_END_64 + +#endif // HAS_FEATURE_AVX From 103d56cc9a20cdbba6b5c3be3515f6ab4d5d18b9 Mon Sep 17 00:00:00 2001 From: kyle-elliott-tob Date: Tue, 25 Nov 2025 14:20:02 -0500 Subject: [PATCH 6/7] Revert "fixup: Remove CPUID check for BMI1" This reverts commit 1262140a252817e783fc5c61dc25ad3dac79bb13. --- tests/X86/BMI/ANDN.S | 46 +++++++++++++++++++--- tests/X86/BMI/BEXTR.S | 65 ++++++++++++++++++++++++++++--- tests/X86/BMI/BLSI.S | 76 ++++++++++++++++++++++++++++++++++--- tests/X86/BMI/BLSMSK.S | 86 +++++++++++++++++++++++++++++++++++++++--- tests/X86/BMI/BLSR.S | 85 ++++++++++++++++++++++++++++++++++++++--- tests/X86/BMI/TZCNT.S | 30 +++++++++++++++ 6 files changed, 359 insertions(+), 29 deletions(-) diff --git a/tests/X86/BMI/ANDN.S b/tests/X86/BMI/ANDN.S index 5dcf307de..8c785ef69 100644 --- a/tests/X86/BMI/ANDN.S +++ b/tests/X86/BMI/ANDN.S @@ -14,12 +14,6 @@ * limitations under the License. 
*/ -/* ANDN - Logical AND NOT - * Format: ANDN r32, r32, r/m32 - * Operation: dest = (~src1) & src2 - * Flags: SF, ZF set; CF, OF cleared; AF, PF undefined - */ - #if HAS_FEATURE_AVX TEST_BEGIN(ANDNr32r32r32_basic, 2) @@ -34,10 +28,20 @@ TEST_INPUTS( 0x80000000, 0x80000000, // Sign bit: (~0x80000000) & 0x80000000 = 0 0x7FFFFFFF, 0xFFFFFFFF) // (~0x7FFFFFFF) & 0xFFFFFFFF = 0x80000000 (SF=1) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz andn_nop1 + + // ANDN supported, proceed with test: mov eax, ARG1_32 mov edx, ARG2_32 andn ecx, eax, edx +andn_nop1: nop + TEST_END TEST_BEGIN_64(ANDNr64r64r64_basic, 2) @@ -52,10 +56,20 @@ TEST_INPUTS( 0x8000000000000000, 0x8000000000000000, // Sign bit 0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF) // Result: 0x8000000000000000 (SF=1) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz andn_nop2 + + // ANDN supported, proceed with test: mov rax, ARG1_64 mov rdx, ARG2_64 andn rcx, rax, rdx +andn_nop2: nop + TEST_END_64 TEST_BEGIN(ANDNr32r32m32, 2) @@ -65,11 +79,21 @@ TEST_INPUTS( 0x00000000, 0xABCDEF01, 0x55555555, 0xAAAAAAAA) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz andn_nop3 + + // ANDN supported, proceed with test: push ARG2_64 mov eax, ARG1_32 andn ecx, eax, DWORD PTR [rsp] add rsp, 8 +andn_nop3: nop + TEST_END TEST_BEGIN_64(ANDNr64r64m64, 2) @@ -79,11 +103,21 @@ TEST_INPUTS( 0x0000000000000000, 0xFEDCBA9876543210, 0xAAAAAAAAAAAAAAAA, 0x5555555555555555) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz andn_nop4 + + // ANDN supported, proceed with test: push ARG2_64 mov rax, ARG1_64 andn rcx, rax, QWORD PTR [rsp] add rsp, 8 +andn_nop4: nop + TEST_END_64 #endif // 
HAS_FEATURE_AVX diff --git a/tests/X86/BMI/BEXTR.S b/tests/X86/BMI/BEXTR.S index b5c98c764..444b71d76 100644 --- a/tests/X86/BMI/BEXTR.S +++ b/tests/X86/BMI/BEXTR.S @@ -14,11 +14,6 @@ * limitations under the License. */ -/* BEXTR - Bit Field Extract - * Format: BEXTR r32, r/m32, r32 - * Control operand format: [15:8] = length, [7:0] = start - */ - #if HAS_FEATURE_AVX TEST_BEGIN(BEXTRr32r32r32_basic, 2) @@ -33,10 +28,20 @@ TEST_INPUTS( 0x12345678, 0x0100, // Extract 1 bit at position 0 0xAAAAAAAA, 0x0401) // Extract 4 bits starting at position 1 + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz bextr_nop1 + + // BEXTR supported, proceed with test: mov eax, ARG1_32 mov edx, ARG2_32 bextr ecx, eax, edx +bextr_nop1: nop + TEST_END TEST_BEGIN(BEXTRr32r32r32_zero, 2) @@ -46,10 +51,20 @@ TEST_INPUTS( 0xFFFFFFFF, 0x081F, // Extract 8 bits at position 31 (should be zero) 0x12345678, 0x0820) // Extract 8 bits at position 32+ (should be zero) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz bextr_nop2 + + // BEXTR supported, proceed with test: mov eax, ARG1_32 mov edx, ARG2_32 bextr ecx, eax, edx +bextr_nop2: nop + TEST_END TEST_BEGIN_64(BEXTRr64r64r64_basic, 2) @@ -64,10 +79,20 @@ TEST_INPUTS( 0x123456789ABCDEF0, 0x0000, // Extract 0 bits (length=0) 0x8000000000000000, 0x013F) // Extract 1 bit at position 63 + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz bextr_nop3 + + // BEXTR supported, proceed with test: mov rax, ARG1_64 mov rdx, ARG2_64 bextr rcx, rax, rdx +bextr_nop3: nop + TEST_END_64 TEST_BEGIN_64(BEXTRr64r64r64_zero, 2) @@ -77,10 +102,20 @@ TEST_INPUTS( 0xFFFFFFFFFFFFFFFF, 0x083F, // Extract 8 bits at position 63 (should be 1 bit) 0x123456789ABCDEF0, 0x0840) // Extract 8 bits at position 64+ 
(should be zero) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz bextr_nop4 + + // BEXTR supported, proceed with test: mov rax, ARG1_64 mov rdx, ARG2_64 bextr rcx, rax, rdx +bextr_nop4: nop + TEST_END_64 TEST_BEGIN(BEXTRr32m32r32, 2) @@ -90,11 +125,21 @@ TEST_INPUTS( 0x12345678, 0x0C10, 0xFFFFFFFF, 0x1000) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz bextr_nop5 + + // BEXTR supported, proceed with test: push ARG1_64 mov edx, ARG2_32 bextr ecx, DWORD PTR [rsp], edx add rsp, 8 +bextr_nop5: nop + TEST_END TEST_BEGIN_64(BEXTRr64m64r64, 2) @@ -104,11 +149,21 @@ TEST_INPUTS( 0x123456789ABCDEF0, 0x1010, 0xFFFFFFFFFFFFFFFF, 0x2000) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz bextr_nop6 + + // BEXTR supported, proceed with test: push ARG1_64 mov rdx, ARG2_64 bextr rcx, QWORD PTR [rsp], rdx add rsp, 8 +bextr_nop6: nop + TEST_END_64 #endif // HAS_FEATURE_AVX diff --git a/tests/X86/BMI/BLSI.S b/tests/X86/BMI/BLSI.S index 3a8629583..ab4f217d4 100644 --- a/tests/X86/BMI/BLSI.S +++ b/tests/X86/BMI/BLSI.S @@ -14,21 +14,25 @@ * limitations under the License. 
*/ -/* BLSI - Extract Lowest Set Isolated Bit - * Format: BLSI r32, r/m32 - * Operation: dest = (-src) & src - * Flags: CF set if src!=0; ZF, SF set based on result; OF cleared; AF, PF undefined - */ - #if HAS_FEATURE_AVX TEST_BEGIN(BLSIr32r32_zero, 1) TEST_IGNORE_FLAGS(AF PF) TEST_INPUTS(0) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsi_nop1 + + // BLSI supported, proceed with test: mov eax, ARG1_32 blsi edx, eax +blsi_nop1: nop + TEST_END TEST_BEGIN(BLSIr32r32_basic, 1) @@ -45,9 +49,19 @@ TEST_INPUTS( 0xAAAAAAAA, // Alternating bits: result=2, CF=1 0x55555555) // Alternating bits: result=1, CF=1 + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsi_nop2 + + // BLSI supported, proceed with test: mov eax, ARG1_32 blsi edx, eax +blsi_nop2: nop + TEST_END TEST_BEGIN(BLSIr32r32_powers_of_two, 1) @@ -61,18 +75,38 @@ TEST_INPUTS( 0x40000000, // 2^30 0x80000000) // 2^31 (sign bit) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsi_nop3 + + // BLSI supported, proceed with test: mov eax, ARG1_32 blsi edx, eax +blsi_nop3: nop + TEST_END TEST_BEGIN_64(BLSIr64r64_zero, 1) TEST_IGNORE_FLAGS(AF PF) TEST_INPUTS(0) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsi_nop4 + + // BLSI supported, proceed with test: mov rax, ARG1_64 blsi rdx, rax +blsi_nop4: nop + TEST_END_64 TEST_BEGIN_64(BLSIr64r64_basic, 1) @@ -89,9 +123,19 @@ TEST_INPUTS( 0xAAAAAAAAAAAAAAAA, // Result: 2 0x5555555555555555) // Result: 1 + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsi_nop5 + + // BLSI supported, proceed with test: mov rax, ARG1_64 
blsi rdx, rax +blsi_nop5: nop + TEST_END_64 TEST_BEGIN(BLSIr32m32, 1) @@ -101,10 +145,20 @@ TEST_INPUTS( 0xFFFFFFFF, 0x12345678) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsi_nop6 + + // BLSI supported, proceed with test: push ARG1_64 blsi edx, DWORD PTR [rsp] add rsp, 8 +blsi_nop6: nop + TEST_END TEST_BEGIN_64(BLSIr64m64, 1) @@ -114,10 +168,20 @@ TEST_INPUTS( 0xFFFFFFFFFFFFFFFF, 0x123456789ABCDEF0) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsi_nop7 + + // BLSI supported, proceed with test: push ARG1_64 blsi rdx, QWORD PTR [rsp] add rsp, 8 +blsi_nop7: nop + TEST_END_64 #endif // HAS_FEATURE_AVX diff --git a/tests/X86/BMI/BLSMSK.S b/tests/X86/BMI/BLSMSK.S index c298d39ff..48ebef1b2 100644 --- a/tests/X86/BMI/BLSMSK.S +++ b/tests/X86/BMI/BLSMSK.S @@ -14,21 +14,25 @@ * limitations under the License. 
*/ -/* BLSMSK - Get Mask Up to Lowest Set Bit - * Format: BLSMSK r32, r/m32 - * Operation: dest = (src - 1) ^ src - * Flags: CF set if src==0; ZF cleared (always 0); SF, set based on result; OF cleared; AF, PF undefined - */ - #if HAS_FEATURE_AVX TEST_BEGIN(BLSMSKr32r32_zero, 1) TEST_IGNORE_FLAGS(AF PF) TEST_INPUTS(0) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsmsk_nop1 + + // BLSMSK supported, proceed with test: mov eax, ARG1_32 blsmsk edx, eax +blsmsk_nop1: nop + TEST_END TEST_BEGIN(BLSMSKr32r32_basic, 1) @@ -45,9 +49,19 @@ TEST_INPUTS( 0xAAAAAAAA, // Alternating bits, LSB at position 1 0x55555555) // Alternating bits, LSB at position 0 + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsmsk_nop2 + + // BLSMSK supported, proceed with test: mov eax, ARG1_32 blsmsk edx, eax +blsmsk_nop2: nop + TEST_END TEST_BEGIN(BLSMSKr32r32_powers_of_two, 1) @@ -62,18 +76,38 @@ TEST_INPUTS( 0x40000000, // Result: 0x7FFFFFFF 0x80000000) // Result: 0xFFFFFFFF (all bits, SF=1) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsmsk_nop3 + + // BLSMSK supported, proceed with test: mov eax, ARG1_32 blsmsk edx, eax +blsmsk_nop3: nop + TEST_END TEST_BEGIN_64(BLSMSKr64r64_zero, 1) TEST_IGNORE_FLAGS(AF PF) TEST_INPUTS(0) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsmsk_nop4 + + // BLSMSK supported, proceed with test: mov rax, ARG1_64 blsmsk rdx, rax +blsmsk_nop4: nop + TEST_END_64 TEST_BEGIN_64(BLSMSKr64r64_basic, 1) @@ -89,9 +123,19 @@ TEST_INPUTS( 0xAAAAAAAAAAAAAAAA, // Result: 3 0x5555555555555555) // Result: 1 + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0
+ cpuid + and ebx, 0x00000008 + jz blsmsk_nop5 + + // BLSMSK supported, proceed with test: mov rax, ARG1_64 blsmsk rdx, rax +blsmsk_nop5: nop + TEST_END_64 TEST_BEGIN_64(BLSMSKr64r64_powers_of_two, 1) @@ -105,9 +149,19 @@ TEST_INPUTS( 0x4000000000000000, // Result: 0x3FFFFFFFFFFFFFFF 0x8000000000000000) // Result: 0xFFFFFFFFFFFFFFFF (SF=1) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsmsk_nop6 + + // BLSMSK supported, proceed with test: mov rax, ARG1_64 blsmsk rdx, rax +blsmsk_nop6: nop + TEST_END_64 TEST_BEGIN(BLSMSKr32m32, 1) @@ -117,10 +171,20 @@ TEST_INPUTS( 0x00000001, 0x12345678) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsmsk_nop7 + + // BLSMSK supported, proceed with test: push ARG1_64 blsmsk edx, DWORD PTR [rsp] add rsp, 8 +blsmsk_nop7: nop + TEST_END TEST_BEGIN_64(BLSMSKr64m64, 1) @@ -130,10 +194,20 @@ TEST_INPUTS( 0x0000000000000001, 0x123456789ABCDEF0) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsmsk_nop8 + + // BLSMSK supported, proceed with test: push ARG1_64 blsmsk rdx, QWORD PTR [rsp] add rsp, 8 +blsmsk_nop8: nop + TEST_END_64 #endif // HAS_FEATURE_AVX diff --git a/tests/X86/BMI/BLSR.S b/tests/X86/BMI/BLSR.S index 7ae5e7fb0..4ff002fac 100644 --- a/tests/X86/BMI/BLSR.S +++ b/tests/X86/BMI/BLSR.S @@ -14,21 +14,25 @@ * limitations under the License. 
*/ -/* BLSR - Reset Lowest Set Bit - * Format: BLSR r32, r/m32 - * Operation: dest = (src - 1) & src - * Flags: CF set if src==0; ZF, SF set based on result; OF cleared; AF, PF undefined - */ - #if HAS_FEATURE_AVX TEST_BEGIN(BLSRr32r32_zero, 1) TEST_IGNORE_FLAGS(AF PF) TEST_INPUTS(0) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsr_nop1 + + // BLSR supported, proceed with test: mov eax, ARG1_32 blsr edx, eax +blsr_nop1: nop + TEST_END TEST_BEGIN(BLSRr32r32_single_bit, 1) @@ -43,9 +47,18 @@ TEST_INPUTS( 0x40000000, // Result: 0, CF=0, ZF=1 0x80000000) // Result: 0, CF=0, ZF=1 (MSB) + // Check CPU for BMI1 support + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsr_nop2 + mov eax, ARG1_32 blsr edx, eax +blsr_nop2: nop + TEST_END TEST_BEGIN(BLSRr32r32_multiple_bits, 1) @@ -62,18 +75,38 @@ TEST_INPUTS( 0x80000001, // Clear bit 0, result: 0x80000000 (SF=1) 0x7FFFFFFF) // Clear bit 0, result: 0x7FFFFFFE + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsr_nop3 + + // BLSR supported, proceed with test: mov eax, ARG1_32 blsr edx, eax +blsr_nop3: nop + TEST_END TEST_BEGIN_64(BLSRr64r64_zero, 1) TEST_IGNORE_FLAGS(AF PF) TEST_INPUTS(0) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsr_nop4 + + // BLSR supported, proceed with test: mov rax, ARG1_64 blsr rdx, rax +blsr_nop4: nop + TEST_END_64 TEST_BEGIN_64(BLSRr64r64_single_bit, 1) @@ -87,9 +120,19 @@ TEST_INPUTS( 0x4000000000000000, 0x8000000000000000) // MSB + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsr_nop5 + + // BLSR supported, proceed with test: mov rax, ARG1_64 blsr rdx, rax +blsr_nop5: nop + TEST_END_64 
TEST_BEGIN_64(BLSRr64r64_multiple_bits, 1) @@ -106,9 +149,19 @@ TEST_INPUTS( 0x8000000000000001, // Clear bit 0, result: 0x8000000000000000 (SF=1) 0x7FFFFFFFFFFFFFFF) // Clear bit 0, result: 0x7FFFFFFFFFFFFFFE + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsr_nop6 + + // BLSR supported, proceed with test: mov rax, ARG1_64 blsr rdx, rax +blsr_nop6: nop + TEST_END_64 TEST_BEGIN(BLSRr32m32, 1) @@ -119,10 +172,20 @@ TEST_INPUTS( 0xFFFFFFFF, 0x12345678) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsr_nop7 + + // BLSR supported, proceed with test: push ARG1_64 blsr edx, DWORD PTR [rsp] add rsp, 8 +blsr_nop7: nop + TEST_END TEST_BEGIN_64(BLSRr64m64, 1) @@ -133,10 +196,20 @@ TEST_INPUTS( 0xFFFFFFFFFFFFFFFF, 0x123456789ABCDEF0) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz blsr_nop8 + + // BLSR supported, proceed with test: push ARG1_64 blsr rdx, QWORD PTR [rsp] add rsp, 8 +blsr_nop8: nop + TEST_END_64 #endif // HAS_FEATURE_AVX diff --git a/tests/X86/BMI/TZCNT.S b/tests/X86/BMI/TZCNT.S index 9094de316..1908780c2 100644 --- a/tests/X86/BMI/TZCNT.S +++ b/tests/X86/BMI/TZCNT.S @@ -25,9 +25,19 @@ TEST_INPUTS( 0x0080, 0x0800) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz tzcnt_nop1 + + // TZCNT supported, proceed with test: mov eax, ARG1_32 tzcnt dx, ax +tzcnt_nop1: nop + TEST_END TEST_BEGIN(TZCNTr32r32, 1) @@ -41,9 +51,19 @@ TEST_INPUTS( 0x00000080, 0x08000000) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz tzcnt_nop2 + + // TZCNT supported, proceed with test: mov eax, ARG1_32 tzcnt edx, eax +tzcnt_nop2: nop + 
TEST_END TEST_BEGIN_64(TZCNTr64r64, 1) @@ -57,7 +77,17 @@ TEST_INPUTS( 0x0000000000000080, 0x0800000000000000) + // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) + mov eax, 0x00000007 + mov ecx, 0x0 + cpuid + and ebx, 0x00000008 + jz tzcnt_nop3 + + // TZCNT supported, proceed with test: mov rax, ARG1_64 tzcnt rdx, rax +tzcnt_nop3: nop + TEST_END_64 From a1975c386b1f586325e0f63ec7414fbd14b4caaf Mon Sep 17 00:00:00 2001 From: kyle-elliott-tob Date: Tue, 25 Nov 2025 15:02:09 -0500 Subject: [PATCH 7/7] fixup: TZCNT is indicated by the ABM flag, not BMI1 --- tests/X86/BMI/TZCNT.S | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/tests/X86/BMI/TZCNT.S b/tests/X86/BMI/TZCNT.S index 1908780c2..78b54d50a 100644 --- a/tests/X86/BMI/TZCNT.S +++ b/tests/X86/BMI/TZCNT.S @@ -25,11 +25,10 @@ TEST_INPUTS( 0x0080, 0x0800) - // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) - mov eax, 0x00000007 - mov ecx, 0x0 + // Check CPU for ABM support (cpuid fn 0x80000001, bit 5 of ecx) + mov eax, 0x80000001 cpuid - and ebx, 0x00000008 + and ecx, 0x00000020 jz tzcnt_nop1 // TZCNT supported, proceed with test: @@ -51,11 +50,10 @@ TEST_INPUTS( 0x00000080, 0x08000000) - // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) - mov eax, 0x00000007 - mov ecx, 0x0 + // Check CPU for ABM support (cpuid fn 0x80000001, bit 5 of ecx) + mov eax, 0x80000001 cpuid - and ebx, 0x00000008 + and ecx, 0x00000020 jz tzcnt_nop2 // TZCNT supported, proceed with test: @@ -77,11 +75,10 @@ TEST_INPUTS( 0x0000000000000080, 0x0800000000000000) - // Check CPU for BMI1 support (cpuid fn 0x00000007, bit 3 of ebx) - mov eax, 0x00000007 - mov ecx, 0x0 + // Check CPU for ABM support (cpuid fn 0x80000001, bit 5 of ecx) + mov eax, 0x80000001 cpuid - and ebx, 0x00000008 + and ecx, 0x00000020 jz tzcnt_nop3 // TZCNT supported, proceed with test: