Skip to content

Commit 6553e2a

Browse files
committed
Use conditional compilation for XOR3 ops
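Selecting the XOR3 implementation at compile time with `#[cfg]`, instead of checking CPU features at runtime on every call, provides measurably better performance.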
1 parent eb307ee commit 6553e2a

File tree

2 files changed

+31
-23
lines changed

2 files changed

+31
-23
lines changed

src/arch/aarch64.rs

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -258,20 +258,27 @@ impl ArchOps for AArch64Ops {
258258
}
259259

260260
#[inline]
261-
#[cfg_attr(target_feature = "sha3", target_feature(enable = "neon,sha3"))]
262-
#[cfg_attr(not(target_feature = "sha3"), target_feature(enable = "neon"))]
261+
#[cfg(target_feature = "sha3")]
262+
#[target_feature(enable = "neon,sha3")]
263263
unsafe fn xor3_vectors(
264264
&self,
265265
a: Self::Vector,
266266
b: Self::Vector,
267267
c: Self::Vector,
268268
) -> Self::Vector {
269-
if is_aarch64_feature_detected!("sha3") {
270-
// Use native 3-way XOR instruction when available
271-
return veor3q_u8(a, b, c);
272-
}
273-
274-
// Fall back to two XOR operations
269+
veor3q_u8(a, b, c)
270+
}
271+
272+
#[inline]
273+
#[cfg(not(target_feature = "sha3"))]
274+
#[target_feature(enable = "neon")]
275+
unsafe fn xor3_vectors(
276+
&self,
277+
a: Self::Vector,
278+
b: Self::Vector,
279+
c: Self::Vector,
280+
) -> Self::Vector {
281+
// Fallback for when SHA3 is not available
275282
veorq_u8(veorq_u8(a, b), c)
276283
}
277284
}

src/arch/x86.rs

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -226,29 +226,30 @@ impl ArchOps for X86Ops {
226226
unsafe fn carryless_mul_11(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector {
227227
_mm_clmulepi64_si128(a, b, 0x11)
228228
}
229-
229+
230230
#[inline]
231-
#[cfg_attr(
232-
any(feature = "vpclmulqdq", feature = "avx512"),
233-
target_feature(enable = "avx512f,avx512vl")
234-
)]
235-
#[cfg_attr(
236-
all(not(feature = "vpclmulqdq"), not(feature = "avx512")),
237-
target_feature(enable = "sse2,sse4.1")
238-
)]
231+
#[cfg(any(feature = "vpclmulqdq", feature = "avx512"))]
232+
#[target_feature(enable = "avx512f,avx512vl")]
239233
unsafe fn xor3_vectors(
240234
&self,
241235
a: Self::Vector,
242236
b: Self::Vector,
243237
c: Self::Vector,
244238
) -> Self::Vector {
245-
#[cfg(any(feature = "vpclmulqdq", feature = "avx512"))]
246-
if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
247-
return _mm_ternarylogic_epi64(
248-
a, b, c, 0x96, // XOR3
249-
);
250-
}
239+
_mm_ternarylogic_epi64(
240+
a, b, c, 0x96, // XOR3
241+
)
242+
}
251243

244+
#[inline]
245+
#[cfg(not(any(feature = "vpclmulqdq", feature = "avx512")))]
246+
#[target_feature(enable = "sse2,sse4.1")]
247+
unsafe fn xor3_vectors(
248+
&self,
249+
a: Self::Vector,
250+
b: Self::Vector,
251+
c: Self::Vector,
252+
) -> Self::Vector {
252253
// x86 doesn't have native XOR3 in SSE, use two XORs
253254
_mm_xor_si128(_mm_xor_si128(a, b), c)
254255
}

0 commit comments

Comments
 (0)