Skip to content

Commit 1240c8d

Browse files
mrecachinasCopilot
andcommitted
Refactor: split lib.rs into separate module files
Split the monolithic 2206-line lib.rs into focused module files: - hex.rs: hex character parsing utilities - classic.rs: classic popcount and hamming distance implementations - native.rs: native CPU popcount implementations - x86_simd.rs: x86_64 SSE4.1/AVX2/AVX-512 SIMD implementations - neon_simd.rs: ARM64 NEON SIMD implementations - python.rs: Python/PyO3 bindings - api.rs: public Rust API functions - tests.rs: unit tests lib.rs retains constants, global state, module declarations, dispatch functions, and re-exports the public API. No logic changes — only code movement and visibility adjustments. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent fd348dd commit 1240c8d

File tree

9 files changed

+2115
-2086
lines changed

9 files changed

+2115
-2086
lines changed

src/api.rs

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
use crate::{
2+
hamming_distance_bytes_dispatch, hamming_distance_string_dispatch,
3+
ALGO_CLASSIC, ALGO_NATIVE,
4+
CURRENT_ALGO,
5+
};
6+
#[cfg(target_arch = "x86_64")]
7+
use crate::{ALGO_AVX2, ALGO_AVX512, ALGO_SSE41};
8+
9+
use std::sync::atomic::Ordering;
10+
11+
/// Calculate the bitwise hamming distance between two equal-length hex strings.
12+
///
13+
/// Automatically uses the best SIMD implementation available (NEON/AVX2/SSE4.1).
14+
///
15+
/// # Errors
16+
/// Returns `Err` if the strings differ in length or contain non-hex characters.
17+
///
18+
/// # Example
19+
/// ```
20+
/// let dist = hexhamming::hex_hamming_distance("deadbeef", "00000000").unwrap();
21+
/// assert_eq!(dist, 24);
22+
/// ```
23+
pub fn hex_hamming_distance(a: &str, b: &str) -> Result<u64, &'static str> {
24+
if a.len() != b.len() {
25+
return Err("strings are NOT the same length");
26+
}
27+
if a.is_empty() {
28+
return Ok(0);
29+
}
30+
hamming_distance_string_dispatch(a.as_bytes(), b.as_bytes())
31+
}
32+
33+
/// Calculate the bitwise hamming distance between two equal-length byte slices.
34+
///
35+
/// Automatically uses the best SIMD implementation available (NEON/AVX2/SSE4.1).
36+
///
37+
/// # Errors
38+
/// Returns `Err` if the slices differ in length.
39+
///
40+
/// # Example
41+
/// ```
42+
/// let dist = hexhamming::bytes_hamming_distance(b"\xff", b"\x00").unwrap();
43+
/// assert_eq!(dist, 8);
44+
/// ```
45+
pub fn bytes_hamming_distance(a: &[u8], b: &[u8]) -> Result<u64, &'static str> {
46+
if a.len() != b.len() {
47+
return Err("bytes are NOT the same length");
48+
}
49+
if a.is_empty() {
50+
return Ok(0);
51+
}
52+
Ok(hamming_distance_bytes_dispatch(a, b, -1))
53+
}
54+
55+
/// Check if two byte arrays are within a specified Hamming distance.
56+
///
57+
/// Returns `Ok(true)` if distance <= max_dist, `Ok(false)` otherwise.
58+
pub fn bytes_within_dist(a: &[u8], b: &[u8], max_dist: i64) -> Result<bool, &'static str> {
59+
if a.is_empty() || b.is_empty() {
60+
return Err("array size must be >0");
61+
}
62+
if a.len() != b.len() {
63+
return Err("array sizes need to be the same");
64+
}
65+
Ok(hamming_distance_bytes_dispatch(a, b, max_dist) == 1)
66+
}
67+
68+
/// Find the first element in a byte array within a specified Hamming distance.
69+
///
70+
/// Returns the index of the first matching element, or `None`.
71+
pub fn bytes_array_first_within_dist(big_array: &[u8], small_array: &[u8], max_dist: i64) -> Result<Option<usize>, &'static str> {
72+
if small_array.is_empty() {
73+
return Err("elem_to_compare size must be >0");
74+
}
75+
if big_array.len() % small_array.len() != 0 {
76+
return Err("array_of_elems size must be multiplier of elem_to_compare");
77+
}
78+
let elem_size = small_array.len();
79+
let num_elements = big_array.len() / elem_size;
80+
for i in 0..num_elements {
81+
let chunk = &big_array[i * elem_size..(i + 1) * elem_size];
82+
if hamming_distance_bytes_dispatch(chunk, small_array, max_dist) == 1 {
83+
return Ok(Some(i));
84+
}
85+
}
86+
Ok(None)
87+
}
88+
89+
/// Find the element in a byte array with the smallest Hamming distance.
90+
///
91+
/// Returns `Some((distance, index))` of the best match, or `None` if none within max_dist.
92+
pub fn bytes_array_best_within_dist(big_array: &[u8], small_array: &[u8], max_dist: i64) -> Result<Option<(u64, usize)>, &'static str> {
93+
if small_array.is_empty() {
94+
return Err("elem_to_compare size must be >0");
95+
}
96+
if big_array.len() % small_array.len() != 0 {
97+
return Err("array_of_elems size must be multiplier of elem_to_compare");
98+
}
99+
let elem_size = small_array.len();
100+
let num_elements = big_array.len() / elem_size;
101+
let mut best_dist: i64 = -1;
102+
let mut best_index: Option<usize> = None;
103+
104+
for i in 0..num_elements {
105+
let chunk = &big_array[i * elem_size..(i + 1) * elem_size];
106+
let threshold = if best_dist >= 0 { best_dist - 1 } else { max_dist };
107+
if hamming_distance_bytes_dispatch(chunk, small_array, threshold) == 0 {
108+
continue;
109+
}
110+
let dist = hamming_distance_bytes_dispatch(chunk, small_array, -1) as i64;
111+
if best_dist < 0 || dist < best_dist {
112+
best_dist = dist;
113+
best_index = Some(i);
114+
}
115+
}
116+
Ok(best_index.map(|idx| (best_dist as u64, idx)))
117+
}
118+
119+
/// Find all elements in a byte array within a specified Hamming distance.
120+
///
121+
/// Returns a Vec of `(distance, index)` tuples.
122+
pub fn bytes_array_all_within_dist(big_array: &[u8], small_array: &[u8], max_dist: i64) -> Result<Vec<(u64, usize)>, &'static str> {
123+
if small_array.is_empty() {
124+
return Err("elem_to_compare size must be >0");
125+
}
126+
if big_array.len() % small_array.len() != 0 {
127+
return Err("array_of_elems size must be multiplier of elem_to_compare");
128+
}
129+
let elem_size = small_array.len();
130+
let num_elements = big_array.len() / elem_size;
131+
let mut results = Vec::new();
132+
133+
for i in 0..num_elements {
134+
let chunk = &big_array[i * elem_size..(i + 1) * elem_size];
135+
if hamming_distance_bytes_dispatch(chunk, small_array, max_dist) == 0 {
136+
continue;
137+
}
138+
let dist = hamming_distance_bytes_dispatch(chunk, small_array, -1);
139+
results.push((dist, i));
140+
}
141+
Ok(results)
142+
}
143+
144+
/// Experimental: hex Hamming distance via the NEON "pack to bytes" routine.
///
/// Per the original description, the underlying routine parses 32 hex chars
/// into 16 packed bytes and then counts bits with `vcntq_u8`. Only compiled on
/// `aarch64`.
///
/// # Errors
/// Returns `Err` if the strings differ in length, or whatever error the NEON
/// routine reports for its input.
#[cfg(target_arch = "aarch64")]
pub fn hex_hamming_distance_pack(a: &str, b: &str) -> Result<u64, &'static str> {
    let (lhs, rhs) = (a.as_bytes(), b.as_bytes());
    if lhs.len() != rhs.len() {
        return Err("strings are NOT the same length");
    }
    if lhs.is_empty() {
        return Ok(0);
    }
    // SAFETY: NOTE(review) — presumably the callee only requires NEON support and
    // equal-length inputs (checked above); confirm against its `# Safety` contract.
    unsafe { crate::neon_simd::hamming_distance_string_neon_pack(lhs, rhs) }
}
156+
157+
/// Set the SIMD algorithm used for hamming distance calculations.
158+
///
159+
/// Valid algorithm names:
160+
/// - `"avx512"` / `"avx-512"` — AVX-512 BITALG (requires avx512bw + avx512bitalg)
161+
/// - `"avx2"` / `"avx"` / `"extra"` — AVX2
162+
/// - `"sse41"` / `"sse"` — SSE4.1
163+
/// - `"neon"` — ARM NEON (aarch64 only)
164+
/// - `"native"` / `"popcount"` — platform native
165+
/// - `"classic"` — scalar fallback
166+
///
167+
/// Returns `Ok(())` on success, `Err` if the CPU doesn't support the requested algorithm.
168+
pub fn set_algorithm(algo_name: &str) -> Result<(), &'static str> {
169+
match algo_name.to_lowercase().as_str() {
170+
"avx512" | "avx-512" => {
171+
#[cfg(target_arch = "x86_64")]
172+
{
173+
if is_x86_feature_detected!("avx512bw") && is_x86_feature_detected!("avx512bitalg") {
174+
CURRENT_ALGO.store(ALGO_AVX512, Ordering::Relaxed);
175+
return Ok(());
176+
}
177+
return Err("CPU doesn't support AVX-512 BITALG");
178+
}
179+
#[cfg(not(target_arch = "x86_64"))]
180+
Err("AVX-512 not available on this architecture")
181+
}
182+
"extra" | "avx" | "avx2" => {
183+
#[cfg(target_arch = "x86_64")]
184+
{
185+
if is_x86_feature_detected!("avx2") {
186+
CURRENT_ALGO.store(ALGO_AVX2, Ordering::Relaxed);
187+
return Ok(());
188+
}
189+
return Err("CPU doesn't support AVX2");
190+
}
191+
#[cfg(target_arch = "aarch64")]
192+
{
193+
CURRENT_ALGO.store(crate::ALGO_NEON, Ordering::Relaxed);
194+
Ok(())
195+
}
196+
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
197+
Err("not available on this architecture")
198+
}
199+
"sse41" | "sse" => {
200+
#[cfg(target_arch = "x86_64")]
201+
{
202+
if is_x86_feature_detected!("sse4.1") {
203+
CURRENT_ALGO.store(ALGO_SSE41, Ordering::Relaxed);
204+
return Ok(());
205+
}
206+
Err("CPU doesn't support SSE4.1")
207+
}
208+
#[cfg(not(target_arch = "x86_64"))]
209+
Err("SSE not available on this architecture")
210+
}
211+
"neon" => {
212+
#[cfg(target_arch = "aarch64")]
213+
{
214+
CURRENT_ALGO.store(crate::ALGO_NEON, Ordering::Relaxed);
215+
Ok(())
216+
}
217+
#[cfg(not(target_arch = "aarch64"))]
218+
Err("NEON not available on this architecture")
219+
}
220+
"native" | "popcount" => {
221+
CURRENT_ALGO.store(ALGO_NATIVE, Ordering::Relaxed);
222+
Ok(())
223+
}
224+
"classic" => {
225+
CURRENT_ALGO.store(ALGO_CLASSIC, Ordering::Relaxed);
226+
Ok(())
227+
}
228+
_ => Err("unknown algorithm"),
229+
}
230+
}

src/classic.rs

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
use crate::hex::hex_char_to_nibble;
2+
use crate::LOOKUP;
3+
4+
/// Count the set bits of a 64-bit word with the classic bit-twiddling
/// reduction (Wilkes-Wheeler-Gill), without relying on a hardware popcount.
///
/// Bit counts are accumulated in progressively wider fields (2-bit, 4-bit,
/// 8-bit) and the per-byte totals are finally summed with one multiplication.
#[inline(always)]
pub(crate) fn popcnt64_classic(x: u64) -> u64 {
    const PAIR_MASK: u64 = 0x5555555555555555;
    const NIBBLE_MASK: u64 = 0x3333333333333333;
    const BYTE_MASK: u64 = 0x0F0F0F0F0F0F0F0F;
    const BYTE_ONES: u64 = 0x0101010101010101;

    // Each 2-bit field now holds the number of set bits it originally contained.
    // The subtraction cannot underflow: the subtrahend never exceeds `x`.
    let pairs = x - ((x >> 1) & PAIR_MASK);
    // Fold neighbouring 2-bit counts into 4-bit counts.
    let nibbles = (pairs & NIBBLE_MASK) + ((pairs >> 2) & NIBBLE_MASK);
    // Fold neighbouring 4-bit counts into per-byte counts.
    let bytes = (nibbles + (nibbles >> 4)) & BYTE_MASK;
    // The multiplication sums all eight byte counts into the top byte.
    bytes.wrapping_mul(BYTE_ONES) >> 56
}
16+
17+
/// Calculate hamming distance between two hex strings using classic algorithm
18+
/// Optimized with branchless lookup and bounds check elimination
19+
#[inline(always)]
20+
pub(crate) fn hamming_distance_string_classic(a: &[u8], b: &[u8]) -> Result<u64, &'static str> {
21+
let len = a.len();
22+
let mut result: u64 = 0;
23+
let mut i = 0;
24+
25+
// Process 4 hex chars at a time to reduce loop overhead
26+
while i + 4 <= len {
27+
// SAFETY: i + 3 < len verified by loop condition
28+
unsafe {
29+
let val1_0 = hex_char_to_nibble(*a.get_unchecked(i));
30+
let val2_0 = hex_char_to_nibble(*b.get_unchecked(i));
31+
let val1_1 = hex_char_to_nibble(*a.get_unchecked(i + 1));
32+
let val2_1 = hex_char_to_nibble(*b.get_unchecked(i + 1));
33+
let val1_2 = hex_char_to_nibble(*a.get_unchecked(i + 2));
34+
let val2_2 = hex_char_to_nibble(*b.get_unchecked(i + 2));
35+
let val1_3 = hex_char_to_nibble(*a.get_unchecked(i + 3));
36+
let val2_3 = hex_char_to_nibble(*b.get_unchecked(i + 3));
37+
38+
// Check all 8 values for validity (0xFF indicates invalid)
39+
// Use bitwise OR to combine checks - any 0xFF will result in high bit set
40+
let invalid = (val1_0 | val2_0 | val1_1 | val2_1 | val1_2 | val2_2 | val1_3 | val2_3) & 0xF0;
41+
if invalid != 0 {
42+
return Err("hex string contains invalid char");
43+
}
44+
45+
result += *LOOKUP.get_unchecked((val1_0 ^ val2_0) as usize) as u64
46+
+ *LOOKUP.get_unchecked((val1_1 ^ val2_1) as usize) as u64
47+
+ *LOOKUP.get_unchecked((val1_2 ^ val2_2) as usize) as u64
48+
+ *LOOKUP.get_unchecked((val1_3 ^ val2_3) as usize) as u64;
49+
}
50+
i += 4;
51+
}
52+
53+
// Handle remaining characters
54+
while i < len {
55+
// SAFETY: i < len verified by loop condition
56+
unsafe {
57+
let val1 = hex_char_to_nibble(*a.get_unchecked(i));
58+
let val2 = hex_char_to_nibble(*b.get_unchecked(i));
59+
if (val1 | val2) & 0xF0 != 0 {
60+
return Err("hex string contains invalid char");
61+
}
62+
result += *LOOKUP.get_unchecked((val1 ^ val2) as usize) as u64;
63+
}
64+
i += 1;
65+
}
66+
67+
Ok(result)
68+
}
69+
70+
/// Calculate hamming distance between two byte arrays using classic algorithm
71+
/// Optimized with loop unrolling and bounds check elimination
72+
#[inline(always)]
73+
pub(crate) fn hamming_distance_bytes_classic(a: &[u8], b: &[u8], max_dist: i64) -> u64 {
74+
let length = a.len();
75+
76+
if max_dist < 0 {
77+
// Full distance calculation - heavily optimized
78+
let mut difference: u64 = 0;
79+
let mut i = 0;
80+
81+
// Process 32 bytes at a time (4 x 8-byte chunks)
82+
while i + 32 <= length {
83+
// SAFETY: i + 31 < length verified by loop condition
84+
unsafe {
85+
let a0 = u64::from_ne_bytes(*(a.as_ptr().add(i) as *const [u8; 8]));
86+
let b0 = u64::from_ne_bytes(*(b.as_ptr().add(i) as *const [u8; 8]));
87+
let a1 = u64::from_ne_bytes(*(a.as_ptr().add(i + 8) as *const [u8; 8]));
88+
let b1 = u64::from_ne_bytes(*(b.as_ptr().add(i + 8) as *const [u8; 8]));
89+
let a2 = u64::from_ne_bytes(*(a.as_ptr().add(i + 16) as *const [u8; 8]));
90+
let b2 = u64::from_ne_bytes(*(b.as_ptr().add(i + 16) as *const [u8; 8]));
91+
let a3 = u64::from_ne_bytes(*(a.as_ptr().add(i + 24) as *const [u8; 8]));
92+
let b3 = u64::from_ne_bytes(*(b.as_ptr().add(i + 24) as *const [u8; 8]));
93+
94+
difference += popcnt64_classic(a0 ^ b0)
95+
+ popcnt64_classic(a1 ^ b1)
96+
+ popcnt64_classic(a2 ^ b2)
97+
+ popcnt64_classic(a3 ^ b3);
98+
}
99+
i += 32;
100+
}
101+
102+
// Process remaining 8-byte chunks
103+
while i + 8 <= length {
104+
unsafe {
105+
let a_chunk = u64::from_ne_bytes(*(a.as_ptr().add(i) as *const [u8; 8]));
106+
let b_chunk = u64::from_ne_bytes(*(b.as_ptr().add(i) as *const [u8; 8]));
107+
difference += popcnt64_classic(a_chunk ^ b_chunk);
108+
}
109+
i += 8;
110+
}
111+
112+
// Process remaining bytes
113+
while i < length {
114+
unsafe {
115+
difference += popcnt64_classic((*a.get_unchecked(i) ^ *b.get_unchecked(i)) as u64);
116+
}
117+
i += 1;
118+
}
119+
difference
120+
} else {
121+
// Early termination if exceeds max_dist
122+
let max_dist_u64 = max_dist as u64;
123+
let mut difference: u64 = 0;
124+
let mut i = 0;
125+
126+
while i + 8 <= length {
127+
unsafe {
128+
let a_chunk = u64::from_ne_bytes(*(a.as_ptr().add(i) as *const [u8; 8]));
129+
let b_chunk = u64::from_ne_bytes(*(b.as_ptr().add(i) as *const [u8; 8]));
130+
difference += popcnt64_classic(a_chunk ^ b_chunk);
131+
}
132+
if difference > max_dist_u64 {
133+
return 0;
134+
}
135+
i += 8;
136+
}
137+
while i < length {
138+
unsafe {
139+
difference += popcnt64_classic((*a.get_unchecked(i) ^ *b.get_unchecked(i)) as u64);
140+
}
141+
if difference > max_dist_u64 {
142+
return 0;
143+
}
144+
i += 1;
145+
}
146+
1
147+
}
148+
}

src/hex.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
use crate::HEX_LOOKUP;
2+
3+
/// Branchless hex character to nibble conversion using lookup table
4+
/// Returns 0xFF for invalid characters
5+
#[inline(always)]
6+
pub(crate) fn hex_char_to_nibble(c: u8) -> u8 {
7+
// SAFETY: c is u8, so always in bounds of 256-element table
8+
unsafe { *HEX_LOOKUP.get_unchecked(c as usize) }
9+
}
10+
11+
/// Convert a hex character to its numeric value (0-15)
12+
/// Returns None if the character is not a valid hex digit
13+
#[inline(always)]
14+
#[allow(dead_code)]
15+
pub(crate) fn hex_char_to_val(c: u8) -> Option<u8> {
16+
let val = hex_char_to_nibble(c);
17+
if val == 0xFF { None } else { Some(val) }
18+
}

0 commit comments

Comments
 (0)