Skip to content

Commit 41b7550

Browse files
committed
fix: improve parallelization
1 parent f280fb7 commit 41b7550

1 file changed

Lines changed: 24 additions & 8 deletions

File tree

fuel-vm/src/interpreter/memory.rs

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,7 +1115,12 @@ fn slices_equal_avx2(a: &[u8], b: &[u8]) -> bool {
11151115
}
11161116

11171117
unsafe {
1118-
let mut aggregate_mask = -1i32;
1118+
let mut aggregate_mask_a = -1i32;
1119+
let mut aggregate_mask_b = -1i32;
1120+
let mut aggregate_mask_c = -1i32;
1121+
let mut aggregate_mask_d = -1i32;
1122+
let mut aggregate_mask_a_b = -1i32;
1123+
let mut aggregate_mask_c_d = -1i32;
11191124

11201125
while i + CHUNK <= len {
11211126
let simd_a1 = _mm256_loadu_si256(a.as_ptr().add(i) as *const _);
@@ -1151,9 +1156,15 @@ fn slices_equal_avx2(a: &[u8], b: &[u8]) -> bool {
11511156
let cmp7 = _mm256_movemask_epi8(_mm256_cmpeq_epi8(simd_a7, simd_b7));
11521157
let cmp8 = _mm256_movemask_epi8(_mm256_cmpeq_epi8(simd_a8, simd_b8));
11531158

1154-
aggregate_mask &= cmp1 & cmp2 & cmp3 & cmp4 & cmp5 & cmp6 & cmp7 & cmp8;
1159+
aggregate_mask_a &= cmp1 & cmp2;
1160+
aggregate_mask_b &= cmp3 & cmp4;
1161+
aggregate_mask_c &= cmp5 & cmp6;
1162+
aggregate_mask_d &= cmp7 & cmp8;
11551163

1156-
if aggregate_mask != -1i32 {
1164+
aggregate_mask_a_b &= aggregate_mask_a & aggregate_mask_b;
1165+
aggregate_mask_c_d &= aggregate_mask_c & aggregate_mask_d;
1166+
1167+
if aggregate_mask_a_b & aggregate_mask_c_d != -1i32 {
11571168
return false;
11581169
}
11591170

@@ -1209,7 +1220,6 @@ fn slices_equal_avx512(a: &[u8], b: &[u8]) -> bool {
12091220
let simd_a8 = _mm512_loadu_si512(a.as_ptr().add(i + 448) as *const _);
12101221
let simd_b8 = _mm512_loadu_si512(b.as_ptr().add(i + 448) as *const _);
12111222

1212-
// Compare each pair of registers
12131223
let cmp1 = _mm512_cmpeq_epi8_mask(simd_a1, simd_b1);
12141224
let cmp2 = _mm512_cmpeq_epi8_mask(simd_a2, simd_b2);
12151225
let cmp3 = _mm512_cmpeq_epi8_mask(simd_a3, simd_b3);
@@ -1219,11 +1229,17 @@ fn slices_equal_avx512(a: &[u8], b: &[u8]) -> bool {
12191229
let cmp7 = _mm512_cmpeq_epi8_mask(simd_a7, simd_b7);
12201230
let cmp8 = _mm512_cmpeq_epi8_mask(simd_a8, simd_b8);
12211231

1222-
// Combine all comparison masks
1223-
let combined_cmp = cmp1 & cmp2 & cmp3 & cmp4 & cmp5 & cmp6 & cmp7 & cmp8;
1232+
let cmp1_2 = cmp1 & cmp2;
1233+
let cmp3_4 = cmp3 & cmp4;
1234+
let cmp5_6 = cmp5 & cmp6;
1235+
let cmp7_8 = cmp7 & cmp8;
1236+
1237+
let cmp1_4 = cmp1_2 & cmp3_4;
1238+
let cmp5_8 = cmp5_6 & cmp7_8;
1239+
1240+
let full_cmp = cmp1_4 & cmp5_8;
12241241

1225-
// Check if all bytes are equal (mask should have all bits set)
1226-
if combined_cmp != u64::MAX {
1242+
if full_cmp != u64::MAX {
12271243
return false;
12281244
}
12291245

0 commit comments

Comments
 (0)