|
63 | 63 | let head = self.head.load(Ordering::Relaxed); |
64 | 64 | let next_head = I::wrap(head + 1, self.storage.capacity()); |
65 | 65 |
|
| 66 | + #[cfg(feature = "prefetch")] |
| 67 | + crate::raw::prefetch_slot_write(&self.storage, next_head); |
| 68 | + |
66 | 69 | if next_head == self.tail_cached.get() { |
67 | 70 | let tail = self.tail.load(Ordering::Acquire); |
68 | 71 | self.tail_cached.set(tail); |
|
83 | 86 | pub(crate) fn pop(&self, out: &mut T) -> bool { |
84 | 87 | let tail = self.tail.load(Ordering::Relaxed); |
85 | 88 |
|
| 89 | + #[cfg(feature = "prefetch")] |
| 90 | + crate::raw::prefetch_slot_read(&self.storage, tail); |
| 91 | + |
86 | 92 | if tail == self.head_cached.get() { |
87 | 93 | let head = self.head.load(Ordering::Acquire); |
88 | 94 | self.head_cached.set(head); |
@@ -133,13 +139,29 @@ where |
133 | 139 | } |
134 | 140 |
|
135 | 141 | let n = src.len().min(free); |
136 | | - let mut idx = head; |
137 | | - for item in &src[..n] { |
138 | | - crate::copy_ring::raw::write_slot_copy::<T, S, CP>(&self.storage, idx, item); |
139 | | - idx = I::wrap(idx + 1, cap); |
| 142 | + |
| 143 | + // Two-chunk contiguous copy bypassing per-element CopyPolicy dispatch. |
| 144 | + // `memcpy` auto-vectorizes for bulk transfers; per-element SIMD |
| 145 | + // dispatch adds call overhead that dominates for large batches. |
| 146 | + // |
| 147 | + // `first_chunk`: slots from head to end of backing array (no wrap). |
| 148 | + // `second_chunk`: remaining slots written from index 0 (wrap). |
| 149 | + let first_chunk = n.min(cap - head); |
| 150 | + let second_chunk = n - first_chunk; |
| 151 | + |
| 152 | + // First chunk: slots head..head+first_chunk (no wrap). |
| 153 | + // `head < cap` (ring invariant) and `first_chunk <= cap - head`, |
| 154 | + // so `head + first_chunk <= cap`. Producer owns this range. |
| 155 | + crate::copy_ring::raw::write_batch_copy::<T, S>(&self.storage, head, &src[..first_chunk]); |
| 156 | + |
| 157 | + if second_chunk > 0 { |
| 158 | + // Second chunk: wraps to slots 0..second_chunk. |
| 159 | +            // `second_chunk = n - first_chunk <= n <= free < cap`, |
| 160 | +            // so `second_chunk < cap`. Producer owns this range. |
| 161 | + crate::copy_ring::raw::write_batch_copy::<T, S>(&self.storage, 0, &src[first_chunk..n]); |
140 | 162 | } |
141 | 163 |
|
142 | | - self.head.store(idx, Ordering::Release); |
| 164 | + self.head.store(I::wrap(head + n, cap), Ordering::Release); |
143 | 165 | n |
144 | 166 | } |
145 | 167 |
|
@@ -173,13 +195,37 @@ where |
173 | 195 | } |
174 | 196 |
|
175 | 197 | let n = dst.len().min(avail); |
176 | | - let mut idx = tail; |
177 | | - for out in &mut dst[..n] { |
178 | | - crate::copy_ring::raw::read_slot_copy::<T, S, CP>(&self.storage, idx, out); |
179 | | - idx = I::wrap(idx + 1, cap); |
| 198 | + |
| 199 | + // Two-chunk contiguous copy symmetric to push_batch. |
| 200 | + // `memcpy` auto-vectorizes for bulk transfers; per-element dispatch |
| 201 | + // adds call overhead that dominates for large batches. |
| 202 | + // |
| 203 | + // `first_chunk`: slots from tail to end of backing array (no wrap). |
| 204 | + // `second_chunk`: remaining slots read from index 0 (wrap). |
| 205 | + let first_chunk = n.min(cap - tail); |
| 206 | + let second_chunk = n - first_chunk; |
| 207 | + |
| 208 | + // First chunk: slots tail..tail+first_chunk (no wrap). |
| 209 | + // `tail < cap` (ring invariant) and `first_chunk <= cap - tail`, |
| 210 | + // so `tail + first_chunk <= cap`. Consumer owns this range. |
| 211 | + crate::copy_ring::raw::read_batch_copy::<T, S>( |
| 212 | + &self.storage, |
| 213 | + tail, |
| 214 | + &mut dst[..first_chunk], |
| 215 | + ); |
| 216 | + |
| 217 | + if second_chunk > 0 { |
| 218 | + // Second chunk: wraps to slots 0..second_chunk. |
| 219 | +            // `second_chunk = n - first_chunk <= n <= avail < cap`, |
| 220 | +            // so `second_chunk < cap`. Consumer owns this range. |
| 221 | + crate::copy_ring::raw::read_batch_copy::<T, S>( |
| 222 | + &self.storage, |
| 223 | + 0, |
| 224 | + &mut dst[first_chunk..n], |
| 225 | + ); |
180 | 226 | } |
181 | 227 |
|
182 | | - self.tail.store(idx, Ordering::Release); |
| 228 | + self.tail.store(I::wrap(tail + n, cap), Ordering::Release); |
183 | 229 | n |
184 | 230 | } |
185 | 231 |
|
@@ -212,6 +258,7 @@ where |
212 | 258 | } |
213 | 259 |
|
214 | 260 | #[cfg(test)] |
| 261 | +#[expect(clippy::cast_sign_loss, reason = "test-only usize→u64 conversions")] |
215 | 262 | mod tests { |
216 | 263 | extern crate std; |
217 | 264 | use std::vec; |
@@ -361,6 +408,46 @@ mod tests { |
361 | 408 | assert_eq!(out, vec![100, 101, 102, 103, 104]); |
362 | 409 | } |
363 | 410 |
|
| 411 | + #[test] |
| 412 | + fn pop_batch_wraparound_contiguous() { |
| 413 | + let engine = new_engine(); // capacity=8, usable=7 |
| 414 | + // Advance tail to near end |
| 415 | + let fill: Vec<u64> = (0..6).collect(); |
| 416 | + engine.push_batch(&fill); |
| 417 | + let mut drain = vec![0u64; 6]; |
| 418 | + engine.pop_batch(&mut drain); |
| 419 | + |
| 420 | + // Push 5 (wraps around), then batch-pop all 5 |
| 421 | + let wrap_src: Vec<u64> = (300..305).collect(); |
| 422 | + engine.push_batch(&wrap_src); |
| 423 | + |
| 424 | + let mut out = vec![0u64; 5]; |
| 425 | + let popped = engine.pop_batch(&mut out); |
| 426 | + assert_eq!(popped, 5); |
| 427 | + assert_eq!(out, vec![300, 301, 302, 303, 304]); |
| 428 | + } |
| 429 | + |
| 430 | + #[test] |
| 431 | + fn push_batch_wraparound_contiguous() { |
| 432 | + // Advance head to near end of buffer, then batch-push across wrap |
| 433 | + let engine = new_engine(); // capacity=8, usable=7 |
| 434 | + // Fill 6, drain 6 — head and tail now at index 6 |
| 435 | + let fill: Vec<u64> = (0..6).collect(); |
| 436 | + engine.push_batch(&fill); |
| 437 | + let mut drain = vec![0u64; 6]; |
| 438 | + engine.pop_batch(&mut drain); |
| 439 | + |
| 440 | + // Now push 5 elements starting at index 6: wraps at index 8 -> 0 |
| 441 | + let wrap_src: Vec<u64> = (200..205).collect(); |
| 442 | + let pushed = engine.push_batch(&wrap_src); |
| 443 | + assert_eq!(pushed, 5); |
| 444 | + |
| 445 | + let mut out = vec![0u64; 5]; |
| 446 | + let popped = engine.pop_batch(&mut out); |
| 447 | + assert_eq!(popped, 5); |
| 448 | + assert_eq!(out, vec![200, 201, 202, 203, 204]); |
| 449 | + } |
| 450 | + |
364 | 451 | #[test] |
365 | 452 | fn batch_fifo_ordering() { |
366 | 453 | let engine = new_engine(); |
@@ -391,4 +478,89 @@ mod tests { |
391 | 478 | engine.pop(&mut out); |
392 | 479 | assert_eq!(engine.len(), 0); |
393 | 480 | } |
| 481 | + |
| 482 | + #[test] |
| 483 | + fn push_pop_batch_differential() { |
| 484 | +        // Verify batch push/pop preserve FIFO ordering at every head/tail |
| 485 | +        // offset and batch size, exercising both wrapped and unwrapped paths. |
| 486 | + for fill_first in 0..7 { |
| 487 | + for batch_size in 1..=7 { |
| 488 | + let engine = new_engine(); // capacity=8, usable=7 |
| 489 | + |
| 490 | + // Advance head/tail by fill_first positions |
| 491 | + for i in 0..fill_first { |
| 492 | + assert!(engine.push(&(i as u64))); |
| 493 | + } |
| 494 | + let mut drain = vec![0u64; fill_first]; |
| 495 | + engine.pop_batch(&mut drain); |
| 496 | + |
| 497 | + // Batch push |
| 498 | + let src: Vec<u64> = (100_u64..100 + batch_size as u64).collect(); |
| 499 | + let pushed = engine.push_batch(&src); |
| 500 | + |
| 501 | + // Batch pop |
| 502 | + let mut dst = vec![0u64; pushed]; |
| 503 | + let popped = engine.pop_batch(&mut dst); |
| 504 | + |
| 505 | + assert_eq!(popped, pushed, "fill={fill_first} batch={batch_size}"); |
| 506 | + assert_eq!( |
| 507 | + dst, |
| 508 | + src[..pushed].to_vec(), |
| 509 | + "FIFO violated: fill={fill_first} batch={batch_size}" |
| 510 | + ); |
| 511 | + } |
| 512 | + } |
| 513 | + } |
| 514 | +} |
| 515 | + |
| 516 | +// proptest uses `getcwd` for failure persistence, which Miri's isolation blocks. |
| 517 | +#[cfg(all(test, not(miri)))] |
| 518 | +mod proptest_tests { |
| 519 | + use super::*; |
| 520 | + use crate::storage::InlineStorage; |
| 521 | + use mantis_core::{ImmediatePush, NoInstr, Pow2Masked}; |
| 522 | + use proptest::prelude::*; |
| 523 | + |
| 524 | + extern crate std; |
| 525 | + use std::vec; |
| 526 | + use std::vec::Vec; |
| 527 | + |
| 528 | + type TestEngine = CopyRingEngine< |
| 529 | + u64, |
| 530 | + InlineStorage<u64, 8>, |
| 531 | + Pow2Masked, |
| 532 | + ImmediatePush, |
| 533 | + NoInstr, |
| 534 | + mantis_platform::DefaultCopyPolicy, |
| 535 | + >; |
| 536 | + |
| 537 | + fn new_engine() -> TestEngine { |
| 538 | + CopyRingEngine::new(InlineStorage::new(), NoInstr) |
| 539 | + } |
| 540 | + |
| 541 | + proptest! { |
| 542 | + #[test] |
| 543 | + fn batch_fifo_preserved( |
| 544 | + fill_level in 0usize..7, |
| 545 | + batch_size in 1usize..8, |
| 546 | + ) { |
| 547 | + let engine = new_engine(); |
| 548 | + |
| 549 | + // Advance to fill_level |
| 550 | + for i in 0..fill_level { |
| 551 | + engine.push(&(i as u64)); |
| 552 | + } |
| 553 | + let mut drain = vec![0u64; fill_level]; |
| 554 | + engine.pop_batch(&mut drain); |
| 555 | + |
| 556 | + let src: Vec<u64> = (0..batch_size as u64).collect(); |
| 557 | + let pushed = engine.push_batch(&src); |
| 558 | + |
| 559 | + let mut dst = vec![0u64; pushed]; |
| 560 | + let popped = engine.pop_batch(&mut dst); |
| 561 | + |
| 562 | + prop_assert_eq!(popped, pushed); |
| 563 | + prop_assert_eq!(dst, src[..pushed].to_vec()); |
| 564 | + } |
| 565 | + } |
394 | 566 | } |
0 commit comments