Skip to content

Commit cda51bc

Browse files
use oncecell to store computed start_pos instead of storing it on shorter-lived CursorData (#10)
1 parent c8b1b41 commit cda51bc

File tree

4 files changed

+77
-48
lines changed

4 files changed

+77
-48
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ rustdoc-args = ["--cfg=doc_cfg"]
1717
[dependencies]
1818
bytes = "1.3.0"
1919
futures-io-03 = { package = "futures-io", version = "0.3.25", optional = true }
20+
once_cell = "1.4.0"
2021
tokio = { version = "1.0.0", features = ["io-std"], optional = true }
2122

2223
[dev-dependencies]

src/cursor/mod.rs

Lines changed: 36 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ impl<T: AsRef<BufList>> Cursor<T> {
5353
/// let cursor = Cursor::new(BufList::new());
5454
/// ```
5555
pub fn new(inner: T) -> Cursor<T> {
56-
let data = CursorData::new(inner.as_ref());
56+
let data = CursorData::new();
5757
Cursor { inner, data }
5858
}
5959

@@ -170,15 +170,15 @@ impl<T: AsRef<BufList>> Cursor<T> {
170170
/// assert_eq!(cursor.position(), 4);
171171
/// ```
172172
pub fn set_position(&mut self, pos: u64) {
173-
self.data.set_pos(pos);
173+
self.data.set_pos(self.inner.as_ref(), pos);
174174
}
175175

176176
// ---
177177
// Helper methods
178178
// ---
179179
#[cfg(test)]
180180
fn assert_invariants(&self) -> anyhow::Result<()> {
181-
self.data.assert_invariants()
181+
self.data.assert_invariants(self.inner.as_ref())
182182
}
183183
}
184184

@@ -203,7 +203,7 @@ where
203203

204204
impl<T: AsRef<BufList>> io::Seek for Cursor<T> {
205205
fn seek(&mut self, style: SeekFrom) -> io::Result<u64> {
206-
self.data.seek_impl(style)
206+
self.data.seek_impl(self.inner.as_ref(), style)
207207
}
208208

209209
#[cfg(seek_convenience)]
@@ -234,16 +234,12 @@ impl<T: AsRef<BufList>> io::BufRead for Cursor<T> {
234234
}
235235

236236
fn consume(&mut self, amt: usize) {
237-
self.data.consume_impl(amt);
237+
self.data.consume_impl(self.inner.as_ref(), amt);
238238
}
239239
}
240240

241241
#[derive(Clone, Debug)]
242242
struct CursorData {
243-
/// An index of chunks and their start positions. There's an additional index at the end, which
244-
/// is the length of the list (list.num_bytes()).
245-
start_pos: Box<[u64]>,
246-
247243
/// The chunk number the cursor is pointing to. Kept in sync with pos.
248244
///
249245
/// This is within the range [0, self.start_pos.len()). It is self.start_pos.len() - 1 iff pos
@@ -255,36 +251,23 @@ struct CursorData {
255251
}
256252

257253
impl CursorData {
258-
fn new(inner: &BufList) -> Self {
259-
let mut start_pos = Vec::with_capacity(inner.num_chunks() + 1);
260-
let mut next = 0u64;
261-
for chunk in inner.iter() {
262-
start_pos.push(next);
263-
next += chunk.len() as u64;
264-
}
265-
// Add the length of the chunk at the end.
266-
start_pos.push(next);
267-
268-
Self {
269-
start_pos: start_pos.into_boxed_slice(),
270-
chunk: 0,
271-
pos: 0,
272-
}
254+
fn new() -> Self {
255+
Self { chunk: 0, pos: 0 }
273256
}
274257

275258
#[cfg(test)]
276-
fn assert_invariants(&self) -> anyhow::Result<()> {
259+
fn assert_invariants(&self, list: &BufList) -> anyhow::Result<()> {
277260
use anyhow::ensure;
278261

279262
ensure!(
280-
self.pos >= self.start_pos[self.chunk],
263+
self.pos >= list.get_start_pos()[self.chunk],
281264
"invariant failed: current position {} >= start position {} (chunk = {})",
282265
self.pos,
283-
self.start_pos[self.chunk],
266+
list.get_start_pos()[self.chunk],
284267
self.chunk
285268
);
286269

287-
let next_pos = self.start_pos.get(self.chunk + 1).copied().into();
270+
let next_pos = list.get_start_pos().get(self.chunk + 1).copied().into();
288271
ensure!(
289272
Offset::Value(self.pos) < next_pos,
290273
"invariant failed: next start position {:?} > current position {} (chunk = {})",
@@ -296,13 +279,13 @@ impl CursorData {
296279
Ok(())
297280
}
298281

299-
fn seek_impl(&mut self, style: SeekFrom) -> io::Result<u64> {
282+
fn seek_impl(&mut self, list: &BufList, style: SeekFrom) -> io::Result<u64> {
300283
let (base_pos, offset) = match style {
301284
SeekFrom::Start(n) => {
302-
self.set_pos(n);
285+
self.set_pos(list, n);
303286
return Ok(n);
304287
}
305-
SeekFrom::End(n) => (self.num_bytes(), n),
288+
SeekFrom::End(n) => (self.num_bytes(list), n),
306289
SeekFrom::Current(n) => (self.pos, n),
307290
};
308291
// Can't use checked_add_signed since it was only stabilized in Rust 1.66. This is adapted
@@ -315,7 +298,7 @@ impl CursorData {
315298
};
316299
match new_pos {
317300
Some(n) => {
318-
self.set_pos(n);
301+
self.set_pos(list, n);
319302
Ok(self.pos)
320303
}
321304
None => Err(io::Error::new(
@@ -370,7 +353,7 @@ impl CursorData {
370353

371354
fn read_exact_impl(&mut self, list: &BufList, buf: &mut [u8]) -> io::Result<()> {
372355
// This is the same as read_impl as long as there's enough space.
373-
let remaining = self.num_bytes().saturating_sub(self.pos);
356+
let remaining = self.num_bytes(list).saturating_sub(self.pos);
374357
let buf_len = buf.len();
375358
if remaining < buf_len as u64 {
376359
return Err(io::Error::new(
@@ -383,7 +366,7 @@ impl CursorData {
383366
Ok(())
384367
}
385368

386-
fn fill_buf_impl<'a>(&self, list: &'a BufList) -> &'a [u8] {
369+
fn fill_buf_impl<'a>(&'a self, list: &'a BufList) -> &[u8] {
387370
const EMPTY_SLICE: &[u8] = &[];
388371
match self.get_chunk_and_pos(list) {
389372
Some((chunk, chunk_pos)) => &chunk.as_ref()[chunk_pos..],
@@ -392,14 +375,15 @@ impl CursorData {
392375
}
393376
}
394377

395-
fn consume_impl(&mut self, amt: usize) {
396-
self.set_pos(self.pos + amt as u64);
378+
fn consume_impl(&mut self, list: &BufList, amt: usize) {
379+
self.set_pos(list, self.pos + amt as u64);
397380
}
398381

399-
fn set_pos(&mut self, new_pos: u64) {
382+
fn set_pos(&mut self, list: &BufList, new_pos: u64) {
400383
match new_pos.cmp(&self.pos) {
401384
Ordering::Greater => {
402-
let next_start = self.start_pos.get(self.chunk + 1).copied().into();
385+
let start_pos = list.get_start_pos();
386+
let next_start = start_pos.get(self.chunk + 1).copied().into();
403387
if Offset::Value(new_pos) < next_start {
404388
// Within the same chunk.
405389
} else {
@@ -408,7 +392,7 @@ impl CursorData {
408392
// n).
409393
//
410394
// Do a binary search for this element.
411-
match self.start_pos[self.chunk + 1..].binary_search(&new_pos) {
395+
match start_pos[self.chunk + 1..].binary_search(&new_pos) {
412396
// We're starting the search from self.chunk + 1, which means that the value
413397
// returned from binary_search is 1 less than the actual delta.
414398
Ok(delta_minus_one) => {
@@ -431,10 +415,11 @@ impl CursorData {
431415
}
432416
Ordering::Equal => {}
433417
Ordering::Less => {
434-
if self.start_pos.get(self.chunk).copied() <= Some(new_pos) {
418+
let start_pos = list.get_start_pos();
419+
if start_pos.get(self.chunk).copied() <= Some(new_pos) {
435420
// Within the same chunk.
436421
} else {
437-
match self.start_pos[..self.chunk].binary_search(&new_pos) {
422+
match start_pos[..self.chunk].binary_search(&new_pos) {
438423
Ok(chunk) => {
439424
// Exactly at the start point of a chunk.
440425
self.chunk = chunk;
@@ -456,17 +441,20 @@ impl CursorData {
456441
}
457442

458443
#[inline]
459-
fn get_chunk_and_pos<'a>(&self, list: &'a BufList) -> Option<(&'a Bytes, usize)> {
444+
fn get_chunk_and_pos<'b>(&self, list: &'b BufList) -> Option<(&'b Bytes, usize)> {
460445
match list.get_chunk(self.chunk) {
461446
Some(chunk) => {
462447
// This guarantees that pos is not past the end of the list.
463448
debug_assert!(
464-
self.pos < self.num_bytes(),
449+
self.pos < self.num_bytes(list),
465450
"self.pos ({}) is less than num_bytes ({})",
466451
self.pos,
467-
self.num_bytes()
452+
self.num_bytes(list)
468453
);
469-
Some((chunk, (self.pos - self.start_pos[self.chunk]) as usize))
454+
Some((
455+
chunk,
456+
(self.pos - list.get_start_pos()[self.chunk]) as usize,
457+
))
470458
}
471459
None => {
472460
// pos is past the end of the list.
@@ -475,9 +463,9 @@ impl CursorData {
475463
}
476464
}
477465

478-
fn num_bytes(&self) -> u64 {
479-
*self
480-
.start_pos
466+
fn num_bytes(&self, list: &BufList) -> u64 {
467+
*list
468+
.get_start_pos()
481469
.last()
482470
.expect("start_pos always has at least one element")
483471
}

src/imp.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// SPDX-License-Identifier: Apache-2.0
44

55
use bytes::{Buf, BufMut, Bytes, BytesMut};
6+
use once_cell::sync::OnceCell;
67
use std::{
78
collections::VecDeque,
89
io::IoSlice,
@@ -16,6 +17,10 @@ use std::{
1617
pub struct BufList {
1718
// Invariant: none of the bufs in this queue are zero-length.
1819
bufs: VecDeque<Bytes>,
20+
21+
/// An index of chunks and their start positions. There's an additional index at the end, which
22+
/// is the length of the list (list.num_bytes()).
23+
start_pos: OnceCell<Box<[u64]>>,
1924
}
2025

2126
impl BufList {
@@ -25,11 +30,27 @@ impl BufList {
2530
Self::default()
2631
}
2732

33+
#[inline]
34+
pub(crate) fn get_start_pos(&self) -> &[u64] {
35+
self.start_pos.get_or_init(|| {
36+
let mut start_pos = Vec::with_capacity(self.bufs.len() + 1);
37+
let mut next = 0u64;
38+
for chunk in self.bufs.iter() {
39+
start_pos.push(next);
40+
next += chunk.len() as u64;
41+
}
42+
// Add the length of the chunk at the end.
43+
start_pos.push(next);
44+
start_pos.into_boxed_slice()
45+
})
46+
}
47+
2848
/// Creates a new, empty, `BufList` with the given capacity.
2949
#[inline]
3050
pub fn with_capacity(capacity: usize) -> Self {
3151
Self {
3252
bufs: VecDeque::with_capacity(capacity),
53+
start_pos: OnceCell::new(),
3354
}
3455
}
3556

@@ -112,6 +133,9 @@ impl BufList {
112133
/// assert_eq!(buf_list.num_chunks(), 2);
113134
/// ```
114135
pub fn push_chunk<B: Buf>(&mut self, mut data: B) -> Bytes {
136+
// mutable borrow acquired, invalidate oncecell
137+
self.start_pos = OnceCell::new();
138+
115139
let len = data.remaining();
116140
// `data` is (almost) certainly a `Bytes`, so `copy_to_bytes` should
117141
// internally be a cheap refcount bump almost all of the time.
@@ -131,6 +155,9 @@ impl BufList {
131155

132156
impl<B: Buf> Extend<B> for BufList {
133157
fn extend<T: IntoIterator<Item = B>>(&mut self, iter: T) {
158+
// mutable borrow acquired, invalidate oncecell
159+
self.start_pos = OnceCell::new();
160+
134161
for buf in iter.into_iter() {
135162
self.push_chunk(buf);
136163
}
@@ -204,6 +231,9 @@ impl Buf for BufList {
204231
}
205232

206233
fn advance(&mut self, mut amt: usize) {
234+
// mutable borrow acquired, invalidate oncecell
235+
self.start_pos = OnceCell::new();
236+
207237
while amt > 0 {
208238
let rem = self.bufs[0].remaining();
209239
// If the amount to advance by is less than the first buffer in
@@ -224,6 +254,9 @@ impl Buf for BufList {
224254
}
225255

226256
fn copy_to_bytes(&mut self, len: usize) -> Bytes {
257+
// mutable borrow acquired, invalidate oncecell
258+
self.start_pos = OnceCell::new();
259+
227260
// If the length of the requested `Bytes` is <= the length of the front
228261
// buffer, we can just use its `copy_to_bytes` implementation (which is
229262
// just a reference count bump).

0 commit comments

Comments
 (0)