Skip to content

Commit 7ae5a89

Browse files
Merge pull request #28 from triblespace/codex/integrate-bytes-and-view-types-with-winnow
Add winnow parser integration
2 parents ce92657 + b6d75c7 commit 7ae5a89

File tree

7 files changed

+294
-0
lines changed

7 files changed

+294
-0
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,14 @@
1010
- move weak reference and downcasting examples into module docs
1111
- expand module introduction describing use cases
1212
- document rationale for separating `ByteSource` and `ByteOwner`
13+
- added optional `winnow` feature for parser integration
1314
- added `INVENTORY.md` for tracking future work and noted it in `AGENTS.md`
15+
- documented safety rationale for `winnow` integration
16+
- implemented `Stream` directly for `Bytes` with a safe `iter_offsets` iterator
17+
- added `pop_back` and `pop_front` helpers and rewrote parser examples
1418
- removed the Completed Work section from `INVENTORY.md` and documented its use
19+
- rewrote `winnow::view` to use safe helpers and added `view_elems(count)` parser
20+
- `winnow::view_elems` now returns a Parser closure for idiomatic usage
1521
in a dedicated AGENTS section
1622
- add tests for weak reference upgrade/downgrade and Kani proofs for view helpers
1723
- add examples for quick start and PyBytes usage

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ ownedbytes = { version = "0.9.0", optional = true }
1212
memmap2 = { version = "0.9.5", optional = true }
1313
zerocopy = { version = "0.8.26", optional = true, features = ["derive"] }
1414
pyo3 = { version = "0.25.1", optional = true }
15+
winnow = { version = "0.7.12", optional = true }
1516

1617
[dev-dependencies]
1718
proptest = "1.7"
@@ -23,6 +24,7 @@ ownedbytes = ["dep:ownedbytes"]
2324
mmap = ["dep:memmap2"]
2425
zerocopy = ["dep:zerocopy"]
2526
pyo3 = ["dep:pyo3"]
27+
winnow = ["dep:winnow"]
2628

2729
[lints.rust]
2830
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)'] }

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ Other optional features provide additional integrations:
7575
- `mmap` – enables memory-mapped file handling via the `memmap2` crate.
7676
- `zerocopy` – exposes the [`view`](src/view.rs) module for typed zero-copy access and allows using `zerocopy` types as sources.
7777
- `pyo3` – builds the [`pybytes`](src/pybytes.rs) module to provide Python bindings for `Bytes`.
78+
- `winnow` – implements the [`Stream`](https://docs.rs/winnow/) traits for `Bytes` and offers parsers (`view`, `view_elems(count)`) that return typed `View`s.
7879

7980
Enabling the `pyo3` feature requires the Python development headers and libraries
8081
(for example `libpython3.x`). Running `cargo test --all-features` therefore

src/bytes.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,20 @@ impl Bytes {
292292
})
293293
}
294294

295+
/// Removes and returns the first byte of `self`.
296+
pub fn pop_front(&mut self) -> Option<u8> {
297+
let (&b, rest) = self.data.split_first()?;
298+
self.data = rest;
299+
Some(b)
300+
}
301+
302+
/// Removes and returns the last byte of `self`.
303+
pub fn pop_back(&mut self) -> Option<u8> {
304+
let (last, rest) = self.data.split_last()?;
305+
self.data = rest;
306+
Some(*last)
307+
}
308+
295309
/// Create a weak pointer.
296310
pub fn downgrade(&self) -> WeakBytes {
297311
WeakBytes {

src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ pub mod view;
2121
/// Python bindings for [`Bytes`].
2222
pub mod pybytes;
2323

24+
#[cfg(feature = "winnow")]
25+
/// Integration with the `winnow` parser library.
26+
pub mod winnow;
27+
2428
#[cfg(test)]
2529
mod tests;
2630

src/tests.rs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,56 @@ fn test_weakview_clone_upgrade() {
155155
assert!(weak_clone.upgrade().is_none());
156156
}
157157

158+
#[cfg(feature = "winnow")]
#[test]
fn test_winnow_stream_take() {
    use winnow::error::ContextError;
    use winnow::stream::AsBytes;
    use winnow::token::take;
    use winnow::Parser;

    // `take(2)` must hand back the first two bytes and advance the stream past them.
    let mut remaining = Bytes::from(vec![1u8, 2, 3, 4]);
    let mut take_two = take::<_, _, ContextError>(2usize);
    let head: Bytes = take_two.parse_next(&mut remaining).expect("take");

    assert_eq!(head.as_ref(), [1u8, 2].as_ref());
    assert_eq!(remaining.as_bytes(), [3u8, 4].as_ref());
}
172+
173+
#[cfg(all(feature = "winnow", feature = "zerocopy"))]
#[test]
fn test_winnow_view_parser() {
    use winnow::error::ContextError;
    use winnow::stream::AsBytes;
    use winnow::Parser;

    #[derive(zerocopy::TryFromBytes, zerocopy::KnownLayout, zerocopy::Immutable)]
    #[repr(C)]
    struct Pair {
        a: u16,
        b: u16,
    }

    // The view reinterprets the bytes as native-endian `u16` fields, so the
    // fixture is encoded with `to_ne_bytes` rather than a hard-coded
    // little-endian literal; this keeps the assertions valid on big-endian
    // targets as well.
    let mut raw = Vec::with_capacity(4);
    raw.extend_from_slice(&1u16.to_ne_bytes());
    raw.extend_from_slice(&2u16.to_ne_bytes());

    let mut input = Bytes::from(raw);
    let mut parser = crate::winnow::view::<Pair, ContextError>;
    let view = parser.parse_next(&mut input).expect("view");
    assert_eq!(view.a, 1);
    assert_eq!(view.b, 2);
    // The whole input was consumed by the single `Pair`.
    assert!(input.as_bytes().is_empty());
}
193+
194+
#[cfg(all(feature = "winnow", feature = "zerocopy"))]
#[test]
fn test_winnow_view_elems_parser() {
    use winnow::error::ContextError;
    use winnow::stream::AsBytes;
    use winnow::Parser;

    // Asking for three elements of `[u8]` must yield the first three bytes
    // and leave exactly one byte behind in the stream.
    let mut remaining = Bytes::from(vec![1u8, 2, 3, 4]);
    let mut three_elems = crate::winnow::view_elems::<[u8], ContextError>(3);
    let elems = three_elems.parse_next(&mut remaining).expect("view_elems");

    assert_eq!(elems.as_ref(), [1u8, 2, 3].as_ref());
    assert_eq!(remaining.as_bytes(), [4u8].as_ref());
}
207+
158208
#[cfg(feature = "mmap")]
159209
#[test]
160210
fn test_mmap_mut_source() {

src/winnow.rs

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
// Winnow integration for anybytes
2+
3+
use crate::Bytes;
4+
use std::num::NonZeroUsize;
5+
use winnow::error::{ErrMode, Needed, ParserError};
6+
use winnow::stream::{
7+
AsBytes, Compare, CompareResult, FindSlice, Offset, SliceLen, Stream, StreamIsPartial,
8+
UpdateSlice,
9+
};
10+
11+
#[cfg(feature = "zerocopy")]
12+
use crate::view::View;
13+
#[cfg(feature = "zerocopy")]
14+
use zerocopy::{Immutable, KnownLayout, TryFromBytes};
15+
16+
/// Checkpoint for [`Bytes`] parsing with winnow.
17+
#[derive(Clone, Debug)]
18+
pub struct BytesCheckpoint(Bytes);
19+
20+
/// Iterator yielding `(offset, byte)` pairs for [`Bytes`].
21+
#[derive(Clone, Debug)]
22+
pub struct BytesIterOffsets {
23+
bytes: Bytes,
24+
offset: usize,
25+
}
26+
27+
impl Iterator for BytesIterOffsets {
28+
type Item = (usize, u8);
29+
30+
fn next(&mut self) -> Option<Self::Item> {
31+
let token = self.bytes.pop_front()?;
32+
let offset = self.offset;
33+
self.offset += 1;
34+
Some((offset, token))
35+
}
36+
}
37+
38+
impl SliceLen for Bytes {
    /// Number of bytes currently held by this `Bytes`.
    #[inline(always)]
    fn slice_len(&self) -> usize {
        let contents = self.as_slice();
        contents.len()
    }
}
44+
45+
impl Stream for Bytes {
46+
type Token = u8;
47+
type Slice = Bytes;
48+
49+
type IterOffsets = BytesIterOffsets;
50+
51+
type Checkpoint = BytesCheckpoint;
52+
53+
#[inline(always)]
54+
fn iter_offsets(&self) -> Self::IterOffsets {
55+
BytesIterOffsets {
56+
bytes: self.clone(),
57+
offset: 0,
58+
}
59+
}
60+
61+
#[inline(always)]
62+
fn eof_offset(&self) -> usize {
63+
self.as_slice().len()
64+
}
65+
66+
#[inline(always)]
67+
fn next_token(&mut self) -> Option<Self::Token> {
68+
self.pop_front()
69+
}
70+
71+
#[inline(always)]
72+
fn peek_token(&self) -> Option<Self::Token> {
73+
self.as_slice().first().copied()
74+
}
75+
76+
#[inline(always)]
77+
fn offset_for<P>(&self, predicate: P) -> Option<usize>
78+
where
79+
P: Fn(Self::Token) -> bool,
80+
{
81+
self.as_slice().iter().position(|b| predicate(*b))
82+
}
83+
84+
#[inline(always)]
85+
fn offset_at(&self, tokens: usize) -> Result<usize, Needed> {
86+
let remaining = self.as_slice().len();
87+
if let Some(needed) = tokens.checked_sub(remaining).and_then(NonZeroUsize::new) {
88+
Err(Needed::Size(needed))
89+
} else {
90+
Ok(tokens)
91+
}
92+
}
93+
94+
#[inline(always)]
95+
fn next_slice(&mut self, offset: usize) -> Self::Slice {
96+
self.take_prefix(offset).expect("offset within bounds")
97+
}
98+
99+
#[inline(always)]
100+
fn peek_slice(&self, offset: usize) -> Self::Slice {
101+
self.slice(..offset)
102+
}
103+
104+
#[inline(always)]
105+
fn checkpoint(&self) -> Self::Checkpoint {
106+
BytesCheckpoint(self.clone())
107+
}
108+
109+
#[inline(always)]
110+
fn reset(&mut self, checkpoint: &Self::Checkpoint) {
111+
*self = checkpoint.0.clone();
112+
}
113+
114+
#[allow(deprecated)]
115+
#[inline(always)]
116+
fn raw(&self) -> &dyn core::fmt::Debug {
117+
self
118+
}
119+
}
120+
121+
impl StreamIsPartial for Bytes {
    /// No state needs tracking because partial parsing is not supported.
    type PartialState = ();

    /// Always `false`: this stream does not support partial parsing.
    #[inline(always)]
    fn is_partial_supported() -> bool {
        false
    }

    /// No-op; there is no partial state to switch off.
    #[inline]
    fn complete(&mut self) -> Self::PartialState {}

    /// No-op; there is no partial state to restore.
    #[inline]
    fn restore_partial(&mut self, _state: Self::PartialState) {}
}
135+
136+
impl Offset for Bytes {
    /// Number of bytes between the start of `start` and the start of `self`.
    ///
    /// The result is the difference of the two data pointers, so it is only
    /// meaningful when `self` begins at or after `start` within the same
    /// underlying buffer (for example, `self` is what remains of `start`
    /// after some parsing). Debug builds assert the ordering so misuse fails
    /// with a descriptive message instead of an opaque arithmetic overflow.
    #[inline(always)]
    fn offset_from(&self, start: &Self) -> usize {
        let self_ptr = self.as_slice().as_ptr() as usize;
        let start_ptr = start.as_slice().as_ptr() as usize;
        debug_assert!(
            start_ptr <= self_ptr,
            "`Offset::offset_from` requires `start` to begin at or before `self`"
        );
        self_ptr - start_ptr
    }
}
144+
145+
impl Offset<BytesCheckpoint> for Bytes {
146+
#[inline(always)]
147+
fn offset_from(&self, other: &BytesCheckpoint) -> usize {
148+
self.offset_from(&other.0)
149+
}
150+
}
151+
152+
impl Offset for BytesCheckpoint {
153+
#[inline(always)]
154+
fn offset_from(&self, start: &Self) -> usize {
155+
self.0.offset_from(&start.0)
156+
}
157+
}
158+
159+
impl AsBytes for Bytes {
    /// Borrows the contents as a plain byte slice.
    #[inline(always)]
    fn as_bytes(&self) -> &[u8] {
        let contents: &[u8] = self.as_slice();
        contents
    }
}
165+
166+
impl<T> Compare<T> for Bytes
167+
where
168+
for<'a> &'a [u8]: Compare<T>,
169+
{
170+
#[inline(always)]
171+
fn compare(&self, t: T) -> CompareResult {
172+
self.as_slice().compare(t)
173+
}
174+
}
175+
176+
impl<S> FindSlice<S> for Bytes
177+
where
178+
for<'a> &'a [u8]: FindSlice<S>,
179+
{
180+
#[inline(always)]
181+
fn find_slice(&self, substr: S) -> Option<core::ops::Range<usize>> {
182+
self.as_slice().find_slice(substr)
183+
}
184+
}
185+
186+
impl UpdateSlice for Bytes {
    /// Replaces the stream with `inner`.
    ///
    /// The slice type of `Bytes` is `Bytes` itself, so the new slice is
    /// returned as-is and the previous value is dropped.
    #[inline(always)]
    fn update_slice(self, inner: Self::Slice) -> Self {
        inner
    }
}
192+
193+
/// Parses a [`View`] of `T` from the front of `input`.
///
/// Delegates to `Bytes::view_prefix`. If that fails, the error is discarded
/// and a recoverable `ErrMode::Backtrack` built from `input` is returned.
#[cfg(feature = "zerocopy")]
pub fn view<T, E>(input: &mut Bytes) -> Result<View<T>, ErrMode<E>>
where
    T: ?Sized + TryFromBytes + KnownLayout + Immutable,
    E: ParserError<Bytes>,
{
    match input.view_prefix::<T>() {
        Ok(parsed) => Ok(parsed),
        Err(_) => Err(ErrMode::Backtrack(E::from_input(input))),
    }
}
204+
205+
/// Builds a parser that reads a slice-like [`View`] of `count` elements
/// from the front of the input.
///
/// The returned closure delegates to `Bytes::view_prefix_with_elems`. If
/// that fails, the error is discarded and a recoverable
/// `ErrMode::Backtrack` built from the input is returned.
#[cfg(feature = "zerocopy")]
pub fn view_elems<T, E>(count: usize) -> impl winnow::Parser<Bytes, View<T>, ErrMode<E>>
where
    T: ?Sized + TryFromBytes + KnownLayout<PointerMetadata = usize> + Immutable,
    E: ParserError<Bytes>,
{
    move |input: &mut Bytes| match input.view_prefix_with_elems::<T>(count) {
        Ok(parsed) => Ok(parsed),
        Err(_) => Err(ErrMode::Backtrack(E::from_input(input))),
    }
}

0 commit comments

Comments
 (0)