Skip to content

Commit 12f03db

Browse files
committed
Simplify parsers
This is intended to make the winnow 0.5 transition easier
1 parent 2fb4a54 commit 12f03db

File tree

14 files changed

+242
-285
lines changed

14 files changed

+242
-285
lines changed

gix-actor/src/signature/decode.rs

Lines changed: 37 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ pub(crate) mod function {
22
use bstr::ByteSlice;
33
use btoi::btoi;
44
use gix_date::{time::Sign, OffsetInSeconds, SecondsSinceUnixEpoch, Time};
5-
use std::cell::RefCell;
65
use winnow::{
76
combinator::alt,
87
combinator::repeat,
8+
combinator::separated_pair,
99
combinator::terminated,
1010
error::{AddContext, ParserError},
1111
prelude::*,
@@ -21,84 +21,57 @@ pub(crate) mod function {
2121
pub fn decode<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>(
2222
i: &'a [u8],
2323
) -> IResult<&'a [u8], SignatureRef<'a>, E> {
24-
let tzsign = RefCell::new(b'-'); // TODO: there should be no need for this.
25-
let (i, (identity, _, time, _tzsign_count, hours, minutes)) = (
24+
separated_pair(
2625
identity,
2726
b" ",
28-
(|i| {
27+
(
2928
terminated(take_until0(SPACE), take(1usize))
30-
.parse_next(i)
31-
.and_then(|(i, v)| {
32-
btoi::<SecondsSinceUnixEpoch>(v)
33-
.map(|v| (i, v))
34-
.map_err(|_| winnow::error::ErrMode::from_error_kind(i, winnow::error::ErrorKind::Verify))
35-
})
36-
})
37-
.context("<timestamp>"),
38-
alt((
39-
repeat(1.., b"-").map(|_: ()| *tzsign.borrow_mut() = b'-'), // TODO: this should be a non-allocating consumer of consecutive tags
40-
repeat(1.., b"+").map(|_: ()| *tzsign.borrow_mut() = b'+'),
41-
))
42-
.context("+|-"),
43-
(|i| {
44-
take_while(2, AsChar::is_dec_digit).parse_next(i).and_then(|(i, v)| {
45-
btoi::<OffsetInSeconds>(v)
46-
.map(|v| (i, v))
47-
.map_err(|_| winnow::error::ErrMode::from_error_kind(i, winnow::error::ErrorKind::Verify))
48-
})
49-
})
50-
.context("HH"),
51-
(|i| {
29+
.verify_map(|v| btoi::<SecondsSinceUnixEpoch>(v).ok())
30+
.context("<timestamp>"),
31+
alt((
32+
repeat(1.., b"-").map(|_: ()| Sign::Minus),
33+
repeat(1.., b"+").map(|_: ()| Sign::Plus),
34+
))
35+
.context("+|-"),
36+
take_while(2, AsChar::is_dec_digit)
37+
.verify_map(|v| btoi::<OffsetInSeconds>(v).ok())
38+
.context("HH"),
5239
take_while(1..=2, AsChar::is_dec_digit)
53-
.parse_next(i)
54-
.and_then(|(i, v)| {
55-
btoi::<OffsetInSeconds>(v)
56-
.map(|v| (i, v))
57-
.map_err(|_| winnow::error::ErrMode::from_error_kind(i, winnow::error::ErrorKind::Verify))
58-
})
59-
})
60-
.context("MM"),
40+
.verify_map(|v| btoi::<OffsetInSeconds>(v).ok())
41+
.context("MM"),
42+
)
43+
.map(|(time, sign, hours, minutes)| {
44+
let offset = (hours * 3600 + minutes * 60) * if sign == Sign::Minus { -1 } else { 1 };
45+
Time {
46+
seconds: time,
47+
offset,
48+
sign,
49+
}
50+
}),
6151
)
62-
.context("<name> <<email>> <timestamp> <+|-><HHMM>")
63-
.parse_next(i)?;
64-
65-
let tzsign = tzsign.into_inner();
66-
debug_assert!(tzsign == b'-' || tzsign == b'+', "parser assure it's +|- only");
67-
let sign = if tzsign == b'-' { Sign::Minus } else { Sign::Plus }; //
68-
let offset = (hours * 3600 + minutes * 60) * if sign == Sign::Minus { -1 } else { 1 };
69-
70-
Ok((
71-
i,
72-
SignatureRef {
73-
name: identity.name,
74-
email: identity.email,
75-
time: Time {
76-
seconds: time,
77-
offset,
78-
sign,
79-
},
80-
},
81-
))
52+
.context("<name> <<email>> <timestamp> <+|-><HHMM>")
53+
.map(|(identity, time)| SignatureRef {
54+
name: identity.name,
55+
email: identity.email,
56+
time,
57+
})
58+
.parse_next(i)
8259
}
8360

8461
/// Parse an identity from the bytes input `i` (like `name <email>`) using `winnow`.
8562
pub fn identity<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>(
8663
i: &'a [u8],
8764
) -> IResult<&'a [u8], IdentityRef<'a>, E> {
88-
let (i, (name, email)) = (
65+
(
8966
terminated(take_until0(&b" <"[..]), take(2usize)).context("<name>"),
9067
terminated(take_until0(&b">"[..]), take(1usize)).context("<email>"),
9168
)
92-
.context("<name> <<email>>")
93-
.parse_next(i)?;
94-
95-
Ok((
96-
i,
97-
IdentityRef {
69+
.map(|(name, email): (&[u8], &[u8])| IdentityRef {
9870
name: name.as_bstr(),
9971
email: email.as_bstr(),
100-
},
101-
))
72+
})
73+
.context("<name> <<email>>")
74+
.parse_next(i)
10275
}
10376
}
10477
pub use function::identity;
@@ -197,7 +170,7 @@ mod tests {
197170
.map_err(to_bstr_err)
198171
.expect_err("parse fails as > is missing")
199172
.to_string(),
200-
"Parse error:\nVerify at: -1215\nin section '<timestamp>', at: abc -1215\nin section '<name> <<email>> <timestamp> <+|-><HHMM>', at: hello <> abc -1215\n"
173+
"Parse error:\nVerify at: abc -1215\nin section '<timestamp>', at: abc -1215\nin section '<name> <<email>> <timestamp> <+|-><HHMM>', at: hello <> abc -1215\n"
201174
);
202175
}
203176
}

gix-object/src/commit/decode.rs

Lines changed: 38 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@ use std::borrow::Cow;
33
use smallvec::SmallVec;
44
use winnow::{
55
combinator::alt,
6+
combinator::preceded,
67
combinator::repeat,
78
combinator::terminated,
8-
combinator::{eof, opt},
9+
combinator::{eof, opt, rest},
910
error::{AddContext, ParserError},
1011
prelude::*,
11-
token::{tag, take_till1},
12+
token::take_till1,
1213
};
1314

1415
use crate::{parse, parse::NL, BStr, ByteSlice, CommitRef};
@@ -21,52 +22,43 @@ pub fn message<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>(i: &'a [u8])
2122
.map(|err: E| err.add_context(i, "newline + <message>")),
2223
);
2324
}
24-
let (i, _) = tag(NL)
25+
preceded(NL, rest.map(ByteSlice::as_bstr))
2526
.context("a newline separates headers from the message")
26-
.parse_next(i)?;
27-
Ok((&[], i.as_bstr()))
27+
.parse_next(i)
2828
}
2929

30-
pub fn commit<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], CommitRef<'_>, E> {
31-
let (i, tree) = (|i| parse::header_field(i, b"tree", parse::hex_hash))
32-
.context("tree <40 lowercase hex char>")
33-
.parse_next(i)?;
34-
let (i, parents): (_, Vec<_>) = repeat(0.., |i| parse::header_field(i, b"parent", parse::hex_hash))
35-
.context("zero or more 'parent <40 lowercase hex char>'")
36-
.parse_next(i)?;
37-
let (i, author) = (|i| parse::header_field(i, b"author", parse::signature))
38-
.context("author <signature>")
39-
.parse_next(i)?;
40-
let (i, committer) = (|i| parse::header_field(i, b"committer", parse::signature))
41-
.context("committer <signature>")
42-
.parse_next(i)?;
43-
let (i, encoding) = opt(|i| parse::header_field(i, b"encoding", take_till1(NL)))
44-
.context("encoding <encoding>")
45-
.parse_next(i)?;
46-
let (i, extra_headers) = repeat(
47-
0..,
48-
alt((
49-
parse::any_header_field_multi_line.map(|(k, o)| (k.as_bstr(), Cow::Owned(o))),
50-
|i| {
51-
parse::any_header_field(i, take_till1(NL))
52-
.map(|(i, (k, o))| (i, (k.as_bstr(), Cow::Borrowed(o.as_bstr()))))
53-
},
54-
)),
30+
pub fn commit<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], CommitRef<'a>, E> {
31+
(
32+
(|i| parse::header_field(i, b"tree", parse::hex_hash)).context("tree <40 lowercase hex char>"),
33+
repeat(0.., |i| parse::header_field(i, b"parent", parse::hex_hash))
34+
.map(|p: Vec<_>| p)
35+
.context("zero or more 'parent <40 lowercase hex char>'"),
36+
(|i| parse::header_field(i, b"author", parse::signature)).context("author <signature>"),
37+
(|i| parse::header_field(i, b"committer", parse::signature)).context("committer <signature>"),
38+
opt(|i| parse::header_field(i, b"encoding", take_till1(NL))).context("encoding <encoding>"),
39+
repeat(
40+
0..,
41+
alt((
42+
parse::any_header_field_multi_line.map(|(k, o)| (k.as_bstr(), Cow::Owned(o))),
43+
|i| {
44+
parse::any_header_field(i, take_till1(NL))
45+
.map(|(i, (k, o))| (i, (k.as_bstr(), Cow::Borrowed(o.as_bstr()))))
46+
},
47+
)),
48+
)
49+
.context("<field> <single-line|multi-line>"),
50+
terminated(message, eof),
5551
)
56-
.context("<field> <single-line|multi-line>")
57-
.parse_next(i)?;
58-
let (i, message) = terminated(message, eof).parse_next(i)?;
59-
60-
Ok((
61-
i,
62-
CommitRef {
63-
tree,
64-
parents: SmallVec::from(parents),
65-
author,
66-
committer,
67-
encoding: encoding.map(ByteSlice::as_bstr),
68-
message,
69-
extra_headers,
70-
},
71-
))
52+
.map(
53+
|(tree, parents, author, committer, encoding, extra_headers, message)| CommitRef {
54+
tree,
55+
parents: SmallVec::from(parents),
56+
author,
57+
committer,
58+
encoding: encoding.map(ByteSlice::as_bstr),
59+
message,
60+
extra_headers,
61+
},
62+
)
63+
.parse_next(i)
7264
}

gix-object/src/commit/message/body.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ use std::ops::Deref;
22

33
use winnow::{
44
combinator::eof,
5+
combinator::rest,
6+
combinator::separated_pair,
57
combinator::terminated,
68
error::{ErrorKind, ParserError},
79
prelude::*,
@@ -33,11 +35,12 @@ pub struct TrailerRef<'a> {
3335
}
3436

3537
fn parse_single_line_trailer<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a BStr, &'a BStr), E> {
36-
let (value, token) = terminated(take_until1(b":".as_ref()), b": ").parse_next(i.trim_end())?;
38+
let (i, (token, value)) = separated_pair(take_until1(b":".as_ref()), b": ", rest).parse_next(i.trim_end())?;
39+
3740
if token.trim_end().len() != token.len() || value.trim_start().len() != value.len() {
3841
Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Fail).cut())
3942
} else {
40-
Ok((&[], (token.as_bstr(), value.as_bstr())))
43+
Ok((i, (token.as_bstr(), value.as_bstr())))
4144
}
4245
}
4346

gix-object/src/commit/message/decode.rs

Lines changed: 23 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,46 @@
11
use winnow::{
2-
combinator::alt, combinator::eof, combinator::terminated, error::ParserError, prelude::*, token::take_till1,
2+
combinator::alt, combinator::eof, combinator::preceded, combinator::rest, combinator::terminated,
3+
error::ParserError, prelude::*, stream::Offset, token::take_till1,
34
};
45

56
use crate::bstr::{BStr, ByteSlice};
67

78
pub(crate) fn newline<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E> {
8-
alt((b"\r\n", b"\n")).parse_next(i)
9+
alt((b"\n", b"\r\n")).parse_next(i)
910
}
1011

11-
fn subject_and_body<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a BStr, Option<&'a BStr>), E> {
12-
let mut c = i;
13-
let mut consumed_bytes = 0;
14-
while !c.is_empty() {
15-
c = match take_till1::<_, _, E>(|c| c == b'\n' || c == b'\r').parse_next(c) {
16-
Ok((i1, segment)) => {
17-
consumed_bytes += segment.len();
18-
match (newline::<E>, newline::<E>).parse_next(i1) {
19-
Ok((body, _)) => {
20-
return Ok((
21-
&[],
22-
(
23-
i[0usize..consumed_bytes].as_bstr(),
24-
(!body.is_empty()).then(|| body.as_bstr()),
25-
),
26-
));
12+
fn subject_and_body<'a, E: ParserError<&'a [u8]>>(
13+
mut i: &'a [u8],
14+
) -> IResult<&'a [u8], (&'a BStr, Option<&'a BStr>), E> {
15+
let start = i;
16+
while !i.is_empty() {
17+
match take_till1::<_, _, E>(|c| c == b'\n' || c == b'\r').parse_next(i) {
18+
Ok((next, _)) => {
19+
let consumed_bytes = next.offset_from(start);
20+
match preceded((newline::<E>, newline::<E>), rest).parse_next(next) {
21+
Ok((next, body)) => {
22+
let body = (!body.is_empty()).then(|| body.as_bstr());
23+
return Ok((next, (start[0usize..consumed_bytes].as_bstr(), body)));
2724
}
28-
Err(_) => match i1.get(1..) {
25+
Err(_) => match next.get(1..) {
2926
Some(next) => {
30-
consumed_bytes += 1;
31-
next
27+
i = next;
3228
}
3329
None => break,
3430
},
3531
}
3632
}
37-
Err(_) => match c.get(1..) {
33+
Err(_) => match i.get(1..) {
3834
Some(next) => {
39-
consumed_bytes += 1;
40-
next
35+
i = next;
4136
}
4237
None => break,
4338
},
44-
};
39+
}
4540
}
46-
Ok((&[], (i.as_bstr(), None)))
41+
42+
i = start;
43+
rest.map(|r: &[u8]| (r.as_bstr(), None)).parse_next(i)
4744
}
4845

4946
/// Returns title and body, without separator

0 commit comments

Comments
 (0)