Skip to content

Commit ef54aab

Browse files
committed
chore: switch nom to winnow in remaining uses in gix-object, gix-ref, and gix-actor for ~20% more performance.
It's likely that over time, these parsers will get even faster due to improvements to `winnow`. Thanks, Ed Page, for single-handedly performing this transition.
2 parents 7735047 + 02587fc commit ef54aab

File tree

35 files changed

+643
-655
lines changed

35 files changed

+643
-655
lines changed

Cargo.lock

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cargo-smart-release/Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cargo-smart-release/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ toml_edit = "0.19.1"
3434
semver = "1.0.4"
3535
crates-index = { version = "2.1.0", default-features = false, features = ["git-performance", "git-https"] }
3636
cargo_toml = "0.15.1"
37-
winnow = "0.5.1"
37+
winnow = "0.5.12"
3838
git-conventional = "0.12.0"
3939
time = "0.3.23"
4040
pulldown-cmark = "0.9.0"

gix-actor/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ gix-date = { version = "^0.7.1", path = "../gix-date" }
2323
thiserror = "1.0.38"
2424
btoi = "0.4.2"
2525
bstr = { version = "1.3.0", default-features = false, features = ["std", "unicode"]}
26-
nom = { version = "7", default-features = false, features = ["std"]}
26+
winnow = { version = "0.5.14", features = ["simd"] }
2727
itoa = "1.0.1"
2828
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]}
2929

gix-actor/src/identity.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
use bstr::ByteSlice;
2+
use winnow::error::StrContext;
3+
use winnow::prelude::*;
24

35
use crate::{signature::decode, Identity, IdentityRef};
46

57
impl<'a> IdentityRef<'a> {
68
/// Deserialize an identity from the given `data`.
7-
pub fn from_bytes<E>(data: &'a [u8]) -> Result<Self, nom::Err<E>>
9+
pub fn from_bytes<E>(mut data: &'a [u8]) -> Result<Self, winnow::error::ErrMode<E>>
810
where
9-
E: nom::error::ParseError<&'a [u8]> + nom::error::ContextError<&'a [u8]>,
11+
E: winnow::error::ParserError<&'a [u8]> + winnow::error::AddContext<&'a [u8], StrContext>,
1012
{
11-
decode::identity(data).map(|(_, t)| t)
13+
decode::identity.parse_next(&mut data)
1214
}
1315

1416
/// Create an owned instance from this shared one.

gix-actor/src/signature/decode.rs

Lines changed: 74 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -2,101 +2,75 @@ pub(crate) mod function {
22
use bstr::ByteSlice;
33
use btoi::btoi;
44
use gix_date::{time::Sign, OffsetInSeconds, SecondsSinceUnixEpoch, Time};
5-
use nom::multi::many1_count;
6-
use nom::{
7-
branch::alt,
8-
bytes::complete::{tag, take, take_until, take_while_m_n},
9-
character::is_digit,
10-
error::{context, ContextError, ParseError},
11-
sequence::{terminated, tuple},
12-
IResult,
5+
use winnow::{
6+
combinator::alt,
7+
combinator::separated_pair,
8+
combinator::terminated,
9+
error::{AddContext, ParserError, StrContext},
10+
prelude::*,
11+
stream::AsChar,
12+
token::{take, take_until0, take_while},
1313
};
14-
use std::cell::RefCell;
1514

1615
use crate::{IdentityRef, SignatureRef};
1716

1817
const SPACE: &[u8] = b" ";
1918

2019
/// Parse a signature from the bytes input `i` using `nom`.
21-
pub fn decode<'a, E: ParseError<&'a [u8]> + ContextError<&'a [u8]>>(
22-
i: &'a [u8],
23-
) -> IResult<&'a [u8], SignatureRef<'a>, E> {
24-
use nom::Parser;
25-
let tzsign = RefCell::new(b'-'); // TODO: there should be no need for this.
26-
let (i, (identity, _, time, _tzsign_count, hours, minutes)) = context(
27-
"<name> <<email>> <timestamp> <+|-><HHMM>",
28-
tuple((
29-
identity,
30-
tag(b" "),
31-
context("<timestamp>", |i| {
32-
terminated(take_until(SPACE), take(1usize))(i).and_then(|(i, v)| {
33-
btoi::<SecondsSinceUnixEpoch>(v)
34-
.map(|v| (i, v))
35-
.map_err(|_| nom::Err::Error(E::from_error_kind(i, nom::error::ErrorKind::MapRes)))
36-
})
20+
pub fn decode<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>(
21+
i: &mut &'a [u8],
22+
) -> PResult<SignatureRef<'a>, E> {
23+
separated_pair(
24+
identity,
25+
b" ",
26+
(
27+
terminated(take_until0(SPACE), take(1usize))
28+
.verify_map(|v| btoi::<SecondsSinceUnixEpoch>(v).ok())
29+
.context(StrContext::Expected("<timestamp>".into())),
30+
alt((
31+
take_while(1.., b'-').map(|_| Sign::Minus),
32+
take_while(1.., b'+').map(|_| Sign::Plus),
33+
))
34+
.context(StrContext::Expected("+|-".into())),
35+
take_while(2, AsChar::is_dec_digit)
36+
.verify_map(|v| btoi::<OffsetInSeconds>(v).ok())
37+
.context(StrContext::Expected("HH".into())),
38+
take_while(1..=2, AsChar::is_dec_digit)
39+
.verify_map(|v| btoi::<OffsetInSeconds>(v).ok())
40+
.context(StrContext::Expected("MM".into())),
41+
)
42+
.map(|(time, sign, hours, minutes)| {
43+
let offset = (hours * 3600 + minutes * 60) * if sign == Sign::Minus { -1 } else { 1 };
44+
Time {
45+
seconds: time,
46+
offset,
47+
sign,
48+
}
3749
}),
38-
context(
39-
"+|-",
40-
alt((
41-
many1_count(tag(b"-")).map(|_| *tzsign.borrow_mut() = b'-'), // TODO: this should be a non-allocating consumer of consecutive tags
42-
many1_count(tag(b"+")).map(|_| *tzsign.borrow_mut() = b'+'),
43-
)),
44-
),
45-
context("HH", |i| {
46-
take_while_m_n(2usize, 2, is_digit)(i).and_then(|(i, v)| {
47-
btoi::<OffsetInSeconds>(v)
48-
.map(|v| (i, v))
49-
.map_err(|_| nom::Err::Error(E::from_error_kind(i, nom::error::ErrorKind::MapRes)))
50-
})
51-
}),
52-
context("MM", |i| {
53-
take_while_m_n(1usize, 2, is_digit)(i).and_then(|(i, v)| {
54-
btoi::<OffsetInSeconds>(v)
55-
.map(|v| (i, v))
56-
.map_err(|_| nom::Err::Error(E::from_error_kind(i, nom::error::ErrorKind::MapRes)))
57-
})
58-
}),
59-
)),
60-
)(i)?;
61-
62-
let tzsign = tzsign.into_inner();
63-
debug_assert!(tzsign == b'-' || tzsign == b'+', "parser assure it's +|- only");
64-
let sign = if tzsign == b'-' { Sign::Minus } else { Sign::Plus }; //
65-
let offset = (hours * 3600 + minutes * 60) * if sign == Sign::Minus { -1 } else { 1 };
66-
67-
Ok((
68-
i,
69-
SignatureRef {
70-
name: identity.name,
71-
email: identity.email,
72-
time: Time {
73-
seconds: time,
74-
offset,
75-
sign,
76-
},
77-
},
78-
))
50+
)
51+
.context(StrContext::Expected("<name> <<email>> <timestamp> <+|-><HHMM>".into()))
52+
.map(|(identity, time)| SignatureRef {
53+
name: identity.name,
54+
email: identity.email,
55+
time,
56+
})
57+
.parse_next(i)
7958
}
8059

8160
/// Parse an identity from the bytes input `i` (like `name <email>`) using `nom`.
82-
pub fn identity<'a, E: ParseError<&'a [u8]> + ContextError<&'a [u8]>>(
83-
i: &'a [u8],
84-
) -> IResult<&'a [u8], IdentityRef<'a>, E> {
85-
let (i, (name, email)) = context(
86-
"<name> <<email>>",
87-
tuple((
88-
context("<name>", terminated(take_until(&b" <"[..]), take(2usize))),
89-
context("<email>", terminated(take_until(&b">"[..]), take(1usize))),
90-
)),
91-
)(i)?;
92-
93-
Ok((
94-
i,
95-
IdentityRef {
61+
pub fn identity<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>(
62+
i: &mut &'a [u8],
63+
) -> PResult<IdentityRef<'a>, E> {
64+
(
65+
terminated(take_until0(&b" <"[..]), take(2usize)).context(StrContext::Expected("<name>".into())),
66+
terminated(take_until0(&b">"[..]), take(1usize)).context(StrContext::Expected("<email>".into())),
67+
)
68+
.map(|(name, email): (&[u8], &[u8])| IdentityRef {
9669
name: name.as_bstr(),
9770
email: email.as_bstr(),
98-
},
99-
))
71+
})
72+
.context(StrContext::Expected("<name> <<email>>".into()))
73+
.parse_next(i)
10074
}
10175
}
10276
pub use function::identity;
@@ -107,12 +81,14 @@ mod tests {
10781
use bstr::ByteSlice;
10882
use gix_date::{time::Sign, OffsetInSeconds, SecondsSinceUnixEpoch};
10983
use gix_testtools::to_bstr_err;
110-
use nom::IResult;
84+
use winnow::prelude::*;
11185

11286
use crate::{signature, SignatureRef, Time};
11387

114-
fn decode(i: &[u8]) -> IResult<&[u8], SignatureRef<'_>, nom::error::VerboseError<&[u8]>> {
115-
signature::decode(i)
88+
fn decode<'i>(
89+
i: &mut &'i [u8],
90+
) -> PResult<SignatureRef<'i>, winnow::error::TreeError<&'i [u8], winnow::error::StrContext>> {
91+
signature::decode.parse_next(i)
11692
}
11793

11894
fn signature(
@@ -132,7 +108,8 @@ mod tests {
132108
#[test]
133109
fn tz_minus() {
134110
assert_eq!(
135-
decode(b"Sebastian Thiel <[email protected]> 1528473343 -0230")
111+
decode
112+
.parse_peek(b"Sebastian Thiel <[email protected]> 1528473343 -0230")
136113
.expect("parse to work")
137114
.1,
138115
signature("Sebastian Thiel", "[email protected]", 1528473343, Sign::Minus, -9000)
@@ -142,7 +119,8 @@ mod tests {
142119
#[test]
143120
fn tz_plus() {
144121
assert_eq!(
145-
decode(b"Sebastian Thiel <[email protected]> 1528473343 +0230")
122+
decode
123+
.parse_peek(b"Sebastian Thiel <[email protected]> 1528473343 +0230")
146124
.expect("parse to work")
147125
.1,
148126
signature("Sebastian Thiel", "[email protected]", 1528473343, Sign::Plus, 9000)
@@ -152,7 +130,8 @@ mod tests {
152130
#[test]
153131
fn negative_offset_0000() {
154132
assert_eq!(
155-
decode(b"Sebastian Thiel <[email protected]> 1528473343 -0000")
133+
decode
134+
.parse_peek(b"Sebastian Thiel <[email protected]> 1528473343 -0000")
156135
.expect("parse to work")
157136
.1,
158137
signature("Sebastian Thiel", "[email protected]", 1528473343, Sign::Minus, 0)
@@ -162,7 +141,8 @@ mod tests {
162141
#[test]
163142
fn negative_offset_double_dash() {
164143
assert_eq!(
165-
decode(b"name <[email protected]> 1288373970 --700")
144+
decode
145+
.parse_peek(b"name <[email protected]> 1288373970 --700")
166146
.expect("parse to work")
167147
.1,
168148
signature("name", "[email protected]", 1288373970, Sign::Minus, -252000)
@@ -172,30 +152,30 @@ mod tests {
172152
#[test]
173153
fn empty_name_and_email() {
174154
assert_eq!(
175-
decode(b" <> 12345 -1215").expect("parse to work").1,
155+
decode.parse_peek(b" <> 12345 -1215").expect("parse to work").1,
176156
signature("", "", 12345, Sign::Minus, -44100)
177157
);
178158
}
179159

180160
#[test]
181161
fn invalid_signature() {
182162
assert_eq!(
183-
decode(b"hello < 12345 -1215")
163+
decode.parse_peek(b"hello < 12345 -1215")
184164
.map_err(to_bstr_err)
185165
.expect_err("parse fails as > is missing")
186166
.to_string(),
187-
"Parse error:\nTakeUntil at: 12345 -1215\nin section '<email>', at: 12345 -1215\nin section '<name> <<email>>', at: hello < 12345 -1215\nin section '<name> <<email>> <timestamp> <+|-><HHMM>', at: hello < 12345 -1215\n"
167+
"in slice at ' 12345 -1215'\n 0: expected `<email>` at ' 12345 -1215'\n 1: expected `<name> <<email>>` at ' 12345 -1215'\n 2: expected `<name> <<email>> <timestamp> <+|-><HHMM>` at ' 12345 -1215'\n"
188168
);
189169
}
190170

191171
#[test]
192172
fn invalid_time() {
193173
assert_eq!(
194-
decode(b"hello <> abc -1215")
174+
decode.parse_peek(b"hello <> abc -1215")
195175
.map_err(to_bstr_err)
196176
.expect_err("parse fails as > is missing")
197177
.to_string(),
198-
"Parse error:\nMapRes at: -1215\nin section '<timestamp>', at: abc -1215\nin section '<name> <<email>> <timestamp> <+|-><HHMM>', at: hello <> abc -1215\n"
178+
"in predicate verification at 'abc -1215'\n 0: expected `<timestamp>` at 'abc -1215'\n 1: expected `<name> <<email>> <timestamp> <+|-><HHMM>` at 'abc -1215'\n"
199179
);
200180
}
201181
}

gix-actor/src/signature/mod.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
mod _ref {
22
use bstr::ByteSlice;
3+
use winnow::error::StrContext;
4+
use winnow::prelude::*;
35

46
use crate::{signature::decode, IdentityRef, Signature, SignatureRef};
57

68
impl<'a> SignatureRef<'a> {
79
/// Deserialize a signature from the given `data`.
8-
pub fn from_bytes<E>(data: &'a [u8]) -> Result<SignatureRef<'a>, nom::Err<E>>
10+
pub fn from_bytes<E>(mut data: &'a [u8]) -> Result<SignatureRef<'a>, winnow::error::ErrMode<E>>
911
where
10-
E: nom::error::ParseError<&'a [u8]> + nom::error::ContextError<&'a [u8]>,
12+
E: winnow::error::ParserError<&'a [u8]> + winnow::error::AddContext<&'a [u8], StrContext>,
1113
{
12-
decode(data).map(|(_, t)| t)
14+
decode.parse_next(&mut data)
1315
}
1416

1517
/// Create an owned instance from this shared one.

gix-actor/tests/identity/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ fn round_trip() -> gix_testtools::Result {
99
b".. whitespace \t is explicitly allowed - unicode aware trimming must be done elsewhere <[email protected]>"
1010
];
1111
for input in DEFAULTS {
12-
let signature: Identity = gix_actor::IdentityRef::from_bytes::<()>(input)?.into();
12+
let signature: Identity = gix_actor::IdentityRef::from_bytes::<()>(input).unwrap().into();
1313
let mut output = Vec::new();
1414
signature.write_to(&mut output)?;
1515
assert_eq!(output.as_bstr(), input.as_bstr());

0 commit comments

Comments
 (0)