Skip to content

Commit 07d4b80

Browse files
tertsdiepraam authored and yuankunzhang committed
start parsing date with winnow
1 parent b481a2b commit 07d4b80

File tree

12 files changed

+1342
-341
lines changed

12 files changed

+1342
-341
lines changed

Cargo.lock

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ readme = "README.md"
1111
regex = "1.10.4"
1212
chrono = { version="0.4.38", default-features=false, features=["std", "alloc", "clock"] }
1313
nom = "8.0.0"
14+
winnow = "0.5.34"

src/items/combined.rs

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// For the full copyright and license information, please view the LICENSE
2+
// file that was distributed with this source code.
3+
4+
//! Parse an ISO 8601 date and time item
5+
//!
6+
//! The GNU docs state:
7+
//!
8+
//! > The ISO 8601 date and time of day extended format consists of an ISO 8601
9+
//! > date, a ‘T’ character separator, and an ISO 8601 time of day. This format
10+
//! > is also recognized if the ‘T’ is replaced by a space.
11+
//! >
12+
//! > In this format, the time of day should use 24-hour notation. Fractional
13+
//! > seconds are allowed, with either comma or period preceding the fraction.
14+
//! > ISO 8601 fractional minutes and hours are not supported. Typically, hosts
15+
//! > support nanosecond timestamp resolution; excess precision is silently discarded.
16+
17+
use winnow::{combinator::alt, seq, PResult, Parser};
18+
19+
use crate::items::space;
20+
21+
use super::{
22+
date::{self, Date},
23+
s,
24+
time::{self, Time},
25+
};
26+
27+
#[derive(PartialEq, Debug, Clone)]
28+
pub struct DateTime {
29+
date: Date,
30+
time: Time,
31+
}
32+
33+
pub fn parse(input: &mut &str) -> PResult<DateTime> {
34+
seq!(DateTime {
35+
date: date::iso,
36+
// Note: the `T` is lowercased by the main parse function
37+
_: alt((s('t').void(), (' ', space).void())),
38+
time: time::iso,
39+
})
40+
.parse_next(input)
41+
}
42+
43+
#[cfg(test)]
mod tests {
    use super::{parse, DateTime};
    use crate::items::{date::Date, time::Time};

    /// All spellings below must parse to the same date and time.
    #[test]
    fn some_date() {
        let expected = Some(DateTime {
            date: Date {
                day: 10,
                month: 10,
                year: Some(2022),
            },
            time: Time {
                hour: 10,
                minute: 10,
                second: 55.0,
                offset: None,
            },
        });

        let inputs = [
            "2022-10-10t10:10:55",
            "2022-10-10 10:10:55",
            "2022-10-10 t 10:10:55",
            "2022-10-10 10:10:55",
            "2022-10-10 (A comment!) t 10:10:55",
            "2022-10-10 (A comment!) 10:10:55",
        ];

        for original in inputs {
            // The parser advances the slice it is given, so hand it a copy and
            // keep the untouched input for the failure message.
            let mut remaining = original;
            assert_eq!(
                parse(&mut remaining).ok(),
                expected,
                "Failed string: {original}"
            );
        }
    }
}

src/items/date.rs

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
// For the full copyright and license information, please view the LICENSE
2+
// file that was distributed with this source code.
3+
4+
//! Parse a date item (without time component)
5+
//!
6+
//! The GNU docs say:
7+
//!
8+
//! > A calendar date item specifies a day of the year. It is specified
9+
//! > differently, depending on whether the month is specified numerically
10+
//! > or literally.
11+
//! >
12+
//! > ...
13+
//! >
14+
//! > For numeric months, the ISO 8601 format ‘year-month-day’ is allowed,
15+
//! > where year is any positive number, month is a number between 01 and
16+
//! > 12, and day is a number between 01 and 31. A leading zero must be
17+
//! > present if a number is less than ten. If year is 68 or smaller, then
18+
//! > 2000 is added to it; otherwise, if year is less than 100, then 1900
19+
//! > is added to it. The construct ‘month/day/year’, popular in the United
20+
//! > States, is accepted. Also ‘month/day’, omitting the year.
21+
//! >
22+
//! > Literal months may be spelled out in full: ‘January’, ‘February’,
23+
//! > ‘March’, ‘April’, ‘May’, ‘June’, ‘July’, ‘August’, ‘September’,
24+
//! > ‘October’, ‘November’ or ‘December’. Literal months may be
25+
//! > abbreviated to their first three letters, possibly followed by an
26+
//! > abbreviating dot. It is also permitted to write ‘Sept’ instead of
27+
//! > ‘September’.
28+
29+
use winnow::{
30+
ascii::{alpha1, dec_uint},
31+
combinator::{alt, opt, preceded},
32+
seq,
33+
token::take,
34+
PResult, Parser,
35+
};
36+
37+
use super::s;
38+
use crate::ParseDateTimeError;
39+
40+
/// A calendar date as written by the user; the year is optional.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Date {
    /// Day of the month; the parsers in this module accept 1 through 31.
    pub day: u32,
    /// Month number; the parsers in this module accept 1 through 12.
    pub month: u32,
    /// The year, or `None` when the input did not include one.
    pub year: Option<u32>,
}
46+
47+
pub fn parse(input: &mut &str) -> PResult<Date> {
48+
alt((iso, us, literal1, literal2)).parse_next(input)
49+
}
50+
51+
/// Parse `YYYY-MM-DD` or `YY-MM-DD`
52+
///
53+
/// This is also used by [`combined`](super::combined).
54+
pub fn iso(input: &mut &str) -> PResult<Date> {
55+
seq!(Date {
56+
year: year.map(Some),
57+
_: s('-'),
58+
month: month,
59+
_: s('-'),
60+
day: day,
61+
})
62+
.parse_next(input)
63+
}
64+
65+
/// Parse `MM/DD/YYYY`, `MM/DD/YY` or `MM/DD`
66+
fn us(input: &mut &str) -> PResult<Date> {
67+
seq!(Date {
68+
month: month,
69+
_: s('/'),
70+
day: day,
71+
year: opt(preceded(s('/'), year)),
72+
})
73+
.parse_next(input)
74+
}
75+
76+
/// Parse `14 November 2022`, `14 Nov 2022`, "14nov2022", "14-nov-2022", "14-nov2022", "14nov-2022"
77+
fn literal1(input: &mut &str) -> PResult<Date> {
78+
seq!(Date {
79+
day: day,
80+
_: opt(s('-')),
81+
month: literal_month,
82+
year: opt(preceded(opt(s('-')), year)),
83+
})
84+
.parse_next(input)
85+
}
86+
87+
/// Parse `November 14, 2022` and `Nov 14, 2022`
88+
fn literal2(input: &mut &str) -> PResult<Date> {
89+
seq!(Date {
90+
month: literal_month,
91+
day: day,
92+
// FIXME: GNU requires _some_ space between the day and the year,
93+
// probably to distinguish with floats.
94+
year: opt(preceded(s(","), year)),
95+
})
96+
.parse_next(input)
97+
}
98+
99+
fn year(input: &mut &str) -> PResult<u32> {
100+
s(alt((
101+
take(4usize).try_map(|x: &str| x.parse()),
102+
take(3usize).try_map(|x: &str| x.parse()),
103+
take(2usize).try_map(|x: &str| x.parse()).map(
104+
|x: u32| {
105+
if x <= 68 {
106+
x + 2000
107+
} else {
108+
x + 1900
109+
}
110+
},
111+
),
112+
)))
113+
.parse_next(input)
114+
}
115+
116+
fn month(input: &mut &str) -> PResult<u32> {
117+
s(dec_uint)
118+
.try_map(|x| {
119+
(x >= 1 && x <= 12)
120+
.then_some(x)
121+
.ok_or(ParseDateTimeError::InvalidInput)
122+
})
123+
.parse_next(input)
124+
}
125+
126+
fn day(input: &mut &str) -> PResult<u32> {
127+
s(dec_uint)
128+
.try_map(|x| {
129+
(x >= 1 && x <= 31)
130+
.then_some(x)
131+
.ok_or(ParseDateTimeError::InvalidInput)
132+
})
133+
.parse_next(input)
134+
}
135+
136+
/// Parse the name of a month (case-insensitive)
137+
fn literal_month(input: &mut &str) -> PResult<u32> {
138+
s(alpha1)
139+
.verify_map(|s: &str| {
140+
Some(match s {
141+
"january" | "jan" => 1,
142+
"february" | "feb" => 2,
143+
"march" | "mar" => 3,
144+
"april" | "apr" => 4,
145+
"may" => 5,
146+
"june" | "jun" => 6,
147+
"july" | "jul" => 7,
148+
"august" | "aug" => 8,
149+
"september" | "sep" | "sept" => 9,
150+
"october" | "oct" => 10,
151+
"november" | "nov" => 11,
152+
"december" | "dec" => 12,
153+
_ => return None,
154+
})
155+
})
156+
.parse_next(input)
157+
}
158+
159+
#[cfg(test)]
mod tests {
    use super::{parse, Date};

    // Test cases from the GNU docs:
    //
    // ```
    // 2022-11-14 # ISO 8601.
    // 22-11-14 # Assume 19xx for 69 through 99,
    // # 20xx for 00 through 68 (not recommended).
    // 11/14/2022 # Common U.S. writing.
    // 14 November 2022
    // 14 Nov 2022 # Three-letter abbreviations always allowed.
    // November 14, 2022
    // 14-nov-2022
    // 14nov2022
    // ```

    /// Every notation that includes a year must produce 14 November 2022.
    #[test]
    fn with_year() {
        let expected = Date {
            year: Some(2022),
            month: 11,
            day: 14,
        };

        let inputs = [
            "2022-11-14",
            "2022 - 11 - 14",
            "22-11-14",
            "2022---11----14",
            "22(comment 1)-11(comment 2)-14",
            "11/14/2022",
            "11--/14--/2022",
            "11(comment 1)/(comment 2)14(comment 3)/(comment 4)2022",
            "11 / 14 / 2022",
            "11/14/22",
            "14 november 2022",
            "14 nov 2022",
            "november 14, 2022",
            "november 14 , 2022",
            "nov 14, 2022",
            "14-nov-2022",
            "14nov2022",
            "14nov 2022",
        ];

        for original in inputs {
            // The parser advances the slice it is given; keep the original
            // around for the failure message.
            let mut remaining = original;
            assert_eq!(
                parse(&mut remaining).unwrap(),
                expected,
                "Format string: {original}"
            );
        }
    }

    /// Notations without a year must produce 14 November with `year: None`.
    #[test]
    fn no_year() {
        let expected = Date {
            year: None,
            month: 11,
            day: 14,
        };

        let inputs = [
            "11/14",
            "14 november",
            "14 nov",
            "14(comment!)nov",
            "november 14",
            "november(comment!)14",
            "nov 14",
            "14-nov",
            "14nov",
            "14(comment????)nov",
        ];

        for original in inputs {
            let mut remaining = original;
            assert_eq!(parse(&mut remaining).unwrap(), expected);
        }
    }
}

0 commit comments

Comments
 (0)