Skip to content

Commit 25f9e48

Browse files
authored
Merge pull request #205 from yuankunzhang/refactor-timestamp-parsing
refactor: replace floating-point seconds with precise integer representation
2 parents 14b8a94 + 5c686bc commit 25f9e48

File tree

6 files changed

+226
-58
lines changed

6 files changed

+226
-58
lines changed

src/items/builder.rs

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
use chrono::{DateTime, Datelike, FixedOffset, NaiveDate, TimeZone, Timelike};
55

6-
use super::{date, relative, time, timezone, weekday, year};
6+
use super::{date, epoch, relative, time, timezone, weekday, year};
77

88
/// The builder is used to construct a DateTime object from various components.
99
/// The parser creates a `DateTimeBuilder` object with the parsed components,
@@ -13,7 +13,7 @@ use super::{date, relative, time, timezone, weekday, year};
1313
#[derive(Debug, Default)]
1414
pub(crate) struct DateTimeBuilder {
1515
base: Option<DateTime<FixedOffset>>,
16-
timestamp: Option<f64>,
16+
timestamp: Option<epoch::Timestamp>,
1717
date: Option<date::Date>,
1818
time: Option<time::Time>,
1919
weekday: Option<weekday::Weekday>,
@@ -35,7 +35,7 @@ impl DateTimeBuilder {
3535

3636
/// Sets a timestamp value. Timestamp values are exclusive to other date/time
3737
/// items (date, time, weekday, timezone, relative adjustments).
38-
pub(super) fn set_timestamp(mut self, ts: f64) -> Result<Self, &'static str> {
38+
pub(super) fn set_timestamp(mut self, ts: epoch::Timestamp) -> Result<Self, &'static str> {
3939
if self.timestamp.is_some() {
4040
return Err("timestamp cannot appear more than once");
4141
} else if self.date.is_some()
@@ -148,15 +148,15 @@ impl DateTimeBuilder {
148148
self.set_time(time)
149149
}
150150

151-
fn build_from_timestamp(ts: f64, tz: &FixedOffset) -> Option<DateTime<FixedOffset>> {
152-
// TODO: How to make the fract -> nanosecond conversion more precise?
153-
// Maybe considering using the
154-
// [rust_decimal](https://crates.io/crates/rust_decimal) crate?
155-
match chrono::Utc.timestamp_opt(ts as i64, (ts.fract() * 10f64.powi(9)).round() as u32) {
151+
fn build_from_timestamp(
152+
ts: epoch::Timestamp,
153+
tz: &FixedOffset,
154+
) -> Option<DateTime<FixedOffset>> {
155+
match chrono::Utc.timestamp_opt(ts.second, ts.nanosecond) {
156156
chrono::MappedLocalTime::Single(t) => Some(t.with_timezone(tz)),
157157
chrono::MappedLocalTime::Ambiguous(earliest, _latest) => {
158-
// TODO: When there is a fold in the local time, which value
159-
// do we choose? For now, we use the earliest one.
158+
// When there is a fold in the local time, we use the earliest
159+
// one.
160160
Some(earliest.with_timezone(tz))
161161
}
162162
chrono::MappedLocalTime::None => None, // Invalid timestamp
@@ -210,6 +210,7 @@ impl DateTimeBuilder {
210210
hour,
211211
minute,
212212
second,
213+
nanosecond,
213214
ref offset,
214215
}) = self.time
215216
{
@@ -224,8 +225,8 @@ impl DateTimeBuilder {
224225
dt.day(),
225226
hour,
226227
minute,
227-
second as u32,
228-
(second.fract() * 10f64.powi(9)).round() as u32,
228+
second,
229+
nanosecond,
229230
offset,
230231
)?;
231232
}

src/items/combined.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ mod tests {
5555
time: Time {
5656
hour: 10,
5757
minute: 10,
58-
second: 55.0,
58+
second: 55,
59+
nanosecond: 0,
5960
offset: None,
6061
},
6162
});

src/items/epoch.rs

Lines changed: 125 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,35 +15,147 @@
1515
//! > ‘@1483228800’ represents 2017-01-01 00:00:00 UTC, and there is no way to
1616
//! > represent the intervening leap second 2016-12-31 23:59:60 UTC.
1717
18-
use winnow::{combinator::preceded, ModalResult, Parser};
18+
use winnow::{
19+
ascii::digit1,
20+
combinator::{opt, preceded},
21+
token::one_of,
22+
ModalResult, Parser,
23+
};
1924

20-
use super::primitive::{float, s};
25+
use super::primitive::{dec_uint, s};
2126

22-
/// Parse a timestamp in the form of `@1234567890`.
23-
pub fn parse(input: &mut &str) -> ModalResult<f64> {
24-
s(preceded("@", float)).parse_next(input)
27+
/// Represents a timestamp with nanosecond accuracy.
28+
///
29+
/// # Invariants
30+
///
31+
/// - `nanosecond` is always in the range of `0..1_000_000_000`.
32+
/// - Negative timestamps are represented by a negative `second` value and a
33+
/// non-negative `nanosecond` value.
34+
#[derive(Debug, PartialEq)]
35+
pub(crate) struct Timestamp {
36+
pub(crate) second: i64,
37+
pub(crate) nanosecond: u32,
38+
}
39+
40+
/// Parse a timestamp in the form of `@1234567890` or `@-1234567890.12345` or
41+
/// `@1234567890,12345`.
42+
pub(crate) fn parse(input: &mut &str) -> ModalResult<Timestamp> {
43+
(s("@"), opt(s(one_of(['-', '+']))), sec_and_nsec)
44+
.verify_map(|(_, sign, (sec, nsec))| {
45+
let sec = i64::try_from(sec).ok()?;
46+
let (second, nanosecond) = match (sign, nsec) {
47+
(Some('-'), 0) => (-sec, 0),
48+
// Floor towards minus infinity so that `nanosecond` stays non-negative.
49+
(Some('-'), _) => ((-sec).checked_sub(1)?, 1_000_000_000 - nsec),
50+
_ => (sec, nsec),
51+
};
52+
Some(Timestamp { second, nanosecond })
53+
})
54+
.parse_next(input)
55+
}
56+
57+
/// Parse a second value in the form of `1234567890` or `1234567890.12345` or
58+
/// `1234567890,12345`.
59+
///
60+
/// The first part represents whole seconds. The optional second part represents
61+
/// fractional seconds, parsed as a nanosecond value from its first 9 digits
62+
/// (zero-padded on the right if fewer are present; any further digits are
63+
/// ignored). If the second part is omitted, it defaults to 0 nanoseconds.
64+
pub(super) fn sec_and_nsec(input: &mut &str) -> ModalResult<(u64, u32)> {
65+
(s(dec_uint), opt(preceded(one_of(['.', ',']), digit1)))
66+
.verify_map(|(sec, opt_nsec_str)| match opt_nsec_str {
67+
Some(nsec_str) if nsec_str.len() >= 9 => Some((sec, nsec_str[..9].parse().ok()?)),
68+
Some(nsec_str) => {
69+
let multiplier = 10_u32.pow(9 - nsec_str.len() as u32);
70+
Some((sec, nsec_str.parse::<u32>().ok()?.checked_mul(multiplier)?))
71+
}
72+
None => Some((sec, 0)),
73+
})
74+
.parse_next(input)
2575
}
2676

2777
#[cfg(test)]
2878
mod tests {
29-
use super::parse;
79+
use super::*;
80+
81+
#[test]
82+
fn sec_and_nsec_test() {
83+
let mut input = "1234567890";
84+
assert_eq!(sec_and_nsec(&mut input).unwrap(), (1234567890, 0));
3085

31-
fn float_eq(a: f64, b: f64) -> bool {
32-
(a - b).abs() < f64::EPSILON
86+
let mut input = "1234567890.12345";
87+
assert_eq!(sec_and_nsec(&mut input).unwrap(), (1234567890, 123450000));
88+
89+
let mut input = "1234567890,12345";
90+
assert_eq!(sec_and_nsec(&mut input).unwrap(), (1234567890, 123450000));
91+
92+
let mut input = "1234567890.1234567890123";
93+
assert_eq!(sec_and_nsec(&mut input).unwrap(), (1234567890, 123456789));
3394
}
3495

3596
#[test]
36-
fn float() {
97+
fn timestamp() {
3798
let mut input = "@1234567890";
38-
assert!(float_eq(parse(&mut input).unwrap(), 1234567890.0));
99+
assert_eq!(
100+
parse(&mut input).unwrap(),
101+
Timestamp {
102+
second: 1234567890,
103+
nanosecond: 0,
104+
}
105+
);
106+
107+
let mut input = "@ 1234567890";
108+
assert_eq!(
109+
parse(&mut input).unwrap(),
110+
Timestamp {
111+
second: 1234567890,
112+
nanosecond: 0,
113+
}
114+
);
115+
116+
let mut input = "@ -1234567890";
117+
assert_eq!(
118+
parse(&mut input).unwrap(),
119+
Timestamp {
120+
second: -1234567890,
121+
nanosecond: 0,
122+
}
123+
);
124+
125+
let mut input = "@ - 1234567890";
126+
assert_eq!(
127+
parse(&mut input).unwrap(),
128+
Timestamp {
129+
second: -1234567890,
130+
nanosecond: 0,
131+
}
132+
);
39133

40134
let mut input = "@1234567890.12345";
41-
assert!(float_eq(parse(&mut input).unwrap(), 1234567890.12345));
135+
assert_eq!(
136+
parse(&mut input).unwrap(),
137+
Timestamp {
138+
second: 1234567890,
139+
nanosecond: 123450000,
140+
}
141+
);
42142

43143
let mut input = "@1234567890,12345";
44-
assert!(float_eq(parse(&mut input).unwrap(), 1234567890.12345));
144+
assert_eq!(
145+
parse(&mut input).unwrap(),
146+
Timestamp {
147+
second: 1234567890,
148+
nanosecond: 123450000,
149+
}
150+
);
45151

46152
let mut input = "@-1234567890.12345";
47-
assert_eq!(parse(&mut input).unwrap(), -1234567890.12345);
153+
assert_eq!(
154+
parse(&mut input).unwrap(),
155+
Timestamp {
156+
second: -1234567891,
157+
nanosecond: 876550000,
158+
}
159+
);
48160
}
49161
}

src/items/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ use crate::ParseDateTimeError;
5858

5959
#[derive(PartialEq, Debug)]
6060
pub(crate) enum Item {
61-
Timestamp(f64),
61+
Timestamp(epoch::Timestamp),
6262
DateTime(combined::DateTime),
6363
Date(date::Date),
6464
Time(time::Time),

src/items/primitive.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33

44
//! Primitive combinators.
55
6+
use std::str::FromStr;
7+
68
use winnow::{
7-
ascii::{digit1, multispace0},
9+
ascii::{digit1, multispace0, Uint},
810
combinator::{alt, delimited, not, opt, peek, preceded, repeat, separated},
911
error::{ContextError, ParserError, StrContext, StrContextValue},
1012
stream::AsChar,
@@ -100,8 +102,9 @@ where
100102
///
101103
/// See the rationale for `dec_int` for why we don't use
102104
/// `winnow::ascii::dec_uint`.
103-
pub(super) fn dec_uint<'a, E>(input: &mut &'a str) -> winnow::Result<u32, E>
105+
pub(super) fn dec_uint<'a, O, E>(input: &mut &'a str) -> winnow::Result<O, E>
104106
where
107+
O: Uint + FromStr,
105108
E: ParserError<&'a str>,
106109
{
107110
digit1

0 commit comments

Comments
 (0)