Skip to content

Commit 1fdbfa1

Browse files
committed
mysql-time: Replace nom-based time parser
Nom is not our favorite framework for parsing, and it's overkill for parsing time strings, which are not complex. Change-Id: Id91128ca1dd877f015e232397cfa775aa054a69d Reviewed-on: https://gerrit.readyset.name/c/readyset/+/12026 Tested-by: Buildkite CI Reviewed-by: Jason Brown <jason.b@readyset.io>
1 parent 684eeda commit 1fdbfa1

File tree

3 files changed

+123
-108
lines changed

3 files changed

+123
-108
lines changed

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

mysql-time/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ authors = ["Readyset Technology, Inc. <info@readyset.io>"]
66
edition = "2024"
77

88
[dependencies]
9-
nom = { workspace = true }
109
serde = { workspace = true, features = ["rc"] }
1110
readyset-util = { path = "../readyset-util" }
1211
proptest = { workspace = true }

mysql-time/src/lib.rs

Lines changed: 123 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -175,9 +175,8 @@ impl MySqlTime {
175175
/// assert!(MySqlTime::from_bytes("60".as_bytes()).is_err());
176176
/// ```
177177
pub fn from_bytes(bytes: &[u8]) -> Result<MySqlTime, ConvertError> {
178-
let (positive, hour, minutes, seconds, microseconds) = parse::h_m_s_us(bytes)
179-
.map(|res| res.1)
180-
.map_err(|_| ConvertError::ParseError)?;
178+
let (positive, hour, minutes, seconds, microseconds) =
179+
parse::h_m_s_us(bytes).map_err(|_| ConvertError::ParseError)?;
181180
if minutes > 59 {
182181
return Err(ConvertError::OutOfBounds(
183182
"Minutes can't be greater than 59".to_owned(),
@@ -370,124 +369,114 @@ impl From<MySqlTime> for Duration {
370369
}
371370

372371
mod parse {
373-
use nom::IResult;
374-
use nom::branch::alt;
375-
use nom::bytes::complete::take_while_m_n;
376-
use nom::character::complete::{char, digit1};
377-
use nom::character::{self, is_digit};
378-
use nom::combinator::{complete, eof, map, map_parser, opt};
379-
use nom::multi::{fold_many0, many0};
380-
use nom::sequence::{preceded, terminated, tuple};
372+
use std::str::{self, FromStr};
381373

382-
use super::*;
383-
384-
fn microseconds_padding(i: &[u8]) -> IResult<&[u8], u32> {
385-
let num_digits = i.len();
386-
map(character::complete::u32, move |number| {
387-
number * (10u32.pow(6 - num_digits as u32))
388-
})(i)
374+
fn parse_ascii<T>(s: &[u8]) -> Result<T, ()>
375+
where
376+
T: FromStr,
377+
{
378+
str::from_utf8(s).map_err(|_| ())?.parse().map_err(|_| ())
389379
}
390380

391-
fn microseconds(i: &[u8]) -> IResult<&[u8], u32> {
392-
preceded(
393-
complete(char('.')),
394-
map_parser(take_while_m_n(1, 6, is_digit), microseconds_padding),
395-
)(i)
396-
}
397-
398-
fn seconds(i: &[u8]) -> IResult<&[u8], u8> {
399-
preceded(
400-
complete(char(':')),
401-
map_parser(take_while_m_n(1, 2, is_digit), character::complete::u8),
402-
)(i)
381+
fn parse_microseconds(s: &[u8]) -> Result<u32, ()> {
382+
if s.is_empty() || s.len() > 6 || !s.iter().all(|b| b.is_ascii_digit()) {
383+
return Err(());
384+
}
385+
let n = parse_ascii::<u32>(s)?;
386+
Ok(n * 10u32.pow(6 - s.len() as u32))
403387
}
404388

405-
/// Creates a number from an array of digits.
406-
/// Each position of the array must be a number from 0-9.
407-
fn to_number(digits: &[u8]) -> u64 {
408-
// These u8 are actual numbers, NOT a byte representing a char. Thus, it is
409-
// safe to perform arithmetic operations on them to yield a number.
410-
let mut res = 0u64;
411-
for &n in digits {
412-
res = res * 10 + n as u64;
389+
fn parse_u8_2digits(s: &[u8]) -> Result<u8, ()> {
390+
if s.is_empty() || s.len() > 2 || !s.iter().all(|b| b.is_ascii_digit()) {
391+
return Err(());
413392
}
414-
res
393+
parse_ascii(s)
415394
}
416395

417-
fn one_digit(i: &[u8]) -> IResult<&[u8], u8> {
418-
map_parser(take_while_m_n(1, 1, is_digit), character::complete::u8)(i)
396+
fn split_dot(s: &[u8]) -> (&[u8], Option<&[u8]>) {
397+
let mut it = s.splitn(2, |&b| b == b'.');
398+
let before = it.next().unwrap();
399+
(before, it.next())
419400
}
420401

421-
fn h_m_s_us_no_colons(i: &[u8]) -> IResult<&[u8], (bool, u16, u8, u8, u32)> {
422-
map(
423-
terminated(
424-
tuple((
425-
opt(char('-')),
426-
fold_many0(one_digit, Vec::new, |mut acc: Vec<u8>, num: u8| {
427-
acc.push(num);
428-
acc
429-
}),
430-
opt(microseconds),
431-
)),
432-
eof,
433-
),
434-
|(sign, numbers, microseconds)| {
435-
let digits = numbers.len();
436-
let (hour, minutes, seconds) = if digits > 4 {
437-
(
438-
to_number(&numbers[0..digits - 4]),
439-
to_number(&numbers[digits - 4..digits - 2]),
440-
to_number(&numbers[digits - 2..digits]),
441-
)
442-
} else if digits > 2 {
443-
(
444-
0,
445-
to_number(&numbers[0..digits - 2]),
446-
to_number(&numbers[digits - 2..digits]),
447-
)
448-
} else {
449-
(0, 0, to_number(&numbers[0..digits]))
402+
fn parse_colon(positive: bool, input: &[u8]) -> Result<(bool, u16, u8, u8, u32), ()> {
403+
// H...H:MM[:SS[.us]]
404+
let c1 = input.iter().position(|&b| b == b':').ok_or(())?;
405+
let hour_bytes = &input[..c1];
406+
if hour_bytes.is_empty() || !hour_bytes.iter().all(|b| b.is_ascii_digit()) {
407+
return Err(());
408+
}
409+
let hour = parse_ascii(hour_bytes).unwrap_or(u16::MAX);
410+
let rest = &input[c1 + 1..];
411+
412+
match rest.iter().position(|&b| b == b':') {
413+
Some(c2) => {
414+
// rest = MM:SS[.us]
415+
let minutes = parse_u8_2digits(&rest[..c2])?;
416+
let (ss_bytes, us_bytes) = split_dot(&rest[c2 + 1..]);
417+
let seconds = parse_u8_2digits(ss_bytes)?;
418+
let microseconds = match us_bytes {
419+
Some(u) => parse_microseconds(u)?,
420+
None => 0,
450421
};
451-
(
452-
sign.is_none(),
453-
hour.try_into().unwrap_or(u16::MAX),
454-
minutes.try_into().unwrap_or(u8::MAX),
455-
seconds.try_into().unwrap_or(u8::MAX),
456-
microseconds.unwrap_or(0),
457-
)
458-
},
459-
)(i)
422+
Ok((positive, hour, minutes, seconds, microseconds))
423+
}
424+
None => {
425+
// rest = MM[.us], no seconds component
426+
let (mm_bytes, us_bytes) = split_dot(rest);
427+
let minutes = parse_u8_2digits(mm_bytes)?;
428+
let microseconds = match us_bytes {
429+
Some(u) => parse_microseconds(u)?,
430+
None => 0,
431+
};
432+
Ok((positive, hour, minutes, 0, microseconds))
433+
}
434+
}
460435
}
461436

462-
fn h_m_s_us_colons(i: &[u8]) -> IResult<&[u8], (bool, u16, u8, u8, u32)> {
463-
map(
464-
terminated(
465-
tuple((
466-
opt(char('-')),
467-
terminated(map_parser(digit1, character::complete::u32), char(':')),
468-
map_parser(take_while_m_n(1, 2, is_digit), character::complete::u8),
469-
opt(seconds),
470-
opt(microseconds),
471-
)),
472-
eof,
473-
),
474-
|(sign, hour, minutes, seconds, microseconds)| {
475-
(
476-
sign.is_none(),
477-
hour.try_into().unwrap_or(u16::MAX),
478-
minutes,
479-
seconds.unwrap_or(0),
480-
microseconds.unwrap_or(0),
481-
)
482-
},
483-
)(i)
437+
fn parse_no_colon(positive: bool, input: &[u8]) -> Result<(bool, u16, u8, u8, u32), ()> {
438+
let (digit_part, us_bytes) = split_dot(input);
439+
if digit_part.is_empty() || !digit_part.iter().all(|b| b.is_ascii_digit()) {
440+
return Err(());
441+
}
442+
let parse = |s| parse_ascii(s).unwrap_or(u64::MAX);
443+
let n = digit_part.len();
444+
let (hour, minutes, seconds) = if n > 4 {
445+
(
446+
parse(&digit_part[..n - 4]),
447+
parse(&digit_part[n - 4..n - 2]),
448+
parse(&digit_part[n - 2..]),
449+
)
450+
} else if n > 2 {
451+
(0, parse(&digit_part[..n - 2]), parse(&digit_part[n - 2..]))
452+
} else {
453+
(0, 0, parse(digit_part))
454+
};
455+
let microseconds = match us_bytes {
456+
Some(u) => parse_microseconds(u)?,
457+
None => 0,
458+
};
459+
Ok((
460+
positive,
461+
hour.try_into().unwrap_or(u16::MAX),
462+
minutes.try_into().unwrap_or(u8::MAX),
463+
seconds.try_into().unwrap_or(u8::MAX),
464+
microseconds,
465+
))
484466
}
485467

486-
pub fn h_m_s_us(i: &[u8]) -> IResult<&[u8], (bool, u16, u8, u8, u32)> {
487-
preceded(
488-
many0(char(' ')),
489-
alt((complete(h_m_s_us_colons), complete(h_m_s_us_no_colons))),
490-
)(i)
468+
pub fn h_m_s_us(input: &[u8]) -> Result<(bool, u16, u8, u8, u32), ()> {
469+
let input = input.trim_ascii_start();
470+
let (positive, rest) = if input.first() == Some(&b'-') {
471+
(false, &input[1..])
472+
} else {
473+
(true, input)
474+
};
475+
if rest.contains(&b':') {
476+
parse_colon(positive, rest)
477+
} else {
478+
parse_no_colon(positive, rest)
479+
}
491480
}
492481
}
493482

@@ -1165,6 +1154,34 @@ mod tests {
11651154
let result = MySqlTime::from_str("banana");
11661155
assert_eq!(result, Err(ConvertError::ParseError));
11671156
}
1157+
1158+
#[test]
1159+
fn from_str_colon_errors() {
1160+
// minutes out of range
1161+
MySqlTime::from_str("1:60").unwrap_err();
1162+
MySqlTime::from_str("1:60:00").unwrap_err();
1163+
// seconds out of range
1164+
MySqlTime::from_str("1:00:60").unwrap_err();
1165+
// too many colons
1166+
MySqlTime::from_str("1:2:3:4").unwrap_err();
1167+
// empty components
1168+
MySqlTime::from_str(":1:2").unwrap_err();
1169+
MySqlTime::from_str("1::2").unwrap_err();
1170+
}
1171+
1172+
#[test]
1173+
fn from_str_leading_spaces() {
1174+
let mysql_time = MySqlTime::from_str(" 123:45:59").unwrap();
1175+
assert_time!(mysql_time, true, 123, 45, 59, 0);
1176+
let mysql_time = MySqlTime::from_str(" 1234559").unwrap();
1177+
assert_time!(mysql_time, true, 123, 45, 59, 0);
1178+
}
1179+
1180+
#[test]
1181+
fn from_str_too_many_fractional_digits() {
1182+
MySqlTime::from_str("1:2:3.1234567").unwrap_err();
1183+
MySqlTime::from_str("1234559.1234567").unwrap_err();
1184+
}
11681185
}
11691186

11701187
mod try_from {

0 commit comments

Comments
 (0)