Skip to content

Commit 7f10883

Browse files
committed
feat: make duration parser lenient by allowing duplicate and out of order units
1 parent d903d06 commit 7f10883

File tree

3 files changed

+63
-132
lines changed

3 files changed

+63
-132
lines changed

docs/docs/core/data_types.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ This is the list of all basic types supported by CocoIndex:
3535
| Time | | `datetime.time` | `datetime.time` |
3636
| LocalDatetime | Date and time without timezone | `cocoindex.LocalDateTime` | `datetime.datetime` |
3737
| OffsetDatetime | Date and time with a timezone offset | `cocoindex.OffsetDateTime` | `datetime.datetime` |
38-
| TimeDelta | A duration of time | `datetime.TimeDelta` | `datetime.timedelta` |
38+
| TimeDelta | A duration of time | `datetime.timedelta` | `datetime.timedelta` |
3939
| Vector[*T*, *Dim*?] | *T* must be basic type. *Dim* is a positive integer and optional. |`cocoindex.Vector[T]` or `cocoindex.Vector[T, Dim]` | `list[T]` |
4040
| Json | | `cocoindex.Json` | Any data convertible to JSON by `json` package |
4141

python/cocoindex/typing.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ def __init__(self, key: str, value: Any):
2929
Json = Annotated[Any, TypeKind('Json')]
3030
LocalDateTime = Annotated[datetime.datetime, TypeKind('LocalDateTime')]
3131
OffsetDateTime = Annotated[datetime.datetime, TypeKind('OffsetDateTime')]
32-
TimeDelta = Annotated[datetime.timedelta, TypeKind('TimeDelta')]
3332

3433
if TYPE_CHECKING:
3534
T_co = TypeVar('T_co', covariant=True)

src/base/duration.rs

Lines changed: 62 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use anyhow::{Result, anyhow};
1+
use anyhow::{Result, anyhow, bail};
22
use chrono::Duration;
33

44
/// Parses a string of number-unit pairs into a vector of (number, unit),
@@ -30,7 +30,7 @@ fn parse_components(
3030
result.push((num, unit));
3131
iter.next();
3232
} else {
33-
return Err(anyhow!("Invalid unit '{}' in: {}", unit, original_input));
33+
bail!("Invalid unit '{}' in: {}", unit, original_input);
3434
}
3535
} else {
3636
return Err(anyhow!(
@@ -43,22 +43,6 @@ fn parse_components(
4343
Ok(result)
4444
}
4545

46-
/// Checks if the sequence of units follows the specified order.
47-
fn check_order(units: &[char], order: &[char], input: &str) -> Result<()> {
48-
let mut last_pos = -1;
49-
for &unit in units {
50-
if let Some(pos) = order.iter().position(|&u| u == unit) {
51-
if pos as i32 <= last_pos {
52-
return Err(anyhow!("Units out of order in: {}", input));
53-
}
54-
last_pos = pos as i32;
55-
} else {
56-
return Err(anyhow!("Invalid unit '{}' in: {}", unit, input));
57-
}
58-
}
59-
Ok(())
60-
}
61-
6246
/// Parses an ISO 8601 duration string into a `chrono::Duration`.
6347
fn parse_iso8601_duration(s: &str, original_input: &str) -> Result<Duration> {
6448
let (is_negative, s_after_sign) = if s.starts_with('-') {
@@ -98,38 +82,6 @@ fn parse_iso8601_duration(s: &str, original_input: &str) -> Result<Duration> {
9882
vec![]
9983
};
10084

101-
// Duplicate units not allowed in date part
102-
let mut seen_date_units = std::collections::HashSet::new();
103-
for &(_, unit) in &date_components {
104-
if !seen_date_units.insert(unit) {
105-
return Err(anyhow!(
106-
"Duplicate '{}' in date part: {}",
107-
unit,
108-
original_input
109-
));
110-
}
111-
}
112-
113-
// Duplicate units not allowed in time part
114-
let mut seen_time_units = std::collections::HashSet::new();
115-
for &(_, unit) in &time_components {
116-
if !seen_time_units.insert(unit) {
117-
return Err(anyhow!(
118-
"Duplicate '{}' in time part: {}",
119-
unit,
120-
original_input
121-
));
122-
}
123-
}
124-
125-
// Check date units are in order
126-
let date_units: Vec<char> = date_components.iter().map(|&(_, u)| u).collect();
127-
check_order(&date_units, &['Y', 'M', 'W', 'D'], original_input)?;
128-
129-
// Check time units are in order
130-
let time_units: Vec<char> = time_components.iter().map(|&(_, u)| u).collect();
131-
check_order(&time_units, &['H', 'M', 'S'], original_input)?;
132-
13385
if date_components.is_empty() && time_components.is_empty() {
13486
return Err(anyhow!("No components in duration: {}", original_input));
13587
}
@@ -175,42 +127,26 @@ fn parse_human_readable_duration(s: &str, original_input: &str) -> Result<Durati
175127
));
176128
}
177129

178-
let components: Vec<(i64, &str)> = parts
130+
let durations: Result<Vec<Duration>> = parts
179131
.chunks(2)
180132
.map(|chunk| {
181-
let num_str = chunk[0];
182-
let num = num_str
133+
let num: i64 = chunk[0]
183134
.parse()
184-
.map_err(|_| anyhow!("Invalid number '{}' in: {}", num_str, original_input))?;
185-
Ok((num, chunk[1]))
186-
})
187-
.collect::<Result<Vec<_>>>()?;
188-
189-
let units: Vec<&str> = components.iter().map(|&(_, u)| u).collect();
190-
check_order(
191-
&units
192-
.iter()
193-
.map(|u| u.to_lowercase().chars().next().unwrap())
194-
.collect::<Vec<_>>(),
195-
&['d', 'h', 'm', 's', 'i', 'c'], // Abbreviation of day, hour, minute, second, millisecond, microsecond
196-
original_input,
197-
)?;
198-
199-
let total = components
200-
.iter()
201-
.fold(Duration::zero(), |acc, &(num, unit)| {
202-
match unit.to_lowercase().as_str() {
203-
"day" | "days" => acc + Duration::days(num),
204-
"hour" | "hours" => acc + Duration::hours(num),
205-
"minute" | "minutes" => acc + Duration::minutes(num),
206-
"second" | "seconds" => acc + Duration::seconds(num),
207-
"millisecond" | "milliseconds" => acc + Duration::milliseconds(num),
208-
"microsecond" | "microseconds" => acc + Duration::microseconds(num),
209-
_ => unreachable!("Invalid unit should be caught by prior validation"),
135+
.map_err(|_| anyhow!("Invalid number '{}' in: {}", chunk[0], original_input))?;
136+
137+
match chunk[1].to_lowercase().as_str() {
138+
"day" | "days" => Ok(Duration::days(num)),
139+
"hour" | "hours" => Ok(Duration::hours(num)),
140+
"minute" | "minutes" => Ok(Duration::minutes(num)),
141+
"second" | "seconds" => Ok(Duration::seconds(num)),
142+
"millisecond" | "milliseconds" => Ok(Duration::milliseconds(num)),
143+
"microsecond" | "microseconds" => Ok(Duration::microseconds(num)),
144+
_ => bail!("Invalid unit '{}' in: {}", chunk[1], original_input),
210145
}
211-
});
146+
})
147+
.collect();
212148

213-
Ok(total)
149+
durations.map(|durs| durs.into_iter().sum())
214150
}
215151

216152
/// Parses a duration string into a `chrono::Duration`, trying ISO 8601 first, then human-readable format.
@@ -230,13 +166,10 @@ pub fn parse_duration(s: &str) -> Result<Duration> {
230166
}
231167

232168
if is_likely_iso8601(s) {
233-
// Try ISO 8601 format without fallbacks
234-
return parse_iso8601_duration(s, original_input);
169+
parse_iso8601_duration(s, original_input)
170+
} else {
171+
parse_human_readable_duration(s, original_input)
235172
}
236-
237-
// Allow maybe ISO-ish formats, so still try iso8601 first
238-
parse_iso8601_duration(s, original_input)
239-
.or_else(|_| parse_human_readable_duration(s, original_input))
240173
}
241174

242175
#[cfg(test)]
@@ -385,39 +318,6 @@ mod tests {
385318
);
386319
}
387320

388-
#[test]
389-
fn test_iso_out_of_order_unit() {
390-
check_err_contains(
391-
parse_duration("P1H2D"),
392-
"Invalid unit 'H' in: P1H2D", // Why not units out of order
393-
"\"P1H2D\"",
394-
);
395-
check_err_contains(
396-
parse_duration("PT2S1H"),
397-
"Units out of order in: PT2S1H",
398-
"\"PT2S1H\"",
399-
);
400-
check_err_contains(
401-
parse_duration("P1W1Y"),
402-
"Units out of order in: P1W1Y",
403-
"\"P1W1Y\"",
404-
);
405-
}
406-
407-
#[test]
408-
fn test_iso_duplicated_unit() {
409-
check_err_contains(
410-
parse_duration("P1D1D"),
411-
"Duplicate 'D' in date part: P1D1D",
412-
"\"P1D1D\"",
413-
);
414-
check_err_contains(
415-
parse_duration("PT1H1H"),
416-
"Duplicate 'H' in time part: PT1H1H",
417-
"\"PT1H1H\"",
418-
);
419-
}
420-
421321
#[test]
422322
fn test_iso_negative_number_after_p() {
423323
check_err_contains(
@@ -476,6 +376,33 @@ mod tests {
476376
);
477377
}
478378

379+
#[test]
380+
fn test_iso_duplicated_unit() {
381+
check_ok(parse_duration("P1D1D"), Duration::days(2), "\"P1D1D\"");
382+
check_ok(parse_duration("PT1H1H"), Duration::hours(2), "\"PT1H1H\"");
383+
}
384+
385+
#[test]
386+
fn test_iso_out_of_order_unit() {
387+
check_ok(
388+
parse_duration("P1W1Y"),
389+
Duration::days(365 + 7),
390+
"\"P1W1Y\"",
391+
);
392+
check_ok(
393+
parse_duration("PT2S1H"),
394+
Duration::hours(1) + Duration::seconds(2),
395+
"\"PT2S1H\"",
396+
);
397+
check_ok(parse_duration("P3M"), Duration::days(90), "\"PT2S1H\"");
398+
check_ok(parse_duration("PT3M"), Duration::minutes(3), "\"PT2S1H\"");
399+
check_err_contains(
400+
parse_duration("P1H2D"),
401+
"Invalid unit 'H' in: P1H2D", // Time part without 'T' is invalid
402+
"\"P1H2D\"",
403+
);
404+
}
405+
479406
#[test]
480407
fn test_iso_negative_duration_p1d() {
481408
check_ok(parse_duration("-P1D"), -Duration::days(1), "\"-P1D\"");
@@ -542,15 +469,6 @@ mod tests {
542469
);
543470
}
544471

545-
#[test]
546-
fn test_human_out_of_order() {
547-
check_err_contains(
548-
parse_duration("1 second 2 hours"),
549-
"Units out of order in: 1 second 2 hours",
550-
"\"1 second 2 hours\"",
551-
);
552-
}
553-
554472
#[test]
555473
fn test_human_float_number_fail() {
556474
check_err_contains(
@@ -573,7 +491,7 @@ mod tests {
573491
fn test_human_unknown_unit() {
574492
check_err_contains(
575493
parse_duration("1 year"),
576-
"Invalid unit 'y' in: 1 year",
494+
"Invalid unit 'year' in: 1 year",
577495
"\"1 year\"",
578496
);
579497
}
@@ -681,6 +599,20 @@ mod tests {
681599
);
682600
}
683601

602+
#[test]
603+
fn test_human_out_of_order() {
604+
check_ok(
605+
parse_duration("1 second 2 hours"),
606+
Duration::hours(2) + Duration::seconds(1),
607+
"\"1 second 2 hours\"",
608+
);
609+
check_ok(
610+
parse_duration("7 minutes 6 hours 5 days"),
611+
Duration::days(5) + Duration::hours(6) + Duration::minutes(7),
612+
"\"7 minutes 6 hours 5 days\"",
613+
)
614+
}
615+
684616
#[test]
685617
fn test_human_zero_duration_seconds() {
686618
check_ok(

0 commit comments

Comments
 (0)