Skip to content

Commit a1b8757

Browse files
authored
Implement val_temporal_unit for deciding how datetimes and dates timestamps get validated. (#1751)
1 parent 4c4ac6e commit a1b8757

File tree

11 files changed

+241
-68
lines changed

11 files changed

+241
-68
lines changed

src/input/datetime.rs

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use pyo3::pyclass::CompareOp;
66
use pyo3::types::PyTuple;
77
use pyo3::types::{PyDate, PyDateTime, PyDelta, PyDeltaAccess, PyDict, PyTime, PyTzInfo};
88
use pyo3::IntoPyObjectExt;
9+
use speedate::DateConfig;
910
use speedate::{
1011
Date, DateTime, DateTimeConfig, Duration, MicrosecondsPrecisionOverflowBehavior, ParseError, Time, TimeConfig,
1112
};
@@ -21,6 +22,7 @@ use super::Input;
2122
use crate::errors::ToErrorValue;
2223
use crate::errors::{ErrorType, ValError, ValResult};
2324
use crate::tools::py_err;
25+
use crate::validators::TemporalUnitMode;
2426

2527
#[cfg_attr(debug_assertions, derive(Debug))]
2628
pub enum EitherDate<'py> {
@@ -411,8 +413,12 @@ impl<'py> EitherDateTime<'py> {
411413
}
412414
}
413415

414-
pub fn bytes_as_date<'py>(input: &(impl Input<'py> + ?Sized), bytes: &[u8]) -> ValResult<EitherDate<'py>> {
415-
match Date::parse_bytes(bytes) {
416+
pub fn bytes_as_date<'py>(
417+
input: &(impl Input<'py> + ?Sized),
418+
bytes: &[u8],
419+
mode: TemporalUnitMode,
420+
) -> ValResult<EitherDate<'py>> {
421+
match Date::parse_bytes_with_config(bytes, &DateConfig::builder().timestamp_unit(mode.into()).build()) {
416422
Ok(date) => Ok(date.into()),
417423
Err(err) => Err(ValError::new(
418424
ErrorType::DateParsing {
@@ -451,6 +457,7 @@ pub fn bytes_as_datetime<'py>(
451457
input: &(impl Input<'py> + ?Sized),
452458
bytes: &[u8],
453459
microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior,
460+
mode: TemporalUnitMode,
454461
) -> ValResult<EitherDateTime<'py>> {
455462
match DateTime::parse_bytes_with_config(
456463
bytes,
@@ -459,7 +466,7 @@ pub fn bytes_as_datetime<'py>(
459466
microseconds_precision_overflow_behavior: microseconds_overflow_behavior,
460467
unix_timestamp_offset: Some(0),
461468
},
462-
..Default::default()
469+
timestamp_unit: mode.into(),
463470
},
464471
) {
465472
Ok(dt) => Ok(dt.into()),
@@ -477,6 +484,7 @@ pub fn int_as_datetime<'py>(
477484
input: &(impl Input<'py> + ?Sized),
478485
timestamp: i64,
479486
timestamp_microseconds: u32,
487+
mode: TemporalUnitMode,
480488
) -> ValResult<EitherDateTime<'py>> {
481489
match DateTime::from_timestamp_with_config(
482490
timestamp,
@@ -486,7 +494,7 @@ pub fn int_as_datetime<'py>(
486494
unix_timestamp_offset: Some(0),
487495
..Default::default()
488496
},
489-
..Default::default()
497+
timestamp_unit: mode.into(),
490498
},
491499
) {
492500
Ok(dt) => Ok(dt.into()),
@@ -514,12 +522,31 @@ macro_rules! nan_check {
514522
};
515523
}
516524

517-
pub fn float_as_datetime<'py>(input: &(impl Input<'py> + ?Sized), timestamp: f64) -> ValResult<EitherDateTime<'py>> {
525+
pub fn float_as_datetime<'py>(
526+
input: &(impl Input<'py> + ?Sized),
527+
timestamp: f64,
528+
mode: TemporalUnitMode,
529+
) -> ValResult<EitherDateTime<'py>> {
518530
nan_check!(input, timestamp, DatetimeParsing);
519-
let microseconds = timestamp.fract().abs() * 1_000_000.0;
520-
// checking for extra digits in microseconds is unreliable with large floats,
521-
// so we just round to the nearest microsecond
522-
int_as_datetime(input, timestamp.floor() as i64, microseconds.round() as u32)
531+
match DateTime::from_float_with_config(
532+
timestamp,
533+
&DateTimeConfig {
534+
time_config: TimeConfig {
535+
unix_timestamp_offset: Some(0),
536+
..Default::default()
537+
},
538+
timestamp_unit: mode.into(),
539+
},
540+
) {
541+
Ok(dt) => Ok(dt.into()),
542+
Err(err) => Err(ValError::new(
543+
ErrorType::DatetimeParsing {
544+
error: Cow::Borrowed(err.get_documentation().unwrap_or_default()),
545+
context: None,
546+
},
547+
input,
548+
)),
549+
}
523550
}
524551

525552
pub fn date_as_datetime<'py>(date: &Bound<'py, PyDate>) -> PyResult<EitherDateTime<'py>> {

src/input/input_abstract.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use pyo3::{intern, prelude::*, IntoPyObjectExt};
88
use crate::errors::{ErrorTypeDefaults, InputValue, LocItem, ValError, ValResult};
99
use crate::lookup_key::{LookupKey, LookupPath};
1010
use crate::tools::py_err;
11-
use crate::validators::ValBytesMode;
11+
use crate::validators::{TemporalUnitMode, ValBytesMode};
1212

1313
use super::datetime::{EitherDate, EitherDateTime, EitherTime, EitherTimedelta};
1414
use super::return_enums::{EitherBytes, EitherComplex, EitherInt, EitherString};
@@ -158,7 +158,7 @@ pub trait Input<'py>: fmt::Debug {
158158

159159
fn validate_iter(&self) -> ValResult<GenericIterator<'static>>;
160160

161-
fn validate_date(&self, strict: bool) -> ValMatch<EitherDate<'py>>;
161+
fn validate_date(&self, strict: bool, mode: TemporalUnitMode) -> ValMatch<EitherDate<'py>>;
162162

163163
fn validate_time(
164164
&self,
@@ -170,6 +170,7 @@ pub trait Input<'py>: fmt::Debug {
170170
&self,
171171
strict: bool,
172172
microseconds_overflow_behavior: speedate::MicrosecondsPrecisionOverflowBehavior,
173+
mode: TemporalUnitMode,
173174
) -> ValMatch<EitherDateTime<'py>>;
174175

175176
fn validate_timedelta(

src/input/input_json.rs

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use crate::input::return_enums::EitherComplex;
1212
use crate::lookup_key::{LookupKey, LookupPath};
1313
use crate::validators::complex::string_to_complex;
1414
use crate::validators::decimal::create_decimal;
15-
use crate::validators::ValBytesMode;
15+
use crate::validators::{TemporalUnitMode, ValBytesMode};
1616

1717
use super::datetime::{
1818
bytes_as_date, bytes_as_datetime, bytes_as_time, bytes_as_timedelta, float_as_datetime, float_as_duration,
@@ -277,9 +277,9 @@ impl<'py, 'data> Input<'py> for JsonValue<'data> {
277277
}
278278
}
279279

280-
fn validate_date(&self, _strict: bool) -> ValResult<ValidationMatch<EitherDate<'py>>> {
280+
fn validate_date(&self, _strict: bool, mode: TemporalUnitMode) -> ValResult<ValidationMatch<EitherDate<'py>>> {
281281
match self {
282-
JsonValue::Str(v) => bytes_as_date(self, v.as_bytes()).map(ValidationMatch::strict),
282+
JsonValue::Str(v) => bytes_as_date(self, v.as_bytes(), mode).map(ValidationMatch::strict),
283283
_ => Err(ValError::new(ErrorTypeDefaults::DateType, self)),
284284
}
285285
}
@@ -313,13 +313,14 @@ impl<'py, 'data> Input<'py> for JsonValue<'data> {
313313
&self,
314314
strict: bool,
315315
microseconds_overflow_behavior: speedate::MicrosecondsPrecisionOverflowBehavior,
316+
mode: TemporalUnitMode,
316317
) -> ValResult<ValidationMatch<EitherDateTime<'py>>> {
317318
match self {
318319
JsonValue::Str(v) => {
319-
bytes_as_datetime(self, v.as_bytes(), microseconds_overflow_behavior).map(ValidationMatch::strict)
320+
bytes_as_datetime(self, v.as_bytes(), microseconds_overflow_behavior, mode).map(ValidationMatch::strict)
320321
}
321-
JsonValue::Int(v) if !strict => int_as_datetime(self, *v, 0).map(ValidationMatch::lax),
322-
JsonValue::Float(v) if !strict => float_as_datetime(self, *v).map(ValidationMatch::lax),
322+
JsonValue::Int(v) if !strict => int_as_datetime(self, *v, 0, mode).map(ValidationMatch::lax),
323+
JsonValue::Float(v) if !strict => float_as_datetime(self, *v, mode).map(ValidationMatch::lax),
323324
_ => Err(ValError::new(ErrorTypeDefaults::DatetimeType, self)),
324325
}
325326
}
@@ -485,8 +486,8 @@ impl<'py> Input<'py> for str {
485486
Ok(string_to_vec(self).into())
486487
}
487488

488-
fn validate_date(&self, _strict: bool) -> ValResult<ValidationMatch<EitherDate<'py>>> {
489-
bytes_as_date(self, self.as_bytes()).map(ValidationMatch::lax)
489+
fn validate_date(&self, _strict: bool, mode: TemporalUnitMode) -> ValResult<ValidationMatch<EitherDate<'py>>> {
490+
bytes_as_date(self, self.as_bytes(), mode).map(ValidationMatch::lax)
490491
}
491492

492493
fn validate_time(
@@ -501,8 +502,9 @@ impl<'py> Input<'py> for str {
501502
&self,
502503
_strict: bool,
503504
microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior,
505+
mode: TemporalUnitMode,
504506
) -> ValResult<ValidationMatch<EitherDateTime<'py>>> {
505-
bytes_as_datetime(self, self.as_bytes(), microseconds_overflow_behavior).map(ValidationMatch::lax)
507+
bytes_as_datetime(self, self.as_bytes(), microseconds_overflow_behavior, mode).map(ValidationMatch::lax)
506508
}
507509

508510
fn validate_timedelta(

src/input/input_python.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use crate::tools::{extract_i64, safe_repr};
1919
use crate::validators::complex::string_to_complex;
2020
use crate::validators::decimal::{create_decimal, get_decimal_type};
2121
use crate::validators::Exactness;
22+
use crate::validators::TemporalUnitMode;
2223
use crate::validators::ValBytesMode;
2324
use crate::ArgsKwargs;
2425

@@ -512,7 +513,7 @@ impl<'py> Input<'py> for Bound<'py, PyAny> {
512513
}
513514
}
514515

515-
fn validate_date(&self, strict: bool) -> ValResult<ValidationMatch<EitherDate<'py>>> {
516+
fn validate_date(&self, strict: bool, mode: TemporalUnitMode) -> ValResult<ValidationMatch<EitherDate<'py>>> {
516517
if let Ok(date) = self.downcast_exact::<PyDate>() {
517518
Ok(ValidationMatch::exact(date.clone().into()))
518519
} else if self.is_instance_of::<PyDateTime>() {
@@ -533,7 +534,7 @@ impl<'py> Input<'py> for Bound<'py, PyAny> {
533534
None
534535
}
535536
} {
536-
bytes_as_date(self, bytes).map(ValidationMatch::lax)
537+
bytes_as_date(self, bytes, mode).map(ValidationMatch::lax)
537538
} else {
538539
Err(ValError::new(ErrorTypeDefaults::DateType, self))
539540
}
@@ -577,6 +578,7 @@ impl<'py> Input<'py> for Bound<'py, PyAny> {
577578
&self,
578579
strict: bool,
579580
microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior,
581+
mode: TemporalUnitMode,
580582
) -> ValResult<ValidationMatch<EitherDateTime<'py>>> {
581583
if let Ok(dt) = self.downcast_exact::<PyDateTime>() {
582584
return Ok(ValidationMatch::exact(dt.clone().into()));
@@ -588,15 +590,15 @@ impl<'py> Input<'py> for Bound<'py, PyAny> {
588590
if !strict {
589591
return if let Ok(py_str) = self.downcast::<PyString>() {
590592
let str = py_string_str(py_str)?;
591-
bytes_as_datetime(self, str.as_bytes(), microseconds_overflow_behavior)
593+
bytes_as_datetime(self, str.as_bytes(), microseconds_overflow_behavior, mode)
592594
} else if let Ok(py_bytes) = self.downcast::<PyBytes>() {
593-
bytes_as_datetime(self, py_bytes.as_bytes(), microseconds_overflow_behavior)
595+
bytes_as_datetime(self, py_bytes.as_bytes(), microseconds_overflow_behavior, mode)
594596
} else if self.is_exact_instance_of::<PyBool>() {
595597
Err(ValError::new(ErrorTypeDefaults::DatetimeType, self))
596598
} else if let Some(int) = extract_i64(self) {
597-
int_as_datetime(self, int, 0)
599+
int_as_datetime(self, int, 0, mode)
598600
} else if let Ok(float) = self.extract::<f64>() {
599-
float_as_datetime(self, float)
601+
float_as_datetime(self, float, mode)
600602
} else if let Ok(date) = self.downcast::<PyDate>() {
601603
Ok(date_as_datetime(date)?)
602604
} else {

src/input/input_string.rs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use crate::lookup_key::{LookupKey, LookupPath};
99
use crate::tools::safe_repr;
1010
use crate::validators::complex::string_to_complex;
1111
use crate::validators::decimal::create_decimal;
12-
use crate::validators::ValBytesMode;
12+
use crate::validators::{TemporalUnitMode, ValBytesMode};
1313

1414
use super::datetime::{
1515
bytes_as_date, bytes_as_datetime, bytes_as_time, bytes_as_timedelta, EitherDate, EitherDateTime, EitherTime,
@@ -201,9 +201,9 @@ impl<'py> Input<'py> for StringMapping<'py> {
201201
Err(ValError::new(ErrorTypeDefaults::IterableType, self))
202202
}
203203

204-
fn validate_date(&self, _strict: bool) -> ValResult<ValidationMatch<EitherDate<'py>>> {
204+
fn validate_date(&self, _strict: bool, mode: TemporalUnitMode) -> ValResult<ValidationMatch<EitherDate<'py>>> {
205205
match self {
206-
Self::String(s) => bytes_as_date(self, py_string_str(s)?.as_bytes()).map(ValidationMatch::strict),
206+
Self::String(s) => bytes_as_date(self, py_string_str(s)?.as_bytes(), mode).map(ValidationMatch::strict),
207207
Self::Mapping(_) => Err(ValError::new(ErrorTypeDefaults::DateType, self)),
208208
}
209209
}
@@ -224,10 +224,13 @@ impl<'py> Input<'py> for StringMapping<'py> {
224224
&self,
225225
_strict: bool,
226226
microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior,
227+
mode: TemporalUnitMode,
227228
) -> ValResult<ValidationMatch<EitherDateTime<'py>>> {
228229
match self {
229-
Self::String(s) => bytes_as_datetime(self, py_string_str(s)?.as_bytes(), microseconds_overflow_behavior)
230-
.map(ValidationMatch::strict),
230+
Self::String(s) => {
231+
bytes_as_datetime(self, py_string_str(s)?.as_bytes(), microseconds_overflow_behavior, mode)
232+
.map(ValidationMatch::strict)
233+
}
231234
Self::Mapping(_) => Err(ValError::new(ErrorTypeDefaults::DatetimeType, self)),
232235
}
233236
}

src/validators/config.rs

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
use std::borrow::Cow;
22
use std::str::FromStr;
33

4+
use crate::build_tools::py_schema_err;
5+
use crate::errors::ErrorType;
6+
use crate::input::EitherBytes;
7+
use crate::serializers::BytesMode;
8+
use crate::tools::SchemaDict;
49
use base64::engine::general_purpose::GeneralPurpose;
510
use base64::engine::{DecodePaddingMode, GeneralPurposeConfig};
611
use base64::{alphabet, DecodeError, Engine};
712
use pyo3::types::{PyDict, PyString};
813
use pyo3::{intern, prelude::*};
9-
10-
use crate::errors::ErrorType;
11-
use crate::input::EitherBytes;
12-
use crate::serializers::BytesMode;
13-
use crate::tools::SchemaDict;
14+
use speedate::TimestampUnit;
1415

1516
const URL_SAFE_OPTIONAL_PADDING: GeneralPurpose = GeneralPurpose::new(
1617
&alphabet::URL_SAFE,
@@ -21,6 +22,55 @@ const STANDARD_OPTIONAL_PADDING: GeneralPurpose = GeneralPurpose::new(
2122
GeneralPurposeConfig::new().with_decode_padding_mode(DecodePaddingMode::Indifferent),
2223
);
2324

25+
/// Unit used to interpret numeric timestamps while validating dates and datetimes.
///
/// The value is read from the `val_temporal_unit` config key and converted into
/// `speedate::TimestampUnit` before being handed to the parser.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum TemporalUnitMode {
    /// Timestamps are interpreted as seconds.
    Seconds,
    /// Timestamps are interpreted as milliseconds.
    Milliseconds,
    /// The unit is left for the parser to infer; this is the default when nothing is configured.
    #[default]
    Infer,
}
32+
33+
impl FromStr for TemporalUnitMode {
34+
type Err = PyErr;
35+
36+
fn from_str(s: &str) -> Result<Self, Self::Err> {
37+
match s {
38+
"seconds" => Ok(Self::Seconds),
39+
"milliseconds" => Ok(Self::Milliseconds),
40+
"infer" => Ok(Self::Infer),
41+
42+
s => py_schema_err!(
43+
"Invalid temporal_unit_mode serialization mode: `{}`, expected seconds, milliseconds or infer",
44+
s
45+
),
46+
}
47+
}
48+
}
49+
50+
impl TemporalUnitMode {
51+
pub fn from_config(config: Option<&Bound<'_, PyDict>>) -> PyResult<Self> {
52+
let Some(config_dict) = config else {
53+
return Ok(Self::default());
54+
};
55+
let raw_mode = config_dict.get_as::<Bound<'_, PyString>>(intern!(config_dict.py(), "val_temporal_unit"))?;
56+
let temporal_unit = raw_mode.map_or_else(
57+
|| Ok(TemporalUnitMode::default()),
58+
|raw| TemporalUnitMode::from_str(&raw.to_cow()?),
59+
)?;
60+
Ok(temporal_unit)
61+
}
62+
}
63+
64+
impl From<TemporalUnitMode> for TimestampUnit {
65+
fn from(value: TemporalUnitMode) -> Self {
66+
match value {
67+
TemporalUnitMode::Seconds => TimestampUnit::Second,
68+
TemporalUnitMode::Milliseconds => TimestampUnit::Millisecond,
69+
TemporalUnitMode::Infer => TimestampUnit::Infer,
70+
}
71+
}
72+
}
73+
2474
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
2575
pub struct ValBytesMode {
2676
pub ser: BytesMode,

0 commit comments

Comments
 (0)