Skip to content

Commit 7ca2a6f

Browse files
authored
FIX polars as-datetime: ignores timezone information on conversion (nushell#15490)
# Description This PR seeks to fix an error in `polars as-datetime` where timezone information is entirely ignored. This behavior raises a host of silent errors when dealing with datetime conversions (see example below). ## Current Implementation Timezones are entirely ignored and datetimes with different timezones are converted to the same naive datetimes even when the user specifically indicates that the timezone should be parsed. For example, "2021-12-30 00:00:00 +0000" and "2021-12-30 00:00:00 -0400" will both be parsed to "2021-12-30 00:00:00" even when the format string specifically includes "%z". ``` $ ["2021-12-30 00:00:00 +0000" "2021-12-30 00:00:00 -0400"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S %z" ╭───┬───────────────────────╮ │ # │ datetime │ ├───┼───────────────────────┤ │ 0 │ 12/30/2021 12:00:00AM │ │ 1 │ 12/30/2021 12:00:00AM │ <-- Same datetime even though the first is +0000 and second is -0400 ╰───┴───────────────────────╯ $ ["2021-12-30 00:00:00 +0000" "2021-12-30 00:00:00 -0400"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S %z" | polars schema ╭──────────┬──────────────╮ │ datetime │ datetime<ns> │ ╰──────────┴──────────────╯ ``` ## New Implementation Datetimes are converted to UTC and timezone information is retained. ``` $ ["2021-12-30 00:00:00 +0000" "2021-12-30 00:00:00 -0400"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S %z" ╭───┬───────────────────────╮ │ # │ datetime │ ├───┼───────────────────────┤ │ 0 │ 12/30/2021 12:00:00AM │ │ 1 │ 12/30/2021 04:00:00AM │ <-- Converted to UTC ╰───┴───────────────────────╯ $ ["2021-12-30 00:00:00 +0000" "2021-12-30 00:00:00 -0400"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S %z" | polars schema ╭──────────┬───────────────────╮ │ datetime │ datetime<ns, UTC> │ ╰──────────┴───────────────────╯ ``` The user may intentionally ignore timezone information by setting the `--naive` flag. 
``` $ ["2021-12-30 00:00:00 +0000" "2021-12-30 00:00:00 -0400"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S %z" --naive ╭───┬───────────────────────╮ │ # │ datetime │ ├───┼───────────────────────┤ │ 0 │ 12/30/2021 12:00:00AM │ │ 1 │ 12/30/2021 12:00:00AM │ <-- the -0400 offset is ignored when --naive is set ╰───┴───────────────────────╯ $ ["2021-12-30 00:00:00 +0000" "2021-12-30 00:00:00 -0400"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S %z" --naive | polars schema ╭──────────┬──────────────╮ │ datetime │ datetime<ns> │ ╰──────────┴──────────────╯ ``` # User-Facing Changes <!-- List of all changes that impact the user experience here. This helps us keep track of breaking changes. --> `polars as-datetime` will now account for timezone information and return type `datetime<ns,UTC>` rather than `datetime<ns>` by default. The user can replicate the previous behavior by setting `--naive`. # Tests + Formatting <!-- Don't forget to add tests that cover your changes. Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging)) - `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library > **Note** > from `nushell` you can also use the `toolkit` as follows > ```bash > use toolkit.nu # or use an `env_change` hook to activate it automatically > toolkit check pr > ``` --> Tests that incorporated `polars as-datetime` had to be tweaked to include the `--naive` flag to replicate previous behavior. 
# After Submitting <!-- If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. -->
1 parent 237a685 commit 7ca2a6f

File tree

4 files changed

+85
-52
lines changed

4 files changed

+85
-52
lines changed

crates/nu_plugin_polars/src/dataframe/command/core/to_repr.rs

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -39,33 +39,33 @@ impl PluginCommand for ToRepr {
3939
result: Some(Value::string(
4040
r#"
4141
shape: (2, 2)
42-
┌─────────────────────┬─────┐
43-
│ a ┆ b │
44-
│ --- ┆ --- │
45-
│ datetime[ns] ┆ i64 │
46-
╞═════════════════════╪═════╡
47-
│ 2025-01-01 00:00:00 ┆ 2 │
48-
│ 2025-01-02 00:00:00 ┆ 4 │
49-
└─────────────────────┴─────┘"#
42+
┌─────────────────────────┬─────┐
43+
│ a ┆ b │
44+
│ --- ┆ --- │
45+
│ datetime[ns, UTC] ┆ i64 │
46+
╞═════════════════════════╪═════╡
47+
│ 2025-01-01 00:00:00 UTC ┆ 2 │
48+
│ 2025-01-02 00:00:00 UTC ┆ 4 │
49+
└─────────────────────────┴─────┘"#
5050
.trim(),
5151
Span::test_data(),
5252
)),
5353
},
5454
Example {
5555
description: "Shows lazy dataframe in repr format",
5656
example:
57-
"[[a b]; [2025-01-01 2] [2025-01-02 4]] | polars into-df | polars into-lazy | polars into-repr",
57+
"[[a b]; [2025-01-01 2] [2025-01-02 4]] | polars into-lazy | polars into-repr",
5858
result: Some(Value::string(
5959
r#"
6060
shape: (2, 2)
61-
┌─────────────────────┬─────┐
62-
│ a ┆ b │
63-
│ --- ┆ --- │
64-
│ datetime[ns] ┆ i64 │
65-
╞═════════════════════╪═════╡
66-
│ 2025-01-01 00:00:00 ┆ 2 │
67-
│ 2025-01-02 00:00:00 ┆ 4 │
68-
└─────────────────────┴─────┘"#
61+
┌─────────────────────────┬─────┐
62+
│ a ┆ b │
63+
│ --- ┆ --- │
64+
│ datetime[ns, UTC] ┆ i64 │
65+
╞═════════════════════════╪═════╡
66+
│ 2025-01-01 00:00:00 UTC ┆ 2 │
67+
│ 2025-01-02 00:00:00 UTC ┆ 4 │
68+
└─────────────────────────┴─────┘"#
6969
.trim(),
7070
Span::test_data(),
7171
)),

crates/nu_plugin_polars/src/dataframe/command/datetime/as_datetime.rs

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
use crate::{values::CustomValueSupport, PolarsPlugin};
2+
use std::sync::Arc;
23

3-
use super::super::super::values::{Column, NuDataFrame};
4+
use super::super::super::values::{Column, NuDataFrame, NuSchema};
45

56
use chrono::DateTime;
67
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
78
use nu_protocol::{
89
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
910
Value,
1011
};
11-
use polars::prelude::{IntoSeries, StringMethods, TimeUnit};
12+
use polars::prelude::{DataType, Field, IntoSeries, Schema, StringMethods, TimeUnit};
1213

1314
#[derive(Clone)]
1415
pub struct AsDateTime;
@@ -43,6 +44,7 @@ impl PluginCommand for AsDateTime {
4344
Signature::build(self.name())
4445
.required("format", SyntaxShape::String, "formatting date time string")
4546
.switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
47+
.switch("naive", "the input datetimes should be parsed as naive (i.e., not timezone-aware)", None)
4648
.input_output_type(
4749
Type::Custom("dataframe".into()),
4850
Type::Custom("dataframe".into()),
@@ -54,23 +56,23 @@ impl PluginCommand for AsDateTime {
5456
vec![
5557
Example {
5658
description: "Converts string to datetime",
57-
example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S""#,
59+
example: r#"["2021-12-30 00:00:00 -0400" "2021-12-31 00:00:00 -0400"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S %z""#,
5860
result: Some(
5961
NuDataFrame::try_from_columns(
6062
vec![Column::new(
6163
"datetime".to_string(),
6264
vec![
6365
Value::date(
6466
DateTime::parse_from_str(
65-
"2021-12-30 00:00:00 +0000",
67+
"2021-12-30 00:00:00 -0400",
6668
"%Y-%m-%d %H:%M:%S %z",
6769
)
6870
.expect("date calculation should not fail in test"),
6971
Span::test_data(),
7072
),
7173
Value::date(
7274
DateTime::parse_from_str(
73-
"2021-12-31 00:00:00 +0000",
75+
"2021-12-31 00:00:00 -0400",
7476
"%Y-%m-%d %H:%M:%S %z",
7577
)
7678
.expect("date calculation should not fail in test"),
@@ -86,7 +88,7 @@ impl PluginCommand for AsDateTime {
8688
},
8789
Example {
8890
description: "Converts string to datetime with high resolutions",
89-
example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S.%9f""#,
91+
example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S.%9f" --naive"#,
9092
result: Some(
9193
NuDataFrame::try_from_columns(
9294
vec![Column::new(
@@ -110,15 +112,23 @@ impl PluginCommand for AsDateTime {
110112
),
111113
],
112114
)],
113-
None,
115+
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
116+
Field::new(
117+
"datetime".into(),
118+
DataType::Datetime(
119+
TimeUnit::Nanoseconds,
120+
None
121+
),
122+
),
123+
])))),
114124
)
115125
.expect("simple df for test should not fail")
116126
.into_value(Span::test_data()),
117127
),
118128
},
119129
Example {
120130
description: "Converts string to datetime using the `--not-exact` flag even with excessive symbols",
121-
example: r#"["2021-12-30 00:00:00 GMT+4"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S" --not-exact"#,
131+
example: r#"["2021-12-30 00:00:00 GMT+4"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S" --not-exact --naive"#,
122132
result: Some(
123133
NuDataFrame::try_from_columns(
124134
vec![Column::new(
@@ -134,7 +144,15 @@ impl PluginCommand for AsDateTime {
134144
),
135145
],
136146
)],
137-
None,
147+
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
148+
Field::new(
149+
"datetime".into(),
150+
DataType::Datetime(
151+
TimeUnit::Nanoseconds,
152+
None
153+
),
154+
),
155+
])))),
138156
)
139157
.expect("simple df for test should not fail")
140158
.into_value(Span::test_data()),
@@ -162,6 +180,7 @@ fn command(
162180
) -> Result<PipelineData, ShellError> {
163181
let format: String = call.req(0)?;
164182
let not_exact = call.has_flag("not-exact")?;
183+
let tz_aware = !call.has_flag("naive")?;
165184

166185
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
167186
let series = df.as_series(call.head)?;
@@ -177,7 +196,7 @@ fn command(
177196
casted.as_datetime_not_exact(
178197
Some(format.as_str()),
179198
TimeUnit::Nanoseconds,
180-
false,
199+
tz_aware,
181200
None,
182201
&Default::default(),
183202
)
@@ -186,7 +205,7 @@ fn command(
186205
Some(format.as_str()),
187206
TimeUnit::Nanoseconds,
188207
false,
189-
false,
208+
tz_aware,
190209
None,
191210
&Default::default(),
192211
)

crates/nu_plugin_polars/src/dataframe/command/datetime/datepart.rs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
use crate::values::NuExpression;
2+
use std::sync::Arc;
23

34
use crate::{
4-
dataframe::values::{Column, NuDataFrame},
5+
dataframe::values::{Column, NuDataFrame, NuSchema},
56
values::CustomValueSupport,
67
PolarsPlugin,
78
};
@@ -13,7 +14,7 @@ use nu_protocol::{
1314
};
1415
use polars::{
1516
datatypes::{DataType, TimeUnit},
16-
prelude::NamedFrom,
17+
prelude::{Field, NamedFrom, Schema},
1718
series::Series,
1819
};
1920

@@ -54,22 +55,28 @@ impl PluginCommand for ExprDatePart {
5455
vec![
5556
Example {
5657
description: "Creates an expression to capture the year date part",
57-
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" | polars with-column [(polars col datetime | polars datepart year | polars as datetime_year )]"#,
58+
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" --naive | polars with-column [(polars col datetime | polars datepart year | polars as datetime_year )]"#,
5859
result: Some(
5960
NuDataFrame::try_from_columns(
6061
vec![
6162
Column::new("datetime".to_string(), vec![Value::test_date(dt)]),
6263
Column::new("datetime_year".to_string(), vec![Value::test_int(2021)]),
6364
],
64-
None,
65+
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
66+
Field::new(
67+
"datetime".into(),
68+
DataType::Datetime(TimeUnit::Nanoseconds, None),
69+
),
70+
Field::new("datetime_year".into(), DataType::Int64),
71+
])))),
6572
)
6673
.expect("simple df for test should not fail")
6774
.into_value(Span::test_data()),
6875
),
6976
},
7077
Example {
7178
description: "Creates an expression to capture multiple date parts",
72-
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" |
79+
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" --naive |
7380
polars with-column [ (polars col datetime | polars datepart year | polars as datetime_year ),
7481
(polars col datetime | polars datepart month | polars as datetime_month ),
7582
(polars col datetime | polars datepart day | polars as datetime_day ),

crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,10 @@ fn value_to_data_type(value: &Value) -> Option<DataType> {
245245
Value::Float { .. } => Some(DataType::Float64),
246246
Value::String { .. } => Some(DataType::String),
247247
Value::Bool { .. } => Some(DataType::Boolean),
248-
Value::Date { .. } => Some(DataType::Date),
248+
Value::Date { .. } => Some(DataType::Datetime(
249+
TimeUnit::Nanoseconds,
250+
Some(PlSmallStr::from_static("UTC")),
251+
)),
249252
Value::Duration { .. } => Some(DataType::Duration(TimeUnit::Nanoseconds)),
250253
Value::Filesize { .. } => Some(DataType::Int64),
251254
Value::Binary { .. } => Some(DataType::Binary),
@@ -447,24 +450,28 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Serie
447450
.values
448451
.iter()
449452
.map(|v| {
450-
if let Value::Date { val, .. } = &v {
451-
// If there is a timezone specified, make sure
452-
// the value is converted to it
453-
Ok(maybe_tz
454-
.as_ref()
455-
.map(|tz| tz.parse::<Tz>().map(|tz| val.with_timezone(&tz)))
456-
.transpose()
457-
.map_err(|e| ShellError::GenericError {
458-
error: "Error parsing timezone".into(),
459-
msg: "".into(),
460-
span: None,
461-
help: Some(e.to_string()),
462-
inner: vec![],
463-
})?
464-
.and_then(|dt| dt.timestamp_nanos_opt())
465-
.map(|nanos| nanos_from_timeunit(nanos, *tu)))
466-
} else {
467-
Ok(None)
453+
match (maybe_tz, &v) {
454+
(Some(tz), Value::Date { val, .. }) => {
455+
// If there is a timezone specified, make sure
456+
// the value is converted to it
457+
Ok(tz
458+
.parse::<Tz>()
459+
.map(|tz| val.with_timezone(&tz))
460+
.map_err(|e| ShellError::GenericError {
461+
error: "Error parsing timezone".into(),
462+
msg: "".into(),
463+
span: None,
464+
help: Some(e.to_string()),
465+
inner: vec![],
466+
})?
467+
.timestamp_nanos_opt()
468+
.map(|nanos| nanos_from_timeunit(nanos, *tu)))
469+
}
470+
(None, Value::Date { val, .. }) => Ok(val
471+
.timestamp_nanos_opt()
472+
.map(|nanos| nanos_from_timeunit(nanos, *tu))),
473+
474+
_ => Ok(None),
468475
}
469476
})
470477
.collect::<Result<Vec<Option<i64>>, ShellError>>()?;

0 commit comments

Comments
 (0)