Skip to content

Commit 457f162

Browse files
authored
feat(polars): expand polars unique to allow expression inputs (nushell#15771)
<!--
if this PR closes one or more issues, you can automatically link the PR with them by using one of the [*linking keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword), e.g.
- this PR should close #xxxx
- fixes #xxxx

you can also mention related issues, PRs or discussions!
-->

# Description

<!--
Thank you for improving Nushell. Please, check our [contributing guide](../CONTRIBUTING.md) and talk to the core team before making major changes.

Description of your pull request goes here. **Provide examples and/or screenshots** if your changes affect the user experience.
-->

`polars unique` currently only operates on entire dataframes. This PR seeks to expand this command to handle expressions as well. See examples:

```nushell
Returns unique values in a subset of lazyframe columns
> [[a]; [2] [1] [2]]
    | polars into-lazy
    | polars select (polars col a | polars unique)
    | polars collect
╭───┬───╮
│ # │ a │
├───┼───┤
│ 0 │ 1 │
│ 1 │ 2 │
╰───┴───╯

Returns unique values in a subset of lazyframe columns
> [[a]; [2] [1] [2]]
    | polars into-lazy
    | polars select (polars col a | polars unique --maintain-order)
    | polars collect
╭───┬───╮
│ # │ a │
├───┼───┤
│ 0 │ 2 │
│ 1 │ 1 │
╰───┴───╯
```

# User-Facing Changes

<!-- List of all changes that impact the user experience here. This helps us keep track of breaking changes. -->

No breaking changes. Users have the added option to use `polars unique` in an expression context.

# Tests + Formatting

<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu  # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```
-->

Example tests have been added to `polars unique`.

# After Submitting

<!--
If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary.
This will help us keep the docs up to date.
-->
1 parent 58a8f30 commit 457f162

File tree

1 file changed

+57
-8
lines changed
  • crates/nu_plugin_polars/src/dataframe/command/data

1 file changed

+57
-8
lines changed

crates/nu_plugin_polars/src/dataframe/command/data/unique.rs

Lines changed: 57 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ use crate::{
44
utils::{extract_sm_strs, extract_strings},
55
values::NuLazyFrame,
66
},
7-
values::{CustomValueSupport, PolarsPluginObject, PolarsPluginType, cant_convert_err},
7+
values::{
8+
CustomValueSupport, NuExpression, PolarsPluginObject, PolarsPluginType, cant_convert_err,
9+
},
810
};
911

1012
use crate::values::{Column, NuDataFrame};
@@ -48,10 +50,16 @@ impl PluginCommand for Unique {
4850
"Keep the same order as the original DataFrame (lazy df)",
4951
Some('k'),
5052
)
51-
.input_output_type(
52-
Type::Custom("dataframe".into()),
53-
Type::Custom("dataframe".into()),
54-
)
53+
.input_output_types(vec![
54+
(
55+
Type::Custom("dataframe".into()),
56+
Type::Custom("dataframe".into()),
57+
),
58+
(
59+
Type::Custom("expression".into()),
60+
Type::Custom("expression".into()),
61+
),
62+
])
5563
.category(Category::Custom("dataframe or lazyframe".into()))
5664
}
5765

@@ -123,9 +131,40 @@ impl PluginCommand for Unique {
123131
),
124132
},
125133
Example {
126-
description: "Creates a is unique expression from a column",
127-
example: "col a | unique",
128-
result: None,
134+
description: "Returns unique values in a subset of lazyframe columns",
135+
example: r#"[[a]; [2] [1] [2]]
136+
| polars into-lazy
137+
| polars select (polars col a | polars unique)
138+
| polars collect"#,
139+
result: Some(
140+
NuDataFrame::try_from_columns(
141+
vec![Column::new(
142+
"a".to_string(),
143+
vec![Value::test_int(1), Value::test_int(2)],
144+
)],
145+
None,
146+
)
147+
.expect("simple df for test should not fail")
148+
.into_value(Span::test_data()),
149+
),
150+
},
151+
Example {
152+
description: "Returns unique values in a subset of lazyframe columns",
153+
example: r#"[[a]; [2] [1] [2]]
154+
| polars into-lazy
155+
| polars select (polars col a | polars unique --maintain-order)
156+
| polars collect"#,
157+
result: Some(
158+
NuDataFrame::try_from_columns(
159+
vec![Column::new(
160+
"a".to_string(),
161+
vec![Value::test_int(2), Value::test_int(1)],
162+
)],
163+
None,
164+
)
165+
.expect("simple df for test should not fail")
166+
.into_value(Span::test_data()),
167+
),
129168
},
130169
]
131170
}
@@ -142,11 +181,21 @@ impl PluginCommand for Unique {
142181
match PolarsPluginObject::try_from_value(plugin, &value)? {
143182
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
144183
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
184+
PolarsPluginObject::NuExpression(expr) => {
185+
let maintain = call.has_flag("maintain-order")?;
186+
let res: NuExpression = if maintain {
187+
expr.into_polars().unique_stable().into()
188+
} else {
189+
expr.into_polars().unique().into()
190+
};
191+
res.to_pipeline_data(plugin, engine, call.head)
192+
}
145193
_ => Err(cant_convert_err(
146194
&value,
147195
&[
148196
PolarsPluginType::NuDataFrame,
149197
PolarsPluginType::NuLazyGroupBy,
198+
PolarsPluginType::NuExpression,
150199
],
151200
)),
152201
}

0 commit comments

Comments
 (0)