Skip to content

Commit 8ddb5e1

Browse files
authored
feat: Add maintain order argument on implode (#26782)
1 parent 23b060d commit 8ddb5e1

File tree

44 files changed

+678
-261
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+678
-261
lines changed

crates/polars-arrow/src/array/boolean/mutable.rs

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -211,18 +211,20 @@ impl MutableBooleanArray {
211211
validity.extend_constant(additional, true);
212212
}
213213
},
214-
None => {
215-
self.values.extend_constant(additional, false);
216-
if let Some(validity) = self.validity.as_mut() {
217-
validity.extend_constant(additional, false)
218-
} else {
219-
self.init_validity();
220-
self.validity
221-
.as_mut()
222-
.unwrap()
223-
.extend_constant(additional, false)
224-
};
225-
},
214+
None => self.extend_null(additional),
215+
};
216+
}
217+
218+
pub fn extend_null(&mut self, additional: usize) {
219+
self.values.extend_constant(additional, false);
220+
if let Some(validity) = self.validity.as_mut() {
221+
validity.extend_constant(additional, false)
222+
} else {
223+
self.init_validity();
224+
self.validity
225+
.as_mut()
226+
.unwrap()
227+
.extend_constant(additional, false)
226228
};
227229
}
228230

crates/polars-arrow/src/array/primitive/mutable.rs

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -171,17 +171,21 @@ impl<T: NativeType> MutablePrimitiveArray<T> {
171171
validity.extend_constant(additional, true)
172172
}
173173
} else {
174-
if let Some(validity) = &mut self.validity {
175-
validity.extend_constant(additional, false)
176-
} else {
177-
let mut validity = MutableBitmap::with_capacity(self.values.capacity());
178-
validity.extend_constant(self.len(), true);
179-
validity.extend_constant(additional, false);
180-
self.validity = Some(validity)
181-
}
182-
self.values
183-
.resize(self.values.len() + additional, T::default());
174+
self.extend_null(additional);
175+
}
176+
}
177+
178+
pub fn extend_null(&mut self, additional: usize) {
179+
if let Some(validity) = &mut self.validity {
180+
validity.extend_constant(additional, false)
181+
} else {
182+
let mut validity = MutableBitmap::with_capacity(self.values.capacity());
183+
validity.extend_constant(self.len(), true);
184+
validity.extend_constant(additional, false);
185+
self.validity = Some(validity)
184186
}
187+
self.values
188+
.resize(self.values.len() + additional, T::default());
185189
}
186190

187191
/// Extends the [`MutablePrimitiveArray`] from an iterator of trusted len.

crates/polars-core/src/chunked_array/builder/list/anonymous.rs

Lines changed: 6 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -1,110 +1,5 @@
11
use super::*;
22

3-
pub struct AnonymousListBuilder<'a> {
4-
name: PlSmallStr,
5-
builder: AnonymousBuilder<'a>,
6-
fast_explode: bool,
7-
inner_dtype: Option<DataType>,
8-
}
9-
10-
impl Default for AnonymousListBuilder<'_> {
11-
fn default() -> Self {
12-
Self::new(PlSmallStr::EMPTY, 0, None)
13-
}
14-
}
15-
16-
impl<'a> AnonymousListBuilder<'a> {
17-
pub fn new(name: PlSmallStr, capacity: usize, inner_dtype: Option<DataType>) -> Self {
18-
Self {
19-
name,
20-
builder: AnonymousBuilder::new(capacity),
21-
fast_explode: true,
22-
inner_dtype,
23-
}
24-
}
25-
26-
pub fn append_opt_series(&mut self, opt_s: Option<&'a Series>) -> PolarsResult<()> {
27-
match opt_s {
28-
Some(s) => return self.append_series(s),
29-
None => {
30-
self.append_null();
31-
},
32-
}
33-
Ok(())
34-
}
35-
36-
pub fn append_opt_array(&mut self, opt_s: Option<&'a dyn Array>) {
37-
match opt_s {
38-
Some(s) => self.append_array(s),
39-
None => {
40-
self.append_null();
41-
},
42-
}
43-
}
44-
45-
pub fn append_array(&mut self, arr: &'a dyn Array) {
46-
self.builder.push(arr)
47-
}
48-
49-
#[inline]
50-
pub fn append_null(&mut self) {
51-
self.fast_explode = false;
52-
self.builder.push_null();
53-
}
54-
55-
#[inline]
56-
pub fn append_empty(&mut self) {
57-
self.fast_explode = false;
58-
self.builder.push_empty()
59-
}
60-
61-
pub fn append_series(&mut self, s: &'a Series) -> PolarsResult<()> {
62-
match (s.dtype(), &self.inner_dtype) {
63-
(DataType::Null, _) => {},
64-
(dt, None) => self.inner_dtype = Some(dt.clone()),
65-
(dt, Some(set_dt)) => {
66-
polars_bail!(ComputeError: "dtypes don't match, got {}, expected: {}", dt.pretty_format(), set_dt.pretty_format());
67-
},
68-
}
69-
if s.is_empty() {
70-
self.append_empty();
71-
} else {
72-
self.builder.push_multiple(s.chunks());
73-
}
74-
Ok(())
75-
}
76-
77-
pub fn finish(&mut self) -> ListChunked {
78-
// Don't use self from here on out.
79-
let slf = std::mem::take(self);
80-
if slf.builder.is_empty() {
81-
ListChunked::full_null_with_dtype(
82-
slf.name.clone(),
83-
0,
84-
&slf.inner_dtype.unwrap_or(DataType::Null),
85-
)
86-
} else {
87-
let inner_dtype_physical = self
88-
.inner_dtype
89-
.as_ref()
90-
.map(|dt| dt.to_physical().to_arrow(CompatLevel::newest()));
91-
let arr = slf.builder.finish(inner_dtype_physical.as_ref()).unwrap();
92-
93-
let list_dtype_logical = match &self.inner_dtype {
94-
None => DataType::from_arrow_dtype(arr.dtype()),
95-
Some(dt) => DataType::List(Box::new(dt.clone())),
96-
};
97-
98-
let mut ca = ListChunked::with_chunk(PlSmallStr::EMPTY, arr);
99-
if slf.fast_explode {
100-
ca.set_fast_explode();
101-
}
102-
ca.field = Arc::new(Field::new(slf.name, list_dtype_logical));
103-
ca
104-
}
105-
}
106-
}
107-
1083
pub struct AnonymousOwnedListBuilder {
1094
name: PlSmallStr,
1105
builder: AnonymousBuilder<'static>,
@@ -121,6 +16,10 @@ impl Default for AnonymousOwnedListBuilder {
12116

12217
impl ListBuilderTrait for AnonymousOwnedListBuilder {
12318
fn append_series(&mut self, s: &Series) -> PolarsResult<()> {
19+
self.append_owned_series(s.clone())
20+
}
21+
22+
fn append_owned_series(&mut self, s: Series) -> PolarsResult<()> {
12423
match (s.dtype(), &self.inner_dtype) {
12524
(DataType::Null, _) => {},
12625
(dt, None) => self.inner_dtype = Some(dt.clone()),
@@ -135,8 +34,8 @@ impl ListBuilderTrait for AnonymousOwnedListBuilder {
13534
self.builder
13635
.push_multiple(&*(s.chunks().as_ref() as *const [ArrayRef]));
13736
}
138-
// This make sure that the underlying ArrayRef's are not dropped.
139-
self.owned.push(s.clone());
37+
// This ensures that the underlying ArrayRef's are not dropped.
38+
self.owned.push(s);
14039
}
14140
Ok(())
14241
}

crates/polars-core/src/chunked_array/builder/list/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ pub trait ListBuilderTrait {
2525
Ok(())
2626
}
2727
fn append_series(&mut self, s: &Series) -> PolarsResult<()>;
28+
fn append_owned_series(&mut self, s: Series) -> PolarsResult<()> {
29+
self.append_series(&s)
30+
}
2831
fn append_null(&mut self);
2932

3033
fn field(&self) -> &Field {

crates/polars-core/src/frame/group_by/mod.rs

Lines changed: 3 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -742,44 +742,6 @@ impl<'a> GroupBy<'a> {
742742
DataFrame::new_infer_height(cols)
743743
}
744744

745-
/// Aggregate the groups of the group_by operation into lists.
746-
///
747-
/// # Example
748-
///
749-
/// ```rust
750-
/// # use polars_core::prelude::*;
751-
/// fn example(df: DataFrame) -> PolarsResult<DataFrame> {
752-
/// // GroupBy and aggregate to Lists
753-
/// df.group_by(["date"])?.select(["temp"]).agg_list()
754-
/// }
755-
/// ```
756-
/// Returns:
757-
///
758-
/// ```text
759-
/// +------------+------------------------+
760-
/// | date | temp_agg_list |
761-
/// | --- | --- |
762-
/// | Date | list [i32] |
763-
/// +============+========================+
764-
/// | 2020-08-23 | "[Some(9)]" |
765-
/// +------------+------------------------+
766-
/// | 2020-08-22 | "[Some(7), Some(1)]" |
767-
/// +------------+------------------------+
768-
/// | 2020-08-21 | "[Some(20), Some(10)]" |
769-
/// +------------+------------------------+
770-
/// ```
771-
#[deprecated(since = "0.24.1", note = "use polars.lazy aggregations")]
772-
pub fn agg_list(&self) -> PolarsResult<DataFrame> {
773-
let (mut cols, agg_cols) = self.prepare_agg()?;
774-
for agg_col in agg_cols {
775-
let new_name = fmt_group_by_column(agg_col.name().as_str(), GroupByMethod::Implode);
776-
let mut agg = unsafe { agg_col.agg_list(&self.groups) };
777-
agg.rename(new_name);
778-
cols.push(agg);
779-
}
780-
DataFrame::new_infer_height(cols)
781-
}
782-
783745
fn prepare_apply(&self) -> PolarsResult<DataFrame> {
784746
if let Some(agg) = &self.selected_agg {
785747
if agg.is_empty() {
@@ -912,7 +874,7 @@ pub enum GroupByMethod {
912874
NUnique,
913875
Quantile(f64, QuantileMethod),
914876
Count { include_nulls: bool },
915-
Implode,
877+
Implode { maintain_order: bool },
916878
Std(u8),
917879
Var(u8),
918880
ArgMin,
@@ -939,7 +901,7 @@ impl Display for GroupByMethod {
939901
NUnique => "n_unique",
940902
Quantile(_, _) => "quantile",
941903
Count { .. } => "count",
942-
Implode => "list",
904+
Implode { .. } => "implode",
943905
Std(_) => "std",
944906
Var(_) => "var",
945907
ArgMin => "arg_min",
@@ -968,7 +930,7 @@ pub fn fmt_group_by_column(name: &str, method: GroupByMethod) -> PlSmallStr {
968930
Groups => PlSmallStr::from_static("groups"),
969931
NUnique => format_pl_smallstr!("{name}_n_unique"),
970932
Count { .. } => format_pl_smallstr!("{name}_count"),
971-
Implode => format_pl_smallstr!("{name}_agg_list"),
933+
Implode { .. } => format_pl_smallstr!("{name}_agg_list"),
972934
Quantile(quantile, _interpol) => format_pl_smallstr!("{name}_quantile_{quantile:.2}"),
973935
Std(_) => format_pl_smallstr!("{name}_agg_std"),
974936
Var(_) => format_pl_smallstr!("{name}_agg_var"),

crates/polars-core/src/series/proptest.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@ use arrow::bitmap::bitmask::nth_set_bit_u32;
77
use polars_dtype::categorical::{Categories, FrozenCategories};
88
use proptest::prelude::*;
99

10-
use crate::chunked_array::builder::AnonymousListBuilder;
10+
use crate::chunked_array::builder::AnonymousOwnedListBuilder;
1111
#[cfg(feature = "dtype-categorical")]
1212
use crate::chunked_array::builder::CategoricalChunkedBuilder;
13-
use crate::prelude::{Int32Chunked, Int64Chunked, Int128Chunked, NamedFrom, Series, TimeUnit};
13+
use crate::prelude::{
14+
Int32Chunked, Int64Chunked, Int128Chunked, ListBuilderTrait, NamedFrom, Series, TimeUnit,
15+
};
1416
#[cfg(feature = "dtype-struct")]
1517
use crate::series::StructChunked;
1618
use crate::series::from::IntoSeries;
@@ -393,7 +395,7 @@ fn series_list_strategy(
393395
) -> impl Strategy<Value = Series> {
394396
inner.prop_flat_map(move |sample_series| {
395397
series_length_range.clone().prop_map(move |num_lists| {
396-
let mut builder = AnonymousListBuilder::new(
398+
let mut builder = AnonymousOwnedListBuilder::new(
397399
next_column_name().into(),
398400
num_lists,
399401
Some(sample_series.dtype().clone()),
@@ -417,7 +419,7 @@ fn series_array_strategy(
417419
series_length_range.clone().prop_map(move |num_arrays| {
418420
let width = sample_series.len();
419421

420-
let mut builder = AnonymousListBuilder::new(
422+
let mut builder = AnonymousOwnedListBuilder::new(
421423
next_column_name().into(),
422424
num_arrays,
423425
Some(sample_series.dtype().clone()),

crates/polars-expr/src/expressions/aggregation.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ impl PhysicalExpr for AggregationExpr {
155155

156156
Ok(IdxCa::from_slice(s.name().clone(), &[count as IdxSize]).into_column())
157157
},
158-
GroupByMethod::Implode => s.implode().map(|ca| ca.into_column()),
158+
GroupByMethod::Implode { maintain_order: _ } => s.implode().map(|ca| ca.into_column()),
159159
GroupByMethod::Std(ddof) => s
160160
.std_reduce(ddof)
161161
.map(|sc| sc.into_column(s.name().clone())),
@@ -414,11 +414,13 @@ impl PhysicalExpr for AggregationExpr {
414414
let agg_s = s.agg_n_unique(&groups);
415415
AggregatedScalar(agg_s.with_name(keep_name))
416416
},
417-
GroupByMethod::Implode => AggregatedScalar(match ac.agg_state() {
418-
AggState::LiteralScalar(_) => unreachable!(), // handled above
419-
AggState::AggregatedScalar(c) => c.as_list().into_column(),
420-
AggState::NotAggregated(_) | AggState::AggregatedList(_) => ac.aggregated(),
421-
}),
417+
GroupByMethod::Implode { maintain_order: _ } => {
418+
AggregatedScalar(match ac.agg_state() {
419+
AggState::LiteralScalar(_) => unreachable!(), // handled above
420+
AggState::AggregatedScalar(c) => c.as_list().into_column(),
421+
AggState::NotAggregated(_) | AggState::AggregatedList(_) => ac.aggregated(),
422+
})
423+
},
422424
GroupByMethod::Groups => {
423425
let mut column: ListChunked = ac.groups().as_list_chunked();
424426
column.rename(keep_name);

crates/polars-expr/src/expressions/window.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ impl WindowExpr {
237237
let mut finishes_list = false;
238238
for e in &**function {
239239
match e {
240-
Expr::Agg(AggExpr::Implode(_)) => {
240+
Expr::Agg(AggExpr::Implode { .. }) => {
241241
finishes_list = true;
242242
},
243243
Expr::Alias(_, _) => {},

crates/polars-expr/src/reduce/convert.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use crate::reduce::bitwise::{
1313
use crate::reduce::count::{CountReduce, NullCountReduce};
1414
use crate::reduce::first_last::{new_first_reduction, new_item_reduction, new_last_reduction};
1515
use crate::reduce::first_last_nonnull::{new_first_nonnull_reduction, new_last_nonnull_reduction};
16+
use crate::reduce::implode::new_unordered_implode_reduction;
1617
use crate::reduce::len::LenReduce;
1718
use crate::reduce::mean::new_mean_reduction;
1819
use crate::reduce::min_max::{new_max_reduction, new_min_reduction};
@@ -68,10 +69,14 @@ pub fn into_reduction(
6869
let count = Box::new(CountReduce::new(*include_nulls)) as Box<_>;
6970
(count, *input)
7071
},
72+
IRAggExpr::Implode {
73+
input,
74+
maintain_order: false,
75+
} => (new_unordered_implode_reduction(get_dt(*input)?), *input),
7176
IRAggExpr::Quantile { .. } => todo!(),
7277
IRAggExpr::Median(_) => todo!(),
7378
IRAggExpr::NUnique(_) => todo!(),
74-
IRAggExpr::Implode(_) => todo!(),
79+
IRAggExpr::Implode { .. } => todo!(),
7580
IRAggExpr::AggGroups(_) => todo!(),
7681
},
7782
AExpr::Len => {

0 commit comments

Comments
 (0)