Skip to content

Commit e58f86c

Browse files
authored
refactor character_length impl by unifying null handling logic (#16877)
1 parent 2266547 commit e58f86c

File tree

1 file changed

+17
-36
lines changed

1 file changed

+17
-36
lines changed

datafusion/functions/src/unicode/character_length.rs

Lines changed: 17 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -136,56 +136,37 @@ where
136136
// string is ASCII only is relatively cheap.
137137
// If strings are ASCII only, count bytes instead.
138138
let is_array_ascii_only = array.is_ascii();
139-
let array = if array.null_count() == 0 {
139+
let nulls = array.nulls().cloned();
140+
let array = {
140141
if is_array_ascii_only {
141142
let values: Vec<_> = (0..array.len())
142143
.map(|i| {
143-
let value = array.value(i);
144+
// Safety: we are iterating with array.len() so the index is always valid
145+
let value = unsafe { array.value_unchecked(i) };
144146
T::Native::usize_as(value.len())
145147
})
146148
.collect();
147-
PrimitiveArray::<T>::new(values.into(), None)
149+
PrimitiveArray::<T>::new(values.into(), nulls)
148150
} else {
149151
let values: Vec<_> = (0..array.len())
150152
.map(|i| {
151-
let value = array.value(i);
152-
if value.is_ascii() {
153-
T::Native::usize_as(value.len())
153+
// Safety: we are iterating with array.len() so the index is always valid
154+
if array.is_null(i) {
155+
T::default_value()
154156
} else {
155-
T::Native::usize_as(value.chars().count())
157+
let value = unsafe { array.value_unchecked(i) };
158+
if value.is_empty() {
159+
T::default_value()
160+
} else if value.is_ascii() {
161+
T::Native::usize_as(value.len())
162+
} else {
163+
T::Native::usize_as(value.chars().count())
164+
}
156165
}
157166
})
158167
.collect();
159-
PrimitiveArray::<T>::new(values.into(), None)
168+
PrimitiveArray::<T>::new(values.into(), nulls)
160169
}
161-
} else if is_array_ascii_only {
162-
let values: Vec<_> = (0..array.len())
163-
.map(|i| {
164-
if array.is_null(i) {
165-
T::default_value()
166-
} else {
167-
let value = array.value(i);
168-
T::Native::usize_as(value.len())
169-
}
170-
})
171-
.collect();
172-
PrimitiveArray::<T>::new(values.into(), array.nulls().cloned())
173-
} else {
174-
let values: Vec<_> = (0..array.len())
175-
.map(|i| {
176-
if array.is_null(i) {
177-
T::default_value()
178-
} else {
179-
let value = array.value(i);
180-
if value.is_ascii() {
181-
T::Native::usize_as(value.len())
182-
} else {
183-
T::Native::usize_as(value.chars().count())
184-
}
185-
}
186-
})
187-
.collect();
188-
PrimitiveArray::<T>::new(values.into(), array.nulls().cloned())
189170
};
190171

191172
Ok(Arc::new(array))

0 commit comments

Comments
 (0)