Skip to content

Commit 79d40c4

Browse files
authored
Minor: improve documentation to StringView trim (#12629)
* Minor: improve documentation to StringView trim * clarify what a valid view is
1 parent 9b4f90a commit 79d40c4

File tree

1 file changed

+60
-14
lines changed

1 file changed

+60
-14
lines changed

datafusion/functions/src/string/common.rs

Lines changed: 60 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -35,19 +35,27 @@ use datafusion_expr::ColumnarValue;
3535

3636
/// Append a new view to the views buffer with the given substr
3737
///
38-
/// raw must be a valid view
39-
/// substr must be a valid substring of raw
40-
/// start must be less than or equal to the length of the string data
38+
/// # Safety
39+
///
40+
/// original_view must be a valid view (the format described on
41+
/// [`GenericByteViewArray`](arrow::array::GenericByteViewArray)).
42+
///
43+
/// # Arguments
44+
/// - views_buffer: The buffer to append the new view to
45+
/// - null_builder: The buffer to append the null value to
46+
/// - original_view: The original view value
47+
/// - substr: The substring to append. Must be a valid substring of the original view
48+
/// - start_offset: The start offset of the substring in the view
4149
pub(crate) fn make_and_append_view(
4250
views_buffer: &mut Vec<u128>,
4351
null_builder: &mut NullBufferBuilder,
44-
raw_view: &u128,
52+
original_view: &u128,
4553
substr: &str,
4654
start_offset: u32,
4755
) {
4856
let substr_len = substr.len();
4957
let sub_view = if substr_len > 12 {
50-
let view = ByteView::from(*raw_view);
58+
let view = ByteView::from(*original_view);
5159
make_view(
5260
substr.as_bytes(),
5361
view.buffer_index,
@@ -82,13 +90,6 @@ pub(crate) fn general_trim<T: OffsetSizeTrait>(
8290
trim_type: TrimType,
8391
use_string_view: bool,
8492
) -> Result<ArrayRef> {
85-
// This is the function used to trim each string row, and it will return:
86-
// - trimmed str
87-
// e.g. ltrim(" abc") -> "abc"
88-
//
89-
// - start offset, needed in `string_view_trim`
90-
// e.g. "abc" actually is "  abc"[2..], and the start offset here should be 2
91-
//
9293
let func = match trim_type {
9394
TrimType::Left => |input, pattern: &str| {
9495
let pattern = pattern.chars().collect::<Vec<char>>();
@@ -128,6 +129,28 @@ pub(crate) fn general_trim<T: OffsetSizeTrait>(
128129
}
129130
}
130131

132+
/// Applies the trim function to the given string view array(s)
133+
/// and returns a new string view array with the trimmed values.
134+
///
135+
/// # `trim_func`: The function to apply to each string view.
136+
///
137+
/// ## Arguments
138+
/// - The original string
139+
/// - the pattern to trim
140+
///
141+
/// ## Returns
142+
/// - trimmed str (must be a substring of the first argument)
143+
/// - start offset of the trimmed str within the original string (used to build the new view)
144+
///
145+
/// ## Examples
146+
///
147+
/// For `ltrim`:
148+
/// - `fn("  abc", " ") -> ("abc", 2)`
149+
/// - `fn("abd", " ") -> ("abd", 0)`
150+
///
151+
/// For `btrim`:
152+
/// - `fn("  abc  ", " ") -> ("abc", 2)`
153+
/// - `fn("abd", " ") -> ("abd", 0)`
131154
// Removing 'a will cause the compiler to complain about the lifetime of `func`
132155
fn string_view_trim<'a>(
133156
trim_func: fn(&'a str, &'a str) -> (&'a str, u32),
@@ -221,23 +244,46 @@ fn string_view_trim<'a>(
221244
}
222245
}
223246

247+
/// Trims the given string and appends the trimmed string to the views buffer
248+
/// and the null buffer.
249+
///
250+
/// Calls `trim_func` on the string value in `original_view` for non-null
251+
/// values and appends the updated view to the views buffer / null_builder.
252+
///
253+
/// # Arguments
254+
/// - `src_str_opt`: The original string value (represented by the view)
255+
/// - `trim_characters_opt`: The characters to trim from the string
256+
/// - `trim_func`: The function to apply to the string (see [`string_view_trim`] for details)
257+
/// - `views_buf`: The buffer to append the updated views to
258+
/// - `null_builder`: The buffer to append the null values to
259+
/// - `original_view`: The original view value (that contains src_str_opt)
224260
fn trim_and_append_str<'a>(
225261
src_str_opt: Option<&'a str>,
226262
trim_characters_opt: Option<&'a str>,
227263
trim_func: fn(&'a str, &'a str) -> (&'a str, u32),
228264
views_buf: &mut Vec<u128>,
229265
null_builder: &mut NullBufferBuilder,
230-
raw: &u128,
266+
original_view: &u128,
231267
) {
232268
if let (Some(src_str), Some(characters)) = (src_str_opt, trim_characters_opt) {
233269
let (trim_str, start_offset) = trim_func(src_str, characters);
234-
make_and_append_view(views_buf, null_builder, raw, trim_str, start_offset);
270+
make_and_append_view(
271+
views_buf,
272+
null_builder,
273+
original_view,
274+
trim_str,
275+
start_offset,
276+
);
235277
} else {
236278
null_builder.append_null();
237279
views_buf.push(0);
238280
}
239281
}
240282

283+
/// Applies the trim function to the given string array(s)
284+
/// and returns a new string array with the trimmed values.
285+
///
286+
/// See [`string_view_trim`] for details on `func`
241287
fn string_trim<'a, T: OffsetSizeTrait>(
242288
func: fn(&'a str, &'a str) -> (&'a str, u32),
243289
args: &'a [ArrayRef],

0 commit comments

Comments
 (0)