@@ -35,19 +35,27 @@ use datafusion_expr::ColumnarValue;
3535
3636/// Append a new view to the views buffer with the given substr
3737///
38- /// raw must be a valid view
39- /// substr must be a valid substring of raw
40- /// start must be less than or equal to the length of the string data
38+ /// # Safety
39+ ///
40+ /// original_view must be a valid view (the format described on
41+ /// [`GenericByteViewArray`](arrow::array::GenericByteViewArray)).
42+ ///
43+ /// # Arguments
44+ /// - views_buffer: The buffer to append the new view to
45+ /// - null_builder: The buffer to append the null value to
46+ /// - original_view: The original view value
47+ /// - substr: The substring to append. Must be a valid substring of the original view
48+ /// - start_offset: The start offset of the substring in the view
4149pub ( crate ) fn make_and_append_view (
4250 views_buffer : & mut Vec < u128 > ,
4351 null_builder : & mut NullBufferBuilder ,
44- raw_view : & u128 ,
52+ original_view : & u128 ,
4553 substr : & str ,
4654 start_offset : u32 ,
4755) {
4856 let substr_len = substr. len ( ) ;
4957 let sub_view = if substr_len > 12 {
50- let view = ByteView :: from ( * raw_view ) ;
58+ let view = ByteView :: from ( * original_view ) ;
5159 make_view (
5260 substr. as_bytes ( ) ,
5361 view. buffer_index ,
@@ -82,13 +90,6 @@ pub(crate) fn general_trim<T: OffsetSizeTrait>(
8290 trim_type : TrimType ,
8391 use_string_view : bool ,
8492) -> Result < ArrayRef > {
85- // This is the function used to trim each string row, and it will return:
86- // - trimmed str
87- // e.g. ltrim(" abc") -> "abc"
88- //
89- // - start offset, needed in `string_view_trim`
90- // e.g. "abc" actually is " abc"[2..], and the start offset here should be 2
91- //
9293 let func = match trim_type {
9394 TrimType :: Left => |input, pattern : & str | {
9495 let pattern = pattern. chars ( ) . collect :: < Vec < char > > ( ) ;
@@ -128,6 +129,28 @@ pub(crate) fn general_trim<T: OffsetSizeTrait>(
128129 }
129130}
130131
132+ /// Applies the trim function to the given string view array(s)
133+ /// and returns a new string view array with the trimmed values.
134+ ///
135+ /// # `trim_func`: The function to apply to each string view.
136+ ///
137+ /// ## Arguments
138+ /// - The original string
139+ /// - the pattern to trim
140+ ///
141+ /// ## Returns
142+ /// - trimmed str (must be a substring of the first argument)
143+ /// - start offset, needed in `string_view_trim`
144+ ///
145+ /// ## Examples
146+ ///
147+ /// For `ltrim`:
148+ /// - `fn("  abc", " ") -> ("abc", 2)`
149+ /// - `fn("abd", " ") -> ("abd", 0)`
150+ ///
151+ /// For `btrim`:
152+ /// - `fn("  abc  ", " ") -> ("abc", 2)`
153+ /// - `fn("abd", " ") -> ("abd", 0)`
131154// Removing 'a makes the compiler complain about the lifetime of `trim_func`
132155fn string_view_trim < ' a > (
133156 trim_func : fn ( & ' a str , & ' a str ) -> ( & ' a str , u32 ) ,
@@ -221,23 +244,46 @@ fn string_view_trim<'a>(
221244 }
222245}
223246
247+ /// Trims the given string and appends the trimmed string to the views buffer
248+ /// and the null buffer.
249+ ///
250+ /// Calls `trim_func` on the string value in `original_view`, for non_null
251+ /// values and appends the updated view to the views buffer / null_builder.
252+ ///
253+ /// Arguments
254+ /// - `src_str_opt`: The original string value (represented by the view)
255+ /// - `trim_characters_opt`: The characters to trim from the string
256+ /// - `trim_func`: The function to apply to the string (see [`string_view_trim`] for details)
257+ /// - `views_buf`: The buffer to append the updated views to
258+ /// - `null_builder`: The buffer to append the null values to
259+ /// - `original_view`: The original view value (that contains src_str_opt)
224260fn trim_and_append_str < ' a > (
225261 src_str_opt : Option < & ' a str > ,
226262 trim_characters_opt : Option < & ' a str > ,
227263 trim_func : fn ( & ' a str , & ' a str ) -> ( & ' a str , u32 ) ,
228264 views_buf : & mut Vec < u128 > ,
229265 null_builder : & mut NullBufferBuilder ,
230- raw : & u128 ,
266+ original_view : & u128 ,
231267) {
232268 if let ( Some ( src_str) , Some ( characters) ) = ( src_str_opt, trim_characters_opt) {
233269 let ( trim_str, start_offset) = trim_func ( src_str, characters) ;
234- make_and_append_view ( views_buf, null_builder, raw, trim_str, start_offset) ;
270+ make_and_append_view (
271+ views_buf,
272+ null_builder,
273+ original_view,
274+ trim_str,
275+ start_offset,
276+ ) ;
235277 } else {
236278 null_builder. append_null ( ) ;
237279 views_buf. push ( 0 ) ;
238280 }
239281}
240282
283+ /// Applies the trim function to the given string array(s)
284+ /// and returns a new string array with the trimmed values.
285+ ///
286+ /// See [`string_view_trim`] for details on `func`
241287fn string_trim < ' a , T : OffsetSizeTrait > (
242288 func : fn ( & ' a str , & ' a str ) -> ( & ' a str , u32 ) ,
243289 args : & ' a [ ArrayRef ] ,
0 commit comments