@@ -31,7 +31,7 @@ use datafusion_expr::{ColumnarValue, Documentation, Volatility};
3131use datafusion_expr:: { ScalarFunctionArgs , ScalarUDFImpl , Signature } ;
3232use datafusion_macros:: user_doc;
3333
34- /// Returns the character with the given code. chr(0) is disallowed because text data types cannot store that character.
34+ /// Returns the character with the given code.
3535/// chr(65) = 'A'
3636pub fn chr ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
3737 let integer_array = as_int64_array ( & args[ 0 ] ) ?;
@@ -47,22 +47,14 @@ pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
4747 for integer in integer_array {
4848 match integer {
4949 Some ( integer) => {
50- if integer == 0 {
51- return exec_err ! ( "null character not permitted." ) ;
52- } else if integer < 0 {
53- return exec_err ! ( "negative input not permitted." ) ;
54- } else {
55- match core:: char:: from_u32 ( integer as u32 ) {
56- Some ( c) => {
57- builder. append_value ( c. encode_utf8 ( & mut buf) ) ;
58- }
59- None => {
60- return exec_err ! (
61- "requested character too large for encoding."
62- ) ;
63- }
50+ if let Ok ( u) = u32:: try_from ( integer) {
51+ if let Some ( c) = core:: char:: from_u32 ( u) {
52+ builder. append_value ( c. encode_utf8 ( & mut buf) ) ;
53+ continue ;
6454 }
6555 }
56+
57+ return exec_err ! ( "invalid Unicode scalar value: {integer}" ) ;
6658 }
6759 None => {
6860 builder. append_null ( ) ;
@@ -77,7 +69,7 @@ pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
7769
7870#[ user_doc(
7971 doc_section( label = "String Functions" ) ,
80- description = "Returns the character with the specified ASCII or Unicode code value." ,
72+ description = "Returns a string containing the character with the specified Unicode scalar value." ,
8173 syntax_example = "chr(expression)" ,
8274 sql_example = r#"```sql
8375> select chr(128640);
@@ -144,6 +136,7 @@ mod tests {
144136 #[ test]
145137 fn test_chr_normal ( ) {
146138 let input = Arc :: new ( Int64Array :: from ( vec ! [
139+ Some ( 0 ) , // NUL character (U+0000), not SQL NULL
147140 Some ( 65 ) , // A
148141 Some ( 66 ) , // B
149142 Some ( 67 ) , // C
@@ -159,6 +152,7 @@ mod tests {
159152 let result = chr ( & [ input] ) . unwrap ( ) ;
160153 let string_array = result. as_any ( ) . downcast_ref :: < StringArray > ( ) . unwrap ( ) ;
161154 let expected = [
155+ "\u{0000} " ,
162156 "A" ,
163157 "B" ,
164158 "C" ,
@@ -172,30 +166,21 @@ mod tests {
172166 "\u{10ffff} " ,
173167 ] ;
174168
175- assert_eq ! ( string_array. len( ) , 11 ) ;
169+ assert_eq ! ( string_array. len( ) , expected . len ( ) ) ;
176170 for ( i, e) in expected. iter ( ) . enumerate ( ) {
177171 assert_eq ! ( string_array. value( i) , * e) ;
178172 }
179173 }
180174
181175 #[ test]
182176 fn test_chr_error ( ) {
183- // chr(0) returns an error
184- let input = Arc :: new ( Int64Array :: from ( vec ! [ 0 ] ) ) ;
185- let result = chr ( & [ input] ) ;
186- assert ! ( result. is_err( ) ) ;
187- assert_contains ! (
188- result. err( ) . unwrap( ) . to_string( ) ,
189- "null character not permitted"
190- ) ;
191-
192177 // invalid Unicode code points (too large)
193178 let input = Arc :: new ( Int64Array :: from ( vec ! [ i64 :: MAX ] ) ) ;
194179 let result = chr ( & [ input] ) ;
195180 assert ! ( result. is_err( ) ) ;
196181 assert_contains ! (
197182 result. err( ) . unwrap( ) . to_string( ) ,
198- "requested character too large for encoding "
183+ "invalid Unicode scalar value: 9223372036854775807 "
199184 ) ;
200185
201186 // invalid Unicode code points (too large) case 2
@@ -204,7 +189,7 @@ mod tests {
204189 assert ! ( result. is_err( ) ) ;
205190 assert_contains ! (
206191 result. err( ) . unwrap( ) . to_string( ) ,
207- "requested character too large for encoding "
192+ "invalid Unicode scalar value: 1114112 "
208193 ) ;
209194
210195 // invalid Unicode code points (surrogate code point)
@@ -214,7 +199,7 @@ mod tests {
214199 assert ! ( result. is_err( ) ) ;
215200 assert_contains ! (
216201 result. err( ) . unwrap( ) . to_string( ) ,
217- "requested character too large for encoding "
202+ "invalid Unicode scalar value: 55297 "
218203 ) ;
219204
220205 // negative input
@@ -223,7 +208,7 @@ mod tests {
223208 assert ! ( result. is_err( ) ) ;
224209 assert_contains ! (
225210 result. err( ) . unwrap( ) . to_string( ) ,
226- "negative input not permitted "
211+ "invalid Unicode scalar value: -9223372036854775806 "
227212 ) ;
228213
229214 // negative input case 2
@@ -232,16 +217,16 @@ mod tests {
232217 assert ! ( result. is_err( ) ) ;
233218 assert_contains ! (
234219 result. err( ) . unwrap( ) . to_string( ) ,
235- "negative input not permitted "
220+ "invalid Unicode scalar value: -1 "
236221 ) ;
237222
238223 // one error with valid values after
239- let input = Arc :: new ( Int64Array :: from ( vec ! [ 65 , 0 , 66 ] ) ) ; // A, NULL_CHAR , B
224+ let input = Arc :: new ( Int64Array :: from ( vec ! [ 65 , - 1 , 66 ] ) ) ; // A, -1 , B
240225 let result = chr ( & [ input] ) ;
241226 assert ! ( result. is_err( ) ) ;
242227 assert_contains ! (
243228 result. err( ) . unwrap( ) . to_string( ) ,
244- "null character not permitted "
229+ "invalid Unicode scalar value: -1 "
245230 ) ;
246231 }
247232
0 commit comments