@@ -31,7 +31,7 @@ use datafusion_expr::{ColumnarValue, Documentation, Volatility};
31
31
use datafusion_expr:: { ScalarFunctionArgs , ScalarUDFImpl , Signature } ;
32
32
use datafusion_macros:: user_doc;
33
33
34
- /// Returns the character with the given code. chr(0) is disallowed because text data types cannot store that character.
34
+ /// Returns the character with the given Unicode scalar value; values that are negative, surrogates, or above U+10FFFF produce an error.
35
35
/// chr(65) = 'A'
36
36
pub fn chr ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
37
37
let integer_array = as_int64_array ( & args[ 0 ] ) ?;
@@ -47,22 +47,14 @@ pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
47
47
for integer in integer_array {
48
48
match integer {
49
49
Some ( integer) => {
50
- if integer == 0 {
51
- return exec_err ! ( "null character not permitted." ) ;
52
- } else if integer < 0 {
53
- return exec_err ! ( "negative input not permitted." ) ;
54
- } else {
55
- match core:: char:: from_u32 ( integer as u32 ) {
56
- Some ( c) => {
57
- builder. append_value ( c. encode_utf8 ( & mut buf) ) ;
58
- }
59
- None => {
60
- return exec_err ! (
61
- "requested character too large for encoding."
62
- ) ;
63
- }
50
+ if let Ok ( u) = u32:: try_from ( integer) {
51
+ if let Some ( c) = core:: char:: from_u32 ( u) {
52
+ builder. append_value ( c. encode_utf8 ( & mut buf) ) ;
53
+ continue ;
64
54
}
65
55
}
56
+
57
+ return exec_err ! ( "invalid Unicode scalar value: {integer}" ) ;
66
58
}
67
59
None => {
68
60
builder. append_null ( ) ;
@@ -77,7 +69,7 @@ pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
77
69
78
70
#[ user_doc(
79
71
doc_section( label = "String Functions" ) ,
80
- description = "Returns the character with the specified ASCII or Unicode code value." ,
72
+ description = "Returns a string containing the character with the specified Unicode scalar value." ,
81
73
syntax_example = "chr(expression)" ,
82
74
sql_example = r#"```sql
83
75
> select chr(128640);
@@ -144,6 +136,7 @@ mod tests {
144
136
#[ test]
145
137
fn test_chr_normal ( ) {
146
138
let input = Arc :: new ( Int64Array :: from ( vec ! [
139
+ Some ( 0 ) , // NUL character (U+0000), not SQL NULL
147
140
Some ( 65 ) , // A
148
141
Some ( 66 ) , // B
149
142
Some ( 67 ) , // C
@@ -159,6 +152,7 @@ mod tests {
159
152
let result = chr ( & [ input] ) . unwrap ( ) ;
160
153
let string_array = result. as_any ( ) . downcast_ref :: < StringArray > ( ) . unwrap ( ) ;
161
154
let expected = [
155
+ "\u{0000} " ,
162
156
"A" ,
163
157
"B" ,
164
158
"C" ,
@@ -172,30 +166,21 @@ mod tests {
172
166
"\u{10ffff} " ,
173
167
] ;
174
168
175
- assert_eq ! ( string_array. len( ) , 11 ) ;
169
+ assert_eq ! ( string_array. len( ) , expected . len ( ) ) ;
176
170
for ( i, e) in expected. iter ( ) . enumerate ( ) {
177
171
assert_eq ! ( string_array. value( i) , * e) ;
178
172
}
179
173
}
180
174
181
175
#[ test]
182
176
fn test_chr_error ( ) {
183
- // chr(0) returns an error
184
- let input = Arc :: new ( Int64Array :: from ( vec ! [ 0 ] ) ) ;
185
- let result = chr ( & [ input] ) ;
186
- assert ! ( result. is_err( ) ) ;
187
- assert_contains ! (
188
- result. err( ) . unwrap( ) . to_string( ) ,
189
- "null character not permitted"
190
- ) ;
191
-
192
177
// invalid Unicode code points (too large)
193
178
let input = Arc :: new ( Int64Array :: from ( vec ! [ i64 :: MAX ] ) ) ;
194
179
let result = chr ( & [ input] ) ;
195
180
assert ! ( result. is_err( ) ) ;
196
181
assert_contains ! (
197
182
result. err( ) . unwrap( ) . to_string( ) ,
198
- "requested character too large for encoding "
183
+ "invalid Unicode scalar value: 9223372036854775807 "
199
184
) ;
200
185
201
186
// invalid Unicode code points (too large) case 2
@@ -204,7 +189,7 @@ mod tests {
204
189
assert ! ( result. is_err( ) ) ;
205
190
assert_contains ! (
206
191
result. err( ) . unwrap( ) . to_string( ) ,
207
- "requested character too large for encoding "
192
+ "invalid Unicode scalar value: 1114112 "
208
193
) ;
209
194
210
195
// invalid Unicode code points (surrogate code point)
@@ -214,7 +199,7 @@ mod tests {
214
199
assert ! ( result. is_err( ) ) ;
215
200
assert_contains ! (
216
201
result. err( ) . unwrap( ) . to_string( ) ,
217
- "requested character too large for encoding "
202
+ "invalid Unicode scalar value: 55297 "
218
203
) ;
219
204
220
205
// negative input
@@ -223,7 +208,7 @@ mod tests {
223
208
assert ! ( result. is_err( ) ) ;
224
209
assert_contains ! (
225
210
result. err( ) . unwrap( ) . to_string( ) ,
226
- "negative input not permitted "
211
+ "invalid Unicode scalar value: -9223372036854775806 "
227
212
) ;
228
213
229
214
// negative input case 2
@@ -232,16 +217,16 @@ mod tests {
232
217
assert ! ( result. is_err( ) ) ;
233
218
assert_contains ! (
234
219
result. err( ) . unwrap( ) . to_string( ) ,
235
- "negative input not permitted "
220
+ "invalid Unicode scalar value: -1 "
236
221
) ;
237
222
238
223
// one error with valid values after
239
- let input = Arc :: new ( Int64Array :: from ( vec ! [ 65 , 0 , 66 ] ) ) ; // A, NULL_CHAR , B
224
+ let input = Arc :: new ( Int64Array :: from ( vec ! [ 65 , - 1 , 66 ] ) ) ; // A, -1 , B
240
225
let result = chr ( & [ input] ) ;
241
226
assert ! ( result. is_err( ) ) ;
242
227
assert_contains ! (
243
228
result. err( ) . unwrap( ) . to_string( ) ,
244
- "null character not permitted "
229
+ "invalid Unicode scalar value: -1 "
245
230
) ;
246
231
}
247
232
0 commit comments