Skip to content

Commit 3b8fabd

Browse files
authored
Add support for chr(0) (#17131)
* #17128 Add support for chr(0)
* #17128 Documentation refinement
* #17128 Replace manual range check with try_from
1 parent b6d4d3b commit 3b8fabd

File tree

4 files changed

+21
-36
lines changed

4 files changed

+21
-36
lines changed

datafusion/functions/src/string/ascii.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use std::sync::Arc;
3030

3131
#[user_doc(
3232
doc_section(label = "String Functions"),
33-
description = "Returns the Unicode character code of the first character in a string.",
33+
description = "Returns the first Unicode scalar value of a string.",
3434
syntax_example = "ascii(str)",
3535
sql_example = r#"```sql
3636
> select ascii('abc');

datafusion/functions/src/string/chr.rs

Lines changed: 18 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ use datafusion_expr::{ColumnarValue, Documentation, Volatility};
3131
use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
3232
use datafusion_macros::user_doc;
3333

34-
/// Returns the character with the given code. chr(0) is disallowed because text data types cannot store that character.
34+
/// Returns the character with the given code.
3535
/// chr(65) = 'A'
3636
pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
3737
let integer_array = as_int64_array(&args[0])?;
@@ -47,22 +47,14 @@ pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
4747
for integer in integer_array {
4848
match integer {
4949
Some(integer) => {
50-
if integer == 0 {
51-
return exec_err!("null character not permitted.");
52-
} else if integer < 0 {
53-
return exec_err!("negative input not permitted.");
54-
} else {
55-
match core::char::from_u32(integer as u32) {
56-
Some(c) => {
57-
builder.append_value(c.encode_utf8(&mut buf));
58-
}
59-
None => {
60-
return exec_err!(
61-
"requested character too large for encoding."
62-
);
63-
}
50+
if let Ok(u) = u32::try_from(integer) {
51+
if let Some(c) = core::char::from_u32(u) {
52+
builder.append_value(c.encode_utf8(&mut buf));
53+
continue;
6454
}
6555
}
56+
57+
return exec_err!("invalid Unicode scalar value: {integer}");
6658
}
6759
None => {
6860
builder.append_null();
@@ -77,7 +69,7 @@ pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
7769

7870
#[user_doc(
7971
doc_section(label = "String Functions"),
80-
description = "Returns the character with the specified ASCII or Unicode code value.",
72+
description = "Returns a string containing the character with the specified Unicode scalar value.",
8173
syntax_example = "chr(expression)",
8274
sql_example = r#"```sql
8375
> select chr(128640);
@@ -144,6 +136,7 @@ mod tests {
144136
#[test]
145137
fn test_chr_normal() {
146138
let input = Arc::new(Int64Array::from(vec![
139+
Some(0), // NUL character (U+0000)
147140
Some(65), // A
148141
Some(66), // B
149142
Some(67), // C
@@ -159,6 +152,7 @@ mod tests {
159152
let result = chr(&[input]).unwrap();
160153
let string_array = result.as_any().downcast_ref::<StringArray>().unwrap();
161154
let expected = [
155+
"\u{0000}",
162156
"A",
163157
"B",
164158
"C",
@@ -172,30 +166,21 @@ mod tests {
172166
"\u{10ffff}",
173167
];
174168

175-
assert_eq!(string_array.len(), 11);
169+
assert_eq!(string_array.len(), expected.len());
176170
for (i, e) in expected.iter().enumerate() {
177171
assert_eq!(string_array.value(i), *e);
178172
}
179173
}
180174

181175
#[test]
182176
fn test_chr_error() {
183-
// chr(0) returns an error
184-
let input = Arc::new(Int64Array::from(vec![0]));
185-
let result = chr(&[input]);
186-
assert!(result.is_err());
187-
assert_contains!(
188-
result.err().unwrap().to_string(),
189-
"null character not permitted"
190-
);
191-
192177
// invalid Unicode code points (too large)
193178
let input = Arc::new(Int64Array::from(vec![i64::MAX]));
194179
let result = chr(&[input]);
195180
assert!(result.is_err());
196181
assert_contains!(
197182
result.err().unwrap().to_string(),
198-
"requested character too large for encoding"
183+
"invalid Unicode scalar value: 9223372036854775807"
199184
);
200185

201186
// invalid Unicode code points (too large) case 2
@@ -204,7 +189,7 @@ mod tests {
204189
assert!(result.is_err());
205190
assert_contains!(
206191
result.err().unwrap().to_string(),
207-
"requested character too large for encoding"
192+
"invalid Unicode scalar value: 1114112"
208193
);
209194

210195
// invalid Unicode code points (surrogate code point)
@@ -214,7 +199,7 @@ mod tests {
214199
assert!(result.is_err());
215200
assert_contains!(
216201
result.err().unwrap().to_string(),
217-
"requested character too large for encoding"
202+
"invalid Unicode scalar value: 55297"
218203
);
219204

220205
// negative input
@@ -223,7 +208,7 @@ mod tests {
223208
assert!(result.is_err());
224209
assert_contains!(
225210
result.err().unwrap().to_string(),
226-
"negative input not permitted"
211+
"invalid Unicode scalar value: -9223372036854775806"
227212
);
228213

229214
// negative input case 2
@@ -232,16 +217,16 @@ mod tests {
232217
assert!(result.is_err());
233218
assert_contains!(
234219
result.err().unwrap().to_string(),
235-
"negative input not permitted"
220+
"invalid Unicode scalar value: -1"
236221
);
237222

238223
// one error with valid values after
239-
let input = Arc::new(Int64Array::from(vec![65, 0, 66])); // A, NULL_CHAR, B
224+
let input = Arc::new(Int64Array::from(vec![65, -1, 66])); // A, -1, B
240225
let result = chr(&[input]);
241226
assert!(result.is_err());
242227
assert_contains!(
243228
result.err().unwrap().to_string(),
244-
"null character not permitted"
229+
"invalid Unicode scalar value: -1"
245230
);
246231
}
247232

-60 Bytes
Binary file not shown.

docs/source/user-guide/sql/scalar_functions.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -768,7 +768,7 @@ nvl2(expression1, expression2, expression3)
768768

769769
### `ascii`
770770

771-
Returns the Unicode character code of the first character in a string.
771+
Returns the first Unicode scalar value of a string.
772772

773773
```sql
774774
ascii(str)
@@ -909,7 +909,7 @@ character_length(str)
909909

910910
### `chr`
911911

912-
Returns the character with the specified ASCII or Unicode code value.
912+
Returns a string containing the character with the specified Unicode scalar value.
913913

914914
```sql
915915
chr(expression)

0 commit comments

Comments
 (0)