@@ -1706,6 +1706,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
17061706 SQLExpr :: Value ( Value :: Number ( n, _) ) => parse_sql_number ( & n) ,
17071707 SQLExpr :: Value ( Value :: SingleQuotedString ( ref s) ) => Ok ( lit ( s. clone ( ) ) ) ,
17081708 SQLExpr :: Value ( Value :: EscapedStringLiteral ( ref s) ) => Ok ( lit ( s. clone ( ) ) ) ,
1709+ SQLExpr :: Value ( Value :: UnicodeEscapedStringLiteral ( ref s) ) => parse_unicode_escaped_string ( s, '\\' ) ,
17091710 SQLExpr :: Value ( Value :: Boolean ( n) ) => Ok ( lit ( n) ) ,
17101711 SQLExpr :: Value ( Value :: Null ) => Ok ( Expr :: Literal ( ScalarValue :: Null ) ) ,
17111712 SQLExpr :: Extract { field, expr } => Ok ( Expr :: ScalarFunction {
@@ -2859,6 +2860,53 @@ fn parse_sql_number(n: &str) -> Result<Expr> {
28592860 }
28602861}
28612862
2863+ fn parse_unicode_escaped_string ( s : & str , delimiter : char ) -> Result < Expr > {
2864+ let mut result = String :: new ( ) ;
2865+ let mut chars = s. char_indices ( ) . peekable ( ) ;
2866+ while let Some ( ( i, c) ) = chars. next ( ) {
2867+ if c == delimiter {
2868+ if let Some ( ( _, next) ) = chars. peek ( ) {
2869+ if next == & delimiter {
2870+ result. push ( delimiter) ;
2871+ chars. next ( ) ;
2872+ } else {
2873+ let ( parsed, len) =
2874+ parse_unicode_escaped_point ( & s[ i + 1 ..] , delimiter) ?;
2875+ result. push ( parsed) ;
2876+ chars. nth ( len - 1 ) ;
2877+ }
2878+ } else {
2879+ return Err ( invalid_unicode_escape_error ( s, delimiter) ) ;
2880+ }
2881+ } else {
2882+ result. push ( c)
2883+ }
2884+ }
2885+ Ok ( lit ( result) )
2886+ }
2887+
2888+ fn parse_unicode_escaped_point ( s : & str , delimiter : char ) -> Result < ( char , usize ) > {
2889+ let ( point_start, point_end) = if s. starts_with ( '+' ) { ( 1 , 7 ) } else { ( 0 , 4 ) } ;
2890+ if point_end <= s. len ( ) {
2891+ let byte = u32:: from_str_radix ( & s[ point_start..point_end] , 16 )
2892+ . map_err ( |_| invalid_unicode_escape_error ( s, delimiter) ) ?;
2893+ if let Some ( c) = char:: from_u32 ( byte) {
2894+ Ok ( ( c, point_end) )
2895+ } else {
2896+ Err ( invalid_unicode_escape_error ( s, delimiter) )
2897+ }
2898+ } else {
2899+ Err ( invalid_unicode_escape_error ( s, delimiter) )
2900+ }
2901+ }
2902+
2903+ fn invalid_unicode_escape_error ( s : & str , delimiter : char ) -> DataFusionError {
2904+ DataFusionError :: SQL ( ParserError ( format ! (
2905+ "Invalid Unicode escape in {}. Unicode escapes must be {}XXXX or {}+XXXXXX" ,
2906+ s, delimiter, delimiter,
2907+ ) ) )
2908+ }
2909+
28622910#[ cfg( test) ]
28632911mod tests {
28642912 use crate :: datasource:: empty:: EmptyTable ;
@@ -2867,6 +2915,36 @@ mod tests {
28672915
28682916 use super :: * ;
28692917
#[test]
fn test_parse_unicode_escaped_string() {
    // (input, escape delimiter, expected decoded text)
    let valid_cases = [
        ("", '\\', ""),
        ("pppp", '\\', "pppp"),
        ("d\\0061t\\+000061", '\\', "data"),
        // A doubled delimiter decodes to a single literal delimiter.
        ("d\\0061\\\\t\\+000061", '\\', "da\\ta"),
        // With a custom delimiter, backslashes are ordinary characters.
        ("d!0061t\\!+000061\\", '!', "dat\\a\\"),
        ("!!d!0061!!t\\!+000061\\", '!', "!da!t\\a\\"),
        // Non-ASCII text outside escapes is copied through unchanged.
        ("\u{e9}\\0061", '\\', "\u{e9}a"),
        // The six-digit form reaches code points beyond the BMP.
        ("\\+01F600", '\\', "\u{1F600}"),
    ];
    for (input, delimiter, expected) in valid_cases.iter() {
        assert_eq!(
            parse_unicode_escaped_string(input, *delimiter).unwrap(),
            Expr::Literal(ScalarValue::Utf8(Some(expected.to_string()))),
            "input: {:?}, delimiter: {:?}",
            input,
            delimiter
        );
    }

    let invalid_cases = [
        // Trailing lone delimiter.
        "d\\0061t\\+000061\\",
        // Too few digits after `+`.
        "d\\0061t\\+061",
        // Non-hexadecimal digit.
        "d\\H061t\\+061",
        // Surrogate code points are not valid characters.
        "\\D800",
    ];
    for input in invalid_cases.iter() {
        assert!(
            parse_unicode_escaped_string(input, '\\').is_err(),
            "expected an error for input: {:?}",
            input
        );
    }
}
28702948 #[ test]
28712949 fn select_no_relation ( ) {
28722950 quick_test (
0 commit comments