Skip to content

Commit e7e7758

Browse files
authored
Config: Add support for defaulting SQL varchar to view types (#15104)
* Config: Add support default sql varchar to view types * Fix test * fix test * Address comments * Address comments * Fix slt test
1 parent 2455aab commit e7e7758

File tree

7 files changed

+56
-1
lines changed

7 files changed

+56
-1
lines changed

datafusion/common/src/config.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,11 @@ config_namespace! {
252252
/// string length and thus DataFusion can not enforce such limits.
253253
pub support_varchar_with_length: bool, default = true
254254

255+
/// If true, `VARCHAR` is mapped to `Utf8View` during SQL planning.
256+
/// If false, `VARCHAR` is mapped to `Utf8` during SQL planning.
257+
/// Default is false.
258+
pub map_varchar_to_utf8view: bool, default = false
259+
255260
/// When set to true, the source locations relative to the original SQL
256261
/// query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected
257262
/// and recorded in the logical plan nodes.

datafusion/core/src/execution/session_state.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,7 @@ impl SessionState {
489489
enable_options_value_normalization: sql_parser_options
490490
.enable_options_value_normalization,
491491
support_varchar_with_length: sql_parser_options.support_varchar_with_length,
492+
map_varchar_to_utf8view: sql_parser_options.map_varchar_to_utf8view,
492493
collect_spans: sql_parser_options.collect_spans,
493494
}
494495
}

datafusion/sql/src/planner.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ pub struct ParserOptions {
5454
pub enable_options_value_normalization: bool,
5555
/// Whether to collect spans
5656
pub collect_spans: bool,
57+
/// Whether `VARCHAR` is mapped to `Utf8View` during SQL planning.
58+
pub map_varchar_to_utf8view: bool,
5759
}
5860

5961
impl ParserOptions {
@@ -72,6 +74,7 @@ impl ParserOptions {
7274
parse_float_as_decimal: false,
7375
enable_ident_normalization: true,
7476
support_varchar_with_length: true,
77+
map_varchar_to_utf8view: false,
7578
enable_options_value_normalization: false,
7679
collect_spans: false,
7780
}
@@ -111,6 +114,12 @@ impl ParserOptions {
111114
self
112115
}
113116

117+
/// Sets the `map_varchar_to_utf8view` option.
118+
pub fn with_map_varchar_to_utf8view(mut self, value: bool) -> Self {
119+
self.map_varchar_to_utf8view = value;
120+
self
121+
}
122+
114123
/// Sets the `enable_options_value_normalization` option.
115124
pub fn with_enable_options_value_normalization(mut self, value: bool) -> Self {
116125
self.enable_options_value_normalization = value;
@@ -136,6 +145,7 @@ impl From<&SqlParserOptions> for ParserOptions {
136145
parse_float_as_decimal: options.parse_float_as_decimal,
137146
enable_ident_normalization: options.enable_ident_normalization,
138147
support_varchar_with_length: options.support_varchar_with_length,
148+
map_varchar_to_utf8view: options.map_varchar_to_utf8view,
139149
enable_options_value_normalization: options
140150
.enable_options_value_normalization,
141151
collect_spans: options.collect_spans,
@@ -558,7 +568,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
558568
SQLDataType::Varchar(length) => {
559569
match (length, self.options.support_varchar_with_length) {
560570
(Some(_), false) => plan_err!("does not support Varchar with length, please set `support_varchar_with_length` to be true"),
561-
_ => Ok(DataType::Utf8),
571+
_ => {
572+
if self.options.map_varchar_to_utf8view {
573+
Ok(DataType::Utf8View)
574+
} else {
575+
Ok(DataType::Utf8)
576+
}
577+
}
562578
}
563579
}
564580
SQLDataType::UnsignedBigInt(_) | SQLDataType::UnsignedInt8(_) => Ok(DataType::UInt64),

datafusion/sql/tests/sql_integration.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ fn parse_decimals() {
8383
parse_float_as_decimal: true,
8484
enable_ident_normalization: false,
8585
support_varchar_with_length: false,
86+
map_varchar_to_utf8view: false,
8687
enable_options_value_normalization: false,
8788
collect_spans: false,
8889
},
@@ -139,6 +140,7 @@ fn parse_ident_normalization() {
139140
parse_float_as_decimal: false,
140141
enable_ident_normalization,
141142
support_varchar_with_length: false,
143+
map_varchar_to_utf8view: false,
142144
enable_options_value_normalization: false,
143145
collect_spans: false,
144146
},

datafusion/sqllogictest/test_files/ddl.slt

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -827,3 +827,31 @@ drop table table_with_pk;
827827

828828
statement ok
829829
set datafusion.catalog.information_schema = false;
830+
831+
# Test that VARCHAR is mapped to Utf8 by default, and to Utf8View during SQL planning once map_varchar_to_utf8view is set to true
832+
statement ok
833+
CREATE TABLE t1(c1 VARCHAR(10) NOT NULL, c2 VARCHAR);
834+
835+
query TTT
836+
DESCRIBE t1;
837+
----
838+
c1 Utf8 NO
839+
c2 Utf8 YES
840+
841+
statement ok
842+
set datafusion.sql_parser.map_varchar_to_utf8view = true;
843+
844+
statement ok
845+
CREATE TABLE t2(c1 VARCHAR(10) NOT NULL, c2 VARCHAR);
846+
847+
query TTT
848+
DESCRIBE t2;
849+
----
850+
c1 Utf8View NO
851+
c2 Utf8View YES
852+
853+
statement ok
854+
DROP TABLE t1;
855+
856+
statement ok
857+
DROP TABLE t2;

datafusion/sqllogictest/test_files/information_schema.slt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ datafusion.sql_parser.collect_spans false
263263
datafusion.sql_parser.dialect generic
264264
datafusion.sql_parser.enable_ident_normalization true
265265
datafusion.sql_parser.enable_options_value_normalization false
266+
datafusion.sql_parser.map_varchar_to_utf8view false
266267
datafusion.sql_parser.parse_float_as_decimal false
267268
datafusion.sql_parser.recursion_limit 50
268269
datafusion.sql_parser.support_varchar_with_length true
@@ -361,6 +362,7 @@ datafusion.sql_parser.collect_spans false When set to true, the source locations
361362
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.
362363
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
363364
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
365+
datafusion.sql_parser.map_varchar_to_utf8view false If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false.
364366
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
365367
datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit when parsing complex SQL Queries
366368
datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.

docs/source/user-guide/configs.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,5 +128,6 @@ Environment variables are read during `SessionConfig` initialisation so they mus
128128
| datafusion.sql_parser.enable_options_value_normalization | false | When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. |
129129
| datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. |
130130
| datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |
131+
| datafusion.sql_parser.map_varchar_to_utf8view | false | If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false. |
131132
| datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. |
132133
| datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries |

0 commit comments

Comments
 (0)