1919
2020use std:: any:: Any ;
2121use std:: collections:: { BTreeMap , HashMap } ;
22+ use std:: error:: Error ;
2223use std:: fmt:: { self , Display } ;
2324use std:: str:: FromStr ;
2425
@@ -29,7 +30,9 @@ use crate::{DataFusionError, Result};
2930
3031/// A macro that wraps a configuration struct and automatically derives
3132/// [`Default`] and [`ConfigField`] for it, allowing it to be used
32- /// in the [`ConfigOptions`] configuration tree
33+ /// in the [`ConfigOptions`] configuration tree.
34+ ///
35+ /// `transform` is used to normalize values before parsing; `warn` is an optional message that is logged with `log::warn!` whenever the field is set.
3336///
3437/// For example,
3538///
@@ -38,7 +41,7 @@ use crate::{DataFusionError, Result};
3841/// /// Amazing config
3942/// pub struct MyConfig {
4043/// /// Field 1 doc
41- /// field1: String, default = "".to_string()
44+ /// field1: String, transform = str::to_lowercase, default = "".to_string()
4245///
4346/// /// Field 2 doc
4447/// field2: usize, default = 232
@@ -67,9 +70,12 @@ use crate::{DataFusionError, Result};
6770/// fn set(&mut self, key: &str, value: &str) -> Result<()> {
6871/// let (key, rem) = key.split_once('.').unwrap_or((key, ""));
6972/// match key {
70- /// "field1" => self.field1.set(rem, value),
71- /// "field2" => self.field2.set(rem, value),
72- /// "field3" => self.field3.set(rem, value),
73+ /// "field1" => {
74+ /// let value = str::to_lowercase(value);
75+ /// self.field1.set(rem, value.as_ref())
76+ /// },
77+ /// "field2" => self.field2.set(rem, value.as_ref()),
78+ /// "field3" => self.field3.set(rem, value.as_ref()),
7379/// _ => _internal_err!(
7480/// "Config value \"{}\" not found on MyConfig",
7581/// key
@@ -102,15 +108,14 @@ use crate::{DataFusionError, Result};
102108/// ```
103109///
104110/// NB: Misplaced commas may result in nonsensical errors
105- ///
106111#[ macro_export]
107112macro_rules! config_namespace {
108113 (
109114 $( #[ doc = $struct_d: tt] ) *
110115 $vis: vis struct $struct_name: ident {
111116 $(
112117 $( #[ doc = $d: tt] ) *
113- $field_vis: vis $field_name: ident : $field_type: ty, default = $default: expr
118+ $field_vis: vis $field_name: ident : $field_type: ty, $ ( warn = $warn : expr , ) ? $ ( transform = $transform : expr , ) ? default = $default: expr
114119 ) * $( , ) *
115120 }
116121 ) => {
@@ -127,9 +132,14 @@ macro_rules! config_namespace {
127132 impl ConfigField for $struct_name {
128133 fn set( & mut self , key: & str , value: & str ) -> Result <( ) > {
129134 let ( key, rem) = key. split_once( '.' ) . unwrap_or( ( key, "" ) ) ;
135+
130136 match key {
131137 $(
132- stringify!( $field_name) => self . $field_name. set( rem, value) ,
138+ stringify!( $field_name) => {
139+ $( let value = $transform( value) ; ) ?
140+ $( log:: warn!( $warn) ; ) ?
141+ self . $field_name. set( rem, value. as_ref( ) )
142+ } ,
133143 ) *
134144 _ => return _config_err!(
135145 "Config value \" {}\" not found on {}" , key, stringify!( $struct_name)
@@ -211,12 +221,15 @@ config_namespace! {
211221 /// When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
212222 pub enable_ident_normalization: bool , default = true
213223
214- /// When set to true, SQL parser will normalize options value (convert value to lowercase)
215- pub enable_options_value_normalization: bool , default = true
224+ /// When set to true, SQL parser will normalize options value (convert value to lowercase).
225+ /// Note that this option is ignored and will be removed in the future. All case-insensitive values
226+ /// are normalized automatically.
227+ pub enable_options_value_normalization: bool , warn = "`enable_options_value_normalization` is deprecated and ignored" , default = false
216228
217229 /// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic,
218230 /// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
219231 pub dialect: String , default = "generic" . to_string( )
232+ // no need to lowercase because [`sqlparser::dialect_from_str`] is case-insensitive
220233
221234 /// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but
222235 /// ignore the length. If false, error if a `VARCHAR` with a length is
@@ -431,7 +444,7 @@ config_namespace! {
431444 ///
432445 /// Note that this default setting is not the same as
433446 /// the default parquet writer setting.
434- pub compression: Option <String >, default = Some ( "zstd(3)" . into( ) )
447+ pub compression: Option <String >, transform = str :: to_lowercase , default = Some ( "zstd(3)" . into( ) )
435448
436449 /// (writing) Sets if dictionary encoding is enabled. If NULL, uses
437450 /// default parquet writer setting
@@ -444,7 +457,7 @@ config_namespace! {
444457 /// Valid values are: "none", "chunk", and "page"
445458 /// These values are not case sensitive. If NULL, uses
446459 /// default parquet writer setting
447- pub statistics_enabled: Option <String >, default = Some ( "page" . into( ) )
460+ pub statistics_enabled: Option <String >, transform = str :: to_lowercase , default = Some ( "page" . into( ) )
448461
449462 /// (writing) Sets max statistics size for any column. If NULL, uses
450463 /// default parquet writer setting
@@ -470,7 +483,7 @@ config_namespace! {
470483 /// delta_byte_array, rle_dictionary, and byte_stream_split.
471484 /// These values are not case sensitive. If NULL, uses
472485 /// default parquet writer setting
473- pub encoding: Option <String >, default = None
486+ pub encoding: Option <String >, transform = str :: to_lowercase , default = None
474487
475488 /// (writing) Use any available bloom filters when reading parquet files
476489 pub bloom_filter_on_read: bool , default = true
@@ -971,29 +984,45 @@ impl<F: ConfigField + Default> ConfigField for Option<F> {
971984 }
972985}
973986
987+ fn default_transform < T > ( input : & str ) -> Result < T >
988+ where
989+ T : FromStr ,
990+ <T as FromStr >:: Err : Sync + Send + Error + ' static ,
991+ {
992+ input. parse ( ) . map_err ( |e| {
993+ DataFusionError :: Context (
994+ format ! (
995+ "Error parsing '{}' as {}" ,
996+ input,
997+ std:: any:: type_name:: <T >( )
998+ ) ,
999+ Box :: new ( DataFusionError :: External ( Box :: new ( e) ) ) ,
1000+ )
1001+ } )
1002+ }
1003+
/// Implements `ConfigField` for a leaf (non-nested) value type.
///
/// Two forms are accepted:
///
/// * `config_field!(T)` - the new value is produced by passing the raw string
///   to `default_transform`.
/// * `config_field!(T, name => expr)` - `name` is bound to the raw `&str`
///   passed to `set`, and `expr` is evaluated to produce the new value. `expr`
///   may use `?` to return early from `set` with an error.
///
/// In both forms `visit` reports the current value through `Visit::some`, and
/// `set` ignores the key it is given.
///
/// NOTE(review): this macro is `#[macro_export]`ed but the one-argument form
/// names `default_transform` without a path, so expansions appear to need that
/// function in scope at the call site - confirm this is intended.
#[macro_export]
macro_rules! config_field {
    // Short form: delegate to the general form using the default parser.
    ($t:ty) => {
        config_field!($t, raw => default_transform(raw)?);
    };

    // General form: `$input` names the incoming string inside `$convert`.
    ($t:ty, $input:ident => $convert:expr) => {
        impl ConfigField for $t {
            fn visit<V: Visit>(
                &self,
                visitor: &mut V,
                key: &str,
                description: &'static str,
            ) {
                visitor.some(key, self, description)
            }

            fn set(&mut self, _: &str, $input: &str) -> Result<()> {
                // Leaf values have no sub-keys, so the key is not consulted.
                *self = $convert;
                Ok(())
            }
        }
    };
}
9941023
9951024config_field ! ( String ) ;
996- config_field ! ( bool ) ;
1025+ config_field ! ( bool , value => default_transform ( value . to_lowercase ( ) . as_str ( ) ) ? ) ;
9971026config_field ! ( usize ) ;
9981027config_field ! ( f64 ) ;
9991028config_field ! ( u64 ) ;
@@ -1508,7 +1537,7 @@ macro_rules! config_namespace_with_hashmap {
15081537 $vis: vis struct $struct_name: ident {
15091538 $(
15101539 $( #[ doc = $d: tt] ) *
1511- $field_vis: vis $field_name: ident : $field_type: ty, default = $default: expr
1540+ $field_vis: vis $field_name: ident : $field_type: ty, $ ( transform = $transform : expr , ) ? default = $default: expr
15121541 ) * $( , ) *
15131542 }
15141543 ) => {
@@ -1527,7 +1556,10 @@ macro_rules! config_namespace_with_hashmap {
15271556 let ( key, rem) = key. split_once( '.' ) . unwrap_or( ( key, "" ) ) ;
15281557 match key {
15291558 $(
1530- stringify!( $field_name) => self . $field_name. set( rem, value) ,
1559+ stringify!( $field_name) => {
1560+ $( let value = $transform( value) ; ) ?
1561+ self . $field_name. set( rem, value. as_ref( ) )
1562+ } ,
15311563 ) *
15321564 _ => _config_err!(
15331565 "Config value \" {}\" not found on {}" , key, stringify!( $struct_name)
@@ -1606,7 +1638,7 @@ config_namespace_with_hashmap! {
16061638 /// lzo, brotli(level), lz4, zstd(level), and lz4_raw.
16071639 /// These values are not case-sensitive. If NULL, uses
16081640 /// default parquet options
1609- pub compression: Option <String >, default = None
1641+ pub compression: Option <String >, transform = str :: to_lowercase , default = None
16101642
16111643 /// Sets if statistics are enabled for the column
16121644 /// Valid values are: "none", "chunk", and "page"
0 commit comments