@@ -1927,6 +1927,16 @@ impl ColumnBuilder {
19271927 }
19281928
19291929 pub fn with_capacity ( ty : & DataType , capacity : usize ) -> ColumnBuilder {
1930+ ColumnBuilder :: with_capacity_hint ( ty, capacity, true ) // delegates with enable_datasize_hint = true, so String/Variant builders are created with a data capacity of 0
1931+ }
1932+
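1933+ /// Create a new column builder with the given `capacity` and `enable_datasize_hint` flag.
1934+ /// `enable_datasize_hint` only affects String and Variant columns: it selects the data capacity passed to `StringColumnBuilder` (0 when true, `capacity` when false), i.e. whether values are pre-allocated.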
1935+ pub fn with_capacity_hint (
1936+ ty : & DataType ,
1937+ capacity : usize ,
1938+ enable_datasize_hint : bool ,
1939+ ) -> ColumnBuilder {
19301940 match ty {
19311941 DataType :: Null => ColumnBuilder :: Null { len : 0 } ,
19321942 DataType :: EmptyArray => ColumnBuilder :: EmptyArray { len : 0 } ,
@@ -1939,27 +1949,28 @@ impl ColumnBuilder {
19391949 }
19401950 DataType :: Boolean => ColumnBuilder :: Boolean ( MutableBitmap :: with_capacity ( capacity) ) ,
19411951 DataType :: String => {
1942- ColumnBuilder :: String ( StringColumnBuilder :: with_capacity ( capacity, 0 ) )
1952+ let data_capacity = if enable_datasize_hint { 0 } else { capacity } ;
1953+ ColumnBuilder :: String ( StringColumnBuilder :: with_capacity ( capacity, data_capacity) )
19431954 }
19441955 DataType :: Timestamp => ColumnBuilder :: Timestamp ( Vec :: with_capacity ( capacity) ) ,
19451956 DataType :: Date => ColumnBuilder :: Date ( Vec :: with_capacity ( capacity) ) ,
19461957 DataType :: Nullable ( ty) => ColumnBuilder :: Nullable ( Box :: new ( NullableColumnBuilder {
1947- builder : Self :: with_capacity ( ty, capacity) ,
1958+ builder : Self :: with_capacity_hint ( ty, capacity, enable_datasize_hint ) ,
19481959 validity : MutableBitmap :: with_capacity ( capacity) ,
19491960 } ) ) ,
19501961 DataType :: Array ( ty) => {
19511962 let mut offsets = Vec :: with_capacity ( capacity + 1 ) ;
19521963 offsets. push ( 0 ) ;
19531964 ColumnBuilder :: Array ( Box :: new ( ArrayColumnBuilder {
1954- builder : Self :: with_capacity ( ty, 0 ) ,
1965+ builder : Self :: with_capacity_hint ( ty, 0 , enable_datasize_hint ) ,
19551966 offsets,
19561967 } ) )
19571968 }
19581969 DataType :: Map ( ty) => {
19591970 let mut offsets = Vec :: with_capacity ( capacity + 1 ) ;
19601971 offsets. push ( 0 ) ;
19611972 ColumnBuilder :: Map ( Box :: new ( ArrayColumnBuilder {
1962- builder : Self :: with_capacity ( ty, 0 ) ,
1973+ builder : Self :: with_capacity_hint ( ty, 0 , enable_datasize_hint ) ,
19631974 offsets,
19641975 } ) )
19651976 }
@@ -1968,12 +1979,15 @@ impl ColumnBuilder {
19681979 ColumnBuilder :: Tuple (
19691980 fields
19701981 . iter ( )
1971- . map ( |field| Self :: with_capacity ( field, capacity) )
1982+ . map ( |field| {
1983+ Self :: with_capacity_hint ( field, capacity, enable_datasize_hint)
1984+ } )
19721985 . collect ( ) ,
19731986 )
19741987 }
19751988 DataType :: Variant => {
1976- ColumnBuilder :: Variant ( StringColumnBuilder :: with_capacity ( capacity, 0 ) )
1989+ let data_capacity = if enable_datasize_hint { 0 } else { capacity } ;
1990+ ColumnBuilder :: Variant ( StringColumnBuilder :: with_capacity ( capacity, data_capacity) )
19771991 }
19781992 DataType :: Generic ( _) => {
19791993 unreachable ! ( "unable to initialize column builder for generic type" )
0 commit comments