
Commit 7b2d458

chore: use default encoding of parquet columns instead of plain. (#17688)
1 parent 8686791 commit 7b2d458

5 files changed (+10 -10 lines)


src/query/storages/stage/src/append/parquet_file/writer_processor.rs

Lines changed: 4 additions & 4 deletions
@@ -34,10 +34,10 @@ use databend_common_pipeline_core::processors::ProcessorPtr;
 use opendal::Operator;
 use parquet::arrow::ArrowWriter;
 use parquet::basic::Compression;
-use parquet::basic::Encoding;
 use parquet::basic::ZstdLevel;
 use parquet::file::properties::EnabledStatistics;
 use parquet::file::properties::WriterProperties;
+use parquet::file::properties::WriterVersion;
 
 use super::block_batch::BlockBatch;
 use crate::append::output::DataSummary;
@@ -98,13 +98,13 @@ fn create_writer(
     }
 
     let props = WriterProperties::builder()
+        .set_writer_version(WriterVersion::PARQUET_2_0)
         .set_compression(compression)
+        .set_created_by(create_by)
         .set_max_row_group_size(MAX_ROW_GROUP_SIZE)
-        .set_encoding(Encoding::PLAIN)
-        .set_dictionary_enabled(false)
         .set_statistics_enabled(EnabledStatistics::Chunk)
+        .set_dictionary_enabled(true)
         .set_bloom_filter_enabled(false)
-        .set_created_by(create_by)
         .build();
     let buf_size = match targe_file_size {
         Some(n) if n < MAX_BUFFER_SIZE => n,
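
For context, the net effect of the change is easier to see outside the processor pipeline. The following is a minimal standalone sketch (not part of the commit) of the new writer configuration, assuming a recent arrow-rs parquet crate; the output path, schema, ZSTD level, row-group size, and created-by string are placeholders standing in for the real compression, MAX_ROW_GROUP_SIZE, and create_by values:

use std::fs::File;
use std::sync::Arc;

use arrow_array::{Int64Array, RecordBatch};
use arrow_schema::{DataType, Field, Schema};
use parquet::arrow::ArrowWriter;
use parquet::basic::{Compression, ZstdLevel};
use parquet::file::properties::{EnabledStatistics, WriterProperties, WriterVersion};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Same shape as the patched create_writer(): Parquet v2 writer version,
    // dictionary encoding left enabled (the crate default) instead of
    // forcing Encoding::PLAIN with dictionaries disabled.
    let props = WriterProperties::builder()
        .set_writer_version(WriterVersion::PARQUET_2_0)
        .set_compression(Compression::ZSTD(ZstdLevel::try_new(3)?)) // placeholder level
        .set_created_by("example-writer".to_string()) // placeholder for create_by
        .set_max_row_group_size(1_000_000) // placeholder for MAX_ROW_GROUP_SIZE
        .set_statistics_enabled(EnabledStatistics::Chunk)
        .set_dictionary_enabled(true)
        .set_bloom_filter_enabled(false)
        .build();

    // A one-column batch, standing in for the blocks the processor writes.
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int64, false)]));
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![Arc::new(Int64Array::from(vec![1i64, 2, 3]))],
    )?;

    let mut writer = ArrowWriter::try_new(File::create("out.parquet")?, schema, Some(props))?;
    writer.write(&batch)?;
    writer.close()?;
    Ok(())
}

With the v2 writer version and dictionaries left enabled, the crate chooses per-column encodings (dictionary where it helps, with automatic fallback) instead of forcing PLAIN everywhere, which is why the expected file sizes in the tests below change.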

tests/sqllogictests/suites/stage/formats/parquet/options/parquet_missing_uuid.test

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ select * from t_uuid
 query
 copy into @data/parquet/unload/uuid/ from (select 1 as a) file_format = (type = parquet)
 ----
-1 1 374
+1 1 408
 
 statement ok
 truncate table t_uuid

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 >>>> create or replace connection c_00_0005 storage_type='s3' access_key_id = 'minioadmin' endpoint_url = 'http://127.0.0.1:9900' secret_access_key = 'minioadmin'
 >>>> copy into 's3://testbucket/c_00_0005/ab de/f' connection=(connection_name='c_00_0005') from (select 1) detailed_output=true use_raw_path=true single=true overwrite=true
-c_00_0005/ab de/f 374 1
+c_00_0005/ab de/f 408 1
 <<<<
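
The larger expected sizes in these result files (374 -> 408 bytes here, with similar bumps in the sequence_as_default results below) are consistent with the new settings: tiny single-value files pay a fixed cost for the extra dictionary page and v2 metadata, while large files shrink, as the 00_0017 shell test below shows (the larger of two files drops from about 63569025 to 24960476 bytes). As a quick way to check which encodings a produced file actually uses, here is a hedged sketch against the same parquet crate, reading a hypothetical out.parquet:

use std::fs::File;

use parquet::file::reader::{FileReader, SerializedFileReader};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Print the per-column encodings recorded in the footer metadata; with
    // dictionaries enabled under PARQUET_2_0, dictionary encodings such as
    // RLE_DICTIONARY should appear where PLAIN used to be forced.
    let reader = SerializedFileReader::new(File::open("out.parquet")?)?;
    for rg in reader.metadata().row_groups() {
        for col in rg.columns() {
            println!("{}: {:?}", col.column_path().string(), col.encodings());
        }
    }
    Ok(())
}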

tests/suites/1_stateful/00_stage/00_0017_copy_into_parquet.sh

Lines changed: 2 additions & 2 deletions
@@ -9,8 +9,8 @@ stmt "create stage s1;"
 # one file when #row is small even though multi-threads
 echo "copy into @s1/ from (select * from numbers(6000000)) max_file_size=64000000 detailed_output=true" | $BENDSQL_CLIENT_CONNECT | wc -l | sed 's/ //g'
 
-# two files, the larger is about 63569025
-echo "copy /*+ set_var(max_threads=1) */ into @s1/ from (select * from numbers(70000000)) max_file_size=64000000 detailed_output=true;" | $BENDSQL_CLIENT_CONNECT | wc -l | sed 's/ //g'
+# two files, the larger is 24960476
+echo "copy /*+ set_var(max_threads=1) */ into @s1/ from (select * from numbers(70000000)) max_file_size=25000000 detailed_output=true;" | $BENDSQL_CLIENT_CONNECT | wc -l | sed 's/ //g'
 
 # one file
 echo "copy /*+ set_var(max_threads=1) */ into @s1/ from (select * from numbers(60000000)) max_file_size=64000000 detailed_output=true;" | $BENDSQL_CLIENT_CONNECT | wc -l | sed 's/ //g'

tests/suites/1_stateful/00_stage/00_0019_sequence_as_default.result

Lines changed: 2 additions & 2 deletions
@@ -42,9 +42,9 @@
 >>>> create or replace sequence seq
 >>>> create or replace table dest(seq int default nextval(seq), a int)
 >>>> copy INTO @sequence_as_default/src1/ from src1 file_format=(type=parquet);
-2 9 397
+2 9 430
 >>>> copy INTO @sequence_as_default/src2/ from src2 file_format=(type=parquet);
-2 18 594
+2 18 660
 >>>> copy INTO dest(a) from @sequence_as_default/src1 file_format=(type=parquet) return_failed_only=true;
 >>>> copy INTO dest from @sequence_as_default/src2 file_format=(type=parquet) return_failed_only=true;
 >>>> select * from dest order by seq
