Skip to content

Commit 53c4c87

Browse files
committed
feat(query): enable runtime cast transform in loading parquet files
1 parent b5bcfba commit 53c4c87

File tree

4 files changed

+8
-8
lines changed

4 files changed

+8
-8
lines changed

scripts/benchmark/query/load/tpch100.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ select version();
77
SQL
88

99
for t in customer lineitem nation orders partsupp part region supplier; do
10-
echo "DROP TABLE IF EXISTS $t;" | bendsql query
10+
echo "DROP TABLE IF EXISTS $t;" | bendsql query
1111
done
1212

1313
cat <<SQL | bendsql query
@@ -113,8 +113,8 @@ cat <<SQL | bendsql query
113113
SQL
114114

115115
for t in nation region; do
116-
echo "loading into $t ..."
117-
cat <<SQL | bendsql query
116+
echo "loading into $t ..."
117+
cat <<SQL | bendsql query
118118
COPY INTO $t FROM 's3://repo.databend.rs/tpch100/${t}.tbl'
119119
credentials=(aws_key_id='$REPO_ACCESS_KEY_ID' aws_secret_key='$REPO_SECRET_ACCESS_KEY')
120120
file_format=(type='CSV' field_delimiter='|' record_delimiter='\\n' skip_header=1);
@@ -124,8 +124,8 @@ SQL
124124
done
125125

126126
for t in customer lineitem orders partsupp part supplier; do
127-
echo "loading into $t ..."
128-
cat <<SQL | bendsql query
127+
echo "loading into $t ..."
128+
cat <<SQL | bendsql query
129129
COPY INTO $t FROM 's3://repo.databend.rs/tpch100/${t}/'
130130
credentials=(aws_key_id='$REPO_ACCESS_KEY_ID' aws_secret_key='$REPO_SECRET_ACCESS_KEY') pattern ='${t}.tbl.*'
131131
file_format=(type='CSV' field_delimiter='|' record_delimiter='\\n' skip_header=1);

src/query/pipeline/sources/src/input_formats/impls/input_format_parquet.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ impl BlockBuilderTrait for ParquetBlockBuilder {
367367
let fields: Vec<DataField> = rg
368368
.fields_to_read
369369
.iter()
370-
.map(|f| DataField::from(f))
370+
.map(DataField::from)
371371
.collect::<Vec<_>>();
372372

373373
let input_schema = DataSchema::new(fields);

tests/suites/1_stateful/01_load/01_0000_streaming_load.result

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,5 @@
1212
198 2020.0 767
1313
--parquet less
1414
199 2020.0 769
15-
--parquet mismatch schema
15+
--parquet runtime cast schema
1616
199 2020.0 769

tests/suites/1_stateful/01_load/01_0000_streaming_load.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ curl -s -H "insert_sql:insert into ontime_less file_format = (type = Parquet)" -
8888
echo "select count(1), avg(Year), sum(DayOfWeek) from ontime_less;" | $MYSQL_CLIENT_CONNECT
8989

9090
# load a parquet file whose schema does not match the table; mismatched columns are cast automatically
91-
echo "--parquet mismatch schema"
91+
echo "--parquet runtime cast schema"
9292
cat $CURDIR/../ddl/ontime.sql | sed 's/ontime/ontime_test_schmea_mismatch/g' | sed 's/DATE/TIMESTAMP/g' | $MYSQL_CLIENT_CONNECT
9393
curl -s -H "insert_sql:insert into ontime_test_schmea_mismatch file_format = (type = Parquet)" -F "upload=@/tmp/ontime_200.parquet" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" > /dev/null 2>&1
9494
echo "select count(1), avg(Year), sum(DayOfWeek) from ontime_test_schmea_mismatch;" | $MYSQL_CLIENT_CONNECT

0 commit comments

Comments
 (0)