
Commit 49d9740: fix tests
Parent: b0e53bc

2 files changed: 20 additions, 21 deletions

tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.reference
Lines changed: 5 additions, 6 deletions
@@ -31,15 +31,14 @@
 2 2020
 3 2020
 4 2021
----- Test max_bytes per file
+---- Test max_bytes and max_rows per file
 ---- Count files in big_destination_max_bytes, should be 5 (4 parquet, 1 commit)
 5
 ---- Count rows in big_table and big_destination_max_bytes
-10485760
-10485760
----- Test max_rows per file
+4194304
+4194304
 ---- Count files in big_destination_max_rows, should be 5 (4 parquet, 1 commit)
 5
 ---- Count rows in big_table and big_destination_max_rows
-10485760
-10485760
+4194304
+4194304
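These expected values mirror the queries in the companion .sh test: the two file-count lines stay at 5 (4 Parquet data files plus 1 commit file), while the row-count lines drop from 10485760 to 4194304 because the test now inserts numbers(4194304) rows. A rough sketch of the checks behind those lines, with the script's bash variables spelled out as plain table names purely for illustration:

    -- Illustrative only; the test runs these through its query() helper,
    -- with $big_table and $big_destination_max_bytes substituted by bash.
    SELECT count() FROM big_table;                  -- reference expects 4194304
    SELECT count() FROM big_destination_max_bytes;  -- reference expects 4194304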

tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.sh
Lines changed: 15 additions, 15 deletions
@@ -76,20 +76,29 @@ sleep 1
 echo "---- Both data parts should appear"
 query "SELECT * FROM s3(s3_conn, filename='$s3_table_wildcard_partition_expression_with_function/**.parquet') ORDER BY id"
 
-echo "---- Test max_bytes per file"
+echo "---- Test max_bytes and max_rows per file"
 
 query "CREATE TABLE $big_table (id UInt64, data String, year UInt16) Engine=MergeTree() order by id partition by year"
 
 query "CREATE TABLE $big_destination_max_bytes(id UInt64, data String, year UInt16) engine=S3(s3_conn, filename='$big_destination_max_bytes', partition_strategy='hive', format=Parquet) partition by year"
 
-# we need to set min_insert_block_size_rows = 10485760, min_insert_block_size_bytes = 0 to make sure a single part is generated
-query "INSERT INTO $big_table SELECT number + 10_485_760_0 AS id, repeat('x', 1000) AS data, 2025 AS year FROM numbers(10_485_760) SETTINGS min_insert_block_size_rows = 10485760, min_insert_block_size_bytes = 0, max_insert_threads=1"
+query "CREATE TABLE $big_destination_max_rows(id UInt64, data String, year UInt16) engine=S3(s3_conn, filename='$big_destination_max_rows', partition_strategy='hive', format=Parquet) partition by year"
+
+# 4194304 is a number that came up during multiple iterations; it does not really mean anything (aside from the fact that the numbers below depend on it)
+query "INSERT INTO $big_table SELECT number AS id, repeat('x', 100) AS data, 2025 AS year FROM numbers(4194304)"
+
+# make sure we have only one part
+query "OPTIMIZE TABLE $big_table FINAL"
+
+big_part=$(query "SELECT name FROM system.parts WHERE database = currentDatabase() AND table = '$big_table' AND partition_id = '2025' AND active = 1 ORDER BY name LIMIT 1" | tr -d '\n')
 
 # this should generate ~4 files
-query "ALTER TABLE $big_table EXPORT PART '2025_1_1_0' TO TABLE $big_destination_max_bytes SETTINGS allow_experimental_export_merge_tree_part = 1, export_merge_tree_part_max_bytes_per_file=10000000, output_format_parquet_row_group_size_bytes=5000000"
+query "ALTER TABLE $big_table EXPORT PART '$big_part' TO TABLE $big_destination_max_bytes SETTINGS allow_experimental_export_merge_tree_part = 1, export_merge_tree_part_max_bytes_per_file=3500000, output_format_parquet_row_group_size_bytes=1000000"
+# export_merge_tree_part_max_rows_per_file = 1048576 (which is 4194304/4) to generate 4 files
+query "ALTER TABLE $big_table EXPORT PART '$big_part' TO TABLE $big_destination_max_rows SETTINGS allow_experimental_export_merge_tree_part = 1, export_merge_tree_part_max_rows_per_file=1048576"
 
-# sleeping a little longer because it will write multiple files
-sleep 50
+# sleeping a little longer because it will write multiple files, trying not to be flaky
+sleep 20
 
 echo "---- Count files in big_destination_max_bytes, should be 5 (4 parquet, 1 commit)"
 query "SELECT count(_file) FROM s3(s3_conn, filename='$big_destination_max_bytes/**', format='One')"
@@ -98,15 +107,6 @@ echo "---- Count rows in big_table and big_destination_max_bytes"
 query "SELECT COUNT() from $big_table"
 query "SELECT COUNT() from $big_destination_max_bytes"
 
-query "CREATE TABLE $big_destination_max_rows(id UInt64, data String, year UInt16) engine=S3(s3_conn, filename='$big_destination_max_rows', partition_strategy='hive', format=Parquet) partition by year"
-
-echo "---- Test max_rows per file"
-# export_merge_tree_part_max_rows_per_file = 2621440 is select count()/4 from big_table
-query "ALTER TABLE $big_table EXPORT PART '2025_1_1_0' TO TABLE $big_destination_max_rows SETTINGS allow_experimental_export_merge_tree_part = 1, export_merge_tree_part_max_rows_per_file=2621440"
-
-# sleeping a little longer because it will write multiple files
-sleep 50
-
 echo "---- Count files in big_destination_max_rows, should be 5 (4 parquet, 1 commit)"
 query "SELECT count(_file) FROM s3(s3_conn, filename='$big_destination_max_rows/**', format='One')"

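The row-based split in the rewritten test is plain arithmetic: 4194304 inserted rows divided by export_merge_tree_part_max_rows_per_file = 1048576 gives exactly 4 Parquet data files, plus the commit file, hence the expected count of 5. A hedged sketch of how the per-file distribution could be inspected, reusing the s3_conn named collection and glob pattern the script already uses (the destination path is the $big_destination_max_rows bash variable there, spelled out here only for illustration):

    -- Not part of the commit; a sanity check of the per-file row split.
    -- Expects 4 files with roughly 1048576 rows each under the hive partition year=2025.
    SELECT _file, count() AS rows
    FROM s3(s3_conn, filename='big_destination_max_rows/**.parquet')
    GROUP BY _file
    ORDER BY _file;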