tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.sh
15 additions & 15 deletions
@@ -76,20 +76,29 @@ sleep 1
 echo "---- Both data parts should appear"
 query "SELECT * FROM s3(s3_conn, filename='$s3_table_wildcard_partition_expression_with_function/**.parquet') ORDER BY id"
 
-echo "---- Test max_bytes per file"
+echo "---- Test max_bytes and max_rows per file"
 
 query "CREATE TABLE $big_table (id UInt64, data String, year UInt16) Engine=MergeTree() order by id partition by year"
 
 query "CREATE TABLE $big_destination_max_bytes(id UInt64, data String, year UInt16) engine=S3(s3_conn, filename='$big_destination_max_bytes', partition_strategy='hive', format=Parquet) partition by year"
 
-# we need to set min_insert_block_size_rows = 10485760, min_insert_block_size_bytes = 0 to make sure a single part is generated
-query "INSERT INTO $big_table SELECT number + 10_485_760_0 AS id, repeat('x', 1000) AS data, 2025 AS year FROM numbers(10_485_760) SETTINGS min_insert_block_size_rows = 10485760, min_insert_block_size_bytes = 0, max_insert_threads=1"
+query "CREATE TABLE $big_destination_max_rows(id UInt64, data String, year UInt16) engine=S3(s3_conn, filename='$big_destination_max_rows', partition_strategy='hive', format=Parquet) partition by year"
+
+# 4194304 is a number that came up during multiple iterations; it does not really mean anything (aside from the fact that the numbers below depend on it)
+query "INSERT INTO $big_table SELECT number AS id, repeat('x', 100) AS data, 2025 AS year FROM numbers(4194304)"
+
+# make sure we have only one part
+query "OPTIMIZE TABLE $big_table FINAL"
+
+big_part=$(query "SELECT name FROM system.parts WHERE database = currentDatabase() AND table = '$big_table' AND partition_id = '2025' AND active = 1 ORDER BY name LIMIT 1" | tr -d '\n')
 
 # this should generate ~4 files
-query "ALTER TABLE $big_table EXPORT PART '2025_1_1_0' TO TABLE $big_destination_max_bytes SETTINGS allow_experimental_export_merge_tree_part = 1, export_merge_tree_part_max_bytes_per_file=10000000, output_format_parquet_row_group_size_bytes=5000000"
+query "ALTER TABLE $big_table EXPORT PART '$big_part' TO TABLE $big_destination_max_bytes SETTINGS allow_experimental_export_merge_tree_part = 1, export_merge_tree_part_max_bytes_per_file=3500000, output_format_parquet_row_group_size_bytes=1000000"
+# export_merge_tree_part_max_rows_per_file = 1048576 (which is 4194304/4) to generate 4 files
+query "ALTER TABLE $big_table EXPORT PART '$big_part' TO TABLE $big_destination_max_rows SETTINGS allow_experimental_export_merge_tree_part = 1, export_merge_tree_part_max_rows_per_file=1048576"
 
-# sleeping a little longer because it will write multiple files
-sleep 50
+# sleeping a little longer because it will write multiple files, trying not to be flaky
+sleep 20
 
 echo "---- Count files in big_destination_max_bytes, should be 5 (4 parquet, 1 commit)"
 query "SELECT count(_file) FROM s3(s3_conn, filename='$big_destination_max_bytes/**', format='One')"
@@ -98,15 +107,6 @@ echo "---- Count rows in big_table and big_destination_max_bytes"
 query "SELECT COUNT() from $big_table"
 query "SELECT COUNT() from $big_destination_max_bytes"
 
-query "CREATE TABLE $big_destination_max_rows(id UInt64, data String, year UInt16) engine=S3(s3_conn, filename='$big_destination_max_rows', partition_strategy='hive', format=Parquet) partition by year"
-
-echo "---- Test max_rows per file"
-# export_merge_tree_part_max_rows_per_file = 2621440 is select count()/4 from big_table
-query "ALTER TABLE $big_table EXPORT PART '2025_1_1_0' TO TABLE $big_destination_max_rows SETTINGS allow_experimental_export_merge_tree_part = 1, export_merge_tree_part_max_rows_per_file=2621440"
-
-# sleeping a little longer because it will write multiple files
-sleep 50
-
 echo "---- Count files in big_destination_max_rows, should be 5 (4 parquet, 1 commit)"
 query "SELECT count(_file) FROM s3(s3_conn, filename='$big_destination_max_rows/**', format='One')"