tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.sh
15 additions & 15 deletions
@@ -76,20 +76,29 @@ sleep 1
 echo "---- Both data parts should appear"
 query "SELECT * FROM s3(s3_conn, filename='$s3_table_wildcard_partition_expression_with_function/**.parquet') ORDER BY id"
 
-echo "---- Test max_bytes per file"
+echo "---- Test max_bytes and max_rows per file"
 
 query "CREATE TABLE $big_table (id UInt64, data String, year UInt16) Engine=MergeTree() order by id partition by year"
 
 query "CREATE TABLE $big_destination_max_bytes(id UInt64, data String, year UInt16) engine=S3(s3_conn, filename='$big_destination_max_bytes', partition_strategy='hive', format=Parquet) partition by year"
 
-# we need to set min_insert_block_size_rows = 10485760, min_insert_block_size_bytes = 0 to make sure a single part is generated
-query "INSERT INTO $big_table SELECT number + 10_485_760_0 AS id, repeat('x', 1000) AS data, 2025 AS year FROM numbers(10_485_760) SETTINGS min_insert_block_size_rows = 10485760, min_insert_block_size_bytes = 0, max_insert_threads=1"
+query "CREATE TABLE $big_destination_max_rows(id UInt64, data String, year UInt16) engine=S3(s3_conn, filename='$big_destination_max_rows', partition_strategy='hive', format=Parquet) partition by year"
+
+# 4194304 is a number that came up during multiple iterations; it does not really mean anything (aside from the fact that the numbers below depend on it)
+query "INSERT INTO $big_table SELECT number AS id, repeat('x', 100) AS data, 2025 AS year FROM numbers(4194304)"
+
+# make sure we have only one part
+query "OPTIMIZE TABLE $big_table FINAL"
+
+big_part=$(query "SELECT name FROM system.parts WHERE database = currentDatabase() AND table = '$big_table' AND partition_id = '2025' AND active = 1 ORDER BY name LIMIT 1" | tr -d '\n')
 
 # this should generate ~4 files
-query "ALTER TABLE $big_table EXPORT PART '2025_1_1_0' TO TABLE $big_destination_max_bytes SETTINGS allow_experimental_export_merge_tree_part = 1, export_merge_tree_part_max_bytes_per_file=10000000, output_format_parquet_row_group_size_bytes=5000000"
+query "ALTER TABLE $big_table EXPORT PART '$big_part' TO TABLE $big_destination_max_bytes SETTINGS allow_experimental_export_merge_tree_part = 1, export_merge_tree_part_max_bytes_per_file=3500000, output_format_parquet_row_group_size_bytes=1000000"
+# export_merge_tree_part_max_rows_per_file = 1048576 (which is 4194304/4) to generate 4 files
+query "ALTER TABLE $big_table EXPORT PART '$big_part' TO TABLE $big_destination_max_rows SETTINGS allow_experimental_export_merge_tree_part = 1, export_merge_tree_part_max_rows_per_file=1048576"
 
-# sleeping a little longer because it will write multiple files
-sleep 50
+# sleeping a little longer because it will write multiple files, trying not to be flaky
+sleep 20
 
 echo "---- Count files in big_destination_max_bytes, should be 5 (4 parquet, 1 commit)"
 query "SELECT count(_file) FROM s3(s3_conn, filename='$big_destination_max_bytes/**', format='One')"
@@ -98,15 +107,6 @@ echo "---- Count rows in big_table and big_destination_max_bytes"
 query "SELECT COUNT() from $big_table"
 query "SELECT COUNT() from $big_destination_max_bytes"
 
-query "CREATE TABLE $big_destination_max_rows(id UInt64, data String, year UInt16) engine=S3(s3_conn, filename='$big_destination_max_rows', partition_strategy='hive', format=Parquet) partition by year"
-
-echo "---- Test max_rows per file"
-# export_merge_tree_part_max_rows_per_file = 2621440 is select count()/4 from big_table
-query "ALTER TABLE $big_table EXPORT PART '2025_1_1_0' TO TABLE $big_destination_max_rows SETTINGS allow_experimental_export_merge_tree_part = 1, export_merge_tree_part_max_rows_per_file=2621440"
-
-# sleeping a little longer because it will write multiple files
-sleep 50
-
 echo "---- Count files in big_destination_max_rows, should be 5 (4 parquet, 1 commit)"
 query "SELECT count(_file) FROM s3(s3_conn, filename='$big_destination_max_rows/**', format='One')"