Skip to content

Commit 189433c

Browse files
committed
update docs
1 parent a456c8a commit 189433c

File tree

1 file changed

+112
-3
lines changed

1 file changed

+112
-3
lines changed

docs/en/engines/table-engines/mergetree-family/part_export.md

Lines changed: 112 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
## Overview
44

5-
The `ALTER TABLE EXPORT PART` command exports individual MergeTree data parts to object storage (S3, Azure Blob Storage, etc.), typically in Parquet format.
5+
The `ALTER TABLE EXPORT PART` command exports individual MergeTree data parts to object storage (S3, Azure Blob Storage, etc.), typically in Parquet format. A commit file listing all data files exported within that transaction is also written to the same destination directory.
66

77
**Key Characteristics:**
88
- **Experimental feature** - must be enabled via `allow_experimental_export_merge_tree_part` setting
@@ -48,6 +48,18 @@ Source and destination tables must be 100% compatible:
4848
- **Default**: `false`
4949
- **Description**: If set to `true`, it will overwrite the file. Otherwise, fails with exception.
5050

51+
### `export_merge_tree_part_max_bytes_per_file` (Optional)
52+
53+
- **Type**: `UInt64`
54+
- **Default**: `0`
55+
- **Description**: Maximum number of bytes to write to a single file when exporting a merge tree part. `0` means no limit. This is a soft limit: the actual file size depends heavily on the output format granularity and the input source chunk size.
56+
57+
### `export_merge_tree_part_max_rows_per_file` (Optional)
58+
59+
- **Type**: `UInt64`
60+
- **Default**: `0`
61+
- **Description**: Maximum number of rows to write to a single file when exporting a merge tree part. `0` means no limit. This is a soft limit: the actual number of rows per file depends heavily on the output format granularity and the input source chunk size.
62+
5163
## Examples
5264

5365
### Basic Export to S3
@@ -93,7 +105,7 @@ destination_database: default
93105
destination_table: destination_table
94106
create_time: 2025-11-19 09:09:11
95107
part_name: 20251016-365_1_1_0
96-
destination_file_path: table_root/eventDate=2025-10-16/retention=365/20251016-365_1_1_0_17B2F6CD5D3C18E787C07AE3DAF16EB1.parquet
108+
destination_file_paths: ['table_root/eventDate=2025-10-16/retention=365/20251016-365_1_1_0_17B2F6CD5D3C18E787C07AE3DAF16EB1.parquet']
97109
elapsed: 2.04845441
98110
rows_read: 1138688 -- 1.14 million
99111
total_rows_to_read: 550961374 -- 550.96 million
@@ -138,7 +150,8 @@ partition_id: 2021
138150
partition: 2021
139151
part_type: Compact
140152
disk_name: default
141-
path_on_disk: year=2021/2021_0_0_0_78C704B133D41CB0EF64DD2A9ED3B6BA.parquet
153+
path_on_disk:
154+
remote_file_paths: ['year=2021/2021_0_0_0_78C704B133D41CB0EF64DD2A9ED3B6BA.parquet']
142155
rows: 1
143156
size_in_bytes: 272
144157
merged_from: ['2021_0_0_0']
@@ -158,3 +171,99 @@ ProfileEvents: {}
158171
- `PartsExportDuplicated` - Number of part exports that failed because target already exists.
159172
- `PartsExportTotalMilliseconds` - Total time spent exporting parts, in milliseconds.
160173

174+
### Split large files
175+
176+
```sql
177+
alter table big_table export part '2025_0_32_3' to table replicated_big_destination SETTINGS export_merge_tree_part_max_bytes_per_file=10000000, output_format_parquet_row_group_size_bytes=5000000;
178+
179+
arthur :) select * from system.exports;
180+
181+
SELECT *
182+
FROM system.exports
183+
184+
Query id: d78d9ce5-cfbc-4957-b7dd-bc8129811634
185+
186+
Row 1:
187+
──────
188+
source_database: default
189+
source_table: big_table
190+
destination_database: default
191+
destination_table: replicated_big_destination
192+
create_time: 2025-12-15 13:12:48
193+
part_name: 2025_0_32_3
194+
destination_file_paths: ['replicated_big/year=2025/2025_0_32_3_E439C23833C39C6E5104F6F4D1048BE7.1.parquet','replicated_big/year=2025/2025_0_32_3_E439C23833C39C6E5104F6F4D1048BE7.2.parquet','replicated_big/year=2025/2025_0_32_3_E439C23833C39C6E5104F6F4D1048BE7.3.parquet','replicated_big/year=2025/2025_0_32_3_E439C23833C39C6E5104F6F4D1048BE7.4.parquet']
195+
elapsed: 14.360427274
196+
rows_read: 10256384 -- 10.26 million
197+
total_rows_to_read: 10485760 -- 10.49 million
198+
total_size_bytes_compressed: 83779395 -- 83.78 million
199+
total_size_bytes_uncompressed: 10611691600 -- 10.61 billion
200+
bytes_read_uncompressed: 10440998912 -- 10.44 billion
201+
memory_usage: 89795477 -- 89.80 million
202+
peak_memory_usage: 107362133 -- 107.36 million
203+
204+
1 row in set. Elapsed: 0.014 sec.
205+
206+
arthur :) select * from system.part_log where event_type = 'ExportPart' order by event_time desc limit 1 format Vertical;
207+
208+
SELECT *
209+
FROM system.part_log
210+
WHERE event_type = 'ExportPart'
211+
ORDER BY event_time DESC
212+
LIMIT 1
213+
FORMAT Vertical
214+
215+
Query id: 95128b01-b751-4726-8e3e-320728ac6af7
216+
217+
Row 1:
218+
──────
219+
hostname: arthur
220+
query_id:
221+
event_type: ExportPart
222+
merge_reason: NotAMerge
223+
merge_algorithm: Undecided
224+
event_date: 2025-12-15
225+
event_time: 2025-12-15 13:13:03
226+
event_time_microseconds: 2025-12-15 13:13:03.197492
227+
duration_ms: 14673
228+
database: default
229+
table: big_table
230+
table_uuid: a3eeeea0-295c-41a3-84ef-6b5463dbbe8c
231+
part_name: 2025_0_32_3
232+
partition_id: 2025
233+
partition: 2025
234+
part_type: Wide
235+
disk_name: default
236+
path_on_disk: ./store/a3e/a3eeeea0-295c-41a3-84ef-6b5463dbbe8c/2025_0_32_3/
237+
remote_file_paths: ['replicated_big/year=2025/2025_0_32_3_E439C23833C39C6E5104F6F4D1048BE7.1.parquet','replicated_big/year=2025/2025_0_32_3_E439C23833C39C6E5104F6F4D1048BE7.2.parquet','replicated_big/year=2025/2025_0_32_3_E439C23833C39C6E5104F6F4D1048BE7.3.parquet','replicated_big/year=2025/2025_0_32_3_E439C23833C39C6E5104F6F4D1048BE7.4.parquet']
238+
rows: 10485760 -- 10.49 million
239+
size_in_bytes: 83779395 -- 83.78 million
240+
merged_from: ['2025_0_32_3']
241+
bytes_uncompressed: 10611691600 -- 10.61 billion
242+
read_rows: 10485760 -- 10.49 million
243+
read_bytes: 10674503680 -- 10.67 billion
244+
peak_memory_usage: 107362133 -- 107.36 million
245+
error: 0
246+
exception:
247+
ProfileEvents: {}
248+
249+
1 row in set. Elapsed: 0.044 sec.
250+
251+
arthur :) select _path, formatReadableSize(_size) as _size from s3(s3_conn, filename='**', format=One);
252+
253+
SELECT
254+
_path,
255+
formatReadableSize(_size) AS _size
256+
FROM s3(s3_conn, filename = '**', format = One)
257+
258+
Query id: c48ae709-f590-4d1b-8158-191f8d628966
259+
260+
┌─_path────────────────────────────────────────────────────────────────────────────────┬─_size─────┐
261+
1. │ test/replicated_big/year=2025/2025_0_32_3_E439C23833C39C6E5104F6F4D1048BE7.1.parquet │ 17.36 MiB │
262+
2. │ test/replicated_big/year=2025/2025_0_32_3_E439C23833C39C6E5104F6F4D1048BE7.2.parquet │ 17.32 MiB │
263+
3. │ test/replicated_big/year=2025/2025_0_32_3_E439C23833C39C6E5104F6F4D1048BE7.4.parquet │ 5.04 MiB │
264+
4. │ test/replicated_big/year=2025/2025_0_32_3_E439C23833C39C6E5104F6F4D1048BE7.3.parquet │ 17.40 MiB │
265+
5. │ test/replicated_big/year=2025/commit_2025_0_32_3_E439C23833C39C6E5104F6F4D1048BE7 │ 320.00 B │
266+
└──────────────────────────────────────────────────────────────────────────────────────┴───────────┘
267+
268+
5 rows in set. Elapsed: 0.072 sec.
269+
```

0 commit comments

Comments
 (0)