Skip to content

Commit f39c0d3

Browse files
committed
flush based on bytes
1 parent 5538c74 commit f39c0d3

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

mp_api/client/core/client.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -637,9 +637,10 @@ def _flush(accumulator, group):
637637
accumulator = []
638638
for page in iterator:
639639
# arro3 rb to pyarrow rb for compat w/ pyarrow ds writer
640-
accumulator.append(pa.record_batch(page))
640+
rg = pa.record_batch(page)
641+
accumulator.append(rg)
641642
page_size = page.num_rows
642-
size += page_size
643+
size += rg.get_total_buffer_size()
643644

644645
if pbar is not None:
645646
pbar.update(page_size)

mp_api/client/core/settings.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,8 @@ class MAPIClientSettings(BaseSettings):
9595
)
9696

9797
DATASET_FLUSH_THRESHOLD: int = Field(
98-
100000,
99-
description="Threshold number of rows to accumulate in memory before flushing dataset to disk",
98+
int(2.75 * 1024**3),
99+
description="Threshold bytes to accumulate in memory before flushing dataset to disk",
100100
)
101101

102102
model_config = SettingsConfigDict(env_prefix="MPRESTER_")

0 commit comments

Comments
 (0)