Commit aeb8792

Improving the chunksize parser slicer algorithm
1 parent 0376aef

File tree

1 file changed: +4 -12 lines changed


awswrangler/s3.py

Lines changed: 4 additions & 12 deletions
@@ -1684,23 +1684,15 @@ def _read_parquet_chunked(
                 if chunked is True:
                     yield _table2df(table=table, categories=categories, use_threads=use_threads)
                 else:
-                    if next_slice is not None:
+                    if next_slice:
                         table = pa.lib.concat_tables([next_slice, table], promote=promote)
-                    length: int = len(table)
-                    while True:
-                        if length == chunked:
-                            yield _table2df(table=table, categories=categories, use_threads=use_threads)
-                            next_slice = None
-                            break
-                        if length < chunked:
-                            next_slice = table
-                            break
+                    while len(table) >= chunked:
                         yield _table2df(
                             table=table.slice(offset=0, length=chunked), categories=categories, use_threads=use_threads
                         )
                         table = table.slice(offset=chunked, length=None)
-                        length = len(table)
-    if next_slice is not None:
+                    next_slice = table
+    if next_slice:
         yield _table2df(table=next_slice, categories=categories, use_threads=use_threads)
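The rewritten loop drops the while True / break bookkeeping and slices directly: any leftover rows from the previous table are concatenated onto the incoming one, fixed-size slices are emitted while enough rows remain, and the remainder is carried forward in next_slice. The following is a minimal, self-contained sketch of that slicing pattern using plain pyarrow; the helper name slice_table and the sample data are illustrative only and are not part of awswrangler/s3.py.

from typing import Iterator, Optional

import pyarrow as pa


def slice_table(tables: Iterator[pa.Table], chunked: int) -> Iterator[pa.Table]:
    """Yield tables of exactly `chunked` rows, carrying the remainder forward."""
    next_slice: Optional[pa.Table] = None
    for table in tables:
        if next_slice is not None:
            # Prepend the leftover rows from the previous iteration.
            table = pa.concat_tables([next_slice, table])
        while len(table) >= chunked:
            # Emit a full chunk and keep the rest of the table.
            yield table.slice(offset=0, length=chunked)
            table = table.slice(offset=chunked, length=None)
        next_slice = table
    if next_slice is not None and len(next_slice) > 0:
        # Emit the final partial chunk, if any.
        yield next_slice


if __name__ == "__main__":
    # Three tables of 5 rows each (15 rows total), sliced into chunks of 4.
    batches = [pa.table({"x": list(range(i * 5, i * 5 + 5))}) for i in range(3)]
    for chunk in slice_table(iter(batches), chunked=4):
        print(len(chunk))  # prints 4, 4, 4, 3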