Skip to content

Commit 5d51c45

Browse files
fix: Fix test_empty_parquet
1 parent 3bad1cd commit 5d51c45

File tree

1 file changed

+15
-4
lines changed

1 file changed

+15
-4
lines changed

awswrangler/s3/_read_parquet.py

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -247,14 +247,16 @@ def _read_parquet_chunked(
247247
if pq_file is None:
248248
continue
249249

250+
schema = pq_file.schema.to_arrow_schema()
251+
if columns:
252+
schema = pa.schema([schema.field(column) for column in columns], schema.metadata)
253+
250254
use_threads_flag: bool = use_threads if isinstance(use_threads, bool) else bool(use_threads > 1)
255+
iterate_at_least_once = False
251256
for chunk in pq_file.iter_batches(
252257
batch_size=batch_size, columns=columns, use_threads=use_threads_flag, use_pandas_metadata=False
253258
):
254-
schema = pq_file.schema.to_arrow_schema()
255-
if columns:
256-
schema = pa.schema([schema.field(column) for column in columns], schema.metadata)
257-
259+
iterate_at_least_once = True
258260
table = _add_table_partitions(
259261
table=pa.Table.from_batches([chunk], schema=schema),
260262
path=path,
@@ -273,6 +275,15 @@ def _read_parquet_chunked(
273275
next_slice = None
274276
else:
275277
next_slice = df
278+
if not iterate_at_least_once:
279+
table = _add_table_partitions(
280+
table=pa.Table.from_batches([], schema=schema),
281+
path=path,
282+
path_root=path_root,
283+
)
284+
df = _table_to_df(table=table, kwargs=arrow_kwargs)
285+
yield df
286+
276287
if next_slice is not None:
277288
yield next_slice
278289

0 commit comments

Comments (0)