|
28 | 28 | _get_path_root, |
29 | 29 | _union, |
30 | 30 | ) |
31 | | -from awswrangler.s3._read_concurrent import _read_concurrent |
32 | 31 |
|
33 | 32 | _logger: logging.Logger = logging.getLogger(__name__) |
34 | 33 |
|
@@ -384,41 +383,15 @@ def _read_parquet( |
384 | 383 | s3_additional_kwargs: Optional[Dict[str, str]], |
385 | 384 | use_threads: bool, |
386 | 385 | ) -> pd.DataFrame: |
387 | | - if use_threads is False: |
388 | | - table: pa.Table = _read_parquet_file( |
| 386 | + return _arrowtable2df( |
| 387 | + table=_read_parquet_file( |
389 | 388 | path=path, |
390 | 389 | columns=columns, |
391 | 390 | categories=categories, |
392 | 391 | boto3_session=boto3_session, |
393 | 392 | s3_additional_kwargs=s3_additional_kwargs, |
394 | 393 | use_threads=use_threads, |
395 | | - ) |
396 | | - else: |
397 | | - cpus: int = _utils.ensure_cpu_count(use_threads=use_threads) |
398 | | - num_row_groups: int = _count_row_groups( |
399 | | - path=path, |
400 | | - categories=categories, |
401 | | - boto3_session=boto3_session, |
402 | | - s3_additional_kwargs=s3_additional_kwargs, |
403 | | - use_threads=use_threads, |
404 | | - ) |
405 | | - with concurrent.futures.ThreadPoolExecutor(max_workers=cpus) as executor: |
406 | | - tables: Tuple[pa.Table, ...] = tuple( |
407 | | - executor.map( |
408 | | - _read_parquet_row_group, |
409 | | - range(num_row_groups), |
410 | | - itertools.repeat(path), |
411 | | - itertools.repeat(columns), |
412 | | - itertools.repeat(categories), |
413 | | - itertools.repeat(_utils.boto3_to_primitives(boto3_session=boto3_session)), |
414 | | - itertools.repeat(s3_additional_kwargs), |
415 | | - itertools.repeat(use_threads), |
416 | | - ) |
417 | | - ) |
418 | | - table = pa.lib.concat_tables(tables, promote=False) |
419 | | - _logger.debug("Converting PyArrow Table to Pandas DataFrame...") |
420 | | - return _arrowtable2df( |
421 | | - table=table, |
| 394 | + ), |
422 | 395 | categories=categories, |
423 | 396 | safe=safe, |
424 | 397 | use_threads=use_threads, |
@@ -604,9 +577,6 @@ def read_parquet( |
604 | 577 | boto3_session=boto3_session, |
605 | 578 | s3_additional_kwargs=s3_additional_kwargs, |
606 | 579 | ) |
607 | | - if use_threads is True: |
608 | | - args["use_threads"] = True |
609 | | - return _read_concurrent(func=_read_parquet, paths=paths, ignore_index=None, **args) |
610 | 580 | return _union(dfs=[_read_parquet(path=p, **args) for p in paths], ignore_index=None) |
611 | 581 |
|
612 | 582 |
|
|
0 commit comments