Removing file-level and row-group-level concurrency.

igorborgest · igorborgest · commit b0e6e976f511 · 2020-09-05T12:42:59.000-03:00
diff --git a/awswrangler/s3/_read_concurrent.py b/awswrangler/s3/_read_concurrent.py
diff --git a/awswrangler/s3/_read_parquet.py b/awswrangler/s3/_read_parquet.py
@@ -28,7 +28,6 @@
     _get_path_root,
     _union,
 )
-from awswrangler.s3._read_concurrent import _read_concurrent
 
 _logger: logging.Logger = logging.getLogger(__name__)
 
@@ -384,41 +383,15 @@ def _read_parquet(
     s3_additional_kwargs: Optional[Dict[str, str]],
     use_threads: bool,
 ) -> pd.DataFrame:
-    if use_threads is False:
-        table: pa.Table = _read_parquet_file(
+    return _arrowtable2df(
+        table=_read_parquet_file(
             path=path,
             columns=columns,
             categories=categories,
             boto3_session=boto3_session,
             s3_additional_kwargs=s3_additional_kwargs,
             use_threads=use_threads,
-        )
-    else:
-        cpus: int = _utils.ensure_cpu_count(use_threads=use_threads)
-        num_row_groups: int = _count_row_groups(
-            path=path,
-            categories=categories,
-            boto3_session=boto3_session,
-            s3_additional_kwargs=s3_additional_kwargs,
-            use_threads=use_threads,
-        )
-        with concurrent.futures.ThreadPoolExecutor(max_workers=cpus) as executor:
-            tables: Tuple[pa.Table, ...] = tuple(
-                executor.map(
-                    _read_parquet_row_group,
-                    range(num_row_groups),
-                    itertools.repeat(path),
-                    itertools.repeat(columns),
-                    itertools.repeat(categories),
-                    itertools.repeat(_utils.boto3_to_primitives(boto3_session=boto3_session)),
-                    itertools.repeat(s3_additional_kwargs),
-                    itertools.repeat(use_threads),
-                )
-            )
-            table = pa.lib.concat_tables(tables, promote=False)
-    _logger.debug("Converting PyArrow Table to Pandas DataFrame...")
-    return _arrowtable2df(
-        table=table,
+        ),
         categories=categories,
         safe=safe,
         use_threads=use_threads,
@@ -604,9 +577,6 @@ def read_parquet(
             boto3_session=boto3_session,
             s3_additional_kwargs=s3_additional_kwargs,
         )
-    if use_threads is True:
-        args["use_threads"] = True
-        return _read_concurrent(func=_read_parquet, paths=paths, ignore_index=None, **args)
     return _union(dfs=[_read_parquet(path=p, **args) for p in paths], ignore_index=None)
 
 
diff --git a/awswrangler/s3/_read_text.py b/awswrangler/s3/_read_text.py
@@ -20,7 +20,6 @@
     _get_path_root,
     _union,
 )
-from awswrangler.s3._read_concurrent import _read_concurrent
 
 _logger: logging.Logger = logging.getLogger(__name__)
 
@@ -137,8 +136,6 @@ def _read_text(
         ret = _read_text_chunked(paths=paths, chunksize=chunksize, **args)
     elif len(paths) == 1:
         ret = _read_text_file(path=paths[0], **args)
-    elif use_threads is True:
-        ret = _read_concurrent(func=_read_text_file, paths=paths, ignore_index=ignore_index, **args)
     else:
         ret = _union(dfs=[_read_text_file(path=p, **args) for p in paths], ignore_index=ignore_index)
     return ret
@@ -361,7 +358,7 @@ def read_fwf(
     Reading all fixed-width formatted (FWF) files under a prefix
 
     >>> import awswrangler as wr
-    >>> df = wr.s3.read_fwf(path='s3://bucket/prefix/', widths=[1, 3], names=['c0', 'c1])
+    >>> df = wr.s3.read_fwf(path='s3://bucket/prefix/', widths=[1, 3], names=['c0', 'c1'])
 
     Reading all fixed-width formatted (FWF) files from a list