|
28 | 28 | _apply_partition_filter, |
29 | 29 | _check_version_id, |
30 | 30 | _extract_partitions_dtypes_from_table_details, |
| 31 | + _get_num_output_blocks, |
31 | 32 | _get_path_ignore_suffix, |
32 | 33 | _get_path_root, |
33 | 34 | _get_paths_for_glue_table, |
@@ -137,7 +138,7 @@ def _read_orc( |
137 | 138 | schema: pa.schema | None, |
138 | 139 | columns: list[str] | None, |
139 | 140 | use_threads: bool | int, |
140 | | - parallelism: int, |
| 141 | + override_num_blocks: int, |
141 | 142 | version_ids: dict[str, str] | None, |
142 | 143 | s3_client: "S3Client" | None, |
143 | 144 | s3_additional_kwargs: dict[str, Any] | None, |
@@ -283,8 +284,6 @@ def read_orc( |
283 | 284 | >>> df = wr.s3.read_orc(path, dataset=True, partition_filter=my_filter) |
284 | 285 |
|
285 | 286 | """ |
286 | | - ray_args = ray_args if ray_args else {} |
287 | | - |
288 | 287 | s3_client = _utils.client(service_name="s3", session=boto3_session) |
289 | 288 | paths: list[str] = _path2list( |
290 | 289 | path=path, |
@@ -330,7 +329,7 @@ def read_orc( |
330 | 329 | schema=schema, |
331 | 330 | columns=columns, |
332 | 331 | use_threads=use_threads, |
333 | | - parallelism=ray_args.get("parallelism", -1), |
| 332 | + override_num_blocks=_get_num_output_blocks(ray_args), |
334 | 333 | s3_client=s3_client, |
335 | 334 | s3_additional_kwargs=s3_additional_kwargs, |
336 | 335 | arrow_kwargs=arrow_kwargs, |
|
0 commit comments