Skip to content

Commit 67efc06

Browse files
authored
feat: Support partitioning by index cols (#2528)
Signed-off-by: Anton Kukushkin <[email protected]>
1 parent 86bbb5a commit 67efc06

File tree

2 files changed: 12 additions and 4 deletions.

awswrangler/_data_types.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -554,9 +554,11 @@ def pyarrow_types_from_pandas( # pylint: disable=too-many-branches,too-many-sta
554554
fields = pa.Schema.from_pandas(df=df.reset_index().drop(columns=cols), preserve_index=False)
555555
for field in fields:
556556
name = str(field.name)
557-
_logger.debug("Inferring PyArrow type from index: %s", name)
558-
cols_dtypes[name] = field.type
559-
indexes.append(name)
557+
# Check if any of the index columns must be ignored
558+
if name not in ignore_cols:
559+
_logger.debug("Inferring PyArrow type from index: %s", name)
560+
cols_dtypes[name] = field.type
561+
indexes.append(name)
560562

561563
# Merging Index
562564
sorted_cols: List[str] = indexes + list(df.columns) if index_left is True else list(df.columns) + indexes

awswrangler/s3/_write_dataset.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,13 @@ def _to_partitions(
150150
for keys, subgroup in df.groupby(by=partition_cols, observed=True):
151151
# Keys are either a primitive type or a tuple if partitioning by multiple cols
152152
keys = (keys,) if not isinstance(keys, tuple) else keys # ruff: noqa: PLW2901
153-
subgroup = subgroup.drop(partition_cols, axis="columns") # ruff: noqa: PLW2901
153+
# Drop partition columns from df
154+
subgroup.drop(
155+
columns=[col for col in partition_cols if col in subgroup.columns],
156+
inplace=True,
157+
) # ruff: noqa: PLW2901
158+
# Drop index levels if partitioning by index columns
159+
subgroup = subgroup.droplevel(level=[col for col in partition_cols if col in subgroup.index.names])
154160
prefix = _delete_objects(
155161
keys=keys,
156162
path_root=path_root,

0 commit comments

Comments
 (0)