Skip to content

Commit cb8c69f

Browse files
authored
Minor - Adding partition params (#1035)
1 parent df26b1e commit cb8c69f

File tree

3 files changed

+19
-0
lines changed

3 files changed

+19
-0
lines changed

awswrangler/catalog/_add.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def add_csv_partitions(
5353
serde_parameters: Optional[Dict[str, str]] = None,
5454
boto3_session: Optional[boto3.Session] = None,
5555
columns_types: Optional[Dict[str, str]] = None,
56+
partitions_parameters: Optional[Dict[str, str]] = None,
5657
) -> None:
5758
r"""Add partitions (metadata) to a CSV Table in the AWS Glue Catalog.
5859
@@ -89,6 +90,8 @@ def add_csv_partitions(
8990
Only required for Hive compatibility.
9091
Dictionary with keys as column names and values as data types (e.g. {'col0': 'bigint', 'col1': 'double'}).
9192
P.S. Only materialized columns please, not partition columns.
93+
partitions_parameters: Optional[Dict[str, str]]
94+
Dictionary with key-value pairs defining partition parameters.
9295
9396
Returns
9497
-------
@@ -120,6 +123,7 @@ def add_csv_partitions(
120123
columns_types=columns_types,
121124
serde_library=serde_library,
122125
serde_parameters=serde_parameters,
126+
partitions_parameters=partitions_parameters,
123127
)
124128
for k, v in partitions_values.items()
125129
]
@@ -138,6 +142,7 @@ def add_json_partitions(
138142
serde_parameters: Optional[Dict[str, str]] = None,
139143
boto3_session: Optional[boto3.Session] = None,
140144
columns_types: Optional[Dict[str, str]] = None,
145+
partitions_parameters: Optional[Dict[str, str]] = None,
141146
) -> None:
142147
r"""Add partitions (metadata) to a JSON Table in the AWS Glue Catalog.
143148
@@ -172,6 +177,8 @@ def add_json_partitions(
172177
Only required for Hive compatibility.
173178
Dictionary with keys as column names and values as data types (e.g. {'col0': 'bigint', 'col1': 'double'}).
174179
P.S. Only materialized columns please, not partition columns.
180+
partitions_parameters: Optional[Dict[str, str]]
181+
Dictionary with key-value pairs defining partition parameters.
175182
176183
Returns
177184
-------
@@ -202,6 +209,7 @@ def add_json_partitions(
202209
columns_types=columns_types,
203210
serde_library=serde_library,
204211
serde_parameters=serde_parameters,
212+
partitions_parameters=partitions_parameters,
205213
)
206214
for k, v in partitions_values.items()
207215
]
@@ -218,6 +226,7 @@ def add_parquet_partitions(
218226
compression: Optional[str] = None,
219227
boto3_session: Optional[boto3.Session] = None,
220228
columns_types: Optional[Dict[str, str]] = None,
229+
partitions_parameters: Optional[Dict[str, str]] = None,
221230
) -> None:
222231
"""Add partitions (metadata) to a Parquet Table in the AWS Glue Catalog.
223232
@@ -245,6 +254,8 @@ def add_parquet_partitions(
245254
Only required for Hive compatibility.
246255
Dictionary with keys as column names and values as data types (e.g. {'col0': 'bigint', 'col1': 'double'}).
247256
P.S. Only materialized columns please, not partition columns.
257+
partitions_parameters: Optional[Dict[str, str]]
258+
Dictionary with key-value pairs defining partition parameters.
248259
249260
Returns
250261
-------
@@ -274,6 +285,7 @@ def add_parquet_partitions(
274285
bucketing_info=bucketing_info,
275286
compression=compression,
276287
columns_types=columns_types,
288+
partitions_parameters=partitions_parameters,
277289
)
278290
for k, v in partitions_values.items()
279291
]

awswrangler/catalog/_definitions.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ def _parquet_partition_definition(
7171
bucketing_info: Optional[Tuple[List[str], int]],
7272
compression: Optional[str],
7373
columns_types: Optional[Dict[str, str]],
74+
partitions_parameters: Optional[Dict[str, str]],
7475
) -> Dict[str, Any]:
7576
compressed: bool = compression is not None
7677
definition: Dict[str, Any] = {
@@ -88,6 +89,7 @@ def _parquet_partition_definition(
8889
"BucketColumns": [] if bucketing_info is None else bucketing_info[0],
8990
},
9091
"Values": values,
92+
"Parameters": {} if partitions_parameters is None else partitions_parameters,
9193
}
9294
if columns_types is not None:
9395
definition["StorageDescriptor"]["Columns"] = [
@@ -155,6 +157,7 @@ def _csv_partition_definition(
155157
serde_library: Optional[str],
156158
serde_parameters: Optional[Dict[str, str]],
157159
columns_types: Optional[Dict[str, str]],
160+
partitions_parameters: Optional[Dict[str, str]],
158161
) -> Dict[str, Any]:
159162
compressed: bool = compression is not None
160163
serde_info = {
@@ -175,6 +178,7 @@ def _csv_partition_definition(
175178
"BucketColumns": [] if bucketing_info is None else bucketing_info[0],
176179
},
177180
"Values": values,
181+
"Parameters": {} if partitions_parameters is None else partitions_parameters,
178182
}
179183
if columns_types is not None:
180184
definition["StorageDescriptor"]["Columns"] = [
@@ -232,6 +236,7 @@ def _json_partition_definition(
232236
serde_library: Optional[str],
233237
serde_parameters: Optional[Dict[str, str]],
234238
columns_types: Optional[Dict[str, str]],
239+
partitions_parameters: Optional[Dict[str, str]],
235240
) -> Dict[str, Any]:
236241
compressed: bool = compression is not None
237242
serde_info = {
@@ -250,6 +255,7 @@ def _json_partition_definition(
250255
"BucketColumns": [] if bucketing_info is None else bucketing_info[0],
251256
},
252257
"Values": values,
258+
"Parameters": {} if partitions_parameters is None else partitions_parameters,
253259
}
254260
if columns_types is not None:
255261
definition["StorageDescriptor"]["Columns"] = [

tests/test_catalog.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,7 @@ def test_catalog_json(path: str, glue_database: str, glue_table: str, account_id
378378
table=glue_table,
379379
partitions_values={f"{path}y=2020/m=1/": ["2020", "1"], f"{path}y=2021/m=2/": ["2021", "2"]},
380380
compression="snappy",
381+
partitions_parameters={"retention": "365"},
381382
)
382383
partitions_values = wr.catalog.get_partitions(database=glue_database, table=glue_table)
383384
assert len(partitions_values) == 2

0 commit comments

Comments
 (0)