Skip to content

Commit e6eca22

Browse files
AWS Glue now supports sort and z-order strategy for managed automated compaction for Iceberg tables in addition to binpack.
1 parent f756441 commit e6eca22

15 files changed

+663
-0
lines changed

generator/ServiceModels/glue/glue-2017-03-31.api.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5720,12 +5720,26 @@
57205720
"min":1,
57215721
"pattern":"[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*"
57225722
},
5723+
"CompactionConfiguration":{
5724+
"type":"structure",
5725+
"members":{
5726+
"icebergConfiguration":{"shape":"IcebergCompactionConfiguration"}
5727+
}
5728+
},
57235729
"CompactionMetrics":{
57245730
"type":"structure",
57255731
"members":{
57265732
"IcebergMetrics":{"shape":"IcebergCompactionMetrics"}
57275733
}
57285734
},
5735+
"CompactionStrategy":{
5736+
"type":"string",
5737+
"enum":[
5738+
"binpack",
5739+
"sort",
5740+
"z-order"
5741+
]
5742+
},
57295743
"Comparator":{
57305744
"type":"string",
57315745
"enum":[
@@ -10623,6 +10637,12 @@
1062310637
"type":"string",
1062410638
"pattern":"^arn:aws(-(cn|us-gov|iso(-[bef])?))?:iam::[0-9]{12}:role/.+"
1062510639
},
10640+
"IcebergCompactionConfiguration":{
10641+
"type":"structure",
10642+
"members":{
10643+
"strategy":{"shape":"CompactionStrategy"}
10644+
}
10645+
},
1062610646
"IcebergCompactionMetrics":{
1062710647
"type":"structure",
1062810648
"members":{
@@ -15192,6 +15212,7 @@
1519215212
"roleArn":{"shape":"ArnString"},
1519315213
"enabled":{"shape":"NullableBoolean"},
1519415214
"vpcConfiguration":{"shape":"TableOptimizerVpcConfiguration"},
15215+
"compactionConfiguration":{"shape":"CompactionConfiguration"},
1519515216
"retentionConfiguration":{"shape":"RetentionConfiguration"},
1519615217
"orphanFileDeletionConfiguration":{"shape":"OrphanFileDeletionConfiguration"}
1519715218
}
@@ -15218,6 +15239,7 @@
1521815239
},
1521915240
"error":{"shape":"MessageString"},
1522015241
"compactionMetrics":{"shape":"CompactionMetrics"},
15242+
"compactionStrategy":{"shape":"CompactionStrategy"},
1522115243
"retentionMetrics":{"shape":"RetentionMetrics"},
1522215244
"orphanFileDeletionMetrics":{"shape":"OrphanFileDeletionMetrics"}
1522315245
}

generator/ServiceModels/glue/glue-2017-03-31.docs.json

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1686,12 +1686,25 @@
16861686
"UpdateSourceControlFromJobRequest$CommitId": "<p>A commit ID for a commit in the remote repository.</p>"
16871687
}
16881688
},
1689+
"CompactionConfiguration": {
1690+
"base": "<p>The configuration for a compaction optimizer. This configuration defines how data files in your table will be compacted to improve query performance and reduce storage costs.</p>",
1691+
"refs": {
1692+
"TableOptimizerConfiguration$compactionConfiguration": "<p>The configuration for a compaction optimizer. This configuration defines how data files in your table will be compacted to improve query performance and reduce storage costs.</p>"
1693+
}
1694+
},
16891695
"CompactionMetrics": {
16901696
"base": "<p>A structure that contains compaction metrics for the optimizer run.</p>",
16911697
"refs": {
16921698
"TableOptimizerRun$compactionMetrics": "<p>A <code>CompactionMetrics</code> object containing metrics for the optimizer run.</p>"
16931699
}
16941700
},
1701+
"CompactionStrategy": {
1702+
"base": null,
1703+
"refs": {
1704+
"IcebergCompactionConfiguration$strategy": "<p>The strategy to use for compaction. Valid values are:</p> <ul> <li> <p> <code>binpack</code>: Combines small files into larger files, typically targeting sizes over 100MB, while applying any pending deletes. This is the recommended compaction strategy for most use cases. </p> </li> <li> <p> <code>sort</code>: Organizes data based on specified columns which are sorted hierarchically during compaction, improving query performance for filtered operations. This strategy is recommended when your queries frequently filter on specific columns. To use this strategy, you must first define a sort order in your Iceberg table properties using the <code>sort_order</code> table property.</p> </li> <li> <p> <code>z-order</code>: Optimizes data organization by blending multiple attributes into a single scalar value that can be used for sorting, allowing efficient querying across multiple dimensions. This strategy is recommended when you need to query data across multiple dimensions simultaneously. To use this strategy, you must first define a sort order in your Iceberg table properties using the <code>sort_order</code> table property. </p> </li> </ul> <p>If an input is not provided, the default value 'binpack' will be used.</p>",
1705+
"TableOptimizerRun$compactionStrategy": "<p>The strategy used for the compaction run. Indicates which algorithm was applied to determine how files were selected and combined during the compaction process. Valid values are:</p> <ul> <li> <p> <code>binpack</code>: Combines small files into larger files, typically targeting sizes over 100MB, while applying any pending deletes. This is the recommended compaction strategy for most use cases. </p> </li> <li> <p> <code>sort</code>: Organizes data based on specified columns which are sorted hierarchically during compaction, improving query performance for filtered operations. This strategy is recommended when your queries frequently filter on specific columns. To use this strategy, you must first define a sort order in your Iceberg table properties using the <code>sort_order</code> table property.</p> </li> <li> <p> <code>z-order</code>: Optimizes data organization by blending multiple attributes into a single scalar value that can be used for sorting, allowing efficient querying across multiple dimensions. This strategy is recommended when you need to query data across multiple dimensions simultaneously. To use this strategy, you must first define a sort order in your Iceberg table properties using the <code>sort_order</code> table property. </p> </li> </ul>"
1706+
}
1707+
},
16951708
"Comparator": {
16961709
"base": null,
16971710
"refs": {
@@ -5267,6 +5280,12 @@
52675280
"EncryptionAtRest$CatalogEncryptionServiceRole": "<p>The role that Glue assumes to encrypt and decrypt the Data Catalog objects on the caller's behalf.</p>"
52685281
}
52695282
},
5283+
"IcebergCompactionConfiguration": {
5284+
"base": "<p>The configuration for an Iceberg compaction optimizer. This configuration defines parameters for optimizing the layout of data files in Iceberg tables.</p>",
5285+
"refs": {
5286+
"CompactionConfiguration$icebergConfiguration": "<p>The configuration for an Iceberg compaction optimizer.</p>"
5287+
}
5288+
},
52705289
"IcebergCompactionMetrics": {
52715290
"base": "<p>Compaction metrics for Iceberg for the optimizer run.</p>",
52725291
"refs": {

generator/ServiceModels/glue/glue-2017-03-31.normal.json

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7349,6 +7349,16 @@
73497349
"min":1,
73507350
"pattern":"[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\t]*"
73517351
},
7352+
"CompactionConfiguration":{
7353+
"type":"structure",
7354+
"members":{
7355+
"icebergConfiguration":{
7356+
"shape":"IcebergCompactionConfiguration",
7357+
"documentation":"<p>The configuration for an Iceberg compaction optimizer.</p>"
7358+
}
7359+
},
7360+
"documentation":"<p>The configuration for a compaction optimizer. This configuration defines how data files in your table will be compacted to improve query performance and reduce storage costs.</p>"
7361+
},
73527362
"CompactionMetrics":{
73537363
"type":"structure",
73547364
"members":{
@@ -7359,6 +7369,14 @@
73597369
},
73607370
"documentation":"<p>A structure that contains compaction metrics for the optimizer run.</p>"
73617371
},
7372+
"CompactionStrategy":{
7373+
"type":"string",
7374+
"enum":[
7375+
"binpack",
7376+
"sort",
7377+
"z-order"
7378+
]
7379+
},
73627380
"Comparator":{
73637381
"type":"string",
73647382
"enum":[
@@ -16545,6 +16563,16 @@
1654516563
"type":"string",
1654616564
"pattern":"^arn:aws(-(cn|us-gov|iso(-[bef])?))?:iam::[0-9]{12}:role/.+"
1654716565
},
16566+
"IcebergCompactionConfiguration":{
16567+
"type":"structure",
16568+
"members":{
16569+
"strategy":{
16570+
"shape":"CompactionStrategy",
16571+
"documentation":"<p>The strategy to use for compaction. Valid values are:</p> <ul> <li> <p> <code>binpack</code>: Combines small files into larger files, typically targeting sizes over 100MB, while applying any pending deletes. This is the recommended compaction strategy for most use cases. </p> </li> <li> <p> <code>sort</code>: Organizes data based on specified columns which are sorted hierarchically during compaction, improving query performance for filtered operations. This strategy is recommended when your queries frequently filter on specific columns. To use this strategy, you must first define a sort order in your Iceberg table properties using the <code>sort_order</code> table property.</p> </li> <li> <p> <code>z-order</code>: Optimizes data organization by blending multiple attributes into a single scalar value that can be used for sorting, allowing efficient querying across multiple dimensions. This strategy is recommended when you need to query data across multiple dimensions simultaneously. To use this strategy, you must first define a sort order in your Iceberg table properties using the <code>sort_order</code> table property. </p> </li> </ul> <p>If an input is not provided, the default value 'binpack' will be used.</p>"
16572+
}
16573+
},
16574+
"documentation":"<p>The configuration for an Iceberg compaction optimizer. This configuration defines parameters for optimizing the layout of data files in Iceberg tables.</p>"
16575+
},
1654816576
"IcebergCompactionMetrics":{
1654916577
"type":"structure",
1655016578
"members":{
@@ -24936,6 +24964,10 @@
2493624964
"shape":"TableOptimizerVpcConfiguration",
2493724965
"documentation":"<p>A <code>TableOptimizerVpcConfiguration</code> object representing the VPC configuration for a table optimizer.</p> <p>This configuration is necessary to perform optimization on tables that are in a customer VPC.</p>"
2493824966
},
24967+
"compactionConfiguration":{
24968+
"shape":"CompactionConfiguration",
24969+
"documentation":"<p>The configuration for a compaction optimizer. This configuration defines how data files in your table will be compacted to improve query performance and reduce storage costs.</p>"
24970+
},
2493924971
"retentionConfiguration":{
2494024972
"shape":"RetentionConfiguration",
2494124973
"documentation":"<p>The configuration for a snapshot retention optimizer.</p>"
@@ -24985,6 +25017,10 @@
2498525017
"shape":"CompactionMetrics",
2498625018
"documentation":"<p>A <code>CompactionMetrics</code> object containing metrics for the optimizer run.</p>"
2498725019
},
25020+
"compactionStrategy":{
25021+
"shape":"CompactionStrategy",
25022+
"documentation":"<p>The strategy used for the compaction run. Indicates which algorithm was applied to determine how files were selected and combined during the compaction process. Valid values are:</p> <ul> <li> <p> <code>binpack</code>: Combines small files into larger files, typically targeting sizes over 100MB, while applying any pending deletes. This is the recommended compaction strategy for most use cases. </p> </li> <li> <p> <code>sort</code>: Organizes data based on specified columns which are sorted hierarchically during compaction, improving query performance for filtered operations. This strategy is recommended when your queries frequently filter on specific columns. To use this strategy, you must first define a sort order in your Iceberg table properties using the <code>sort_order</code> table property.</p> </li> <li> <p> <code>z-order</code>: Optimizes data organization by blending multiple attributes into a single scalar value that can be used for sorting, allowing efficient querying across multiple dimensions. This strategy is recommended when you need to query data across multiple dimensions simultaneously. To use this strategy, you must first define a sort order in your Iceberg table properties using the <code>sort_order</code> table property. </p> </li> </ul>"
25023+
},
2498825024
"retentionMetrics":{
2498925025
"shape":"RetentionMetrics",
2499025026
"documentation":"<p>A <code>RetentionMetrics</code> object containing metrics for the optimizer run.</p>"
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License").
5+
* You may not use this file except in compliance with the License.
6+
* A copy of the License is located at
7+
*
8+
* http://aws.amazon.com/apache2.0
9+
*
10+
* or in the "license" file accompanying this file. This file is distributed
11+
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12+
* express or implied. See the License for the specific language governing
13+
* permissions and limitations under the License.
14+
*/
15+
16+
/*
17+
* Do not modify this file. This file is generated from the glue-2017-03-31.normal.json service model.
18+
*/
19+
using System;
20+
using System.Collections.Generic;
21+
using System.Xml.Serialization;
22+
using System.Text;
23+
using System.IO;
24+
using System.Net;
25+
26+
using Amazon.Runtime;
27+
using Amazon.Runtime.Internal;
28+
29+
#pragma warning disable CS0612,CS0618,CS1570
30+
namespace Amazon.Glue.Model
31+
{
32+
/// <summary>
33+
/// The configuration for a compaction optimizer. This configuration defines how data
34+
/// files in your table will be compacted to improve query performance and reduce storage
35+
/// costs.
36+
/// </summary>
37+
public partial class CompactionConfiguration
38+
{
39+
private IcebergCompactionConfiguration _icebergConfiguration;
40+
41+
/// <summary>
42+
/// Gets and sets the property IcebergConfiguration.
43+
/// <para>
44+
/// The configuration for an Iceberg compaction optimizer.
45+
/// </para>
46+
/// </summary>
47+
public IcebergCompactionConfiguration IcebergConfiguration
48+
{
49+
get { return this._icebergConfiguration; }
50+
set { this._icebergConfiguration = value; }
51+
}
52+
53+
// Reports whether IcebergConfiguration has been assigned, i.e. its backing field is non-null.
54+
internal bool IsSetIcebergConfiguration()
55+
{
56+
return this._icebergConfiguration != null;
57+
}
58+
59+
}
60+
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License").
5+
* You may not use this file except in compliance with the License.
6+
* A copy of the License is located at
7+
*
8+
* http://aws.amazon.com/apache2.0
9+
*
10+
* or in the "license" file accompanying this file. This file is distributed
11+
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12+
* express or implied. See the License for the specific language governing
13+
* permissions and limitations under the License.
14+
*/
15+
16+
/*
17+
* Do not modify this file. This file is generated from the glue-2017-03-31.normal.json service model.
18+
*/
19+
using System;
20+
using System.Collections.Generic;
21+
using System.Xml.Serialization;
22+
using System.Text;
23+
using System.IO;
24+
using System.Net;
25+
26+
using Amazon.Runtime;
27+
using Amazon.Runtime.Internal;
28+
29+
#pragma warning disable CS0612,CS0618,CS1570
30+
namespace Amazon.Glue.Model
31+
{
32+
/// <summary>
33+
/// The configuration for an Iceberg compaction optimizer. This configuration defines
34+
/// parameters for optimizing the layout of data files in Iceberg tables.
35+
/// </summary>
36+
public partial class IcebergCompactionConfiguration
37+
{
38+
private CompactionStrategy _strategy;
39+
40+
/// <summary>
41+
/// Gets and sets the property Strategy.
42+
/// <para>
43+
/// The strategy to use for compaction. Valid values are:
44+
/// </para>
45+
/// <ul> <li>
46+
/// <para>
47+
/// <c>binpack</c>: Combines small files into larger files, typically targeting sizes
48+
/// over 100MB, while applying any pending deletes. This is the recommended compaction
49+
/// strategy for most use cases.
50+
/// </para>
51+
/// </li> <li>
52+
/// <para>
53+
/// <c>sort</c>: Organizes data based on specified columns which are sorted hierarchically
54+
/// during compaction, improving query performance for filtered operations. This strategy
55+
/// is recommended when your queries frequently filter on specific columns. To use this
56+
/// strategy, you must first define a sort order in your Iceberg table properties using
57+
/// the <c>sort_order</c> table property.
58+
/// </para>
59+
/// </li> <li>
60+
/// <para>
61+
/// <c>z-order</c>: Optimizes data organization by blending multiple attributes into
62+
/// a single scalar value that can be used for sorting, allowing efficient querying across
63+
/// multiple dimensions. This strategy is recommended when you need to query data across
64+
/// multiple dimensions simultaneously. To use this strategy, you must first define a
65+
/// sort order in your Iceberg table properties using the <c>sort_order</c> table property.
66+
///
67+
/// </para>
68+
/// </li> </ul>
69+
/// <para>
70+
/// If an input is not provided, the default value 'binpack' will be used.
71+
/// </para>
72+
/// </summary>
73+
public CompactionStrategy Strategy
74+
{
75+
get { return this._strategy; }
76+
set { this._strategy = value; }
77+
}
78+
79+
// Reports whether Strategy has been assigned, i.e. its backing field is non-null.
80+
internal bool IsSetStrategy()
81+
{
82+
return this._strategy != null;
83+
}
84+
85+
}
86+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License").
5+
* You may not use this file except in compliance with the License.
6+
* A copy of the License is located at
7+
*
8+
* http://aws.amazon.com/apache2.0
9+
*
10+
* or in the "license" file accompanying this file. This file is distributed
11+
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12+
* express or implied. See the License for the specific language governing
13+
* permissions and limitations under the License.
14+
*/
15+
16+
/*
17+
* Do not modify this file. This file is generated from the glue-2017-03-31.normal.json service model.
18+
*/
19+
using System;
20+
using System.Collections.Generic;
21+
using System.Globalization;
22+
using System.IO;
23+
using System.Text;
24+
using System.Xml.Serialization;
25+
26+
using Amazon.Glue.Model;
27+
using Amazon.Runtime;
28+
using Amazon.Runtime.Internal;
29+
using Amazon.Runtime.Internal.Transform;
30+
using Amazon.Runtime.Internal.Util;
31+
#pragma warning disable CS0612,CS0618
32+
namespace Amazon.Glue.Model.Internal.MarshallTransformations
33+
{
34+
/// <summary>
35+
/// Marshaller that writes a CompactionConfiguration as JSON through a JsonMarshallerContext.
36+
/// </summary>
37+
public class CompactionConfigurationMarshaller : IRequestMarshaller<CompactionConfiguration, JsonMarshallerContext>
38+
{
39+
/// <summary>
40+
/// Marshalls the request object by writing each of its set members as a JSON property to the context's writer.
41+
/// </summary>
42+
/// <param name="requestObject">The CompactionConfiguration to write; when null, nothing is written.</param>
43+
/// <param name="context">The marshaller context whose Writer receives the JSON output.</param>
44+
/// <returns></returns>
45+
public void Marshall(CompactionConfiguration requestObject, JsonMarshallerContext context)
46+
{
47+
if(requestObject == null)
48+
return;
49+
if(requestObject.IsSetIcebergConfiguration())
50+
{
51+
context.Writer.WritePropertyName("icebergConfiguration");
52+
context.Writer.WriteStartObject();
53+
54+
var marshaller = IcebergCompactionConfigurationMarshaller.Instance;
55+
marshaller.Marshall(requestObject.IcebergConfiguration, context); // nested structure is written by its own marshaller
56+
57+
context.Writer.WriteEndObject();
58+
}
59+
60+
}
61+
62+
/// <summary>
63+
/// Singleton Marshaller.
64+
/// </summary>
65+
public readonly static CompactionConfigurationMarshaller Instance = new CompactionConfigurationMarshaller();
66+
67+
}
68+
}

0 commit comments

Comments
 (0)