Skip to content

Commit 6723d40

Browse files
committed
feat: define table properties with default values
Just copied everything from the TableProperties.java as of today
1 parent bc4526d commit 6723d40

File tree

3 files changed

+285
-0
lines changed

3 files changed

+285
-0
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ set(ICEBERG_SOURCES
4141
statistics_file.cc
4242
table.cc
4343
table_metadata.cc
44+
table_properties.cc
4445
table_scan.cc
4546
transform.cc
4647
transform_function.cc

src/iceberg/table_properties.cc

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/table_properties.h"
21+
22+
namespace iceberg {
23+
24+
const std::unordered_set<std::string>& TableProperties::reserved_properties() {
25+
static const std::unordered_set<std::string> kReservedProperties = {
26+
kFormatVersion.key(), kUuid.key(),
27+
kSnapshotCount.key(), kCurrentSnapshotId.key(),
28+
kCurrentSnapshotSummary.key(), kCurrentSnapshotTimestamp.key(),
29+
kCurrentSchema.key(), kDefaultPartitionSpec.key(),
30+
kDefaultSortOrder.key()};
31+
return kReservedProperties;
32+
}
33+
34+
std::unique_ptr<TableProperties> TableProperties::default_properties() {
35+
return std::make_unique<TableProperties>();
36+
}
37+
38+
std::unique_ptr<TableProperties> TableProperties::FromMap(
39+
const std::unordered_map<std::string, std::string>& options) {
40+
auto properties = std::make_unique<TableProperties>();
41+
for (const auto& [key, value] : options) {
42+
properties->configs_[key] = value;
43+
}
44+
return properties;
45+
}
46+
47+
} // namespace iceberg

src/iceberg/table_properties.h

Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
#include <limits>
23+
#include <memory>
24+
#include <string>
25+
#include <unordered_map>
26+
#include <unordered_set>
27+
28+
#include "iceberg/iceberg_export.h"
29+
#include "iceberg/util/config.h"
30+
31+
namespace iceberg {
32+
33+
/// \brief Table properties for Iceberg tables.
34+
///
35+
/// This class provides configuration entries for various Iceberg table properties
36+
/// including format settings, commit behavior, file formats, compression settings,
37+
/// and other table-level configurations.
38+
class ICEBERG_EXPORT TableProperties : public ConfigBase<TableProperties> {
39+
public:
40+
template <typename T>
41+
using Entry = const ConfigBase<TableProperties>::Entry<T>;
42+
43+
#define STRING_ENTRY(name, key, value) inline static Entry<std::string> name{key, value};
44+
#define INT32_ENTRY(name, key, value) inline static Entry<int32_t> name{key, value};
45+
#define INT64_ENTRY(name, key, value) inline static Entry<int64_t> name{key, value};
46+
#define BOOL_ENTRY(name, key, value) inline static Entry<bool> name{key, value};
47+
#define DOUBLE_ENTRY(name, key, value) inline static Entry<double> name{key, value};
48+
49+
// Reserved table properties
50+
STRING_ENTRY(kFormatVersion, "format-version", "");
51+
STRING_ENTRY(kUuid, "uuid", "");
52+
STRING_ENTRY(kSnapshotCount, "snapshot-count", "");
53+
STRING_ENTRY(kCurrentSnapshotSummary, "current-snapshot-summary", "");
54+
STRING_ENTRY(kCurrentSnapshotId, "current-snapshot-id", "");
55+
STRING_ENTRY(kCurrentSnapshotTimestamp, "current-snapshot-timestamp-ms", "");
56+
STRING_ENTRY(kCurrentSchema, "current-schema", "");
57+
STRING_ENTRY(kDefaultPartitionSpec, "default-partition-spec", "");
58+
STRING_ENTRY(kDefaultSortOrder, "default-sort-order", "");
59+
60+
// Commit properties
61+
INT32_ENTRY(kCommitNumRetries, "commit.retry.num-retries", 4);
62+
INT32_ENTRY(kCommitMinRetryWaitMs, "commit.retry.min-wait-ms", 100);
63+
INT32_ENTRY(kCommitMaxRetryWaitMs, "commit.retry.max-wait-ms", 60 * 1000);
64+
INT32_ENTRY(kCommitTotalRetryTimeMs, "commit.retry.total-timeout-ms", 30 * 60 * 1000);
65+
INT32_ENTRY(kCommitNumStatusChecks, "commit.status-check.num-retries", 3);
66+
INT64_ENTRY(kCommitStatusChecksMinWaitMs, "commit.status-check.min-wait-ms", 1000);
67+
INT64_ENTRY(kCommitStatusChecksMaxWaitMs, "commit.status-check.max-wait-ms", 60 * 1000);
68+
INT64_ENTRY(kCommitStatusChecksTotalWaitMs, "commit.status-check.total-timeout-ms",
69+
30 * 60 * 1000);
70+
71+
// Manifest properties
72+
INT64_ENTRY(kManifestTargetSizeBytes, "commit.manifest.target-size-bytes",
73+
8 * 1024 * 1024);
74+
INT32_ENTRY(kManifestMinMergeCount, "commit.manifest.min-count-to-merge", 100);
75+
BOOL_ENTRY(kManifestMergeEnabled, "commit.manifest-merge.enabled", true);
76+
77+
// File format properties
78+
STRING_ENTRY(kDefaultFileFormat, "write.format.default", "parquet");
79+
STRING_ENTRY(kDeleteDefaultFileFormat, "write.delete.format.default", "parquet");
80+
81+
// Parquet properties
82+
INT32_ENTRY(kParquetRowGroupSizeBytes, "write.parquet.row-group-size-bytes",
83+
128 * 1024 * 1024);
84+
INT32_ENTRY(kDeleteParquetRowGroupSizeBytes,
85+
"write.delete.parquet.row-group-size-bytes", 128 * 1024 * 1024);
86+
INT32_ENTRY(kParquetPageSizeBytes, "write.parquet.page-size-bytes", 1024 * 1024);
87+
INT32_ENTRY(kDeleteParquetPageSizeBytes, "write.delete.parquet.page-size-bytes",
88+
1024 * 1024);
89+
INT32_ENTRY(kParquetPageRowLimit, "write.parquet.page-row-limit", 20000);
90+
INT32_ENTRY(kDeleteParquetPageRowLimit, "write.delete.parquet.page-row-limit", 20000);
91+
INT32_ENTRY(kParquetDictSizeBytes, "write.parquet.dict-size-bytes", 2 * 1024 * 1024);
92+
INT32_ENTRY(kDeleteParquetDictSizeBytes, "write.delete.parquet.dict-size-bytes",
93+
2 * 1024 * 1024);
94+
STRING_ENTRY(kParquetCompression, "write.parquet.compression-codec", "zstd");
95+
STRING_ENTRY(kDeleteParquetCompression, "write.delete.parquet.compression-codec",
96+
"zstd");
97+
STRING_ENTRY(kParquetCompressionLevel, "write.parquet.compression-level", "");
98+
STRING_ENTRY(kDeleteParquetCompressionLevel, "write.delete.parquet.compression-level",
99+
"");
100+
INT32_ENTRY(kParquetRowGroupCheckMinRecordCount,
101+
"write.parquet.row-group-check-min-record-count", 100);
102+
INT32_ENTRY(kDeleteParquetRowGroupCheckMinRecordCount,
103+
"write.delete.parquet.row-group-check-min-record-count", 100);
104+
INT32_ENTRY(kParquetRowGroupCheckMaxRecordCount,
105+
"write.parquet.row-group-check-max-record-count", 10000);
106+
INT32_ENTRY(kDeleteParquetRowGroupCheckMaxRecordCount,
107+
"write.delete.parquet.row-group-check-max-record-count", 10000);
108+
INT32_ENTRY(kParquetBloomFilterMaxBytes, "write.parquet.bloom-filter-max-bytes",
109+
1024 * 1024);
110+
DOUBLE_ENTRY(kParquetBloomFilterColumnFppDefault,
111+
"write.parquet.bloom-filter-fpp.column", 0.01);
112+
113+
// Avro properties
114+
STRING_ENTRY(kAvroCompression, "write.avro.compression-codec", "gzip");
115+
STRING_ENTRY(kDeleteAvroCompression, "write.delete.avro.compression-codec", "gzip");
116+
STRING_ENTRY(kAvroCompressionLevel, "write.avro.compression-level", "");
117+
STRING_ENTRY(kDeleteAvroCompressionLevel, "write.delete.avro.compression-level", "");
118+
119+
// ORC properties
120+
INT64_ENTRY(kOrcStripeSizeBytes, "write.orc.stripe-size-bytes", 64L * 1024 * 1024);
121+
STRING_ENTRY(kOrcBloomFilterColumns, "write.orc.bloom.filter.columns", "");
122+
DOUBLE_ENTRY(kOrcBloomFilterFpp, "write.orc.bloom.filter.fpp", 0.05);
123+
INT64_ENTRY(kDeleteOrcStripeSizeBytes, "write.delete.orc.stripe-size-bytes",
124+
64L * 1024 * 1024);
125+
INT64_ENTRY(kOrcBlockSizeBytes, "write.orc.block-size-bytes", 256L * 1024 * 1024);
126+
INT64_ENTRY(kDeleteOrcBlockSizeBytes, "write.delete.orc.block-size-bytes",
127+
256L * 1024 * 1024);
128+
INT32_ENTRY(kOrcWriteBatchSize, "write.orc.vectorized.batch-size", 1024);
129+
INT32_ENTRY(kDeleteOrcWriteBatchSize, "write.delete.orc.vectorized.batch-size", 1024);
130+
STRING_ENTRY(kOrcCompression, "write.orc.compression-codec", "zlib");
131+
STRING_ENTRY(kDeleteOrcCompression, "write.delete.orc.compression-codec", "zlib");
132+
STRING_ENTRY(kOrcCompressionStrategy, "write.orc.compression-strategy", "speed");
133+
STRING_ENTRY(kDeleteOrcCompressionStrategy, "write.delete.orc.compression-strategy",
134+
"speed");
135+
136+
// Read properties
137+
INT64_ENTRY(kSplitSize, "read.split.target-size", 128 * 1024 * 1024);
138+
INT64_ENTRY(kMetadataSplitSize, "read.split.metadata-target-size", 32 * 1024 * 1024);
139+
INT32_ENTRY(kSplitLookback, "read.split.planning-lookback", 10);
140+
INT64_ENTRY(kSplitOpenFileCost, "read.split.open-file-cost", 4 * 1024 * 1024);
141+
BOOL_ENTRY(kAdaptiveSplitSizeEnabled, "read.split.adaptive-size.enabled", true);
142+
BOOL_ENTRY(kParquetVectorizationEnabled, "read.parquet.vectorization.enabled", true);
143+
INT32_ENTRY(kParquetBatchSize, "read.parquet.vectorization.batch-size", 5000);
144+
BOOL_ENTRY(kOrcVectorizationEnabled, "read.orc.vectorization.enabled", false);
145+
INT32_ENTRY(kOrcBatchSize, "read.orc.vectorization.batch-size", 5000);
146+
STRING_ENTRY(kDataPlanningMode, "read.data-planning-mode", "auto");
147+
STRING_ENTRY(kDeletePlanningMode, "read.delete-planning-mode", "auto");
148+
149+
// Write properties
150+
BOOL_ENTRY(kObjectStoreEnabled, "write.object-storage.enabled", false);
151+
BOOL_ENTRY(kWriteObjectStorePartitionedPaths, "write.object-storage.partitioned-paths",
152+
true);
153+
STRING_ENTRY(kObjectStorePath, "write.object-storage.path", "");
154+
STRING_ENTRY(kWriteLocationProviderImpl, "write.location-provider.impl", "");
155+
STRING_ENTRY(kWriteFolderStorageLocation, "write.folder-storage.path", "");
156+
STRING_ENTRY(kWriteDataLocation, "write.data.path", "");
157+
STRING_ENTRY(kWriteMetadataLocation, "write.metadata.path", "");
158+
INT32_ENTRY(kWritePartitionSummaryLimit, "write.summary.partition-limit", 0);
159+
BOOL_ENTRY(kManifestListsEnabled, "write.manifest-lists.enabled", true);
160+
STRING_ENTRY(kMetadataCompression, "write.metadata.compression-codec", "none");
161+
INT32_ENTRY(kMetadataPreviousVersionsMax, "write.metadata.previous-versions-max", 100);
162+
BOOL_ENTRY(kMetadataDeleteAfterCommitEnabled,
163+
"write.metadata.delete-after-commit.enabled", false);
164+
INT32_ENTRY(kMetricsMaxInferredColumnDefaults,
165+
"write.metadata.metrics.max-inferred-column-defaults", 100);
166+
STRING_ENTRY(kDefaultWriteMetricsMode, "write.metadata.metrics.default",
167+
"truncate(16)");
168+
STRING_ENTRY(kDefaultNameMapping, "schema.name-mapping.default", "");
169+
STRING_ENTRY(kWriteAuditPublishEnabled, "write.wap.enabled", "false");
170+
INT64_ENTRY(kWriteTargetFileSizeBytes, "write.target-file-size-bytes",
171+
512 * 1024 * 1024);
172+
INT64_ENTRY(kDeleteTargetFileSizeBytes, "write.delete.target-file-size-bytes",
173+
64 * 1024 * 1024);
174+
BOOL_ENTRY(kSparkWritePartitionedFanoutEnabled, "write.spark.fanout.enabled", false);
175+
BOOL_ENTRY(kSparkWriteAcceptAnySchema, "write.spark.accept-any-schema", false);
176+
STRING_ENTRY(kSparkWriteAdvisoryPartitionSizeBytes,
177+
"write.spark.advisory-partition-size-bytes", "");
178+
BOOL_ENTRY(kSnapshotIdInheritanceEnabled,
179+
"compatibility.snapshot-id-inheritance.enabled", false);
180+
BOOL_ENTRY(kEngineHiveEnabled, "engine.hive.enabled", false);
181+
BOOL_ENTRY(kHiveLockEnabled, "engine.hive.lock-enabled", true);
182+
STRING_ENTRY(kWriteDistributionMode, "write.distribution-mode", "");
183+
184+
// Garbage collection properties
185+
BOOL_ENTRY(kGcEnabled, "gc.enabled", true);
186+
INT64_ENTRY(kMaxSnapshotAgeMs, "history.expire.max-snapshot-age-ms",
187+
5 * 24 * 60 * 60 * 1000L);
188+
INT32_ENTRY(kMinSnapshotsToKeep, "history.expire.min-snapshots-to-keep", 1);
189+
INT64_ENTRY(kMaxRefAgeMs, "history.expire.max-ref-age-ms",
190+
std::numeric_limits<int64_t>::max());
191+
192+
// Delete/Update/Merge properties
193+
STRING_ENTRY(kDeleteGranularity, "write.delete.granularity", "partition");
194+
STRING_ENTRY(kDeleteIsolationLevel, "write.delete.isolation-level", "serializable");
195+
STRING_ENTRY(kDeleteMode, "write.delete.mode", "copy-on-write");
196+
STRING_ENTRY(kDeleteDistributionMode, "write.delete.distribution-mode", "");
197+
STRING_ENTRY(kUpdateIsolationLevel, "write.update.isolation-level", "serializable");
198+
STRING_ENTRY(kUpdateMode, "write.update.mode", "copy-on-write");
199+
STRING_ENTRY(kUpdateDistributionMode, "write.update.distribution-mode", "");
200+
STRING_ENTRY(kMergeIsolationLevel, "write.merge.isolation-level", "serializable");
201+
STRING_ENTRY(kMergeMode, "write.merge.mode", "copy-on-write");
202+
STRING_ENTRY(kMergeDistributionMode, "write.merge.distribution-mode", "");
203+
BOOL_ENTRY(kUpsertEnabled, "write.upsert.enabled", false);
204+
205+
// Encryption properties
206+
STRING_ENTRY(kEncryptionTableKey, "encryption.key-id", "");
207+
INT32_ENTRY(kEncryptionDekLength, "encryption.data-key-length", 16);
208+
209+
#undef STRING_ENTRY
210+
#undef INT32_ENTRY
211+
#undef INT64_ENTRY
212+
#undef BOOL_ENTRY
213+
#undef DOUBLE_ENTRY
214+
215+
/// \brief Get the set of reserved table property keys.
216+
///
217+
/// Reserved table properties are only used to control behaviors when creating
218+
/// or updating a table. The values of these properties are not persisted as
219+
/// part of the table metadata.
220+
///
221+
/// \return The set of reserved property keys
222+
static const std::unordered_set<std::string>& reserved_properties();
223+
224+
/// \brief Create a default TableProperties instance.
225+
///
226+
/// \return A unique pointer to a TableProperties instance with default values
227+
static std::unique_ptr<TableProperties> default_properties();
228+
229+
/// \brief Create a TableProperties instance from a map of key-value pairs.
230+
///
231+
/// \param options The map containing property key-value pairs
232+
/// \return A unique pointer to a TableProperties instance
233+
static std::unique_ptr<TableProperties> FromMap(
234+
const std::unordered_map<std::string, std::string>& options);
235+
};
236+
237+
} // namespace iceberg

0 commit comments

Comments
 (0)