|
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one |
| 3 | + * or more contributor license agreements. See the NOTICE file |
| 4 | + * distributed with this work for additional information |
| 5 | + * regarding copyright ownership. The ASF licenses this file |
| 6 | + * to you under the Apache License, Version 2.0 (the |
| 7 | + * "License"); you may not use this file except in compliance |
| 8 | + * with the License. You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, |
| 13 | + * software distributed under the License is distributed on an |
| 14 | + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 15 | + * KIND, either express or implied. See the License for the |
| 16 | + * specific language governing permissions and limitations |
| 17 | + * under the License. |
| 18 | + */ |
| 19 | + |
| 20 | +#pragma once |
| 21 | + |
| 22 | +#include <limits> |
| 23 | +#include <memory> |
| 24 | +#include <string> |
| 25 | +#include <unordered_map> |
| 26 | +#include <unordered_set> |
| 27 | + |
| 28 | +#include "iceberg/iceberg_export.h" |
| 29 | +#include "iceberg/util/config.h" |
| 30 | + |
| 31 | +namespace iceberg { |
| 32 | + |
| 33 | +/// \brief Table properties for Iceberg tables. |
| 34 | +/// |
| 35 | +/// Declares one static configuration entry per known table property, grouped by |
| 36 | +/// area: reserved keys, commit and manifest behavior, file formats and their |
| 37 | +/// compression settings, read and write tuning, garbage collection, row-level |
| 38 | +/// delete/update/merge behavior, and encryption. |
| 39 | +/// |
| 40 | +/// Every entry is initialized from a property key and a value. The value is |
| 41 | +/// presumably the default used when the key is not set explicitly; confirm |
| 42 | +/// against ConfigBase. |
| 43 | +class ICEBERG_EXPORT TableProperties : public ConfigBase<TableProperties> { |
| 44 | + public: |
| 45 | + /// \brief Const-qualified shorthand for the entry template of the base class. |
| 46 | + template <typename T> |
| 47 | + using Entry = const ConfigBase<TableProperties>::Entry<T>; |
| 48 | + |
| 49 | +// Helpers that declare one `inline static` entry of the given value type. |
| 50 | +// |
| 51 | +// The expansions deliberately omit the trailing semicolon: each use below |
| 52 | +// supplies its own, so no stray empty declaration is left in the class body. |
| 53 | +// All helpers are #undef'd after the last entry so that these generic names do |
| 54 | +// not leak into every translation unit that includes this header. |
| 55 | +#define STRING_ENTRY(name, key, value) inline static Entry<std::string> name{key, value} |
| 56 | +#define INT32_ENTRY(name, key, value) inline static Entry<int32_t> name{key, value} |
| 57 | +#define INT64_ENTRY(name, key, value) inline static Entry<int64_t> name{key, value} |
| 58 | +#define BOOL_ENTRY(name, key, value) inline static Entry<bool> name{key, value} |
| 59 | +#define DOUBLE_ENTRY(name, key, value) inline static Entry<double> name{key, value} |
| 60 | + |
| 61 | + // Reserved table properties. Each one is declared with an empty value. |
| 62 | + STRING_ENTRY(kFormatVersion, "format-version", ""); |
| 63 | + STRING_ENTRY(kUuid, "uuid", ""); |
| 64 | + STRING_ENTRY(kSnapshotCount, "snapshot-count", ""); |
| 65 | + STRING_ENTRY(kCurrentSnapshotSummary, "current-snapshot-summary", ""); |
| 66 | + STRING_ENTRY(kCurrentSnapshotId, "current-snapshot-id", ""); |
| 67 | + STRING_ENTRY(kCurrentSnapshotTimestamp, "current-snapshot-timestamp-ms", ""); |
| 68 | + STRING_ENTRY(kCurrentSchema, "current-schema", ""); |
| 69 | + STRING_ENTRY(kDefaultPartitionSpec, "default-partition-spec", ""); |
| 70 | + STRING_ENTRY(kDefaultSortOrder, "default-sort-order", ""); |
| 71 | + |
| 72 | + // Commit properties. Wait and timeout values are in milliseconds (see the |
| 73 | + // "-ms" suffix of the keys). |
| 74 | + INT32_ENTRY(kCommitNumRetries, "commit.retry.num-retries", 4); |
| 75 | + INT32_ENTRY(kCommitMinRetryWaitMs, "commit.retry.min-wait-ms", 100); |
| 76 | + INT32_ENTRY(kCommitMaxRetryWaitMs, "commit.retry.max-wait-ms", 60 * 1000); |
| 77 | + INT32_ENTRY(kCommitTotalRetryTimeMs, "commit.retry.total-timeout-ms", 30 * 60 * 1000); |
| 78 | + INT32_ENTRY(kCommitNumStatusChecks, "commit.status-check.num-retries", 3); |
| 79 | + INT64_ENTRY(kCommitStatusChecksMinWaitMs, "commit.status-check.min-wait-ms", 1000); |
| 80 | + INT64_ENTRY(kCommitStatusChecksMaxWaitMs, "commit.status-check.max-wait-ms", 60 * 1000); |
| 81 | + INT64_ENTRY(kCommitStatusChecksTotalWaitMs, "commit.status-check.total-timeout-ms", |
| 82 | + 30 * 60 * 1000); |
| 83 | + |
| 84 | + // Manifest properties |
| 85 | + INT64_ENTRY(kManifestTargetSizeBytes, "commit.manifest.target-size-bytes", |
| 86 | + 8 * 1024 * 1024); |
| 87 | + INT32_ENTRY(kManifestMinMergeCount, "commit.manifest.min-count-to-merge", 100); |
| 88 | + BOOL_ENTRY(kManifestMergeEnabled, "commit.manifest-merge.enabled", true); |
| 89 | + |
| 90 | + // File format properties |
| 91 | + STRING_ENTRY(kDefaultFileFormat, "write.format.default", "parquet"); |
| 92 | + STRING_ENTRY(kDeleteDefaultFileFormat, "write.delete.format.default", "parquet"); |
| 93 | + |
| 94 | + // Parquet properties. Most settings come as a pair: a "write.parquet.*" key |
| 95 | + // and a "write.delete.parquet.*" counterpart declared with the same value. |
| 96 | + INT32_ENTRY(kParquetRowGroupSizeBytes, "write.parquet.row-group-size-bytes", |
| 97 | + 128 * 1024 * 1024); |
| 98 | + INT32_ENTRY(kDeleteParquetRowGroupSizeBytes, |
| 99 | + "write.delete.parquet.row-group-size-bytes", 128 * 1024 * 1024); |
| 100 | + INT32_ENTRY(kParquetPageSizeBytes, "write.parquet.page-size-bytes", 1024 * 1024); |
| 101 | + INT32_ENTRY(kDeleteParquetPageSizeBytes, "write.delete.parquet.page-size-bytes", |
| 102 | + 1024 * 1024); |
| 103 | + INT32_ENTRY(kParquetPageRowLimit, "write.parquet.page-row-limit", 20000); |
| 104 | + INT32_ENTRY(kDeleteParquetPageRowLimit, "write.delete.parquet.page-row-limit", 20000); |
| 105 | + INT32_ENTRY(kParquetDictSizeBytes, "write.parquet.dict-size-bytes", 2 * 1024 * 1024); |
| 106 | + INT32_ENTRY(kDeleteParquetDictSizeBytes, "write.delete.parquet.dict-size-bytes", |
| 107 | + 2 * 1024 * 1024); |
| 108 | + STRING_ENTRY(kParquetCompression, "write.parquet.compression-codec", "zstd"); |
| 109 | + STRING_ENTRY(kDeleteParquetCompression, "write.delete.parquet.compression-codec", |
| 110 | + "zstd"); |
| 111 | + STRING_ENTRY(kParquetCompressionLevel, "write.parquet.compression-level", ""); |
| 112 | + STRING_ENTRY(kDeleteParquetCompressionLevel, "write.delete.parquet.compression-level", |
| 113 | + ""); |
| 114 | + INT32_ENTRY(kParquetRowGroupCheckMinRecordCount, |
| 115 | + "write.parquet.row-group-check-min-record-count", 100); |
| 116 | + INT32_ENTRY(kDeleteParquetRowGroupCheckMinRecordCount, |
| 117 | + "write.delete.parquet.row-group-check-min-record-count", 100); |
| 118 | + INT32_ENTRY(kParquetRowGroupCheckMaxRecordCount, |
| 119 | + "write.parquet.row-group-check-max-record-count", 10000); |
| 120 | + INT32_ENTRY(kDeleteParquetRowGroupCheckMaxRecordCount, |
| 121 | + "write.delete.parquet.row-group-check-max-record-count", 10000); |
| 122 | + INT32_ENTRY(kParquetBloomFilterMaxBytes, "write.parquet.bloom-filter-max-bytes", |
| 123 | + 1024 * 1024); |
| 124 | + DOUBLE_ENTRY(kParquetBloomFilterColumnFppDefault, |
| 125 | + "write.parquet.bloom-filter-fpp.column", 0.01); |
| 126 | + |
| 127 | + // Avro properties |
| 128 | + STRING_ENTRY(kAvroCompression, "write.avro.compression-codec", "gzip"); |
| 129 | + STRING_ENTRY(kDeleteAvroCompression, "write.delete.avro.compression-codec", "gzip"); |
| 130 | + STRING_ENTRY(kAvroCompressionLevel, "write.avro.compression-level", ""); |
| 131 | + STRING_ENTRY(kDeleteAvroCompressionLevel, "write.delete.avro.compression-level", ""); |
| 132 | + |
| 133 | + // ORC properties |
| 134 | + INT64_ENTRY(kOrcStripeSizeBytes, "write.orc.stripe-size-bytes", 64L * 1024 * 1024); |
| 135 | + STRING_ENTRY(kOrcBloomFilterColumns, "write.orc.bloom.filter.columns", ""); |
| 136 | + DOUBLE_ENTRY(kOrcBloomFilterFpp, "write.orc.bloom.filter.fpp", 0.05); |
| 137 | + INT64_ENTRY(kDeleteOrcStripeSizeBytes, "write.delete.orc.stripe-size-bytes", |
| 138 | + 64L * 1024 * 1024); |
| 139 | + INT64_ENTRY(kOrcBlockSizeBytes, "write.orc.block-size-bytes", 256L * 1024 * 1024); |
| 140 | + INT64_ENTRY(kDeleteOrcBlockSizeBytes, "write.delete.orc.block-size-bytes", |
| 141 | + 256L * 1024 * 1024); |
| 142 | + INT32_ENTRY(kOrcWriteBatchSize, "write.orc.vectorized.batch-size", 1024); |
| 143 | + INT32_ENTRY(kDeleteOrcWriteBatchSize, "write.delete.orc.vectorized.batch-size", 1024); |
| 144 | + STRING_ENTRY(kOrcCompression, "write.orc.compression-codec", "zlib"); |
| 145 | + STRING_ENTRY(kDeleteOrcCompression, "write.delete.orc.compression-codec", "zlib"); |
| 146 | + STRING_ENTRY(kOrcCompressionStrategy, "write.orc.compression-strategy", "speed"); |
| 147 | + STRING_ENTRY(kDeleteOrcCompressionStrategy, "write.delete.orc.compression-strategy", |
| 148 | + "speed"); |
| 149 | + |
| 150 | + // Read properties |
| 151 | + INT64_ENTRY(kSplitSize, "read.split.target-size", 128 * 1024 * 1024); |
| 152 | + INT64_ENTRY(kMetadataSplitSize, "read.split.metadata-target-size", 32 * 1024 * 1024); |
| 153 | + INT32_ENTRY(kSplitLookback, "read.split.planning-lookback", 10); |
| 154 | + INT64_ENTRY(kSplitOpenFileCost, "read.split.open-file-cost", 4 * 1024 * 1024); |
| 155 | + BOOL_ENTRY(kAdaptiveSplitSizeEnabled, "read.split.adaptive-size.enabled", true); |
| 156 | + BOOL_ENTRY(kParquetVectorizationEnabled, "read.parquet.vectorization.enabled", true); |
| 157 | + INT32_ENTRY(kParquetBatchSize, "read.parquet.vectorization.batch-size", 5000); |
| 158 | + BOOL_ENTRY(kOrcVectorizationEnabled, "read.orc.vectorization.enabled", false); |
| 159 | + INT32_ENTRY(kOrcBatchSize, "read.orc.vectorization.batch-size", 5000); |
| 160 | + STRING_ENTRY(kDataPlanningMode, "read.data-planning-mode", "auto"); |
| 161 | + STRING_ENTRY(kDeletePlanningMode, "read.delete-planning-mode", "auto"); |
| 162 | + |
| 163 | + // Write properties |
| 164 | + BOOL_ENTRY(kObjectStoreEnabled, "write.object-storage.enabled", false); |
| 165 | + BOOL_ENTRY(kWriteObjectStorePartitionedPaths, "write.object-storage.partitioned-paths", |
| 166 | + true); |
| 167 | + STRING_ENTRY(kObjectStorePath, "write.object-storage.path", ""); |
| 168 | + STRING_ENTRY(kWriteLocationProviderImpl, "write.location-provider.impl", ""); |
| 169 | + STRING_ENTRY(kWriteFolderStorageLocation, "write.folder-storage.path", ""); |
| 170 | + STRING_ENTRY(kWriteDataLocation, "write.data.path", ""); |
| 171 | + STRING_ENTRY(kWriteMetadataLocation, "write.metadata.path", ""); |
| 172 | + INT32_ENTRY(kWritePartitionSummaryLimit, "write.summary.partition-limit", 0); |
| 173 | + BOOL_ENTRY(kManifestListsEnabled, "write.manifest-lists.enabled", true); |
| 174 | + STRING_ENTRY(kMetadataCompression, "write.metadata.compression-codec", "none"); |
| 175 | + INT32_ENTRY(kMetadataPreviousVersionsMax, "write.metadata.previous-versions-max", 100); |
| 176 | + BOOL_ENTRY(kMetadataDeleteAfterCommitEnabled, |
| 177 | + "write.metadata.delete-after-commit.enabled", false); |
| 178 | + INT32_ENTRY(kMetricsMaxInferredColumnDefaults, |
| 179 | + "write.metadata.metrics.max-inferred-column-defaults", 100); |
| 180 | + STRING_ENTRY(kDefaultWriteMetricsMode, "write.metadata.metrics.default", |
| 181 | + "truncate(16)"); |
| 182 | + STRING_ENTRY(kDefaultNameMapping, "schema.name-mapping.default", ""); |
| 183 | + // Note: a string entry holding "false", unlike the other "*.enabled" keys, |
| 184 | + // which are declared as bool entries. |
| 185 | + STRING_ENTRY(kWriteAuditPublishEnabled, "write.wap.enabled", "false"); |
| 186 | + INT64_ENTRY(kWriteTargetFileSizeBytes, "write.target-file-size-bytes", |
| 187 | + 512 * 1024 * 1024); |
| 188 | + INT64_ENTRY(kDeleteTargetFileSizeBytes, "write.delete.target-file-size-bytes", |
| 189 | + 64 * 1024 * 1024); |
| 190 | + BOOL_ENTRY(kSparkWritePartitionedFanoutEnabled, "write.spark.fanout.enabled", false); |
| 191 | + BOOL_ENTRY(kSparkWriteAcceptAnySchema, "write.spark.accept-any-schema", false); |
| 192 | + STRING_ENTRY(kSparkWriteAdvisoryPartitionSizeBytes, |
| 193 | + "write.spark.advisory-partition-size-bytes", ""); |
| 194 | + BOOL_ENTRY(kSnapshotIdInheritanceEnabled, |
| 195 | + "compatibility.snapshot-id-inheritance.enabled", false); |
| 196 | + BOOL_ENTRY(kEngineHiveEnabled, "engine.hive.enabled", false); |
| 197 | + BOOL_ENTRY(kHiveLockEnabled, "engine.hive.lock-enabled", true); |
| 198 | + STRING_ENTRY(kWriteDistributionMode, "write.distribution-mode", ""); |
| 199 | + |
| 200 | + // Garbage collection properties |
| 201 | + BOOL_ENTRY(kGcEnabled, "gc.enabled", true); |
| 202 | + // Five days expressed in milliseconds. |
| 203 | + INT64_ENTRY(kMaxSnapshotAgeMs, "history.expire.max-snapshot-age-ms", |
| 204 | + 5 * 24 * 60 * 60 * 1000L); |
| 205 | + INT32_ENTRY(kMinSnapshotsToKeep, "history.expire.min-snapshots-to-keep", 1); |
| 206 | + // Initialized with the largest representable int64_t value. |
| 207 | + INT64_ENTRY(kMaxRefAgeMs, "history.expire.max-ref-age-ms", |
| 208 | + std::numeric_limits<int64_t>::max()); |
| 209 | + |
| 210 | + // Delete/Update/Merge properties |
| 211 | + STRING_ENTRY(kDeleteGranularity, "write.delete.granularity", "partition"); |
| 212 | + STRING_ENTRY(kDeleteIsolationLevel, "write.delete.isolation-level", "serializable"); |
| 213 | + STRING_ENTRY(kDeleteMode, "write.delete.mode", "copy-on-write"); |
| 214 | + STRING_ENTRY(kDeleteDistributionMode, "write.delete.distribution-mode", ""); |
| 215 | + STRING_ENTRY(kUpdateIsolationLevel, "write.update.isolation-level", "serializable"); |
| 216 | + STRING_ENTRY(kUpdateMode, "write.update.mode", "copy-on-write"); |
| 217 | + STRING_ENTRY(kUpdateDistributionMode, "write.update.distribution-mode", ""); |
| 218 | + STRING_ENTRY(kMergeIsolationLevel, "write.merge.isolation-level", "serializable"); |
| 219 | + STRING_ENTRY(kMergeMode, "write.merge.mode", "copy-on-write"); |
| 220 | + STRING_ENTRY(kMergeDistributionMode, "write.merge.distribution-mode", ""); |
| 221 | + BOOL_ENTRY(kUpsertEnabled, "write.upsert.enabled", false); |
| 222 | + |
| 223 | + // Encryption properties |
| 224 | + STRING_ENTRY(kEncryptionTableKey, "encryption.key-id", ""); |
| 225 | + INT32_ENTRY(kEncryptionDekLength, "encryption.data-key-length", 16); |
| 226 | + |
| 227 | +// The entry helpers are only needed for the declarations above. |
| 228 | +#undef STRING_ENTRY |
| 229 | +#undef INT32_ENTRY |
| 230 | +#undef INT64_ENTRY |
| 231 | +#undef BOOL_ENTRY |
| 232 | +#undef DOUBLE_ENTRY |
| 233 | + |
| 234 | + /// \brief Get the set of reserved table property keys. |
| 235 | + /// |
| 236 | + /// Reserved table properties are only used to control behaviors when creating |
| 237 | + /// or updating a table. The values of these properties are not persisted as |
| 238 | + /// part of the table metadata. |
| 239 | + /// |
| 240 | + /// \return The set of reserved property keys |
| 241 | + static const std::unordered_set<std::string>& reserved_properties(); |
| 242 | + |
| 243 | + /// \brief Create a default TableProperties instance. |
| 244 | + /// |
| 245 | + /// \return A unique pointer to a TableProperties instance with default values |
| 246 | + static std::unique_ptr<TableProperties> default_properties(); |
| 247 | + |
| 248 | + /// \brief Create a TableProperties instance from a map of key-value pairs. |
| 249 | + /// |
| 250 | + /// \param options The map containing property key-value pairs |
| 251 | + /// \return A unique pointer to a TableProperties instance |
| 252 | + static std::unique_ptr<TableProperties> FromMap( |
| 253 | + const std::unordered_map<std::string, std::string>& options); |
| 254 | +}; |
| 255 | + |
| 256 | +} // namespace iceberg |
0 commit comments