Skip to content

Commit f1156c5

Browse files
committed
feat: update properties api
1 parent dbc9c1c commit f1156c5

19 files changed

+782
-9
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,12 @@ set(ICEBERG_SOURCES
3939
manifest_reader_internal.cc
4040
manifest_writer.cc
4141
metadata_columns.cc
42+
metrics_config.cc
4243
name_mapping.cc
4344
partition_field.cc
4445
partition_spec.cc
4546
partition_summary.cc
47+
properties_update.cc
4648
row/arrow_array_wrapper.cc
4749
row/manifest_wrapper.cc
4850
row/struct_like.cc

src/iceberg/meson.build

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,12 @@ iceberg_sources = files(
6161
'manifest_reader_internal.cc',
6262
'manifest_writer.cc',
6363
'metadata_columns.cc',
64+
'metrics_config.cc',
6465
'name_mapping.cc',
6566
'partition_field.cc',
6667
'partition_spec.cc',
6768
'partition_summary.cc',
69+
'properties_update.cc',
6870
'row/arrow_array_wrapper.cc',
6971
'row/manifest_wrapper.cc',
7072
'row/struct_like.cc',
@@ -166,9 +168,12 @@ install_headers(
166168
'manifest_writer.h',
167169
'metadata_columns.h',
168170
'metrics.h',
171+
'metrics_config.h',
169172
'name_mapping.h',
170173
'partition_field.h',
171174
'partition_spec.h',
175+
'pending_update.h',
176+
'properties_update.h',
172177
'result.h',
173178
'schema_field.h',
174179
'schema.h',

src/iceberg/metrics_config.cc

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/metrics_config.h"
21+
22+
#include <string>
23+
#include <unordered_map>
24+
25+
#include "iceberg/schema.h"
26+
#include "iceberg/table_properties.h"
27+
28+
namespace iceberg {
29+
30+
// Checks every per-column metrics-mode property in `updates` against `schema`.
//
// Keys that do not begin with TableProperties::kMetricModeColumnConfPrefix are
// skipped. For the remaining keys, the text after the prefix is treated as a column
// name and looked up with Schema::FindFieldByName. An InvalidArgument error is
// returned for the first name whose lookup fails or yields no field.
Status MetricsConfig::VerifyReferencedColumns(
    const std::unordered_map<std::string, std::string>& updates, const Schema& schema) {
  const auto& column_prefix = TableProperties::kMetricModeColumnConfPrefix;

  for (const auto& property : updates) {
    const std::string& property_name = property.first;
    if (property_name.starts_with(column_prefix)) {
      // Strip the prefix; what remains is the referenced column name.
      std::string_view column_name{property_name};
      column_name.remove_prefix(column_prefix.size());

      auto lookup = schema.FindFieldByName(column_name);
      const bool column_exists = lookup.has_value() && lookup.value().has_value();
      if (!column_exists) {
        return InvalidArgument(
            "Invalid metrics config, could not find column {} from table prop {} in "
            "schema {}",
            column_name, property_name, schema.ToString());
      }
    }
  }

  return {};
}
48+
49+
} // namespace iceberg

src/iceberg/metrics_config.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/metrics_config.h
23+
/// \brief Metrics configuration for Iceberg tables
24+
25+
#include <string>
26+
#include <unordered_map>
27+
28+
#include "iceberg/iceberg_export.h"
29+
#include "iceberg/result.h"
30+
#include "iceberg/type_fwd.h"
31+
32+
namespace iceberg {
33+
34+
/// \brief Configuration utilities for table metrics
35+
class ICEBERG_EXPORT MetricsConfig {
36+
public:
37+
/// \brief Verify that all referenced columns are valid
38+
/// \param updates The updates to verify
39+
/// \param schema The schema to verify against
40+
/// \return OK if all referenced columns are valid
41+
static Status VerifyReferencedColumns(
42+
const std::unordered_map<std::string, std::string>& updates, const Schema& schema);
43+
};
44+
45+
} // namespace iceberg

src/iceberg/properties_update.cc

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/properties_update.h"
21+
22+
#include <cstdint>
23+
#include <memory>
24+
25+
#include "iceberg/catalog.h"
26+
#include "iceberg/file_format.h"
27+
#include "iceberg/metrics_config.h"
28+
#include "iceberg/result.h"
29+
#include "iceberg/table.h"
30+
#include "iceberg/table_identifier.h"
31+
#include "iceberg/table_metadata.h"
32+
#include "iceberg/table_properties.h"
33+
#include "iceberg/table_requirements.h"
34+
#include "iceberg/table_update.h"
35+
#include "iceberg/util/macros.h"
36+
37+
namespace iceberg {
38+
39+
PropertiesUpdate::PropertiesUpdate(TableIdentifier identifier,
40+
std::shared_ptr<Catalog> catalog,
41+
std::shared_ptr<TableMetadata> metadata)
42+
: identifier_(std::move(identifier)),
43+
catalog_(std::move(catalog)),
44+
metadata_(std::move(metadata)) {}
45+
46+
// Records a property assignment to be sent on commit.
//
// The call is ignored when:
//   - the key is already marked for removal, or
//   - the key is a reserved table property other than the format version.
// Otherwise the most recent value wins: setting the same key twice replaces the
// previously recorded value.
//
// \param key The property key
// \param value The property value
// \return Reference to this PropertiesUpdate for chaining
PropertiesUpdate& PropertiesUpdate::Set(std::string key, std::string value) {
  if (std::ranges::find(removals_, key) != removals_.end()) {
    return *this;
  }

  const bool is_reserved = TableProperties::reserved_properties().contains(key);
  if (is_reserved && key != TableProperties::kFormatVersion.key()) {
    return *this;
  }

  // emplace() would keep a stale value if the key had been set before;
  // insert_or_assign() makes repeated Set() calls behave as "last write wins".
  updates_.insert_or_assign(std::move(key), std::move(value));
  return *this;
}
58+
59+
// Marks a property key for removal on commit.
//
// Nothing is recorded when the key already has a pending update, or when it is
// already present in the removal list (so each key is listed at most once).
//
// \param key The property key to remove
// \return Reference to this PropertiesUpdate for chaining
PropertiesUpdate& PropertiesUpdate::Remove(std::string key) {
  const bool can_record =
      !updates_.contains(key) &&
      std::ranges::find(removals_, key) == std::ranges::end(removals_);
  if (can_record) {
    removals_.push_back(std::move(key));
  }
  return *this;
}
68+
69+
// Convenience wrapper that records the table's default file format.
//
// Delegates to Set() with the TableProperties::kDefaultFileFormat key so that this
// method follows exactly the same rules as a direct Set() call (for example, it does
// not record an update for a key that is already marked for removal) instead of
// writing to the pending updates behind Set()'s back.
//
// \param format The file format type to use as default
// \return Reference to this PropertiesUpdate for chaining
PropertiesUpdate& PropertiesUpdate::DefaultFormat(FileFormatType format) {
  return Set(std::string(TableProperties::kDefaultFileFormat.key()),
             std::string(ToString(format)));
}
73+
74+
// Validates the pending changes without sending anything to the catalog.
//
// Steps:
//   1. Fails if no table metadata is attached.
//   2. If a format-version property is pending, parses it strictly as an integer,
//      checks it against the supported range, stores it in `format_version_`, and
//      removes the entry from the pending property updates.
//   3. If the metadata exposes a schema, verifies that per-column metrics
//      properties reference existing columns.
//
// \return OK when all checks pass, otherwise an InvalidArgument error (or the error
//         produced by MetricsConfig::VerifyReferencedColumns)
Result<void> PropertiesUpdate::Apply() {
  if (!metadata_) {
    return InvalidArgument("Cannot commit an empty table");
  }

  auto iter = updates_.find(TableProperties::kFormatVersion.key());
  if (iter != updates_.end()) {
    const std::string& raw_version = iter->second;

    int parsed_version = 0;
    std::size_t consumed = 0;
    try {
      parsed_version = std::stoi(raw_version, &consumed);
    } catch (const std::invalid_argument&) {
      return InvalidArgument("Invalid format version '{}': not a valid integer",
                             raw_version);
    } catch (const std::out_of_range&) {
      return InvalidArgument("Format version '{}' is out of range", raw_version);
    }

    // std::stoi stops at the first non-digit, so "2abc" would otherwise be
    // accepted as 2. Require the whole string to have been consumed.
    if (consumed != raw_version.size()) {
      return InvalidArgument("Invalid format version '{}': not a valid integer",
                             raw_version);
    }

    // Reject non-positive versions before narrowing: a value such as -200 passes
    // the upper-bound check below and would wrap to a positive int8_t.
    if (parsed_version < 1) {
      return InvalidArgument("Format version '{}' is out of range", raw_version);
    }

    if (parsed_version > TableMetadata::kSupportedTableFormatVersion) {
      return InvalidArgument(
          "Cannot upgrade table to unsupported format version: v{} (supported: v{})",
          parsed_version, TableMetadata::kSupportedTableFormatVersion);
    }

    format_version_ = static_cast<int8_t>(parsed_version);

    // The format version is committed as its own update, not as a plain property.
    // `raw_version` refers into the erased node and must not be used after this.
    updates_.erase(iter);
  }

  if (metadata_->Schema().has_value()) {
    ICEBERG_RETURN_UNEXPECTED(
        MetricsConfig::VerifyReferencedColumns(updates_, *metadata_->Schema().value()));
  }
  return {};
}
105+
106+
// Validates the pending changes with Apply() and sends them to the catalog.
//
// Up to three table updates are built, in this order: SetProperties,
// RemoveProperties and UpgradeFormatVersion. When nothing is pending, no catalog
// call is made and OK is returned. The pending property updates and removals are
// moved into the table updates, so they are consumed by a commit attempt.
//
// \return OK on success; otherwise the validation error from Apply(), an
//         InvalidArgument error when there is no catalog to commit to, or the
//         error returned while building requirements or updating the table
Status PropertiesUpdate::Commit() {
  ICEBERG_RETURN_UNEXPECTED(Apply());

  const bool has_changes =
      !updates_.empty() || !removals_.empty() || format_version_.has_value();
  if (!has_changes) {
    return {};
  }

  // Check before draining the pending state: without a catalog the call below
  // would dereference a null pointer.
  if (!catalog_) {
    return InvalidArgument("Cannot commit property updates without a catalog");
  }

  std::vector<std::unique_ptr<TableUpdate>> updates;
  if (!updates_.empty()) {
    updates.emplace_back(std::make_unique<table::SetProperties>(std::move(updates_)));
  }
  if (!removals_.empty()) {
    updates.emplace_back(std::make_unique<table::RemoveProperties>(std::move(removals_)));
  }
  if (format_version_.has_value()) {
    updates.emplace_back(
        std::make_unique<table::UpgradeFormatVersion>(format_version_.value()));
  }

  ICEBERG_ASSIGN_OR_RAISE(auto requirements,
                          TableRequirements::ForUpdateTable(*metadata_, updates));
  ICEBERG_RETURN_UNEXPECTED(
      catalog_->UpdateTable(identifier_, requirements, std::move(updates)));
  return {};
}
129+
130+
} // namespace iceberg

src/iceberg/properties_update.h

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
#include <memory>
23+
#include <string>
24+
#include <unordered_map>
25+
#include <vector>
26+
27+
#include "iceberg/file_format.h"
28+
#include "iceberg/iceberg_export.h"
29+
#include "iceberg/pending_update.h"
30+
#include "iceberg/table_identifier.h"
31+
#include "iceberg/type_fwd.h"
32+
33+
namespace iceberg {
34+
35+
/// \brief Updates table properties.
36+
class ICEBERG_EXPORT PropertiesUpdate : public PendingUpdateTyped<void> {
37+
public:
38+
/// \brief Constructs a PropertiesUpdate for the specified table.
39+
///
40+
/// \param identifier The table identifier
41+
/// \param catalog The catalog containing the table
42+
/// \param metadata The current table metadata
43+
PropertiesUpdate(TableIdentifier identifier, std::shared_ptr<Catalog> catalog,
44+
std::shared_ptr<TableMetadata> metadata);
45+
46+
/// \brief Sets a property key-value pair.
47+
///
48+
/// If the key was previously marked for removal, this operation cancels the removal.
49+
/// Reserved properties are handled according to TableProperties rules.
50+
///
51+
/// \param key The property key
52+
/// \param value The property value
53+
/// \return Reference to this PropertiesUpdate for chaining
54+
PropertiesUpdate& Set(std::string key, std::string value);
55+
56+
/// \brief Marks a property for removal.
57+
///
58+
/// If the key was previously set for update, this operation cancels the update.
59+
///
60+
/// \param key The property key to remove
61+
/// \return Reference to this PropertiesUpdate for chaining
62+
PropertiesUpdate& Remove(std::string key);
63+
64+
/// \brief Sets the default file format for the table.
65+
///
66+
/// This is a convenience method for setting the "write.format.default" property.
67+
///
68+
/// \param format The file format type to use as default
69+
/// \return Reference to this PropertiesUpdate for chaining
70+
PropertiesUpdate& DefaultFormat(FileFormatType format);
71+
72+
/// \brief Applies the property changes without committing them.
73+
///
74+
/// Validates the pending property changes but does not commit them to the table.
75+
/// This method can be used to validate changes before actually committing them.
76+
///
77+
/// \return Status::OK if the changes are valid, or an error if validation fails
78+
Result<void> Apply() override;
79+
80+
/// \brief Commits the property changes to the table.
81+
///
82+
/// Validates the changes and applies them to the table through the catalog.
83+
///
84+
/// \return OK if the changes are valid and committed successfully, or an error
85+
Status Commit() override;
86+
87+
private:
88+
TableIdentifier identifier_;
89+
std::shared_ptr<Catalog> catalog_;
90+
std::shared_ptr<TableMetadata> metadata_;
91+
92+
std::unordered_map<std::string, std::string> updates_;
93+
std::vector<std::string> removals_;
94+
std::optional<int8_t> format_version_;
95+
};
96+
97+
} // namespace iceberg

src/iceberg/table.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
#include "iceberg/catalog.h"
2525
#include "iceberg/partition_spec.h"
26+
#include "iceberg/properties_update.h"
2627
#include "iceberg/schema.h"
2728
#include "iceberg/sort_order.h"
2829
#include "iceberg/table_metadata.h"
@@ -110,6 +111,10 @@ const std::vector<SnapshotLogEntry>& Table::history() const {
110111
return metadata_->snapshot_log;
111112
}
112113

114+
std::unique_ptr<PropertiesUpdate> Table::UpdateProperties() const {
115+
return std::make_unique<PropertiesUpdate>(identifier_, catalog_, metadata_);
116+
}
117+
113118
std::unique_ptr<Transaction> Table::NewTransaction() const {
114119
throw NotImplemented("Table::NewTransaction is not implemented");
115120
}

src/iceberg/table.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,12 @@ class ICEBERG_EXPORT Table {
104104
/// \return a vector of history entries
105105
const std::vector<SnapshotLogEntry>& history() const;
106106

107+
/// \brief Create a new PropertiesUpdate to update table properties and commit the
108+
/// changes
109+
///
110+
/// \return a new PropertiesUpdate instance
111+
virtual std::unique_ptr<PropertiesUpdate> UpdateProperties() const;
112+
107113
/// \brief Create a new table scan builder for this table
108114
///
109115
/// Once a table scan builder is created, it can be refined to project columns and

0 commit comments

Comments
 (0)