Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
82e20f2
allow basic schema updates. need to check defaults and nested types
Tmonster Jan 7, 2026
6428048
almost working partition alter
Tmonster Jan 7, 2026
d1eadba
basic schema update tested and working
Tmonster Jan 8, 2026
5250d7f
can add 1 partition to iceberg tables. now we work on supporting inserts
Tmonster Jan 8, 2026
3149c5f
allow basic schema updates. need to check defaults and nested types
Tmonster Jan 7, 2026
faf891e
almost working partition alter
Tmonster Jan 7, 2026
b475e4d
basic schema update tested and working
Tmonster Jan 8, 2026
9858bf8
can add 1 partition to iceberg tables. now we work on supporting inserts
Tmonster Jan 8, 2026
61c693b
remove ability to add and remove columns. focusing now only on partit…
Tmonster Jan 13, 2026
9605ebd
Merge branch 'schema_change_support_iceberg_tables' of github.com:Tmo…
Tmonster Jan 14, 2026
83ca905
Merge branch 'table_changes_live_in_transaction' into schema_change_s…
Tmonster Jan 21, 2026
332243f
we do not need a working version of nessie
Tmonster Jan 21, 2026
adb8083
Merge branch 'table_changes_live_in_transaction' into schema_change_s…
Tmonster Jan 23, 2026
02cf348
if the partition spec already exists, use that
Tmonster Jan 24, 2026
34a5dcb
adding assert last assigned partition field id
Tmonster Jan 26, 2026
aad5e76
use last assigned field id. need to change it to column id
Tmonster Jan 26, 2026
669c994
cleaned up last assigned field id confusion
Tmonster Jan 26, 2026
a6d99ff
also assert last partition field id
Tmonster Jan 26, 2026
a9bc42e
Merge remote-tracking branch 'upstream/main' into schema_change_suppo…
Tmonster Jan 26, 2026
ed72476
also assert the default spec id
Tmonster Jan 26, 2026
0acf800
working, and no unnecessary snapshot ref checks
Tmonster Jan 26, 2026
dc10dce
remove some unnecessary includes and some comments
Tmonster Jan 27, 2026
d6c67cd
Merge remote-tracking branch 'upstream/main' into schema_change_suppo…
Tmonster Jan 29, 2026
3259368
create table () PARTITIONED BY works now
Tmonster Feb 2, 2026
60da540
small format fix things
Tmonster Feb 2, 2026
07bb069
Merge branch 'main' into schema_change_support_iceberg_tables
Tmonster Feb 2, 2026
7285cf4
make format-fix
Tmonster Feb 2, 2026
1507277
some small format-fix stuff
Tmonster Feb 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions src/catalog_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,9 +343,10 @@ void IRCAPI::CommitMultiTableUpdate(ClientContext &context, IcebergCatalog &cata

// Omit stack from error output
error._error.stack = vector<string>();
throw InvalidConfigurationException("Request to '%s' returned a non-200 status code (%s). \n message: %s\n type: %s\n reason: %s\n",
url_builder.GetURLEncoded(), EnumUtil::ToString(response->status),
error._error.message, error._error.type, response->reason);
throw InvalidConfigurationException(
"Request to '%s' returned a non-200 status code (%s). \n message: %s\n type: %s\n reason: %s\n",
url_builder.GetURLEncoded(), EnumUtil::ToString(response->status), error._error.message, error._error.type,
response->reason);
}
}

Expand Down Expand Up @@ -376,7 +377,6 @@ void IRCAPI::CommitTableDelete(ClientContext &context, IcebergCatalog &catalog,
url_builder.AddPathComponent(catalog.prefix);
url_builder.AddPathComponent("namespaces");
url_builder.AddPathComponent(schema_name);

url_builder.AddPathComponent("tables");
url_builder.AddPathComponent(table_name);
url_builder.SetParam("purgeRequested", Value::BOOLEAN(catalog.attach_options.purge_requested).ToString());
Expand Down Expand Up @@ -439,7 +439,10 @@ rest_api_objects::LoadTableResult IRCAPI::CommitNewTable(ClientContext &context,
yyjson_mut_doc_set_root(doc, root_object);

auto initial_schema = table->table_info.table_metadata.schemas[table->table_info.table_metadata.current_schema_id];
auto create_transaction = make_uniq<IcebergCreateTableRequest>(initial_schema, table->table_info.name);
auto &initial_partition_spec =
table->table_info.table_metadata.partition_specs[table->table_info.table_metadata.default_spec_id];
auto create_transaction =
make_uniq<IcebergCreateTableRequest>(initial_schema, initial_partition_spec, table->table_info.name);
// if stage create is supported, create the table with stage_create = true and the table update will
// commit the table.
auto support_stage_create = catalog.attach_options.supports_stage_create;
Expand Down
1 change: 1 addition & 0 deletions src/include/metadata/iceberg_column_definition.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ struct IcebergColumnDefinition {
ParseType(const string &name, int32_t field_id, bool required, rest_api_objects::Type &iceberg_type,
optional_ptr<rest_api_objects::PrimitiveTypeValue> initial_default = nullptr);
bool IsIcebergPrimitiveType();
unique_ptr<IcebergColumnDefinition> Copy() const;

public:
int32_t id;
Expand Down
2 changes: 2 additions & 0 deletions src/include/metadata/iceberg_partition_spec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ struct IcebergPartitionSpec {
bool IsPartitioned() const;
const IcebergPartitionSpecField &GetFieldBySourceId(idx_t field_id) const;
string FieldsToJSON() const;
static void FieldsToJson(yyjson_mut_doc *doc, yyjson_mut_val *root_object,
const vector<rest_api_objects::PartitionField> &fields);

public:
int32_t spec_id;
Expand Down
9 changes: 6 additions & 3 deletions src/include/metadata/iceberg_table_metadata.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,11 @@ struct IcebergTableMetadata {
const string GetDataPath() const;
const string GetMetadataPath() const;

//! For Nessie catalogs (version ?)
bool HasLastColumnId() const;
idx_t GetLastColumnId() const;
bool HasLastAssignedColumnFieldId() const;
idx_t GetLastAssignedColumnFieldId() const;

bool HasLastPartitionId() const;
int32_t GetLastPartitionFieldId() const;

const case_insensitive_map_t<string> &GetTableProperties() const;
string GetTableProperty(string property_string) const;
Expand All @@ -89,6 +91,7 @@ struct IcebergTableMetadata {
idx_t last_updated_ms;

optional_idx last_column_id;
optional_idx last_partition_field_id;

//! partition_spec_id -> partition spec
unordered_map<int32_t, IcebergPartitionSpec> partition_specs;
Expand Down
1 change: 1 addition & 0 deletions src/include/metadata/iceberg_table_schema.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class IcebergTableSchema {
const ColumnIndex &column_index, idx_t depth);

static void SchemaToJson(yyjson_mut_doc *doc, yyjson_mut_val *root_object, const rest_api_objects::Schema &schema);
shared_ptr<IcebergTableSchema> Copy() const;
const LogicalType &GetColumnTypeFromFieldId(idx_t field_id) const;

public:
Expand Down
7 changes: 5 additions & 2 deletions src/include/rest_catalog/objects/table_metadata.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,11 @@ class TableMetadata {
bool has_partition_specs = false;
int32_t default_spec_id;
bool has_default_spec_id = false;
int32_t last_partition_id;
bool has_last_partition_id = false;
// in the IRC spec, it is last_partition_id refers to the last field id
// so we rename it here for clarity
// use default_spec_id for current partition spec id
int32_t last_partition_field_id;
bool has_last_partition_field_id = false;
vector<SortOrder> sort_orders;
bool has_sort_orders = false;
int32_t default_sort_order_id;
Expand Down
4 changes: 4 additions & 0 deletions src/include/storage/catalog/iceberg_catalog.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ class IcebergCatalog : public Catalog {
bool SupportsTimeTravel() const override {
return true;
}
ErrorData SupportsCreateTable(BoundCreateTableInfo &info) override {
// we support partitioned by and WITH
return ErrorData();
}
void DropSchema(ClientContext &context, DropInfo &info) override;
optional_ptr<CatalogEntry> CreateSchema(CatalogTransaction transaction, CreateSchemaInfo &info) override;
void ScanSchemas(ClientContext &context, std::function<void(SchemaCatalogEntry &)> callback) override;
Expand Down
11 changes: 11 additions & 0 deletions src/include/storage/iceberg_table_information.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

namespace duckdb {
class IcebergTableSchema;
class ParsedExpression;
struct CreateTableInfo;
class IcebergSchemaEntry;
struct IcebergManifestEntry;
Expand All @@ -26,6 +27,11 @@ struct IcebergTableInformation {
optional_ptr<CatalogEntry> GetLatestSchema();
optional_ptr<CatalogEntry> GetSchemaVersion(optional_ptr<BoundAtClause> at);
optional_ptr<CatalogEntry> CreateSchemaVersion(IcebergTableSchema &table_schema);
idx_t GetMaxSchemaId();
idx_t GetNextPartitionSpecId();
int64_t GetExistingSpecId(IcebergPartitionSpec &spec);
void SetPartitionedBy(IcebergTransaction &transaction, const vector<unique_ptr<ParsedExpression>> &partition_keys,
const IcebergTableSchema &schema, bool first_partition_spec = false);
IRCAPITableCredentials GetVendedCredentials(ClientContext &context);
const string &BaseFilePath() const;

Expand All @@ -36,6 +42,10 @@ struct IcebergTableInformation {
vector<IcebergManifestEntry> &&data_files);
void AddSchema(IcebergTransaction &transaction);
void AddAssertCreate(IcebergTransaction &transaction);
void AddAssertDefaultSpecId(IcebergTransaction &transaction);
void AddAssertCurrentSchemaId(IcebergTransaction &transaction);
void AddAssertLastAssignedColumnFieldId(IcebergTransaction &transaction);
void AddAssertLastAssignedPartitionId(IcebergTransaction &transaction);
void AddAssignUUID(IcebergTransaction &transaction);
void AddUpradeFormatVersion(IcebergTransaction &transaction);
void AddSetCurrentSchema(IcebergTransaction &transaction);
Expand All @@ -47,6 +57,7 @@ struct IcebergTableInformation {
void RemoveProperties(IcebergTransaction &transaction, vector<string> properties);
void SetLocation(IcebergTransaction &transaction);
bool IsTransactionLocalTable(IcebergTransaction &transaction);

static string GetTableKey(const vector<string> &namespace_items, const string &table_name);
string GetTableKey() const;
// we pass the transaction, because we are only allowed to copy table information state provded by the catalog
Expand Down
4 changes: 4 additions & 0 deletions src/include/storage/iceberg_transaction_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ struct IcebergTransactionData {
// add a schema update for a table
void TableAddSchema();
void TableAddAssertCreate();
void TableAddAssertCurrentSchemaId();
void TableAddAssertLastAssignedColumnFieldId();
void TableAddAssertLastAssignedPartitionId();
void TableAddAssertDefaultSpecId();
void TableAssignUUID();
void TableAddUpradeFormatVersion();
void TableAddSetCurrentSchema();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,18 @@ struct IcebergTableInformation;
class IcebergTableEntry;

struct IcebergCreateTableRequest {
IcebergCreateTableRequest(shared_ptr<IcebergTableSchema> schema, string table_name);
IcebergCreateTableRequest(shared_ptr<IcebergTableSchema> schema, IcebergPartitionSpec &partition_spec,
string table_name);

public:
static shared_ptr<IcebergTableSchema> CreateIcebergSchema(const IcebergTableEntry *table_entry);
string CreateTableToJSON(std::unique_ptr<yyjson_mut_doc, YyjsonDocDeleter> doc_p);
static void PopulateSchema(yyjson_mut_doc *doc, yyjson_mut_val *schema_json, IcebergTableSchema &schema);
static void PopulateSchema(yyjson_mut_doc *doc, yyjson_mut_val *schema_json, const IcebergTableSchema &schema);

private:
string table_name;
shared_ptr<IcebergTableSchema> initial_schema;
IcebergPartitionSpec &partition_spec;
};

} // namespace duckdb
41 changes: 40 additions & 1 deletion src/include/storage/table_update/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ struct AddSchemaUpdate : public IcebergTableUpdate {
explicit AddSchemaUpdate(IcebergTableInformation &table_info);
void CreateUpdate(DatabaseInstance &db, ClientContext &context, IcebergCommitState &commit_state);

optional_ptr<IcebergTableSchema> table_schema = nullptr;
shared_ptr<IcebergTableSchema> table_schema = nullptr;
optional_idx last_column_id;
};

struct AssertCreateRequirement : public IcebergTableRequirement {
Expand All @@ -33,6 +34,44 @@ struct AssertCreateRequirement : public IcebergTableRequirement {
void CreateRequirement(DatabaseInstance &db, ClientContext &context, IcebergCommitState &commit_state);
};

struct AssertCurrentSchemaIdRequirement : public IcebergTableRequirement {
static constexpr const IcebergTableRequirementType TYPE = IcebergTableRequirementType::ASSERT_CURRENT_SCHEMA_ID;

explicit AssertCurrentSchemaIdRequirement(IcebergTableInformation &table_info);
void CreateRequirement(DatabaseInstance &db, ClientContext &context, IcebergCommitState &commit_state);

int32_t current_schema_id;
};

struct AssertLastAssignedColumnFieldIdRequirement : public IcebergTableRequirement {
static constexpr const IcebergTableRequirementType TYPE =
IcebergTableRequirementType::ASSERT_LAST_ASSIGNED_FIELD_ID;

explicit AssertLastAssignedColumnFieldIdRequirement(IcebergTableInformation &table_info);
void CreateRequirement(DatabaseInstance &db, ClientContext &context, IcebergCommitState &commit_state);

int32_t last_assigned_column_field_id;
};

struct AssertLastAssignedPartitionIdRequirement : public IcebergTableRequirement {
static constexpr const IcebergTableRequirementType TYPE =
IcebergTableRequirementType::ASSERT_LAST_ASSIGNED_PARTITION_ID;

explicit AssertLastAssignedPartitionIdRequirement(IcebergTableInformation &table_info);
void CreateRequirement(DatabaseInstance &db, ClientContext &context, IcebergCommitState &commit_state);

int32_t last_assigned_partition_id;
};

struct AssertDefaultSpecIdRequirement : public IcebergTableRequirement {
static constexpr const IcebergTableRequirementType TYPE = IcebergTableRequirementType::ASSERT_DEFAULT_SPEC_ID;

explicit AssertDefaultSpecIdRequirement(IcebergTableInformation &table_info);
void CreateRequirement(DatabaseInstance &db, ClientContext &context, IcebergCommitState &commit_state);

int32_t default_spec_id;
};

struct AssignUUIDUpdate : public IcebergTableUpdate {
static constexpr const IcebergTableUpdateType TYPE = IcebergTableUpdateType::ASSIGN_UUID;

Expand Down
13 changes: 13 additions & 0 deletions src/metadata/iceberg_column_definition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,4 +196,17 @@ bool IcebergColumnDefinition::IsIcebergPrimitiveType() {
}
}

unique_ptr<IcebergColumnDefinition> IcebergColumnDefinition::Copy() const {
auto res = make_uniq<IcebergColumnDefinition>();
res->id = id;
res->name = name;
res->type = type;
res->initial_default = initial_default;
res->required = required;
for (auto &child : children) {
res->children.push_back(child->Copy());
}
return res;
}

} // namespace duckdb
14 changes: 14 additions & 0 deletions src/metadata/iceberg_partition_spec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,18 @@ string IcebergPartitionSpec::FieldsToJSON() const {
return ICUtils::JsonToString(std::move(doc_p));
}

void IcebergPartitionSpec::FieldsToJson(yyjson_mut_doc *doc, yyjson_mut_val *root_object,
const vector<rest_api_objects::PartitionField> &fields) {
yyjson_mut_obj_add_strcpy(doc, root_object, "type", "struct");
auto fields_arr = yyjson_mut_obj_add_arr(doc, root_object, "fields");

for (auto &field : fields) {
auto field_obj = yyjson_mut_arr_add_obj(doc, fields_arr);
yyjson_mut_obj_add_strcpy(doc, field_obj, "name", field.name.c_str());
yyjson_mut_obj_add_strcpy(doc, field_obj, "transform", field.transform.value.c_str());
yyjson_mut_obj_add_int(doc, field_obj, "source-id", field.source_id);
yyjson_mut_obj_add_int(doc, field_obj, "field-id", field.field_id);
}
}

} // namespace duckdb
17 changes: 15 additions & 2 deletions src/metadata/iceberg_table_metadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,14 +259,23 @@ string IcebergTableMetadata::GetMetaDataPath(ClientContext &context, const strin
return GuessTableVersion(meta_path, fs, options);
}

bool IcebergTableMetadata::HasLastColumnId() const {
bool IcebergTableMetadata::HasLastAssignedColumnFieldId() const {
return last_column_id.IsValid();
}

idx_t IcebergTableMetadata::GetLastColumnId() const {
idx_t IcebergTableMetadata::GetLastAssignedColumnFieldId() const {
return last_column_id.GetIndex();
}

bool IcebergTableMetadata::HasLastPartitionId() const {
return last_partition_field_id.IsValid();
}

int32_t IcebergTableMetadata::GetLastPartitionFieldId() const {
D_ASSERT(HasLastPartitionId());
return static_cast<int32_t>(last_partition_field_id.GetIndex());
}

//! ----------- Parse the Metadata JSON -----------

rest_api_objects::TableMetadata IcebergTableMetadata::Parse(const string &path, FileSystem &fs,
Expand Down Expand Up @@ -355,6 +364,10 @@ IcebergTableMetadata IcebergTableMetadata::FromTableMetadata(const rest_api_obje
res.last_column_id = table_metadata.last_column_id;
}

if (table_metadata.has_last_partition_field_id) {
res.last_partition_field_id = table_metadata.last_partition_field_id;
}

return res;
}

Expand Down
11 changes: 10 additions & 1 deletion src/metadata/iceberg_table_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,16 @@ void IcebergTableSchema::SchemaToJson(yyjson_mut_doc *doc, yyjson_mut_val *root_
yyjson_mut_obj_add_arr(doc, root_object, "identifier-field-ids");
}

shared_ptr<IcebergTableSchema> IcebergTableSchema::Copy() const {
auto res = make_shared_ptr<IcebergTableSchema>();
res->schema_id = schema_id;
res->last_column_id = last_column_id;
for (auto &column : columns) {
res->columns.push_back(column->Copy());
}
return res;
}

const LogicalType &IcebergTableSchema::GetColumnTypeFromFieldId(idx_t field_id) const {
for (auto &column : columns) {
if (column->id == field_id) {
Expand All @@ -146,5 +156,4 @@ const LogicalType &IcebergTableSchema::GetColumnTypeFromFieldId(idx_t field_id)
throw InvalidInputException("GetColumnTypeFromFieldId:: field id %d does not exist in schema with id %d", field_id,
schema_id);
}

} // namespace duckdb
4 changes: 2 additions & 2 deletions src/rest_catalog/objects/table_metadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,9 +181,9 @@ string TableMetadata::TryFromJSON(yyjson_val *obj) {
}
auto last_partition_id_val = yyjson_obj_get(obj, "last-partition-id");
if (last_partition_id_val) {
has_last_partition_id = true;
has_last_partition_field_id = true;
if (yyjson_is_int(last_partition_id_val)) {
last_partition_id = yyjson_get_int(last_partition_id_val);
last_partition_field_id = yyjson_get_int(last_partition_id_val);
} else {
return StringUtil::Format(
"TableMetadata property 'last_partition_id' is not of type 'integer', found '%s' instead",
Expand Down
Loading
Loading