2222#include < algorithm>
2323#include < cstdint>
2424#include < format>
25+ #include < ranges>
2526#include < regex>
2627#include < type_traits>
2728#include < unordered_set>
2829
30+ #include < iceberg/table.h>
2931#include < nlohmann/json.hpp>
3032
33+ #include " iceberg/partition_field.h"
3134#include " iceberg/partition_spec.h"
3235#include " iceberg/result.h"
3336#include " iceberg/schema.h"
@@ -248,7 +251,7 @@ Result<std::vector<T>> FromJsonList(
248251 list.emplace_back (std::move (entry));
249252 }
250253 }
251- return {} ;
254+ return list ;
252255}
253256
254257// / \brief Parse a list of items from a JSON object.
@@ -471,7 +474,7 @@ nlohmann::json ToJson(const Type& type) {
471474
472475nlohmann::json ToJson (const Schema& schema) {
473476 nlohmann::json json = ToJson (static_cast <const Type&>(schema));
474- json[ kSchemaId ] = schema.schema_id ();
477+ SetOptionalField ( json, kSchemaId , schema.schema_id () );
475478 // TODO(gangwu): add identifier-field-ids.
476479 return json;
477480}
@@ -625,7 +628,7 @@ Result<std::unique_ptr<SchemaField>> FieldFromJson(const nlohmann::json& json) {
625628}
626629
627630Result<std::unique_ptr<Schema>> SchemaFromJson (const nlohmann::json& json) {
628- ICEBERG_ASSIGN_OR_RAISE (auto schema_id, GetJsonValue <int32_t >(json, kSchemaId ));
631+ ICEBERG_ASSIGN_OR_RAISE (auto schema_id, GetJsonValueOptional <int32_t >(json, kSchemaId ));
629632 ICEBERG_ASSIGN_OR_RAISE (auto type, TypeFromJson (json));
630633
631634 if (type->type_id () != TypeId::kStruct ) [[unlikely]] {
@@ -658,9 +661,16 @@ nlohmann::json ToJson(const PartitionSpec& partition_spec) {
658661}
659662
660663Result<std::unique_ptr<PartitionField>> PartitionFieldFromJson (
661- const nlohmann::json& json) {
664+ const nlohmann::json& json, bool allow_field_id_missing ) {
662665 ICEBERG_ASSIGN_OR_RAISE (auto source_id, GetJsonValue<int32_t >(json, kSourceId ));
663- ICEBERG_ASSIGN_OR_RAISE (auto field_id, GetJsonValue<int32_t >(json, kFieldId ));
666+ int32_t field_id;
667+ if (allow_field_id_missing) {
668+ // Partition field id in v1 is not tracked, so we use -1 to indicate that.
669+ ICEBERG_ASSIGN_OR_RAISE (field_id, GetJsonValueOrDefault<int32_t >(
670+ json, kFieldId , SchemaField::kInvalidFieldId ));
671+ } else {
672+ ICEBERG_ASSIGN_OR_RAISE (field_id, GetJsonValue<int32_t >(json, kFieldId ));
673+ }
664674 ICEBERG_ASSIGN_OR_RAISE (
665675 auto transform,
666676 GetJsonValue<std::string>(json, kTransform ).and_then (TransformFromString));
@@ -905,7 +915,7 @@ nlohmann::json ToJson(const TableMetadata& table_metadata) {
905915 }
906916
907917 // write the current schema ID and schema list
908- json[ kCurrentSchemaId ] = table_metadata.current_schema_id ;
918+ SetOptionalField ( json, kCurrentSchemaId , table_metadata.current_schema_id ) ;
909919 json[kSchemas ] = ToJsonList (table_metadata.schemas );
910920
911921 // for older readers, continue writing the default spec as "partition-spec"
@@ -963,7 +973,8 @@ namespace {
963973// /
964974// / \return The current schema or parse error.
965975Result<std::shared_ptr<Schema>> ParseSchemas (
966- const nlohmann::json& json, int8_t format_version, int32_t & current_schema_id,
976+ const nlohmann::json& json, int8_t format_version,
977+ std::optional<int32_t >& current_schema_id,
967978 std::vector<std::shared_ptr<Schema>>& schemas) {
968979 std::shared_ptr<Schema> current_schema;
969980 if (json.contains (kSchemas )) {
@@ -986,7 +997,7 @@ Result<std::shared_ptr<Schema>> ParseSchemas(
986997 }
987998 if (!current_schema) {
988999 return JsonParseError (" Cannot find schema with {}={} from {}" , kCurrentSchemaId ,
989- current_schema_id, schema_array.dump ());
1000+ current_schema_id. value () , schema_array.dump ());
9901001 }
9911002 } else {
9921003 if (format_version != 1 ) {
@@ -1031,13 +1042,30 @@ Status ParsePartitionSpecs(const nlohmann::json& json, int8_t format_version,
10311042 return JsonParseError (" {} must exist in format v{}" , kPartitionSpecs ,
10321043 format_version);
10331044 }
1034- default_spec_id = TableMetadata::kInitialSpecId ;
10351045
1036- ICEBERG_ASSIGN_OR_RAISE (auto spec, GetJsonValue<nlohmann::json>(json, kPartitionSpec )
1037- .and_then ([current_schema](const auto & json) {
1038- return PartitionSpecFromJson (current_schema,
1039- json);
1040- }));
1046+ ICEBERG_ASSIGN_OR_RAISE (auto partition_spec_json,
1047+ GetJsonValue<nlohmann::json>(json, kPartitionSpec ));
1048+ if (!partition_spec_json.is_array ()) {
1049+ return JsonParseError (" Cannot parse v1 partition spec from non-array: {}" ,
1050+ partition_spec_json.dump ());
1051+ }
1052+
1053+ int32_t next_partition_field_id = PartitionSpec::kLegacyPartitionDataIdStart ;
1054+ std::vector<PartitionField> fields;
1055+ for (const auto & entry_json : partition_spec_json) {
1056+ ICEBERG_ASSIGN_OR_RAISE (auto field, PartitionFieldFromJson (entry_json));
1057+ int32_t field_id = field->field_id ();
1058+ if (field_id == SchemaField::kInvalidFieldId ) {
1059+ // If the field ID is not set, we need to assign a new one
1060+ field_id = next_partition_field_id++;
1061+ }
1062+ fields.emplace_back (field->source_id (), field_id, std::string (field->name ()),
1063+ std::move (field->transform ()));
1064+ }
1065+
1066+ auto spec = std::make_unique<PartitionSpec>(
1067+ current_schema, PartitionSpec::kInitialSpecId , std::move (fields));
1068+ default_spec_id = spec->spec_id ();
10411069 partition_specs.push_back (std::move (spec));
10421070 }
10431071
@@ -1066,7 +1094,9 @@ Status ParseSortOrders(const nlohmann::json& json, int8_t format_version,
10661094 if (format_version > 1 ) {
10671095 return JsonParseError (" {} must exist in format v{}" , kSortOrders , format_version);
10681096 }
1069- return NotImplementedError (" Assign a default sort order" );
1097+ auto sort_order = SortOrder::Unsorted ();
1098+ default_sort_order_id = sort_order->order_id ();
1099+ sort_orders.push_back (std::move (sort_order));
10701100 }
10711101 return {};
10721102}
@@ -1119,10 +1149,16 @@ Result<std::unique_ptr<TableMetadata>> TableMetadataFromJson(const nlohmann::jso
11191149 return JsonParseError (" {} must exist in format v{}" , kLastPartitionId ,
11201150 table_metadata->format_version );
11211151 }
1122- // TODO(gangwu): iterate all partition specs to find the largest partition
1123- // field id or assign a default value for unpartitioned tables. However,
1124- // PartitionSpec::lastAssignedFieldId() is not implemented yet.
1125- return NotImplementedError (" Find the largest partition field id" );
1152+
1153+ if (table_metadata->partition_specs .empty ()) {
1154+ table_metadata->last_partition_id =
1155+ PartitionSpec::Unpartitioned ()->last_assigned_field_id ();
1156+ } else {
1157+ table_metadata->last_partition_id =
1158+ std::ranges::max (table_metadata->partition_specs , {}, [](const auto & spec) {
1159+ return spec->last_assigned_field_id ();
1160+ })->last_assigned_field_id ();
1161+ }
11261162 }
11271163
11281164 ICEBERG_RETURN_UNEXPECTED (ParseSortOrders (json, table_metadata->format_version ,
@@ -1134,10 +1170,9 @@ Result<std::unique_ptr<TableMetadata>> TableMetadataFromJson(const nlohmann::jso
11341170 }
11351171
11361172 // This field is optional, but internally we set this to -1 when not set
1137- ICEBERG_ASSIGN_OR_RAISE (
1138- table_metadata->current_snapshot_id ,
1139- GetJsonValueOrDefault<int64_t >(json, kCurrentSnapshotId ,
1140- TableMetadata::kInvalidSnapshotId ));
1173+ ICEBERG_ASSIGN_OR_RAISE (table_metadata->current_snapshot_id ,
1174+ GetJsonValueOrDefault<int64_t >(json, kCurrentSnapshotId ,
1175+ Snapshot::kInvalidSnapshotId ));
11411176
11421177 if (table_metadata->format_version >= 3 ) {
11431178 ICEBERG_ASSIGN_OR_RAISE (table_metadata->next_row_id ,
@@ -1155,7 +1190,7 @@ Result<std::unique_ptr<TableMetadata>> TableMetadataFromJson(const nlohmann::jso
11551190 ICEBERG_ASSIGN_OR_RAISE (
11561191 table_metadata->refs ,
11571192 FromJsonMap<std::shared_ptr<SnapshotRef>>(json, kRefs , SnapshotRefFromJson));
1158- } else if (table_metadata->current_snapshot_id != TableMetadata ::kInvalidSnapshotId ) {
1193+ } else if (table_metadata->current_snapshot_id != Snapshot ::kInvalidSnapshotId ) {
11591194 table_metadata->refs [" main" ] = std::make_unique<SnapshotRef>(SnapshotRef{
11601195 .snapshot_id = table_metadata->current_snapshot_id ,
11611196 .retention = SnapshotRef::Branch{},
0 commit comments