|
23 | 23 | #include <ranges> |
24 | 24 | #include <string> |
25 | 25 |
|
| 26 | +#include <nlohmann/json.hpp> |
| 27 | + |
| 28 | +#include "iceberg/file_io.h" |
| 29 | +#include "iceberg/json_internal.h" |
26 | 30 | #include "iceberg/partition_spec.h" |
27 | 31 | #include "iceberg/result.h" |
28 | 32 | #include "iceberg/schema.h" |
| 33 | +#include "iceberg/snapshot.h" |
29 | 34 | #include "iceberg/sort_order.h" |
| 35 | +#include "iceberg/util/macros.h" |
| 36 | + |
30 | 37 | namespace iceberg { |
31 | 38 |
|
32 | 39 | std::string ToString(const SnapshotLogEntry& entry) { |
@@ -69,4 +76,96 @@ Result<std::shared_ptr<SortOrder>> TableMetadata::SortOrder() const { |
69 | 76 | return *iter; |
70 | 77 | } |
71 | 78 |
|
| 79 | +template <typename T> |
| 80 | +bool SharedPtrVectorEquals(const std::vector<std::shared_ptr<T>>& lhs, |
| 81 | + const std::vector<std::shared_ptr<T>>& rhs) { |
| 82 | + if (lhs.size() != rhs.size()) { |
| 83 | + return false; |
| 84 | + } |
| 85 | + for (size_t i = 0; i < lhs.size(); ++i) { |
| 86 | + if (*lhs[i] != *rhs[i]) { |
| 87 | + return false; |
| 88 | + } |
| 89 | + } |
| 90 | + return true; |
| 91 | +} |
| 92 | + |
| 93 | +bool SnapshotRefEquals( |
| 94 | + const std::unordered_map<std::string, std::shared_ptr<SnapshotRef>>& lhs, |
| 95 | + const std::unordered_map<std::string, std::shared_ptr<SnapshotRef>>& rhs) { |
| 96 | + if (lhs.size() != rhs.size()) { |
| 97 | + return false; |
| 98 | + } |
| 99 | + for (const auto& [key, value] : lhs) { |
| 100 | + auto iter = rhs.find(key); |
| 101 | + if (iter == rhs.end()) { |
| 102 | + return false; |
| 103 | + } |
| 104 | + if (*iter->second != *value) { |
| 105 | + return false; |
| 106 | + } |
| 107 | + } |
| 108 | + return true; |
| 109 | +} |
| 110 | + |
| 111 | +bool operator==(const TableMetadata& lhs, const TableMetadata& rhs) { |
| 112 | + return lhs.format_version == rhs.format_version && lhs.table_uuid == rhs.table_uuid && |
| 113 | + lhs.location == rhs.location && |
| 114 | + lhs.last_sequence_number == rhs.last_sequence_number && |
| 115 | + lhs.last_updated_ms == rhs.last_updated_ms && |
| 116 | + lhs.last_column_id == rhs.last_column_id && |
| 117 | + lhs.current_schema_id == rhs.current_schema_id && |
| 118 | + SharedPtrVectorEquals(lhs.schemas, rhs.schemas) && |
| 119 | + lhs.default_spec_id == rhs.default_spec_id && |
| 120 | + lhs.last_partition_id == rhs.last_partition_id && |
| 121 | + lhs.properties == rhs.properties && |
| 122 | + lhs.current_snapshot_id == rhs.current_snapshot_id && |
| 123 | + SharedPtrVectorEquals(lhs.snapshots, rhs.snapshots) && |
| 124 | + lhs.snapshot_log == rhs.snapshot_log && lhs.metadata_log == rhs.metadata_log && |
| 125 | + SharedPtrVectorEquals(lhs.sort_orders, rhs.sort_orders) && |
| 126 | + lhs.default_sort_order_id == rhs.default_sort_order_id && |
| 127 | + SnapshotRefEquals(lhs.refs, rhs.refs) && |
| 128 | + SharedPtrVectorEquals(lhs.statistics, rhs.statistics) && |
| 129 | + SharedPtrVectorEquals(lhs.partition_statistics, rhs.partition_statistics) && |
| 130 | + lhs.next_row_id == rhs.next_row_id; |
| 131 | +} |
| 132 | + |
| 133 | +Result<MetadataFileCodecType> TableMetadataUtil::FromFileName( |
| 134 | + std::string_view file_name) { |
| 135 | + if (file_name.find(".metadata.json") == std::string::npos) { |
| 136 | + return InvalidArgument("{} is not a valid metadata file", file_name); |
| 137 | + } |
| 138 | + |
| 139 | + // We have to be backward-compatible with .metadata.json.gz files |
| 140 | + if (file_name.ends_with(".metadata.json.gz")) { |
| 141 | + return MetadataFileCodecType::kGzip; |
| 142 | + } |
| 143 | + |
| 144 | + std::string_view file_name_without_suffix = |
| 145 | + file_name.substr(0, file_name.find_last_of(".metadata.json")); |
| 146 | + if (file_name_without_suffix.ends_with(".gz")) { |
| 147 | + return MetadataFileCodecType::kGzip; |
| 148 | + } |
| 149 | + return MetadataFileCodecType::kNone; |
| 150 | +} |
| 151 | + |
| 152 | +Result<std::unique_ptr<TableMetadata>> TableMetadataUtil::Read( |
| 153 | + FileIO& io, const std::string& location, std::optional<size_t> length) { |
| 154 | + ICEBERG_ASSIGN_OR_RAISE(auto codec_type, FromFileName(location)); |
| 155 | + if (codec_type == MetadataFileCodecType::kGzip) { |
| 156 | + return NotImplemented("Reading gzip-compressed metadata files is not supported yet"); |
| 157 | + } |
| 158 | + |
| 159 | + ICEBERG_ASSIGN_OR_RAISE(auto content, io.ReadFile(location, length)); |
| 160 | + ICEBERG_ASSIGN_OR_RAISE(auto json, FromJsonString(content)); |
| 161 | + return TableMetadataFromJson(json); |
| 162 | +} |
| 163 | + |
| 164 | +Status TableMetadataUtil::Write(FileIO& io, const std::string& location, |
| 165 | + const TableMetadata& metadata) { |
| 166 | + auto json = ToJson(metadata); |
| 167 | + ICEBERG_ASSIGN_OR_RAISE(auto json_string, ToJsonString(json)); |
| 168 | + return io.WriteFile(location, json_string); |
| 169 | +} |
| 170 | + |
72 | 171 | } // namespace iceberg |
0 commit comments