Skip to content

Commit cb375f1

Browse files
committed
feat: add table metadata reader
1 parent dbf9592 commit cb375f1

File tree

4 files changed

+90
-0
lines changed

4 files changed

+90
-0
lines changed

src/iceberg/json_internal.cc

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,4 +1209,20 @@ Result<std::unique_ptr<TableMetadata>> TableMetadataFromJson(const nlohmann::jso
12091209
return table_metadata;
12101210
}
12111211

1212+
/// Parse `json_string` into a `nlohmann::json` value.
///
/// Any `std::exception` raised by the parser is caught and reported as a
/// `JsonParseError` that carries the exception's message.
Result<nlohmann::json> FromJsonString(const std::string& json_string) {
  try {
    return nlohmann::json::parse(json_string);
  } catch (const std::exception& parse_failure) {
    return JsonParseError("Failed to parse JSON string: {}", parse_failure.what());
  }
}
1219+
1220+
/// Serialize `json` to its string form via `nlohmann::json::dump()`.
///
/// Any `std::exception` raised while dumping is caught and reported as a
/// `JsonParseError` that carries the exception's message.
Result<std::string> ToJsonString(const nlohmann::json& json) {
  try {
    return json.dump();
  } catch (const std::exception& dump_failure) {
    return JsonParseError("Failed to serialize to JSON string: {}", dump_failure.what());
  }
}
1227+
12121228
} // namespace iceberg

src/iceberg/json_internal.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,4 +244,16 @@ nlohmann::json ToJson(const TableMetadata& table_metadata);
244244
/// \return A `TableMetadata` object or an error if the conversion fails.
245245
Result<std::unique_ptr<TableMetadata>> TableMetadataFromJson(const nlohmann::json& json);
246246

247+
/// \brief Deserialize a JSON string into a `nlohmann::json` object.
248+
///
249+
/// \param json_string The JSON string to deserialize.
250+
/// \return A `nlohmann::json` object or an error if the deserialization fails.
251+
Result<nlohmann::json> FromJsonString(const std::string& json_string);
252+
253+
/// \brief Serialize a `nlohmann::json` object into a JSON string.
254+
///
255+
/// \param json The `nlohmann::json` object to serialize.
256+
/// \return A JSON string or an error if the serialization fails.
257+
Result<std::string> ToJsonString(const nlohmann::json& json);
258+
247259
} // namespace iceberg

src/iceberg/table_metadata.cc

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,15 @@
2323
#include <ranges>
2424
#include <string>
2525

26+
#include <nlohmann/json.hpp>
27+
28+
#include "iceberg/file_io.h"
29+
#include "iceberg/json_internal.h"
2630
#include "iceberg/partition_spec.h"
2731
#include "iceberg/result.h"
2832
#include "iceberg/schema.h"
2933
#include "iceberg/sort_order.h"
34+
#include "iceberg/util/macros.h"
3035
namespace iceberg {
3136

3237
std::string ToString(const SnapshotLogEntry& entry) {
@@ -69,4 +74,35 @@ Result<std::shared_ptr<SortOrder>> TableMetadata::SortOrder() const {
6974
return *iter;
7075
}
7176

77+
/// \brief Infer the compression codec of a table metadata file from its name.
///
/// Two gzip spellings are recognized: the legacy `<name>.metadata.json.gz` and
/// `<name>.gz.metadata.json`. Any other name containing `.metadata.json` is treated
/// as uncompressed.
///
/// \param file_name The name (or full location) of the table metadata file.
/// \return The detected codec, or an `InvalidArgument` error when `file_name` does not
/// contain `.metadata.json`.
Result<MetadataFileCodecType> TableMetadataUtil::FromFileName(
    std::string_view file_name) {
  constexpr std::string_view kMetadataSuffix = ".metadata.json";
  constexpr std::string_view kLegacyGzipSuffix = ".metadata.json.gz";
  constexpr std::string_view kGzipExtension = ".gz";

  // Search for the suffix as a whole substring. `find_last_of` must not be used here:
  // it matches any single character of its argument, which would only strip the final
  // character of the name and make the `.gz` check below unreachable.
  const auto suffix_pos = file_name.rfind(kMetadataSuffix);
  if (suffix_pos == std::string_view::npos) {
    return InvalidArgument("{} is not a valid metadata file", file_name);
  }

  // We have to be backward-compatible with .metadata.json.gz files
  if (file_name.ends_with(kLegacyGzipSuffix)) {
    return MetadataFileCodecType::kGzip;
  }

  // Everything before the last `.metadata.json`; a trailing `.gz` there marks gzip.
  const std::string_view file_name_without_suffix = file_name.substr(0, suffix_pos);
  if (file_name_without_suffix.ends_with(kGzipExtension)) {
    return MetadataFileCodecType::kGzip;
  }
  return MetadataFileCodecType::kNone;
}
95+
96+
/// \brief Load and parse the table metadata file stored at `location`.
///
/// The codec is derived from the file name first; gzip-compressed files are rejected
/// with `NotImplemented`. Otherwise the file is read through `io`, parsed as JSON and
/// converted with `TableMetadataFromJson`.
///
/// Each fallible step goes through ICEBERG_ASSIGN_OR_RAISE, which is defined outside
/// this file -- presumably it returns the step's error to the caller; confirm there.
///
/// \param io The file IO used to read the file.
/// \param location The location of the table metadata file.
/// \param length Optional length forwarded unchanged to `io.ReadFile`.
Result<std::unique_ptr<TableMetadata>> TableMetadataUtil::Read(
    FileIO& io, const std::string& location, std::optional<size_t> length) {
  ICEBERG_ASSIGN_OR_RAISE(auto codec, FromFileName(location));
  if (codec == MetadataFileCodecType::kGzip) {
    return NotImplemented("Reading gzip-compressed metadata files is not supported yet");
  }

  ICEBERG_ASSIGN_OR_RAISE(auto raw_text, io.ReadFile(location, length));
  ICEBERG_ASSIGN_OR_RAISE(auto document, FromJsonString(raw_text));
  return TableMetadataFromJson(document);
}
107+
72108
} // namespace iceberg

src/iceberg/table_metadata.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
#include <memory>
2626
#include <string>
27+
#include <string_view>
2728
#include <unordered_map>
2829
#include <vector>
2930

@@ -140,4 +141,29 @@ ICEBERG_EXPORT std::string ToString(const SnapshotLogEntry& entry);
140141
/// \brief Returns a string representation of a MetadataLogEntry
141142
ICEBERG_EXPORT std::string ToString(const MetadataLogEntry& entry);
142143

144+
/// \brief The codec type of the table metadata file.
///
/// Deliberately declared without ICEBERG_EXPORT: a visibility/dllexport specifier
/// written before `enum` does not attach to the enumeration (compilers ignore it with
/// a warning), and an enumeration defines no symbols that would need exporting.
enum class MetadataFileCodecType {
  /// No compression codec.
  kNone,
  /// Gzip compression.
  kGzip,
};
149+
150+
/// \brief Utility class for table metadata
151+
struct ICEBERG_EXPORT TableMetadataUtil {
152+
/// \brief Get the codec type from the table metadata file name.
153+
///
154+
/// \param file_name The name of the table metadata file.
155+
/// \return The codec type of the table metadata file.
156+
static Result<MetadataFileCodecType> FromFileName(std::string_view file_name);
157+
158+
/// \brief Read the table metadata file.
159+
///
160+
/// \param io The file IO to use to read the table metadata.
161+
/// \param location The location of the table metadata file.
162+
/// \param length The optional length of the table metadata file.
163+
/// \return The table metadata.
164+
static Result<std::unique_ptr<TableMetadata>> Read(class FileIO& io,
165+
const std::string& location,
166+
std::optional<size_t> length);
167+
};
168+
143169
} // namespace iceberg

0 commit comments

Comments
 (0)