Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 8 additions & 30 deletions src/iceberg/statistics_file.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

#include <format>

#include "iceberg/util/formatter.h"

namespace iceberg {

std::string ToString(const BlobMetadata& blob_metadata) {
Expand All @@ -29,26 +31,9 @@ std::string ToString(const BlobMetadata& blob_metadata) {
"type='{}',sourceSnapshotId={},sourceSnapshotSequenceNumber={},",
blob_metadata.type, blob_metadata.source_snapshot_id,
blob_metadata.source_snapshot_sequence_number);
std::format_to(std::back_inserter(repr), "fields=[");
for (auto iter = blob_metadata.fields.cbegin(); iter != blob_metadata.fields.cend();
++iter) {
if (iter != blob_metadata.fields.cbegin()) {
std::format_to(std::back_inserter(repr), ",{}", *iter);
} else {
std::format_to(std::back_inserter(repr), "{}", *iter);
}
}
std::format_to(std::back_inserter(repr), "],properties=[");
for (auto iter = blob_metadata.properties.cbegin();
iter != blob_metadata.properties.cend(); ++iter) {
const auto& [key, value] = *iter;
if (iter != blob_metadata.properties.cbegin()) {
std::format_to(std::back_inserter(repr), ",{}:{}", key, value);
} else {
std::format_to(std::back_inserter(repr), "{}:{}", key, value);
}
}
repr += "]]";
std::format_to(std::back_inserter(repr), "fields={},", blob_metadata.fields);
std::format_to(std::back_inserter(repr), "properties={}", blob_metadata.properties);
std::format_to(std::back_inserter(repr), "]");
return repr;
}

Expand All @@ -59,16 +44,9 @@ std::string ToString(const StatisticsFile& statistics_file) {
statistics_file.snapshot_id, statistics_file.path,
statistics_file.file_size_in_bytes,
statistics_file.file_footer_size_in_bytes);
std::format_to(std::back_inserter(repr), "blobMetadata=[");
for (auto iter = statistics_file.blob_metadata.cbegin();
iter != statistics_file.blob_metadata.cend(); ++iter) {
if (iter != statistics_file.blob_metadata.cbegin()) {
std::format_to(std::back_inserter(repr), ",{}", ToString(*iter));
} else {
std::format_to(std::back_inserter(repr), "{}", ToString(*iter));
}
}
repr += "]]";
std::format_to(std::back_inserter(repr), "blobMetadata={}",
statistics_file.blob_metadata);
std::format_to(std::back_inserter(repr), "]");
return repr;
}

Expand Down
102 changes: 102 additions & 0 deletions src/iceberg/util/formatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@

#include <concepts>
#include <format>
#include <map>
#include <string_view>
#include <unordered_map>
#include <vector>

#include "iceberg/util/formattable.h"

Expand All @@ -40,3 +43,102 @@ struct std::formatter<Derived> : std::formatter<std::string_view> {
return std::formatter<string_view>::format(obj.ToString(), ctx);
}
};

/// \brief std::formatter specialization for std::vector
template <typename T>
struct std::formatter<std::vector<T>> : std::formatter<std::string_view> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, I think the "idiomatic" way is supposed to be std::format::join and ranges. I wonder if specializing on std types may come back to bite us later.

Example: https://stackoverflow.com/a/77993463

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or another example, albeit C++23: https://stackoverflow.com/a/71196142

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to rename this file to formatter_internal.h so we don't expose it to the downstream. In this approach, we take full control of it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SGTM.

It might be good to expose formatter specializations on our own types, just so long as they're separate from the specializations for std types

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah ok I see that's what you've done.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes :)

template <class FormatContext>
auto format(const std::vector<T>& vec, FormatContext& ctx) const {
std::string result = "[";

bool first = true;
for (const auto& item : vec) {
if (!first) {
std::format_to(std::back_inserter(result), ", ");
}
if constexpr (requires { *item; }) {
if (item) {
std::format_to(std::back_inserter(result), "{}", *item);
} else {
std::format_to(std::back_inserter(result), "null");
}
} else {
std::format_to(std::back_inserter(result), "{}", item);
}
first = false;
}

std::format_to(std::back_inserter(result), "]");
return std::formatter<std::string_view>::format(result, ctx);
}
};

/// \brief Helper template for formatting map-like containers
template <typename MapType>
std::string FormatMap(const MapType& map) {
std::string result = "{";

bool first = true;
for (const auto& [key, value] : map) {
if (!first) {
std::format_to(std::back_inserter(result), ", ");
}

// Format key (handle if it's a smart pointer)
if constexpr (requires { *key; }) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't this also fire on something like std::optional that might be undesirable?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've modified the concept to strictly check smart pointer type.

if (key) {
std::format_to(std::back_inserter(result), "{}: ", *key);
} else {
std::format_to(std::back_inserter(result), "null: ");
}
} else {
std::format_to(std::back_inserter(result), "{}: ", key);
}

// Format value (handle if it's a smart pointer)
if constexpr (requires { *value; }) {
if (value) {
std::format_to(std::back_inserter(result), "{}", *value);
} else {
std::format_to(std::back_inserter(result), "null");
}
} else {
std::format_to(std::back_inserter(result), "{}", value);
}

first = false;
}

std::format_to(std::back_inserter(result), "}}");
return result;
}

/// \brief std::formatter specialization for std::map
template <typename K, typename V>
struct std::formatter<std::map<K, V>> : std::formatter<std::string_view> {
template <class FormatContext>
auto format(const std::map<K, V>& map, FormatContext& ctx) const {
return std::formatter<std::string_view>::format(FormatMap(map), ctx);
}
};

/// \brief std::formatter specialization for std::unordered_map
template <typename K, typename V>
struct std::formatter<std::unordered_map<K, V>> : std::formatter<std::string_view> {
template <class FormatContext>
auto format(const std::unordered_map<K, V>& map, FormatContext& ctx) const {
return std::formatter<std::string_view>::format(FormatMap(map), ctx);
}
};

/// \brief std::formatter specialization for any type that has a ToString function
template <typename T>
requires requires(const T& t) {
{ ToString(t) } -> std::convertible_to<std::string>;
}
struct std::formatter<T> : std::formatter<std::string_view> {
template <class FormatContext>
auto format(const T& value, FormatContext& ctx) const {
return std::formatter<std::string_view>::format(ToString(value), ctx);
}
};
10 changes: 5 additions & 5 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,6 @@ target_sources(schema_test
target_link_libraries(schema_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock)
add_test(NAME schema_test COMMAND schema_test)

add_executable(expected_test)
target_sources(expected_test PRIVATE expected_test.cc)
target_link_libraries(expected_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock)
add_test(NAME expected_test COMMAND expected_test)

add_executable(expression_test)
target_sources(expression_test PRIVATE expression_test.cc)
target_link_libraries(expression_test PRIVATE iceberg_static GTest::gtest_main
Expand All @@ -61,6 +56,11 @@ target_link_libraries(json_serde_test PRIVATE iceberg_static GTest::gtest_main
GTest::gmock)
add_test(NAME json_serde_test COMMAND json_serde_test)

add_executable(util_test)
target_sources(util_test PRIVATE expected_test.cc formatter_test.cc)
target_link_libraries(util_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock)
add_test(NAME util_test COMMAND util_test)

if(ICEBERG_BUILD_BUNDLE)
add_executable(avro_test)
target_sources(avro_test PRIVATE avro_test.cc)
Expand Down
163 changes: 163 additions & 0 deletions test/formatter_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/util/formatter.h"

#include <format>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include <gtest/gtest.h>

#include "iceberg/statistics_file.h"

namespace iceberg {

// Tests for the std::format specializations
TEST(FormatterTest, VectorFormat) {
std::vector<int> empty;
EXPECT_EQ("[]", std::format("{}", empty));

std::vector<int> nums = {1, 2, 3, 4, 5};
EXPECT_EQ("[1, 2, 3, 4, 5]", std::format("{}", nums));

std::vector<std::string> names = {"Alice", "Bob", "Charlie"};
EXPECT_EQ("[Alice, Bob, Charlie]", std::format("{}", names));
}

TEST(FormatterTest, MapFormat) {
std::map<std::string, int> empty;
EXPECT_EQ("{}", std::format("{}", empty));

std::map<std::string, int> ages = {{"Alice", 30}, {"Bob", 25}, {"Charlie", 35}};
EXPECT_EQ("{Alice: 30, Bob: 25, Charlie: 35}", std::format("{}", ages));
}

TEST(FormatterTest, UnorderedMapFormat) {
std::unordered_map<std::string, double> empty;
EXPECT_EQ("{}", std::format("{}", empty));

std::unordered_map<std::string, double> scores = {
{"Alice", 95.5}, {"Bob", 87.0}, {"Charlie", 92.3}};
std::string str = std::format("{}", scores);
EXPECT_TRUE(str.find("Alice: 95.5") != std::string::npos);
EXPECT_TRUE(str.find("Bob: 87") != std::string::npos);
EXPECT_TRUE(str.find("Charlie: 92.3") != std::string::npos);
}

TEST(FormatterTest, NestedContainersFormat) {
std::vector<std::map<std::string, int>> nested = {{{"a", 1}, {"b", 2}},
{{"c", 3}, {"d", 4}}};

EXPECT_EQ("[{a: 1, b: 2}, {c: 3, d: 4}]", std::format("{}", nested));

std::map<std::string, std::vector<int>> nested_map = {
{"primes", {2, 3, 5, 7, 11}}, {"fibonacci", {1, 1, 2, 3, 5, 8, 13}}};
std::string result = std::format("{}", nested_map);
EXPECT_TRUE(result.find("primes") != std::string::npos);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that you can do EXPECT_THAT(result, testing::HasSubstr("primes")); which will also give a much better error on failure

EXPECT_TRUE(result.find("fibonacci") != std::string::npos);
EXPECT_TRUE(result.find("[2, 3, 5, 7, 11]") != std::string::npos);
EXPECT_TRUE(result.find("[1, 1, 2, 3, 5, 8, 13]") != std::string::npos);
}

TEST(FormatterTest, EdgeCasesFormat) {
std::vector<int> single_vec = {42};
EXPECT_EQ("[42]", std::format("{}", single_vec));

std::map<std::string, int> single_map = {{"key", 42}};
EXPECT_EQ("{key: 42}", std::format("{}", single_map));

std::vector<std::vector<int>> nested_empty = {{}, {1, 2}, {}};
EXPECT_EQ("[[], [1, 2], []]", std::format("{}", nested_empty));
}

TEST(FormatterTest, SmartPointerFormat) {
std::vector<std::shared_ptr<int>> int_ptrs;
int_ptrs.push_back(std::make_shared<int>(42));
int_ptrs.push_back(std::make_shared<int>(123));
int_ptrs.push_back(nullptr);
EXPECT_EQ("[42, 123, null]", std::format("{}", int_ptrs));

std::vector<std::shared_ptr<std::string>> str_ptrs;
str_ptrs.push_back(std::make_shared<std::string>("hello"));
str_ptrs.push_back(std::make_shared<std::string>("world"));
str_ptrs.push_back(nullptr);
EXPECT_EQ("[hello, world, null]", std::format("{}", str_ptrs));

std::map<std::string, std::shared_ptr<int>> map_with_ptr_values;
map_with_ptr_values["one"] = std::make_shared<int>(1);
map_with_ptr_values["two"] = std::make_shared<int>(2);
map_with_ptr_values["null"] = nullptr;
EXPECT_EQ("{null: null, one: 1, two: 2}", std::format("{}", map_with_ptr_values));

std::unordered_map<std::string, std::shared_ptr<double>> scores;
scores["Alice"] = std::make_shared<double>(95.5);
scores["Bob"] = std::make_shared<double>(87.0);
scores["Charlie"] = nullptr;
std::string str = std::format("{}", scores);
EXPECT_TRUE(str.find("Alice: 95.5") != std::string::npos);
EXPECT_TRUE(str.find("Bob: 87") != std::string::npos);
EXPECT_TRUE(str.find("Charlie: null") != std::string::npos);

std::vector<std::map<std::string, std::shared_ptr<int>>> nested;
std::map<std::string, std::shared_ptr<int>> map1;
map1["a"] = std::make_shared<int>(1);
map1["b"] = std::make_shared<int>(2);
std::map<std::string, std::shared_ptr<int>> map2;
map2["c"] = std::make_shared<int>(3);
map2["d"] = nullptr;
nested.push_back(std::move(map1));
nested.push_back(std::move(map2));
EXPECT_EQ("[{a: 1, b: 2}, {c: 3, d: null}]", std::format("{}", nested));
}

TEST(FormatterTest, StatisticsFileFormat) {
StatisticsFile statistics_file{
.snapshot_id = 123,
.path = "test_path",
.file_size_in_bytes = 100,
.file_footer_size_in_bytes = 20,
.blob_metadata = {BlobMetadata{.type = "type1",
.source_snapshot_id = 1,
.source_snapshot_sequence_number = 1,
.fields = {1, 2, 3},
.properties = {{"key1", "value1"}}},
BlobMetadata{.type = "type2",
.source_snapshot_id = 2,
.source_snapshot_sequence_number = 2,
.fields = {4, 5, 6},
.properties = {}}}};

const std::string expected =
"StatisticsFile["
"snapshotId=123,path=test_path,fileSizeInBytes=100,fileFooterSizeInBytes=20,"
"blobMetadata=["
"BlobMetadata[type='type1',sourceSnapshotId=1,sourceSnapshotSequenceNumber=1,"
"fields=[1, 2, 3],properties={key1: value1}], "
"BlobMetadata[type='type2',sourceSnapshotId=2,sourceSnapshotSequenceNumber=2,"
"fields=[4, 5, 6],properties={}]"
"]"
"]";
EXPECT_EQ(expected, std::format("{}", statistics_file));
}

} // namespace iceberg
Loading