Skip to content

Commit 1274056

Browse files
committed
fix: make Uuid a wrapper class
1 parent 7ca5ee8 commit 1274056

File tree

5 files changed

+112
-54
lines changed

5 files changed

+112
-54
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ set(ICEBERG_SOURCES
5353
util/murmurhash3_internal.cc
5454
util/timepoint.cc
5555
util/gzip_internal.cc
56-
util/uuid_util.cc)
56+
util/uuid.cc)
5757

5858
set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)
5959
set(ICEBERG_SHARED_BUILD_INTERFACE_LIBS)

src/iceberg/util/uuid_util.cc renamed to src/iceberg/util/uuid.cc

Lines changed: 50 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* under the License.
1818
*/
1919

20-
#include "iceberg/util/uuid_util.h"
20+
#include "iceberg/util/uuid.h"
2121

2222
#include <chrono>
2323
#include <cstdint>
@@ -32,7 +32,9 @@
3232

3333
namespace iceberg {
3434

35-
std::array<uint8_t, 16> UUIDUtils::GenerateUuidV4() {
35+
Uuid::Uuid(std::array<uint8_t, kUuidSize> data) : data_(std::move(data)) {}
36+
37+
Uuid Uuid::GenerateV4() {
3638
static std::random_device rd;
3739
static std::mt19937 gen(rd());
3840
static std::uniform_int_distribution<uint64_t> distrib(
@@ -55,20 +57,20 @@ std::array<uint8_t, 16> UUIDUtils::GenerateUuidV4() {
5557
// Set variant field, top two bits are 1, 0
5658
uuid[8] = (uuid[8] & 0x3F) | 0x80;
5759

58-
return uuid;
60+
return Uuid(std::move(uuid));
5961
}
6062

61-
std::array<uint8_t, 16> UUIDUtils::GenerateUuidV7() {
63+
Uuid Uuid::GenerateV7() {
6264
// Get the current time in milliseconds since the Unix epoch
6365
auto now = std::chrono::system_clock::now();
6466
auto duration_since_epoch = now.time_since_epoch();
6567
auto unix_ts_ms =
6668
std::chrono::duration_cast<std::chrono::milliseconds>(duration_since_epoch).count();
6769

68-
return GenerateUuidV7(static_cast<uint64_t>(unix_ts_ms));
70+
return GenerateV7(static_cast<uint64_t>(unix_ts_ms));
6971
}
7072

71-
std::array<uint8_t, 16> UUIDUtils::GenerateUuidV7(uint64_t unix_ts_ms) {
73+
Uuid Uuid::GenerateV7(uint64_t unix_ts_ms) {
7274
std::array<uint8_t, 16> uuid = {};
7375

7476
// Set the timestamp (in milliseconds since Unix epoch)
@@ -98,14 +100,14 @@ std::array<uint8_t, 16> UUIDUtils::GenerateUuidV7(uint64_t unix_ts_ms) {
98100
// set variant field, top two bits are 1, 0
99101
uuid[8] = (uuid[8] & 0x3F) | 0x80;
100102

101-
return uuid;
103+
return Uuid(std::move(uuid));
102104
}
103105

104106
namespace {
105107

106108
constexpr std::array<uint8_t, 256> BuildHexTable() {
107109
std::array<uint8_t, 256> buf{};
108-
for (int i = 0; i < 256; i++) {
110+
for (int32_t i = 0; i < 256; i++) {
109111
if (i >= '0' && i <= '9') {
110112
buf[i] = static_cast<uint8_t>(i - '0');
111113
} else if (i >= 'a' && i <= 'f') {
@@ -121,35 +123,35 @@ constexpr std::array<uint8_t, 256> BuildHexTable() {
121123

122124
constexpr std::array<uint8_t, 256> BuildShl4Table() {
123125
std::array<uint8_t, 256> buf{};
124-
for (int i = 0; i < 256; i++) {
126+
for (int32_t i = 0; i < 256; i++) {
125127
buf[i] = static_cast<uint8_t>(i << 4);
126128
}
127129
return buf;
128130
}
129131

130-
constexpr auto HEX_TABLE = BuildHexTable();
131-
constexpr auto SHL4_TABLE = BuildShl4Table();
132+
constexpr auto kHexTable = BuildHexTable();
133+
constexpr auto kShl4Table = BuildShl4Table();
132134

133135
// Parse a UUID string without dashes, e.g. "67e5504410b1426f9247bb680e5fe0c8"
134-
inline Result<std::array<uint8_t, 16>> ParseSimple(std::string_view s) {
136+
inline Result<Uuid> ParseSimple(std::string_view s) {
135137
ICEBERG_DCHECK(s.size() == 32, "s must be 32 characters long");
136138

137-
std::array<uint8_t, 16> buf{};
139+
std::array<uint8_t, 16> uuid{};
138140
for (size_t i = 0; i < 16; i++) {
139-
uint8_t h1 = HEX_TABLE[static_cast<uint8_t>(s[i * 2])];
140-
uint8_t h2 = HEX_TABLE[static_cast<uint8_t>(s[i * 2 + 1])];
141+
uint8_t h1 = kHexTable[static_cast<uint8_t>(s[i * 2])];
142+
uint8_t h2 = kHexTable[static_cast<uint8_t>(s[i * 2 + 1])];
141143

142-
if ((h1 | h2) == 0xff) {
144+
if ((h1 | h2) == 0xFF) {
143145
return InvalidArgument("Invalid UUID string: {}", s);
144146
}
145147

146-
buf[i] = static_cast<uint8_t>(SHL4_TABLE[h1] | h2);
148+
uuid[i] = static_cast<uint8_t>(kShl4Table[h1] | h2);
147149
}
148-
return buf;
150+
return Uuid(std::move(uuid));
149151
}
150152

151153
// Parse a UUID string with dashes, e.g. "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
152-
inline Result<std::array<uint8_t, 16>> ParseHyphenated(std::string_view s) {
154+
inline Result<Uuid> ParseHyphenated(std::string_view s) {
153155
ICEBERG_DCHECK(s.size() == 36, "s must be 36 characters long");
154156

155157
// Check that dashes are in the right places
@@ -158,29 +160,29 @@ inline Result<std::array<uint8_t, 16>> ParseHyphenated(std::string_view s) {
158160
}
159161

160162
constexpr std::array<size_t, 8> positions = {0, 4, 9, 14, 19, 24, 28, 32};
161-
std::array<uint8_t, 16> buf{};
163+
std::array<uint8_t, 16> uuid{};
162164

163165
for (size_t j = 0; j < 8; j++) {
164166
size_t i = positions[j];
165-
uint8_t h1 = HEX_TABLE[static_cast<uint8_t>(s[i])];
166-
uint8_t h2 = HEX_TABLE[static_cast<uint8_t>(s[i + 1])];
167-
uint8_t h3 = HEX_TABLE[static_cast<uint8_t>(s[i + 2])];
168-
uint8_t h4 = HEX_TABLE[static_cast<uint8_t>(s[i + 3])];
167+
uint8_t h1 = kHexTable[static_cast<uint8_t>(s[i])];
168+
uint8_t h2 = kHexTable[static_cast<uint8_t>(s[i + 1])];
169+
uint8_t h3 = kHexTable[static_cast<uint8_t>(s[i + 2])];
170+
uint8_t h4 = kHexTable[static_cast<uint8_t>(s[i + 3])];
169171

170-
if ((h1 | h2 | h3 | h4) == 0xff) {
172+
if ((h1 | h2 | h3 | h4) == 0xFF) {
171173
return InvalidArgument("Invalid UUID string: {}", s);
172174
}
173175

174-
buf[j * 2] = static_cast<uint8_t>(SHL4_TABLE[h1] | h2);
175-
buf[j * 2 + 1] = static_cast<uint8_t>(SHL4_TABLE[h3] | h4);
176+
uuid[j * 2] = static_cast<uint8_t>(kShl4Table[h1] | h2);
177+
uuid[j * 2 + 1] = static_cast<uint8_t>(kShl4Table[h3] | h4);
176178
}
177179

178-
return buf;
180+
return Uuid(std::move(uuid));
179181
}
180182

181183
} // namespace
182184

183-
Result<std::array<uint8_t, 16>> UUIDUtils::FromString(std::string_view str) {
185+
Result<Uuid> Uuid::FromString(std::string_view str) {
184186
if (str.size() == 32) {
185187
return ParseSimple(str);
186188
} else if (str.size() == 36) {
@@ -190,15 +192,30 @@ Result<std::array<uint8_t, 16>> UUIDUtils::FromString(std::string_view str) {
190192
}
191193
}
192194

193-
std::string UUIDUtils::ToString(std::span<uint8_t> uuid) {
195+
Result<Uuid> Uuid::FromBytes(std::span<const uint8_t> bytes) {
196+
if (bytes.size() != kUuidSize) [[unlikely]] {
197+
return InvalidArgument("UUID byte array must be exactly {} bytes, was {}", kUuidSize,
198+
bytes.size());
199+
}
200+
std::array<uint8_t, kUuidSize> data;
201+
std::memcpy(data.data(), bytes.data(), kUuidSize);
202+
return Uuid(std::move(data));
203+
}
204+
205+
uint8_t Uuid::operator[](size_t index) const {
206+
ICEBERG_CHECK(index < kUuidSize, "UUID index out of range: {}", index);
207+
return data_[index];
208+
}
209+
210+
std::string Uuid::ToString() const {
194211
static const char* hex_chars = "0123456789abcdef";
195-
ICEBERG_CHECK(uuid.size() == 16, "uuid must be 16 bytes long");
196212

197213
return std::format(
198214
"{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}"
199215
"{:02x}{:02x}{:02x}",
200-
uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7], uuid[8],
201-
uuid[9], uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]);
216+
data_[0], data_[1], data_[2], data_[3], data_[4], data_[5], data_[6], data_[7],
217+
data_[8], data_[9], data_[10], data_[11], data_[12], data_[13], data_[14],
218+
data_[15]);
202219
}
203220

204221
} // namespace iceberg

src/iceberg/util/uuid_util.h renamed to src/iceberg/util/uuid.h

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,18 +27,23 @@
2727
#include "iceberg/iceberg_export.h"
2828
#include "iceberg/result.h"
2929

30-
/// \file iceberg/util/uuid_util.h
31-
/// \brief UUID (Universally Unique Identifier) utilities.
30+
/// \file iceberg/util/uuid.h
31+
/// \brief UUID (Universally Unique Identifier) representation.
3232

3333
namespace iceberg {
3434

35-
class ICEBERG_EXPORT UUIDUtils {
35+
class ICEBERG_EXPORT Uuid {
3636
public:
37+
Uuid() = delete;
38+
constexpr static size_t kUuidSize = 16;
39+
40+
explicit Uuid(std::array<uint8_t, kUuidSize> data);
41+
3742
/// \brief Generate a random UUID (version 4).
38-
static std::array<uint8_t, 16> GenerateUuidV4();
43+
static Uuid GenerateV4();
3944

4045
/// \brief Generate UUID version 7 per RFC 9562, with the current timestamp.
41-
static std::array<uint8_t, 16> GenerateUuidV7();
46+
static Uuid GenerateV7();
4247

4348
/// \brief Generate UUID version 7 per RFC 9562, with the given timestamp.
4449
///
@@ -48,13 +53,29 @@ class ICEBERG_EXPORT UUIDUtils {
4853
/// \param unix_ts_ms number of milliseconds since start of the UNIX epoch
4954
///
5055
/// \note unix_ts_ms cannot be negative per RFC.
51-
static std::array<uint8_t, 16> GenerateUuidV7(uint64_t unix_ts_ms);
56+
static Uuid GenerateV7(uint64_t unix_ts_ms);
5257

5358
/// \brief Create a UUID from a string in standard format.
54-
static Result<std::array<uint8_t, 16>> FromString(std::string_view str);
59+
static Result<Uuid> FromString(std::string_view str);
60+
61+
/// \brief Create a UUID from a 16-byte array.
62+
static Result<Uuid> FromBytes(std::span<const uint8_t> bytes);
63+
64+
/// \brief Get the raw bytes of the UUID.
65+
std::span<const uint8_t> bytes() const { return data_; }
66+
67+
/// \brief Access individual bytes of the UUID.
68+
uint8_t operator[](size_t index) const;
69+
70+
/// \brief Convert the UUID to a string in standard format.
71+
std::string ToString() const;
72+
73+
friend bool operator==(const Uuid& lhs, const Uuid& rhs) {
74+
return lhs.data_ == rhs.data_;
75+
}
5576

56-
/// \brief Convert a UUID to a string in standard format.
57-
static std::string ToString(std::span<uint8_t> uuid);
77+
private:
78+
std::array<uint8_t, kUuidSize> data_;
5879
};
5980

6081
} // namespace iceberg

test/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ add_iceberg_test(util_test
9191
endian_test.cc
9292
formatter_test.cc
9393
string_util_test.cc
94-
uuid_util_test.cc
94+
uuid_test.cc
9595
visit_type_test.cc)
9696

9797
add_iceberg_test(roaring_test SOURCES roaring_test.cc)

test/uuid_util_test.cc renamed to test/uuid_test.cc

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* under the License.
1818
*/
1919

20-
#include "iceberg/util/uuid_util.h"
20+
#include "iceberg/util/uuid.h"
2121

2222
#include <vector>
2323

@@ -28,19 +28,19 @@
2828
namespace iceberg {
2929

3030
TEST(UUIDUtilTest, GenerateV4) {
31-
auto uuid = UUIDUtils::GenerateUuidV4();
31+
auto uuid = Uuid::GenerateV4();
3232
// just ensure it runs and produces a value
33-
EXPECT_EQ(uuid.size(), 16);
33+
EXPECT_EQ(uuid.bytes().size(), Uuid::kUuidSize);
3434
// Version 4 UUIDs have the version number (4) in the 7th byte
3535
EXPECT_EQ((uuid[6] >> 4) & 0x0F, 4);
3636
// Variant is in the 9th byte, the two most significant bits should be 10
3737
EXPECT_EQ((uuid[8] >> 6) & 0x03, 0b10);
3838
}
3939

4040
TEST(UUIDUtilTest, GenerateV7) {
41-
auto uuid = UUIDUtils::GenerateUuidV7();
41+
auto uuid = Uuid::GenerateV7();
4242
// just ensure it runs and produces a value
43-
EXPECT_EQ(uuid.size(), 16);
43+
EXPECT_EQ(uuid.bytes().size(), 16);
4444
// Version 7 UUIDs have the version number (7) in the 7th byte
4545
EXPECT_EQ((uuid[6] >> 4) & 0x0F, 7);
4646
// Variant is in the 9th byte, the two most significant bits should be 10
@@ -55,10 +55,10 @@ TEST(UUIDUtilTest, FromString) {
5555
};
5656

5757
for (const auto& uuid_str : uuid_strings) {
58-
auto result = UUIDUtils::FromString(uuid_str);
58+
auto result = Uuid::FromString(uuid_str);
5959
EXPECT_THAT(result, IsOk());
6060
auto uuid = result.value();
61-
EXPECT_EQ(UUIDUtils::ToString(uuid), uuid_str);
61+
EXPECT_EQ(uuid.ToString(), uuid_str);
6262
}
6363

6464
std::vector<std::pair<std::string, std::string>> uuid_string_pairs = {
@@ -68,10 +68,10 @@ TEST(UUIDUtilTest, FromString) {
6868
};
6969

7070
for (const auto& [input_str, expected_str] : uuid_string_pairs) {
71-
auto result = UUIDUtils::FromString(input_str);
71+
auto result = Uuid::FromString(input_str);
7272
EXPECT_THAT(result, IsOk());
7373
auto uuid = result.value();
74-
EXPECT_EQ(UUIDUtils::ToString(uuid), expected_str);
74+
EXPECT_EQ(uuid.ToString(), expected_str);
7575
}
7676
}
7777

@@ -88,10 +88,30 @@ TEST(UUIDUtilTest, FromStringInvalid) {
8888
};
8989

9090
for (const auto& uuid_str : invalid_uuid_strings) {
91-
auto result = UUIDUtils::FromString(uuid_str);
91+
auto result = Uuid::FromString(uuid_str);
9292
EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument));
9393
EXPECT_THAT(result, HasErrorMessage("Invalid UUID string"));
9494
}
9595
}
9696

97+
TEST(UUIDUtilTest, FromBytes) {
98+
std::array<uint8_t, Uuid::kUuidSize> bytes = {0x12, 0x3e, 0x45, 0x67, 0xe8, 0x9b,
99+
0x12, 0xd3, 0xa4, 0x56, 0x42, 0x66,
100+
0x14, 0x17, 0x40, 0x00};
101+
auto result = Uuid::FromBytes(bytes);
102+
EXPECT_THAT(result, IsOk());
103+
auto uuid = result.value();
104+
EXPECT_EQ(uuid.ToString(), "123e4567-e89b-12d3-a456-426614174000");
105+
EXPECT_EQ(uuid, Uuid(bytes));
106+
}
107+
108+
TEST(UUIDUtilTest, FromBytesInvalid) {
109+
std::array<uint8_t, Uuid::kUuidSize - 1> short_bytes = {0x12, 0x3e, 0x45, 0x67, 0xe8,
110+
0x9b, 0x12, 0xd3, 0xa4, 0x56,
111+
0x42, 0x66, 0x14, 0x17, 0x40};
112+
auto result = Uuid::FromBytes(short_bytes);
113+
EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument));
114+
EXPECT_THAT(result, HasErrorMessage("UUID byte array must be exactly 16 bytes"));
115+
}
116+
97117
} // namespace iceberg

0 commit comments

Comments
 (0)