Skip to content

Commit 2963f5b

Browse files
committed
Merge remote-tracking branch 'upstream/main' into move-tests
2 parents 7bc79de + a89b101 commit 2963f5b

File tree

13 files changed

+476
-131
lines changed

13 files changed

+476
-131
lines changed

CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ project(Iceberg
2828
DESCRIPTION "Iceberg C++ Project"
2929
LANGUAGES CXX)
3030

31+
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/iceberg/version.h.in"
32+
"${CMAKE_BINARY_DIR}/src/iceberg/version.h")
33+
3134
set(CMAKE_CXX_STANDARD 23)
3235
set(CMAKE_CXX_STANDARD_REQUIRED ON)
3336
set(CMAKE_CXX_EXTENSIONS OFF)

LICENSE

Lines changed: 13 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -202,117 +202,28 @@
202202

203203
--------------------------------------------------------------------------------
204204

205-
3rdparty dependency nlohmann-json is statically linked in certain binary
206-
distributions. nlohmann-json has the following license:
205+
This product includes code from smhasher.
207206

208-
MIT License
207+
* src/iceberg/murmur3_internal.h and src/iceberg/murmur3_internal.cc are adapted from MurmurHash3.
209208

210-
Copyright (c) 2013-2022 Niels Lohmann
211-
212-
Permission is hereby granted, free of charge, to any person obtaining a copy
213-
of this software and associated documentation files (the "Software"), to deal
214-
in the Software without restriction, including without limitation the rights
215-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
216-
copies of the Software, and to permit persons to whom the Software is
217-
furnished to do so, subject to the following conditions:
218-
219-
The above copyright notice and this permission notice shall be included in all
220-
copies or substantial portions of the Software.
221-
222-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
223-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
224-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
225-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
226-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
227-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
228-
SOFTWARE.
229-
230-
--------------------------------------------------------------------------------
231-
232-
The file src/iceberg/murmur3_internal.h contains code adapted from
233-
234-
https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.h
235-
236-
The file src/iceberg/murmur3_internal.cc contains code adapted from
237-
238-
https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
239-
240-
MurmurHash3 was written by Austin Appleby, and is placed in the public
241-
domain. The author disclaims copyright to this source code.
209+
Copyright: Austin Appleby (placed in the public domain).
210+
Home page: https://github.com/aappleby/smhasher
211+
License: Public Domain
242212

243213
--------------------------------------------------------------------------------
244214

245-
The file src/iceberg/util/checked_cast.h contains code adapted from
246-
247-
https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/checked_cast.h
248-
249-
The file src/iceberg/util/visit_type.h contains code adapted from
250-
251-
https://github.com/apache/arrow/blob/main/cpp/src/arrow/visit_type_inline.h
252-
253-
The file src/iceberg/util/decimal.h contains code adapted from
254-
255-
https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.h
256-
257-
The file src/iceberg/util/decimal.cc contains code adapted from
215+
This product includes code from Apache Arrow.
258216

259-
https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.cc
217+
* Core utilities:
218+
* checked_cast utility in src/iceberg/util/checked_cast.h.
219+
* visit_type utility in src/iceberg/util/visit_type.h.
220+
* Decimal128 implementation details in src/iceberg/util/decimal files.
221+
* Build system modules:
222+
* cmake_modules/IcebergBuildUtils.cmake.
223+
* cmake_modules/IcebergThirdpartyToolchain.cmake.
260224

261225
Copyright: 2016-2025 The Apache Software Foundation.
262226
Home page: https://arrow.apache.org/
263227
License: https://www.apache.org/licenses/LICENSE-2.0
264228

265229
--------------------------------------------------------------------------------
266-
267-
3rdparty dependency spdlog is statically linked in certain binary
268-
distributions. spdlog has the following license:
269-
270-
MIT License
271-
272-
Copyright (c) 2016 Gabi Melman
273-
274-
Permission is hereby granted, free of charge, to any person obtaining a copy
275-
of this software and associated documentation files (the "Software"), to deal
276-
in the Software without restriction, including without limitation the rights
277-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
278-
copies of the Software, and to permit persons to whom the Software is
279-
furnished to do so, subject to the following conditions:
280-
281-
The above copyright notice and this permission notice shall be included in all
282-
copies or substantial portions of the Software.
283-
284-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
285-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
286-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
287-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
288-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
289-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
290-
SOFTWARE.
291-
292-
--------------------------------------------------------------------------------
293-
294-
3rdparty dependency zlib is used by certain binary distributions. zlib has
295-
the following license:
296-
297-
zlib License
298-
299-
Copyright (c) 1995-2024 Jean-loup Gailly and Mark Adler
300-
301-
This software is provided 'as-is', without any express or implied
302-
warranty. In no event will the authors be held liable for any damages
303-
arising from the use of this software.
304-
305-
Permission is granted to anyone to use this software for any purpose,
306-
including commercial applications, and to alter it and redistribute it
307-
freely, subject to the following restrictions:
308-
309-
1. The origin of this software must not be misrepresented; you must not
310-
claim that you wrote the original software. If you use this software
311-
in a product, an acknowledgment in the product documentation would be
312-
appreciated but is not required.
313-
2. Altered source versions must be plainly marked as such, and must not be
314-
misrepresented as being the original software.
315-
3. This notice may not be removed or altered from any source distribution.
316-
317-
Jean-loup Gailly Mark Adler
318-

NOTICE

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,3 @@ Copyright 2024-2025 The Apache Software Foundation
33

44
This product includes software developed at
55
The Apache Software Foundation (http://www.apache.org/).
6-
7-
This product includes code from smhasher
8-
* MurmurHash3 was written by Austin Appleby, and is placed in the public
9-
* domain. The author hereby disclaims copyright to this source code.
10-
* https://github.com/aappleby/smhasher
11-
12-
This product includes code from Apache Arrow
13-
* Copyright 2016-2025 The Apache Software Foundation
14-
* https://github.com/apache/arrow
15-
16-
This product includes software developed by Gabi Melman
17-
* spdlog: Very fast, header-only/compiled, C++ logging library
18-
* Copyright (c) 2016 Gabi Melman
19-
* https://github.com/gabime/spdlog
20-
21-
This product includes software developed by Jean-loup Gailly and Mark Adler
22-
* zlib: A Massively Spiffy Yet Delicately Unobtrusive Compression Library
23-
* Copyright (c) 1995-2024 Jean-loup Gailly and Mark Adler
24-
* https://zlib.net/

src/iceberg/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ set(ICEBERG_SOURCES
5252
util/decimal.cc
5353
util/murmurhash3_internal.cc
5454
util/timepoint.cc
55-
util/gzip_internal.cc)
55+
util/gzip_internal.cc
56+
util/uuid.cc)
5657

5758
set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)
5859
set(ICEBERG_SHARED_BUILD_INTERFACE_LIBS)

src/iceberg/json_internal.cc

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
#include <cstdint>
2424
#include <format>
2525
#include <regex>
26-
#include <type_traits>
2726
#include <unordered_set>
2827
#include <utility>
2928

@@ -351,8 +350,8 @@ nlohmann::json ToJson(const SortField& sort_field) {
351350
nlohmann::json json;
352351
json[kTransform] = std::format("{}", *sort_field.transform());
353352
json[kSourceId] = sort_field.source_id();
354-
json[kDirection] = SortDirectionToString(sort_field.direction());
355-
json[kNullOrder] = NullOrderToString(sort_field.null_order());
353+
json[kDirection] = std::format("{}", sort_field.direction());
354+
json[kNullOrder] = std::format("{}", sort_field.null_order());
356355
return json;
357356
}
358357

@@ -491,7 +490,7 @@ nlohmann::json ToJson(const Schema& schema) {
491490
nlohmann::json ToJson(const SnapshotRef& ref) {
492491
nlohmann::json json;
493492
json[kSnapshotId] = ref.snapshot_id;
494-
json[kType] = SnapshotRefTypeToString(ref.type());
493+
json[kType] = std::format("{}", ref.type());
495494
if (ref.type() == SnapshotRefType::kBranch) {
496495
const auto& branch = std::get<SnapshotRef::Branch>(ref.retention);
497496
SetOptionalField(json, kMinSnapshotsToKeep, branch.min_snapshots_to_keep);

src/iceberg/snapshot.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ enum class SnapshotRefType {
4343
};
4444

4545
/// \brief Get the relative snapshot reference type name
46-
ICEBERG_EXPORT constexpr std::string_view SnapshotRefTypeToString(
47-
SnapshotRefType type) noexcept {
46+
ICEBERG_EXPORT constexpr std::string_view ToString(SnapshotRefType type) noexcept {
4847
switch (type) {
4948
case SnapshotRefType::kBranch:
5049
return "branch";

src/iceberg/sort_field.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ NullOrder SortField::null_order() const { return null_order_; }
4444
std::string SortField::ToString() const {
4545
return std::format(
4646
"sort_field(source_id={}, transform={}, direction={}, null_order={})", source_id_,
47-
*transform_, SortDirectionToString(direction_), NullOrderToString(null_order_));
47+
*transform_, direction_, null_order_);
4848
}
4949

5050
bool SortField::Equals(const SortField& other) const {

src/iceberg/sort_field.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ enum class SortDirection {
4242
kDescending,
4343
};
4444
/// \brief Get the relative sort direction name
45-
ICEBERG_EXPORT constexpr std::string_view SortDirectionToString(SortDirection direction) {
45+
ICEBERG_EXPORT constexpr std::string_view ToString(SortDirection direction) {
4646
switch (direction) {
4747
case SortDirection::kAscending:
4848
return "asc";
@@ -67,7 +67,7 @@ enum class NullOrder {
6767
kLast,
6868
};
6969
/// \brief Get the relative null order name
70-
ICEBERG_EXPORT constexpr std::string_view NullOrderToString(NullOrder null_order) {
70+
ICEBERG_EXPORT constexpr std::string_view ToString(NullOrder null_order) {
7171
switch (null_order) {
7272
case NullOrder::kFirst:
7373
return "nulls-first";

src/iceberg/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ add_iceberg_test(util_test
9090
endian_test.cc
9191
formatter_test.cc
9292
string_util_test.cc
93+
uuid_test.cc
9394
visit_type_test.cc)
9495

9596
add_iceberg_test(roaring_test SOURCES roaring_test.cc)

src/iceberg/test/uuid_test.cc

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/util/uuid.h"
21+
22+
#include <vector>
23+
24+
#include <gtest/gtest.h>
25+
26+
#include "matchers.h"
27+
28+
namespace iceberg {
29+
30+
// Verifies that a generated version-4 UUID has the expected length and
// carries the version and variant markers in the expected bytes.
TEST(UUIDUtilTest, GenerateV4) {
  auto generated = Uuid::GenerateV4();

  // Smoke check: generation ran and produced a full-length value.
  EXPECT_EQ(generated.bytes().size(), Uuid::kLength);

  // The high nibble of the 7th byte (index 6) holds the version number: 4.
  auto version_nibble = (generated[6] >> 4) & 0x0F;
  EXPECT_EQ(version_nibble, 4);

  // The two most significant bits of the 9th byte (index 8) hold the
  // variant and should be binary 10.
  auto variant_bits = (generated[8] >> 6) & 0x03;
  EXPECT_EQ(variant_bits, 0b10);
}
39+
40+
TEST(UUIDUtilTest, GenerateV7) {
41+
auto uuid = Uuid::GenerateV7();
42+
// just ensure it runs and produces a value
43+
EXPECT_EQ(uuid.bytes().size(), 16);
44+
// Version 7 UUIDs have the version number (7) in the 7th byte
45+
EXPECT_EQ((uuid[6] >> 4) & 0x0F, 7);
46+
// Variant is in the 9th byte, the two most significant bits should be 10
47+
EXPECT_EQ((uuid[8] >> 6) & 0x03, 0b10);
48+
}
49+
50+
// Verifies that Uuid::FromString accepts both dashed and dash-less input
// (in either letter case) and that ToString() renders the lowercase,
// dashed form.
TEST(UUIDUtilTest, FromString) {
  // Dashed, lowercase inputs must round-trip unchanged.
  std::vector<std::string> uuid_strings = {
      "123e4567-e89b-12d3-a456-426614174000",
      "550e8400-e29b-41d4-a716-446655440000",
      "f47ac10b-58cc-4372-a567-0e02b2c3d479",
  };

  for (const auto& uuid_str : uuid_strings) {
    // Name the input in any failure reported from this iteration.
    SCOPED_TRACE(uuid_str);
    auto result = Uuid::FromString(uuid_str);
    // Fatal assertion: abort this test if parsing failed rather than going on
    // to call value() on an error result.
    ASSERT_THAT(result, IsOk());
    auto uuid = result.value();
    EXPECT_EQ(uuid.ToString(), uuid_str);
  }

  // Dash-less inputs (including uppercase hex digits) paired with the
  // string ToString() is expected to produce.
  std::vector<std::pair<std::string, std::string>> uuid_string_pairs = {
      {"123e4567e89b12d3a456426614174000", "123e4567-e89b-12d3-a456-426614174000"},
      {"550E8400E29B41D4A716446655440000", "550e8400-e29b-41d4-a716-446655440000"},
      {"F47AC10B58CC4372A5670E02B2C3D479", "f47ac10b-58cc-4372-a567-0e02b2c3d479"},
  };

  for (const auto& [input_str, expected_str] : uuid_string_pairs) {
    SCOPED_TRACE(input_str);
    auto result = Uuid::FromString(input_str);
    ASSERT_THAT(result, IsOk());
    auto uuid = result.value();
    EXPECT_EQ(uuid.ToString(), expected_str);
  }
}
77+
78+
// Verifies that Uuid::FromString rejects malformed input with an
// invalid-argument error whose message mentions "Invalid UUID string".
TEST(UUIDUtilTest, FromStringInvalid) {
  std::vector<std::string> invalid_uuid_strings = {
      "123e4567-e89b-12d3-a456-42661417400",    // too short
      "123e4567-e89b-12d3-a456-4266141740000",  // too long
      "g23e4567-e89b-12d3-a456-426614174000",   // invalid character
      "123e4567e89b12d3a45642661417400",        // too short without dashes
      "123e4567e89b12d3a4564266141740000",      // too long without dashes
      "550e8400-e29b-41d4-a716-44665544000Z",   // invalid character at end
      "550e8400-e29b-41d4-a716-44665544000-",   // dash in place of the final digit
      "550e8400-e29b-41d4-a716-4466554400",     // too short
  };

  for (const auto& uuid_str : invalid_uuid_strings) {
    // Name the input in any failure reported from this iteration; without
    // this a failing case cannot be told apart from the other entries.
    SCOPED_TRACE(uuid_str);
    auto result = Uuid::FromString(uuid_str);
    EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument));
    EXPECT_THAT(result, HasErrorMessage("Invalid UUID string"));
  }
}
96+
97+
// Verifies that Uuid::FromBytes accepts a full-length byte array and that the
// result matches both the expected string form and a Uuid constructed
// directly from the same bytes.
TEST(UUIDUtilTest, FromBytes) {
  std::array<uint8_t, Uuid::kLength> bytes = {0x12, 0x3e, 0x45, 0x67, 0xe8, 0x9b,
                                              0x12, 0xd3, 0xa4, 0x56, 0x42, 0x66,
                                              0x14, 0x17, 0x40, 0x00};
  auto result = Uuid::FromBytes(bytes);
  // Fatal assertion: abort this test if conversion failed rather than going
  // on to call value() on an error result.
  ASSERT_THAT(result, IsOk());
  auto uuid = result.value();
  EXPECT_EQ(uuid.ToString(), "123e4567-e89b-12d3-a456-426614174000");
  EXPECT_EQ(uuid, Uuid(bytes));
}
107+
108+
// Verifies that Uuid::FromBytes rejects a byte array that is one byte shorter
// than Uuid::kLength and reports an invalid-argument error.
TEST(UUIDUtilTest, FromBytesInvalid) {
  // One element fewer than a full UUID.
  std::array<uint8_t, Uuid::kLength - 1> truncated{
      0x12, 0x3e, 0x45, 0x67, 0xe8,  //
      0x9b, 0x12, 0xd3, 0xa4, 0x56,  //
      0x42, 0x66, 0x14, 0x17, 0x40};

  auto outcome = Uuid::FromBytes(truncated);

  EXPECT_THAT(outcome, IsError(ErrorKind::kInvalidArgument));
  EXPECT_THAT(outcome, HasErrorMessage("UUID byte array must be exactly 16 bytes"));
}
116+
117+
} // namespace iceberg

0 commit comments

Comments
 (0)