Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,14 @@ set(ICEBERG_SOURCES
transform.cc
transform_function.cc
type.cc
util/bucket_util.cc
util/conversions.cc
util/decimal.cc
util/gzip_internal.cc
util/murmurhash3_internal.cc
util/temporal_util.cc
util/timepoint.cc
util/truncate_util.cc
util/uuid.cc
v1_metadata.cc
v2_metadata.cc
Expand Down
26 changes: 24 additions & 2 deletions src/iceberg/expression/literal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
#include <cstdint>
#include <string>

#include "iceberg/type_fwd.h"
#include "iceberg/util/checked_cast.h"
#include "iceberg/util/conversions.h"
#include "iceberg/util/macros.h"

namespace iceberg {

Expand Down Expand Up @@ -188,11 +188,14 @@ Result<Literal> LiteralCaster::CastFromString(
const auto& str_val = std::get<std::string>(literal.value_);

switch (target_type->type_id()) {
case TypeId::kUuid: {
ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromString(str_val));
return Literal::UUID(uuid);
}
case TypeId::kDate:
case TypeId::kTime:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
case TypeId::kUuid:
return NotImplemented("Cast from String to {} is not implemented yet",
target_type->ToString());
default:
Expand Down Expand Up @@ -296,6 +299,10 @@ Literal Literal::Fixed(std::vector<uint8_t> value) {
return {Value{std::move(value)}, fixed(size)};
}

// Builds a decimal literal from an unscaled 128-bit integer and the
// precision/scale of its decimal type.
Literal Literal::Decimal(int128_t value, int32_t precision, int32_t scale) {
  // The class name must be qualified here: inside Literal, the unqualified
  // name `Decimal` refers to this factory method.
  ::iceberg::Decimal unscaled(value);
  return {Value{unscaled}, decimal(precision, scale)};
}

Result<Literal> Literal::Deserialize(std::span<const uint8_t> data,
std::shared_ptr<PrimitiveType> type) {
return Conversions::FromBytes(std::move(type), data);
Expand Down Expand Up @@ -385,6 +392,15 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
return CompareFloat(this_val, other_val);
}

case TypeId::kDecimal: {
auto& this_val = std::get<::iceberg::Decimal>(value_);
auto& other_val = std::get<::iceberg::Decimal>(other.value_);
const auto& this_decimal_type = internal::checked_cast<DecimalType&>(*type_);
const auto& other_decimal_type = internal::checked_cast<DecimalType&>(*other.type_);
return ::iceberg::Decimal::Compare(this_val, other_val, this_decimal_type.scale(),
other_decimal_type.scale());
}

case TypeId::kString: {
auto& this_val = std::get<std::string>(value_);
auto& other_val = std::get<std::string>(other.value_);
Expand Down Expand Up @@ -440,6 +456,12 @@ std::string Literal::ToString() const {
case TypeId::kDouble: {
return std::to_string(std::get<double>(value_));
}
case TypeId::kDecimal: {
const auto& decimal_type = internal::checked_cast<DecimalType&>(*type_);
const auto& decimal = std::get<::iceberg::Decimal>(value_);
return decimal.ToString(decimal_type.scale())
.value_or("invalid literal of type decimal");
}
case TypeId::kString: {
return "\"" + std::get<std::string>(value_) + "\"";
}
Expand Down
17 changes: 14 additions & 3 deletions src/iceberg/expression/literal.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@

#include "iceberg/result.h"
#include "iceberg/type.h"
#include "iceberg/util/decimal.h"
#include "iceberg/util/formattable.h"
#include "iceberg/util/int128.h"
#include "iceberg/util/uuid.h"

namespace iceberg {
Expand Down Expand Up @@ -57,9 +59,9 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
float, // for float
double, // for double
std::string, // for string
Uuid, // for uuid
std::vector<uint8_t>, // for binary, fixed
std::array<uint8_t, 16>, // for decimal
std::vector<uint8_t>, // for binary, fixed
::iceberg::Decimal, // for decimal
Uuid, // for uuid
BelowMin, AboveMax>;

/// \brief Factory methods for primitive types
Expand All @@ -77,6 +79,10 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
static Literal Binary(std::vector<uint8_t> value);
static Literal Fixed(std::vector<uint8_t> value);

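/// \brief Create a decimal literal.
/// \param value The unscaled 128-bit integer value.
/// \param precision The precision of the literal's decimal type.
/// \param scale The scale of the literal's decimal type.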
static Literal Decimal(int128_t value, int32_t precision, int32_t scale);

/// \brief Create a literal representing a null value.
static Literal Null(std::shared_ptr<PrimitiveType> type) {
return {Value{std::monostate{}}, std::move(type)};
Expand Down Expand Up @@ -205,6 +211,11 @@ struct LiteralTraits<TypeId::kDouble> {
using ValueType = double;
};

/// \brief LiteralTraits specialization for TypeId::kDecimal; its ValueType is Decimal.
template <>
struct LiteralTraits<TypeId::kDecimal> {
using ValueType = Decimal;
};

template <>
struct LiteralTraits<TypeId::kString> {
using ValueType = std::string;
Expand Down
5 changes: 4 additions & 1 deletion src/iceberg/manifest_adapter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,12 @@ Status ManifestEntryAdapter::AppendPartitionValues(
break;
case TypeId::kDecimal:
ICEBERG_RETURN_UNEXPECTED(AppendField(
child_array, std::get<std::array<uint8_t, 16>>(partition_value.value())));
child_array, std::get<Decimal>(partition_value.value()).ToBytes()));
break;
case TypeId::kUuid:
ICEBERG_RETURN_UNEXPECTED(
AppendField(child_array, std::get<Uuid>(partition_value.value()).bytes()));
break;
case TypeId::kStruct:
case TypeId::kList:
case TypeId::kMap:
Expand Down
3 changes: 3 additions & 0 deletions src/iceberg/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,14 @@ iceberg_sources = files(
'transform.cc',
'transform_function.cc',
'type.cc',
'util/bucket_util.cc',
'util/conversions.cc',
'util/decimal.cc',
'util/gzip_internal.cc',
'util/murmurhash3_internal.cc',
'util/temporal_util.cc',
'util/timepoint.cc',
'util/truncate_util.cc',
'util/uuid.cc',
'v1_metadata.cc',
'v2_metadata.cc',
Expand Down
2 changes: 2 additions & 0 deletions src/iceberg/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,13 @@ add_iceberg_test(json_serde_test

add_iceberg_test(util_test
SOURCES
bucket_util_test.cc
config_test.cc
decimal_test.cc
endian_test.cc
formatter_test.cc
string_util_test.cc
truncate_util_test.cc
uuid_test.cc
visit_type_test.cc)

Expand Down
81 changes: 81 additions & 0 deletions src/iceberg/test/bucket_util_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/util/bucket_util.h"

#include <chrono>

#include <gtest/gtest.h>

#include "iceberg/util/decimal.h"
#include "iceberg/util/uuid.h"

namespace iceberg {

// The following tests are from
// https://iceberg.apache.org/spec/#appendix-b-32-bit-hash-requirements
TEST(BucketUtilsTest, HashHelper) {
  // Each section hashes one sample value and compares the result with the
  // expected 32-bit hash for that value.

  // int and long
  EXPECT_EQ(BucketUtils::HashInt(34), 2017239379);
  EXPECT_EQ(BucketUtils::HashLong(34L), 2017239379);

  // decimal hash
  auto decimal = Decimal::FromString("14.20");
  ASSERT_TRUE(decimal.has_value());
  EXPECT_EQ(BucketUtils::HashBytes(decimal->ToBigEndian()), -500754589);

  // date hash
  std::chrono::sys_days sd = std::chrono::year{2017} / 11 / 16;
  std::chrono::sys_days epoch{std::chrono::year{1970} / 1 / 1};
  int32_t days = (sd - epoch).count();
  EXPECT_EQ(BucketUtils::HashInt(days), -653330422);

  // time
  // 22:31:08 in microseconds
  int64_t time_micros = (22 * 3600 + 31 * 60 + 8) * 1000000LL;
  EXPECT_EQ(BucketUtils::HashLong(time_micros), -662762989);

  // timestamp
  // 2017-11-16T22:31:08 in microseconds
  // TODO: replace this manual chrono arithmetic with utility functions in
  // temporal_util.h that create date/time/timestamp values from structural
  // inputs (suggested in review; agreed to be added in a separate PR).
  std::chrono::system_clock::time_point tp =
      std::chrono::sys_days{std::chrono::year{2017} / 11 / 16} + std::chrono::hours{22} +
      std::chrono::minutes{31} + std::chrono::seconds{8};
  int64_t timestamp_micros =
      std::chrono::duration_cast<std::chrono::microseconds>(tp.time_since_epoch())
          .count();
  EXPECT_EQ(BucketUtils::HashLong(timestamp_micros), -2047944441);
  // 2017-11-16T22:31:08.000001 in microseconds
  EXPECT_EQ(BucketUtils::HashLong(timestamp_micros + 1), -1207196810);

  // string
  std::string str = "iceberg";
  EXPECT_EQ(BucketUtils::HashBytes(std::span<const uint8_t>(
                reinterpret_cast<const uint8_t*>(str.data()), str.size())),
            1210000089);

  // uuid
  auto uuid = Uuid::FromString("f79c3e09-677c-4bbd-a479-3f349cb785e7");
  // Stop here if parsing failed, instead of dereferencing an error result below.
  ASSERT_TRUE(uuid.has_value());
  EXPECT_EQ(BucketUtils::HashBytes(uuid->bytes()), 1488055340);

  // fixed & binary
  std::vector<uint8_t> fixed = {0, 1, 2, 3};
  EXPECT_EQ(BucketUtils::HashBytes(fixed), -188683207);
}

} // namespace iceberg
98 changes: 98 additions & 0 deletions src/iceberg/test/decimal_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,50 @@ TEST(DecimalTest, FromBigEndianInvalid) {
IsError(ErrorKind::kInvalidArgument));
}

TEST(DecimalTest, ToBigEndian) {
  // Candidate values for the signed high 64-bit word.
  const std::vector<int64_t> high_words = {0,
                                           1,
                                           -1,
                                           INT32_MAX,
                                           INT32_MIN,
                                           static_cast<int64_t>(INT32_MAX) + 1,
                                           static_cast<int64_t>(INT32_MIN) - 1,
                                           INT64_MAX,
                                           INT64_MIN};
  // Candidate values for the unsigned low 64-bit word.
  const std::vector<uint64_t> low_words = {0,
                                           1,
                                           255,
                                           UINT32_MAX,
                                           static_cast<uint64_t>(UINT32_MAX) + 1,
                                           static_cast<uint64_t>(UINT32_MAX) + 2,
                                           static_cast<uint64_t>(UINT32_MAX) + 3,
                                           static_cast<uint64_t>(UINT32_MAX) + 4,
                                           static_cast<uint64_t>(UINT32_MAX) + 5,
                                           static_cast<uint64_t>(UINT32_MAX) + 6,
                                           static_cast<uint64_t>(UINT32_MAX) + 7,
                                           static_cast<uint64_t>(UINT32_MAX) + 8,
                                           UINT64_MAX};

  // Every (high, low) pair must survive ToBigEndian followed by FromBigEndian.
  for (const int64_t high_word : high_words) {
    for (const uint64_t low_word : low_words) {
      Decimal original(high_word, low_word);
      auto encoded = original.ToBigEndian();
      auto decoded = Decimal::FromBigEndian(encoded.data(), encoded.size());
      ASSERT_THAT(decoded, IsOk());
      EXPECT_EQ(decoded.value(), original);
    }
  }

  // Repeat the round trip for decimals built from a single 128-bit integer.
  const std::vector<int128_t> int128_values{-INT64_MAX, -INT32_MAX, -255, -1, 0, 1, 255,
                                            256,        INT32_MAX,  INT64_MAX};
  for (const int128_t unscaled : int128_values) {
    Decimal original(unscaled);
    auto encoded = original.ToBigEndian();
    auto decoded = Decimal::FromBigEndian(encoded.data(), encoded.size());
    ASSERT_THAT(decoded, IsOk());
    EXPECT_EQ(decoded.value(), original);
  }
}

TEST(DecimalTestFunctionality, Multiply) {
ASSERT_EQ(Decimal(60501), Decimal(301) * Decimal(201));
ASSERT_EQ(Decimal(-60501), Decimal(-301) * Decimal(201));
Expand Down Expand Up @@ -671,4 +715,58 @@ TEST(DecimalTest, Rescale) {
ASSERT_THAT(Decimal(5555555).Rescale(6, 1), IsError(ErrorKind::kInvalid));
}

TEST(DecimalTest, Compare) {
  // One row per comparison: both operands, their scales, and the expected ordering.
  struct CompareCase {
    const char* description;
    Decimal lhs;
    Decimal rhs;
    int32_t lhs_scale;
    int32_t rhs_scale;
    std::partial_ordering expected;
  };

  const CompareCase cases[] = {
      // Largest positive unscaled values.
      {"10^38 - 1 on both sides, rescaling overflows",
       Decimal("99999999999999999999999999999999999999"),
       Decimal("99999999999999999999999999999999999999"), 2, 3,
       std::partial_ordering::greater},
      {"10^37 - 1 vs 10^38 - 1, rescaling does not overflow",
       Decimal("9999999999999999999999999999999999999"),
       Decimal("99999999999999999999999999999999999999"), 2, 3,
       std::partial_ordering::less},

      // Smallest negative unscaled values.
      {"-10^38 + 1 on both sides, rescaling overflows",
       Decimal("-99999999999999999999999999999999999999"),
       Decimal("-99999999999999999999999999999999999999"), 2, 3,
       std::partial_ordering::less},
      {"-10^37 + 1 vs -10^38 + 1, rescaling does not overflow",
       Decimal("-9999999999999999999999999999999999999"),
       Decimal("-99999999999999999999999999999999999999"), 2, 3,
       std::partial_ordering::greater},

      // Equal values expressed with different scales.
      {"equal positive values, different scales", Decimal("123456789"),
       Decimal("1234567890"), 2, 3, std::partial_ordering::equivalent},
      {"equal negative values, different scales", Decimal("-1234567890"),
       Decimal("-123456789"), 3, 2, std::partial_ordering::equivalent},

      // Different values with different scales.
      {"different positive values, different scales", Decimal("123456788"),
       Decimal("1234567890"), 2, 3, std::partial_ordering::less},
      {"different negative values, different scales", Decimal("-1234567890"),
       Decimal("-123456788"), 2, 3, std::partial_ordering::less},

      // Different values sharing one scale.
      {"different positive values, same scale", Decimal("123456790"),
       Decimal("123456789"), 2, 2, std::partial_ordering::greater},
      {"different negative values, same scale", Decimal("-123456790"),
       Decimal("-123456789"), 2, 2, std::partial_ordering::less},

      // Operands with opposite signs.
      {"positive vs negative", Decimal("123456789"), Decimal("-123456789"), 2, 3,
       std::partial_ordering::greater},
      {"negative vs positive", Decimal("-123456789"), Decimal("123456789"), 2, 3,
       std::partial_ordering::less},

      // Comparisons involving zero.
      {"zero vs zero", Decimal("0"), Decimal("0"), 2, 3,
       std::partial_ordering::equivalent},
      {"zero vs positive", Decimal("0"), Decimal("123456789"), 2, 3,
       std::partial_ordering::less},
      {"negative vs zero", Decimal("-123456789"), Decimal("0"), 2, 3,
       std::partial_ordering::less},
  };

  for (const auto& test_case : cases) {
    // Name the failing row, since every row shares the assertion below.
    SCOPED_TRACE(test_case.description);
    ASSERT_EQ(Decimal::Compare(test_case.lhs, test_case.rhs, test_case.lhs_scale,
                               test_case.rhs_scale),
              test_case.expected);
  }
}

} // namespace iceberg
Loading
Loading