-
Notifications
You must be signed in to change notification settings - Fork 70
feat: implement Primitive type Literal #117
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 9 commits
Commits
Show all changes
20 commits
Select commit
Hold shift + click to select a range
59f60da
init templates
mapleFU 9355670
some minor enhancement
mapleFU 082c7e1
try to fix lint
mapleFU e1dd11d
Resolve comments and add impl for compare / cast
mapleFU 1132ef5
Fix some logic for using datum
mapleFU 1eb9a9b
Rename files
mapleFU 07dd257
Resolve some comments, and remove castFromDouble
mapleFU 146a86e
Add basic tests, and fix float compare bug(qNan != sNan)
mapleFU d125408
Fix lint
mapleFU 012966f
some NotImplement -> NotSupported
mapleFU 88f67dc
Move literal to expression dir
mapleFU 694cb55
Apply suggestions and create PrimitiveLiteralCaster
mapleFU 6d72f80
Merge branch 'primitive-literal-impl' of github.com:mapleFU/iceberg-c…
mapleFU 1d7f904
rename variables
mapleFU 470259d
Apply suggestions from code review
mapleFU 56c265e
Resolve comments
mapleFU c46ff60
continue resolve comments
mapleFU 197102e
Fix lint
mapleFU c9e5765
Rename tests
mapleFU 644bb9d
Compare float using <=>
mapleFU File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,323 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| #include "iceberg/literal.h" | ||
|
|
||
| #include <cmath> | ||
| #include <concepts> | ||
| #include <sstream> | ||
|
|
||
| #include "iceberg/exception.h" | ||
|
|
||
| namespace iceberg { | ||
|
|
||
| // Constructor | ||
| PrimitiveLiteral::PrimitiveLiteral(PrimitiveLiteralValue value, | ||
| std::shared_ptr<PrimitiveType> type) | ||
| : value_(std::move(value)), type_(std::move(type)) {} | ||
|
|
||
| // Factory methods | ||
| PrimitiveLiteral PrimitiveLiteral::Boolean(bool value) { | ||
| return {PrimitiveLiteralValue{value}, std::make_shared<BooleanType>()}; | ||
| } | ||
|
|
||
| PrimitiveLiteral PrimitiveLiteral::Int(int32_t value) { | ||
| return {PrimitiveLiteralValue{value}, std::make_shared<IntType>()}; | ||
| } | ||
|
|
||
| PrimitiveLiteral PrimitiveLiteral::Long(int64_t value) { | ||
| return {PrimitiveLiteralValue{value}, std::make_shared<LongType>()}; | ||
| } | ||
|
|
||
| PrimitiveLiteral PrimitiveLiteral::Float(float value) { | ||
| return {PrimitiveLiteralValue{value}, std::make_shared<FloatType>()}; | ||
| } | ||
|
|
||
| PrimitiveLiteral PrimitiveLiteral::Double(double value) { | ||
| return {PrimitiveLiteralValue{value}, std::make_shared<DoubleType>()}; | ||
| } | ||
|
|
||
| PrimitiveLiteral PrimitiveLiteral::String(std::string value) { | ||
| return {PrimitiveLiteralValue{std::move(value)}, std::make_shared<StringType>()}; | ||
| } | ||
|
|
||
| PrimitiveLiteral PrimitiveLiteral::Binary(std::vector<uint8_t> value) { | ||
| return {PrimitiveLiteralValue{std::move(value)}, std::make_shared<BinaryType>()}; | ||
| } | ||
|
|
||
| PrimitiveLiteral PrimitiveLiteral::BelowMinLiteral(std::shared_ptr<PrimitiveType> type) { | ||
| return {PrimitiveLiteralValue{BelowMin{}}, std::move(type)}; | ||
| } | ||
|
|
||
| PrimitiveLiteral PrimitiveLiteral::AboveMaxLiteral(std::shared_ptr<PrimitiveType> type) { | ||
| return {PrimitiveLiteralValue{AboveMax{}}, std::move(type)}; | ||
| } | ||
|
|
||
| Result<PrimitiveLiteral> PrimitiveLiteral::Deserialize(std::span<const uint8_t> data) { | ||
| return NotImplemented("Deserialization of PrimitiveLiteral is not implemented yet"); | ||
| } | ||
|
|
||
| Result<std::vector<uint8_t>> PrimitiveLiteral::Serialize() const { | ||
| return NotImplemented("Serialization of PrimitiveLiteral is not implemented yet"); | ||
| } | ||
|
|
||
| // Getters | ||
|
|
||
| const std::shared_ptr<PrimitiveType>& PrimitiveLiteral::type() const { return type_; } | ||
|
|
||
| // Cast method | ||
| Result<PrimitiveLiteral> PrimitiveLiteral::CastTo( | ||
| const std::shared_ptr<PrimitiveType>& target_type) const { | ||
| if (*type_ == *target_type) { | ||
| // If types are the same, return a copy of the current literal | ||
| return PrimitiveLiteral(value_, target_type); | ||
| } | ||
|
|
||
| // Handle special values | ||
| if (std::holds_alternative<BelowMin>(value_) || | ||
| std::holds_alternative<AboveMax>(value_)) { | ||
| // Cannot cast type for special values | ||
| return NotSupported("Cannot cast type for {}", ToString()); | ||
| } | ||
|
|
||
| auto source_type_id = type_->type_id(); | ||
| auto target_type_id = target_type->type_id(); | ||
|
|
||
| // Delegate to specific cast functions based on source type | ||
| switch (source_type_id) { | ||
| case TypeId::kInt: | ||
| return CastFromInt(target_type_id); | ||
| case TypeId::kLong: | ||
| return CastFromLong(target_type_id); | ||
| case TypeId::kFloat: | ||
| return CastFromFloat(target_type_id); | ||
| case TypeId::kDouble: | ||
| case TypeId::kBoolean: | ||
| case TypeId::kString: | ||
| case TypeId::kBinary: | ||
| break; | ||
| default: | ||
| break; | ||
| } | ||
|
|
||
| return NotSupported("Cast from {} to {} is not implemented", type_->ToString(), | ||
Fokko marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| target_type->ToString()); | ||
| } | ||
|
|
||
| Result<PrimitiveLiteral> PrimitiveLiteral::CastFromInt(TypeId target_type_id) const { | ||
| auto int_val = std::get<int32_t>(value_); | ||
|
|
||
| switch (target_type_id) { | ||
| case TypeId::kLong: | ||
| return PrimitiveLiteral::Long(static_cast<int64_t>(int_val)); | ||
| case TypeId::kFloat: | ||
| return PrimitiveLiteral::Float(static_cast<float>(int_val)); | ||
| case TypeId::kDouble: | ||
| return PrimitiveLiteral::Double(static_cast<double>(int_val)); | ||
| // TODO(mwish): Supports casts to date and literal | ||
| default: | ||
| return NotSupported("Cast from Int to {} is not implemented", | ||
| static_cast<int>(target_type_id)); | ||
| } | ||
| } | ||
|
|
||
| Result<PrimitiveLiteral> PrimitiveLiteral::CastFromLong(TypeId target_type_id) const { | ||
| auto long_val = std::get<int64_t>(value_); | ||
|
|
||
| switch (target_type_id) { | ||
| case TypeId::kInt: { | ||
| // Check for overflow | ||
| if (long_val >= std::numeric_limits<int32_t>::max()) { | ||
| return PrimitiveLiteral::AboveMaxLiteral(type_); | ||
| } | ||
| if (long_val <= std::numeric_limits<int32_t>::min()) { | ||
| return PrimitiveLiteral::BelowMinLiteral(type_); | ||
| } | ||
| return PrimitiveLiteral::Int(static_cast<int32_t>(long_val)); | ||
| } | ||
| case TypeId::kFloat: | ||
| return PrimitiveLiteral::Float(static_cast<float>(long_val)); | ||
| case TypeId::kDouble: | ||
| return PrimitiveLiteral::Double(static_cast<double>(long_val)); | ||
| default: | ||
| return NotImplemented("Cast from Long to {} is not implemented", | ||
| static_cast<int>(target_type_id)); | ||
| } | ||
| } | ||
|
|
||
| Result<PrimitiveLiteral> PrimitiveLiteral::CastFromFloat(TypeId target_type_id) const { | ||
| auto float_val = std::get<float>(value_); | ||
|
|
||
| switch (target_type_id) { | ||
| case TypeId::kDouble: | ||
| return PrimitiveLiteral::Double(static_cast<double>(float_val)); | ||
| default: | ||
| return NotImplemented("Cast from Float to {} is not implemented", | ||
| static_cast<int>(target_type_id)); | ||
| } | ||
| } | ||
|
|
||
| // Template function for floating point comparison following Iceberg rules: | ||
| // -NaN < NaN, but all NaN values (qNaN, sNaN) are treated as equivalent within their sign | ||
| template <std::floating_point T> | ||
| std::partial_ordering iceberg_float_compare(T lhs, T rhs) { | ||
| bool lhs_is_nan = std::isnan(lhs); | ||
| bool rhs_is_nan = std::isnan(rhs); | ||
|
|
||
| // If both are NaN, check their signs | ||
| if (lhs_is_nan && rhs_is_nan) { | ||
| bool lhs_is_negative = std::signbit(lhs); | ||
| bool rhs_is_negative = std::signbit(rhs); | ||
|
|
||
| if (lhs_is_negative == rhs_is_negative) { | ||
| // Same sign NaN values are equivalent (no qNaN vs sNaN distinction) | ||
| return std::partial_ordering::equivalent; | ||
| } | ||
| // -NaN < NaN | ||
| return lhs_is_negative ? std::partial_ordering::less : std::partial_ordering::greater; | ||
| } | ||
|
|
||
| // For non-NaN values, use standard strong ordering | ||
| return std::strong_order(lhs, rhs); | ||
| } | ||
|
|
||
| // Three-way comparison operator | ||
| std::partial_ordering PrimitiveLiteral::operator<=>(const PrimitiveLiteral& other) const { | ||
| // If types are different, comparison is unordered | ||
| if (type_->type_id() != other.type_->type_id()) { | ||
| return std::partial_ordering::unordered; | ||
| } | ||
|
|
||
| // If either value is AboveMax or BelowMin, comparison is unordered | ||
| if (isAboveMax() || isBelowMin() || other.isAboveMax() || other.isBelowMin()) { | ||
| return std::partial_ordering::unordered; | ||
| } | ||
|
|
||
| // Same type comparison for normal values | ||
| switch (type_->type_id()) { | ||
| case TypeId::kBoolean: { | ||
| auto this_val = std::get<bool>(value_); | ||
| auto other_val = std::get<bool>(other.value_); | ||
| if (this_val == other_val) return std::partial_ordering::equivalent; | ||
| return this_val ? std::partial_ordering::greater : std::partial_ordering::less; | ||
| } | ||
|
|
||
| case TypeId::kInt: { | ||
| auto this_val = std::get<int32_t>(value_); | ||
| auto other_val = std::get<int32_t>(other.value_); | ||
| return this_val <=> other_val; | ||
| } | ||
|
|
||
| case TypeId::kLong: { | ||
| auto this_val = std::get<int64_t>(value_); | ||
| auto other_val = std::get<int64_t>(other.value_); | ||
| return this_val <=> other_val; | ||
| } | ||
|
|
||
| case TypeId::kFloat: { | ||
| auto this_val = std::get<float>(value_); | ||
| auto other_val = std::get<float>(other.value_); | ||
| // Use strong_ordering for floating point as spec requests | ||
| return iceberg_float_compare(this_val, other_val); | ||
| } | ||
|
|
||
| case TypeId::kDouble: { | ||
| auto this_val = std::get<double>(value_); | ||
| auto other_val = std::get<double>(other.value_); | ||
| // Use strong_ordering for floating point as spec requests | ||
| return iceberg_float_compare(this_val, other_val); | ||
| } | ||
|
|
||
| case TypeId::kString: { | ||
| auto& this_val = std::get<std::string>(value_); | ||
| auto& other_val = std::get<std::string>(other.value_); | ||
| return this_val <=> other_val; | ||
| } | ||
|
|
||
| case TypeId::kBinary: { | ||
| auto& this_val = std::get<std::vector<uint8_t>>(value_); | ||
| auto& other_val = std::get<std::vector<uint8_t>>(other.value_); | ||
| return this_val <=> other_val; | ||
| } | ||
|
|
||
| default: | ||
| // For unsupported types, return unordered | ||
| return std::partial_ordering::unordered; | ||
| } | ||
| } | ||
|
|
||
| std::string PrimitiveLiteral::ToString() const { | ||
| if (std::holds_alternative<BelowMin>(value_)) { | ||
| return "BelowMin"; | ||
| } | ||
| if (std::holds_alternative<AboveMax>(value_)) { | ||
| return "AboveMax"; | ||
| } | ||
|
|
||
| switch (type_->type_id()) { | ||
| case TypeId::kBoolean: { | ||
| return std::get<bool>(value_) ? "true" : "false"; | ||
| } | ||
| case TypeId::kInt: { | ||
| return std::to_string(std::get<int32_t>(value_)); | ||
| } | ||
| case TypeId::kLong: { | ||
| return std::to_string(std::get<int64_t>(value_)); | ||
| } | ||
| case TypeId::kFloat: { | ||
| return std::to_string(std::get<float>(value_)); | ||
| } | ||
| case TypeId::kDouble: { | ||
| return std::to_string(std::get<double>(value_)); | ||
| } | ||
| case TypeId::kString: { | ||
| return std::get<std::string>(value_); | ||
| } | ||
| case TypeId::kBinary: { | ||
| const auto& binary_data = std::get<std::vector<uint8_t>>(value_); | ||
| std::string result; | ||
| result.reserve(binary_data.size() * 2); // 2 chars per byte | ||
| for (const auto& byte : binary_data) { | ||
| result += std::format("{:02X}", byte); | ||
| } | ||
| return result; | ||
| } | ||
| case TypeId::kDecimal: | ||
| case TypeId::kUuid: | ||
| case TypeId::kFixed: | ||
| case TypeId::kDate: | ||
| case TypeId::kTime: | ||
| case TypeId::kTimestamp: | ||
| case TypeId::kTimestampTz: { | ||
| throw IcebergError("Not implemented: ToString for " + type_->ToString()); | ||
| } | ||
| default: { | ||
| throw IcebergError("Unknown type: " + type_->ToString()); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| bool PrimitiveLiteral::isBelowMin() const { | ||
| return std::holds_alternative<BelowMin>(value_); | ||
| } | ||
|
|
||
| bool PrimitiveLiteral::isAboveMax() const { | ||
| return std::holds_alternative<AboveMax>(value_); | ||
| } | ||
| } // namespace iceberg | ||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.