|
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one |
| 3 | + * or more contributor license agreements. See the NOTICE file |
| 4 | + * distributed with this work for additional information |
| 5 | + * regarding copyright ownership. The ASF licenses this file |
| 6 | + * to you under the Apache License, Version 2.0 (the |
| 7 | + * "License"); you may not use this file except in compliance |
| 8 | + * with the License. You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, |
| 13 | + * software distributed under the License is distributed on an |
| 14 | + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 15 | + * KIND, either express or implied. See the License for the |
| 16 | + * specific language governing permissions and limitations |
| 17 | + * under the License. |
| 18 | + */ |
| 19 | + |
| 20 | +#include "iceberg/expression/literal.h" |
| 21 | + |
| 22 | +#include <cmath> |
| 23 | +#include <concepts> |
| 24 | +#include <sstream> |
| 25 | + |
| 26 | +#include "iceberg/exception.h" |
| 27 | + |
| 28 | +namespace iceberg { |
| 29 | + |
| 30 | +/// \brief LiteralCaster handles type casting operations for Literal. |
| 31 | +/// This is an internal implementation class. |
| 32 | +class LiteralCaster { |
| 33 | + public: |
| 34 | + /// Cast a Literal to the target type. |
| 35 | + static Result<Literal> CastTo(const Literal& literal, |
| 36 | + const std::shared_ptr<PrimitiveType>& target_type); |
| 37 | + |
| 38 | + /// Create a literal representing a value below the minimum for the given type. |
| 39 | + static Literal BelowMinLiteral(std::shared_ptr<PrimitiveType> type); |
| 40 | + |
| 41 | + /// Create a literal representing a value above the maximum for the given type. |
| 42 | + static Literal AboveMaxLiteral(std::shared_ptr<PrimitiveType> type); |
| 43 | + |
| 44 | + private: |
| 45 | + /// Cast from Int type to target type. |
| 46 | + static Result<Literal> CastFromInt(const Literal& literal, |
| 47 | + const std::shared_ptr<PrimitiveType>& target_type); |
| 48 | + |
| 49 | + /// Cast from Long type to target type. |
| 50 | + static Result<Literal> CastFromLong(const Literal& literal, |
| 51 | + const std::shared_ptr<PrimitiveType>& target_type); |
| 52 | + |
| 53 | + /// Cast from Float type to target type. |
| 54 | + static Result<Literal> CastFromFloat(const Literal& literal, |
| 55 | + const std::shared_ptr<PrimitiveType>& target_type); |
| 56 | +}; |
| 57 | + |
| 58 | +Literal LiteralCaster::BelowMinLiteral(std::shared_ptr<PrimitiveType> type) { |
| 59 | + return Literal(Literal::BelowMin{}, std::move(type)); |
| 60 | +} |
| 61 | + |
| 62 | +Literal LiteralCaster::AboveMaxLiteral(std::shared_ptr<PrimitiveType> type) { |
| 63 | + return Literal(Literal::AboveMax{}, std::move(type)); |
| 64 | +} |
| 65 | + |
| 66 | +Result<Literal> LiteralCaster::CastFromInt( |
| 67 | + const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) { |
| 68 | + auto int_val = std::get<int32_t>(literal.value_); |
| 69 | + auto target_type_id = target_type->type_id(); |
| 70 | + |
| 71 | + switch (target_type_id) { |
| 72 | + case TypeId::kLong: |
| 73 | + return Literal::Long(static_cast<int64_t>(int_val)); |
| 74 | + case TypeId::kFloat: |
| 75 | + return Literal::Float(static_cast<float>(int_val)); |
| 76 | + case TypeId::kDouble: |
| 77 | + return Literal::Double(static_cast<double>(int_val)); |
| 78 | + default: |
| 79 | + return NotSupported("Cast from Int to {} is not implemented", |
| 80 | + target_type->ToString()); |
| 81 | + } |
| 82 | +} |
| 83 | + |
| 84 | +Result<Literal> LiteralCaster::CastFromLong( |
| 85 | + const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) { |
| 86 | + auto long_val = std::get<int64_t>(literal.value_); |
| 87 | + auto target_type_id = target_type->type_id(); |
| 88 | + |
| 89 | + switch (target_type_id) { |
| 90 | + case TypeId::kInt: { |
| 91 | + // Check for overflow |
| 92 | + if (long_val >= std::numeric_limits<int32_t>::max()) { |
| 93 | + return AboveMaxLiteral(target_type); |
| 94 | + } |
| 95 | + if (long_val <= std::numeric_limits<int32_t>::min()) { |
| 96 | + return BelowMinLiteral(target_type); |
| 97 | + } |
| 98 | + return Literal::Int(static_cast<int32_t>(long_val)); |
| 99 | + } |
| 100 | + case TypeId::kFloat: |
| 101 | + return Literal::Float(static_cast<float>(long_val)); |
| 102 | + case TypeId::kDouble: |
| 103 | + return Literal::Double(static_cast<double>(long_val)); |
| 104 | + default: |
| 105 | + return NotSupported("Cast from Long to {} is not supported", |
| 106 | + target_type->ToString()); |
| 107 | + } |
| 108 | +} |
| 109 | + |
| 110 | +Result<Literal> LiteralCaster::CastFromFloat( |
| 111 | + const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) { |
| 112 | + auto float_val = std::get<float>(literal.value_); |
| 113 | + auto target_type_id = target_type->type_id(); |
| 114 | + |
| 115 | + switch (target_type_id) { |
| 116 | + case TypeId::kDouble: |
| 117 | + return Literal::Double(static_cast<double>(float_val)); |
| 118 | + default: |
| 119 | + return NotSupported("Cast from Float to {} is not supported", |
| 120 | + target_type->ToString()); |
| 121 | + } |
| 122 | +} |
| 123 | + |
| 124 | +// Constructor |
| 125 | +Literal::Literal(Value value, std::shared_ptr<PrimitiveType> type) |
| 126 | + : value_(std::move(value)), type_(std::move(type)) {} |
| 127 | + |
| 128 | +// Factory methods |
| 129 | +Literal Literal::Boolean(bool value) { |
| 130 | + return {Value{value}, std::make_shared<BooleanType>()}; |
| 131 | +} |
| 132 | + |
| 133 | +Literal Literal::Int(int32_t value) { |
| 134 | + return {Value{value}, std::make_shared<IntType>()}; |
| 135 | +} |
| 136 | + |
| 137 | +Literal Literal::Long(int64_t value) { |
| 138 | + return {Value{value}, std::make_shared<LongType>()}; |
| 139 | +} |
| 140 | + |
| 141 | +Literal Literal::Float(float value) { |
| 142 | + return {Value{value}, std::make_shared<FloatType>()}; |
| 143 | +} |
| 144 | + |
| 145 | +Literal Literal::Double(double value) { |
| 146 | + return {Value{value}, std::make_shared<DoubleType>()}; |
| 147 | +} |
| 148 | + |
| 149 | +Literal Literal::String(std::string value) { |
| 150 | + return {Value{std::move(value)}, std::make_shared<StringType>()}; |
| 151 | +} |
| 152 | + |
| 153 | +Literal Literal::Binary(std::vector<uint8_t> value) { |
| 154 | + return {Value{std::move(value)}, std::make_shared<BinaryType>()}; |
| 155 | +} |
| 156 | + |
| 157 | +Result<Literal> Literal::Deserialize(std::span<const uint8_t> data, |
| 158 | + std::shared_ptr<PrimitiveType> type) { |
| 159 | + return NotImplemented("Deserialization of Literal is not implemented yet"); |
| 160 | +} |
| 161 | + |
| 162 | +Result<std::vector<uint8_t>> Literal::Serialize() const { |
| 163 | + return NotImplemented("Serialization of Literal is not implemented yet"); |
| 164 | +} |
| 165 | + |
| 166 | +// Getters |
| 167 | + |
| 168 | +const std::shared_ptr<PrimitiveType>& Literal::type() const { return type_; } |
| 169 | + |
| 170 | +// Cast method |
| 171 | +Result<Literal> Literal::CastTo(const std::shared_ptr<PrimitiveType>& target_type) const { |
| 172 | + return LiteralCaster::CastTo(*this, target_type); |
| 173 | +} |
| 174 | + |
| 175 | +// Template function for floating point comparison following Iceberg rules: |
| 176 | +// -NaN < NaN, but all NaN values (qNaN, sNaN) are treated as equivalent within their sign |
| 177 | +template <std::floating_point T> |
| 178 | +std::strong_ordering CompareFloat(T lhs, T rhs) { |
| 179 | + // If both are NaN, check their signs |
| 180 | + bool all_nan = std::isnan(lhs) && std::isnan(rhs); |
| 181 | + if (!all_nan) { |
| 182 | + // If not both NaN, use strong ordering |
| 183 | + return std::strong_order(lhs, rhs); |
| 184 | + } |
| 185 | + // Same sign NaN values are equivalent (no qNaN vs sNaN distinction), |
| 186 | + // and -NAN < NAN. |
| 187 | + bool lhs_is_negative = std::signbit(lhs); |
| 188 | + bool rhs_is_negative = std::signbit(rhs); |
| 189 | + return lhs_is_negative <=> rhs_is_negative; |
| 190 | +} |
| 191 | + |
| 192 | +// Three-way comparison operator |
| 193 | +std::partial_ordering Literal::operator<=>(const Literal& other) const { |
| 194 | + // If types are different, comparison is unordered |
| 195 | + if (type_->type_id() != other.type_->type_id()) { |
| 196 | + return std::partial_ordering::unordered; |
| 197 | + } |
| 198 | + |
| 199 | + // If either value is AboveMax or BelowMin, comparison is unordered |
| 200 | + if (IsAboveMax() || IsBelowMin() || other.IsAboveMax() || other.IsBelowMin()) { |
| 201 | + return std::partial_ordering::unordered; |
| 202 | + } |
| 203 | + |
| 204 | + // Same type comparison for normal values |
| 205 | + switch (type_->type_id()) { |
| 206 | + case TypeId::kBoolean: { |
| 207 | + auto this_val = std::get<bool>(value_); |
| 208 | + auto other_val = std::get<bool>(other.value_); |
| 209 | + if (this_val == other_val) return std::partial_ordering::equivalent; |
| 210 | + return this_val ? std::partial_ordering::greater : std::partial_ordering::less; |
| 211 | + } |
| 212 | + |
| 213 | + case TypeId::kInt: { |
| 214 | + auto this_val = std::get<int32_t>(value_); |
| 215 | + auto other_val = std::get<int32_t>(other.value_); |
| 216 | + return this_val <=> other_val; |
| 217 | + } |
| 218 | + |
| 219 | + case TypeId::kLong: { |
| 220 | + auto this_val = std::get<int64_t>(value_); |
| 221 | + auto other_val = std::get<int64_t>(other.value_); |
| 222 | + return this_val <=> other_val; |
| 223 | + } |
| 224 | + |
| 225 | + case TypeId::kFloat: { |
| 226 | + auto this_val = std::get<float>(value_); |
| 227 | + auto other_val = std::get<float>(other.value_); |
| 228 | + // Use strong_ordering for floating point as spec requests |
| 229 | + return CompareFloat(this_val, other_val); |
| 230 | + } |
| 231 | + |
| 232 | + case TypeId::kDouble: { |
| 233 | + auto this_val = std::get<double>(value_); |
| 234 | + auto other_val = std::get<double>(other.value_); |
| 235 | + // Use strong_ordering for floating point as spec requests |
| 236 | + return CompareFloat(this_val, other_val); |
| 237 | + } |
| 238 | + |
| 239 | + case TypeId::kString: { |
| 240 | + auto& this_val = std::get<std::string>(value_); |
| 241 | + auto& other_val = std::get<std::string>(other.value_); |
| 242 | + return this_val <=> other_val; |
| 243 | + } |
| 244 | + |
| 245 | + case TypeId::kBinary: { |
| 246 | + auto& this_val = std::get<std::vector<uint8_t>>(value_); |
| 247 | + auto& other_val = std::get<std::vector<uint8_t>>(other.value_); |
| 248 | + return this_val <=> other_val; |
| 249 | + } |
| 250 | + |
| 251 | + default: |
| 252 | + // For unsupported types, return unordered |
| 253 | + return std::partial_ordering::unordered; |
| 254 | + } |
| 255 | +} |
| 256 | + |
/// \brief Render this literal as text.
///
/// AboveMax/BelowMin literals render as "aboveMax"/"belowMin". Booleans render
/// as "true"/"false", numeric values via std::to_string, strings as-is, and
/// binary data as two upper-case hex digits per byte.
///
/// \throws IcebergError for type ids that have no rendering here.
std::string Literal::ToString() const {
  if (IsBelowMin()) {
    return "belowMin";
  }
  if (IsAboveMax()) {
    return "aboveMax";
  }

  const auto type_id = type_->type_id();
  switch (type_id) {
    case TypeId::kBoolean:
      return std::get<bool>(value_) ? "true" : "false";
    case TypeId::kInt:
      return std::to_string(std::get<int32_t>(value_));
    case TypeId::kLong:
      return std::to_string(std::get<int64_t>(value_));
    case TypeId::kFloat:
      return std::to_string(std::get<float>(value_));
    case TypeId::kDouble:
      return std::to_string(std::get<double>(value_));
    case TypeId::kString:
      return std::get<std::string>(value_);
    case TypeId::kBinary: {
      const auto& bytes = std::get<std::vector<uint8_t>>(value_);
      std::string hex;
      hex.reserve(bytes.size() * 2);  // two hex digits per byte
      for (const uint8_t byte : bytes) {
        std::format_to(std::back_inserter(hex), "{:02X}", byte);
      }
      return hex;
    }
    case TypeId::kDecimal:
    case TypeId::kUuid:
    case TypeId::kFixed:
    case TypeId::kDate:
    case TypeId::kTime:
    case TypeId::kTimestamp:
    case TypeId::kTimestampTz:
      throw IcebergError("Not implemented: ToString for " + type_->ToString());
    default:
      throw IcebergError("Unknown type: " + type_->ToString());
  }
}
| 307 | + |
| 308 | +bool Literal::IsBelowMin() const { return std::holds_alternative<BelowMin>(value_); } |
| 309 | + |
| 310 | +bool Literal::IsAboveMax() const { return std::holds_alternative<AboveMax>(value_); } |
| 311 | + |
| 312 | +// LiteralCaster implementation |
| 313 | + |
| 314 | +Result<Literal> LiteralCaster::CastTo(const Literal& literal, |
| 315 | + const std::shared_ptr<PrimitiveType>& target_type) { |
| 316 | + if (*literal.type_ == *target_type) { |
| 317 | + // If types are the same, return a copy of the current literal |
| 318 | + return Literal(literal.value_, target_type); |
| 319 | + } |
| 320 | + |
| 321 | + // Handle special values |
| 322 | + if (std::holds_alternative<Literal::BelowMin>(literal.value_) || |
| 323 | + std::holds_alternative<Literal::AboveMax>(literal.value_)) { |
| 324 | + // Cannot cast type for special values |
| 325 | + return NotSupported("Cannot cast type for {}", literal.ToString()); |
| 326 | + } |
| 327 | + |
| 328 | + auto source_type_id = literal.type_->type_id(); |
| 329 | + |
| 330 | + // Delegate to specific cast functions based on source type |
| 331 | + switch (source_type_id) { |
| 332 | + case TypeId::kInt: |
| 333 | + return CastFromInt(literal, target_type); |
| 334 | + case TypeId::kLong: |
| 335 | + return CastFromLong(literal, target_type); |
| 336 | + case TypeId::kFloat: |
| 337 | + return CastFromFloat(literal, target_type); |
| 338 | + case TypeId::kDouble: |
| 339 | + case TypeId::kBoolean: |
| 340 | + case TypeId::kString: |
| 341 | + case TypeId::kBinary: |
| 342 | + break; |
| 343 | + default: |
| 344 | + break; |
| 345 | + } |
| 346 | + |
| 347 | + return NotSupported("Cast from {} to {} is not implemented", literal.type_->ToString(), |
| 348 | + target_type->ToString()); |
| 349 | +} |
| 350 | + |
| 351 | +} // namespace iceberg |
0 commit comments