|
21 | 21 |
|
#include <cmath>
#include <concepts>
#include <cstring>
#include <utility>

#include "iceberg/exception.h"
#include "iceberg/result.h"
#include "iceberg/util/decimal.h"
#include "iceberg/util/endian.h"
#include "iceberg/util/macros.h"
26 | 31 |
|
27 | 32 | namespace iceberg { |
28 | 33 |
|
@@ -149,13 +154,168 @@ Literal Literal::Binary(std::vector<uint8_t> value) { |
149 | 154 | return {Value{std::move(value)}, binary()}; |
150 | 155 | } |
151 | 156 |
|
| 157 | +Literal Literal::Decimal(int128_t value, int32_t precision, int32_t scale) { |
| 158 | + return {Value{value}, decimal(precision, scale)}; |
| 159 | +} |
| 160 | + |
| 161 | +Result<Literal> Literal::Decimal(std::string_view value) { |
| 162 | + int32_t precision = 0; |
| 163 | + int32_t scale = 0; |
| 164 | + ICEBERG_ASSIGN_OR_RAISE(auto decimal_value, |
| 165 | + Decimal::FromString(value, &precision, &scale)); |
| 166 | + return Literal{Value{decimal_value.value()}, decimal(precision, scale)}; |
| 167 | +} |
| 168 | + |
152 | 169 | Result<Literal> Literal::Deserialize(std::span<const uint8_t> data, |
153 | 170 | std::shared_ptr<PrimitiveType> type) { |
154 | | - return NotImplemented("Deserialization of Literal is not implemented yet"); |
| 171 | + Literal::Value value; |
| 172 | + switch (type->type_id()) { |
| 173 | + case TypeId::kBoolean: |
| 174 | + if (data.size() == 1 && data[0] == 1) { |
| 175 | + value = true; |
| 176 | + } else { |
| 177 | + value = false; |
| 178 | + } |
| 179 | + break; |
| 180 | + case TypeId::kInt: |
| 181 | + case TypeId::kDate: |
| 182 | + if (data.size() != sizeof(int32_t)) { |
| 183 | + return Invalid("Invalid data size for Int literal deserialization"); |
| 184 | + } |
| 185 | + value = FromLittleEndian(*reinterpret_cast<const int32_t*>(data.data())); |
| 186 | + break; |
| 187 | + case TypeId::kLong: |
| 188 | + // In the case of an evolved field |
| 189 | + if (data.size() == sizeof(int32_t)) { |
| 190 | + value = static_cast<int64_t>( |
| 191 | + FromLittleEndian(*reinterpret_cast<const int32_t*>(data.data()))); |
| 192 | + } else if (data.size() == sizeof(int64_t)) { |
| 193 | + value = FromLittleEndian(*reinterpret_cast<const int64_t*>(data.data())); |
| 194 | + } else { |
| 195 | + return Invalid("Invalid data size for Long literal deserialization"); |
| 196 | + } |
| 197 | + break; |
| 198 | + case TypeId::kFloat: |
| 199 | + if (data.size() != sizeof(float)) { |
| 200 | + return Invalid("Invalid data size for Float literal deserialization"); |
| 201 | + } |
| 202 | + value = FromLittleEndian(*reinterpret_cast<const float*>(data.data())); |
| 203 | + break; |
| 204 | + case TypeId::kDouble: |
| 205 | + // In the case of an evolved field |
| 206 | + if (data.size() == sizeof(float)) { |
| 207 | + value = static_cast<double>( |
| 208 | + FromLittleEndian(*reinterpret_cast<const float*>(data.data()))); |
| 209 | + } else if (data.size() == sizeof(double)) { |
| 210 | + value = FromLittleEndian(*reinterpret_cast<const double*>(data.data())); |
| 211 | + } else { |
| 212 | + return Invalid("Invalid data size for Double literal deserialization"); |
| 213 | + } |
| 214 | + break; |
| 215 | + case TypeId::kTime: |
| 216 | + case TypeId::kTimestamp: |
| 217 | + case TypeId::kTimestampTz: |
| 218 | + if (data.size() != sizeof(int64_t)) { |
| 219 | + return Invalid("Invalid data size for Timestamp/Time literal deserialization"); |
| 220 | + } |
| 221 | + value = FromLittleEndian(*reinterpret_cast<const int64_t*>(data.data())); |
| 222 | + break; |
| 223 | + case TypeId::kString: |
| 224 | + value = std::string(data.begin(), data.end()); |
| 225 | + break; |
| 226 | + case TypeId::kUuid: |
| 227 | + if (data.size() != 16) { |
| 228 | + return Invalid("Invalid data size for UUID literal deserialization"); |
| 229 | + } |
| 230 | + value = *reinterpret_cast<const std::array<uint8_t, 16>*>(data.data()); |
| 231 | + break; |
| 232 | + case TypeId::kDecimal: { |
| 233 | + ICEBERG_ASSIGN_OR_RAISE(auto unscaled_decimal, |
| 234 | + Decimal::FromBigEndian(data.data(), data.size())); |
| 235 | + value = unscaled_decimal.value(); |
| 236 | + } break; |
| 237 | + case TypeId::kFixed: |
| 238 | + case TypeId::kBinary: |
| 239 | + value = std::vector<uint8_t>(data.begin(), data.end()); |
| 240 | + break; |
| 241 | + default: |
| 242 | + std::unreachable(); |
| 243 | + } |
| 244 | + |
| 245 | + return Literal(value, std::move(type)); |
155 | 246 | } |
156 | 247 |
|
157 | 248 | Result<std::vector<uint8_t>> Literal::Serialize() const { |
158 | | - return NotImplemented("Serialization of Literal is not implemented yet"); |
| 249 | + if (IsAboveMax() || IsBelowMin()) { |
| 250 | + return Invalid("Cannot serialize AboveMax or BelowMin literal"); |
| 251 | + } |
| 252 | + if (IsNull()) { |
| 253 | + return std::vector<uint8_t>{}; |
| 254 | + } |
| 255 | + |
| 256 | + switch (type_->type_id()) { |
| 257 | + case TypeId::kBoolean: { |
| 258 | + bool bool_val = std::get<bool>(value_); |
| 259 | + return std::vector<uint8_t>{static_cast<uint8_t>(bool_val ? 1 : 0)}; |
| 260 | + } |
| 261 | + case TypeId::kInt: |
| 262 | + case TypeId::kDate: { |
| 263 | + int32_t int_val = std::get<int32_t>(value_); |
| 264 | + int32_t le_val = ToLittleEndian(int_val); |
| 265 | + const auto* bytes = |
| 266 | + reinterpret_cast<const uint8_t*>(static_cast<const void*>(&le_val)); |
| 267 | + return std::vector<uint8_t>(bytes, bytes + sizeof(int32_t)); |
| 268 | + } |
| 269 | + case TypeId::kLong: { |
| 270 | + int64_t long_val = std::get<int64_t>(value_); |
| 271 | + int64_t le_val = ToLittleEndian(long_val); |
| 272 | + const auto* bytes = |
| 273 | + reinterpret_cast<const uint8_t*>(static_cast<const void*>(&le_val)); |
| 274 | + return std::vector<uint8_t>(bytes, bytes + sizeof(int64_t)); |
| 275 | + } |
| 276 | + case TypeId::kFloat: { |
| 277 | + float float_val = std::get<float>(value_); |
| 278 | + float le_val = ToLittleEndian(float_val); |
| 279 | + const auto* bytes = |
| 280 | + reinterpret_cast<const uint8_t*>(static_cast<const void*>(&le_val)); |
| 281 | + return std::vector<uint8_t>(bytes, bytes + sizeof(float)); |
| 282 | + } |
| 283 | + case TypeId::kDouble: { |
| 284 | + double double_val = std::get<double>(value_); |
| 285 | + double le_val = ToLittleEndian(double_val); |
| 286 | + const auto* bytes = |
| 287 | + reinterpret_cast<const uint8_t*>(static_cast<const void*>(&le_val)); |
| 288 | + return std::vector<uint8_t>(bytes, bytes + sizeof(double)); |
| 289 | + } |
| 290 | + case TypeId::kTime: |
| 291 | + case TypeId::kTimestamp: |
| 292 | + case TypeId::kTimestampTz: { |
| 293 | + int64_t time_val = std::get<int64_t>(value_); |
| 294 | + int64_t le_val = ToLittleEndian(time_val); |
| 295 | + const auto* bytes = |
| 296 | + reinterpret_cast<const uint8_t*>(static_cast<const void*>(&le_val)); |
| 297 | + return std::vector<uint8_t>(bytes, bytes + sizeof(int64_t)); |
| 298 | + } |
| 299 | + case TypeId::kString: { |
| 300 | + const auto& str_val = std::get<std::string>(value_); |
| 301 | + return std::vector<uint8_t>(str_val.begin(), str_val.end()); |
| 302 | + } |
| 303 | + case TypeId::kUuid: { |
| 304 | + const auto& uuid_val = std::get<std::array<uint8_t, 16>>(value_); |
| 305 | + return std::vector<uint8_t>(uuid_val.begin(), uuid_val.end()); |
| 306 | + } |
| 307 | + case TypeId::kDecimal: { |
| 308 | + int128_t decimal_val = std::get<int128_t>(value_); |
| 309 | + return Decimal::ToBigEndian(decimal_val); |
| 310 | + } |
| 311 | + case TypeId::kFixed: |
| 312 | + case TypeId::kBinary: { |
| 313 | + const auto& bin_val = std::get<std::vector<uint8_t>>(value_); |
| 314 | + return bin_val; |
| 315 | + } |
| 316 | + default: |
| 317 | + std::unreachable(); |
| 318 | + } |
159 | 319 | } |
160 | 320 |
|
161 | 321 | // Getters |
@@ -249,6 +409,13 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const { |
249 | 409 | return this_val <=> other_val; |
250 | 410 | } |
251 | 411 |
|
| 412 | + case TypeId::kDecimal: { |
| 413 | + // TODO(zhjwpku): Handle precision/scale differences |
| 414 | + auto this_val = std::get<int128_t>(value_); |
| 415 | + auto other_val = std::get<int128_t>(other.value_); |
| 416 | + return this_val <=> other_val; |
| 417 | + } |
| 418 | + |
252 | 419 | default: |
253 | 420 | // For unsupported types, return unordered |
254 | 421 | return std::partial_ordering::unordered; |
|
0 commit comments