Skip to content

Commit 5e26ef6

Browse files
mapleFUwgtmac
andauthored
feat: implement Primitive type Literal (#117)
Before implementing expressions, primitive literals should be implemented. The spec is here: https://iceberg.apache.org/spec/#schemas-and-data-types In this patch, only a few literal types are supported: int, long, float, double, boolean, string and binary. TODO: * [x] Implement the detailed comparison logic * [x] Implement the detailed castTo logic * [x] ToString and printing logic * [x] Tests To be done in other patches: * Basic support for more primitive types (maybe without decimal for now) * Implement the detailed serialize/deserialize logic --------- Co-authored-by: Gang Wu <[email protected]>
1 parent 3cf5963 commit 5e26ef6

File tree

6 files changed

+876
-2
lines changed

6 files changed

+876
-2
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ set(ICEBERG_SOURCES
2222
catalog/in_memory_catalog.cc
2323
demo.cc
2424
expression/expression.cc
25+
expression/literal.cc
2526
file_reader.cc
2627
json_internal.cc
2728
manifest_entry.cc

src/iceberg/expression/expression.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
#pragma once
2121

22-
/// \file iceberg/expression.h
22+
/// \file iceberg/expression/expression.h
2323
/// Expression interface for Iceberg table operations.
2424

2525
#include <memory>

src/iceberg/expression/literal.cc

Lines changed: 351 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,351 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/expression/literal.h"
21+
22+
#include <cmath>
23+
#include <concepts>
24+
#include <sstream>
25+
26+
#include "iceberg/exception.h"
27+
28+
namespace iceberg {
29+
30+
/// \brief LiteralCaster handles type casting operations for Literal.
31+
/// This is an internal implementation class.
32+
class LiteralCaster {
33+
public:
34+
/// Cast a Literal to the target type.
35+
static Result<Literal> CastTo(const Literal& literal,
36+
const std::shared_ptr<PrimitiveType>& target_type);
37+
38+
/// Create a literal representing a value below the minimum for the given type.
39+
static Literal BelowMinLiteral(std::shared_ptr<PrimitiveType> type);
40+
41+
/// Create a literal representing a value above the maximum for the given type.
42+
static Literal AboveMaxLiteral(std::shared_ptr<PrimitiveType> type);
43+
44+
private:
45+
/// Cast from Int type to target type.
46+
static Result<Literal> CastFromInt(const Literal& literal,
47+
const std::shared_ptr<PrimitiveType>& target_type);
48+
49+
/// Cast from Long type to target type.
50+
static Result<Literal> CastFromLong(const Literal& literal,
51+
const std::shared_ptr<PrimitiveType>& target_type);
52+
53+
/// Cast from Float type to target type.
54+
static Result<Literal> CastFromFloat(const Literal& literal,
55+
const std::shared_ptr<PrimitiveType>& target_type);
56+
};
57+
58+
/// Pair the BelowMin marker with `type` to form the "below minimum" literal.
Literal LiteralCaster::BelowMinLiteral(std::shared_ptr<PrimitiveType> type) {
  Literal below_min(Literal::BelowMin{}, std::move(type));
  return below_min;
}
61+
62+
/// Pair the AboveMax marker with `type` to form the "above maximum" literal.
Literal LiteralCaster::AboveMaxLiteral(std::shared_ptr<PrimitiveType> type) {
  Literal above_max(Literal::AboveMax{}, std::move(type));
  return above_max;
}
65+
66+
/// \brief Cast an Int literal to `target_type`.
///
/// Long, Float and Double targets are handled; every other target yields a
/// NotSupported error.
///
/// \param literal Literal whose value_ holds an int32_t.
/// \param target_type Requested result type.
Result<Literal> LiteralCaster::CastFromInt(
    const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
  const int32_t source = std::get<int32_t>(literal.value_);

  switch (target_type->type_id()) {
    case TypeId::kLong: {
      const auto widened = static_cast<int64_t>(source);
      return Literal::Long(widened);
    }
    case TypeId::kFloat: {
      const auto as_float = static_cast<float>(source);
      return Literal::Float(as_float);
    }
    case TypeId::kDouble: {
      const auto as_double = static_cast<double>(source);
      return Literal::Double(as_double);
    }
    default:
      break;
  }

  return NotSupported("Cast from Int to {} is not implemented", target_type->ToString());
}
83+
84+
/// \brief Cast a Long literal to `target_type`.
///
/// Int, Float and Double targets are handled. For an Int target, a value that
/// does not fit in int32_t is replaced by the AboveMax / BelowMin literal of the
/// target type. The bounds themselves (INT32_MAX and INT32_MIN) are
/// representable and are converted to ordinary Int literals.
///
/// \param literal Literal whose value_ holds an int64_t.
/// \param target_type Requested result type.
/// \return The converted literal, or a NotSupported error for any other target.
Result<Literal> LiteralCaster::CastFromLong(
    const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
  auto long_val = std::get<int64_t>(literal.value_);
  auto target_type_id = target_type->type_id();

  switch (target_type_id) {
    case TypeId::kInt: {
      // Strict comparisons: only values strictly outside [INT32_MIN, INT32_MAX]
      // overflow. Using >= / <= here would wrongly turn the two boundary values
      // into AboveMax / BelowMin sentinels.
      if (long_val > std::numeric_limits<int32_t>::max()) {
        return AboveMaxLiteral(target_type);
      }
      if (long_val < std::numeric_limits<int32_t>::min()) {
        return BelowMinLiteral(target_type);
      }
      return Literal::Int(static_cast<int32_t>(long_val));
    }
    case TypeId::kFloat:
      return Literal::Float(static_cast<float>(long_val));
    case TypeId::kDouble:
      return Literal::Double(static_cast<double>(long_val));
    default:
      return NotSupported("Cast from Long to {} is not supported",
                          target_type->ToString());
  }
}
109+
110+
/// \brief Cast a Float literal to `target_type`.
///
/// Only the Double target is handled; every other target yields a NotSupported
/// error.
///
/// \param literal Literal whose value_ holds a float.
/// \param target_type Requested result type.
Result<Literal> LiteralCaster::CastFromFloat(
    const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
  // Read the value first so a mismatching variant alternative is detected
  // before the target type is inspected.
  const float source = std::get<float>(literal.value_);

  if (target_type->type_id() == TypeId::kDouble) {
    const auto widened = static_cast<double>(source);
    return Literal::Double(widened);
  }

  return NotSupported("Cast from Float to {} is not supported", target_type->ToString());
}
123+
124+
// Constructor
125+
Literal::Literal(Value value, std::shared_ptr<PrimitiveType> type)
126+
: value_(std::move(value)), type_(std::move(type)) {}
127+
128+
// Factory methods
129+
/// Create a literal of BooleanType holding `value`.
Literal Literal::Boolean(bool value) {
  auto boolean_type = std::make_shared<BooleanType>();
  return Literal(Value{value}, std::move(boolean_type));
}
132+
133+
/// Create a literal of IntType holding `value`.
Literal Literal::Int(int32_t value) {
  auto int_type = std::make_shared<IntType>();
  return Literal(Value{value}, std::move(int_type));
}
136+
137+
/// Create a literal of LongType holding `value`.
Literal Literal::Long(int64_t value) {
  auto long_type = std::make_shared<LongType>();
  return Literal(Value{value}, std::move(long_type));
}
140+
141+
/// Create a literal of FloatType holding `value`.
Literal Literal::Float(float value) {
  auto float_type = std::make_shared<FloatType>();
  return Literal(Value{value}, std::move(float_type));
}
144+
145+
/// Create a literal of DoubleType holding `value`.
Literal Literal::Double(double value) {
  auto double_type = std::make_shared<DoubleType>();
  return Literal(Value{value}, std::move(double_type));
}
148+
149+
/// Create a literal of StringType; `value` is moved into the literal.
Literal Literal::String(std::string value) {
  Value holder{std::move(value)};
  return Literal(std::move(holder), std::make_shared<StringType>());
}
152+
153+
/// Create a literal of BinaryType; the bytes in `value` are moved into the literal.
Literal Literal::Binary(std::vector<uint8_t> value) {
  Value holder{std::move(value)};
  return Literal(std::move(holder), std::make_shared<BinaryType>());
}
156+
157+
/// \brief Placeholder for decoding a literal from bytes.
///
/// Neither argument is read yet; the call always yields a NotImplemented error.
Result<Literal> Literal::Deserialize([[maybe_unused]] std::span<const uint8_t> data,
                                     [[maybe_unused]] std::shared_ptr<PrimitiveType> type) {
  return NotImplemented("Deserialization of Literal is not implemented yet");
}
161+
162+
/// \brief Placeholder for encoding this literal as bytes.
///
/// The call always yields a NotImplemented error.
Result<std::vector<uint8_t>> Literal::Serialize() const {
  // No encoding exists yet, so report that instead of producing bytes.
  return NotImplemented("Serialization of Literal is not implemented yet");
}
165+
166+
// Getters
167+
168+
/// Return a reference to the shared pointer holding this literal's type.
const std::shared_ptr<PrimitiveType>& Literal::type() const {
  return type_;
}
169+
170+
// Cast method
171+
/// \brief Cast this literal to `target_type`.
///
/// The work is forwarded unchanged to LiteralCaster::CastTo.
Result<Literal> Literal::CastTo(const std::shared_ptr<PrimitiveType>& target_type) const {
  const Literal& self = *this;
  return LiteralCaster::CastTo(self, target_type);
}
174+
175+
// Template function for floating point comparison following Iceberg rules:
176+
// -NaN < NaN, but all NaN values (qNaN, sNaN) are treated as equivalent within their sign
177+
template <std::floating_point T>
178+
std::strong_ordering CompareFloat(T lhs, T rhs) {
179+
// If both are NaN, check their signs
180+
bool all_nan = std::isnan(lhs) && std::isnan(rhs);
181+
if (!all_nan) {
182+
// If not both NaN, use strong ordering
183+
return std::strong_order(lhs, rhs);
184+
}
185+
// Same sign NaN values are equivalent (no qNaN vs sNaN distinction),
186+
// and -NAN < NAN.
187+
bool lhs_is_negative = std::signbit(lhs);
188+
bool rhs_is_negative = std::signbit(rhs);
189+
return lhs_is_negative <=> rhs_is_negative;
190+
}
191+
192+
// Three-way comparison operator
193+
/// \brief Three-way comparison between two literals.
///
/// The result is `unordered` when the two literals have different type ids,
/// when either side is an AboveMax / BelowMin literal, or when the type has no
/// comparison implemented here. Otherwise the stored values are compared.
std::partial_ordering Literal::operator<=>(const Literal& other) const {
  const auto type_id = type_->type_id();

  // Literals of different types cannot be ordered against each other.
  if (type_id != other.type_->type_id()) {
    return std::partial_ordering::unordered;
  }

  // Sentinel values carry no concrete value to compare.
  const bool has_sentinel =
      IsAboveMax() || IsBelowMin() || other.IsAboveMax() || other.IsBelowMin();
  if (has_sentinel) {
    return std::partial_ordering::unordered;
  }

  switch (type_id) {
    case TypeId::kBoolean: {
      const bool lhs = std::get<bool>(value_);
      const bool rhs = std::get<bool>(other.value_);
      if (lhs != rhs) {
        // false orders before true.
        return lhs ? std::partial_ordering::greater : std::partial_ordering::less;
      }
      return std::partial_ordering::equivalent;
    }

    case TypeId::kInt: {
      const int32_t lhs = std::get<int32_t>(value_);
      const int32_t rhs = std::get<int32_t>(other.value_);
      return lhs <=> rhs;
    }

    case TypeId::kLong: {
      const int64_t lhs = std::get<int64_t>(value_);
      const int64_t rhs = std::get<int64_t>(other.value_);
      return lhs <=> rhs;
    }

    case TypeId::kFloat: {
      // Floating point values go through CompareFloat to obtain a strong ordering.
      const float lhs = std::get<float>(value_);
      const float rhs = std::get<float>(other.value_);
      return CompareFloat(lhs, rhs);
    }

    case TypeId::kDouble: {
      // Floating point values go through CompareFloat to obtain a strong ordering.
      const double lhs = std::get<double>(value_);
      const double rhs = std::get<double>(other.value_);
      return CompareFloat(lhs, rhs);
    }

    case TypeId::kString: {
      const auto& lhs = std::get<std::string>(value_);
      const auto& rhs = std::get<std::string>(other.value_);
      return lhs <=> rhs;
    }

    case TypeId::kBinary: {
      const auto& lhs = std::get<std::vector<uint8_t>>(value_);
      const auto& rhs = std::get<std::vector<uint8_t>>(other.value_);
      return lhs <=> rhs;
    }

    default:
      break;
  }

  // No comparison is implemented for the remaining types.
  return std::partial_ordering::unordered;
}
256+
257+
/// \brief Render this literal as text.
///
/// BelowMin / AboveMax literals render as "belowMin" / "aboveMax". Boolean,
/// Int, Long, Float, Double and String values are rendered from the stored
/// value; Binary is rendered as two upper-case hex digits per byte.
///
/// \throws IcebergError for types that have no rendering implemented here.
std::string Literal::ToString() const {
  if (IsBelowMin()) {
    return "belowMin";
  }
  if (IsAboveMax()) {
    return "aboveMax";
  }

  switch (type_->type_id()) {
    case TypeId::kBoolean: {
      if (std::get<bool>(value_)) {
        return "true";
      }
      return "false";
    }
    case TypeId::kInt:
      return std::to_string(std::get<int32_t>(value_));
    case TypeId::kLong:
      return std::to_string(std::get<int64_t>(value_));
    case TypeId::kFloat:
      return std::to_string(std::get<float>(value_));
    case TypeId::kDouble:
      return std::to_string(std::get<double>(value_));
    case TypeId::kString:
      return std::get<std::string>(value_);
    case TypeId::kBinary: {
      const auto& bytes = std::get<std::vector<uint8_t>>(value_);
      std::string hex;
      hex.reserve(bytes.size() * 2);  // each byte becomes two hex digits
      auto out = std::back_inserter(hex);
      for (const uint8_t byte : bytes) {
        out = std::format_to(out, "{:02X}", byte);
      }
      return hex;
    }
    case TypeId::kDecimal:
    case TypeId::kUuid:
    case TypeId::kFixed:
    case TypeId::kDate:
    case TypeId::kTime:
    case TypeId::kTimestamp:
    case TypeId::kTimestampTz:
      throw IcebergError("Not implemented: ToString for " + type_->ToString());
    default:
      throw IcebergError("Unknown type: " + type_->ToString());
  }
}
307+
308+
/// Report whether the stored value is the BelowMin marker.
bool Literal::IsBelowMin() const {
  return std::get_if<BelowMin>(&value_) != nullptr;
}
309+
310+
/// Report whether the stored value is the AboveMax marker.
bool Literal::IsAboveMax() const {
  return std::get_if<AboveMax>(&value_) != nullptr;
}
311+
312+
// LiteralCaster implementation
313+
314+
/// \brief Cast `literal` to `target_type`.
///
/// A cast to an equal type returns a copy of the literal. BelowMin / AboveMax
/// literals cannot be cast to a different type. Int, Long and Float sources are
/// dispatched to their dedicated routines; every other source type yields a
/// NotSupported error.
///
/// \param literal Literal to convert.
/// \param target_type Requested result type.
Result<Literal> LiteralCaster::CastTo(const Literal& literal,
                                      const std::shared_ptr<PrimitiveType>& target_type) {
  // Same type: hand back a copy carrying the requested type pointer.
  if (*literal.type_ == *target_type) {
    Literal same_type(literal.value_, target_type);
    return same_type;
  }

  // Sentinel values have no concrete value that could be converted.
  const bool is_sentinel = std::holds_alternative<Literal::BelowMin>(literal.value_) ||
                           std::holds_alternative<Literal::AboveMax>(literal.value_);
  if (is_sentinel) {
    return NotSupported("Cannot cast type for {}", literal.ToString());
  }

  // Pick the routine that matches the source type.
  switch (literal.type_->type_id()) {
    case TypeId::kInt:
      return CastFromInt(literal, target_type);
    case TypeId::kLong:
      return CastFromLong(literal, target_type);
    case TypeId::kFloat:
      return CastFromFloat(literal, target_type);
    default:
      // Double, Boolean, String, Binary and all remaining types have no cast
      // routine and fall through to the error below.
      break;
  }

  return NotSupported("Cast from {} to {} is not implemented", literal.type_->ToString(),
                      target_type->ToString());
}
350+
351+
} // namespace iceberg

0 commit comments

Comments
 (0)