Skip to content

Commit a994322

Browse files
authored
feat: add visit type support (#94)
Add a static `kTypeId` constant to each concrete type class to enable macro-based type visitor support.

Signed-off-by: Junwang Zhao <[email protected]>
1 parent 1c2530c commit a994322

File tree

13 files changed

+647
-57
lines changed

13 files changed

+647
-57
lines changed

LICENSE

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,3 +267,17 @@ https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
267267

268268
MurmurHash3 was written by Austin Appleby, and is placed in the public
269269
domain. The author disclaims copyright to this source code.
270+
271+
--------------------------------------------------------------------------------
272+
273+
The file src/iceberg/util/checked_cast.h contains code adapted from
274+
275+
https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/checked_cast.h
276+
277+
The file src/iceberg/util/visit_type.h contains code adapted from
278+
279+
https://github.com/apache/arrow/blob/main/cpp/src/arrow/visit_type_inline.h
280+
281+
Copyright: 2016-2025 The Apache Software Foundation.
282+
Home page: https://arrow.apache.org/
283+
License: https://www.apache.org/licenses/LICENSE-2.0

NOTICE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,7 @@ This product includes code from smhasher
1212
* MurmurHash3 was written by Austin Appleby, and is placed in the public
1313
* domain. The author hereby disclaims copyright to this source code.
1414
* https://github.com/aappleby/smhasher
15+
16+
This product includes code from Apache Arrow
17+
* Copyright 2016-2025 The Apache Software Foundation
18+
* https://github.com/apache/arrow

src/iceberg/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ set(ICEBERG_SOURCES
3838
transform_function.cc
3939
type.cc
4040
util/murmurhash3_internal.cc
41-
util/timepoint.cc)
41+
util/timepoint.cc
42+
util/unreachable.cc)
4243

4344
set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)
4445
set(ICEBERG_SHARED_BUILD_INTERFACE_LIBS)

src/iceberg/type.cc

Lines changed: 31 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ StructType::StructType(std::vector<SchemaField> fields) : fields_(std::move(fiel
4141
}
4242
}
4343

44-
TypeId StructType::type_id() const { return TypeId::kStruct; }
44+
TypeId StructType::type_id() const { return kTypeId; }
4545
std::string StructType::ToString() const {
4646
std::string repr = "struct<\n";
4747
for (const auto& field : fields_) {
@@ -93,7 +93,7 @@ ListType::ListType(SchemaField element) : element_(std::move(element)) {
9393
ListType::ListType(int32_t field_id, std::shared_ptr<Type> type, bool optional)
9494
: element_(field_id, std::string(kElementName), std::move(type), optional) {}
9595

96-
TypeId ListType::type_id() const { return TypeId::kList; }
96+
TypeId ListType::type_id() const { return kTypeId; }
9797
std::string ListType::ToString() const {
9898
// XXX: work around Clang/libc++: "<{}>" in a format string appears to get
9999
// parsed as {<>} or something; split up the format string to avoid that
@@ -146,7 +146,7 @@ MapType::MapType(SchemaField key, SchemaField value)
146146

147147
const SchemaField& MapType::key() const { return fields_[0]; }
148148
const SchemaField& MapType::value() const { return fields_[1]; }
149-
TypeId MapType::type_id() const { return TypeId::kMap; }
149+
TypeId MapType::type_id() const { return kTypeId; }
150150
std::string MapType::ToString() const {
151151
// XXX: work around Clang/libc++: "<{}>" in a format string appears to get
152152
// parsed as {<>} or something; split up the format string to avoid that
@@ -192,33 +192,25 @@ bool MapType::Equals(const Type& other) const {
192192
return fields_ == map.fields_;
193193
}
194194

195-
TypeId BooleanType::type_id() const { return TypeId::kBoolean; }
195+
TypeId BooleanType::type_id() const { return kTypeId; }
196196
std::string BooleanType::ToString() const { return "boolean"; }
197-
bool BooleanType::Equals(const Type& other) const {
198-
return other.type_id() == TypeId::kBoolean;
199-
}
197+
bool BooleanType::Equals(const Type& other) const { return other.type_id() == kTypeId; }
200198

201-
TypeId IntType::type_id() const { return TypeId::kInt; }
199+
TypeId IntType::type_id() const { return kTypeId; }
202200
std::string IntType::ToString() const { return "int"; }
203-
bool IntType::Equals(const Type& other) const { return other.type_id() == TypeId::kInt; }
201+
bool IntType::Equals(const Type& other) const { return other.type_id() == kTypeId; }
204202

205-
TypeId LongType::type_id() const { return TypeId::kLong; }
203+
TypeId LongType::type_id() const { return kTypeId; }
206204
std::string LongType::ToString() const { return "long"; }
207-
bool LongType::Equals(const Type& other) const {
208-
return other.type_id() == TypeId::kLong;
209-
}
205+
bool LongType::Equals(const Type& other) const { return other.type_id() == kTypeId; }
210206

211-
TypeId FloatType::type_id() const { return TypeId::kFloat; }
207+
TypeId FloatType::type_id() const { return kTypeId; }
212208
std::string FloatType::ToString() const { return "float"; }
213-
bool FloatType::Equals(const Type& other) const {
214-
return other.type_id() == TypeId::kFloat;
215-
}
209+
bool FloatType::Equals(const Type& other) const { return other.type_id() == kTypeId; }
216210

217-
TypeId DoubleType::type_id() const { return TypeId::kDouble; }
211+
TypeId DoubleType::type_id() const { return kTypeId; }
218212
std::string DoubleType::ToString() const { return "double"; }
219-
bool DoubleType::Equals(const Type& other) const {
220-
return other.type_id() == TypeId::kDouble;
221-
}
213+
bool DoubleType::Equals(const Type& other) const { return other.type_id() == kTypeId; }
222214

223215
DecimalType::DecimalType(int32_t precision, int32_t scale)
224216
: precision_(precision), scale_(scale) {
@@ -230,57 +222,47 @@ DecimalType::DecimalType(int32_t precision, int32_t scale)
230222

231223
int32_t DecimalType::precision() const { return precision_; }
232224
int32_t DecimalType::scale() const { return scale_; }
233-
TypeId DecimalType::type_id() const { return TypeId::kDecimal; }
225+
TypeId DecimalType::type_id() const { return kTypeId; }
234226
std::string DecimalType::ToString() const {
235227
return std::format("decimal({}, {})", precision_, scale_);
236228
}
237229
bool DecimalType::Equals(const Type& other) const {
238-
if (other.type_id() != TypeId::kDecimal) {
230+
if (other.type_id() != kTypeId) {
239231
return false;
240232
}
241233
const auto& decimal = static_cast<const DecimalType&>(other);
242234
return precision_ == decimal.precision_ && scale_ == decimal.scale_;
243235
}
244236

245-
TypeId DateType::type_id() const { return TypeId::kDate; }
237+
TypeId DateType::type_id() const { return kTypeId; }
246238
std::string DateType::ToString() const { return "date"; }
247-
bool DateType::Equals(const Type& other) const {
248-
return other.type_id() == TypeId::kDate;
249-
}
239+
bool DateType::Equals(const Type& other) const { return other.type_id() == kTypeId; }
250240

251-
TypeId TimeType::type_id() const { return TypeId::kTime; }
241+
TypeId TimeType::type_id() const { return kTypeId; }
252242
std::string TimeType::ToString() const { return "time"; }
253-
bool TimeType::Equals(const Type& other) const {
254-
return other.type_id() == TypeId::kTime;
255-
}
243+
bool TimeType::Equals(const Type& other) const { return other.type_id() == kTypeId; }
256244

257245
bool TimestampType::is_zoned() const { return false; }
258246
TimeUnit TimestampType::time_unit() const { return TimeUnit::kMicrosecond; }
259-
TypeId TimestampType::type_id() const { return TypeId::kTimestamp; }
247+
TypeId TimestampType::type_id() const { return kTypeId; }
260248
std::string TimestampType::ToString() const { return "timestamp"; }
261-
bool TimestampType::Equals(const Type& other) const {
262-
return other.type_id() == TypeId::kTimestamp;
263-
}
249+
bool TimestampType::Equals(const Type& other) const { return other.type_id() == kTypeId; }
264250

265251
bool TimestampTzType::is_zoned() const { return true; }
266252
TimeUnit TimestampTzType::time_unit() const { return TimeUnit::kMicrosecond; }
267-
TypeId TimestampTzType::type_id() const { return TypeId::kTimestampTz; }
253+
TypeId TimestampTzType::type_id() const { return kTypeId; }
268254
std::string TimestampTzType::ToString() const { return "timestamptz"; }
269255
bool TimestampTzType::Equals(const Type& other) const {
270-
return other.type_id() == TypeId::kTimestampTz;
256+
return other.type_id() == kTypeId;
271257
}
272258

273-
TypeId StringType::type_id() const { return TypeId::kString; }
259+
TypeId StringType::type_id() const { return kTypeId; }
274260
std::string StringType::ToString() const { return "string"; }
275-
bool StringType::Equals(const Type& other) const {
276-
return other.type_id() == TypeId::kString;
277-
}
261+
bool StringType::Equals(const Type& other) const { return other.type_id() == kTypeId; }
278262

279-
TypeId UuidType::type_id() const { return TypeId::kUuid; }
263+
TypeId UuidType::type_id() const { return kTypeId; }
280264
std::string UuidType::ToString() const { return "uuid"; }
281-
bool UuidType::Equals(const Type& other) const {
282-
return other.type_id() == TypeId::kUuid;
283-
}
265+
bool UuidType::Equals(const Type& other) const { return other.type_id() == kTypeId; }
284266

285267
FixedType::FixedType(int32_t length) : length_(length) {
286268
if (length < 0) {
@@ -289,20 +271,18 @@ FixedType::FixedType(int32_t length) : length_(length) {
289271
}
290272

291273
int32_t FixedType::length() const { return length_; }
292-
TypeId FixedType::type_id() const { return TypeId::kFixed; }
274+
TypeId FixedType::type_id() const { return kTypeId; }
293275
std::string FixedType::ToString() const { return std::format("fixed({})", length_); }
294276
bool FixedType::Equals(const Type& other) const {
295-
if (other.type_id() != TypeId::kFixed) {
277+
if (other.type_id() != kTypeId) {
296278
return false;
297279
}
298280
const auto& fixed = static_cast<const FixedType&>(other);
299281
return length_ == fixed.length_;
300282
}
301283

302-
TypeId BinaryType::type_id() const { return TypeId::kBinary; }
284+
TypeId BinaryType::type_id() const { return kTypeId; }
303285
std::string BinaryType::ToString() const { return "binary"; }
304-
bool BinaryType::Equals(const Type& other) const {
305-
return other.type_id() == TypeId::kBinary;
306-
}
286+
bool BinaryType::Equals(const Type& other) const { return other.type_id() == kTypeId; }
307287

308288
} // namespace iceberg

src/iceberg/type.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ class ICEBERG_EXPORT NestedType : public Type {
104104
/// \brief A data type representing a struct with nested fields.
105105
class ICEBERG_EXPORT StructType : public NestedType {
106106
public:
107+
constexpr static TypeId kTypeId = TypeId::kStruct;
107108
explicit StructType(std::vector<SchemaField> fields);
108109
~StructType() override = default;
109110

@@ -128,6 +129,7 @@ class ICEBERG_EXPORT StructType : public NestedType {
128129
/// \brief A data type representing a list of values.
129130
class ICEBERG_EXPORT ListType : public NestedType {
130131
public:
132+
constexpr static const TypeId kTypeId = TypeId::kList;
131133
constexpr static const std::string_view kElementName = "element";
132134

133135
/// \brief Construct a list of the given element. The name of the child
@@ -157,6 +159,7 @@ class ICEBERG_EXPORT ListType : public NestedType {
157159
/// \brief A data type representing a dictionary of values.
158160
class ICEBERG_EXPORT MapType : public NestedType {
159161
public:
162+
constexpr static const TypeId kTypeId = TypeId::kMap;
160163
constexpr static const std::string_view kKeyName = "key";
161164
constexpr static const std::string_view kValueName = "value";
162165

@@ -194,6 +197,8 @@ class ICEBERG_EXPORT MapType : public NestedType {
194197
/// \brief A data type representing a boolean (true or false).
195198
class ICEBERG_EXPORT BooleanType : public PrimitiveType {
196199
public:
200+
constexpr static const TypeId kTypeId = TypeId::kBoolean;
201+
197202
BooleanType() = default;
198203
~BooleanType() override = default;
199204

@@ -207,6 +212,8 @@ class ICEBERG_EXPORT BooleanType : public PrimitiveType {
207212
/// \brief A data type representing a 32-bit signed integer.
208213
class ICEBERG_EXPORT IntType : public PrimitiveType {
209214
public:
215+
constexpr static const TypeId kTypeId = TypeId::kInt;
216+
210217
IntType() = default;
211218
~IntType() override = default;
212219

@@ -220,6 +227,8 @@ class ICEBERG_EXPORT IntType : public PrimitiveType {
220227
/// \brief A data type representing a 64-bit signed integer.
221228
class ICEBERG_EXPORT LongType : public PrimitiveType {
222229
public:
230+
constexpr static const TypeId kTypeId = TypeId::kLong;
231+
223232
LongType() = default;
224233
~LongType() override = default;
225234

@@ -234,6 +243,8 @@ class ICEBERG_EXPORT LongType : public PrimitiveType {
234243
/// float.
235244
class ICEBERG_EXPORT FloatType : public PrimitiveType {
236245
public:
246+
constexpr static const TypeId kTypeId = TypeId::kFloat;
247+
237248
FloatType() = default;
238249
~FloatType() override = default;
239250

@@ -248,6 +259,8 @@ class ICEBERG_EXPORT FloatType : public PrimitiveType {
248259
/// float.
249260
class ICEBERG_EXPORT DoubleType : public PrimitiveType {
250261
public:
262+
constexpr static const TypeId kTypeId = TypeId::kDouble;
263+
251264
DoubleType() = default;
252265
~DoubleType() override = default;
253266

@@ -261,6 +274,7 @@ class ICEBERG_EXPORT DoubleType : public PrimitiveType {
261274
/// \brief A data type representing a fixed-precision decimal.
262275
class ICEBERG_EXPORT DecimalType : public PrimitiveType {
263276
public:
277+
constexpr static const TypeId kTypeId = TypeId::kDecimal;
264278
constexpr static const int32_t kMaxPrecision = 38;
265279

266280
/// \brief Construct a decimal type with the given precision and scale.
@@ -288,6 +302,8 @@ class ICEBERG_EXPORT DecimalType : public PrimitiveType {
288302
/// timezone or time.
289303
class ICEBERG_EXPORT DateType : public PrimitiveType {
290304
public:
305+
constexpr static const TypeId kTypeId = TypeId::kDate;
306+
291307
DateType() = default;
292308
~DateType() override = default;
293309

@@ -302,6 +318,8 @@ class ICEBERG_EXPORT DateType : public PrimitiveType {
302318
/// reference to a timezone or date.
303319
class ICEBERG_EXPORT TimeType : public PrimitiveType {
304320
public:
321+
constexpr static const TypeId kTypeId = TypeId::kTime;
322+
305323
TimeType() = default;
306324
~TimeType() override = default;
307325

@@ -326,6 +344,8 @@ class ICEBERG_EXPORT TimestampBase : public PrimitiveType {
326344
/// reference to a timezone.
327345
class ICEBERG_EXPORT TimestampType : public TimestampBase {
328346
public:
347+
constexpr static const TypeId kTypeId = TypeId::kTimestamp;
348+
329349
TimestampType() = default;
330350
~TimestampType() override = default;
331351

@@ -343,6 +363,8 @@ class ICEBERG_EXPORT TimestampType : public TimestampBase {
343363
/// epoch in UTC. A time zone or offset is not stored.
344364
class ICEBERG_EXPORT TimestampTzType : public TimestampBase {
345365
public:
366+
constexpr static const TypeId kTypeId = TypeId::kTimestampTz;
367+
346368
TimestampTzType() = default;
347369
~TimestampTzType() override = default;
348370

@@ -359,6 +381,8 @@ class ICEBERG_EXPORT TimestampTzType : public TimestampBase {
359381
/// \brief A data type representing an arbitrary-length byte sequence.
360382
class ICEBERG_EXPORT BinaryType : public PrimitiveType {
361383
public:
384+
constexpr static const TypeId kTypeId = TypeId::kBinary;
385+
362386
BinaryType() = default;
363387
~BinaryType() override = default;
364388

@@ -373,6 +397,8 @@ class ICEBERG_EXPORT BinaryType : public PrimitiveType {
373397
/// (encoded in UTF-8).
374398
class ICEBERG_EXPORT StringType : public PrimitiveType {
375399
public:
400+
constexpr static const TypeId kTypeId = TypeId::kString;
401+
376402
StringType() = default;
377403
~StringType() override = default;
378404

@@ -386,6 +412,8 @@ class ICEBERG_EXPORT StringType : public PrimitiveType {
386412
/// \brief A data type representing a fixed-length bytestring.
387413
class ICEBERG_EXPORT FixedType : public PrimitiveType {
388414
public:
415+
constexpr static const TypeId kTypeId = TypeId::kFixed;
416+
389417
/// \brief Construct a fixed type with the given length.
390418
explicit FixedType(int32_t length);
391419
~FixedType() override = default;
@@ -407,6 +435,8 @@ class ICEBERG_EXPORT FixedType : public PrimitiveType {
407435
/// it is effectively a fixed(16).
408436
class ICEBERG_EXPORT UuidType : public PrimitiveType {
409437
public:
438+
constexpr static const TypeId kTypeId = TypeId::kUuid;
439+
410440
UuidType() = default;
411441
~UuidType() override = default;
412442

0 commit comments

Comments
 (0)