1919
2020#include " iceberg/transform_function.h"
2121
22+ #include < cassert>
23+ #include < chrono>
24+
2225#include " iceberg/type.h"
26+ #include " iceberg/util/murmurhash3_internal.h"
2327
2428namespace iceberg {
2529
@@ -30,6 +34,10 @@ Result<ArrowArray> IdentityTransform::Transform(const ArrowArray& input) {
3034 return NotImplemented (" IdentityTransform::Transform" );
3135}
3236
37+ Result<std::optional<Literal>> IdentityTransform::Transform (const Literal& literal) {
38+ return literal;
39+ }
40+
3341Result<std::shared_ptr<Type>> IdentityTransform::ResultType () const {
3442 return source_type ();
3543}
@@ -51,6 +59,57 @@ Result<ArrowArray> BucketTransform::Transform(const ArrowArray& input) {
5159 return NotImplemented (" BucketTransform::Transform" );
5260}
5361
62+ Result<std::optional<Literal>> BucketTransform::Transform (const Literal& literal) {
63+ assert (literal.type () == source_type ());
64+ if (literal.IsBelowMin () || literal.IsAboveMax ()) {
65+ return InvalidArgument (
66+ " Cannot apply bucket transform to literal with value {} of type {}" ,
67+ literal.ToString (), source_type ()->ToString ());
68+ }
69+ int32_t hash_value = 0 ;
70+ switch (source_type ()->type_id ()) {
71+ case TypeId::kInt :
72+ case TypeId::kDate : {
73+ auto value = std::get<int32_t >(literal.value ());
74+ MurmurHash3_x86_32 (&value, sizeof (int32_t ), 0 , &hash_value);
75+ break ;
76+ }
77+ case TypeId::kLong :
78+ case TypeId::kTime :
79+ case TypeId::kTimestamp :
80+ case TypeId::kTimestampTz : {
81+ auto value = std::get<int64_t >(literal.value ());
82+ MurmurHash3_x86_32 (&value, sizeof (int64_t ), 0 , &hash_value);
83+ break ;
84+ }
85+ case TypeId::kDecimal :
86+ case TypeId::kUuid : {
87+ auto value = std::get<std::array<uint8_t , 16 >>(literal.value ());
88+ MurmurHash3_x86_32 (value.data (), sizeof (uint8_t ) * 16 , 0 , &hash_value);
89+ break ;
90+ }
91+ case TypeId::kString : {
92+ auto value = std::get<std::string>(literal.value ());
93+ MurmurHash3_x86_32 (value.data (), value.size (), 0 , &hash_value);
94+ break ;
95+ }
96+ case TypeId::kFixed :
97+ case TypeId::kBinary : {
98+ auto value = std::get<std::vector<uint8_t >>(literal.value ());
99+ MurmurHash3_x86_32 (value.data (), value.size (), 0 , &hash_value);
100+ break ;
101+ }
102+ default :
103+ std::unreachable ();
104+ }
105+
106+ // Calculate the bucket index
107+ int32_t bucket_index =
108+ (hash_value & std::numeric_limits<int32_t >::max ()) % num_buckets_;
109+
110+ return Literal::Int (bucket_index);
111+ }
112+
54113Result<std::shared_ptr<Type>> BucketTransform::ResultType () const {
55114 return iceberg::int32 ();
56115}
@@ -91,6 +150,46 @@ Result<ArrowArray> TruncateTransform::Transform(const ArrowArray& input) {
91150 return NotImplemented (" TruncateTransform::Transform" );
92151}
93152
153+ Result<std::optional<Literal>> TruncateTransform::Transform (const Literal& literal) {
154+ assert (literal.type () == source_type ());
155+ if (literal.IsBelowMin () || literal.IsAboveMax ()) {
156+ return InvalidArgument (
157+ " Cannot apply truncate transform to literal with value {} of type {}" ,
158+ literal.ToString (), source_type ()->ToString ());
159+ }
160+
161+ switch (source_type ()->type_id ()) {
162+ case TypeId::kInt : {
163+ auto value = std::get<int32_t >(literal.value ());
164+ return Literal::Int (value % width_);
165+ }
166+ case TypeId::kLong : {
167+ auto value = std::get<int64_t >(literal.value ());
168+ return Literal::Long (value % width_);
169+ }
170+ case TypeId::kDecimal : {
171+ // TODO(zhjwpku): Handle decimal truncation logic here
172+ return NotImplemented (" Truncate for Decimal is not implemented yet" );
173+ }
174+ case TypeId::kString : {
175+ auto value = std::get<std::string>(literal.value ());
176+ if (value.size () > static_cast <size_t >(width_)) {
177+ value.resize (width_);
178+ }
179+ return Literal::String (value);
180+ }
181+ case TypeId::kBinary : {
182+ auto value = std::get<std::vector<uint8_t >>(literal.value ());
183+ if (value.size () > static_cast <size_t >(width_)) {
184+ value.resize (width_);
185+ }
186+ return Literal::Binary (value);
187+ }
188+ default :
189+ std::unreachable ();
190+ }
191+ }
192+
94193Result<std::shared_ptr<Type>> TruncateTransform::ResultType () const {
95194 return source_type ();
96195}
@@ -124,6 +223,34 @@ Result<ArrowArray> YearTransform::Transform(const ArrowArray& input) {
124223 return NotImplemented (" YearTransform::Transform" );
125224}
126225
226+ Result<std::optional<Literal>> YearTransform::Transform (const Literal& literal) {
227+ assert (literal.type () == source_type ());
228+ if (literal.IsBelowMin () || literal.IsAboveMax ()) {
229+ return InvalidArgument (
230+ " Cannot apply year transform to literal with value {} of type {}" ,
231+ literal.ToString (), source_type ()->ToString ());
232+ }
233+
234+ using namespace std ::chrono;
235+ switch (source_type ()->type_id ()) {
236+ case TypeId::kDate : {
237+ auto value = std::get<int32_t >(literal.value ());
238+ auto epoch = sys_days (year{1970 } / January / 1 );
239+ auto ymd = year_month_day (epoch + days{value});
240+ return Literal::Int (static_cast <int32_t >(ymd.year ()));
241+ }
242+ case TypeId::kTimestamp :
243+ case TypeId::kTimestampTz : {
244+ auto value = std::get<int64_t >(literal.value ());
245+ // Convert milliseconds-since-epoch into a `year_month_day` object
246+ auto ymd = year_month_day (floor<days>(sys_time<milliseconds>(milliseconds{value})));
247+ return Literal::Int (static_cast <int32_t >(ymd.year ()));
248+ }
249+ default :
250+ std::unreachable ();
251+ }
252+ }
253+
127254Result<std::shared_ptr<Type>> YearTransform::ResultType () const {
128255 return iceberg::int32 ();
129256}
@@ -152,6 +279,46 @@ Result<ArrowArray> MonthTransform::Transform(const ArrowArray& input) {
152279 return NotImplemented (" MonthTransform::Transform" );
153280}
154281
282+ Result<std::optional<Literal>> MonthTransform::Transform (const Literal& literal) {
283+ assert (literal.type () == source_type ());
284+ if (literal.IsBelowMin () || literal.IsAboveMax ()) {
285+ return InvalidArgument (
286+ " Cannot apply month transform to literal with value {} of type {}" ,
287+ literal.ToString (), source_type ()->ToString ());
288+ }
289+
290+ using namespace std ::chrono;
291+ switch (source_type ()->type_id ()) {
292+ case TypeId::kDate : {
293+ auto value = std::get<int32_t >(literal.value ());
294+ auto epoch = sys_days (year{1970 } / January / 1 );
295+ auto ymd = year_month_day (epoch + days{value});
296+ auto epoch_ymd = year_month_day (epoch);
297+ auto delta = ymd.year () - epoch_ymd.year ();
298+ // Calculate the month as months from 1970-01
299+ // Note: January is month 1, so we subtract 1 to get zero-based
300+ // month count.
301+ return Literal::Int (static_cast <int32_t >(delta.count () * 12 +
302+ static_cast <unsigned >(ymd.month ()) - 1 ));
303+ }
304+ case TypeId::kTimestamp :
305+ case TypeId::kTimestampTz : {
306+ auto value = std::get<int64_t >(literal.value ());
307+ // Convert milliseconds-since-epoch into a `year_month_day` object
308+ auto ymd = year_month_day (floor<days>(sys_time<milliseconds>(milliseconds{value})));
309+ auto epoch_ymd = year_month_day (year{1970 } / January / 1 );
310+ auto delta = ymd.year () - epoch_ymd.year ();
311+ // Calculate the month as months from 1970-01
312+ // Note: January is month 1, so we subtract 1 to get zero-based
313+ // month count.
314+ return Literal::Int (static_cast <int32_t >(delta.count () * 12 +
315+ static_cast <unsigned >(ymd.month ()) - 1 ));
316+ }
317+ default :
318+ std::unreachable ();
319+ }
320+ }
321+
155322Result<std::shared_ptr<Type>> MonthTransform::ResultType () const {
156323 return iceberg::int32 ();
157324}
@@ -180,6 +347,35 @@ Result<ArrowArray> DayTransform::Transform(const ArrowArray& input) {
180347 return NotImplemented (" DayTransform::Transform" );
181348}
182349
350+ Result<std::optional<Literal>> DayTransform::Transform (const Literal& literal) {
351+ assert (literal.type () == source_type ());
352+ if (literal.IsBelowMin () || literal.IsAboveMax ()) {
353+ return InvalidArgument (
354+ " Cannot apply day transform to literal with value {} of type {}" ,
355+ literal.ToString (), source_type ()->ToString ());
356+ }
357+
358+ using namespace std ::chrono;
359+ switch (source_type ()->type_id ()) {
360+ case TypeId::kDate : {
361+ // Day is the same as the date value
362+ return literal;
363+ }
364+ case TypeId::kTimestamp :
365+ case TypeId::kTimestampTz : {
366+ auto value = std::get<int64_t >(literal.value ());
367+ // Convert milliseconds to `sys_days` (chronological days since epoch)
368+ auto timestamp = sys_time<milliseconds>(milliseconds{value});
369+ auto days_since_epoch = floor<days>(timestamp);
370+
371+ return Literal::Date (
372+ static_cast <int32_t >(days_since_epoch.time_since_epoch ().count ()));
373+ }
374+ default :
375+ std::unreachable ();
376+ }
377+ }
378+
183379Result<std::shared_ptr<Type>> DayTransform::ResultType () const { return iceberg::date (); }
184380
185381Result<std::unique_ptr<TransformFunction>> DayTransform::Make (
@@ -206,6 +402,32 @@ Result<ArrowArray> HourTransform::Transform(const ArrowArray& input) {
206402 return NotImplemented (" HourTransform::Transform" );
207403}
208404
405+ Result<std::optional<Literal>> HourTransform::Transform (const Literal& literal) {
406+ assert (literal.type () == source_type ());
407+ if (literal.IsBelowMin () || literal.IsAboveMax ()) {
408+ return InvalidArgument (
409+ " Cannot apply hour transform to literal with value {} of type {}" ,
410+ literal.ToString (), source_type ()->ToString ());
411+ }
412+
413+ using namespace std ::chrono;
414+ switch (source_type ()->type_id ()) {
415+ case TypeId::kTimestamp :
416+ case TypeId::kTimestampTz : {
417+ auto value = std::get<int64_t >(literal.value ());
418+ // Create a `sys_time` object from the milliseconds value
419+ auto timestamp = sys_time<milliseconds>(milliseconds{value});
420+
421+ // Convert the time since epoch directly into hours
422+ auto hours_since_epoch = duration_cast<hours>(timestamp.time_since_epoch ()).count ();
423+
424+ return Literal::Int (static_cast <int32_t >(hours_since_epoch));
425+ }
426+ default :
427+ std::unreachable ();
428+ }
429+ }
430+
209431Result<std::shared_ptr<Type>> HourTransform::ResultType () const {
210432 return iceberg::int32 ();
211433}
@@ -233,6 +455,10 @@ Result<ArrowArray> VoidTransform::Transform(const ArrowArray& input) {
233455 return NotImplemented (" VoidTransform::Transform" );
234456}
235457
458+ Result<std::optional<Literal>> VoidTransform::Transform (const Literal& literal) {
459+ return std::nullopt ;
460+ }
461+
236462Result<std::shared_ptr<Type>> VoidTransform::ResultType () const { return source_type (); }
237463
238464Result<std::unique_ptr<TransformFunction>> VoidTransform::Make (
0 commit comments