2323#include < chrono>
2424#include < type_traits>
2525#include < utility>
26+ #include < variant>
2627
2728#include " iceberg/type.h"
2829#include " iceberg/util/murmurhash3_internal.h"
@@ -32,9 +33,7 @@ namespace iceberg {
3233IdentityTransform::IdentityTransform (std::shared_ptr<Type> const & source_type)
3334 : TransformFunction(TransformType::kIdentity , source_type) {}
3435
35- Result<std::optional<Literal>> IdentityTransform::Transform (const Literal& literal) {
36- return literal;
37- }
36+ Result<Literal> IdentityTransform::Transform (const Literal& literal) { return literal; }
3837
3938Result<std::shared_ptr<Type>> IdentityTransform::ResultType () const {
4039 return source_type ();
@@ -53,7 +52,7 @@ BucketTransform::BucketTransform(std::shared_ptr<Type> const& source_type,
5352 int32_t num_buckets)
5453 : TransformFunction(TransformType::kBucket , source_type), num_buckets_(num_buckets) {}
5554
56- Result<std::optional< Literal> > BucketTransform::Transform (const Literal& literal) {
55+ Result<Literal> BucketTransform::Transform (const Literal& literal) {
5756 assert (literal.type () == source_type ());
5857 if (literal.IsBelowMin () || literal.IsAboveMax ()) {
5958 return InvalidArgument (
@@ -74,7 +73,8 @@ Result<std::optional<Literal>> BucketTransform::Transform(const Literal& literal
7473 MurmurHash3_x86_32 (value.data (), value.size (), 0 , &hash_value);
7574 } else if constexpr (std::is_same_v<T, std::vector<uint8_t >>) {
7675 MurmurHash3_x86_32 (value.data (), value.size (), 0 , &hash_value);
77- } else if constexpr (std::is_same_v<T, bool > || std::is_same_v<T, float > ||
76+ } else if constexpr (std::is_same_v<T, std::monostate> ||
77+ std::is_same_v<T, bool > || std::is_same_v<T, float > ||
7878 std::is_same_v<T, double > ||
7979 std::is_same_v<T, Literal::BelowMin> ||
8080 std::is_same_v<T, Literal::AboveMax>) {
@@ -128,7 +128,7 @@ TruncateTransform::TruncateTransform(std::shared_ptr<Type> const& source_type,
128128 int32_t width)
129129 : TransformFunction(TransformType::kTruncate , source_type), width_(width) {}
130130
131- Result<std::optional< Literal> > TruncateTransform::Transform (const Literal& literal) {
131+ Result<Literal> TruncateTransform::Transform (const Literal& literal) {
132132 assert (literal.type () == source_type ());
133133 if (literal.IsBelowMin () || literal.IsAboveMax ()) {
134134 return InvalidArgument (
@@ -150,17 +150,25 @@ Result<std::optional<Literal>> TruncateTransform::Transform(const Literal& liter
150150 return NotImplemented (" Truncate for Decimal is not implemented yet" );
151151 }
152152 case TypeId::kString : {
153+ // Strings are truncated to a valid UTF-8 string with no more than L code points.
153154 auto value = std::get<std::string>(literal.value ());
154- if (value.size () > static_cast <size_t >(width_)) {
155- size_t safe_point = width_;
156- while (safe_point > 0 && (value[safe_point] & 0xC0 ) == 0x80 ) {
157- // Find the last valid UTF-8 character boundary before or at width_
158- safe_point--;
155+ size_t code_point_count = 0 ;
156+ size_t safe_point = 0 ;
157+
158+ for (size_t i = 0 ; i < value.size (); ++i) {
159+ // Start of a new UTF-8 code point
160+ if ((value[i] & 0xC0 ) != 0x80 ) {
161+ code_point_count++;
162+ if (code_point_count > static_cast <size_t >(width_)) {
163+ safe_point = i;
164+ break ;
165+ }
159166 }
160- // Resize the string to the safe point
161- value.resize (safe_point);
162167 }
163168
169+ if (safe_point != 0 ) {
170+ value.resize (safe_point); // Resize the string to the safe point
171+ }
164172 return Literal::String (value);
165173 }
166174 case TypeId::kBinary : {
@@ -204,7 +212,7 @@ Result<std::unique_ptr<TransformFunction>> TruncateTransform::Make(
204212YearTransform::YearTransform (std::shared_ptr<Type> const & source_type)
205213 : TransformFunction(TransformType::kTruncate , source_type) {}
206214
207- Result<std::optional< Literal> > YearTransform::Transform (const Literal& literal) {
215+ Result<Literal> YearTransform::Transform (const Literal& literal) {
208216 assert (literal.type () == source_type ());
209217 if (literal.IsBelowMin () || literal.IsAboveMax ()) {
210218 return InvalidArgument (
@@ -256,7 +264,7 @@ Result<std::unique_ptr<TransformFunction>> YearTransform::Make(
256264MonthTransform::MonthTransform (std::shared_ptr<Type> const & source_type)
257265 : TransformFunction(TransformType::kMonth , source_type) {}
258266
259- Result<std::optional< Literal> > MonthTransform::Transform (const Literal& literal) {
267+ Result<Literal> MonthTransform::Transform (const Literal& literal) {
260268 assert (literal.type () == source_type ());
261269 if (literal.IsBelowMin () || literal.IsAboveMax ()) {
262270 return InvalidArgument (
@@ -320,7 +328,7 @@ Result<std::unique_ptr<TransformFunction>> MonthTransform::Make(
320328DayTransform::DayTransform (std::shared_ptr<Type> const & source_type)
321329 : TransformFunction(TransformType::kDay , source_type) {}
322330
323- Result<std::optional< Literal> > DayTransform::Transform (const Literal& literal) {
331+ Result<Literal> DayTransform::Transform (const Literal& literal) {
324332 assert (literal.type () == source_type ());
325333 if (literal.IsBelowMin () || literal.IsAboveMax ()) {
326334 return InvalidArgument (
@@ -371,7 +379,7 @@ Result<std::unique_ptr<TransformFunction>> DayTransform::Make(
371379HourTransform::HourTransform (std::shared_ptr<Type> const & source_type)
372380 : TransformFunction(TransformType::kHour , source_type) {}
373381
374- Result<std::optional< Literal> > HourTransform::Transform (const Literal& literal) {
382+ Result<Literal> HourTransform::Transform (const Literal& literal) {
375383 assert (literal.type () == source_type ());
376384 if (literal.IsBelowMin () || literal.IsAboveMax ()) {
377385 return InvalidArgument (
@@ -420,8 +428,8 @@ Result<std::unique_ptr<TransformFunction>> HourTransform::Make(
420428VoidTransform::VoidTransform (std::shared_ptr<Type> const & source_type)
421429 : TransformFunction(TransformType::kVoid , source_type) {}
422430
423- Result<std::optional< Literal> > VoidTransform::Transform (const Literal& literal) {
424- return std:: nullopt ;
431+ Result<Literal> VoidTransform::Transform (const Literal& literal) {
432+ return Literal::Null () ;
425433}
426434
427435Result<std::shared_ptr<Type>> VoidTransform::ResultType () const { return source_type (); }
0 commit comments