2727
2828#include " iceberg/type.h"
2929#include " iceberg/util/murmurhash3_internal.h"
30+ #include " iceberg/util/truncate_utils.h"
3031
3132namespace iceberg {
3233
@@ -57,7 +58,7 @@ Result<Literal> BucketTransform::Transform(const Literal& literal) {
5758 " Cannot apply bucket transform to literal with value {} of type {}" ,
5859 literal.ToString (), source_type ()->ToString ());
5960 }
60- if (literal.IsNull ()) {
61+ if (literal.IsNull ()) [[unlikely]] {
6162 return Literal::Null (iceberg::int32 ());
6263 }
6364
@@ -135,19 +136,19 @@ Result<Literal> TruncateTransform::Transform(const Literal& literal) {
135136 " Cannot apply truncate transform to literal with value {} of type {}" ,
136137 literal.ToString (), source_type ()->ToString ());
137138 }
138- if (literal.IsNull ()) {
139+ if (literal.IsNull ()) [[unlikely]] {
139140 // Return null as is
140141 return literal;
141142 }
142143
143144 switch (source_type ()->type_id ()) {
144145 case TypeId::kInt : {
145146 auto value = std::get<int32_t >(literal.value ());
146- return Literal::Int (value - ((( value % width_) + width_) % width_));
147+ return Literal::Int (TruncateUtils::TruncateInt ( value, width_));
147148 }
148149 case TypeId::kLong : {
149150 auto value = std::get<int64_t >(literal.value ());
150- return Literal::Long (value - ((( value % width_) + width_) % width_));
151+ return Literal::Long (TruncateUtils::TruncateLong ( value, width_));
151152 }
152153 case TypeId::kDecimal : {
153154 // TODO(zhjwpku): Handle decimal truncation logic here
@@ -156,26 +157,11 @@ Result<Literal> TruncateTransform::Transform(const Literal& literal) {
156157 case TypeId::kString : {
157158 // Strings are truncated to a valid UTF-8 string with no more than L code points.
158159 auto value = std::get<std::string>(literal.value ());
159- size_t code_point_count = 0 ;
160- size_t safe_point = 0 ;
161-
162- for (size_t i = 0 ; i < value.size (); ++i) {
163- // Start of a new UTF-8 code point
164- if ((value[i] & 0xC0 ) != 0x80 ) {
165- code_point_count++;
166- if (code_point_count > static_cast <size_t >(width_)) {
167- safe_point = i;
168- break ;
169- }
170- }
171- }
172-
173- if (safe_point != 0 ) {
174- value.resize (safe_point); // Resize the string to the safe point
175- }
176- return Literal::String (value);
160+ return Literal::String (TruncateUtils::TruncateUTF8 (std::move (value), width_));
177161 }
178162 case TypeId::kBinary : {
163+ // / In contrast to strings, binary values do not have an assumed encoding and are
164+ // / truncated to L bytes.
179165 auto value = std::get<std::vector<uint8_t >>(literal.value ());
180166 if (value.size () > static_cast <size_t >(width_)) {
181167 value.resize (width_);
@@ -221,7 +207,7 @@ Result<Literal> YearTransform::Transform(const Literal& literal) {
221207 " Cannot apply year transform to literal with value {} of type {}" ,
222208 literal.ToString (), source_type ()->ToString ());
223209 }
224- if (literal.IsNull ()) {
210+ if (literal.IsNull ()) [[unlikely]] {
225211 return Literal::Null (iceberg::int32 ());
226212 }
227213
@@ -274,7 +260,7 @@ Result<Literal> MonthTransform::Transform(const Literal& literal) {
274260 " Cannot apply month transform to literal with value {} of type {}" ,
275261 literal.ToString (), source_type ()->ToString ());
276262 }
277- if (literal.IsNull ()) {
263+ if (literal.IsNull ()) [[unlikely]] {
278264 return Literal::Null (iceberg::int32 ());
279265 }
280266
@@ -339,7 +325,7 @@ Result<Literal> DayTransform::Transform(const Literal& literal) {
339325 " Cannot apply day transform to literal with value {} of type {}" ,
340326 literal.ToString (), source_type ()->ToString ());
341327 }
342- if (literal.IsNull ()) {
328+ if (literal.IsNull ()) [[unlikely]] {
343329 return Literal::Null (iceberg::int32 ());
344330 }
345331
@@ -394,7 +380,7 @@ Result<Literal> HourTransform::Transform(const Literal& literal) {
394380 literal.ToString (), source_type ()->ToString ());
395381 }
396382
397- if (literal.IsNull ()) {
383+ if (literal.IsNull ()) [[unlikely]] {
398384 return Literal::Null (int32 ());
399385 }
400386
0 commit comments