feat: implement endian conversion utilities

HeartLinked · HeartLinked · commit 98984a60a282 · 2025-08-27T10:53:00.000+08:00
diff --git a/src/iceberg/util/endian.h b/src/iceberg/util/endian.h
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <bit>
+#include <concepts>
+#include <cstdint>
+#include <type_traits>
+
+/// \file iceberg/util/endian.h
+/// \brief Endianness conversion utilities
+
+namespace iceberg {
+
+/// \brief Concept for values whose byte order can be converted.
+///
+/// Satisfied by every arithmetic type (integral or floating-point) except `bool`.
+template <typename T>
+concept EndianConvertible = std::is_arithmetic_v<T> && !std::same_as<T, bool>;
+
+namespace internal {
+
+/// \brief Reverse the byte order of `value`.
+///
+/// Integers go straight through std::byteswap. Floating-point values are
+/// reinterpreted as an unsigned integer of the same size, swapped, and
+/// reinterpreted back, so no numeric conversion takes place.
+template <EndianConvertible T>
+constexpr T ReverseBytes(T value) {
+  if constexpr (std::is_integral_v<T>) {
+    return std::byteswap(value);
+  } else {
+    // Reject floating-point widths that have no matching unsigned carrier
+    // (e.g. long double) at compile time, rather than reaching the end of the
+    // function without returning a value.
+    static_assert(sizeof(T) == sizeof(uint32_t) || sizeof(T) == sizeof(uint64_t),
+                  "only 32-bit and 64-bit floating-point types can be byte-swapped");
+    using Carrier = std::conditional_t<sizeof(T) == sizeof(uint32_t), uint32_t, uint64_t>;
+    return std::bit_cast<T>(std::byteswap(std::bit_cast<Carrier>(value)));
+  }
+}
+
+/// \brief Convert `value` between the host byte order and `Order`.
+///
+/// Returns `value` untouched when the host byte order already is `Order` or when
+/// `T` occupies a single byte; otherwise the bytes are reversed. Reversing is its
+/// own inverse, so the same routine serves the To* and the From* direction.
+template <std::endian Order, EndianConvertible T>
+constexpr T ConvertEndian(T value) {
+  if constexpr (std::endian::native == Order || sizeof(T) <= 1) {
+    return value;
+  } else {
+    return ReverseBytes(value);
+  }
+}
+
+}  // namespace internal
+
+/// \brief Convert a value from host byte order to little-endian byte order.
+/// \param value The value in host byte order.
+/// \return `value` unchanged if the host is little-endian or `T` is one byte wide,
+/// otherwise `value` with its bytes reversed. For floating-point `T` a reversed
+/// result carries the swapped bit pattern and is not a meaningful number.
+template <EndianConvertible T>
+constexpr T ToLittleEndian(T value) {
+  return internal::ConvertEndian<std::endian::little>(value);
+}
+
+/// \brief Convert a value from little-endian byte order to host byte order.
+/// \param value The value holding little-endian bytes.
+/// \return `value` unchanged if the host is little-endian or `T` is one byte wide,
+/// otherwise `value` with its bytes reversed.
+template <EndianConvertible T>
+constexpr T FromLittleEndian(T value) {
+  return internal::ConvertEndian<std::endian::little>(value);
+}
+
+/// \brief Convert a value from host byte order to big-endian byte order.
+/// \param value The value in host byte order.
+/// \return `value` unchanged if the host is big-endian or `T` is one byte wide,
+/// otherwise `value` with its bytes reversed. For floating-point `T` a reversed
+/// result carries the swapped bit pattern and is not a meaningful number.
+template <EndianConvertible T>
+constexpr T ToBigEndian(T value) {
+  return internal::ConvertEndian<std::endian::big>(value);
+}
+
+/// \brief Convert a value from big-endian byte order to host byte order.
+/// \param value The value holding big-endian bytes.
+/// \return `value` unchanged if the host is big-endian or `T` is one byte wide,
+/// otherwise `value` with its bytes reversed.
+template <EndianConvertible T>
+constexpr T FromBigEndian(T value) {
+  return internal::ConvertEndian<std::endian::big>(value);
+}
+
+}  // namespace iceberg
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
@@ -89,7 +89,8 @@ add_iceberg_test(util_test
                  formatter_test.cc
                  config_test.cc
                  visit_type_test.cc
-                 string_utils_test.cc)
+                 string_utils_test.cc
+                 endian_test.cc)
 
 if(ICEBERG_BUILD_BUNDLE)
   add_iceberg_test(avro_test
diff --git a/test/endian_test.cc b/test/endian_test.cc
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/endian.h"
+
+#include <array>
+#include <cmath>
+#include <limits>
+
+#include <gtest/gtest.h>
+
+namespace iceberg {
+
+// Converting to a byte order and back must give the original value for integers,
+// floating-point numbers, and the floating-point special values.
+TEST(EndianTest, RoundTripPreservesValue) {
+  // Unsigned integers, including the extremes.
+  EXPECT_EQ(FromLittleEndian(ToLittleEndian<uint16_t>(0x1234)), 0x1234);
+  EXPECT_EQ(FromBigEndian(ToBigEndian<uint32_t>(0xDEADBEEF)), 0xDEADBEEF);
+  EXPECT_EQ(FromLittleEndian(ToLittleEndian(std::numeric_limits<uint64_t>::max())),
+            std::numeric_limits<uint64_t>::max());
+  // Unsigned literal keeps both sides of the comparison unsigned.
+  EXPECT_EQ(FromBigEndian(ToBigEndian<uint32_t>(0)), 0u);
+
+  // Signed integers, including the extremes.
+  EXPECT_EQ(FromBigEndian(ToBigEndian<int16_t>(-1)), -1);
+  EXPECT_EQ(FromLittleEndian(ToLittleEndian<int32_t>(-0x12345678)), -0x12345678);
+  EXPECT_EQ(FromBigEndian(ToBigEndian(std::numeric_limits<int64_t>::min())),
+            std::numeric_limits<int64_t>::min());
+  EXPECT_EQ(FromLittleEndian(ToLittleEndian(std::numeric_limits<int16_t>::max())),
+            std::numeric_limits<int16_t>::max());
+
+  // Ordinary floating-point values.
+  EXPECT_EQ(FromLittleEndian(ToLittleEndian(3.14f)), 3.14f);
+  EXPECT_EQ(FromBigEndian(ToBigEndian(2.718281828459045)), 2.718281828459045);
+
+  // float special values.
+  EXPECT_EQ(FromLittleEndian(ToLittleEndian(std::numeric_limits<float>::infinity())),
+            std::numeric_limits<float>::infinity());
+  EXPECT_EQ(FromBigEndian(ToBigEndian(-std::numeric_limits<float>::infinity())),
+            -std::numeric_limits<float>::infinity());
+  EXPECT_TRUE(std::isnan(
+      FromLittleEndian(ToLittleEndian(std::numeric_limits<float>::quiet_NaN()))));
+  EXPECT_EQ(FromBigEndian(ToBigEndian(0.0f)), 0.0f);
+  EXPECT_EQ(FromLittleEndian(ToLittleEndian(-0.0f)), -0.0f);
+  // -0.0f == 0.0f is true, so the sign bit has to be checked separately.
+  EXPECT_FALSE(std::signbit(FromBigEndian(ToBigEndian(0.0f))));
+  EXPECT_TRUE(std::signbit(FromLittleEndian(ToLittleEndian(-0.0f))));
+
+  // double special values.
+  EXPECT_EQ(FromBigEndian(ToBigEndian(std::numeric_limits<double>::infinity())),
+            std::numeric_limits<double>::infinity());
+  EXPECT_EQ(FromLittleEndian(ToLittleEndian(-std::numeric_limits<double>::infinity())),
+            -std::numeric_limits<double>::infinity());
+  EXPECT_TRUE(
+      std::isnan(FromBigEndian(ToBigEndian(std::numeric_limits<double>::quiet_NaN()))));
+  EXPECT_EQ(FromLittleEndian(ToLittleEndian(0.0)), 0.0);
+  EXPECT_EQ(FromBigEndian(ToBigEndian(-0.0)), -0.0);
+  // -0.0 == 0.0 is true, so the sign bit has to be checked separately.
+  EXPECT_FALSE(std::signbit(FromLittleEndian(ToLittleEndian(0.0))));
+  EXPECT_TRUE(std::signbit(FromBigEndian(ToBigEndian(-0.0))));
+}
+
+// The conversions must be usable inside constant expressions; every check below
+// is resolved by the compiler, so the test body does nothing at run time.
+TEST(EndianTest, ConstexprEvaluation) {
+  // Multi-byte integers survive a round trip.
+  constexpr auto kRoundTripU16 = FromBigEndian(ToBigEndian<uint16_t>(0x1234));
+  static_assert(kRoundTripU16 == 0x1234);
+  constexpr auto kRoundTripU32 = FromLittleEndian(ToLittleEndian<uint32_t>(0x12345678));
+  static_assert(kRoundTripU32 == 0x12345678);
+  constexpr auto kRoundTripI64 = FromBigEndian(ToBigEndian<int64_t>(-1));
+  static_assert(kRoundTripI64 == -1);
+
+  // Single-byte values pass through a single conversion unchanged.
+  constexpr auto kByteToBig = ToBigEndian<uint8_t>(0xFF);
+  static_assert(kByteToBig == 0xFF);
+  constexpr auto kByteFromLittle = FromLittleEndian<int8_t>(-1);
+  static_assert(kByteFromLittle == -1);
+
+  // Floating-point values survive a round trip.
+  constexpr auto kRoundTripFloat = FromLittleEndian(ToLittleEndian(3.14f));
+  static_assert(kRoundTripFloat == 3.14f);
+  constexpr auto kRoundTripDouble = FromBigEndian(ToBigEndian(2.718));
+  static_assert(kRoundTripDouble == 2.718);
+}
+
+// Conversions to and from the host's own byte order must be identities, while a
+// conversion to the opposite order must change this multi-byte value.
+TEST(EndianTest, PlatformDependentBehavior) {
+  constexpr bool kHostIsLittle = std::endian::native == std::endian::little;
+  constexpr bool kHostIsBig = std::endian::native == std::endian::big;
+
+  const uint32_t probe = 0x12345678;
+
+  if constexpr (kHostIsLittle) {
+    EXPECT_EQ(ToLittleEndian(probe), probe);
+    EXPECT_EQ(FromLittleEndian(probe), probe);
+    EXPECT_NE(ToBigEndian(probe), probe);
+  } else if constexpr (kHostIsBig) {
+    EXPECT_EQ(ToBigEndian(probe), probe);
+    EXPECT_EQ(FromBigEndian(probe), probe);
+    EXPECT_NE(ToLittleEndian(probe), probe);
+  }
+
+  // A single byte has no order to change, whatever the host is.
+  EXPECT_EQ(ToLittleEndian<uint8_t>(0xAB), 0xAB);
+  EXPECT_EQ(ToBigEndian<uint8_t>(0xAB), 0xAB);
+}
+
+// Checks the exact in-memory byte sequence produced by each conversion for one
+// 32-bit integer and one 32-bit float.
+TEST(EndianTest, SpecificBytePatternValidation) {
+  using Bytes4 = std::array<uint8_t, 4>;
+
+  // 0x12345678: least-significant byte first in little-endian, last in big-endian.
+  const uint32_t int_value = 0x12345678;
+  const auto int_as_little = std::bit_cast<Bytes4>(ToLittleEndian(int_value));
+  const auto int_as_big = std::bit_cast<Bytes4>(ToBigEndian(int_value));
+
+  EXPECT_EQ(int_as_little, (Bytes4{0x78, 0x56, 0x34, 0x12}));
+  EXPECT_EQ(int_as_big, (Bytes4{0x12, 0x34, 0x56, 0x78}));
+
+  // The expected bytes below spell 0x4048F5C3 for 3.14f.
+  const float float_value = 3.14f;
+  const auto float_as_little = std::bit_cast<Bytes4>(ToLittleEndian(float_value));
+  const auto float_as_big = std::bit_cast<Bytes4>(ToBigEndian(float_value));
+
+  EXPECT_EQ(float_as_little, (Bytes4{0xC3, 0xF5, 0x48, 0x40}));
+  EXPECT_EQ(float_as_big, (Bytes4{0x40, 0x48, 0xF5, 0xC3}));
+}
+
+}  // namespace iceberg