Update UVarInt implementation (#40)

sergkaprovich · web-flow · commit 3a086bb7b80a · 2019-12-18T10:57:50.000+03:00
Signed-off-by: SergKaprovich <serginetty@gmail.com>
diff --git a/include/libp2p/multi/uvarint.hpp b/include/libp2p/multi/uvarint.hpp
@@ -16,12 +16,8 @@
 namespace libp2p::multi {
 
   /**
-   * A C++ wrapper for varint encoding implementation, which can be found in
-   * c-utils library. Encodes unsigned integers into variable-length byte
-   * arrays, efficient, having both an ability to store large numbers and not
-   * wasting space on small ones. Mind that the maximum length of a varint is 8
-   * bytes and it can store only unsigned integers
-   * @see https://github.com/multiformats/unsigned-varint
+   * @brief Encodes and decodes unsigned integers into and from
+   * variable-length byte arrays using the LEB128 algorithm.
    */
   class UVarint {
    public:
@@ -86,7 +82,7 @@ namespace libp2p::multi {
 
    private:
     /// private ctor for unsafe creation
-    UVarint(gsl::span<const uint8_t> varint_bytes, int64_t varint_size);
+    UVarint(gsl::span<const uint8_t> varint_bytes, size_t varint_size);
 
     std::vector<uint8_t> bytes_{};
   };
diff --git a/src/multi/uvarint.cpp b/src/multi/uvarint.cpp
@@ -11,53 +11,39 @@ namespace libp2p::multi {
   using common::hex_upper;
 
   UVarint::UVarint(uint64_t number) {
-    bytes_.resize(8);
-    size_t i = 0;
-    size_t size = 0;
-    for (; i < 8; i++) {
-      bytes_[i] = static_cast<uint8_t>((number & 0xFFul) | 0x80ul);
-      number >>= 7ul;
-      if (number == 0) {
-        bytes_[i] &= 0x7Ful;
-        size = i + 1;
-        break;
-      }
-    }
-    bytes_.resize(size);
+    bool more = true;
+    while (more) {
+      const auto group = static_cast<uint8_t>(number & 0x7f);
+      number >>= 7;
+      more = number != 0;  // continuation bit: another 7-bit group follows
+      bytes_.push_back(more ? static_cast<uint8_t>(group | 0x80) : group);
+    }
   }
 
   UVarint::UVarint(gsl::span<const uint8_t> varint_bytes)
       : bytes_(varint_bytes.begin(),
                varint_bytes.begin() + calculateSize(varint_bytes)) {}
 
-  UVarint::UVarint(gsl::span<const uint8_t> varint_bytes, int64_t varint_size)
+  UVarint::UVarint(gsl::span<const uint8_t> varint_bytes, size_t varint_size)
       : bytes_(varint_bytes.begin(), varint_bytes.begin() + varint_size) {}
 
   boost::optional<UVarint> UVarint::create(
       gsl::span<const uint8_t> varint_bytes) {
-    if (varint_bytes.empty()) {
-      return {};
+    const size_t length = calculateSize(varint_bytes);  // 0 = invalid input
+    if (length != 0) {
+      return UVarint{varint_bytes, length};
     }
-    // no use of calculateSize(..), as it is unsafe in this case
-    int64_t s = 0;
-    while ((varint_bytes[s] & 0x80u) != 0) {
-      ++s;
-      if (s >= varint_bytes.size()) {
-        return {};
-      }
-    }
-    return UVarint{varint_bytes, s + 1};
+    return {};
   }
 
   uint64_t UVarint::toUInt64() const {
     uint64_t res = 0;
-    for (size_t i = 0; i < 8 && i < bytes_.size(); i++) {
-      res |= ((bytes_[i] & 0x7ful) << (7 * i));
-      if ((bytes_[i] & 0x80ul) == 0) {
-        return res;
-      }
+    size_t shift = 0;  // bit position of the current 7-bit group
+    for (const uint8_t encoded : bytes_) {
+      res |= static_cast<uint64_t>(encoded & 0x7f) << shift;
+      shift += 7;
     }
-    return -1;
+    return res;
   }
 
   gsl::span<const uint8_t> UVarint::toBytes() const {
@@ -91,12 +77,24 @@ namespace libp2p::multi {
   }
 
   size_t UVarint::calculateSize(gsl::span<const uint8_t> varint_bytes) {
-    size_t s = 0;
-
-    while ((varint_bytes[s] & 0x80u) != 0) {
-      s++;
+    // Walks the 7-bit groups and reports how many bytes form the varint;
+    // 0 means empty, unterminated, or too large for uint64_t.
+    constexpr size_t kBits = sizeof(uint64_t) * 8;
+    size_t consumed = 0;
+    for (const uint8_t current : varint_bytes) {
+      const size_t offset = 7 * consumed;
+      ++consumed;
+      const uint64_t payload = current & 0x7f;
+      // shifting must be reversible, otherwise payload bits fall off
+      if (offset >= kBits || (payload << offset) >> offset != payload) {
+        return 0;
+      }
+      // a cleared continuation bit marks the final byte
+      if ((current & 0x80) == 0) {
+        return consumed;
+      }
     }
-    return s + 1;
+    return 0;
   }
 
 }  // namespace libp2p::multi
diff --git a/test/libp2p/multi/utils/uvarint_test.cpp b/test/libp2p/multi/utils/uvarint_test.cpp
@@ -61,3 +61,43 @@ TEST(UVarint, CalculateSize) {
   uint8_t another_bytes[] = {0x71, 0xA3, 0x75, 0x43, 0xAA};
   ASSERT_EQ(UVarint::calculateSize(gsl::span(another_bytes, 5)), 1);
 }
+
+/**
+ * @given Every power of two from 2^0 to 2^63, covering all encoded lengths
+ * @when Encoding and decoding back sample integer
+ * @then Encoding/decoding must be successful without losses of data
+ */
+TEST(UVarint, ReversibilitySuccess) {
+  for (unsigned bit = 0; bit < 64; ++bit) {
+    const uint64_t data = uint64_t{1} << bit;  // exactly one bit set
+    UVarint var{data};
+    uint64_t decoded = var.toUInt64();
+    ASSERT_EQ(data, decoded);
+  }
+}
+
+/**
+ * @given The smallest and the largest values representable by uint64_t
+ * @when Each value is encoded and then decoded again
+ * @then The decoded value must be equal to the initial one
+ */
+TEST(UVarint, EncodeLimitsAreCorrect) {
+  const uint64_t lowest = std::numeric_limits<uint64_t>::min();
+  const uint64_t highest = std::numeric_limits<uint64_t>::max();
+  UVarint encoded_lowest{lowest};
+  UVarint encoded_highest{highest};
+  ASSERT_EQ(lowest, encoded_lowest.toUInt64());
+  ASSERT_EQ(highest, encoded_highest.toUInt64());
+}
+
+/**
+ * @given Bytes encoding 2^64, one past the largest uint64_t value (2^64-1)
+ * @when Creating new UVarint from raw bytes
+ * @then Creation must fail and yield an empty optional
+ */
+TEST(UVarint, DecodeOverflowFailure) {
+  std::vector<uint8_t> overflow_encoded_data{
+      {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x02}};
+  auto var = UVarint::create(overflow_encoded_data);
+  ASSERT_FALSE(var);
+}
diff --git a/test/libp2p/multi/uvarint_test.cpp b/test/libp2p/multi/uvarint_test.cpp