diff --git a/CMakeLists.txt b/CMakeLists.txt index c5751a6..2df211e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -173,6 +173,8 @@ if(FASTPFOR_WITH_TEST) unittest/test_composite.cpp unittest/test_driver.cpp unittest/test_fastpfor.cpp + unittest/test_simple8b.cpp + unittest/test_simple16.cpp unittest/test_variablebyte.cpp) target_link_libraries(FastPFOR_unittest gtest FastPFOR) enable_testing() diff --git a/headers/codecs.h b/headers/codecs.h index adfcec1..a8681c5 100644 --- a/headers/codecs.h +++ b/headers/codecs.h @@ -53,6 +53,10 @@ class IntegerCODEC { * of the variable nvalue gets updated with the number actually use * (if nvalue exceeds the original value, there might be a buffer * overrun). + * + * NOTE: Decoding can be performed with an unknown input length. This + * case is indicated by a length of 0; however, nvalue must be provided + * in order for the decoder knows how many values to decode. */ virtual const uint32_t *decodeArray(const uint32_t *in, const size_t length, uint32_t *out, size_t &nvalue) = 0; diff --git a/headers/simple16.h b/headers/simple16.h index cd4e06a..e8e3423 100644 --- a/headers/simple16.h +++ b/headers/simple16.h @@ -745,7 +745,7 @@ const uint32_t *Simple16::decodeArray(const uint32_t *in, printf("simple16 stats[%u]=%f\n", k, stats[k] * 1.0 / sum); } #endif - ASSERT(in <= endin, std::to_string(in - endin)); + ASSERT(len == 0 || in <= endin, std::to_string(in - endin)); return in; } diff --git a/headers/simple8b.h b/headers/simple8b.h index 3bb2a5d..4e4874c 100644 --- a/headers/simple8b.h +++ b/headers/simple8b.h @@ -637,9 +637,9 @@ const uint32_t *Simple8b::decodeArray(const uint32_t *in, printf("simple8b stats[%u]=%f\n", k, stats[k] * 1.0 / sum); } #endif - assert(in64 <= finalin64); + assert(len == 0 || in64 <= finalin64); in = reinterpret_cast(in64); - assert(in <= endin); + assert(len == 0 || in <= endin); // check that we don't overrun the buffer too much? ASSERT(out < end + 240, std::to_string(out - end)); nvalue = MarkLength ? actualvalue : out - initout; diff --git a/unittest/test_simple16.cpp b/unittest/test_simple16.cpp new file mode 100644 index 0000000..333bcb6 --- /dev/null +++ b/unittest/test_simple16.cpp @@ -0,0 +1,20 @@ +#include + +#include "simple16.h" +#include "util.h" + +namespace FastPForLib { + +TEST(Simple16Test, DecodesWithUnknownLength) { + Simple16 codec; + std::vector in; + for (uint32_t i = 0; i < 128; ++i) { + in.push_back(i); + } + + // Simple16 may overrun the output buffer regardless of `n`, so a headroom of + // 28 (the maximum number of elements in a single pack) is added. + std::vector decoded(in.size() + 28, 0); + verifyUnknownInputLengthDecode(codec, in, decoded); +} +} // namespace FastPForLib diff --git a/unittest/test_simple8b.cpp b/unittest/test_simple8b.cpp new file mode 100644 index 0000000..00f84dc --- /dev/null +++ b/unittest/test_simple8b.cpp @@ -0,0 +1,18 @@ +#include + +#include "simple8b.h" +#include "util.h" + +namespace FastPForLib { + +TEST(Simple8bTest, DecodesWithUnknownLength) { + Simple8b codec; + std::vector in; + for (uint32_t i = 0; i < 128; ++i) { + in.push_back(i); + } + + std::vector decoded(in.size(), 0); + verifyUnknownInputLengthDecode(codec, in, decoded); +} +} // namespace FastPForLib diff --git a/unittest/util.h b/unittest/util.h new file mode 100644 index 0000000..4316d15 --- /dev/null +++ b/unittest/util.h @@ -0,0 +1,38 @@ +#ifndef FASTPFORLIB_UTIL_H_ +#define FASTPFORLIB_UTIL_H_ + +#include + +#include "gtest/gtest.h" + +namespace FastPForLib { + +template +void verifyUnknownInputLengthDecode(Codec &codec, + const std::vector &in, + std::vector &decoded) { + std::vector encoded(in.size() * 2, 0); + size_t encodedSize; + codec.encodeArray(in.data(), in.size(), encoded.data(), encodedSize); + encoded.resize(encodedSize); + + size_t n = in.size(); + const uint32_t *decodedUntil = + codec.decodeArray(encoded.data(), 0, decoded.data(), n); + decoded.resize(n); + + // Check that the decoded size matches the input size. + EXPECT_EQ(n, in.size()); + + // Check that the returned pointer matches the end of the encoded buffer. + EXPECT_EQ(decodedUntil, encoded.data() + encodedSize); + + // Check each decoded element matches its corresponding input value. + for (size_t i = 0; i < in.size(); ++i) { + EXPECT_EQ(in[i], decoded[i]); + } +} + +} // namespace FastPForLib + +#endif // FASTPFORLIB_UTIL_H_