|
| 1 | +/* |
| 2 | + * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | +#include <gtest/gtest.h> |
| 17 | + |
| 18 | +#include "dwio/nimble/common/Buffer.h" |
| 19 | +#include "dwio/nimble/common/tests/TestUtils.h" |
| 20 | +#include "dwio/nimble/index/tests/TabletIndexTestUtils.h" |
| 21 | +#include "dwio/nimble/tablet/TabletReader.h" |
| 22 | +#include "dwio/nimble/tablet/TabletWriter.h" |
| 23 | +#include "dwio/nimble/tablet/tests/TabletTestUtils.h" |
| 24 | +#include "velox/common/file/File.h" |
| 25 | +#include "velox/common/memory/Memory.h" |
| 26 | + |
| 27 | +using namespace facebook; |
| 28 | + |
| 29 | +namespace { |
| 30 | + |
| 31 | +class TabletReaderTest : public ::testing::Test { |
| 32 | + protected: |
| 33 | + using StreamSpec = nimble::index::test::StreamSpec; |
| 34 | + using ChunkSpec = nimble::index::test::ChunkSpec; |
| 35 | + using KeyChunkSpec = nimble::index::test::KeyChunkSpec; |
| 36 | + |
| 37 | + // Runs once for the whole suite: installs a fresh global MemoryManager so |
| 38 | + // the tests start from a known, isolated memory state. |
| 39 | + static void SetUpTestCase() { |
| 40 | + velox::memory::MemoryManager::testingSetInstance({}); |
| 41 | + } |
| 42 | + |
| 43 | + // Intentionally empty per-test hook, kept as a placeholder for future setup. |
| 44 | + void SetUp() override {} |
| 45 | + |
| 46 | + // Expectation for a key lookup against the tablet index. |
| 47 | + struct LookupTestCase { |
| 48 | + // Serialized lookup key. |
| 49 | + std::string key; |
| 50 | + // Stripe expected to match, or std::nullopt when no stripe should match. |
| 51 | + std::optional<uint32_t> expectedStripeIndex; |
| 52 | + }; |
| 53 | + |
| 54 | + // Expectation for StripeIndexGroup::lookupChunk: names the stripe that |
| 55 | + // should hold the key and the file-wide row id (counted across all stripes |
| 56 | + // from the start of the file) at which the matching chunk begins. |
| 57 | + struct KeyLookupTestCase { |
| 58 | + // Key to look up, encoded the same way as keys in the KeyStream. |
| 59 | + std::string key; |
| 60 | + // Index of the stripe expected to contain the key. |
| 61 | + uint32_t expectedStripeIndex; |
| 62 | + // Absolute row id within the file where the chunk starts. |
| 63 | + uint32_t expectedFileRowId; |
| 64 | + }; |
| 65 | + |
| 66 | + // Builds one data Stream from its spec (stream offset plus per-chunk |
| 67 | + // rowCount/size) by delegating to the shared index test utilities. |
| 68 | + static nimble::Stream createStream( |
| 69 | + nimble::Buffer& buffer, |
| 70 | + const StreamSpec& spec) { |
| 71 | + return nimble::index::test::createStream(buffer, spec); |
| 72 | + } |
| 73 | + |
| 74 | + // Builds one data Stream per spec, in spec order. The result vector is |
| 75 | + // sized up front so appending never reallocates. |
| 76 | + static std::vector<nimble::Stream> createStreams( |
| 77 | + nimble::Buffer& buffer, |
| 78 | + const std::vector<StreamSpec>& specs) { |
| 79 | + std::vector<nimble::Stream> result; |
| 80 | + result.reserve(specs.size()); |
| 81 | + for (const auto& streamSpec : specs) { |
| 82 | + result.emplace_back(createStream(buffer, streamSpec)); |
| 83 | + } |
| 84 | + return result; |
| 85 | + } |
| 86 | + |
| 87 | + // Builds a KeyStream from chunk specs; each KeyChunkSpec supplies the row |
| 88 | + // count and the [minKey, maxKey] range of one chunk used by the index. |
| 89 | + static nimble::KeyStream createKeyStream( |
| 90 | + nimble::Buffer& buffer, |
| 91 | + const std::vector<KeyChunkSpec>& chunkSpecs) { |
| 92 | + return nimble::index::test::createKeyStream(buffer, chunkSpecs); |
| 93 | + } |
| 94 | + |
| 95 | + // Memory pools shared by the writer, the reader and the test buffers. |
| 96 | + // pool_ is a leaf child of rootPool_, so rootPool_ must be declared first. |
| 97 | + std::shared_ptr<velox::memory::MemoryPool> rootPool_{ |
| 98 | + velox::memory::memoryManager()->addRootPool("TabletTest")}; |
| 99 | + std::shared_ptr<velox::memory::MemoryPool> pool_{ |
| 100 | + rootPool_->addLeafChild("TabletTest")}; |
| 101 | +}; |
| 102 | + |
| 103 | +TEST_F(TabletReaderTest, SimpleStreamReader) { |
| 104 | + // Writes one indexed stripe into an in-memory tablet, reads the tablet back |
| 105 | + // and checks the stripe group, stripe and row counts it reports. |
| 106 | + |
| 107 | + // The writer serializes the tablet into this string via the write file. |
| 108 | + std::string file; |
| 109 | + velox::InMemoryWriteFile writeFile(&file); |
| 110 | + |
| 111 | + // Index on two columns, both ascending with nulls first; enforceKeyOrder |
| 112 | + // requires the input rows to follow the sorted key order per stripe. |
| 113 | + nimble::TabletIndexConfig indexConfig{ |
| 114 | + .columns = {"col1", "col2"}, |
| 115 | + .sortOrders = {velox::core::kAscNullsFirst, velox::core::kAscNullsFirst}, |
| 116 | + .enforceKeyOrder = true, |
| 117 | + }; |
| 118 | + |
| 119 | + auto tabletWriter = nimble::TabletWriter::create( |
| 120 | + &writeFile, |
| 121 | + *pool_, |
| 122 | + { |
| 123 | + // A zero threshold forces a metadata flush after every stripe, which |
| 124 | + // simplifies verification. |
| 125 | + .metadataFlushThreshold = 0, |
| 126 | + // No deduplication, so every stream stays distinct and stream |
| 127 | + // sizes/offsets are deterministic. |
| 128 | + .streamDeduplicationEnabled = false, |
| 129 | + .indexConfig = indexConfig, |
| 130 | + }); |
| 131 | + |
| 132 | + // Scratch buffer the helpers use to assemble streams and key streams. |
| 133 | + nimble::Buffer buffer{*pool_}; |
| 134 | + |
| 135 | + // Stripe 0 (stripe group 0), 100 rows in total: |
| 136 | + // - stream 0 holds two 50-row chunks of 10 and 12 bytes; |
| 137 | + // - the key stream holds two 50-row key chunks, ["aaa", "bbb"] for the |
| 138 | + // first 50 rows and ["bbb", "ccc"] for the next 50. |
| 139 | + { |
| 140 | + auto streams = createStreams( |
| 141 | + buffer, |
| 142 | + { |
| 143 | + {.offset = 0, |
| 144 | + .chunks = |
| 145 | + { |
| 146 | + {.rowCount = 50, .size = 10}, |
| 147 | + {.rowCount = 50, .size = 12}, |
| 148 | + }}, |
| 149 | + }); |
| 150 | + |
| 151 | + auto keyStream = createKeyStream( |
| 152 | + buffer, |
| 153 | + { |
| 154 | + // Positional fields: row count, min key, max key. |
| 155 | + {50, "aaa", "bbb"}, |
| 156 | + {50, "bbb", "ccc"}, |
| 157 | + }); |
| 158 | + |
| 159 | + // Hand the stripe's data streams and key stream to the writer. |
| 160 | + tabletWriter->writeStripe(100, std::move(streams), std::move(keyStream)); |
| 161 | + } |
| 162 | + |
| 163 | + // Finalize the writer, then close the in-memory file before reading it back. |
| 164 | + tabletWriter->close(); |
| 165 | + writeFile.close(); |
| 166 | + |
| 167 | + // Reopen the serialized bytes through a read file and parse the tablet. |
| 168 | + nimble::testing::InMemoryTrackableReadFile readFile(file, false); |
| 169 | + auto tablet = nimble::TabletReader::create(&readFile, *pool_); |
| 170 | + |
| 171 | + // The helper exposes reader internals such as the stripe group count. |
| 172 | + nimble::test::TabletReaderTestHelper tabletHelper(tablet.get()); |
| 173 | + EXPECT_EQ(tabletHelper.numStripeGroups(), 1) |
| 174 | + << "Expected a single stripe group"; |
| 175 | + |
| 176 | + // Fatal on purpose: stripeRowCount(0) below indexes stripe 0, so the test |
| 177 | + // must stop here if the reader does not report exactly one stripe. |
| 178 | + ASSERT_EQ(tablet->stripeCount(), 1) << "One stripe should have been written"; |
| 179 | + |
| 180 | + // Row totals for the whole tablet and for its only stripe. |
| 181 | + EXPECT_EQ(tablet->tabletRowCount(), 100) |
| 182 | + << "Total row count across all stripes"; |
| 183 | + EXPECT_EQ(tablet->stripeRowCount(0), 100) |
| 184 | + << "Stripe 0 should contain 100 rows"; |
| 185 | +} |
| 186 | + |
| 187 | +} // namespace |
0 commit comments