Skip to content

Commit 43994c7

Browse files
xiaoxmengmeta-codesync[bot]
authored andcommitted
feat: Improve index processing and code refactor (#385)
Summary: Pull Request resolved: #385 Move StripeIndexGroup and TabletIndex to facebook::nimble::index namespace for better organization Add NIMBLE_USER_CHECK macro for user-facing validation errors Improve error messages in EncodingFactory to include encoding type Optimize StripePositionIndex by using accumulated chunk counts (prefix sums) instead of separate count and index arrays Add IndexConfig.cpp implementation file and move defaultEncodingLayout out of header Add test utilities (TestChunkDecoder, SingleChunkDecoder, StripeIndexGroupTestHelper) for chunk decoding and index testing Add TabletReaderTestHelper for testing stripe/index group caching behavior Reviewed By: HuamengJiang Differential Revision: D89835718 fbshipit-source-id: 034d70dc66b3ac05629dadea4b1778656a0fa884
1 parent 075e136 commit 43994c7

22 files changed

+560
-124
lines changed

dwio/nimble/common/Exceptions.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,10 @@ NIMBLE_DECLARE_CHECK_FAIL_TEMPLATES(::facebook::nimble::NimbleInternalError);
157157

158158
#define NIMBLE_CHECK(expr, ...) _NIMBLE_CHECK_IMPL(expr, #expr, ##__VA_ARGS__)
159159

160-
// Verify an expected file format conditions. Failure of this condition means
160+
#define NIMBLE_USER_CHECK(expr, ...) \
161+
_NIMBLE_USER_CHECK_IMPL(expr, #expr, ##__VA_ARGS__)
162+
163+
// Verify an expected file format conditions.
161164
// the file is corrupted (e.g. passed magic number and version verification, but
162165
// got unexpected format). This will trigger a user error.
163166
#define NIMBLE_CHECK_FILE(condition, ...) \

dwio/nimble/encodings/EncodingFactory.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,8 @@ std::unique_ptr<Encoding> EncodingFactory::decode(
190190
}
191191
default: {
192192
NIMBLE_UNREACHABLE(
193-
"Trying to deserialize invalid EncodingType -- garbage input?");
193+
"Trying to deserialize invalid EncodingType:{} -- garbage input?",
194+
static_cast<int>(encodingType));
194195
}
195196
}
196197
}

dwio/nimble/index/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
add_library(nimble_index StripeIndexGroup.cpp TabletIndex.cpp)
14+
add_library(nimble_index IndexConfig.cpp StripeIndexGroup.cpp TabletIndex.cpp)
1515
target_link_libraries(
1616
nimble_index
1717
nimble_common

dwio/nimble/index/IndexConfig.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "dwio/nimble/index/IndexConfig.h"
18+
19+
namespace facebook::nimble {
20+
21+
EncodingLayout IndexConfig::defaultEncodingLayout(
22+
CompressionType compressionType) {
23+
return EncodingLayout{
24+
EncodingType::Trivial,
25+
compressionType,
26+
{EncodingLayout{EncodingType::Trivial, CompressionType::Uncompressed}}};
27+
}
28+
29+
} // namespace facebook::nimble

dwio/nimble/index/IndexConfig.h

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,7 @@ struct IndexConfig {
5050
/// TrivialEncoding for string_view needs a nested encoding for string
5151
/// lengths.
5252
static EncodingLayout defaultEncodingLayout(
53-
CompressionType compressionType = CompressionType::Zstd) {
54-
return EncodingLayout{
55-
EncodingType::Trivial,
56-
compressionType,
57-
{EncodingLayout{EncodingType::Trivial, CompressionType::Uncompressed}}};
58-
}
53+
CompressionType compressionType = CompressionType::Zstd);
5954
};
6055

6156
} // namespace facebook::nimble

dwio/nimble/index/StripeIndexGroup.cpp

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
#include <algorithm>
2222

23-
namespace facebook::nimble {
23+
namespace facebook::nimble::index {
2424

2525
namespace {
2626
template <typename T>
@@ -162,22 +162,23 @@ std::optional<ChunkLocation> StripeIndexGroup::lookupChunk(
162162
const auto* positionIndex = root->position_index();
163163
NIMBLE_CHECK_NOT_NULL(positionIndex);
164164

165-
// Use stream_chunk_counts and stream_chunk_indexes to find the range
165+
// Use accumulated stream_chunk_counts to find the range
166166
// for this specific stream in this stripe
167167
const auto* streamChunkCounts = positionIndex->stream_chunk_counts();
168168
NIMBLE_CHECK_NOT_NULL(streamChunkCounts);
169-
const auto* streamChunkIndexes = positionIndex->stream_chunk_indexes();
170-
NIMBLE_CHECK_NOT_NULL(streamChunkIndexes);
171-
NIMBLE_CHECK_EQ(streamChunkCounts->size(), streamChunkIndexes->size());
172169

173170
// Calculate the index in the flattened stream_chunk_counts array
174171
const uint32_t streamChunkCountIndex = stripeOffset * streamCount_ + streamId;
175172
NIMBLE_CHECK_LT(streamChunkCountIndex, streamChunkCounts->size());
176173

177-
const uint32_t chunkCount = streamChunkCounts->Get(streamChunkCountIndex);
178-
const uint32_t startChunkIndex =
179-
streamChunkIndexes->Get(streamChunkCountIndex);
180-
const uint32_t endChunkIndex = startChunkIndex + chunkCount;
174+
// stream_chunk_counts is accumulated, so:
175+
// - Start index: streamChunkCounts[streamChunkCountIndex - 1] (or 0 for
176+
// first)
177+
// - End index: streamChunkCounts[streamChunkCountIndex]
178+
const uint32_t endChunkIndex = streamChunkCounts->Get(streamChunkCountIndex);
179+
const uint32_t startChunkIndex = streamChunkCountIndex == 0
180+
? 0
181+
: streamChunkCounts->Get(streamChunkCountIndex - 1);
181182

182183
const auto* chunkRows = positionIndex->stream_chunk_rows();
183184
NIMBLE_CHECK_NOT_NULL(chunkRows);
@@ -252,4 +253,4 @@ std::optional<ChunkLocation> StreamIndex::lookupChunk(uint32_t rowId) const {
252253
return stripeIndexGroup_->lookupChunk(stripe_, streamId_, rowId);
253254
}
254255

255-
} // namespace facebook::nimble
256+
} // namespace facebook::nimble::index

dwio/nimble/index/StripeIndexGroup.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,14 @@
2424
#include "velox/common/file/Region.h"
2525

2626
namespace facebook::nimble {
27-
2827
class MetadataBuffer;
29-
class StripeGroup;
28+
} // namespace facebook::nimble
29+
30+
namespace facebook::nimble::index {
31+
32+
namespace test {
33+
class StripeIndexGroupTestHelper;
34+
} // namespace test
3035

3136
/// Represents the location of a chunk within a stream.
3237
/// Both offsets are relative:
@@ -96,6 +101,8 @@ class StripeIndexGroup {
96101
const uint32_t firstStripe_;
97102
const uint32_t stripeCount_;
98103
const uint32_t streamCount_;
104+
105+
friend class test::StripeIndexGroupTestHelper;
99106
};
100107

101108
/// StreamIndex is a convenience wrapper around StripeIndexGroup for rowId-based
@@ -122,4 +129,4 @@ class StreamIndex {
122129
const uint32_t streamId_;
123130
};
124131

125-
} // namespace facebook::nimble
132+
} // namespace facebook::nimble::index

dwio/nimble/index/TabletIndex.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
#include "dwio/nimble/tablet/IndexGenerated.h"
2222
#include "dwio/nimble/tablet/MetadataBuffer.h"
2323

24-
namespace facebook::nimble {
24+
namespace facebook::nimble::index {
2525

2626
namespace {
2727
const serialization::Index* getIndexRoot(const Section& indexSection) {
@@ -38,6 +38,13 @@ uint32_t getStripeCount(const Section& indexSection) {
3838
return stripeKeys->size() - 1;
3939
}
4040

41+
uint32_t getIndexGroupCount(const Section& indexSection) {
42+
const auto* indexRoot = getIndexRoot(indexSection);
43+
const auto* stripeIndexGroups = indexRoot->stripe_index_groups();
44+
NIMBLE_CHECK_NOT_NULL(stripeIndexGroups);
45+
return stripeIndexGroups->size();
46+
}
47+
4148
std::vector<std::string_view> getStripeKeys(const Section& indexSection) {
4249
const auto* indexRoot = getIndexRoot(indexSection);
4350
const auto* stripeKeys = indexRoot->stripe_keys();
@@ -71,18 +78,16 @@ std::unique_ptr<TabletIndex> TabletIndex::create(Section indexSection) {
7178
TabletIndex::TabletIndex(Section indexSection)
7279
: indexSection_{std::move(indexSection)},
7380
numStripes_{getStripeCount(indexSection_)},
81+
numIndexGroups_{getIndexGroupCount(indexSection_)},
7482
stripeKeys_{getStripeKeys(indexSection_)},
7583
indexColumns_{getIndexColumns(indexSection_)} {
7684
NIMBLE_CHECK(!indexColumns_.empty());
7785
NIMBLE_CHECK_EQ(numStripes_ + 1, stripeKeys_.size());
7886

7987
// Validate consistency between stripe group indices and stripe index groups
80-
const auto* indexRoot = getIndexRoot(indexSection_);
81-
const auto* stripeIndexGroups = indexRoot->stripe_index_groups();
82-
NIMBLE_CHECK_NOT_NULL(stripeIndexGroups);
8388
NIMBLE_CHECK_GE(
8489
numStripes_,
85-
stripeIndexGroups->size(),
90+
numIndexGroups_,
8691
"Number of stripe index groups cannot exceed number of stripes");
8792
}
8893

@@ -113,10 +118,10 @@ std::optional<StripeLocation> TabletIndex::lookup(
113118
}
114119

115120
MetadataSection TabletIndex::groupIndexMetadata(uint32_t groupIndex) const {
121+
NIMBLE_CHECK_LT(groupIndex, numIndexGroups_);
116122
const auto* indexRoot = getIndexRoot(indexSection_);
117123
const auto* indexGroupSections = indexRoot->stripe_index_groups();
118124
NIMBLE_CHECK_NOT_NULL(indexGroupSections);
119-
NIMBLE_CHECK_LT(groupIndex, indexGroupSections->size());
120125
const auto* metadata = indexGroupSections->Get(groupIndex);
121126
return MetadataSection{
122127
metadata->offset(),
@@ -127,4 +132,4 @@ MetadataSection TabletIndex::groupIndexMetadata(uint32_t groupIndex) const {
127132
const MetadataBuffer& TabletIndex::rootIndex() const {
128133
return indexSection_.buffer();
129134
}
130-
} // namespace facebook::nimble
135+
} // namespace facebook::nimble::index

dwio/nimble/index/TabletIndex.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
#include "dwio/nimble/tablet/MetadataBuffer.h"
2424

25-
namespace facebook::nimble {
25+
namespace facebook::nimble::index {
2626

2727
/// Represents the global stripe index within a tablet.
2828
struct StripeLocation {
@@ -126,13 +126,19 @@ class TabletIndex {
126126
/// This is needed by StripeIndexGroup to read stripe group information.
127127
const MetadataBuffer& rootIndex() const;
128128

129+
/// Returns the number of index groups for testing purposes.
130+
size_t numIndexGroups() const {
131+
return numIndexGroups_;
132+
}
133+
129134
private:
130135
explicit TabletIndex(Section indexSection);
131136

132137
const Section indexSection_;
133138
const uint32_t numStripes_;
139+
const uint32_t numIndexGroups_;
134140
const std::vector<std::string_view> stripeKeys_;
135141
const std::vector<std::string> indexColumns_;
136142
};
137143

138-
} // namespace facebook::nimble
144+
} // namespace facebook::nimble::index

dwio/nimble/index/tests/StripeIndexGroupTest.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
#include "dwio/nimble/index/tests/TabletIndexTestBase.h"
2323
#include "dwio/nimble/tablet/TabletReader.h"
2424

25-
namespace facebook::nimble::test {
25+
namespace facebook::nimble::index::test {
2626

2727
class StripeIndexGroupTest : public TabletIndexTestBase {};
2828

@@ -664,4 +664,4 @@ TEST_F(StripeIndexGroupTest, stripeIndexAndStreamIdOutOfBound) {
664664
NIMBLE_ASSERT_THROW(
665665
stripeIndexGroup1->lookupChunk(3, 5, 0), "streamId out of range");
666666
}
667-
} // namespace facebook::nimble::test
667+
} // namespace facebook::nimble::index::test

0 commit comments

Comments
 (0)