Skip to content

Commit 9a0ca08

Browse files
authored
Fixed over-consumption / wastage of memory in IResearch address_table (#10)
* Fixed over consumption / wastage of memory in IResearch address_table * Fixed review comments
1 parent 14a258d commit 9a0ca08

File tree

3 files changed

+95
-44
lines changed

3 files changed

+95
-44
lines changed

core/formats/columnstore2.cpp

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1321,6 +1321,10 @@ void column::Prepare(doc_id_t key) {
13211321
prev_ = pend_;
13221322
pend_ = key;
13231323
docs_writer_.push_back(key);
1324+
1325+
// TODO:
1326+
// push_back() can fail. Inspect the return
1327+
// value and handle failures.
13241328
addr_table_.push_back(data_.stream.file_pointer());
13251329
}
13261330
}
@@ -1332,7 +1336,13 @@ void column::reset() {
13321336

13331337
[[maybe_unused]] const bool res = docs_writer_.erase(pend_);
13341338
IRS_ASSERT(res);
1335-
data_.stream.truncate(addr_table_.back());
1339+
1340+
// TODO:
1341+
// back()/push_back() can fail. Inspect the return
1342+
// values and handle failures.
1343+
uint64_t lastElem;
1344+
addr_table_.back(lastElem);
1345+
data_.stream.truncate(lastElem);
13361346
addr_table_.pop_back();
13371347
pend_ = prev_;
13381348
}
@@ -1345,15 +1355,24 @@ void column::flush_block() {
13451355
auto& data_out = *ctx_.data_out;
13461356
auto& block = blocks_.emplace_back();
13471357
block.addr = data_out.file_pointer();
1348-
block.last_size = data_.file.length() - addr_table_.back();
1358+
1359+
// TODO:
1360+
// addr_table_.back() can fail. Inspect the return
1361+
// value and handle failures.
1362+
uint64_t lastElem;
1363+
addr_table_.back(lastElem);
1364+
block.last_size = data_.file.length() - lastElem;
13491365

13501366
const uint32_t docs_count = addr_table_.size();
1351-
const uint64_t addr_table_size =
1352-
math::ceil64(docs_count, packed::BLOCK_SIZE_64);
1353-
auto* begin = addr_table_.begin();
1354-
auto* end = begin + addr_table_size;
1355-
if (auto* it = addr_table_.current(); it != end) {
1356-
std::memset(it, 0, (end - it) * sizeof(*it));
1367+
const uint32_t addr_table_size =
1368+
math::ceil32(docs_count, packed::BLOCK_SIZE_64);
1369+
1370+
addr_table_.grow_size(addr_table_size);
1371+
auto begin = addr_table_.begin();
1372+
auto end = begin + addr_table_size;
1373+
1374+
if (auto it = addr_table_.current(); it != end) {
1375+
std::fill(it, end, 0);
13571376
}
13581377

13591378
bool all_equal = !data_.file.length();
@@ -1377,11 +1396,11 @@ void column::flush_block() {
13771396
(0 == docs_count_ || block.avg == prev_avg_));
13781397
prev_avg_ = block.avg;
13791398
} else {
1380-
block.bits = packed::maxbits64(begin, end);
1399+
block.bits = packed::maxbits64(begin, &*end);
13811400
const size_t buf_size =
13821401
packed::bytes_required_64(addr_table_size, block.bits);
13831402
std::memset(ctx_.u64buf, 0, buf_size);
1384-
packed::pack(begin, end, ctx_.u64buf, block.bits);
1403+
packed::pack(begin, &*end, ctx_.u64buf, block.bits);
13851404

13861405
data_out.write_bytes(ctx_.u8buf, buf_size);
13871406
fixed_length_ = false;

core/formats/columnstore2.hpp

Lines changed: 53 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ enum class Version : int32_t { kMin = 0, kMax = kMin };
3737

3838
class column final : public irs::column_output {
3939
public:
40-
static constexpr size_t kBlockSize = sparse_bitmap_writer::kBlockSize;
40+
static constexpr uint32_t kBlockSize = sparse_bitmap_writer::kBlockSize;
4141
static_assert(math::is_power2(kBlockSize));
4242

4343
struct context {
@@ -82,54 +82,81 @@ class column final : public irs::column_output {
8282
public:
8383
class address_table {
8484
public:
85-
address_table(ManagedTypedAllocator<uint64_t> alloc) : alloc_(alloc) {
86-
offsets_ = alloc_.allocate(kBlockSize);
87-
offset_ = offsets_;
88-
}
85+
address_table(ManagedTypedAllocator<uint64_t> alloc) : offsets_(alloc) { }
86+
87+
~address_table() = default;
8988

90-
~address_table() { alloc_.deallocate(offsets_, kBlockSize); }
89+
bool back(uint64_t& backValue) const noexcept {
90+
IRS_ASSERT(!offsets_.empty() && current_pos_ > 0);
91+
if (offsets_.empty() || 0 == current_pos_)
92+
return false;
9193

92-
uint64_t back() const noexcept {
93-
IRS_ASSERT(offsets_ < offset_);
94-
return offset_[-1];
94+
backValue = offsets_[current_pos_ - 1];
95+
return true;
9596
}
9697

98+
// TODO:
99+
// The present users of the push/pop functions
100+
// assume that these functions will always succeed,
101+
// so they don't check for failures.
97102
bool push_back(uint64_t offset) noexcept {
98-
IRS_ASSERT(offset_ < offsets_ + kBlockSize);
99-
if (!(offset_ < offsets_ + kBlockSize))
103+
IRS_ASSERT(current_pos_ < column::kBlockSize);
104+
105+
if (!(current_pos_ < column::kBlockSize))
100106
return false;
101107

102-
*offset_++ = offset;
108+
if (offsets_.size() <= static_cast<decltype(offsets_)::size_type>(current_pos_))
109+
offsets_.resize(current_pos_ * 2 + 1);
110+
111+
offsets_[current_pos_++] = offset;
103112
return true;
104113
}
105114

106-
bool pop_back() noexcept {
107-
IRS_ASSERT(offsets_ < offset_);
108-
if (!(offsets_ < offset_))
115+
bool pop_back() {
116+
IRS_ASSERT(!offsets_.empty() && current_pos_ > 0);
117+
if (offsets_.empty() || 0 == current_pos_)
109118
return false;
110119

111-
--offset_;
120+
--current_pos_;
112121
return true;
113122
}
114123

115124
uint32_t size() const noexcept {
116-
return static_cast<uint32_t>(offset_ - offsets_);
125+
return current_pos_;
126+
}
127+
128+
// Allocates more space if required.
129+
// Doesn't shrink if max_elem < size.
130+
bool grow_size(uint32_t max_elems) {
131+
IRS_ASSERT(max_elems <= column::kBlockSize);
132+
if (!(max_elems <= column::kBlockSize))
133+
return false;
134+
135+
if (max_elems > static_cast<uint32_t>(offsets_.size()))
136+
offsets_.resize(max_elems);
137+
138+
return true;
117139
}
118140

119-
bool empty() const noexcept { return offset_ == offsets_; }
141+
bool empty() const noexcept { return 0 == current_pos_; }
142+
143+
bool full() const noexcept { return current_pos_ == column::kBlockSize; }
120144

121-
bool full() const noexcept { return offset_ == offsets_ + kBlockSize; }
145+
void reset() noexcept {
146+
offsets_.clear();
147+
current_pos_ = 0;
148+
}
122149

123-
void reset() noexcept { offset_ = offsets_; }
150+
uint64_t* begin() noexcept { return &offsets_[0]; }
124151

125-
uint64_t* begin() noexcept { return offsets_; }
126-
uint64_t* current() noexcept { return offset_; }
127-
uint64_t* end() noexcept { return offsets_ + kBlockSize; }
152+
// current() points to the next location in the table
153+
// where new data will be added.
154+
// Call grow_size() to allocate space before using current()
155+
uint64_t* current() noexcept { return &offsets_[current_pos_]; }
128156

129157
private:
130-
ManagedTypedAllocator<uint64_t> alloc_;
131-
uint64_t* offsets_{nullptr};
132-
uint64_t* offset_{nullptr};
158+
std::vector<uint64_t, ManagedTypedAllocator<uint64_t>> offsets_;
159+
uint32_t current_pos_ { 0 };
133160
};
134161

135162
private:

tests/formats/address_table_tests.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -100,17 +100,20 @@ TEST_F(address_table_tests, write_using_current_ptr) {
100100

101101
column::address_table addr_table(alloc);
102102

103+
uint64_t elemCount = 60;
103104
// add data via push_back.
104105
auto data = 0;
105-
for (uint64_t i = 0; i < 60; i++) {
106+
for (uint64_t i = 0; i < elemCount; i++) {
106107
ASSERT_TRUE(addr_table.push_back(data++));
107108
}
108109

109110
// add data via current() ptr.
110111
// column::flush_block() uses address_table
111112
// in this way.
112-
auto curr = addr_table.current();
113113
uint64_t addr_table_size = 40900;
114+
ASSERT_TRUE(addr_table.grow_size(addr_table.size() + addr_table_size));
115+
116+
auto curr = addr_table.current();
114117
auto end = curr + addr_table_size;
115118

116119
while (curr != end) {
@@ -148,15 +151,17 @@ TEST_F(address_table_tests, max_size_allowed) {
148151
// coz address_table pre-allocates memory to
149152
// accommodate kBlockSize elements.
150153
ASSERT_EQ(addr_table.size(), 0);
151-
ASSERT_EQ(addr_table.end(), addr_table.begin() + column::kBlockSize);
154+
152155
ASSERT_EQ(addr_table.current(), addr_table.begin() + 0);
153156

154-
for (uint64_t i = 0; i < 1234; i++) {
157+
ASSERT_TRUE(addr_table.grow_size(column::kBlockSize));
158+
159+
for (uint64_t i = 0; i < column::kBlockSize; i++) {
160+
ASSERT_FALSE(addr_table.full());
155161
ASSERT_TRUE(addr_table.push_back(i));
156162
}
157-
158-
ASSERT_EQ(addr_table.end(), addr_table.begin() + column::kBlockSize);
159-
ASSERT_EQ(addr_table.current(), addr_table.begin() + 1234);
163+
ASSERT_TRUE(addr_table.full());
164+
ASSERT_EQ(addr_table.current(), addr_table.begin() + column::kBlockSize);
160165
}
161166

162167
// full
@@ -166,13 +171,13 @@ TEST_F(address_table_tests, empty_full_and_reset) {
166171

167172
ASSERT_FALSE(addr_table.full());
168173
for (uint64_t i = 0; i < column::kBlockSize; i++) {
174+
ASSERT_FALSE(addr_table.full());
169175
ASSERT_TRUE(addr_table.push_back(i));
170176
}
171177
ASSERT_TRUE(addr_table.full());
172178

173179
addr_table.pop_back();
174180
ASSERT_FALSE(addr_table.full());
175-
ASSERT_EQ(addr_table.current() + 1, addr_table.end());
176181

177182
addr_table.reset();
178183
ASSERT_FALSE(addr_table.full());

0 commit comments

Comments
 (0)