Skip to content

Commit ca58677

Browse files
authored
feat(io): add Resize API for efficient storage allocation (#1571)
- Add Resize() method to basic_io.h with ResizeImpl support via SFINAE
- Implement ResizeImpl for async_io, buffer_io, and memory_block_io
- Replace Write-based expansion hack with Resize in datacell classes
- Add null pointer check for memory_block_io allocation

Signed-off-by: LHT129 <tianlan.lht@antgroup.com>
1 parent e7b7a01 commit ca58677

15 files changed

+144
-14
lines changed

src/datacell/extra_info_datacell.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,7 @@ class ExtraInfoDataCell : public ExtraInfoInterface {
5454
this->max_capacity_ = new_capacity;
5555
uint64_t io_size =
5656
static_cast<uint64_t>(new_capacity) * static_cast<uint64_t>(extra_info_size_);
57-
uint8_t end_flag =
58-
127; // the value is meaningless, only to occupy the position for io allocate
59-
this->io_->Write(&end_flag, 1, io_size);
57+
this->io_->Resize(io_size);
6058
}
6159

6260
void

src/datacell/flatten_datacell.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,7 @@ class FlattenDataCell : public FlattenInterface {
9292
}
9393
this->max_capacity_ = new_capacity;
9494
uint64_t io_size = static_cast<uint64_t>(new_capacity) * static_cast<uint64_t>(code_size_);
95-
uint8_t end_flag =
96-
127; // the value is meaningless, only to occupy the position for io allocate
97-
this->io_->Write(&end_flag, 1, io_size);
95+
this->io_->Resize(io_size);
9896
}
9997

10098
void

src/datacell/graph_datacell.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -264,9 +264,7 @@ GraphDataCell<IOTmpl>::Resize(InnerIdType new_size) {
264264
}
265265
this->max_capacity_ = new_size;
266266
uint64_t io_size = static_cast<uint64_t>(new_size) * static_cast<uint64_t>(code_line_size_);
267-
uint8_t end_flag =
268-
127; // the value is meaningless, only to occupy the position for io allocate
269-
this->io_->Write(&end_flag, 1, io_size);
267+
this->io_->Resize(io_size);
270268
}
271269

272270
template <typename IOTmpl>

src/datacell/sparse_vector_datacell.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,8 @@ class SparseVectorDataCell : public FlattenInterface {
8080
}
8181
uint64_t io_size = (new_capacity - total_count_) * max_code_size_ + current_offset_;
8282
this->max_capacity_ = new_capacity;
83-
uint8_t end_flag =
84-
127; // the value is meaingless, only to occupy the position for io allocate
85-
this->io_->Write(&end_flag, 1, io_size);
86-
this->offset_io_->Write(&end_flag, 1, new_capacity * sizeof(uint32_t));
83+
this->io_->Resize(io_size);
84+
this->offset_io_->Resize(static_cast<uint64_t>(new_capacity) * sizeof(uint32_t));
8785
}
8886

8987
void

src/io/async_io.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,19 @@ AsyncIO::WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset) {
8181
fsync(wfd_);
8282
}
8383

84+
void
85+
AsyncIO::ResizeImpl(uint64_t size) {
86+
#ifdef __APPLE__
87+
auto ret = ftruncate(this->wfd_, static_cast<off_t>(size));
88+
#else
89+
auto ret = ftruncate64(this->wfd_, static_cast<int64_t>(size));
90+
#endif
91+
if (ret == -1) {
92+
throw VsagException(ErrorType::INTERNAL_ERROR, "ftruncate failed");
93+
}
94+
this->size_ = size;
95+
}
96+
8497
bool
8598
AsyncIO::ReadImpl(uint64_t size, uint64_t offset, uint8_t* data) const {
8699
bool need_release = true;

src/io/async_io.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ class AsyncIO : public BasicIO<AsyncIO> {
4040
void
4141
WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset);
4242

43+
void
44+
ResizeImpl(uint64_t size);
45+
4346
bool
4447
ReadImpl(uint64_t size, uint64_t offset, uint8_t* data) const;
4548

src/io/basic_io.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,24 @@ class BasicIO {
218218
}
219219
}
220220

221+
inline void
222+
Resize(uint64_t size) {
223+
if constexpr (has_ResizeImpl<IOTmpl>::value) {
224+
return cast().ResizeImpl(size);
225+
} else {
226+
if (size <= this->size_) {
227+
return;
228+
}
229+
ByteBuffer buffer(SERIALIZE_BUFFER_SIZE, this->allocator_);
230+
uint64_t offset = this->size_;
231+
while (offset < size) {
232+
auto cur_size = std::min(SERIALIZE_BUFFER_SIZE, size - offset);
233+
this->Write(buffer.data, cur_size, offset);
234+
offset += cur_size;
235+
}
236+
}
237+
}
238+
221239
inline int64_t
222240
GetMemoryUsage() const {
223241
return this->size_;
@@ -314,5 +332,6 @@ class BasicIO {
314332
std::declval<uint64_t>())
315333
GENERATE_HAS_MEMBER_FUNCTION(ReleaseImpl, void, std::declval<const uint8_t*>())
316334
GENERATE_HAS_MEMBER_FUNCTION(InitIOImpl, void, std::declval<const IOParamPtr&>())
335+
GENERATE_HAS_MEMBER_FUNCTION(ResizeImpl, void, std::declval<uint64_t>())
317336
};
318337
} // namespace vsag

src/io/buffer_io.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,19 @@ BufferIO::WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset) {
5858
}
5959
}
6060

61+
void
62+
BufferIO::ResizeImpl(uint64_t size) {
63+
#ifdef __APPLE__
64+
auto ret = ftruncate(this->fd_, static_cast<off_t>(size));
65+
#else
66+
auto ret = ftruncate64(this->fd_, static_cast<int64_t>(size));
67+
#endif
68+
if (ret == -1) {
69+
throw VsagException(ErrorType::INTERNAL_ERROR, "ftruncate failed");
70+
}
71+
this->size_ = size;
72+
}
73+
6174
bool
6275
BufferIO::ReadImpl(uint64_t size, uint64_t offset, uint8_t* data) const {
6376
if (size == 0) {

src/io/buffer_io.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ class BufferIO : public BasicIO<BufferIO> {
4848
void
4949
WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset);
5050

51+
void
52+
ResizeImpl(uint64_t size);
53+
5154
bool
5255
ReadImpl(uint64_t size, uint64_t offset, uint8_t* data) const;
5356

src/io/memory_block_io.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,11 +129,25 @@ MemoryBlockIO::check_and_realloc(uint64_t size) {
129129
auto cur_block_size = this->blocks_.size();
130130
this->blocks_.reserve(new_block_count);
131131
while (cur_block_size < new_block_count) {
132-
this->blocks_.emplace_back(static_cast<uint8_t*>(this->allocator_->Allocate(block_size_)));
132+
auto* ptr = static_cast<uint8_t*>(this->allocator_->Allocate(block_size_));
133+
if (ptr == nullptr) {
134+
throw VsagException(ErrorType::INTERNAL_ERROR, "MemoryBlockIO allocation failed");
135+
}
136+
this->blocks_.emplace_back(ptr);
133137
++cur_block_size;
134138
}
135139
}
136140

141+
void
142+
MemoryBlockIO::ResizeImpl(uint64_t size) {
143+
if (size <= this->size_) {
144+
this->size_ = size;
145+
return;
146+
}
147+
check_and_realloc(size);
148+
this->size_ = size;
149+
}
150+
137151
static int
138152
countr_zero(uint64_t x) {
139153
if (x == 0) {

0 commit comments

Comments
 (0)