Skip to content

Commit 7031fc6

Browse files
committed
[opt](packed-file) Avoid unnecessary HEAD Object requests when opening an S3 object
1 parent e3e0590 commit 7031fc6

File tree

7 files changed

+20
-8
lines changed

7 files changed

+20
-8
lines changed

be/src/cloud/cloud_rowset_writer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ Status CloudRowsetWriter::_collect_packed_slice_location(io::FileWriter* file_wr
195195
}
196196

197197
rowset_meta->add_packed_slice_location(file_path, index.packed_file_path, index.offset,
198-
index.size);
198+
index.size, index.packed_file_size);
199199
LOG(INFO) << "collect packed file index: " << file_path << " -> " << index.packed_file_path
200200
<< ", offset: " << index.offset << ", size: " << index.size;
201201
return Status::OK();

be/src/io/fs/packed_file_manager.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,15 @@ void PackedFileManager::process_uploading_packed_files() {
609609
first_slice = false;
610610
slices_stream << small_file_path << "(txn=" << index.txn_id
611611
<< ", offset=" << index.offset << ", size=" << index.size << ")";
612+
613+
// Update packed_file_size in global index
614+
{
615+
std::lock_guard<std::mutex> global_lock(_global_index_mutex);
616+
auto it = _global_slice_locations.find(small_file_path);
617+
if (it != _global_slice_locations.end()) {
618+
it->second.packed_file_size = packed_file->total_size;
619+
}
620+
}
612621
}
613622
LOG(INFO) << "Packed file " << packed_file->packed_file_path
614623
<< " uploaded; slices=" << packed_file->slice_locations.size()

be/src/io/fs/packed_file_manager.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ struct PackedSliceLocation {
4949
std::string rowset_id;
5050
std::string resource_id;
5151
int64_t txn_id = 0;
52+
int64_t packed_file_size = -1; // Total size of the packed file, -1 means not set
5253
};
5354

5455
struct PackedAppendContext {

be/src/io/fs/packed_file_system.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,11 @@ Status PackedFileSystem::open_file_impl(const Path& file, FileReaderSPtr* reader
7171
FileReaderSPtr inner_reader;
7272
// Create a new FileReaderOptions with the correct file size
7373
FileReaderOptions local_opts = opts ? *opts : FileReaderOptions();
74-
// DCHECK(opts->file_size == -1 || opts->file_size == index.size)
75-
// << "file size is not correct, expected: " << index.size
76-
// << ", actual: " << opts->file_size;
77-
// local_opts.file_size = index.size + index.offset;
78-
local_opts.file_size = -1;
74+
// Use the packed file's total size as file_size so opening it can skip the HEAD Object request (-1 means the size is not set)
75+
local_opts.file_size = index.packed_file_size;
7976
LOG(INFO) << "open packed file: " << index.packed_file_path << ", file: " << file.native()
80-
<< ", offset: " << index.offset << ", size: " << index.size;
77+
<< ", offset: " << index.offset << ", size: " << index.size
78+
<< ", packed_file_size: " << index.packed_file_size;
8179
RETURN_IF_ERROR(
8280
_inner_fs->open_file(Path(index.packed_file_path), &inner_reader, &local_opts));
8381

be/src/olap/rowset/rowset_meta.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,8 @@ io::FileSystemSPtr RowsetMeta::fs() {
143143
index.packed_file_path = index_pb.packed_file_path();
144144
index.offset = index_pb.offset();
145145
index.size = index_pb.size();
146+
index.packed_file_size =
147+
index_pb.has_packed_file_size() ? index_pb.packed_file_size() : -1;
146148
index.tablet_id = tablet_id();
147149
index.rowset_id = _rowset_id.to_string();
148150
index.resource_id = wrapped->id();

be/src/olap/rowset/rowset_meta.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,12 +426,13 @@ class RowsetMeta : public MetadataAdder<RowsetMeta> {
426426

427427
void add_packed_slice_location(const std::string& segment_path,
428428
const std::string& packed_file_path, int64_t offset,
429-
int64_t size) {
429+
int64_t size, int64_t packed_file_size) {
430430
auto* index_map = _rowset_meta_pb.mutable_packed_slice_locations();
431431
auto& index_pb = (*index_map)[segment_path];
432432
index_pb.set_packed_file_path(packed_file_path);
433433
index_pb.set_offset(offset);
434434
index_pb.set_size(size);
435+
index_pb.set_packed_file_size(packed_file_size);
435436
}
436437

437438
private:

gensrc/proto/olap_file.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ message PackedSliceLocationPB {
6969
optional string packed_file_path = 1;
7070
optional int64 offset = 2;
7171
optional int64 size = 3;
72+
optional int64 packed_file_size = 4; // Total size of the packed file
7273
}
7374

7475
// ATTN: When adding or deleting fields, please update `message RowsetMetaCloudPB`

0 commit comments

Comments
 (0)