Skip to content

Commit 5e2a022

Browse files
Enmk, ianton-ru
authored and committed
Merge pull request #742 from Altinity/feature/lazy_load_metadata
Make DataLake metadata more lazy
1 parent 852e5f5 commit 5e2a022

File tree

5 files changed

+28
-24
lines changed

5 files changed

+28
-24
lines changed

src/Disks/ObjectStorages/IObjectStorage.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,4 +148,18 @@ std::string RelativePathWithMetadata::CommandInTaskResponse::to_string() const
148148
return oss.str();
149149
}
150150

151+
152+
/// Populates `metadata` on demand: when it is not set yet, asks `object_storage`
/// for the metadata of this object's path and stores the result in `metadata`.
/// When `metadata` is already set, the function does nothing.
///
/// NOTE(review): the body reads `query_settings.ignore_non_existent_file`, but
/// `query_settings` is not a parameter of this function. The call in
/// StorageObjectStorageSource.cpp passes `query_settings.ignore_non_existent_file`
/// as a second argument, while this definition and the declaration in
/// IObjectStorage.h take a single parameter. Confirm the intended signature
/// (presumably an extra `bool ignore_non_existent_file` parameter).
void RelativePathWithMetadata::loadMetadata(ObjectStoragePtr object_storage)
153+
{
154+
if (!metadata)
155+
{
156+
/// For an archive the metadata is requested for the archive path, otherwise for the object path.
const auto & path = isArchive() ? getPathToArchive() : getPath();
157+
158+
/// Presumably the `try` variant leaves `metadata` empty for a missing object instead of
/// failing (the code removed from StorageObjectStorageSource.cpp checked its result for
/// emptiness) - TODO confirm. Callers should not assume `metadata` is set afterwards.
if (query_settings.ignore_non_existent_file)
159+
metadata = object_storage->tryGetObjectMetadata(path);
160+
else
161+
metadata = object_storage->getObjectMetadata(path);
162+
}
163+
}
164+
151165
}

src/Disks/ObjectStorages/IObjectStorage.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ struct RelativePathWithMetadata
174174
std::optional<DataFileMetaInfoPtr> getFileMetaInfo() const { return file_meta_info; }
175175

176176
const CommandInTaskResponse & getCommand() const { return command; }
177+
178+
void loadMetadata(ObjectStoragePtr object_storage);
177179
};
178180

179181
struct ObjectKeyWithMetadata

src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,16 @@ class KeysIterator : public IObjectIterator
5050
return nullptr;
5151

5252
auto key = data_files[current_index];
53-
auto object_metadata = object_storage->getObjectMetadata(key);
5453

5554
if (callback)
56-
callback(FileProgress(0, object_metadata.size_bytes));
57-
58-
return std::make_shared<ObjectInfo>(key, std::move(object_metadata));
55+
{
56+
/// Too expensive to always load the size for metadata
57+
/// because it requires an API call to external storage.
58+
/// In many cases only keys are needed.
59+
callback(FileProgress(0, 1));
60+
}
61+
62+
return std::make_shared<ObjectInfo>(key, std::nullopt);
5963
}
6064
}
6165

src/Storages/ObjectStorage/ReadBufferIterator.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,7 @@ std::optional<ColumnsDescription> ReadBufferIterator::tryGetColumnsFromCache(
7676
const auto & object_info = (*it);
7777
auto get_last_mod_time = [&] -> std::optional<time_t>
7878
{
79-
const auto & path = object_info->isArchive() ? object_info->getPathToArchive() : object_info->getPath();
80-
if (!object_info->metadata)
81-
object_info->metadata = object_storage->tryGetObjectMetadata(path);
82-
79+
object_info->loadMetadata(object_storage);
8380
return object_info->metadata
8481
? std::optional<time_t>(object_info->metadata->last_modified.epochTime())
8582
: std::nullopt;
@@ -151,7 +148,6 @@ std::unique_ptr<ReadBuffer> ReadBufferIterator::recreateLastReadBuffer()
151148
{
152149
auto context = getContext();
153150

154-
const auto & path = current_object_info->isArchive() ? current_object_info->getPathToArchive() : current_object_info->getPath();
155151
auto impl = createReadBuffer(*current_object_info, object_storage, context, getLogger("ReadBufferIterator"));
156152

157153
const auto compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->getCompressionMethod());
@@ -250,6 +246,8 @@ ReadBufferIterator::Data ReadBufferIterator::next()
250246
prev_read_keys_size = read_keys.size();
251247
}
252248

249+
current_object_info->loadMetadata(object_storage);
250+
253251
if (query_settings.skip_empty_files
254252
&& current_object_info->metadata && current_object_info->metadata->size_bytes == 0)
255253
continue;

src/Storages/ObjectStorage/StorageObjectStorageSource.cpp

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -511,21 +511,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
511511
if (object_info->getPath().empty())
512512
return {};
513513

514-
if (!object_info->metadata)
515-
{
516-
const auto & path = object_info->isArchive() ? object_info->getPathToArchive() : object_info->getPath();
517-
518-
if (query_settings.ignore_non_existent_file)
519-
{
520-
auto metadata = object_storage->tryGetObjectMetadata(path);
521-
if (!metadata)
522-
return {};
523-
524-
object_info->metadata = metadata;
525-
}
526-
else
527-
object_info->metadata = object_storage->getObjectMetadata(path);
528-
}
514+
object_info->loadMetadata(object_storage, query_settings.ignore_non_existent_file);
529515
}
530516
while (not_a_path || (query_settings.skip_empty_files && object_info->metadata->size_bytes == 0));
531517

0 commit comments

Comments
 (0)