Skip to content

Commit 6940b68

Browse files
authored
Merge branch 'antalya-25.8' into mf_25.8_hybrid2
2 parents 87f0a4e + daa90b5 commit 6940b68

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

53 files changed

+831
-320
lines changed

src/Databases/DataLake/GlueCatalog.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -594,7 +594,7 @@ String GlueCatalog::resolveMetadataPathFromTableLocation(const String & table_lo
594594
return "";
595595

596596
// List all files in metadata directory
597-
DB::RelativePathsWithMetadata files;
597+
DB::PathsWithMetadata files;
598598
object_storage->listObjects(metadata_dir_path, files, 0);
599599

600600
// Filter for .metadata.json files and find the most recent one

src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ class AzureIteratorAsync final : public IObjectStorageIteratorAsync
7373
}
7474

7575
private:
76-
bool getBatchAndCheckNext(RelativePathsWithMetadata & batch) override
76+
bool getBatchAndCheckNext(PathsWithMetadata & batch) override
7777
{
7878
ProfileEvents::increment(ProfileEvents::AzureListObjects);
7979
if (client->IsClientForDisk())
@@ -87,7 +87,7 @@ class AzureIteratorAsync final : public IObjectStorageIteratorAsync
8787

8888
for (const auto & blob : blobs_list)
8989
{
90-
batch.emplace_back(std::make_shared<RelativePathWithMetadata>(
90+
batch.emplace_back(std::make_shared<PathWithMetadata>(
9191
blob.Name,
9292
ObjectMetadata{
9393
static_cast<uint64_t>(blob.BlobSize),
@@ -169,7 +169,7 @@ ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_pr
169169
return std::make_shared<AzureIteratorAsync>(path_prefix, client_ptr, max_keys ? max_keys : settings_ptr->list_object_keys_size);
170170
}
171171

172-
void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
172+
void AzureObjectStorage::listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const
173173
{
174174
auto client_ptr = client.get();
175175

@@ -195,7 +195,7 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith
195195

196196
for (const auto & blob : blobs_list)
197197
{
198-
children.emplace_back(std::make_shared<RelativePathWithMetadata>(
198+
children.emplace_back(std::make_shared<PathWithMetadata>(
199199
blob.Name,
200200
ObjectMetadata{
201201
static_cast<uint64_t>(blob.BlobSize),

src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ class AzureObjectStorage : public IObjectStorage
3737

3838
bool supportsListObjectsCache() override { return true; }
3939

40-
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
40+
void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const override;
4141

4242
/// Sanitizer build may crash with max_keys=1; this looks like a false positive.
4343
ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const override;

src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -193,7 +193,7 @@ void CachedObjectStorage::copyObject( // NOLINT
193193
object_storage->copyObject(object_from, object_to, read_settings, write_settings, object_to_attributes);
194194
}
195195

196-
void CachedObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
196+
void CachedObjectStorage::listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const
197197
{
198198
object_storage->listObjects(path, children, max_keys);
199199
}

src/Disks/ObjectStorages/Cached/CachedObjectStorage.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ class CachedObjectStorage final : public IObjectStorage
6464
IObjectStorage & object_storage_to,
6565
std::optional<ObjectAttributes> object_to_attributes = {}) override;
6666

67-
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
67+
void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const override;
6868

6969
ObjectMetadata getObjectMetadata(const std::string & path) const override;
7070

src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -167,7 +167,7 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) co
167167
return metadata;
168168
}
169169

170-
void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
170+
void HDFSObjectStorage::listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const
171171
{
172172
initializeHDFSFS();
173173
LOG_TEST(log, "Trying to list files for {}", path);
@@ -203,7 +203,7 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM
203203
}
204204
else
205205
{
206-
children.emplace_back(std::make_shared<RelativePathWithMetadata>(
206+
children.emplace_back(std::make_shared<PathWithMetadata>(
207207
String(file_path),
208208
ObjectMetadata{
209209
static_cast<uint64_t>(ls.file_info[i].mSize),

src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ class HDFSObjectStorage : public IObjectStorage, public HDFSErrorWrapper
9292
const WriteSettings & write_settings,
9393
std::optional<ObjectAttributes> object_to_attributes = {}) override;
9494

95-
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
95+
void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const override;
9696

9797
String getObjectsNamespace() const override { return ""; }
9898

src/Disks/ObjectStorages/IObjectStorage.cpp

Lines changed: 11 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -30,20 +30,20 @@ const MetadataStorageMetrics & IObjectStorage::getMetadataStorageMetrics() const
3030

3131
bool IObjectStorage::existsOrHasAnyChild(const std::string & path) const
3232
{
33-
RelativePathsWithMetadata files;
33+
PathsWithMetadata files;
3434
listObjects(path, files, 1);
3535
return !files.empty();
3636
}
3737

38-
void IObjectStorage::listObjects(const std::string &, RelativePathsWithMetadata &, size_t) const
38+
void IObjectStorage::listObjects(const std::string &, PathsWithMetadata &, size_t) const
3939
{
4040
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "listObjects() is not supported");
4141
}
4242

4343

4444
ObjectStorageIteratorPtr IObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const
4545
{
46-
RelativePathsWithMetadata files;
46+
PathsWithMetadata files;
4747
listObjects(path_prefix, files, max_keys);
4848

4949
return std::make_shared<ObjectStorageIteratorFromList>(std::move(files));
@@ -102,21 +102,14 @@ WriteSettings IObjectStorage::patchSettings(const WriteSettings & write_settings
102102
return write_settings;
103103
}
104104

105-
RelativePathWithMetadata::RelativePathWithMetadata(const DataFileInfo & info, std::optional<ObjectMetadata> metadata_)
106-
: metadata(std::move(metadata_))
107-
{
108-
relative_path = info.file_path;
109-
file_meta_info = info.file_meta_info;
110-
}
111-
112-
std::string RelativePathWithMetadata::getPathOrPathToArchiveIfArchive() const
105+
std::string PathWithMetadata::getPathOrPathToArchiveIfArchive() const
113106
{
114107
if (isArchive())
115108
return getPathToArchive();
116109
return getPath();
117110
}
118111

119-
RelativePathWithMetadata::CommandInTaskResponse::CommandInTaskResponse(const std::string & task)
112+
PathWithMetadata::CommandInTaskResponse::CommandInTaskResponse(const std::string & task)
120113
{
121114
Poco::JSON::Parser parser;
122115
try
@@ -136,7 +129,7 @@ RelativePathWithMetadata::CommandInTaskResponse::CommandInTaskResponse(const std
136129
}
137130
}
138131

139-
std::string RelativePathWithMetadata::CommandInTaskResponse::to_string() const
132+
std::string PathWithMetadata::CommandInTaskResponse::to_string() const
140133
{
141134
Poco::JSON::Object json;
142135
if (retry_after_us.has_value())
@@ -149,16 +142,18 @@ std::string RelativePathWithMetadata::CommandInTaskResponse::to_string() const
149142
}
150143

151144

152-
void RelativePathWithMetadata::loadMetadata(ObjectStoragePtr object_storage, bool ignore_non_existent_file)
145+
void PathWithMetadata::loadMetadata(ObjectStoragePtr object_storage, bool ignore_non_existent_file)
153146
{
154147
if (!metadata)
155148
{
156149
const auto & path = isArchive() ? getPathToArchive() : getPath();
157150

151+
auto storage_to_use = object_storage_to_use ? object_storage_to_use : object_storage;
152+
158153
if (ignore_non_existent_file)
159-
metadata = object_storage->tryGetObjectMetadata(path);
154+
metadata = storage_to_use->tryGetObjectMetadata(path);
160155
else
161-
metadata = object_storage->getObjectMetadata(path);
156+
metadata = storage_to_use->getObjectMetadata(path);
162157
}
163158
}
164159

src/Disks/ObjectStorages/IObjectStorage.h

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -108,13 +108,12 @@ struct ObjectMetadata
108108
};
109109

110110

111-
struct DataFileInfo;
112111
class DataFileMetaInfo;
113112
using DataFileMetaInfoPtr = std::shared_ptr<DataFileMetaInfo>;
114113

115114
struct DataLakeObjectMetadata;
116115

117-
struct RelativePathWithMetadata
116+
struct PathWithMetadata
118117
{
119118
class CommandInTaskResponse
120119
{
@@ -143,28 +142,35 @@ struct RelativePathWithMetadata
143142
std::optional<DataFileMetaInfoPtr> file_meta_info;
144143
/// Retry request after short pause
145144
CommandInTaskResponse command;
145+
std::optional<String> absolute_path;
146+
ObjectStoragePtr object_storage_to_use = nullptr;
146147

147-
RelativePathWithMetadata() = default;
148+
PathWithMetadata() = default;
148149

149-
explicit RelativePathWithMetadata(String command_or_path, std::optional<ObjectMetadata> metadata_ = std::nullopt)
150+
explicit PathWithMetadata(
151+
const String & command_or_path,
152+
std::optional<ObjectMetadata> metadata_ = std::nullopt,
153+
std::optional<String> absolute_path_ = std::nullopt,
154+
ObjectStoragePtr object_storage_to_use_ = nullptr)
150155
: relative_path(std::move(command_or_path))
151156
, metadata(std::move(metadata_))
152157
, command(relative_path)
158+
, absolute_path((absolute_path_.has_value() && !absolute_path_.value().empty()) ? absolute_path_ : std::nullopt)
159+
, object_storage_to_use(object_storage_to_use_)
153160
{
154161
if (command.is_parsed())
155162
relative_path = "";
156163
}
157164

158-
explicit RelativePathWithMetadata(const DataFileInfo & info, std::optional<ObjectMetadata> metadata_ = std::nullopt);
165+
PathWithMetadata(const PathWithMetadata & other) = default;
159166

160-
RelativePathWithMetadata(const RelativePathWithMetadata & other) = default;
161-
162-
virtual ~RelativePathWithMetadata() = default;
167+
virtual ~PathWithMetadata() = default;
163168

164169
virtual std::string getFileName() const { return std::filesystem::path(relative_path).filename(); }
165170
virtual std::string getFileNameWithoutExtension() const { return std::filesystem::path(relative_path).stem(); }
166171

167172
virtual std::string getPath() const { return relative_path; }
173+
virtual std::optional<std::string> getAbsolutePath() const { return absolute_path; }
168174
virtual bool isArchive() const { return false; }
169175
virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
170176
virtual size_t fileSizeInArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
@@ -176,6 +182,8 @@ struct RelativePathWithMetadata
176182
const CommandInTaskResponse & getCommand() const { return command; }
177183

178184
void loadMetadata(ObjectStoragePtr object_storage, bool ignore_non_existent_file = true);
185+
186+
ObjectStoragePtr getObjectStorage() const { return object_storage_to_use; }
179187
};
180188

181189
struct ObjectKeyWithMetadata
@@ -191,8 +199,8 @@ struct ObjectKeyWithMetadata
191199
{}
192200
};
193201

194-
using RelativePathWithMetadataPtr = std::shared_ptr<RelativePathWithMetadata>;
195-
using RelativePathsWithMetadata = std::vector<RelativePathWithMetadataPtr>;
202+
using PathWithMetadataPtr = std::shared_ptr<PathWithMetadata>;
203+
using PathsWithMetadata = std::vector<PathWithMetadataPtr>;
196204
using ObjectKeysWithMetadata = std::vector<ObjectKeyWithMetadata>;
197205

198206
class IObjectStorageIterator;
@@ -233,7 +241,7 @@ class IObjectStorage
233241
virtual bool existsOrHasAnyChild(const std::string & path) const;
234242

235243
/// List objects recursively by certain prefix.
236-
virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const;
244+
virtual void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const;
237245

238246
/// List objects recursively by certain prefix. Use it instead of listObjects, if you want to list objects lazily.
239247
virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const;

src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -151,7 +151,7 @@ ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & path) c
151151
return object_metadata;
152152
}
153153

154-
void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t/* max_keys */) const
154+
void LocalObjectStorage::listObjects(const std::string & path, PathsWithMetadata & children, size_t/* max_keys */) const
155155
{
156156
if (!fs::exists(path) || !fs::is_directory(path))
157157
return;
@@ -164,7 +164,7 @@ void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWith
164164
continue;
165165
}
166166

167-
children.emplace_back(std::make_shared<RelativePathWithMetadata>(entry.path(), getObjectMetadata(entry.path())));
167+
children.emplace_back(std::make_shared<PathWithMetadata>(entry.path(), getObjectMetadata(entry.path())));
168168
}
169169
}
170170

0 commit comments

Comments
 (0)