Skip to content

Commit e0663d5

Browse files
committed
first draft
fix build; fix ObjectInfo creation; another cleaned attempt; fix schema id cache population; correct splitting of URI into parts; fix storage schema normalization; store per-file object_storage_ptr in object_info; make new storages properly; remove cloneObjectStorage; parse s3 uri differently; tmp
1 parent 852e5f5 commit e0663d5

36 files changed

+476
-196
lines changed

src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ class AzureIteratorAsync final : public IObjectStorageIteratorAsync
7171
}
7272

7373
private:
74-
bool getBatchAndCheckNext(RelativePathsWithMetadata & batch) override
74+
bool getBatchAndCheckNext(PathsWithMetadata & batch) override
7575
{
7676
ProfileEvents::increment(ProfileEvents::AzureListObjects);
7777
if (client->IsClientForDisk())
@@ -84,7 +84,7 @@ class AzureIteratorAsync final : public IObjectStorageIteratorAsync
8484

8585
for (const auto & blob : blobs_list)
8686
{
87-
batch.emplace_back(std::make_shared<RelativePathWithMetadata>(
87+
batch.emplace_back(std::make_shared<PathWithMetadata>(
8888
blob.Name,
8989
ObjectMetadata{
9090
static_cast<uint64_t>(blob.BlobSize),
@@ -166,7 +166,7 @@ ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_pr
166166
return std::make_shared<AzureIteratorAsync>(path_prefix, client_ptr, max_keys ? max_keys : settings_ptr->list_object_keys_size);
167167
}
168168

169-
void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
169+
void AzureObjectStorage::listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const
170170
{
171171
auto client_ptr = client.get();
172172

@@ -188,7 +188,7 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith
188188

189189
for (const auto & blob : blobs_list)
190190
{
191-
children.emplace_back(std::make_shared<RelativePathWithMetadata>(
191+
children.emplace_back(std::make_shared<PathWithMetadata>(
192192
blob.Name,
193193
ObjectMetadata{
194194
static_cast<uint64_t>(blob.BlobSize),

src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ class AzureObjectStorage : public IObjectStorage
3737

3838
bool supportsListObjectsCache() override { return true; }
3939

40-
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
40+
void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const override;
4141

4242
/// Sanitizer build may crash with max_keys=1; this looks like a false positive.
4343
ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const override;

src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -193,7 +193,7 @@ void CachedObjectStorage::copyObject( // NOLINT
193193
object_storage->copyObject(object_from, object_to, read_settings, write_settings, object_to_attributes);
194194
}
195195

196-
void CachedObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
196+
void CachedObjectStorage::listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const
197197
{
198198
object_storage->listObjects(path, children, max_keys);
199199
}

src/Disks/ObjectStorages/Cached/CachedObjectStorage.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ class CachedObjectStorage final : public IObjectStorage
6464
IObjectStorage & object_storage_to,
6565
std::optional<ObjectAttributes> object_to_attributes = {}) override;
6666

67-
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
67+
void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const override;
6868

6969
ObjectMetadata getObjectMetadata(const std::string & path) const override;
7070

src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -167,7 +167,7 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) co
167167
return metadata;
168168
}
169169

170-
void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
170+
void HDFSObjectStorage::listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const
171171
{
172172
initializeHDFSFS();
173173
LOG_TEST(log, "Trying to list files for {}", path);
@@ -203,7 +203,7 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM
203203
}
204204
else
205205
{
206-
children.emplace_back(std::make_shared<RelativePathWithMetadata>(
206+
children.emplace_back(std::make_shared<PathWithMetadata>(
207207
String(file_path),
208208
ObjectMetadata{
209209
static_cast<uint64_t>(ls.file_info[i].mSize),

src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ class HDFSObjectStorage : public IObjectStorage, public HDFSErrorWrapper
9292
const WriteSettings & write_settings,
9393
std::optional<ObjectAttributes> object_to_attributes = {}) override;
9494

95-
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
95+
void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const override;
9696

9797
String getObjectsNamespace() const override { return ""; }
9898

src/Disks/ObjectStorages/IObjectStorage.cpp

Lines changed: 6 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -30,20 +30,20 @@ const MetadataStorageMetrics & IObjectStorage::getMetadataStorageMetrics() const
3030

3131
bool IObjectStorage::existsOrHasAnyChild(const std::string & path) const
3232
{
33-
RelativePathsWithMetadata files;
33+
PathsWithMetadata files;
3434
listObjects(path, files, 1);
3535
return !files.empty();
3636
}
3737

38-
void IObjectStorage::listObjects(const std::string &, RelativePathsWithMetadata &, size_t) const
38+
void IObjectStorage::listObjects(const std::string &, PathsWithMetadata &, size_t) const
3939
{
4040
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "listObjects() is not supported");
4141
}
4242

4343

4444
ObjectStorageIteratorPtr IObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const
4545
{
46-
RelativePathsWithMetadata files;
46+
PathsWithMetadata files;
4747
listObjects(path_prefix, files, max_keys);
4848

4949
return std::make_shared<ObjectStorageIteratorFromList>(std::move(files));
@@ -102,21 +102,14 @@ WriteSettings IObjectStorage::patchSettings(const WriteSettings & write_settings
102102
return write_settings;
103103
}
104104

105-
RelativePathWithMetadata::RelativePathWithMetadata(const DataFileInfo & info, std::optional<ObjectMetadata> metadata_)
106-
: metadata(std::move(metadata_))
107-
{
108-
relative_path = info.file_path;
109-
file_meta_info = info.file_meta_info;
110-
}
111-
112-
std::string RelativePathWithMetadata::getPathOrPathToArchiveIfArchive() const
105+
std::string PathWithMetadata::getPathOrPathToArchiveIfArchive() const
113106
{
114107
if (isArchive())
115108
return getPathToArchive();
116109
return getPath();
117110
}
118111

119-
RelativePathWithMetadata::CommandInTaskResponse::CommandInTaskResponse(const std::string & task)
112+
PathWithMetadata::CommandInTaskResponse::CommandInTaskResponse(const std::string & task)
120113
{
121114
Poco::JSON::Parser parser;
122115
try
@@ -136,7 +129,7 @@ RelativePathWithMetadata::CommandInTaskResponse::CommandInTaskResponse(const std
136129
}
137130
}
138131

139-
std::string RelativePathWithMetadata::CommandInTaskResponse::to_string() const
132+
std::string PathWithMetadata::CommandInTaskResponse::to_string() const
140133
{
141134
Poco::JSON::Object json;
142135
if (retry_after_us.has_value())

src/Disks/ObjectStorages/IObjectStorage.h

Lines changed: 32 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,7 @@ using DataFileMetaInfoPtr = std::shared_ptr<DataFileMetaInfo>;
114114

115115
struct DataLakeObjectMetadata;
116116

117-
struct RelativePathWithMetadata
117+
struct PathWithMetadata
118118
{
119119
class CommandInTaskResponse
120120
{
@@ -143,28 +143,49 @@ struct RelativePathWithMetadata
143143
std::optional<DataFileMetaInfoPtr> file_meta_info;
144144
/// Retry request after short pause
145145
CommandInTaskResponse command;
146+
std::optional<String> absolute_path;
147+
std::optional<ObjectStoragePtr> object_storage_to_use = std::nullopt;
146148

147-
RelativePathWithMetadata() = default;
149+
PathWithMetadata() = default;
148150

149-
explicit RelativePathWithMetadata(String command_or_path, std::optional<ObjectMetadata> metadata_ = std::nullopt)
151+
explicit PathWithMetadata(
152+
const String & command_or_path,
153+
std::optional<ObjectMetadata> metadata_ = std::nullopt,
154+
std::optional<String> absolute_path_ = std::nullopt,
155+
std::optional<ObjectStoragePtr> object_storage_to_use_ = std::nullopt)
150156
: relative_path(std::move(command_or_path))
151157
, metadata(std::move(metadata_))
152158
, command(relative_path)
159+
, absolute_path(absolute_path_)
160+
, object_storage_to_use(object_storage_to_use_)
153161
{
154162
if (command.is_parsed())
155163
relative_path = "";
156164
}
157165

158-
explicit RelativePathWithMetadata(const DataFileInfo & info, std::optional<ObjectMetadata> metadata_ = std::nullopt);
166+
explicit PathWithMetadata(
167+
const DataFileInfo & info,
168+
std::optional<ObjectMetadata> metadata_ = std::nullopt,
169+
std::optional<String> absolute_path_ = std::nullopt,
170+
std::optional<ObjectStoragePtr> object_storage_to_use_ = std::nullopt)
171+
: metadata(std::move(metadata_))
172+
, absolute_path(absolute_path_)
173+
, object_storage_to_use(object_storage_to_use_)
174+
{
175+
relative_path = info.file_path;
176+
file_meta_info = info.file_meta_info;
177+
}
159178

160-
RelativePathWithMetadata(const RelativePathWithMetadata & other) = default;
179+
PathWithMetadata(const PathWithMetadata & other) = default;
161180

162-
virtual ~RelativePathWithMetadata() = default;
181+
virtual ~PathWithMetadata() = default;
182+
//>>>>>>> 53a743ff8c6 (first draft)
163183

164184
virtual std::string getFileName() const { return std::filesystem::path(relative_path).filename(); }
165185
virtual std::string getFileNameWithoutExtension() const { return std::filesystem::path(relative_path).stem(); }
166186

167187
virtual std::string getPath() const { return relative_path; }
188+
virtual std::optional<std::string> getAbsolutePath() const { return absolute_path; }
168189
virtual bool isArchive() const { return false; }
169190
virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
170191
virtual size_t fileSizeInArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
@@ -174,6 +195,8 @@ struct RelativePathWithMetadata
174195
std::optional<DataFileMetaInfoPtr> getFileMetaInfo() const { return file_meta_info; }
175196

176197
const CommandInTaskResponse & getCommand() const { return command; }
198+
199+
std::optional<ObjectStoragePtr> getObjectStorage() const { return object_storage_to_use; }
177200
};
178201

179202
struct ObjectKeyWithMetadata
@@ -189,8 +212,8 @@ struct ObjectKeyWithMetadata
189212
{}
190213
};
191214

192-
using RelativePathWithMetadataPtr = std::shared_ptr<RelativePathWithMetadata>;
193-
using RelativePathsWithMetadata = std::vector<RelativePathWithMetadataPtr>;
215+
using PathWithMetadataPtr = std::shared_ptr<PathWithMetadata>;
216+
using PathsWithMetadata = std::vector<PathWithMetadataPtr>;
194217
using ObjectKeysWithMetadata = std::vector<ObjectKeyWithMetadata>;
195218

196219
class IObjectStorageIterator;
@@ -231,7 +254,7 @@ class IObjectStorage
231254
virtual bool existsOrHasAnyChild(const std::string & path) const;
232255

233256
/// List objects recursively by certain prefix.
234-
virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const;
257+
virtual void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const;
235258

236259
/// List objects recursively by certain prefix. Use it instead of listObjects, if you want to list objects lazily.
237260
virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const;

src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -151,7 +151,7 @@ ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & path) c
151151
return object_metadata;
152152
}
153153

154-
void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t/* max_keys */) const
154+
void LocalObjectStorage::listObjects(const std::string & path, PathsWithMetadata & children, size_t/* max_keys */) const
155155
{
156156
if (!fs::exists(path) || !fs::is_directory(path))
157157
return;
@@ -164,7 +164,7 @@ void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWith
164164
continue;
165165
}
166166

167-
children.emplace_back(std::make_shared<RelativePathWithMetadata>(entry.path(), getObjectMetadata(entry.path())));
167+
children.emplace_back(std::make_shared<PathWithMetadata>(entry.path(), getObjectMetadata(entry.path())));
168168
}
169169
}
170170

src/Disks/ObjectStorages/Local/LocalObjectStorage.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ class LocalObjectStorage : public IObjectStorage
6262

6363
ObjectMetadata getObjectMetadata(const std::string & path) const override;
6464

65-
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
65+
void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const override;
6666

6767
bool existsOrHasAnyChild(const std::string & path) const override;
6868

0 commit comments

Comments
 (0)