Skip to content

Commit 53a743f

Browse files
committed
first draft
fix build; fix ObjectInfo creation; another cleaned attempt; fix schema id cache population; correct splitting of URI into parts; fix storage schema normalization; store per-file object_storage_ptr in object_info; make new storages properly; remove cloneObjectStorage; parse s3 uri differently; tmp
1 parent 48110e0 commit 53a743f

33 files changed

+685
-212
lines changed

src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ class AzureIteratorAsync final : public IObjectStorageIteratorAsync
6565
}
6666

6767
private:
68-
bool getBatchAndCheckNext(RelativePathsWithMetadata & batch) override
68+
bool getBatchAndCheckNext(PathsWithMetadata & batch) override
6969
{
7070
ProfileEvents::increment(ProfileEvents::AzureListObjects);
7171
if (client->IsClientForDisk())
@@ -78,7 +78,7 @@ class AzureIteratorAsync final : public IObjectStorageIteratorAsync
7878

7979
for (const auto & blob : blobs_list)
8080
{
81-
batch.emplace_back(std::make_shared<RelativePathWithMetadata>(
81+
batch.emplace_back(std::make_shared<PathWithMetadata>(
8282
blob.Name,
8383
ObjectMetadata{
8484
static_cast<uint64_t>(blob.BlobSize),
@@ -160,7 +160,7 @@ ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_pr
160160
return std::make_shared<AzureIteratorAsync>(path_prefix, client_ptr, max_keys ? max_keys : settings_ptr->list_object_keys_size);
161161
}
162162

163-
void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
163+
void AzureObjectStorage::listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const
164164
{
165165
auto client_ptr = client.get();
166166

@@ -182,7 +182,7 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith
182182

183183
for (const auto & blob : blobs_list)
184184
{
185-
children.emplace_back(std::make_shared<RelativePathWithMetadata>(
185+
children.emplace_back(std::make_shared<PathWithMetadata>(
186186
blob.Name,
187187
ObjectMetadata{
188188
static_cast<uint64_t>(blob.BlobSize),

src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class AzureObjectStorage : public IObjectStorage
3737

3838
bool supportsListObjectsCache() override { return true; }
3939

40-
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
40+
void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const override;
4141

4242
/// Sanitizer build may crash with max_keys=1; this looks like a false positive.
4343
ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const override;

src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ void CachedObjectStorage::copyObject( // NOLINT
193193
object_storage->copyObject(object_from, object_to, read_settings, write_settings, object_to_attributes);
194194
}
195195

196-
void CachedObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
196+
void CachedObjectStorage::listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const
197197
{
198198
object_storage->listObjects(path, children, max_keys);
199199
}

src/Disks/ObjectStorages/Cached/CachedObjectStorage.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ class CachedObjectStorage final : public IObjectStorage
6464
IObjectStorage & object_storage_to,
6565
std::optional<ObjectAttributes> object_to_attributes = {}) override;
6666

67-
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
67+
void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const override;
6868

6969
ObjectMetadata getObjectMetadata(const std::string & path) const override;
7070

src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) co
167167
return metadata;
168168
}
169169

170-
void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
170+
void HDFSObjectStorage::listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const
171171
{
172172
initializeHDFSFS();
173173
LOG_TEST(log, "Trying to list files for {}", path);
@@ -203,7 +203,7 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM
203203
}
204204
else
205205
{
206-
children.emplace_back(std::make_shared<RelativePathWithMetadata>(
206+
children.emplace_back(std::make_shared<PathWithMetadata>(
207207
String(file_path),
208208
ObjectMetadata{
209209
static_cast<uint64_t>(ls.file_info[i].mSize),

src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ class HDFSObjectStorage : public IObjectStorage, public HDFSErrorWrapper
9292
const WriteSettings & write_settings,
9393
std::optional<ObjectAttributes> object_to_attributes = {}) override;
9494

95-
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
95+
void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const override;
9696

9797
String getObjectsNamespace() const override { return ""; }
9898

src/Disks/ObjectStorages/IObjectStorage.cpp

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,20 +32,20 @@ const MetadataStorageMetrics & IObjectStorage::getMetadataStorageMetrics() const
3232

3333
bool IObjectStorage::existsOrHasAnyChild(const std::string & path) const
3434
{
35-
RelativePathsWithMetadata files;
35+
PathsWithMetadata files;
3636
listObjects(path, files, 1);
3737
return !files.empty();
3838
}
3939

40-
void IObjectStorage::listObjects(const std::string &, RelativePathsWithMetadata &, size_t) const
40+
void IObjectStorage::listObjects(const std::string &, PathsWithMetadata &, size_t) const
4141
{
4242
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "listObjects() is not supported");
4343
}
4444

4545

4646
ObjectStorageIteratorPtr IObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const
4747
{
48-
RelativePathsWithMetadata files;
48+
PathsWithMetadata files;
4949
listObjects(path_prefix, files, max_keys);
5050

5151
return std::make_shared<ObjectStorageIteratorFromList>(std::move(files));
@@ -104,9 +104,15 @@ WriteSettings IObjectStorage::patchSettings(const WriteSettings & write_settings
104104
return write_settings;
105105
}
106106

107-
RelativePathWithMetadata::RelativePathWithMetadata(const String & task_string, std::optional<ObjectMetadata> metadata_)
107+
PathWithMetadata::PathWithMetadata(
108+
const String & task_string,
109+
std::optional<ObjectMetadata> metadata_,
110+
std::optional<String> absolute_path_,
111+
std::optional<ObjectStoragePtr> object_storage_to_use_)
108112
: metadata(std::move(metadata_))
109113
, command(task_string)
114+
, absolute_path(absolute_path_)
115+
, object_storage_to_use(object_storage_to_use_)
110116
{
111117
if (!command.isParsed())
112118
relative_path = task_string;
@@ -119,14 +125,20 @@ RelativePathWithMetadata::RelativePathWithMetadata(const String & task_string, s
119125
}
120126
}
121127

122-
RelativePathWithMetadata::RelativePathWithMetadata(const DataFileInfo & info, std::optional<ObjectMetadata> metadata_)
128+
PathWithMetadata::PathWithMetadata(
129+
const DataFileInfo & info,
130+
std::optional<ObjectMetadata> metadata_,
131+
std::optional<String> absolute_path_,
132+
std::optional<ObjectStoragePtr> object_storage_to_use_)
123133
: metadata(std::move(metadata_))
134+
, absolute_path(absolute_path_)
135+
, object_storage_to_use(object_storage_to_use_)
124136
{
125137
relative_path = info.file_path;
126138
file_meta_info = info.file_meta_info;
127139
}
128140

129-
void RelativePathWithMetadata::loadMetadata(ObjectStoragePtr object_storage, bool ignore_non_existent_file)
141+
void PathWithMetadata::loadMetadata(ObjectStoragePtr object_storage, bool ignore_non_existent_file)
130142
{
131143
if (!metadata)
132144
{
@@ -143,7 +155,7 @@ void RelativePathWithMetadata::loadMetadata(ObjectStoragePtr object_storage, boo
143155
}
144156
}
145157

146-
RelativePathWithMetadata::CommandInTaskResponse::CommandInTaskResponse(const std::string & task)
158+
PathWithMetadata::CommandInTaskResponse::CommandInTaskResponse(const std::string & task)
147159
{
148160
Poco::JSON::Parser parser;
149161
try
@@ -167,7 +179,7 @@ RelativePathWithMetadata::CommandInTaskResponse::CommandInTaskResponse(const std
167179
}
168180
}
169181

170-
std::string RelativePathWithMetadata::CommandInTaskResponse::toString() const
182+
std::string PathWithMetadata::CommandInTaskResponse::toString() const
171183
{
172184
Poco::JSON::Object json;
173185

src/Disks/ObjectStorages/IObjectStorage.h

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ struct DataFileInfo;
106106
class DataFileMetaInfo;
107107
using DataFileMetaInfoPtr = std::shared_ptr<DataFileMetaInfo>;
108108

109-
struct RelativePathWithMetadata
109+
struct PathWithMetadata
110110
{
111111
class CommandInTaskResponse
112112
{
@@ -138,18 +138,30 @@ struct RelativePathWithMetadata
138138
std::optional<ObjectMetadata> metadata;
139139
CommandInTaskResponse command;
140140
std::optional<DataFileMetaInfoPtr> file_meta_info;
141+
std::optional<String> absolute_path;
142+
std::optional<ObjectStoragePtr> object_storage_to_use = std::nullopt;
141143

142-
RelativePathWithMetadata() = default;
144+
PathWithMetadata() = default;
143145

144-
explicit RelativePathWithMetadata(const String & task_string, std::optional<ObjectMetadata> metadata_ = std::nullopt);
145-
explicit RelativePathWithMetadata(const DataFileInfo & info, std::optional<ObjectMetadata> metadata_ = std::nullopt);
146+
explicit PathWithMetadata(
147+
const String & task_string,
148+
std::optional<ObjectMetadata> metadata_ = std::nullopt,
149+
std::optional<String> absolute_path_ = std::nullopt,
150+
std::optional<ObjectStoragePtr> object_storage_to_use_ = std::nullopt);
146151

147-
virtual ~RelativePathWithMetadata() = default;
152+
explicit PathWithMetadata(
153+
const DataFileInfo & info,
154+
std::optional<ObjectMetadata> metadata_ = std::nullopt,
155+
std::optional<String> absolute_path_ = std::nullopt,
156+
std::optional<ObjectStoragePtr> object_storage_to_use_ = std::nullopt);
157+
158+
virtual ~PathWithMetadata() = default;
148159

149160
virtual std::string getFileName() const { return std::filesystem::path(relative_path).filename(); }
150161
virtual std::string getFileNameWithoutExtension() const { return std::filesystem::path(relative_path).stem(); }
151162

152163
virtual std::string getPath() const { return relative_path; }
164+
virtual std::optional<std::string> getAbsolutePath() const { return absolute_path; }
153165
virtual bool isArchive() const { return false; }
154166
virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
155167
virtual size_t fileSizeInArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
@@ -160,6 +172,8 @@ struct RelativePathWithMetadata
160172

161173
void loadMetadata(ObjectStoragePtr object_storage, bool ignore_non_existent_file);
162174
const CommandInTaskResponse & getCommand() const { return command; }
175+
176+
std::optional<ObjectStoragePtr> getObjectStorage() const { return object_storage_to_use; }
163177
};
164178

165179
struct ObjectKeyWithMetadata
@@ -175,8 +189,8 @@ struct ObjectKeyWithMetadata
175189
{}
176190
};
177191

178-
using RelativePathWithMetadataPtr = std::shared_ptr<RelativePathWithMetadata>;
179-
using RelativePathsWithMetadata = std::vector<RelativePathWithMetadataPtr>;
192+
using PathWithMetadataPtr = std::shared_ptr<PathWithMetadata>;
193+
using PathsWithMetadata = std::vector<PathWithMetadataPtr>;
180194
using ObjectKeysWithMetadata = std::vector<ObjectKeyWithMetadata>;
181195

182196
class IObjectStorageIterator;
@@ -217,7 +231,7 @@ class IObjectStorage
217231
virtual bool existsOrHasAnyChild(const std::string & path) const;
218232

219233
/// List objects recursively by certain prefix.
220-
virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const;
234+
virtual void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const;
221235

222236
/// List objects recursively by certain prefix. Use it instead of listObjects, if you want to list objects lazily.
223237
virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const;

src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & path) c
151151
return object_metadata;
152152
}
153153

154-
void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t/* max_keys */) const
154+
void LocalObjectStorage::listObjects(const std::string & path, PathsWithMetadata & children, size_t/* max_keys */) const
155155
{
156156
if (!fs::exists(path) || !fs::is_directory(path))
157157
return;
@@ -164,7 +164,7 @@ void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWith
164164
continue;
165165
}
166166

167-
children.emplace_back(std::make_shared<RelativePathWithMetadata>(entry.path(), getObjectMetadata(entry.path())));
167+
children.emplace_back(std::make_shared<PathWithMetadata>(entry.path(), getObjectMetadata(entry.path())));
168168
}
169169
}
170170

src/Disks/ObjectStorages/Local/LocalObjectStorage.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ class LocalObjectStorage : public IObjectStorage
6262

6363
ObjectMetadata getObjectMetadata(const std::string & path) const override;
6464

65-
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
65+
void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const override;
6666

6767
bool existsOrHasAnyChild(const std::string & path) const override;
6868

0 commit comments

Comments
 (0)