Skip to content

Commit d5cea61

Browse files
authored
Merge pull request ClickHouse#76245 from ClickHouse/backport/24.8/75063
Backport ClickHouse#75063 to 24.8: Fix: Wrapper around libhdfs3 functions using rpc
2 parents c8de3aa + 7b90eeb commit d5cea61

File tree

8 files changed

+121
-34
lines changed

8 files changed

+121
-34
lines changed

src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ void HDFSObjectStorage::initializeHDFSFS() const
3232
if (initialized)
3333
return;
3434

35-
hdfs_builder = createHDFSBuilder(url, config);
36-
hdfs_fs = createHDFSFS(hdfs_builder.get());
35+
hdfs_fs = createHDFSFS(builder.get());
3736
initialized = true;
3837
}
3938

@@ -70,7 +69,7 @@ bool HDFSObjectStorage::exists(const StoredObject & object) const
7069
if (path.starts_with(url_without_path))
7170
path = path.substr(url_without_path.size());
7271

73-
return (0 == hdfsExists(hdfs_fs.get(), path.c_str()));
72+
return (0 == wrapErr<int>(hdfsExists, hdfs_fs.get(), path.c_str()));
7473
}
7574

7675
std::unique_ptr<ReadBufferFromFileBase> HDFSObjectStorage::readObject( /// NOLINT
@@ -141,7 +140,7 @@ void HDFSObjectStorage::removeObject(const StoredObject & object)
141140
path = path.substr(url_without_path.size());
142141

143142
/// Add path from root to file name
144-
int res = hdfsDelete(hdfs_fs.get(), path.c_str(), 0);
143+
int res = wrapErr<int>(hdfsDelete, hdfs_fs.get(), path.c_str(), 0);
145144
if (res == -1)
146145
throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: {}", path);
147146

@@ -171,7 +170,7 @@ void HDFSObjectStorage::removeObjectsIfExist(const StoredObjects & objects)
171170
ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) const
172171
{
173172
initializeHDFSFS();
174-
auto * file_info = hdfsGetPathInfo(hdfs_fs.get(), path.data());
173+
auto * file_info = wrapErr<hdfsFileInfo *>(hdfsGetPathInfo, hdfs_fs.get(), path.data());
175174
if (!file_info)
176175
throw Exception(ErrorCodes::HDFS_ERROR,
177176
"Cannot get file info for: {}. Error: {}", path, hdfsGetLastError());
@@ -190,14 +189,17 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM
190189
LOG_TEST(log, "Trying to list files for {}", path);
191190

192191
HDFSFileInfo ls;
193-
ls.file_info = hdfsListDirectory(hdfs_fs.get(), path.data(), &ls.length);
192+
ls.file_info = wrapErr<hdfsFileInfo *>(hdfsListDirectory, hdfs_fs.get(), path.data(), &ls.length);
194193

195194
if (ls.file_info == nullptr && errno != ENOENT) // NOLINT
196195
{
197196
// ignore file not found exception, keep throw other exception,
198197
// libhdfs3 doesn't have function to get exception type, so use errno.
199-
throw Exception(ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}",
198+
if (ls.file_info == nullptr && errno != ENOENT) // NOLINT
199+
{
200+
throw Exception(ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}",
200201
path, String(hdfsGetLastError()));
202+
}
201203
}
202204

203205
if (!ls.file_info && ls.length > 0)

src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <Disks/IDisk.h>
88
#include <Disks/ObjectStorages/IObjectStorage.h>
99
#include <Storages/ObjectStorage/HDFS/HDFSCommon.h>
10+
#include <Storages/ObjectStorage/HDFS/HDFSErrorWrapper.h>
1011
#include <Core/UUID.h>
1112
#include <memory>
1213
#include <Poco/Util/AbstractConfiguration.h>
@@ -26,7 +27,7 @@ struct HDFSObjectStorageSettings
2627
};
2728

2829

29-
class HDFSObjectStorage : public IObjectStorage
30+
class HDFSObjectStorage : public IObjectStorage, public HDFSErrorWrapper
3031
{
3132
public:
3233

@@ -37,7 +38,8 @@ class HDFSObjectStorage : public IObjectStorage
3738
SettingsPtr settings_,
3839
const Poco::Util::AbstractConfiguration & config_,
3940
bool lazy_initialize)
40-
: config(config_)
41+
: HDFSErrorWrapper(hdfs_root_path_, config_)
42+
, config(config_)
4143
, settings(std::move(settings_))
4244
, log(getLogger("HDFSObjectStorage(" + hdfs_root_path_ + ")"))
4345
{
@@ -125,7 +127,6 @@ class HDFSObjectStorage : public IObjectStorage
125127

126128
const Poco::Util::AbstractConfiguration & config;
127129

128-
mutable HDFSBuilderWrapper hdfs_builder;
129130
mutable HDFSFSPtr hdfs_fs;
130131

131132
mutable std::mutex init_mutex;

src/Storages/ObjectStorage/HDFS/HDFSCommon.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ void HDFSBuilderWrapper::loadFromConfig(
9393
}
9494

9595
#if USE_KRB5
96-
void HDFSBuilderWrapper::runKinit()
96+
void HDFSBuilderWrapper::runKinit() const
9797
{
9898
LOG_DEBUG(getLogger("HDFSClient"), "Running KerberosInit");
9999
try

src/Storages/ObjectStorage/HDFS/HDFSCommon.h

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ class HDFSBuilderWrapper
4848
{
4949

5050
friend HDFSBuilderWrapper createHDFSBuilder(const String & uri_str, const Poco::Util::AbstractConfiguration &);
51+
friend class HDFSErrorWrapper;
5152

5253
static const String CONFIG_PREFIX;
5354

@@ -75,7 +76,19 @@ static const String CONFIG_PREFIX;
7576
return *this;
7677
}
7778

78-
hdfsBuilder * get() { return hdfs_builder; }
79+
hdfsBuilder * get() const { return hdfs_builder; }
80+
81+
#if USE_KRB5
82+
void runKinit() const;
83+
#endif // USE_KRB5
84+
85+
protected:
86+
#if USE_KRB5
87+
String hadoop_kerberos_keytab;
88+
String hadoop_kerberos_principal;
89+
String hadoop_security_kerberos_ticket_cache_path;
90+
bool need_kinit{false};
91+
#endif // USE_KRB5
7992

8093
private:
8194
void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const String & prefix, bool isUser = false);
@@ -88,14 +101,6 @@ static const String CONFIG_PREFIX;
88101

89102
hdfsBuilder * hdfs_builder = nullptr;
90103
std::vector<std::pair<String, String>> config_stor;
91-
92-
#if USE_KRB5
93-
void runKinit();
94-
String hadoop_kerberos_keytab;
95-
String hadoop_kerberos_principal;
96-
String hadoop_security_kerberos_ticket_cache_path;
97-
bool need_kinit{false};
98-
#endif // USE_KRB5
99104
};
100105

101106
using HDFSFSPtr = std::unique_ptr<std::remove_pointer_t<hdfsFS>, detail::HDFSFsDeleter>;
src/Storages/ObjectStorage/HDFS/HDFSErrorWrapper.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#include "HDFSErrorWrapper.h"
2+
3+
#if USE_HDFS
4+
5+
namespace DB
6+
{
7+
8+
HDFSErrorWrapper::HDFSErrorWrapper(
9+
const std::string & hdfs_uri_,
10+
const Poco::Util::AbstractConfiguration & config_
11+
): builder(createHDFSBuilder(hdfs_uri_, config_))
12+
{
13+
}
14+
15+
}
16+
17+
#endif
src/Storages/ObjectStorage/HDFS/HDFSErrorWrapper.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#pragma once
2+
3+
#include "config.h"
4+
5+
#if USE_HDFS
6+
#include "HDFSCommon.h"
7+
#include <base/types.h>
8+
9+
namespace DB
10+
{
11+
12+
class HDFSErrorWrapper
13+
{
14+
public:
15+
mutable HDFSBuilderWrapper builder;
16+
17+
HDFSErrorWrapper(
18+
const std::string & hdfs_uri_,
19+
const Poco::Util::AbstractConfiguration & config_
20+
);
21+
22+
template <typename R, typename F, typename... P> R wrapErr(F fn, const P... p) const
23+
{
24+
R r = fn(p...);
25+
#if USE_KRB5
26+
if (errno == EACCES) // NOLINT
27+
{
28+
builder.runKinit(); // krb5 keytab reinitialization
29+
r = fn(p...);
30+
}
31+
#endif // USE_KRB5
32+
return r;
33+
}
34+
35+
};
36+
37+
}
38+
39+
#endif

src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#if USE_HDFS
44
#include "HDFSCommon.h"
5+
#include "HDFSErrorWrapper.h"
56
#include <Common/Scheduler/ResourceGuard.h>
67
#include <IO/Progress.h>
78
#include <Common/Throttler.h>
@@ -28,16 +29,16 @@ namespace ErrorCodes
2829
extern const int SEEK_POSITION_OUT_OF_BOUND;
2930
extern const int LOGICAL_ERROR;
3031
extern const int UNKNOWN_FILE_SIZE;
32+
extern const int HDFS_ERROR;
3133
}
3234

3335

34-
struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<SeekableReadBuffer>, public WithFileSize
36+
struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<SeekableReadBuffer>, public WithFileSize, public HDFSErrorWrapper
3537
{
3638
String hdfs_uri;
3739
String hdfs_file_path;
3840

3941
hdfsFile fin;
40-
HDFSBuilderWrapper builder;
4142
HDFSFSPtr fs;
4243
ReadSettings read_settings;
4344

@@ -54,14 +55,14 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
5455
bool use_external_buffer_,
5556
std::optional<size_t> file_size_)
5657
: BufferWithOwnMemory<SeekableReadBuffer>(use_external_buffer_ ? 0 : read_settings_.remote_fs_buffer_size)
58+
, HDFSErrorWrapper(hdfs_uri_, config_)
5759
, hdfs_uri(hdfs_uri_)
5860
, hdfs_file_path(hdfs_file_path_)
5961
, read_settings(read_settings_)
6062
, read_until_position(read_until_position_)
6163
{
62-
builder = createHDFSBuilder(hdfs_uri_, config_);
6364
fs = createHDFSFS(builder.get());
64-
fin = hdfsOpenFile(fs.get(), hdfs_file_path.c_str(), O_RDONLY, 0, 0, 0);
65+
fin = wrapErr<hdfsFile>(hdfsOpenFile, fs.get(), hdfs_file_path.c_str(), O_RDONLY, 0, 0, 0);
6566

6667
if (fin == nullptr)
6768
throw Exception(ErrorCodes::CANNOT_OPEN_FILE,
@@ -74,7 +75,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
7475
}
7576
else
7677
{
77-
auto * file_info = hdfsGetPathInfo(fs.get(), hdfs_file_path.c_str());
78+
auto * file_info = wrapErr<hdfsFileInfo *>(hdfsGetPathInfo, fs.get(), hdfs_file_path.c_str());
7879
if (!file_info)
7980
{
8081
hdfsCloseFile(fs.get(), fin);
@@ -123,7 +124,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
123124
int bytes_read;
124125
try
125126
{
126-
bytes_read = hdfsRead(fs.get(), fin, internal_buffer.begin(), safe_cast<int>(num_bytes_to_read));
127+
bytes_read = wrapErr<tSize>(hdfsRead, fs.get(), fin, internal_buffer.begin(), safe_cast<int>(num_bytes_to_read));
127128
}
128129
catch (...)
129130
{
@@ -160,7 +161,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
160161
if (whence != SEEK_SET)
161162
throw Exception(ErrorCodes::LOGICAL_ERROR, "Only SEEK_SET is supported");
162163

163-
int seek_status = hdfsSeek(fs.get(), fin, file_offset_);
164+
int seek_status = wrapErr<int>(hdfsSeek, fs.get(), fin, file_offset_);
164165
if (seek_status != 0)
165166
throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Fail to seek HDFS file: {}, error: {}", hdfs_uri, std::string(hdfsGetLastError()));
166167
file_offset = file_offset_;
@@ -172,6 +173,29 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
172173
{
173174
return file_offset;
174175
}
176+
177+
size_t pread(char * buffer, size_t size, size_t offset)
178+
{
179+
ResourceGuard rlock(read_settings.resource_link, size);
180+
auto bytes_read = wrapErr<tSize>(hdfsPread, fs.get(), fin, buffer, safe_cast<int>(size), offset);
181+
rlock.unlock();
182+
183+
if (bytes_read < 0)
184+
{
185+
throw Exception(
186+
ErrorCodes::HDFS_ERROR,
187+
"Fail to read from HDFS: {}, file path: {}. Error: {}",
188+
hdfs_uri,
189+
hdfs_file_path,
190+
std::string(hdfsGetLastError()));
191+
}
192+
if (bytes_read && read_settings.remote_throttler)
193+
{
194+
read_settings.remote_throttler->add(
195+
bytes_read, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds);
196+
}
197+
return bytes_read;
198+
}
175199
};
176200

177201
ReadBufferFromHDFS::ReadBufferFromHDFS(

src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
#include "WriteBufferFromHDFS.h"
66
#include "HDFSCommon.h"
7+
#include "HDFSErrorWrapper.h"
78
#include <Common/Scheduler/ResourceGuard.h>
89
#include <Common/Throttler.h>
910
#include <Common/safe_cast.h>
@@ -26,11 +27,10 @@ extern const int CANNOT_OPEN_FILE;
2627
extern const int CANNOT_FSYNC;
2728
}
2829

29-
struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl
30+
struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl : public HDFSErrorWrapper
3031
{
3132
std::string hdfs_uri;
3233
hdfsFile fout;
33-
HDFSBuilderWrapper builder;
3434
HDFSFSPtr fs;
3535
WriteSettings write_settings;
3636

@@ -40,8 +40,8 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl
4040
int replication_,
4141
const WriteSettings & write_settings_,
4242
int flags)
43-
: hdfs_uri(hdfs_uri_)
44-
, builder(createHDFSBuilder(hdfs_uri, config_))
43+
: HDFSErrorWrapper(hdfs_uri_, config_)
44+
, hdfs_uri(hdfs_uri_)
4545
, fs(createHDFSFS(builder.get()))
4646
, write_settings(write_settings_)
4747
{
@@ -63,14 +63,13 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl
6363
hdfsCloseFile(fs.get(), fout);
6464
}
6565

66-
6766
int write(const char * start, size_t size)
6867
{
6968
ResourceGuard rlock(write_settings.resource_link, size);
7069
int bytes_written;
7170
try
7271
{
73-
bytes_written = hdfsWrite(fs.get(), fout, start, safe_cast<int>(size));
72+
bytes_written = wrapErr<tSize>(hdfsWrite, fs.get(), fout, start, safe_cast<int>(size));
7473
}
7574
catch (...)
7675
{
@@ -94,7 +93,7 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl
9493

9594
void sync() const
9695
{
97-
int result = hdfsSync(fs.get(), fout);
96+
int result = wrapErr<int>(hdfsSync, fs.get(), fout);
9897
if (result < 0)
9998
throw ErrnoException(ErrorCodes::CANNOT_FSYNC, "Cannot HDFS sync {} {}", hdfs_uri, std::string(hdfsGetLastError()));
10099
}

0 commit comments

Comments (0)