Skip to content

Commit 9fa5b62

Browse files
authored
Merge pull request ClickHouse#79203 from ClickHouse/fix-delta-kernel-auth
Fix delta-kernel with http based endpoints, fix NOSIGN
2 parents ed71993 + faf49c8 commit 9fa5b62

File tree

4 files changed

+43
-8
lines changed

4 files changed

+43
-8
lines changed

src/Storages/ObjectStorage/DataLakes/DeltaLake/KernelHelper.cpp

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@ namespace DB::ErrorCodes
1111
extern const int NOT_IMPLEMENTED;
1212
}
1313

14+
namespace DB::S3AuthSetting
15+
{
16+
extern const S3AuthSettingsBool no_sign_request;
17+
}
18+
1419
namespace DeltaLake
1520
{
1621

@@ -23,14 +28,20 @@ class S3KernelHelper final : public IKernelHelper
2328
const std::string & access_key_id_,
2429
const std::string & secret_access_key_,
2530
const std::string & region_,
26-
const std::string & token_)
31+
const std::string & token_,
32+
bool no_sign_)
2733
: url(url_)
2834
, access_key_id(access_key_id_)
2935
, secret_access_key(secret_access_key_)
3036
, region(region_)
3137
, token(token_)
38+
, no_sign(no_sign_)
3239
, table_location(getTableLocation(url_))
3340
{
41+
/// Check if user didn't mention any region.
42+
/// Same as in S3/Client.cpp (stripping len("https://s3.")).
43+
if (url.endpoint.substr(11) == "amazonaws.com")
44+
url.addRegionToURI(region);
3445
}
3546

3647
const std::string & getTableLocation() const override { return table_location; }
@@ -58,27 +69,42 @@ class S3KernelHelper final : public IKernelHelper
5869

5970
/// Supported options
6071
/// https://github.com/apache/arrow-rs/blob/main/object_store/src/aws/builder.rs#L191
61-
set_option("aws_access_key_id", access_key_id);
62-
set_option("aws_secret_access_key", secret_access_key);
72+
if (!access_key_id.empty())
73+
set_option("aws_access_key_id", access_key_id);
74+
if (!secret_access_key.empty())
75+
set_option("aws_secret_access_key", secret_access_key);
76+
77+
/// Set even if token is empty to prevent delta-kernel
78+
/// from trying to access token api.
6379
set_option("aws_token", token);
80+
81+
if (no_sign || (access_key_id.empty() && secret_access_key.empty()))
82+
set_option("aws_skip_signature", "true");
83+
6484
set_option("aws_region", region);
85+
set_option("aws_bucket", url.bucket);
6586

6687
if (url.uri_str.starts_with("http"))
6788
{
6889
set_option("allow_http", "true");
6990
set_option("aws_endpoint", url.endpoint);
7091
}
7192

72-
LOG_TRACE(getLogger("KernelHelper"), "Using region: {}, endpoint: {}, uri: {}", region, url.endpoint, url.uri_str);
93+
LOG_TRACE(
94+
getLogger("KernelHelper"),
95+
"Using endpoint: {}, uri: {}, region: {}, bucket: {}",
96+
url.endpoint, url.uri_str, region, url.bucket);
97+
7398
return builder;
7499
}
75100

76101
private:
77-
const DB::S3::URI url;
102+
DB::S3::URI url;
78103
const std::string access_key_id;
79104
const std::string secret_access_key;
80105
const std::string region;
81106
const std::string token;
107+
const bool no_sign;
82108

83109
const std::string table_location;
84110

@@ -109,6 +135,7 @@ DeltaLake::KernelHelperPtr getKernelHelper(
109135
case DB::ObjectStorageType::S3:
110136
{
111137
const auto * s3_conf = dynamic_cast<const DB::StorageS3Configuration *>(configuration.get());
138+
const auto & auth_settings = s3_conf->getAuthSettings();
112139
const auto & s3_client = object_storage->getS3StorageClient();
113140
const auto & s3_credentials = s3_client->getCredentials();
114141
const auto & url = s3_conf->getURL();
@@ -118,7 +145,8 @@ DeltaLake::KernelHelperPtr getKernelHelper(
118145
s3_credentials.GetAWSAccessKeyId(),
119146
s3_credentials.GetAWSSecretKey(),
120147
s3_client->getRegionForBucket(url.bucket),
121-
s3_credentials.GetSessionToken());
148+
s3_credentials.GetSessionToken(),
149+
auth_settings[S3AuthSetting::no_sign_request]);
122150
}
123151
default:
124152
{

src/Storages/ObjectStorage/DataLakes/DeltaLake/TableSnapshot.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -385,10 +385,10 @@ void TableSnapshot::initSnapshotImpl() const
385385
LOG_TRACE(log, "Initialized scan state");
386386

387387
std::tie(table_schema, physical_names_map) = getTableSchemaFromSnapshot(snapshot.get());
388-
LOG_TRACE(log, "Table schema: {}", fmt::join(table_schema.getNames(), ", "));
388+
LOG_TRACE(log, "Table logical schema: {}", fmt::join(table_schema.getNames(), ", "));
389389

390390
read_schema = getReadSchemaFromSnapshot(scan_state.get());
391-
LOG_TRACE(log, "Read schema: {}", fmt::join(read_schema.getNames(), ", "));
391+
LOG_TRACE(log, "Table read schema: {}", fmt::join(read_schema.getNames(), ", "));
392392

393393
partition_columns = getPartitionColumnsFromSnapshot(scan_state.get());
394394
LOG_TRACE(log, "Partition columns: {}", fmt::join(partition_columns, ", "));
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
99997497
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
-- Tags: no-fasttest, no-msan
2+
-- Tag no-fasttest: Depends on AWS
3+
-- Tag no-msan: delta-kernel is not built with msan
4+
5+
SELECT count()
6+
FROM deltaLake('https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/', SETTINGS allow_experimental_delta_kernel_rs = 1);

0 commit comments

Comments
 (0)