
Commit f790be1

Merge branch 'antalya-25.6.5' into improvement/antalya/874

2 parents: d76eaeb + 8bcbf47

82 files changed: +3058 additions, -596 deletions


src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h

Lines changed: 8 additions & 2 deletions

@@ -71,8 +71,14 @@ class FunctionTreeNodeImpl : public AbstractFunction
 {
 public:
     explicit ArgumentsTreeNode(const QueryTreeNodes * arguments_) : arguments(arguments_) {}
-    size_t size() const override { return arguments ? arguments->size() : 0; }
-    std::unique_ptr<Argument> at(size_t n) const override { return std::make_unique<ArgumentTreeNode>(arguments->at(n).get()); }
+    size_t size() const override
+    { /// size without skipped indexes
+        return arguments ? arguments->size() - skippedSize() : 0;
+    }
+    std::unique_ptr<Argument> at(size_t n) const override
+    { /// n is a relative index; some indexes can be skipped
+        return std::make_unique<ArgumentTreeNode>(arguments->at(getRealIndex(n)).get());
+    }
 private:
     const QueryTreeNodes * arguments = nullptr;
 };
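The new size() and at() rely on two helpers, skippedSize() and getRealIndex(), that are not part of this hunk. A minimal sketch of how such relative-to-real index remapping could work, assuming skipped positions are kept in an ordered set (all names and layout here are hypothetical, not the actual ClickHouse implementation):

#include <cstddef>
#include <set>
#include <utility>

// Hypothetical sketch: translate a "relative" argument index (what callers see)
// into the "real" index in the underlying list, jumping over skipped positions.
class SkippingIndex
{
public:
    explicit SkippingIndex(std::set<size_t> skipped_) : skipped(std::move(skipped_)) {}

    size_t skippedSize() const { return skipped.size(); }

    size_t getRealIndex(size_t n) const
    {
        size_t real = n;
        for (size_t s : skipped)
        {
            if (s <= real)
                ++real;  // every skipped slot at or before us shifts the real index right
            else
                break;   // the set is ordered, later entries cannot affect us
        }
        return real;
    }

private:
    std::set<size_t> skipped;  // positions excluded from the visible argument view
};

With skipped = {1}, getRealIndex(0) is 0 and getRealIndex(1) is 2, so size() correctly reports one less than the underlying argument count.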

src/Core/Settings.cpp

Lines changed: 9 additions & 0 deletions

@@ -6841,6 +6841,15 @@ Enable PRQL - an alternative to SQL.
 )", EXPERIMENTAL) \
 DECLARE(Bool, enable_adaptive_memory_spill_scheduler, false, R"(
 Trigger processor to spill data into external storage adaptively. Grace join is supported at present.
+)", EXPERIMENTAL) \
+DECLARE(String, object_storage_cluster, "", R"(
+Cluster to make distributed requests to object storages with alternative syntax.
+)", EXPERIMENTAL) \
+DECLARE(UInt64, object_storage_max_nodes, 0, R"(
+Limit on the number of hosts used for requests in object storage cluster table functions - azureBlobStorageCluster, s3Cluster, hdfsCluster, etc.
+Possible values:
+- Positive integer.
+- 0 — All hosts in cluster.
 )", EXPERIMENTAL) \
 DECLARE(Bool, allow_experimental_delta_kernel_rs, true, R"(
 Allow experimental delta-kernel-rs implementation.
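The "0 means all hosts" convention for object_storage_max_nodes is worth spelling out. A hedged one-function illustration (hypothetical helper, not code from this commit):

#include <algorithm>
#include <cstddef>

// Illustration of the documented semantics: 0 selects every host in the
// cluster, any positive value caps how many hosts participate.
size_t effectiveNodeCount(size_t cluster_size, size_t object_storage_max_nodes)
{
    if (object_storage_max_nodes == 0)
        return cluster_size;  // 0 selects all hosts, per the setting's docs
    return std::min(cluster_size, object_storage_max_nodes);
}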

src/Core/SettingsChangesHistory.cpp

Lines changed: 2 additions & 1 deletion

@@ -67,13 +67,14 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
 /// controls new feature and it's 'true' by default, use 'false' as previous_value).
 /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
 /// Note: please check if the key already exists to prevent duplicate entries.
-
 addSettingsChanges(settings_changes_history, "25.6.5.2000",
 {
     {"allow_experimental_database_iceberg", false, true, "Turned ON by default for Antalya"},
     {"allow_experimental_database_unity_catalog", false, true, "Turned ON by default for Antalya"},
     {"allow_experimental_database_glue_catalog", false, true, "Turned ON by default for Antalya"},
     {"output_format_parquet_enum_as_byte_array", true, true, "Enable writing Enum as byte array in Parquet by default"},
+    {"object_storage_cluster", "", "", "New setting"},
+    {"object_storage_max_nodes", 0, 0, "New setting"},
 });
 addSettingsChanges(settings_changes_history, "25.6",
 {
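These history entries are what the `compatibility` setting replays: each tuple is (name, previous_value, new_value, comment), and choosing an older compatibility version rolls every later change back to its previous value. A simplified sketch of that rollback idea (stand-in types, string-ordered versions; not ClickHouse's actual structures):

#include <map>
#include <string>
#include <vector>

struct SettingChange
{
    std::string name;
    std::string previous_value;  // value before the release
    std::string new_value;       // value from the release on
    std::string comment;
};
using VersionToSettingsChangesMap = std::map<std::string, std::vector<SettingChange>>;

// Roughly what `compatibility` does: for every release newer than the requested
// version, re-apply each changed setting's previous value. Iterating newest to
// oldest lets the oldest applicable previous_value win (version comparison is
// simplified to string ordering here).
std::map<std::string, std::string> rollbackTo(
    const VersionToSettingsChangesMap & history, const std::string & compatibility_version)
{
    std::map<std::string, std::string> overrides;
    for (auto it = history.rbegin(); it != history.rend(); ++it)
        if (it->first > compatibility_version)
            for (const auto & change : it->second)
                overrides[change.name] = change.previous_value;
    return overrides;
}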

src/Databases/DataLake/DatabaseDataLake.cpp

Lines changed: 10 additions & 7 deletions

@@ -18,6 +18,7 @@
 #include <Storages/ConstraintsDescription.h>
 #include <Storages/StorageNull.h>
 #include <Storages/ObjectStorage/DataLakes/DataLakeConfiguration.h>
+#include <Storages/ObjectStorage/StorageObjectStorageCluster.h>

 #include <Interpreters/evaluateConstantExpression.h>
 #include <Interpreters/Context.h>

@@ -43,6 +44,7 @@ namespace DatabaseDataLakeSetting
     extern const DatabaseDataLakeSettingsString storage_endpoint;
     extern const DatabaseDataLakeSettingsString oauth_server_uri;
     extern const DatabaseDataLakeSettingsBool vended_credentials;
+    extern const DatabaseDataLakeSettingsString object_storage_cluster;

     extern const DatabaseDataLakeSettingsString aws_access_key_id;

@@ -428,21 +430,22 @@ StoragePtr DatabaseDataLake::tryGetTableImpl(const String & name, ContextPtr con

     /// with_table_structure = false: because there will be
     /// no table structure in table definition AST.
-    StorageObjectStorage::Configuration::initialize(*configuration, args, context_copy, /* with_table_structure */false);
+    configuration->initialize(args, context_copy, /* with_table_structure */false);

-    return std::make_shared<StorageObjectStorage>(
+    auto cluster_name = settings[DatabaseDataLakeSetting::object_storage_cluster].value;
+
+    return std::make_shared<StorageObjectStorageCluster>(
+        cluster_name,
         configuration,
         configuration->createObjectStorage(context_copy, /* is_readonly */ false),
-        context_copy,
         StorageID(getDatabaseName(), name),
         /* columns */columns,
         /* constraints */ConstraintsDescription{},
+        /* partition_by */nullptr,
+        context_copy,
         /* comment */"",
         getFormatSettings(context_copy),
         LoadingStrictnessLevel::CREATE,
-        /* distributed_processing */false,
-        /* partition_by */nullptr,
-        /* is_table_function */false,
         /* lazy_init */true);
 }

@@ -488,7 +491,7 @@ DatabaseTablesIteratorPtr DatabaseDataLake::getLightweightTablesIterator(
     const FilterByNameFunction & filter_by_table_name,
     bool skip_not_loaded) const
 {
-    Tables tables;
+    Tables tables;
     auto catalog = getCatalog();
     DB::Names iceberg_tables;
src/Databases/DataLake/GlueCatalog.cpp

Lines changed: 1 addition & 1 deletion

@@ -441,7 +441,7 @@ bool GlueCatalog::classifyTimestampTZ(const String & column_name, const TableMet
     auto storage_settings = std::make_shared<DB::DataLakeStorageSettings>();
     storage_settings->loadFromSettingsChanges(settings.allChanged());
     auto configuration = std::make_shared<DB::StorageS3IcebergConfiguration>(storage_settings);
-    DB::StorageObjectStorage::Configuration::initialize(*configuration, args, getContext(), false);
+    configuration->initialize(args, getContext(), false);

     auto object_storage = configuration->createObjectStorage(getContext(), true);
     const auto & read_settings = getContext()->getReadSettings();

src/Databases/DataLake/ICatalog.cpp

Lines changed: 12 additions & 6 deletions

@@ -70,13 +70,19 @@ void TableMetadata::setLocation(const std::string & location_)
     auto pos_to_path = location_.substr(pos_to_bucket).find('/');

     if (pos_to_path == std::string::npos)
-        throw DB::Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Unexpected location format: {}", location_);
-
-    pos_to_path = pos_to_bucket + pos_to_path;
+    { // empty path
+        location_without_path = location_;
+        path.clear();
+        bucket = location_.substr(pos_to_bucket);
+    }
+    else
+    {
+        pos_to_path = pos_to_bucket + pos_to_path;

-    location_without_path = location_.substr(0, pos_to_path);
-    path = location_.substr(pos_to_path + 1);
-    bucket = location_.substr(pos_to_bucket, pos_to_path - pos_to_bucket);
+        location_without_path = location_.substr(0, pos_to_path);
+        path = location_.substr(pos_to_path + 1);
+        bucket = location_.substr(pos_to_bucket, pos_to_path - pos_to_bucket);
+    }

     LOG_TEST(getLogger("TableMetadata"),
         "Parsed location without path: {}, path: {}",

src/Disks/DiskType.cpp

Lines changed: 44 additions & 20 deletions

@@ -12,7 +12,7 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 }

-MetadataStorageType metadataTypeFromString(const String & type)
+MetadataStorageType metadataTypeFromString(const std::string & type)
 {
     auto check_type = Poco::toLower(type);
     if (check_type == "local")

@@ -60,25 +60,49 @@ std::string DataSourceDescription::toString() const
         case DataSourceType::RAM:
             return "memory";
         case DataSourceType::ObjectStorage:
-        {
-            switch (object_storage_type)
-            {
-                case ObjectStorageType::S3:
-                    return "s3";
-                case ObjectStorageType::HDFS:
-                    return "hdfs";
-                case ObjectStorageType::Azure:
-                    return "azure_blob_storage";
-                case ObjectStorageType::Local:
-                    return "local_blob_storage";
-                case ObjectStorageType::Web:
-                    return "web";
-                case ObjectStorageType::None:
-                    return "none";
-                case ObjectStorageType::Max:
-                    throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected object storage type: Max");
-            }
-        }
+            return DB::toString(object_storage_type);
     }
 }

+ObjectStorageType objectStorageTypeFromString(const std::string & type)
+{
+    auto check_type = Poco::toLower(type);
+    if (check_type == "s3")
+        return ObjectStorageType::S3;
+    if (check_type == "hdfs")
+        return ObjectStorageType::HDFS;
+    if (check_type == "azure_blob_storage" || check_type == "azure")
+        return ObjectStorageType::Azure;
+    if (check_type == "local_blob_storage" || check_type == "local")
+        return ObjectStorageType::Local;
+    if (check_type == "web")
+        return ObjectStorageType::Web;
+    if (check_type == "none")
+        return ObjectStorageType::None;
+
+    throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG,
+        "Unknown object storage type: {}", type);
+}
+
+std::string toString(ObjectStorageType type)
+{
+    switch (type)
+    {
+        case ObjectStorageType::S3:
+            return "s3";
+        case ObjectStorageType::HDFS:
+            return "hdfs";
+        case ObjectStorageType::Azure:
+            return "azure_blob_storage";
+        case ObjectStorageType::Local:
+            return "local_blob_storage";
+        case ObjectStorageType::Web:
+            return "web";
+        case ObjectStorageType::None:
+            return "none";
+        case ObjectStorageType::Max:
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected object storage type: Max");
+    }
+}
+
 }
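The new helpers form a string/enum round trip, with the aliases "azure" and "local" accepted on input but the long names produced on output. A short hypothetical usage check (not a test from the commit):

#include <Disks/DiskType.h>
#include <cassert>

// Round-trip behaviour of the new pair: parsing is case-insensitive and
// accepts aliases; printing always yields the canonical long name.
void exampleObjectStorageTypeRoundTrip()
{
    using namespace DB;
    assert(objectStorageTypeFromString("S3") == ObjectStorageType::S3);
    assert(objectStorageTypeFromString("azure") == ObjectStorageType::Azure);
    assert(toString(ObjectStorageType::Azure) == "azure_blob_storage");
    assert(toString(objectStorageTypeFromString("web")) == "web");
}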

src/Disks/DiskType.h

Lines changed: 4 additions & 2 deletions

@@ -36,8 +36,10 @@ enum class MetadataStorageType : uint8_t
     Memory,
 };

-MetadataStorageType metadataTypeFromString(const String & type);
-String toString(DataSourceType data_source_type);
+MetadataStorageType metadataTypeFromString(const std::string & type);
+
+ObjectStorageType objectStorageTypeFromString(const std::string & type);
+std::string toString(ObjectStorageType type);

 struct DataSourceDescription
 {

src/IO/S3/Client.cpp

Lines changed: 8 additions & 6 deletions

@@ -384,7 +384,7 @@ Model::HeadObjectOutcome Client::HeadObject(HeadObjectRequest & request) const
     auto bucket_uri = getURIForBucket(bucket);
     if (!bucket_uri)
     {
-        if (auto maybe_error = updateURIForBucketForHead(bucket); maybe_error.has_value())
+        if (auto maybe_error = updateURIForBucketForHead(bucket, request.GetKey()); maybe_error.has_value())
            return *maybe_error;

        if (auto region = getRegionForBucket(bucket); !region.empty())

@@ -589,7 +589,6 @@ Client::doRequest(RequestType & request, RequestFn request_fn) const
     if (auto uri = getURIForBucket(bucket); uri.has_value())
         request.overrideURI(std::move(*uri));

-
     bool found_new_endpoint = false;
     // if we found correct endpoint after 301 responses, update the cache for future requests
     SCOPE_EXIT(

@@ -869,12 +868,15 @@ std::optional<S3::URI> Client::getURIFromError(const Aws::S3::S3Error & error) c
 }

 // Do a list request because head requests don't have body in response
-std::optional<Aws::S3::S3Error> Client::updateURIForBucketForHead(const std::string & bucket) const
+// S3 Tables don't support ListObjects, so as a dirty workaround this was changed to GetObject
+std::optional<Aws::S3::S3Error> Client::updateURIForBucketForHead(const std::string & bucket, const std::string & key) const
 {
-    ListObjectsV2Request req;
+    GetObjectRequest req;
     req.SetBucket(bucket);
-    req.SetMaxKeys(1);
-    auto result = ListObjectsV2(req);
+    req.SetKey(key);
+    req.SetRange("bytes=0-1");
+    auto result = GetObject(req);
+
     if (result.IsSuccess())
         return std::nullopt;
     return result.GetError();
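The probe only needs the response metadata: on a redirect the S3Error carries the correct region/endpoint, which the caller feeds back into the URI cache, and fetching a two-byte range keeps the fallback cheap even for large objects. A standalone sketch of the same probe against the plain AWS C++ SDK (not ClickHouse's wrapper class):

#include <aws/s3/S3Client.h>
#include <aws/s3/model/GetObjectRequest.h>
#include <optional>
#include <string>

// Issue a ranged GetObject purely to learn whether the endpoint is right;
// on failure the returned error may carry the proper region/endpoint.
std::optional<Aws::S3::S3Error> probeBucket(
    const Aws::S3::S3Client & client, const std::string & bucket, const std::string & key)
{
    Aws::S3::Model::GetObjectRequest req;
    req.SetBucket(bucket.c_str());
    req.SetKey(key.c_str());
    req.SetRange("bytes=0-1");  // at most two bytes of body; headers are what matter

    auto outcome = client.GetObject(req);
    if (outcome.IsSuccess())
        return std::nullopt;
    return outcome.GetError();
}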

src/IO/S3/Client.h

Lines changed: 1 addition & 1 deletion

@@ -279,7 +279,7 @@ class Client : private Aws::S3::S3Client

     void updateURIForBucket(const std::string & bucket, S3::URI new_uri) const;
     std::optional<S3::URI> getURIFromError(const Aws::S3::S3Error & error) const;
-    std::optional<Aws::S3::S3Error> updateURIForBucketForHead(const std::string & bucket) const;
+    std::optional<Aws::S3::S3Error> updateURIForBucketForHead(const std::string & bucket, const std::string & key) const;

     std::optional<S3::URI> getURIForBucket(const std::string & bucket) const;
