Skip to content

Commit a854b28

Browse files
Enmkarthurpassos
authored andcommitted
Merge pull request #805 from Altinity/list_objects_object_storage_cache_25.3
1 parent 0904ede commit a854b28

21 files changed

+787
-17
lines changed

programs/server/Server.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
#include <Storages/System/attachSystemTables.h>
8484
#include <Storages/System/attachInformationSchemaTables.h>
8585
#include <Storages/Cache/registerRemoteFileMetadatas.h>
86+
#include <Storages/Cache/ObjectStorageListObjectsCache.h>
8687
#include <AggregateFunctions/registerAggregateFunctions.h>
8788
#include <Functions/UserDefined/IUserDefinedSQLObjectsStorage.h>
8889
#include <Functions/registerFunctions.h>
@@ -348,6 +349,10 @@ namespace ErrorCodes
348349
namespace FileCacheSetting
349350
{
350351
extern const FileCacheSettingsBool load_metadata_asynchronously;
352+
extern const ServerSettingsUInt64 input_format_parquet_metadata_cache_max_size;
353+
extern const ServerSettingsUInt64 object_storage_list_objects_cache_size;
354+
extern const ServerSettingsUInt64 object_storage_list_objects_cache_max_entries;
355+
extern const ServerSettingsUInt64 object_storage_list_objects_cache_ttl;
351356
}
352357

353358
}
@@ -2520,6 +2525,10 @@ try
25202525
if (dns_cache_updater)
25212526
dns_cache_updater->start();
25222527

2528+
ObjectStorageListObjectsCache::instance().setMaxSizeInBytes(server_settings[ServerSetting::object_storage_list_objects_cache_size]);
2529+
ObjectStorageListObjectsCache::instance().setMaxCount(server_settings[ServerSetting::object_storage_list_objects_cache_max_entries]);
2530+
ObjectStorageListObjectsCache::instance().setTTL(server_settings[ServerSetting::object_storage_list_objects_cache_ttl]);
2531+
25232532
auto replicas_reconnector = ReplicasReconnector::init(global_context);
25242533

25252534
/// Set current database name before loading tables and databases because

src/Access/Common/AccessType.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,8 @@ enum class AccessType : uint8_t
320320
M(SYSTEM_DROP_SCHEMA_CACHE, "SYSTEM DROP SCHEMA CACHE, DROP SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
321321
M(SYSTEM_DROP_FORMAT_SCHEMA_CACHE, "SYSTEM DROP FORMAT SCHEMA CACHE, DROP FORMAT SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
322322
M(SYSTEM_DROP_S3_CLIENT_CACHE, "SYSTEM DROP S3 CLIENT, DROP S3 CLIENT CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
323+
M(SYSTEM_DROP_PARQUET_METADATA_CACHE, "SYSTEM DROP PARQUET METADATA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
324+
M(SYSTEM_DROP_OBJECT_STORAGE_LIST_OBJECTS_CACHE, "SYSTEM DROP OBJECT STORAGE LIST OBJECTS CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
323325
M(SYSTEM_DROP_CACHE, "DROP CACHE", GROUP, SYSTEM) \
324326
M(SYSTEM_RELOAD_CONFIG, "RELOAD CONFIG", GLOBAL, SYSTEM_RELOAD) \
325327
M(SYSTEM_RELOAD_USERS, "RELOAD USERS", GLOBAL, SYSTEM_RELOAD) \

src/Common/ProfileEvents.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1135,7 +1135,10 @@ The server successfully detected this situation and will download merged part fr
11351135
M(AsyncLoggingErrorFileLogDroppedMessages, "How many messages have been dropped from error file log due to the async log queue being full", ValueType::Number) \
11361136
M(AsyncLoggingSyslogDroppedMessages, "How many messages have been dropped from the syslog due to the async log queue being full", ValueType::Number) \
11371137
M(AsyncLoggingTextLogDroppedMessages, "How many messages have been dropped from text_log due to the async log queue being full", ValueType::Number) \
1138-
1138+
M(ObjectStorageListObjectsCacheHits, "Number of times object storage list objects operation hit the cache.", ValueType::Number) \
1139+
M(ObjectStorageListObjectsCacheMisses, "Number of times object storage list objects operation miss the cache.", ValueType::Number) \
1140+
M(ObjectStorageListObjectsCacheExactMatchHits, "Number of times object storage list objects operation hit the cache with an exact match.", ValueType::Number) \
1141+
M(ObjectStorageListObjectsCachePrefixMatchHits, "Number of times object storage list objects operation miss the cache using prefix matching.", ValueType::Number) \
11391142

11401143
#ifdef APPLY_FOR_EXTERNAL_EVENTS
11411144
#define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M)

src/Common/TTLCachePolicy.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,10 +271,10 @@ class TTLCachePolicy : public ICachePolicy<Key, Mapped, HashFunction, WeightFunc
271271
return res;
272272
}
273273

274-
private:
274+
protected:
275275
using Cache = std::unordered_map<Key, MappedPtr, HashFunction>;
276276
Cache cache;
277-
277+
private:
278278
/// TODO To speed up removal of stale entries, we could also add another container sorted on expiry times which maps keys to iterators
279279
/// into the cache. To insert an entry, add it to the cache + add the iterator to the sorted container. To remove stale entries, do a
280280
/// binary search on the sorted container and erase all left of the found key.

src/Core/ServerSettings.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1139,8 +1139,9 @@ The policy on how to perform a scheduling of CPU slots specified by `concurrent_
11391139
DECLARE(UInt64, threadpool_local_fs_reader_queue_size, 1000000, R"(The maximum number of jobs that can be scheduled on the thread pool for reading from local filesystem.)", 0) \
11401140
DECLARE(NonZeroUInt64, threadpool_remote_fs_reader_pool_size, 250, R"(Number of threads in the Thread pool used for reading from remote filesystem when `remote_filesystem_read_method = 'threadpool'`.)", 0) \
11411141
DECLARE(UInt64, threadpool_remote_fs_reader_queue_size, 1000000, R"(The maximum number of jobs that can be scheduled on the thread pool for reading from remote filesystem.)", 0) \
1142-
1143-
1142+
DECLARE(UInt64, object_storage_list_objects_cache_size, 500000000, "Maximum size of ObjectStorage list objects cache in bytes. Zero means disabled.", 0) \
1143+
DECLARE(UInt64, object_storage_list_objects_cache_max_entries, 1000, "Maximum size of ObjectStorage list objects cache in entries. Zero means disabled.", 0) \
1144+
DECLARE(UInt64, object_storage_list_objects_cache_ttl, 3600, "Time to live of records in ObjectStorage list objects cache in seconds. Zero means unlimited", 0)
11441145
// clang-format on
11451146

11461147
/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in dumpToSystemServerSettingsColumns below

src/Core/Settings.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6985,6 +6985,9 @@ Write full paths (including s3://) into iceberg metadata files.
69856985
)", EXPERIMENTAL) \
69866986
DECLARE(String, iceberg_metadata_compression_method, "", R"(
69876987
Method to compress `.metadata.json` file.
6988+
)", EXPERIMENTAL) \
6989+
DECLARE(Bool, use_object_storage_list_objects_cache, false, R"(
6990+
Cache the list of objects returned by list objects calls in object storage
69886991
)", EXPERIMENTAL) \
69896992
DECLARE(Bool, make_distributed_plan, false, R"(
69906993
Make distributed query plan.

src/Core/SettingsChangesHistory.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,10 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
236236
{"function_date_trunc_return_type_behavior", 1, 0, "Change the result type for dateTrunc function for DateTime64/Date32 arguments to DateTime64/Date32 regardless of time unit to get correct result for negative values"},
237237
{"enable_scopes_for_with_statement", true, true, "New setting for backward compatibility with the old analyzer."},
238238
/// Release closed. Please use 25.5
239+
// Altinity Antalya modifications atop of 25.2
240+
{"object_storage_cluster", "", "", "New setting"},
241+
{"object_storage_max_nodes", 0, 0, "New setting"},
242+
{"use_object_storage_list_objects_cache", true, false, "New setting."},
239243
});
240244
addSettingsChanges(settings_changes_history, "25.3",
241245
{

src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ class AzureObjectStorage : public IObjectStorage
3535
const String & description_,
3636
const String & common_key_prefix_);
3737

38+
bool supportsListObjectsCache() override { return true; }
39+
3840
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
3941

4042
/// Sanitizer build may crash with max_keys=1; this looks like a false positive.

src/Disks/ObjectStorages/IObjectStorage.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,14 @@ class IObjectStorage
327327
}
328328
virtual std::shared_ptr<const S3::Client> tryGetS3StorageClient() { return nullptr; }
329329
#endif
330+
331+
332+
virtual bool supportsListObjectsCache() { return false; }
333+
334+
private:
335+
mutable std::mutex throttlers_mutex;
336+
ThrottlerPtr remote_read_throttler;
337+
ThrottlerPtr remote_write_throttler;
330338
};
331339

332340
using ObjectStoragePtr = std::shared_ptr<IObjectStorage>;

src/Disks/ObjectStorages/S3/S3ObjectStorage.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ class S3ObjectStorage : public IObjectStorage
6161

6262
ObjectStorageType getType() const override { return ObjectStorageType::S3; }
6363

64+
bool supportsListObjectsCache() override { return true; }
65+
6466
bool exists(const StoredObject & object) const override;
6567

6668
std::unique_ptr<ReadBufferFromFileBase> readObject( /// NOLINT

0 commit comments

Comments
 (0)