Skip to content

Commit bc4fd69

Browse files
Enmk, arthurpassos
authored and committed
Merge pull request #795 from Altinity/parquet_metadata_caching_forward_port_25.3
25.3 Antalya port - Parquet metadata caching
1 parent efd42b3 commit bc4fd69

19 files changed

+312
-10
lines changed

programs/server/Server.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,10 @@
156156
# include <azure/core/diagnostics/logger.hpp>
157157
#endif
158158

159+
#if USE_PARQUET
160+
# include <Processors/Formats/Impl/ParquetFileMetaDataCache.h>
161+
#endif
162+
159163

160164
#include <incbin.h>
161165
/// A minimal file used when the server is run without installation
@@ -326,6 +330,7 @@ namespace ServerSetting
326330
extern const ServerSettingsUInt64 os_cpu_busy_time_threshold;
327331
extern const ServerSettingsFloat min_os_cpu_wait_time_ratio_to_drop_connection;
328332
extern const ServerSettingsFloat max_os_cpu_wait_time_ratio_to_drop_connection;
333+
extern const ServerSettingsUInt64 input_format_parquet_metadata_cache_max_size;
329334
}
330335

331336
namespace ErrorCodes
@@ -2422,6 +2427,7 @@ try
24222427
dns_cache_updater->start();
24232428

24242429
auto replicas_reconnector = ReplicasReconnector::init(global_context);
2430+
#if USE_PARQUET
/// Apply the configured limit (server setting `input_format_parquet_metadata_cache_max_size`)
/// to the Parquet file metadata cache. Guarded because ParquetFileMetaDataCache.h is
/// only included when Parquet support is compiled in.
ParquetFileMetaDataCache::instance()->setMaxSizeInBytes(server_settings[ServerSetting::input_format_parquet_metadata_cache_max_size]);
#endif
24252431

24262432
/// Set current database name before loading tables and databases because
24272433
/// system logs may copy global context.

src/Access/Common/AccessType.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ enum class AccessType : uint8_t
184184
M(SYSTEM_DROP_SCHEMA_CACHE, "SYSTEM DROP SCHEMA CACHE, DROP SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
185185
M(SYSTEM_DROP_FORMAT_SCHEMA_CACHE, "SYSTEM DROP FORMAT SCHEMA CACHE, DROP FORMAT SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
186186
M(SYSTEM_DROP_S3_CLIENT_CACHE, "SYSTEM DROP S3 CLIENT, DROP S3 CLIENT CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
187+
M(SYSTEM_DROP_PARQUET_METADATA_CACHE, "SYSTEM DROP PARQUET METADATA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
187188
M(SYSTEM_DROP_CACHE, "DROP CACHE", GROUP, SYSTEM) \
188189
M(SYSTEM_RELOAD_CONFIG, "RELOAD CONFIG", GLOBAL, SYSTEM_RELOAD) \
189190
M(SYSTEM_RELOAD_USERS, "RELOAD USERS", GLOBAL, SYSTEM_RELOAD) \

src/Common/ProfileEvents.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1047,7 +1047,8 @@ The server successfully detected this situation and will download merged part fr
10471047
M(IndexBinarySearchAlgorithm, "Number of times the binary search algorithm is used over the index marks", ValueType::Number) \
10481048
M(IndexGenericExclusionSearchAlgorithm, "Number of times the generic exclusion search algorithm is used over the index marks", ValueType::Number) \
10491049
M(ParallelReplicasQueryCount, "Number of (sub)queries executed using parallel replicas during a query execution", ValueType::Number) \
1050-
1050+
M(ParquetMetaDataCacheHits, "Number of times Parquet file metadata was found in the Parquet metadata cache.", ValueType::Number) \
1051+
M(ParquetMetaDataCacheMisses, "Number of times Parquet file metadata was not found in the Parquet metadata cache.", ValueType::Number) \
10511052

10521053
#ifdef APPLY_FOR_EXTERNAL_EVENTS
10531054
#define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M)

src/Core/FormatFactorySettings.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1348,8 +1348,7 @@ Limits the size of the blocks formed during data parsing in input formats in byt
13481348
DECLARE(Bool, input_format_parquet_allow_geoparquet_parser, true, R"(
13491349
Use geo column parser to convert Array(UInt8) into Point/Linestring/Polygon/MultiLineString/MultiPolygon types
13501350
)", 0) \
1351-
1352-
1351+
DECLARE(Bool, input_format_parquet_use_metadata_cache, true, R"(Enable parquet file metadata caching)", 0) \
13531352
// End of FORMAT_FACTORY_SETTINGS
13541353

13551354
#define OBSOLETE_FORMAT_SETTINGS(M, ALIAS) \

src/Core/ServerSettings.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,8 +1064,7 @@ The policy on how to perform a scheduling of CPU slots specified by `concurrent_
10641064
See [Controlling behavior on server CPU overload](/operations/settings/server-overload) for more details.
10651065
)", 0) \
10661066
DECLARE(Float, distributed_cache_keep_up_free_connections_ratio, 0.1f, "Soft limit for number of active connection distributed cache will try to keep free. After the number of free connections goes below distributed_cache_keep_up_free_connections_ratio * max_connections, connections with oldest activity will be closed until the number goes above the limit.", 0) \
1067-
1068-
1067+
DECLARE(UInt64, input_format_parquet_metadata_cache_max_size, 500000000, "Maximum size of parquet file metadata cache", 0) \
10691068
// clang-format on
10701069

10711070
/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in dumpToSystemServerSettingsColumns below

src/Core/SettingsChangesHistory.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,11 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
180180
{"parallel_hash_join_threshold", 0, 0, "New setting"},
181181
/// Release closed. Please use 25.4
182182
});
183+
addSettingsChanges(settings_changes_history, "24.12.2.20000",
184+
{
185+
// Altinity Antalya modifications on top of 24.12
186+
{"input_format_parquet_use_metadata_cache", true, true, "New setting, turned ON by default"}, // https://github.com/Altinity/ClickHouse/pull/586
187+
});
183188
addSettingsChanges(settings_changes_history, "25.2",
184189
{
185190
/// Release closed. Please use 25.3

src/Interpreters/InterpreterSystemQuery.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@
7979
#include <Formats/ProtobufSchemas.h>
8080
#endif
8181

82+
#if USE_PARQUET
83+
#include <Processors/Formats/Impl/ParquetFileMetaDataCache.h>
84+
#endif
85+
8286
#if USE_AWS_S3
8387
#include <IO/S3/Client.h>
8488
#endif
@@ -433,6 +437,16 @@ BlockIO InterpreterSystemQuery::execute()
433437
getContext()->clearQueryResultCache(query.query_result_cache_tag);
434438
break;
435439
}
440+
case Type::DROP_PARQUET_METADATA_CACHE:
441+
{
442+
#if USE_PARQUET
443+
getContext()->checkAccess(AccessType::SYSTEM_DROP_PARQUET_METADATA_CACHE);
444+
ParquetFileMetaDataCache::instance()->clear();
445+
break;
446+
#else
447+
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "The server was compiled without the support for Parquet");
448+
#endif
449+
}
436450
case Type::DROP_COMPILED_EXPRESSION_CACHE:
437451
#if USE_EMBEDDED_COMPILER
438452
getContext()->checkAccess(AccessType::SYSTEM_DROP_COMPILED_EXPRESSION_CACHE);
@@ -1518,6 +1532,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
15181532
case Type::DROP_PAGE_CACHE:
15191533
case Type::DROP_SCHEMA_CACHE:
15201534
case Type::DROP_FORMAT_SCHEMA_CACHE:
1535+
case Type::DROP_PARQUET_METADATA_CACHE:
15211536
case Type::DROP_S3_CLIENT_CACHE:
15221537
{
15231538
required_access.emplace_back(AccessType::SYSTEM_DROP_CACHE);

src/Parsers/ASTSystemQuery.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,7 @@ void ASTSystemQuery::formatImpl(WriteBuffer & ostr, const FormatSettings & setti
462462
case Type::DROP_COMPILED_EXPRESSION_CACHE:
463463
case Type::DROP_S3_CLIENT_CACHE:
464464
case Type::DROP_ICEBERG_METADATA_CACHE:
465+
case Type::DROP_PARQUET_METADATA_CACHE:
465466
case Type::RESET_COVERAGE:
466467
case Type::RESTART_REPLICAS:
467468
case Type::JEMALLOC_PURGE:

src/Parsers/ASTSystemQuery.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ class ASTSystemQuery : public IAST, public ASTQueryWithOnCluster
4242
DROP_SCHEMA_CACHE,
4343
DROP_FORMAT_SCHEMA_CACHE,
4444
DROP_S3_CLIENT_CACHE,
45+
DROP_PARQUET_METADATA_CACHE,
4546
STOP_LISTEN,
4647
START_LISTEN,
4748
RESTART_REPLICAS,

src/Processors/Formats/IInputFormat.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ class IInputFormat : public SourceWithKeyCondition
7070

7171
void needOnlyCount()
{
    /// Turns on the `need_only_count` flag; this method never turns it off.
    need_only_count = true;
}
7272

73+
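/// Supply an additional key/identifier describing the storage object behind the ReadBuffer.
/// The default implementation ignores it; input formats that need the key override this method.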
74+
virtual void setStorageRelatedUniqueKey(const Settings & /*settings*/, const String & /*key*/)
{
    /// Intentionally empty: the base implementation ignores both arguments.
}
75+
7376
protected:
7477
ReadBuffer & getReadBuffer() const
{
    /// `in` must be set before the buffer is requested; checked with chassert.
    chassert(in);
    return *in;
}
7578

0 commit comments

Comments
 (0)