Skip to content

Commit a4abf75

Browse files
authored
Merge pull request #1123 from Altinity/feature/antalya-25.8/iceberg_metrics
More profile metrics for Iceberg, S3 and Azure
2 parents cc82fa1 + b746e01 commit a4abf75

File tree

12 files changed

+68
-22
lines changed

12 files changed

+68
-22
lines changed

src/Common/ProfileEvents.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,11 @@
299299
M(IcebergTrivialCountOptimizationApplied, "Trivial count optimization applied while reading from Iceberg", ValueType::Number) \
300300
M(IcebergVersionHintUsed, "Number of times version-hint.text has been used.", ValueType::Number) \
301301
M(IcebergMinMaxIndexPrunedFiles, "Number of skipped files by using MinMax index in Iceberg", ValueType::Number) \
302+
M(IcebergAvroFileParsing, "Number of times avro metadata files have been parsed.", ValueType::Number) \
303+
M(IcebergAvroFileParsingMicroseconds, "Time spent for parsing avro metadata files for Iceberg tables.", ValueType::Microseconds) \
304+
M(IcebergJsonFileParsing, "Number of times json metadata files have been parsed.", ValueType::Number) \
305+
M(IcebergJsonFileParsingMicroseconds, "Time spent for parsing json metadata files for Iceberg tables.", ValueType::Microseconds) \
306+
\
302307
M(JoinBuildTableRowCount, "Total number of rows in the build table for a JOIN operation.", ValueType::Number) \
303308
M(JoinProbeTableRowCount, "Total number of rows in the probe table for a JOIN operation.", ValueType::Number) \
304309
M(JoinResultRowCount, "Total number of rows in the result of a JOIN operation.", ValueType::Number) \
@@ -580,7 +585,9 @@ The server successfully detected this situation and will download merged part fr
580585
M(S3DeleteObjects, "Number of S3 API DeleteObject(s) calls.", ValueType::Number) \
581586
M(S3CopyObject, "Number of S3 API CopyObject calls.", ValueType::Number) \
582587
M(S3ListObjects, "Number of S3 API ListObjects calls.", ValueType::Number) \
588+
M(S3ListObjectsMicroseconds, "Time of S3 API ListObjects execution.", ValueType::Microseconds) \
583589
M(S3HeadObject, "Number of S3 API HeadObject calls.", ValueType::Number) \
590+
M(S3HeadObjectMicroseconds, "Time of S3 API HeadObject execution.", ValueType::Microseconds) \
584591
M(S3GetObjectAttributes, "Number of S3 API GetObjectAttributes calls.", ValueType::Number) \
585592
M(S3CreateMultipartUpload, "Number of S3 API CreateMultipartUpload calls.", ValueType::Number) \
586593
M(S3UploadPartCopy, "Number of S3 API UploadPartCopy calls.", ValueType::Number) \
@@ -634,6 +641,7 @@ The server successfully detected this situation and will download merged part fr
634641
M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls", ValueType::Number) \
635642
M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.", ValueType::Number) \
636643
M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.", ValueType::Number) \
644+
M(AzureListObjectsMicroseconds, "Time of Azure blob storage API ListObjects execution.", ValueType::Microseconds) \
637645
M(AzureGetProperties, "Number of Azure blob storage API GetProperties calls.", ValueType::Number) \
638646
M(AzureCreateContainer, "Number of Azure blob storage API CreateContainer calls.", ValueType::Number) \
639647
\

src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h>
1414
#include <Disks/ObjectStorages/ObjectStorageIteratorAsync.h>
1515
#include <Interpreters/Context.h>
16+
#include <Common/ElapsedTimeProfileEventIncrement.h>
1617

1718

1819
namespace CurrentMetrics
@@ -25,6 +26,7 @@ namespace CurrentMetrics
2526
namespace ProfileEvents
2627
{
2728
extern const Event AzureListObjects;
29+
extern const Event AzureListObjectsMicroseconds;
2830
extern const Event DiskAzureListObjects;
2931
extern const Event AzureDeleteObjects;
3032
extern const Event DiskAzureDeleteObjects;
@@ -76,6 +78,7 @@ class AzureIteratorAsync final : public IObjectStorageIteratorAsync
7678
ProfileEvents::increment(ProfileEvents::AzureListObjects);
7779
if (client->IsClientForDisk())
7880
ProfileEvents::increment(ProfileEvents::DiskAzureListObjects);
81+
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::AzureListObjectsMicroseconds);
7982

8083
chassert(batch.empty());
8184
auto blob_list_response = client->ListBlobs(options);
@@ -183,7 +186,11 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith
183186
if (client_ptr->IsClientForDisk())
184187
ProfileEvents::increment(ProfileEvents::DiskAzureListObjects);
185188

186-
blob_list_response = client_ptr->ListBlobs(options);
189+
{
190+
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::AzureListObjectsMicroseconds);
191+
blob_list_response = client_ptr->ListBlobs(options);
192+
}
193+
187194
const auto & blobs_list = blob_list_response.Blobs;
188195

189196
for (const auto & blob : blobs_list)

src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,13 @@
3030
#include <Common/logger_useful.h>
3131
#include <Common/MultiVersion.h>
3232
#include <Common/Macros.h>
33+
#include <Common/ElapsedTimeProfileEventIncrement.h>
3334

3435

3536
namespace ProfileEvents
3637
{
3738
extern const Event S3ListObjects;
39+
extern const Event S3ListObjectsMicroseconds;
3840
extern const Event DiskS3DeleteObjects;
3941
extern const Event DiskS3ListObjects;
4042
}
@@ -137,7 +139,12 @@ class S3IteratorAsync final : public IObjectStorageIteratorAsync
137139
ProfileEvents::increment(ProfileEvents::S3ListObjects);
138140
ProfileEvents::increment(ProfileEvents::DiskS3ListObjects);
139141

140-
auto outcome = client->ListObjectsV2(*request);
142+
Aws::S3::Model::ListObjectsV2Outcome outcome;
143+
144+
{
145+
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::S3ListObjectsMicroseconds);
146+
outcome = client->ListObjectsV2(*request);
147+
}
141148

142149
/// Outcome failure will be handled on the caller side.
143150
if (outcome.IsSuccess())
@@ -264,7 +271,11 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet
264271
ProfileEvents::increment(ProfileEvents::S3ListObjects);
265272
ProfileEvents::increment(ProfileEvents::DiskS3ListObjects);
266273

267-
outcome = client.get()->ListObjectsV2(request);
274+
{
275+
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::S3ListObjectsMicroseconds);
276+
outcome = client.get()->ListObjectsV2(request);
277+
}
278+
268279
throwIfError(outcome);
269280

270281
auto result = outcome.GetResult();

src/IO/S3/getObjectInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <optional>
22
#include <IO/S3/getObjectInfo.h>
33
#include <IO/Expect404ResponseScope.h>
4+
#include <Common/ElapsedTimeProfileEventIncrement.h>
45

56
#if USE_AWS_S3
67

@@ -15,6 +16,7 @@ namespace ProfileEvents
1516
extern const Event S3GetObject;
1617
extern const Event S3GetObjectAttributes;
1718
extern const Event S3HeadObject;
19+
extern const Event S3HeadObjectMicroseconds;
1820
extern const Event DiskS3GetObject;
1921
extern const Event DiskS3GetObjectAttributes;
2022
extern const Event DiskS3HeadObject;
@@ -32,6 +34,7 @@ namespace
3234
ProfileEvents::increment(ProfileEvents::S3HeadObject);
3335
if (client.isClientForDisk())
3436
ProfileEvents::increment(ProfileEvents::DiskS3HeadObject);
37+
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::S3HeadObjectMicroseconds);
3538

3639
S3::HeadObjectRequest req;
3740
req.SetBucket(bucket);

src/IO/S3Common.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,6 @@
1919
#include <IO/S3/Requests.h>
2020

2121

22-
namespace ProfileEvents
23-
{
24-
extern const Event S3GetObjectAttributes;
25-
extern const Event S3GetObjectMetadata;
26-
extern const Event S3HeadObject;
27-
extern const Event DiskS3GetObjectAttributes;
28-
extern const Event DiskS3GetObjectMetadata;
29-
extern const Event DiskS3HeadObject;
30-
}
31-
3222
namespace DB
3323
{
3424

src/Interpreters/IcebergMetadataLog.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <Processors/QueryPlan/ReadFromSystemNumbersStep.h>
2121
#include <Storages/ObjectStorage/DataLakes/DataLakeConfiguration.h>
2222
#include <Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h>
23+
#include <Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.h>
2324
#include <Storages/ObjectStorage/StorageObjectStorage.h>
2425
#include <Storages/SelectQueryInfo.h>
2526
#include <base/Decimal.h>
@@ -83,7 +84,7 @@ void IcebergMetadataLogElement::appendToBlock(MutableColumns & columns) const
8384

8485
void insertRowToLogTable(
8586
const ContextPtr & local_context,
86-
String row,
87+
std::function<String()> get_row,
8788
IcebergMetadataLogLevel row_log_level,
8889
const String & table_path,
8990
const String & file_path,
@@ -103,7 +104,7 @@ void insertRowToLogTable(
103104
.content_type = row_log_level,
104105
.table_path = table_path,
105106
.file_path = file_path,
106-
.metadata_content = row,
107+
.metadata_content = get_row(),
107108
.row_in_file = row_in_file});
108109
}
109110
}

src/Interpreters/IcebergMetadataLog.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include <Interpreters/SystemLog.h>
44
#include <Storages/ColumnsDescription.h>
5+
#include <Poco/JSON/Object.h>
56

67
namespace DB
78
{
@@ -23,9 +24,11 @@ struct IcebergMetadataLogElement
2324
void appendToBlock(MutableColumns & columns) const;
2425
};
2526

27+
/// Here `get_row` function is used instead `row` string to calculate string only when required.
28+
/// Inside `insertRowToLogTable` code can exit immediately after `iceberg_metadata_log_level` setting check.
2629
void insertRowToLogTable(
2730
const ContextPtr & local_context,
28-
String row,
31+
std::function<String()> get_row,
2932
IcebergMetadataLogLevel row_log_level,
3033
const String & table_path,
3134
const String & file_path,

src/Storages/ObjectStorage/DataLakes/Iceberg/AvroForIcebergDeserializer.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,20 @@
1212
#include <Common/assert_cast.h>
1313
#include <base/find_symbols.h>
1414
#include <Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h>
15+
#include <Common/ElapsedTimeProfileEventIncrement.h>
1516

1617
namespace DB::ErrorCodes
1718
{
1819
extern const int ICEBERG_SPECIFICATION_VIOLATION;
1920
extern const int INCORRECT_DATA;
2021
}
2122

23+
namespace ProfileEvents
24+
{
25+
extern const Event IcebergAvroFileParsing;
26+
extern const Event IcebergAvroFileParsingMicroseconds;
27+
}
28+
2229
namespace DB::Iceberg
2330
{
2431

@@ -30,6 +37,9 @@ try
3037
: buffer(std::move(buffer_))
3138
, manifest_file_path(manifest_file_path_)
3239
{
40+
ProfileEvents::increment(ProfileEvents::IcebergAvroFileParsing);
41+
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::IcebergAvroFileParsingMicroseconds);
42+
3343
auto manifest_file_reader
3444
= std::make_unique<avro::DataFileReaderBase>(std::make_unique<AvroInputStreamReadBufferAdapter>(*buffer));
3545

src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,9 +243,10 @@ bool IcebergMetadata::update(const ContextPtr & local_context)
243243

244244
updateState(local_context, metadata_object);
245245

246+
auto dump_metadata = [&]()->String { return dumpMetadataObjectToString(metadata_object); };
246247
insertRowToLogTable(
247248
local_context,
248-
dumpMetadataObjectToString(metadata_object),
249+
dump_metadata,
249250
DB::IcebergMetadataLogLevel::Metadata,
250251
configuration_ptr->getRawPath().path,
251252
metadata_file_path,
@@ -773,9 +774,10 @@ DataLakeMetadataPtr IcebergMetadata::create(
773774

774775
auto format_version = object->getValue<int>(f_format_version);
775776

777+
auto dump_metadata = [&]()->String { return dumpMetadataObjectToString(object); };
776778
insertRowToLogTable(
777779
local_context,
778-
dumpMetadataObjectToString(object),
780+
dump_metadata,
779781
DB::IcebergMetadataLogLevel::Metadata,
780782
configuration_ptr->getRawPath().path,
781783
metadata_file_path,

src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,9 +155,10 @@ ManifestFileContent::ManifestFileContent(
155155
const String & path_to_manifest_file_)
156156
: path_to_manifest_file(path_to_manifest_file_)
157157
{
158+
auto dump_metadata = [&]()->String { return manifest_file_deserializer.getMetadataContent(); };
158159
insertRowToLogTable(
159160
context,
160-
manifest_file_deserializer.getMetadataContent(),
161+
dump_metadata,
161162
DB::IcebergMetadataLogLevel::ManifestFileMetadata,
162163
common_path,
163164
path_to_manifest_file,
@@ -230,9 +231,10 @@ ManifestFileContent::ManifestFileContent(
230231

231232
for (size_t i = 0; i < manifest_file_deserializer.rows(); ++i)
232233
{
234+
auto dump_row_metadata = [&]()->String { return manifest_file_deserializer.getContent(i); };
233235
insertRowToLogTable(
234236
context,
235-
manifest_file_deserializer.getContent(i),
237+
dump_row_metadata,
236238
DB::IcebergMetadataLogLevel::ManifestFileEntry,
237239
common_path,
238240
path_to_manifest_file,

0 commit comments

Comments
 (0)