Skip to content

Commit b5156af

Browse files
author
Innocent
committed
feat: metrics reporting for scan and commit
1 parent 3e7b20a commit b5156af

24 files changed

Lines changed: 3037 additions & 0 deletions

src/iceberg/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@ set(ICEBERG_SOURCES
5858
manifest/v3_metadata.cc
5959
metadata_columns.cc
6060
metrics_config.cc
61+
metrics/commit_report.cc
62+
metrics/counter.cc
63+
metrics/json_serde.cc
64+
metrics/metrics_context.cc
65+
metrics/metrics_reporters.cc
66+
metrics/scan_report.cc
67+
metrics/timer.cc
6168
name_mapping.cc
6269
partition_field.cc
6370
partition_spec.cc
@@ -219,6 +226,7 @@ add_subdirectory(puffin)
219226
add_subdirectory(row)
220227
add_subdirectory(update)
221228
add_subdirectory(util)
229+
add_subdirectory(metrics)
222230

223231
if(ICEBERG_BUILD_BUNDLE)
224232
set(ICEBERG_BUNDLE_SOURCES

src/iceberg/constants.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ namespace iceberg {
3333
constexpr std::string_view kParquetFieldIdKey = "PARQUET:field_id";
3434
constexpr int64_t kInvalidSnapshotId = -1;
3535
constexpr int64_t kInvalidSequenceNumber = -1;
36+
constexpr int64_t kInvalidSchemaId = -1;
3637
/// \brief Stand-in for the current sequence number that will be assigned when the commit
3738
/// is successful. This is replaced when writing a manifest list by the ManifestFile
3839
/// adapter.

src/iceberg/meson.build

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,13 @@ iceberg_sources = files(
7979
'manifest/v2_metadata.cc',
8080
'manifest/v3_metadata.cc',
8181
'metadata_columns.cc',
82+
'metrics/commit_report.cc',
83+
'metrics/counter.cc',
84+
'metrics/json_serde.cc',
85+
'metrics/metrics_context.cc',
86+
'metrics/metrics_reporters.cc',
87+
'metrics/scan_report.cc',
88+
'metrics/timer.cc',
8289
'metrics_config.cc',
8390
'name_mapping.cc',
8491
'partition_field.cc',

src/iceberg/metrics/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
iceberg_install_all_headers(iceberg/metrics)
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/metrics/commit_report.h"
21+
22+
#include "iceberg/snapshot.h"
23+
#include "iceberg/util/string_util.h"
24+
25+
namespace iceberg {
26+
27+
CommitMetrics CommitMetrics::Of(MetricsContext& context) {
28+
CommitMetrics m;
29+
m.total_duration = context.GetTimer("total-duration");
30+
m.attempts = context.GetCounter("attempts");
31+
return m;
32+
}
33+
34+
CommitMetrics CommitMetrics::Noop() { return CommitMetrics::Of(MetricsContext::Null()); }
35+
36+
void CommitMetrics::PopulateResult(CommitMetricsResult& result) const {
37+
result.total_duration =
38+
total_duration ? TimerResult{.unit = std::string(total_duration->Unit()),
39+
.count = total_duration->Count(),
40+
.total_duration = total_duration->TotalDuration()}
41+
: TimerResult{};
42+
result.attempts =
43+
attempts ? CounterResult{.unit = attempts->Unit(), .value = attempts->Value()}
44+
: CounterResult{};
45+
}
46+
47+
CommitMetricsResult CommitMetricsResult::From(
48+
const CommitMetrics& live_metrics,
49+
const std::unordered_map<std::string, std::string>& snapshot_summary) {
50+
CommitMetricsResult result;
51+
live_metrics.PopulateResult(result);
52+
53+
// Helpers: parse a summary key and wrap as a typed CounterResult.
54+
auto count_field = [&snapshot_summary](const std::string& key) -> CounterResult {
55+
auto it = snapshot_summary.find(key);
56+
if (it == snapshot_summary.end()) return {};
57+
auto parsed = StringUtils::ParseNumber<int64_t>(it->second);
58+
return {.unit = CounterUnit::kCount,
59+
.value = parsed.has_value() ? parsed.value() : 0};
60+
};
61+
auto bytes_field = [&snapshot_summary](const std::string& key) -> CounterResult {
62+
auto it = snapshot_summary.find(key);
63+
if (it == snapshot_summary.end()) return {.unit = CounterUnit::kBytes};
64+
auto parsed = StringUtils::ParseNumber<int64_t>(it->second);
65+
return {.unit = CounterUnit::kBytes,
66+
.value = parsed.has_value() ? parsed.value() : 0};
67+
};
68+
69+
result.added_data_files = count_field(SnapshotSummaryFields::kAddedDataFiles);
70+
result.removed_data_files = count_field(SnapshotSummaryFields::kDeletedDataFiles);
71+
result.total_data_files = count_field(SnapshotSummaryFields::kTotalDataFiles);
72+
result.added_delete_files = count_field(SnapshotSummaryFields::kAddedDeleteFiles);
73+
result.added_equality_delete_files =
74+
count_field(SnapshotSummaryFields::kAddedEqDeleteFiles);
75+
result.added_positional_delete_files =
76+
count_field(SnapshotSummaryFields::kAddedPosDeleteFiles);
77+
result.added_dvs = count_field(SnapshotSummaryFields::kAddedDVs);
78+
result.removed_positional_delete_files =
79+
count_field(SnapshotSummaryFields::kRemovedPosDeleteFiles);
80+
result.removed_dvs = count_field(SnapshotSummaryFields::kRemovedDVs);
81+
result.removed_equality_delete_files =
82+
count_field(SnapshotSummaryFields::kRemovedEqDeleteFiles);
83+
result.removed_delete_files = count_field(SnapshotSummaryFields::kRemovedDeleteFiles);
84+
result.total_delete_files = count_field(SnapshotSummaryFields::kTotalDeleteFiles);
85+
result.added_records = count_field(SnapshotSummaryFields::kAddedRecords);
86+
result.removed_records = count_field(SnapshotSummaryFields::kDeletedRecords);
87+
result.total_records = count_field(SnapshotSummaryFields::kTotalRecords);
88+
result.added_files_size_bytes = bytes_field(SnapshotSummaryFields::kAddedFileSize);
89+
result.removed_files_size_bytes = bytes_field(SnapshotSummaryFields::kRemovedFileSize);
90+
result.total_files_size_bytes = bytes_field(SnapshotSummaryFields::kTotalFileSize);
91+
result.added_positional_deletes = count_field(SnapshotSummaryFields::kAddedPosDeletes);
92+
result.removed_positional_deletes =
93+
count_field(SnapshotSummaryFields::kRemovedPosDeletes);
94+
result.total_positional_deletes = count_field(SnapshotSummaryFields::kTotalPosDeletes);
95+
result.added_equality_deletes = count_field(SnapshotSummaryFields::kAddedEqDeletes);
96+
result.removed_equality_deletes = count_field(SnapshotSummaryFields::kRemovedEqDeletes);
97+
result.total_equality_deletes = count_field(SnapshotSummaryFields::kTotalEqDeletes);
98+
result.kept_manifest_count = count_field(SnapshotSummaryFields::kManifestsKept);
99+
result.created_manifest_count = count_field(SnapshotSummaryFields::kManifestsCreated);
100+
result.replaced_manifest_count = count_field(SnapshotSummaryFields::kManifestsReplaced);
101+
result.processed_manifest_entries_count =
102+
count_field(SnapshotSummaryFields::kEntriesProcessed);
103+
return result;
104+
}
105+
106+
} // namespace iceberg
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
#include <memory>
23+
#include <string>
24+
#include <unordered_map>
25+
26+
#include "iceberg/constants.h"
27+
#include "iceberg/iceberg_export.h"
28+
#include "iceberg/metrics/metrics_context.h"
29+
#include "iceberg/metrics/metrics_types.h" // CounterResult, TimerResult, DurationNs
30+
#include "iceberg/metrics/timer.h"
31+
32+
namespace iceberg {
33+
34+
// Forward declaration: CommitMetricsResult is defined later in this header.
35+
struct CommitMetricsResult;
36+
37+
/// \brief Live commit metrics collected during a table commit operation.
38+
///
39+
/// Tracks the overall commit duration and retry count. File/record counts come
40+
/// from the snapshot summary after the commit succeeds and are stored separately
41+
/// in CommitMetricsResult.
42+
class ICEBERG_EXPORT CommitMetrics {
43+
public:
44+
/// \brief Create a CommitMetrics instance backed by the given MetricsContext.
45+
static CommitMetrics Of(MetricsContext& context);
46+
47+
/// \brief Create a CommitMetrics instance with all-noop timer and counter.
48+
static CommitMetrics Noop();
49+
50+
/// \brief Snapshot timer and counter values into the corresponding fields of result.
51+
///
52+
/// Only total_duration and attempts are written; the caller is responsible for
53+
/// populating the remaining snapshot-summary fields.
54+
void PopulateResult(CommitMetricsResult& result) const;
55+
56+
/// \brief Timer measuring total wall-clock time of the commit call.
57+
std::shared_ptr<Timer> total_duration;
58+
59+
/// \brief Counter for the number of commit attempts (including retries).
60+
std::shared_ptr<Counter> attempts;
61+
};
62+
63+
/// \brief Immutable snapshot of commit metrics for use in CommitReport.
64+
struct ICEBERG_EXPORT CommitMetricsResult {
65+
/// \brief Total wall-clock duration of the commit attempt.
66+
TimerResult total_duration;
67+
/// \brief Number of commit attempts (1 on success without retries).
68+
CounterResult attempts;
69+
/// \brief Number of data files added in this commit.
70+
CounterResult added_data_files;
71+
/// \brief Number of data files removed in this commit.
72+
CounterResult removed_data_files;
73+
/// \brief Total live data files after this commit.
74+
CounterResult total_data_files;
75+
/// \brief Number of delete files added in this commit.
76+
CounterResult added_delete_files;
77+
/// \brief Equality delete files added.
78+
CounterResult added_equality_delete_files;
79+
/// \brief Positional delete files added.
80+
CounterResult added_positional_delete_files;
81+
/// \brief Deletion vectors added.
82+
CounterResult added_dvs;
83+
/// \brief Positional delete files removed.
84+
CounterResult removed_positional_delete_files;
85+
/// \brief Deletion vectors removed.
86+
CounterResult removed_dvs;
87+
/// \brief Equality delete files removed.
88+
CounterResult removed_equality_delete_files;
89+
/// \brief Number of delete files removed in this commit.
90+
CounterResult removed_delete_files;
91+
/// \brief Total live delete files after this commit.
92+
CounterResult total_delete_files;
93+
/// \brief Number of records added in this commit.
94+
CounterResult added_records;
95+
/// \brief Number of records removed in this commit.
96+
CounterResult removed_records;
97+
/// \brief Total live records after this commit.
98+
CounterResult total_records;
99+
/// \brief Total byte size of files added.
100+
CounterResult added_files_size_bytes;
101+
/// \brief Total byte size of files removed.
102+
CounterResult removed_files_size_bytes;
103+
/// \brief Total byte size of all live files after this commit.
104+
CounterResult total_files_size_bytes;
105+
/// \brief Positional delete records added.
106+
CounterResult added_positional_deletes;
107+
/// \brief Positional delete records removed.
108+
CounterResult removed_positional_deletes;
109+
/// \brief Total positional delete records after this commit.
110+
CounterResult total_positional_deletes;
111+
/// \brief Equality delete records added.
112+
CounterResult added_equality_deletes;
113+
/// \brief Equality delete records removed.
114+
CounterResult removed_equality_deletes;
115+
/// \brief Total equality delete records after this commit.
116+
CounterResult total_equality_deletes;
117+
/// \brief Manifest files kept unchanged in this commit.
118+
CounterResult kept_manifest_count;
119+
/// \brief Manifest files created in this commit.
120+
CounterResult created_manifest_count;
121+
/// \brief Manifest files replaced in this commit.
122+
CounterResult replaced_manifest_count;
123+
/// \brief Manifest entries processed in this commit.
124+
CounterResult processed_manifest_entries_count;
125+
126+
/// \brief Build a CommitMetricsResult from live metrics and a snapshot summary map.
127+
///
128+
/// Combines timer/retry measurements from \p live_metrics with records parsed
129+
/// from \p snapshot_summary. Missing or unparseable summary keys
130+
/// default to 0.
131+
static CommitMetricsResult From(
132+
const CommitMetrics& live_metrics,
133+
const std::unordered_map<std::string, std::string>& snapshot_summary);
134+
};
135+
136+
/// \brief Report generated after a commit operation.
137+
///
138+
/// Contains metrics about the changes made in a commit.
139+
struct ICEBERG_EXPORT CommitReport {
140+
/// \brief The fully qualified name of the table that was modified.
141+
std::string table_name;
142+
/// \brief The snapshot ID created by this commit.
143+
int64_t snapshot_id = kInvalidSnapshotId;
144+
/// \brief The sequence number assigned to this commit.
145+
int64_t sequence_number = kInvalidSequenceNumber;
146+
/// \brief The operation that was performed (write, delete, etc.).
147+
std::string operation;
148+
/// \brief Metrics collected during the commit operation.
149+
CommitMetricsResult commit_metrics;
150+
/// \brief Additional key-value metadata.
151+
std::unordered_map<std::string, std::string> metadata;
152+
};
153+
154+
} // namespace iceberg

src/iceberg/metrics/counter.cc

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/metrics/counter.h"
21+
22+
namespace iceberg {
23+
24+
namespace {
25+
26+
class NoopCounter final : public Counter {
27+
public:
28+
void Increment() override {}
29+
void Increment(int64_t) override {}
30+
int64_t Value() const override { return 0; }
31+
bool IsNoop() const override { return true; }
32+
};
33+
34+
} // namespace
35+
36+
Counter& Counter::Noop() {
37+
static NoopCounter instance;
38+
return instance;
39+
}
40+
41+
DefaultCounter::DefaultCounter(CounterUnit unit) : unit_(unit) {}
42+
43+
void DefaultCounter::Increment() { count_.fetch_add(1, std::memory_order_relaxed); }
44+
45+
void DefaultCounter::Increment(int64_t amount) {
46+
count_.fetch_add(amount, std::memory_order_relaxed);
47+
}
48+
49+
int64_t DefaultCounter::Value() const { return count_.load(std::memory_order_relaxed); }
50+
51+
} // namespace iceberg

0 commit comments

Comments
 (0)