Skip to content

Commit c1faf95

Browse files
disagg: Add O11y on object store usage summary of each tiflash store (#10764) (#10768)
ref #10763 disagg: add configurable owner-only S3 storage summary and per-store usage metrics Signed-off-by: ti-chi-bot <ti-community-prow-bot@tidb.io> Signed-off-by: JaySon-Huang <tshent@qq.com> Co-authored-by: JaySon <tshent@qq.com> Co-authored-by: JaySon-Huang <tshent@qq.com>
1 parent f050924 commit c1faf95

File tree

7 files changed

+597
-38
lines changed

7 files changed

+597
-38
lines changed

dbms/src/Common/TiFlashMetrics.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,11 @@ TiFlashMetrics::TiFlashMetrics()
7373
.Name("tiflash_storage_ru_read_bytes")
7474
.Help("Read bytes for storage RU calculation")
7575
.Register(*registry);
76+
77+
registered_s3_store_summary_bytes_family = &prometheus::BuildGauge()
78+
.Name("tiflash_storage_s3_store_summary_bytes")
79+
.Help("S3 storage summary bytes by store and file type")
80+
.Register(*registry);
7681
}
7782

7883
void TiFlashMetrics::addReplicaSyncRU(UInt32 keyspace_id, UInt64 ru)
@@ -249,4 +254,37 @@ prometheus::Counter & TiFlashMetrics::getStorageRUReadBytesCounter(
249254
return counter;
250255
}
251256
}
257+
258+
// Publish the S3 usage summary of one store through the
// `registered_s3_store_summary_bytes_family` gauge family.
//
// Two gauges are kept per store, both labelled with `store_id` and
// distinguished by `type`:
//   - type="data_file_bytes" <- `data_file_bytes`
//   - type="dt_file_bytes"   <- `dt_file_bytes`
//
// The gauges for a store are created on the first call for that `store_id`
// and cached in `registered_s3_store_summary_bytes_metrics`; later calls only
// overwrite the gauge values.
void TiFlashMetrics::setS3StoreSummaryBytes(UInt64 store_id, UInt64 data_file_bytes, UInt64 dt_file_bytes)
{
    // Fast path: the gauges of this store are already cached, a shared lock
    // is enough because the map itself is not modified.
    {
        std::shared_lock lock(s3_store_summary_bytes_mtx);
        if (auto it = registered_s3_store_summary_bytes_metrics.find(store_id);
            it != registered_s3_store_summary_bytes_metrics.end())
        {
            it->second.data_file_bytes->Set(data_file_bytes);
            it->second.dt_file_bytes->Set(dt_file_bytes);
            return;
        }
    }

    // Slow path: take the exclusive lock and look up again, another thread
    // may have registered the store between the two locks.
    std::unique_lock lock(s3_store_summary_bytes_mtx);
    auto it = registered_s3_store_summary_bytes_metrics.find(store_id);
    if (it == registered_s3_store_summary_bytes_metrics.end())
    {
        // Create both gauges BEFORE touching the map. If `Add` throws, the map
        // must not be left with an entry holding null gauge pointers, because
        // the fast path dereferences cached entries unconditionally.
        // NOTE(review): a retry after a partial failure relies on `Add`
        // returning the already-created gauge for identical labels — confirm
        // against the bundled prometheus-cpp version.
        const auto store_id_str = std::to_string(store_id);
        auto & data_file_bytes_metric
            = registered_s3_store_summary_bytes_family->Add({{"store_id", store_id_str}, {"type", "data_file_bytes"}});
        auto & dt_file_bytes_metric
            = registered_s3_store_summary_bytes_family->Add({{"store_id", store_id_str}, {"type", "dt_file_bytes"}});
        it = registered_s3_store_summary_bytes_metrics
                 .emplace(
                     store_id,
                     S3StoreSummaryBytesMetrics{
                         .data_file_bytes = &data_file_bytes_metric,
                         .dt_file_bytes = &dt_file_bytes_metric,
                     })
                 .first;
    }

    it->second.data_file_bytes->Set(data_file_bytes);
    it->second.dt_file_bytes->Set(dt_file_bytes);
}
252290
} // namespace DB

dbms/src/Common/TiFlashMetrics.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1334,6 +1334,8 @@ class TiFlashMetrics
13341334
const String & resource_group,
13351335
const DM::ReadRUType type);
13361336

1337+
void setS3StoreSummaryBytes(UInt64 store_id, UInt64 data_file_bytes, UInt64 dt_file_bytes);
1338+
13371339
private:
13381340
TiFlashMetrics();
13391341

@@ -1375,6 +1377,15 @@ class TiFlashMetrics
13751377
// {keyspace}_{resource_group}_{type} -> Counter
13761378
std::unordered_map<std::string, prometheus::Counter *> registered_storage_ru_read_bytes_metrics;
13771379

1380+
struct S3StoreSummaryBytesMetrics
1381+
{
1382+
prometheus::Gauge * data_file_bytes;
1383+
prometheus::Gauge * dt_file_bytes;
1384+
};
1385+
prometheus::Family<prometheus::Gauge> * registered_s3_store_summary_bytes_family;
1386+
std::shared_mutex s3_store_summary_bytes_mtx;
1387+
std::unordered_map<UInt64, S3StoreSummaryBytesMetrics> registered_s3_store_summary_bytes_metrics;
1388+
13781389
public:
13791390
#define MAKE_METRIC_MEMBER_M(family_name, help, type, ...) \
13801391
MetricFamily<prometheus::type> family_name \

dbms/src/Interpreters/Settings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ struct Settings
238238
M(SettingBool, remote_checkpoint_only_upload_manifest, true, "Only upload manifest data when uploading checkpoint") \
239239
M(SettingInt64, remote_gc_method, 1, "The method of running GC task on the remote store. 1 - lifecycle, 2 - scan.") \
240240
M(SettingInt64, remote_gc_interval_seconds, 3600, "The interval of running GC task on the remote store. Unit is second.") \
241+
M(SettingInt64, remote_summary_interval_seconds, 0, "The interval of collecting remote S3 storage summary. Unit is second. <=0 disables periodic summary task.") \
241242
M(SettingInt64, remote_gc_verify_consistency, 0, "[testing] Verify the consistenct of valid locks when doing GC") \
242243
M(SettingInt64, remote_gc_min_age_seconds, 3600, "The file will NOT be compacted when the time difference between the last modification is less than this threshold") \
243244
M(SettingDouble, remote_gc_ratio, 0.5, "The files with valid rate less than this threshold will be compacted") \

dbms/src/Storages/KVStore/TMTContext.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ void TMTContext::initS3GCManager(const TiFlashRaftProxyHelper * proxy_helper)
222222
}
223223
// TODO: make it reloadable
224224
remote_gc_config.interval_seconds = context.getSettingsRef().remote_gc_interval_seconds;
225+
remote_gc_config.summary_interval_seconds = context.getSettingsRef().remote_summary_interval_seconds;
225226
remote_gc_config.verify_locks = context.getSettingsRef().remote_gc_verify_consistency > 0;
226227
// set the gc_method so that S3LockService can set tagging when create delmark
227228
S3::ClientFactory::instance().gc_method = remote_gc_config.method;

dbms/src/Storages/S3/S3GCManager.cpp

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,11 @@ bool S3GCManager::runOnAllStores()
218218
return false;
219219
}
220220

221+
bool S3GCManager::isOwner() const
222+
{
223+
return gc_owner_manager->isOwner();
224+
}
225+
221226
void S3GCManager::runForStore(UInt64 gc_store_id)
222227
{
223228
// get a timepoint at the begin, only remove objects that expired compare
@@ -824,6 +829,17 @@ S3StoreStorageSummary S3GCManager::getStoreStorageSummary(StoreID store_id)
824829
String last_dtfile_key;
825830
size_t num_dtfile_keys_for_last_dtfile = 0;
826831
S3::listPrefix(*client, prefix, [&](const Aws::S3::Model::Object & object) {
832+
if (shutdown_called)
833+
{
834+
LOG_INFO(
835+
log,
836+
"getS3StorageSummary shutting down, break, store_id={} processed_keys={}",
837+
store_id,
838+
num_processed_keys);
839+
// .more=false to break the listing early
840+
return PageResult{.num_keys = 1, .more = false};
841+
}
842+
827843
const auto & key = object.GetKey();
828844
const auto view = S3FilenameView::fromKey(key);
829845
if (watch.elapsedSeconds() - last_elapsed > log_interval_seconds)
@@ -897,6 +913,7 @@ S3StoreStorageSummary S3GCManager::getStoreStorageSummary(StoreID store_id)
897913
return PageResult{.num_keys = 1, .more = true};
898914
});
899915
summary.num_keys = num_processed_keys;
916+
TiFlashMetrics::instance().setS3StoreSummaryBytes(store_id, summary.data_file.bytes, summary.dt_file.bytes);
900917
LOG_INFO(log, "getS3StorageSummary finish, elapsed={:.3f}s summary={}", watch.elapsedSeconds(), summary);
901918
return summary;
902919
}
@@ -922,6 +939,49 @@ S3GCManagerService::S3GCManagerService(
922939
[this]() { return manager->runOnAllStores(); },
923940
false,
924941
/*interval_ms*/ config.interval_seconds * 1000);
942+
943+
if (config.summary_interval_seconds <= 0)
944+
{
945+
LOG_INFO(
946+
Logger::get("S3GCManagerService"),
947+
"The periodic S3 storage summary will be disabled, summary_interval_seconds={}",
948+
config.summary_interval_seconds);
949+
}
950+
else
951+
{
952+
if (config.summary_interval_seconds < 12 * 3600)
953+
{
954+
LOG_WARNING(
955+
Logger::get("S3GCManagerService"),
956+
"The summary_interval_seconds is too small, it may cause high overhead on S3. "
957+
"It is recommended to set it to a value larger than 12 hours (43200 seconds), "
958+
"summary_interval_seconds={}",
959+
config.summary_interval_seconds);
960+
}
961+
962+
summary_timer = global_ctx.getBackgroundPool().addTask(
963+
[this]() {
964+
// Only run summary in the owner instance
965+
if (!manager || !manager->isOwner())
966+
return false;
967+
968+
try
969+
{
970+
auto summary = manager->getS3StorageSummary({});
971+
LOG_INFO(
972+
Logger::get("S3GCManagerService"),
973+
"Periodic S3 storage summary finished, num_stores={}",
974+
summary.stores.size());
975+
}
976+
catch (...)
977+
{
978+
tryLogCurrentException(Logger::get("S3GCManagerService"), "periodic getS3StorageSummary failed");
979+
}
980+
return false;
981+
},
982+
false,
983+
config.summary_interval_seconds * 1000);
984+
}
925985
}
926986

927987
S3GCManagerService::~S3GCManagerService()
@@ -942,9 +1002,16 @@ void S3GCManagerService::shutdown()
9421002
// Remove the task handle. This blocks until the running task has exited.
9431003
global_ctx.getBackgroundPool().removeTask(timer);
9441004
timer = nullptr;
945-
// then we can reset the manager
946-
manager = nullptr;
9471005
}
1006+
1007+
if (summary_timer)
1008+
{
1009+
global_ctx.getBackgroundPool().removeTask(summary_timer);
1010+
summary_timer = nullptr;
1011+
}
1012+
1013+
// then we can reset the manager
1014+
manager = nullptr;
9481015
}
9491016

9501017
void S3GCManagerService::wake() const

dbms/src/Storages/S3/S3GCManager.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ struct S3GCConfig
7171
// The interval of the S3 GC routine runs
7272
Int64 interval_seconds = 600;
7373

74+
// The interval of the periodic S3 storage summary task, in seconds.
// A value <= 0 disables the periodic summary task.
75+
Int64 summary_interval_seconds = 24 * 60 * 60;
76+
7477
// The maximum number of manifest files preserve
7578
// for each store
7679
size_t manifest_preserve_count = 10;
@@ -143,6 +146,8 @@ class S3GCManager
143146

144147
bool runOnAllStores();
145148

149+
bool isOwner() const;
150+
146151
void shutdown() { shutdown_called = true; }
147152

148153
S3StoreStorageSummary getStoreStorageSummary(StoreID store_id);
@@ -222,6 +227,7 @@ class S3GCManagerService
222227
Context & global_ctx;
223228
std::unique_ptr<S3GCManager> manager;
224229
BackgroundProcessingPool::TaskHandle timer;
230+
BackgroundProcessingPool::TaskHandle summary_timer;
225231
};
226232

227233
} // namespace DB::S3

0 commit comments

Comments
 (0)